codexparser 0.1.65 → 0.1.67
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/CodexParser.js +111 -72
- package/src/abbr.js +19 -19
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.65",
|
|
3
|
+
"version": "0.1.67",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -74,53 +74,57 @@ class CodexParser {
|
|
|
74
74
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
75
75
|
const abbreviations = Object.keys(this.abbreviations)
|
|
76
76
|
this.found = []
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
77
|
+
|
|
78
|
+
// Normalize text: remove curly quotes, replace periods before numbers with colons
|
|
79
|
+
let normalizedText = text
|
|
80
|
+
.replace(/[“”]/g, "") // Remove curly quotes
|
|
81
|
+
.replace(/\.(?=\d)/g, ":")
|
|
81
82
|
const lowerCaseText = normalizedText.toLowerCase()
|
|
82
83
|
let i = 0
|
|
83
84
|
|
|
84
|
-
const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
|
|
85
|
-
const isNextBibleBook = (startIndex) => {
|
|
86
|
-
const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
|
|
87
|
-
return (
|
|
88
|
-
lowercaseBibleFullNames.some((book) => textAfterCurrentPosition.startsWith(book)) ||
|
|
89
|
-
lowercaseBibleAbbreviations.some((abbr) => textAfterCurrentPosition.startsWith(abbr))
|
|
90
|
-
)
|
|
91
|
-
}
|
|
92
|
-
const detectSuffix = (startIndex) => {
|
|
93
|
-
const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
|
|
94
|
-
return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
|
|
95
|
-
}
|
|
96
|
-
|
|
97
85
|
while (i < lowerCaseText.length) {
|
|
98
86
|
let foundBook = null
|
|
99
87
|
let startIndex = -1
|
|
100
88
|
let matchedLength = 0
|
|
89
|
+
let hasOpeningParen = false
|
|
90
|
+
let parenStartIndex = -1
|
|
101
91
|
|
|
102
|
-
// Skip whitespace
|
|
103
|
-
while (i < lowerCaseText.length &&
|
|
92
|
+
// Skip whitespace
|
|
93
|
+
while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
|
|
104
94
|
i++
|
|
105
95
|
}
|
|
106
96
|
if (i >= lowerCaseText.length) break
|
|
107
97
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
98
|
+
// Check for opening parenthesis
|
|
99
|
+
if (i < lowerCaseText.length && lowerCaseText[i] === "(") {
|
|
100
|
+
hasOpeningParen = true
|
|
101
|
+
parenStartIndex = i
|
|
102
|
+
i++
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// Record potential start of reference
|
|
106
|
+
startIndex = i
|
|
107
|
+
|
|
108
|
+
// Check for book names or abbreviations
|
|
109
|
+
for (let book of fullNames) {
|
|
110
|
+
if (
|
|
111
|
+
lowerCaseText.startsWith(book.toLowerCase(), i) &&
|
|
112
|
+
(i + book.length >= lowerCaseText.length || /\s|:|\d/.test(lowerCaseText[i + book.length]))
|
|
113
|
+
) {
|
|
114
|
+
foundBook = book
|
|
113
115
|
matchedLength = book.length
|
|
116
|
+
break
|
|
114
117
|
}
|
|
115
118
|
}
|
|
116
|
-
|
|
117
119
|
if (!foundBook) {
|
|
118
|
-
for (let
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
120
|
+
for (let abbr of abbreviations) {
|
|
121
|
+
if (
|
|
122
|
+
lowerCaseText.startsWith(abbr.toLowerCase(), i) &&
|
|
123
|
+
(i + abbr.length >= lowerCaseText.length || /\s|:|\d/.test(lowerCaseText[i + abbr.length]))
|
|
124
|
+
) {
|
|
125
|
+
foundBook = this.abbreviations[abbr]
|
|
126
|
+
matchedLength = abbr.length
|
|
127
|
+
break
|
|
124
128
|
}
|
|
125
129
|
}
|
|
126
130
|
}
|
|
@@ -128,43 +132,52 @@ class CodexParser {
|
|
|
128
132
|
if (foundBook) {
|
|
129
133
|
i += matchedLength
|
|
130
134
|
let chapterVerse = ""
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
if (normalizedText[i] === ";") {
|
|
137
|
-
const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
|
|
138
|
-
if (formattedReference) references.push(formattedReference)
|
|
139
|
-
chapterVerse = ""
|
|
140
|
-
i++
|
|
141
|
-
continue
|
|
142
|
-
}
|
|
143
|
-
chapterVerse += normalizedText[i]
|
|
135
|
+
let hasColon = false
|
|
136
|
+
|
|
137
|
+
// Capture space after book
|
|
138
|
+
if (i < normalizedText.length && normalizedText[i] === " ") {
|
|
139
|
+
chapterVerse += " "
|
|
144
140
|
i++
|
|
145
141
|
}
|
|
146
142
|
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
143
|
+
// Capture chapter-verse
|
|
144
|
+
while (
|
|
145
|
+
i < lowerCaseText.length &&
|
|
146
|
+
(/[\d]/.test(normalizedText[i]) ||
|
|
147
|
+
normalizedText[i] === ":" ||
|
|
148
|
+
normalizedText[i] === "," ||
|
|
149
|
+
normalizedText[i] === "-")
|
|
150
|
+
) {
|
|
151
|
+
if (normalizedText[i] === ":") hasColon = true
|
|
152
|
+
chapterVerse += normalizedText[i]
|
|
153
|
+
i++
|
|
150
154
|
}
|
|
151
155
|
|
|
152
|
-
//
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
156
|
+
// Only proceed if valid reference
|
|
157
|
+
if (hasColon && chapterVerse.trim().length > 0) {
|
|
158
|
+
let endIndex = i
|
|
159
|
+
let version = null
|
|
160
|
+
|
|
161
|
+
// Detect suffix
|
|
162
|
+
const suffixMatch = normalizedText.substring(i).match(/\b(LXX|MT)\b/i)
|
|
163
|
+
if (suffixMatch) {
|
|
164
|
+
version = suffixMatch[0].toUpperCase()
|
|
165
|
+
endIndex += suffixMatch[0].length
|
|
166
|
+
i += suffixMatch[0].length
|
|
167
|
+
}
|
|
160
168
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
169
|
+
// Handle closing parenthesis
|
|
170
|
+
if (hasOpeningParen && i < lowerCaseText.length && normalizedText[i] === ")") {
|
|
171
|
+
endIndex = i + 1
|
|
172
|
+
i++
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
// Use original text for reference only (exclude parentheses)
|
|
176
|
+
const originalText = normalizedText.slice(startIndex, hasOpeningParen ? endIndex - 1 : endIndex)
|
|
165
177
|
|
|
166
|
-
|
|
178
|
+
// Determine type
|
|
167
179
|
let type
|
|
180
|
+
const ref = chapterVerse.trim()
|
|
168
181
|
if (ref.includes(":")) {
|
|
169
182
|
if (ref.includes("-")) {
|
|
170
183
|
const [start, end] = ref.split("-")
|
|
@@ -190,18 +203,21 @@ class CodexParser {
|
|
|
190
203
|
this.found.push({
|
|
191
204
|
book: foundBook,
|
|
192
205
|
reference: ref,
|
|
193
|
-
startIndex:
|
|
194
|
-
endIndex
|
|
195
|
-
version
|
|
206
|
+
startIndex: hasOpeningParen ? parenStartIndex : startIndex,
|
|
207
|
+
endIndex,
|
|
208
|
+
version,
|
|
196
209
|
type,
|
|
197
|
-
originalText
|
|
210
|
+
originalText,
|
|
198
211
|
})
|
|
199
|
-
}
|
|
212
|
+
} else {
|
|
213
|
+
i = startIndex + 1
|
|
214
|
+
}
|
|
200
215
|
} else {
|
|
201
216
|
i++
|
|
202
217
|
}
|
|
203
218
|
}
|
|
204
219
|
|
|
220
|
+
console.log("Found references:", JSON.stringify(this.found, null, 2)) // Debug
|
|
205
221
|
return this
|
|
206
222
|
}
|
|
207
223
|
|
|
@@ -268,7 +284,7 @@ class CodexParser {
|
|
|
268
284
|
// Use abbreviation with period for abbreviated books
|
|
269
285
|
parsedPassage.abbr = `${abbrKey}. ${passage.reference}${passage.version ? " " + passage.version : ""}`
|
|
270
286
|
} else {
|
|
271
|
-
// Fallback to original if no abbreviation
|
|
287
|
+
// Fallback to original if no abbreviation
|
|
272
288
|
parsedPassage.abbr = parsedPassage.original
|
|
273
289
|
}
|
|
274
290
|
|
|
@@ -1074,6 +1090,12 @@ class CodexParser {
|
|
|
1074
1090
|
return { name: "English", value: "ENG", abbreviation: "eng" }
|
|
1075
1091
|
}
|
|
1076
1092
|
|
|
1093
|
+
/**
|
|
1094
|
+
* Replaces scripture references in text with formatted references.
|
|
1095
|
+
* @param {string} text - The original text.
|
|
1096
|
+
* @param {boolean} useAbbreviations - Whether to use abbreviated book names.
|
|
1097
|
+
* @returns {string} Text with replaced references.
|
|
1098
|
+
*/
|
|
1077
1099
|
replace(text, useAbbreviations = true) {
|
|
1078
1100
|
if (!this.passages.length) {
|
|
1079
1101
|
console.log("No parsed passages to replace")
|
|
@@ -1081,15 +1103,32 @@ class CodexParser {
|
|
|
1081
1103
|
}
|
|
1082
1104
|
|
|
1083
1105
|
let result = text
|
|
1084
|
-
// Process replacements in reverse order to avoid index shifting
|
|
1085
1106
|
for (let i = this.passages.length - 1; i >= 0; i--) {
|
|
1086
1107
|
const passage = this.passages[i]
|
|
1087
|
-
const {
|
|
1088
|
-
|
|
1089
|
-
// Use abbreviated or full reference
|
|
1108
|
+
const { originalText, abbr, original } = passage
|
|
1090
1109
|
const newReference = useAbbreviations ? abbr : original
|
|
1091
|
-
|
|
1092
|
-
|
|
1110
|
+
|
|
1111
|
+
// Create regex to match originalText with optional parentheses
|
|
1112
|
+
const escapedOriginalText = originalText.replace(/([:.])/g, "\\$1").replace(/\s+/g, "\\s*")
|
|
1113
|
+
const regex = new RegExp(`(\\()?\\s*${escapedOriginalText}\\s*(\\))?`, "g")
|
|
1114
|
+
|
|
1115
|
+
// Find all matches
|
|
1116
|
+
const matches = [...result.matchAll(regex)]
|
|
1117
|
+
if (matches.length > 0) {
|
|
1118
|
+
// Process matches in reverse to avoid index shifting
|
|
1119
|
+
for (let j = matches.length - 1; j >= 0; j--) {
|
|
1120
|
+
const match = matches[j]
|
|
1121
|
+
const startIndex = match.index
|
|
1122
|
+
const endIndex = startIndex + match[0].length
|
|
1123
|
+
// Preserve parentheses if present in the match
|
|
1124
|
+
const hasParens = match[1] === "(" && match[2] === ")"
|
|
1125
|
+
const replacement = hasParens ? `(${newReference})` : newReference
|
|
1126
|
+
console.log(`Replacing "${match[0]}" with "${replacement}" at [${startIndex}, ${endIndex}]`)
|
|
1127
|
+
result = result.slice(0, startIndex) + replacement + result.slice(endIndex)
|
|
1128
|
+
}
|
|
1129
|
+
} else {
|
|
1130
|
+
console.log(`No match found for originalText "${originalText}"`)
|
|
1131
|
+
}
|
|
1093
1132
|
}
|
|
1094
1133
|
|
|
1095
1134
|
return result
|
package/src/abbr.js
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
const abbrevations = {
|
|
2
2
|
Gen: "Genesis",
|
|
3
|
-
Ge: "Genesis",
|
|
3
|
+
/* Ge: "Genesis", */
|
|
4
4
|
Gn: "Genesis",
|
|
5
5
|
Ex: "Exodus",
|
|
6
6
|
Exo: "Exodus",
|
|
7
7
|
Exod: "Exodus",
|
|
8
8
|
Lev: "Leviticus",
|
|
9
|
-
Le: "Leviticus",
|
|
9
|
+
/* Le: "Leviticus", */
|
|
10
10
|
Lv: "Leviticus",
|
|
11
11
|
Num: "Numbers",
|
|
12
12
|
Nu: "Numbers",
|
|
13
13
|
Nb: "Numbers",
|
|
14
14
|
Nm: "Numbers",
|
|
15
15
|
Deut: "Deuteronomy",
|
|
16
|
-
De: "Deuteronomy",
|
|
16
|
+
/* De: "Deuteronomy", */
|
|
17
17
|
Dt: "Deuteronomy",
|
|
18
18
|
Josh: "Joshua",
|
|
19
19
|
Jos: "Joshua",
|
|
@@ -24,7 +24,7 @@ const abbrevations = {
|
|
|
24
24
|
Jdg: "Judges",
|
|
25
25
|
Ruth: "Ruth",
|
|
26
26
|
Rth: "Ruth",
|
|
27
|
-
Ru: "Ruth",
|
|
27
|
+
/* Ru: "Ruth", */
|
|
28
28
|
"1Sam": "1 Samuel",
|
|
29
29
|
"1sam": "1 Samuel",
|
|
30
30
|
"1SA": "1 Samuel",
|
|
@@ -92,10 +92,10 @@ const abbrevations = {
|
|
|
92
92
|
Ezra: "Ezra",
|
|
93
93
|
Ezr: "Ezra",
|
|
94
94
|
Neh: "Nehemiah",
|
|
95
|
-
Ne: "Nehemiah",
|
|
95
|
+
/* Ne: "Nehemiah", */
|
|
96
96
|
Esth: "Esther",
|
|
97
97
|
Est: "Esther",
|
|
98
|
-
Es: "Esther",
|
|
98
|
+
/* Es: "Esther", */
|
|
99
99
|
Job: "Job",
|
|
100
100
|
Jb: "Job",
|
|
101
101
|
Ps: "Psalms",
|
|
@@ -107,12 +107,12 @@ const abbrevations = {
|
|
|
107
107
|
Prov: "Proverbs",
|
|
108
108
|
Pro: "Proverbs",
|
|
109
109
|
Prv: "Proverbs",
|
|
110
|
-
Pr: "Proverbs",
|
|
110
|
+
/* Pr: "Proverbs", */
|
|
111
111
|
Eccl: "Ecclesiastes",
|
|
112
112
|
Eccles: "Ecclesiastes",
|
|
113
113
|
Eccle: "Ecclesiastes",
|
|
114
114
|
Ecc: "Ecclesiastes",
|
|
115
|
-
Ec: "Ecclesiastes",
|
|
115
|
+
/* Ec: "Ecclesiastes", */
|
|
116
116
|
Qoh: "Ecclesiastes",
|
|
117
117
|
Song: "Song of Songs",
|
|
118
118
|
SOS: "Song of Songs",
|
|
@@ -121,22 +121,22 @@ const abbrevations = {
|
|
|
121
121
|
Isa: "Isaiah",
|
|
122
122
|
Is: "Isaiah",
|
|
123
123
|
Jer: "Jeremiah",
|
|
124
|
-
Je: "Jeremiah",
|
|
124
|
+
/* Je: "Jeremiah", */
|
|
125
125
|
Lam: "Lamentations",
|
|
126
126
|
La: "Lamentations",
|
|
127
127
|
Ezek: "Ezekiel",
|
|
128
128
|
Eze: "Ezekiel",
|
|
129
129
|
Ezk: "Ezekiel",
|
|
130
130
|
Dan: "Daniel",
|
|
131
|
-
Da: "Daniel",
|
|
131
|
+
/* Da: "Daniel", */
|
|
132
132
|
Dn: "Daniel",
|
|
133
133
|
Hos: "Hosea",
|
|
134
|
-
Ho: "Hosea",
|
|
134
|
+
/* Ho: "Hosea", */
|
|
135
135
|
Joel: "Joel",
|
|
136
136
|
Jl: "Joel",
|
|
137
137
|
Amos: "Amos",
|
|
138
138
|
Am: "Amos",
|
|
139
|
-
Ob: "Obadiah",
|
|
139
|
+
/* Ob: "Obadiah", */
|
|
140
140
|
Obad: "Obadiah",
|
|
141
141
|
Jonah: "Jonah",
|
|
142
142
|
Jnh: "Jonah",
|
|
@@ -144,7 +144,7 @@ const abbrevations = {
|
|
|
144
144
|
Mic: "Micah",
|
|
145
145
|
Mc: "Micah",
|
|
146
146
|
Nah: "Nahum",
|
|
147
|
-
Na: "Nahum",
|
|
147
|
+
/* Na: "Nahum", */
|
|
148
148
|
Hb: "Habakkuk",
|
|
149
149
|
Hab: "Habakkuk",
|
|
150
150
|
Zeph: "Zephaniah",
|
|
@@ -177,7 +177,7 @@ const abbrevations = {
|
|
|
177
177
|
Act: "Acts",
|
|
178
178
|
Ac: "Acts",
|
|
179
179
|
Rom: "Romans",
|
|
180
|
-
Ro: "Romans",
|
|
180
|
+
/* Ro: "Romans", */
|
|
181
181
|
Rm: "Romans",
|
|
182
182
|
"1Cor": "1 Corinthians",
|
|
183
183
|
"1 Cor": "1 Corinthians",
|
|
@@ -200,14 +200,14 @@ const abbrevations = {
|
|
|
200
200
|
"2 co": "2 Corinthians",
|
|
201
201
|
"2co": "2 Corinthians",
|
|
202
202
|
Gal: "Galatians",
|
|
203
|
-
Ga: "Galatians",
|
|
203
|
+
/* Ga: "Galatians", */
|
|
204
204
|
Eph: "Ephesians",
|
|
205
205
|
Ephes: "Ephesians",
|
|
206
206
|
Php: "Philippians",
|
|
207
207
|
Phil: "Philippians",
|
|
208
208
|
Pp: "Philippians",
|
|
209
209
|
Col: "Colossians",
|
|
210
|
-
Co: "Colossians",
|
|
210
|
+
/* Co: "Colossians", */
|
|
211
211
|
"1Thess": "1 Thessalonians",
|
|
212
212
|
"1 Thess": "1 Thessalonians",
|
|
213
213
|
"1Thes": "1 Thessalonians",
|
|
@@ -233,11 +233,11 @@ const abbrevations = {
|
|
|
233
233
|
Titus: "Titus",
|
|
234
234
|
Tt: "Titus",
|
|
235
235
|
Tit: "Titus",
|
|
236
|
-
Ti: "Titus",
|
|
236
|
+
/* Ti: "Titus", */
|
|
237
237
|
Phlm: "Philemon",
|
|
238
238
|
Phm: "Philemon",
|
|
239
239
|
Philem: "Philemon",
|
|
240
|
-
He: "Hebrews",
|
|
240
|
+
/* He: "Hebrews", */
|
|
241
241
|
Hebr: "Hebrews",
|
|
242
242
|
Heb: "Hebrews",
|
|
243
243
|
Jas: "James",
|
|
@@ -275,7 +275,7 @@ const abbrevations = {
|
|
|
275
275
|
Jud: "Jude",
|
|
276
276
|
Jd: "Jude",
|
|
277
277
|
Rev: "Revelation",
|
|
278
|
-
Re: "Revelation",
|
|
278
|
+
/* Re: "Revelation", */
|
|
279
279
|
Mt: "Matthew",
|
|
280
280
|
Mc: "Mark",
|
|
281
281
|
Act: "Acts",
|