codexparser 0.1.65 → 0.1.67

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codexparser",
3
- "version": "0.1.65",
3
+ "version": "0.1.67",
4
4
  "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -74,53 +74,57 @@ class CodexParser {
74
74
  const fullNames = [...this.bible.old, ...this.bible.new]
75
75
  const abbreviations = Object.keys(this.abbreviations)
76
76
  this.found = []
77
- // Minimal normalization: fix periods before numbers, remove trailing periods
78
- let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
79
- const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
80
- const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
77
+
78
+ // Normalize text: remove curly quotes, replace periods before numbers with colons
79
+ let normalizedText = text
80
+ .replace(/[“”]/g, "") // Remove curly quotes
81
+ .replace(/\.(?=\d)/g, ":")
81
82
  const lowerCaseText = normalizedText.toLowerCase()
82
83
  let i = 0
83
84
 
84
- const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
85
- const isNextBibleBook = (startIndex) => {
86
- const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
87
- return (
88
- lowercaseBibleFullNames.some((book) => textAfterCurrentPosition.startsWith(book)) ||
89
- lowercaseBibleAbbreviations.some((abbr) => textAfterCurrentPosition.startsWith(abbr))
90
- )
91
- }
92
- const detectSuffix = (startIndex) => {
93
- const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
94
- return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
95
- }
96
-
97
85
  while (i < lowerCaseText.length) {
98
86
  let foundBook = null
99
87
  let startIndex = -1
100
88
  let matchedLength = 0
89
+ let hasOpeningParen = false
90
+ let parenStartIndex = -1
101
91
 
102
- // Skip whitespace and special characters before checking for book
103
- while (i < lowerCaseText.length && /[\s—-]/.test(lowerCaseText[i])) {
92
+ // Skip whitespace
93
+ while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
104
94
  i++
105
95
  }
106
96
  if (i >= lowerCaseText.length) break
107
97
 
108
- for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
109
- const book = lowercaseBibleFullNames[j]
110
- if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
111
- foundBook = fullNames[j]
112
- startIndex = i
98
+ // Check for opening parenthesis
99
+ if (i < lowerCaseText.length && lowerCaseText[i] === "(") {
100
+ hasOpeningParen = true
101
+ parenStartIndex = i
102
+ i++
103
+ }
104
+
105
+ // Record potential start of reference
106
+ startIndex = i
107
+
108
+ // Check for book names or abbreviations
109
+ for (let book of fullNames) {
110
+ if (
111
+ lowerCaseText.startsWith(book.toLowerCase(), i) &&
112
+ (i + book.length >= lowerCaseText.length || /\s|:|\d/.test(lowerCaseText[i + book.length]))
113
+ ) {
114
+ foundBook = book
113
115
  matchedLength = book.length
116
+ break
114
117
  }
115
118
  }
116
-
117
119
  if (!foundBook) {
118
- for (let k = 0; k < lowercaseBibleAbbreviations.length; k++) {
119
- const abbreviation = lowercaseBibleAbbreviations[k]
120
- if (lowerCaseText.startsWith(abbreviation, i) && abbreviation.length > matchedLength) {
121
- foundBook = this.abbreviations[abbreviations[k]]
122
- startIndex = i
123
- matchedLength = abbreviation.length
120
+ for (let abbr of abbreviations) {
121
+ if (
122
+ lowerCaseText.startsWith(abbr.toLowerCase(), i) &&
123
+ (i + abbr.length >= lowerCaseText.length || /\s|:|\d/.test(lowerCaseText[i + abbr.length]))
124
+ ) {
125
+ foundBook = this.abbreviations[abbr]
126
+ matchedLength = abbr.length
127
+ break
124
128
  }
125
129
  }
126
130
  }
@@ -128,43 +132,52 @@ class CodexParser {
128
132
  if (foundBook) {
129
133
  i += matchedLength
130
134
  let chapterVerse = ""
131
- const references = []
132
- const startOfReference = startIndex
133
-
134
- while (i < normalizedText.length && isValidChapterVerseChar(normalizedText[i])) {
135
- if (isNextBibleBook(i)) break
136
- if (normalizedText[i] === ";") {
137
- const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
138
- if (formattedReference) references.push(formattedReference)
139
- chapterVerse = ""
140
- i++
141
- continue
142
- }
143
- chapterVerse += normalizedText[i]
135
+ let hasColon = false
136
+
137
+ // Capture space after book
138
+ if (i < normalizedText.length && normalizedText[i] === " ") {
139
+ chapterVerse += " "
144
140
  i++
145
141
  }
146
142
 
147
- if (chapterVerse.trim().length > 0) {
148
- const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
149
- if (formattedReference) references.push(formattedReference)
143
+ // Capture chapter-verse
144
+ while (
145
+ i < lowerCaseText.length &&
146
+ (/[\d]/.test(normalizedText[i]) ||
147
+ normalizedText[i] === ":" ||
148
+ normalizedText[i] === "," ||
149
+ normalizedText[i] === "-")
150
+ ) {
151
+ if (normalizedText[i] === ":") hasColon = true
152
+ chapterVerse += normalizedText[i]
153
+ i++
150
154
  }
151
155
 
152
- // Set endIndex to the current position
153
- let endIndex = i
154
- const suffixData = detectSuffix(i)
155
- const suffix = suffixData ? suffixData.suffix : null
156
- if (suffixData) {
157
- endIndex += suffixData.length
158
- i += suffixData.length
159
- }
156
+ // Only proceed if valid reference
157
+ if (hasColon && chapterVerse.trim().length > 0) {
158
+ let endIndex = i
159
+ let version = null
160
+
161
+ // Detect suffix
162
+ const suffixMatch = normalizedText.substring(i).match(/\b(LXX|MT)\b/i)
163
+ if (suffixMatch) {
164
+ version = suffixMatch[0].toUpperCase()
165
+ endIndex += suffixMatch[0].length
166
+ i += suffixMatch[0].length
167
+ }
160
168
 
161
- // Trim endIndex to exclude trailing whitespace or non-reference characters
162
- while (endIndex > startOfReference && /[\s]/.test(normalizedText[endIndex - 1])) {
163
- endIndex--
164
- }
169
+ // Handle closing parenthesis
170
+ if (hasOpeningParen && i < lowerCaseText.length && normalizedText[i] === ")") {
171
+ endIndex = i + 1
172
+ i++
173
+ }
174
+
175
+ // Use original text for reference only (exclude parentheses)
176
+ const originalText = normalizedText.slice(startIndex, hasOpeningParen ? endIndex - 1 : endIndex)
165
177
 
166
- references.forEach((ref) => {
178
+ // Determine type
167
179
  let type
180
+ const ref = chapterVerse.trim()
168
181
  if (ref.includes(":")) {
169
182
  if (ref.includes("-")) {
170
183
  const [start, end] = ref.split("-")
@@ -190,18 +203,21 @@ class CodexParser {
190
203
  this.found.push({
191
204
  book: foundBook,
192
205
  reference: ref,
193
- startIndex: startOfReference + 1,
194
- endIndex: endIndex + 1,
195
- version: suffix || null,
206
+ startIndex: hasOpeningParen ? parenStartIndex : startIndex,
207
+ endIndex,
208
+ version,
196
209
  type,
197
- originalText: text.slice(startOfReference, endIndex), // Use original text
210
+ originalText,
198
211
  })
199
- })
212
+ } else {
213
+ i = startIndex + 1
214
+ }
200
215
  } else {
201
216
  i++
202
217
  }
203
218
  }
204
219
 
220
+ console.log("Found references:", JSON.stringify(this.found, null, 2)) // Debug
205
221
  return this
206
222
  }
207
223
 
@@ -268,7 +284,7 @@ class CodexParser {
268
284
  // Use abbreviation with period for abbreviated books
269
285
  parsedPassage.abbr = `${abbrKey}. ${passage.reference}${passage.version ? " " + passage.version : ""}`
270
286
  } else {
271
- // Fallback to original if no abbreviation (shouldn't occur with proper data)
287
+ // Fallback to original if no abbreviation
272
288
  parsedPassage.abbr = parsedPassage.original
273
289
  }
274
290
 
@@ -1074,6 +1090,12 @@ class CodexParser {
1074
1090
  return { name: "English", value: "ENG", abbreviation: "eng" }
1075
1091
  }
1076
1092
 
1093
+ /**
1094
+ * Replaces scripture references in text with formatted references.
1095
+ * @param {string} text - The original text.
1096
+ * @param {boolean} useAbbreviations - Whether to use abbreviated book names.
1097
+ * @returns {string} Text with replaced references.
1098
+ */
1077
1099
  replace(text, useAbbreviations = true) {
1078
1100
  if (!this.passages.length) {
1079
1101
  console.log("No parsed passages to replace")
@@ -1081,15 +1103,32 @@ class CodexParser {
1081
1103
  }
1082
1104
 
1083
1105
  let result = text
1084
- // Process replacements in reverse order to avoid index shifting
1085
1106
  for (let i = this.passages.length - 1; i >= 0; i--) {
1086
1107
  const passage = this.passages[i]
1087
- const { startIndex, endIndex, originalText, abbr, original } = passage
1088
-
1089
- // Use abbreviated or full reference
1108
+ const { originalText, abbr, original } = passage
1090
1109
  const newReference = useAbbreviations ? abbr : original
1091
- console.log(`Replacing "${originalText}" with "${newReference}" at [${startIndex}, ${endIndex}]`) // Debug
1092
- result = result.slice(0, startIndex) + newReference + result.slice(endIndex)
1110
+
1111
+ // Create regex to match originalText with optional parentheses
1112
+ const escapedOriginalText = originalText.replace(/([:.])/g, "\\$1").replace(/\s+/g, "\\s*")
1113
+ const regex = new RegExp(`(\\()?\\s*${escapedOriginalText}\\s*(\\))?`, "g")
1114
+
1115
+ // Find all matches
1116
+ const matches = [...result.matchAll(regex)]
1117
+ if (matches.length > 0) {
1118
+ // Process matches in reverse to avoid index shifting
1119
+ for (let j = matches.length - 1; j >= 0; j--) {
1120
+ const match = matches[j]
1121
+ const startIndex = match.index
1122
+ const endIndex = startIndex + match[0].length
1123
+ // Preserve parentheses if present in the match
1124
+ const hasParens = match[1] === "(" && match[2] === ")"
1125
+ const replacement = hasParens ? `(${newReference})` : newReference
1126
+ console.log(`Replacing "${match[0]}" with "${replacement}" at [${startIndex}, ${endIndex}]`)
1127
+ result = result.slice(0, startIndex) + replacement + result.slice(endIndex)
1128
+ }
1129
+ } else {
1130
+ console.log(`No match found for originalText "${originalText}"`)
1131
+ }
1093
1132
  }
1094
1133
 
1095
1134
  return result
package/src/abbr.js CHANGED
@@ -1,19 +1,19 @@
1
1
  const abbrevations = {
2
2
  Gen: "Genesis",
3
- Ge: "Genesis",
3
+ /* Ge: "Genesis", */
4
4
  Gn: "Genesis",
5
5
  Ex: "Exodus",
6
6
  Exo: "Exodus",
7
7
  Exod: "Exodus",
8
8
  Lev: "Leviticus",
9
- Le: "Leviticus",
9
+ /* Le: "Leviticus", */
10
10
  Lv: "Leviticus",
11
11
  Num: "Numbers",
12
12
  Nu: "Numbers",
13
13
  Nb: "Numbers",
14
14
  Nm: "Numbers",
15
15
  Deut: "Deuteronomy",
16
- De: "Deuteronomy",
16
+ /* De: "Deuteronomy", */
17
17
  Dt: "Deuteronomy",
18
18
  Josh: "Joshua",
19
19
  Jos: "Joshua",
@@ -24,7 +24,7 @@ const abbrevations = {
24
24
  Jdg: "Judges",
25
25
  Ruth: "Ruth",
26
26
  Rth: "Ruth",
27
- Ru: "Ruth",
27
+ /* Ru: "Ruth", */
28
28
  "1Sam": "1 Samuel",
29
29
  "1sam": "1 Samuel",
30
30
  "1SA": "1 Samuel",
@@ -92,10 +92,10 @@ const abbrevations = {
92
92
  Ezra: "Ezra",
93
93
  Ezr: "Ezra",
94
94
  Neh: "Nehemiah",
95
- Ne: "Nehemiah",
95
+ /* Ne: "Nehemiah", */
96
96
  Esth: "Esther",
97
97
  Est: "Esther",
98
- Es: "Esther",
98
+ /* Es: "Esther", */
99
99
  Job: "Job",
100
100
  Jb: "Job",
101
101
  Ps: "Psalms",
@@ -107,12 +107,12 @@ const abbrevations = {
107
107
  Prov: "Proverbs",
108
108
  Pro: "Proverbs",
109
109
  Prv: "Proverbs",
110
- Pr: "Proverbs",
110
+ /* Pr: "Proverbs", */
111
111
  Eccl: "Ecclesiastes",
112
112
  Eccles: "Ecclesiastes",
113
113
  Eccle: "Ecclesiastes",
114
114
  Ecc: "Ecclesiastes",
115
- Ec: "Ecclesiastes",
115
+ /* Ec: "Ecclesiastes", */
116
116
  Qoh: "Ecclesiastes",
117
117
  Song: "Song of Songs",
118
118
  SOS: "Song of Songs",
@@ -121,22 +121,22 @@ const abbrevations = {
121
121
  Isa: "Isaiah",
122
122
  Is: "Isaiah",
123
123
  Jer: "Jeremiah",
124
- Je: "Jeremiah",
124
+ /* Je: "Jeremiah", */
125
125
  Lam: "Lamentations",
126
126
  La: "Lamentations",
127
127
  Ezek: "Ezekiel",
128
128
  Eze: "Ezekiel",
129
129
  Ezk: "Ezekiel",
130
130
  Dan: "Daniel",
131
- Da: "Daniel",
131
+ /* Da: "Daniel", */
132
132
  Dn: "Daniel",
133
133
  Hos: "Hosea",
134
- Ho: "Hosea",
134
+ /* Ho: "Hosea", */
135
135
  Joel: "Joel",
136
136
  Jl: "Joel",
137
137
  Amos: "Amos",
138
138
  Am: "Amos",
139
- Ob: "Obadiah",
139
+ /* Ob: "Obadiah", */
140
140
  Obad: "Obadiah",
141
141
  Jonah: "Jonah",
142
142
  Jnh: "Jonah",
@@ -144,7 +144,7 @@ const abbrevations = {
144
144
  Mic: "Micah",
145
145
  Mc: "Micah",
146
146
  Nah: "Nahum",
147
- Na: "Nahum",
147
+ /* Na: "Nahum", */
148
148
  Hb: "Habakkuk",
149
149
  Hab: "Habakkuk",
150
150
  Zeph: "Zephaniah",
@@ -177,7 +177,7 @@ const abbrevations = {
177
177
  Act: "Acts",
178
178
  Ac: "Acts",
179
179
  Rom: "Romans",
180
- Ro: "Romans",
180
+ /* Ro: "Romans", */
181
181
  Rm: "Romans",
182
182
  "1Cor": "1 Corinthians",
183
183
  "1 Cor": "1 Corinthians",
@@ -200,14 +200,14 @@ const abbrevations = {
200
200
  "2 co": "2 Corinthians",
201
201
  "2co": "2 Corinthians",
202
202
  Gal: "Galatians",
203
- Ga: "Galatians",
203
+ /* Ga: "Galatians", */
204
204
  Eph: "Ephesians",
205
205
  Ephes: "Ephesians",
206
206
  Php: "Philippians",
207
207
  Phil: "Philippians",
208
208
  Pp: "Philippians",
209
209
  Col: "Colossians",
210
- Co: "Colossians",
210
+ /* Co: "Colossians", */
211
211
  "1Thess": "1 Thessalonians",
212
212
  "1 Thess": "1 Thessalonians",
213
213
  "1Thes": "1 Thessalonians",
@@ -233,11 +233,11 @@ const abbrevations = {
233
233
  Titus: "Titus",
234
234
  Tt: "Titus",
235
235
  Tit: "Titus",
236
- Ti: "Titus",
236
+ /* Ti: "Titus", */
237
237
  Phlm: "Philemon",
238
238
  Phm: "Philemon",
239
239
  Philem: "Philemon",
240
- He: "Hebrews",
240
+ /* He: "Hebrews", */
241
241
  Hebr: "Hebrews",
242
242
  Heb: "Hebrews",
243
243
  Jas: "James",
@@ -275,7 +275,7 @@ const abbrevations = {
275
275
  Jud: "Jude",
276
276
  Jd: "Jude",
277
277
  Rev: "Revelation",
278
- Re: "Revelation",
278
+ /* Re: "Revelation", */
279
279
  Mt: "Matthew",
280
280
  Mc: "Mark",
281
281
  Act: "Acts",