codexparser 0.1.81 → 0.1.83

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/CodexParser.js +59 -131
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codexparser",
3
- "version": "0.1.81",
3
+ "version": "0.1.83",
4
4
  "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -85,6 +85,11 @@ class CodexParser {
85
85
  return singleChapterBook ? singleChapterBook[book][chapter] || [] : this.chapterVerses[book]?.[chapter] || []
86
86
  }
87
87
 
88
+ /**
89
+ * Scans text for scripture references and stores them in `this.found`.
90
+ * @param {string} text - The text to scan.
91
+ * @returns {CodexParser} The parser instance for method chaining.
92
+ */
88
93
  /**
89
94
  * Scans text for scripture references and stores them in `this.found`.
90
95
  * @param {string} text - The text to scan.
@@ -94,81 +99,53 @@ class CodexParser {
94
99
  const fullNames = [...this.bible.old, ...this.bible.new]
95
100
  const abbreviations = Object.keys(this.abbreviations)
96
101
  this.found = []
97
- // Normalize text for parsing but keep original for originalText
98
- let normalizedText = text
99
- .replace(/[“”]/g, "") // Remove curly quotes
100
- .replace(/\.(?=\d)/g, ":") // Replace periods before digits with colons (e.g., "Re13.8" -> "Re13:8")
101
- .replace(/\s+/g, " ") // Normalize multiple spaces to single
102
+ // Minimal normalization: fix periods before numbers, remove trailing periods
103
+ let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
102
104
  const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
103
105
  const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
104
106
  const lowerCaseText = normalizedText.toLowerCase()
105
107
  let i = 0
106
108
 
107
- console.log("[Scan] Input text:", text)
108
- console.log("[Scan] Normalized text:", normalizedText)
109
-
110
- const isValidChapterVerseChar = (char) => /[^A-Za-z]/.test(char) // Non-letter characters
109
+ const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
111
110
  const isNextBibleBook = (startIndex) => {
112
111
  const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
113
- // Check if the text starts with a book name or abbreviation followed by a digit
114
112
  return (
115
- lowercaseBibleFullNames.some((book) => {
116
- if (textAfterCurrentPosition.startsWith(book)) {
117
- const nextIndex = startIndex + book.length
118
- const nextChar = lowerCaseText[nextIndex]
119
- return nextChar && /\d/.test(nextChar)
120
- }
121
- return false
122
- }) ||
123
- lowercaseBibleAbbreviations.some((abbr) => {
124
- if (textAfterCurrentPosition.startsWith(abbr)) {
125
- const nextIndex = startIndex + abbr.length
126
- const nextChar = lowerCaseText[nextIndex]
127
- return nextChar && (/\d/.test(nextChar) || /\./.test(nextChar))
128
- }
129
- return false
130
- })
113
+ lowercaseBibleFullNames.some((book) => textAfterCurrentPosition.startsWith(book)) ||
114
+ lowercaseBibleAbbreviations.some((abbr) => textAfterCurrentPosition.startsWith(abbr))
131
115
  )
132
116
  }
133
- const detectSuffix = (startIndex, inputText) => {
134
- const suffixMatch = inputText.substring(startIndex).match(/\b(LXX|MT)\b/i)
135
- return suffixMatch ? { version: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
117
+ const detectSuffix = (startIndex) => {
118
+ const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
119
+ return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
136
120
  }
137
121
 
138
122
  while (i < lowerCaseText.length) {
139
123
  let foundBook = null
124
+ let startIndex = -1
140
125
  let matchedLength = 0
141
- let originalBookText = ""
142
- let startIndex = i
143
126
 
144
- // Check full book names
127
+ // Skip whitespace and special characters before checking for book
128
+ while (i < lowerCaseText.length && /[\s—-]/.test(lowerCaseText[i])) {
129
+ i++
130
+ }
131
+ if (i >= lowerCaseText.length) break
132
+
145
133
  for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
146
134
  const book = lowercaseBibleFullNames[j]
147
- if (
148
- lowerCaseText.startsWith(book, i) &&
149
- (i + book.length >= lowerCaseText.length || /\d/.test(lowerCaseText[i + book.length]))
150
- ) {
135
+ if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
151
136
  foundBook = fullNames[j]
137
+ startIndex = i
152
138
  matchedLength = book.length
153
- originalBookText = text.slice(i, i + book.length)
154
- console.log(`[Scan] Matched full book name: "${foundBook}" at index ${i}`)
155
139
  }
156
140
  }
157
141
 
158
- // Check abbreviations
159
142
  if (!foundBook) {
160
143
  for (let k = 0; k < lowercaseBibleAbbreviations.length; k++) {
161
144
  const abbreviation = lowercaseBibleAbbreviations[k]
162
- const abbrPattern = abbreviation.replace(/\./g, "\\.?")
163
- const regex = new RegExp(`^${abbrPattern}(\\.?\\s*\\d)`, "i")
164
- const match = lowerCaseText.slice(i).match(regex)
165
- if (match) {
145
+ if (lowerCaseText.startsWith(abbreviation, i) && abbreviation.length > matchedLength) {
166
146
  foundBook = this.abbreviations[abbreviations[k]]
167
- matchedLength = match[0].length - match[1].length // Exclude chapter-verse part
168
- originalBookText = text.slice(i, i + matchedLength)
169
- console.log(
170
- `[Scan] Matched abbreviation: "${abbreviations[k]}" -> "${foundBook}" at index ${i}`
171
- )
147
+ startIndex = i
148
+ matchedLength = abbreviation.length
172
149
  }
173
150
  }
174
151
  }
@@ -176,91 +153,52 @@ class CodexParser {
176
153
  if (foundBook) {
177
154
  i += matchedLength
178
155
  let chapterVerse = ""
179
- let originalChapterVerseText = ""
180
156
  const references = []
157
+ const startOfReference = startIndex
181
158
 
182
- // Capture chapter-verse until a letter (potential new book) or semicolon
183
159
  while (i < normalizedText.length && isValidChapterVerseChar(normalizedText[i])) {
184
- if (isNextBibleBook(i)) {
185
- console.log(`[Scan] Detected next book at index ${i}, breaking`)
186
- break
187
- }
160
+ if (isNextBibleBook(i)) break
188
161
  if (normalizedText[i] === ";") {
189
- const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9:,\-]+$/g, "")
190
- if (formattedReference) {
191
- // Find the last digit in the reference
192
- const lastDigitMatch = formattedReference.match(/\d(?=[^0-9]*$)/)
193
- let endIndex = i - 1 // Default to position before semicolon
194
- if (lastDigitMatch) {
195
- const lastDigitIndex = formattedReference.lastIndexOf(lastDigitMatch[0])
196
- endIndex = startIndex + matchedLength + lastDigitIndex
197
- }
198
- references.push({
199
- reference: formattedReference,
200
- originalText: (originalBookText + originalChapterVerseText).trim(),
201
- startIndex,
202
- endIndex,
203
- })
204
- }
162
+ const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
163
+ if (formattedReference) references.push(formattedReference)
205
164
  chapterVerse = ""
206
- originalChapterVerseText = ""
207
- originalBookText = foundBook // Reuse book for semicolon-separated references
208
- startIndex = i + 1 // Start of next reference
209
165
  i++
210
166
  continue
211
167
  }
212
168
  chapterVerse += normalizedText[i]
213
- originalChapterVerseText += text[i]
214
169
  i++
215
170
  }
216
171
 
217
- // Add any remaining reference
218
172
  if (chapterVerse.trim().length > 0) {
219
- const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9:,\-]+$/g, "")
220
- if (formattedReference) {
221
- // Find the last digit in the reference
222
- const lastDigitMatch = formattedReference.match(/\d(?=[^0-9]*$)/)
223
- let endIndex = i - 1 // Default to last character
224
- if (lastDigitMatch) {
225
- const lastDigitIndex = formattedReference.lastIndexOf(lastDigitMatch[0])
226
- endIndex = startIndex + matchedLength + lastDigitIndex
227
- }
228
- references.push({
229
- reference: formattedReference,
230
- originalText: (originalBookText + originalChapterVerseText).trim(),
231
- startIndex,
232
- endIndex,
233
- })
234
- }
173
+ const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
174
+ if (formattedReference) references.push(formattedReference)
235
175
  }
236
176
 
237
- // Process each reference
238
- references.forEach((refObj) => {
239
- // Detect version suffix (LXX or MT)
240
- let version = null
241
- let originalText = refObj.originalText
242
- const suffix = detectSuffix(i, text)
243
- if (suffix) {
244
- version = suffix.version
245
- originalText += ` ${version}`
246
- i += suffix.length
247
- // Update endIndex if version suffix follows a digit
248
- if (refObj.endIndex === i - suffix.length - 1) {
249
- refObj.endIndex = i - 1
250
- }
251
- }
177
+ // Set endIndex to the current position
178
+ let endIndex = i
179
+ const suffixData = detectSuffix(i)
180
+ const suffix = suffixData ? suffixData.suffix : null
181
+ if (suffixData) {
182
+ endIndex += suffixData.length
183
+ i += suffixData.length
184
+ }
185
+
186
+ // Trim endIndex to exclude trailing whitespace or non-reference characters
187
+ while (endIndex > startOfReference && /[\s]/.test(normalizedText[endIndex - 1])) {
188
+ endIndex--
189
+ }
252
190
 
191
+ references.forEach((ref) => {
253
192
  let type
254
- let ref = refObj.reference.replace(/^\.\s*/, "") // Remove leading period and space
255
- if (this.config.booksOnly && !ref) {
256
- type = "book_only"
257
- } else if (ref.includes(":")) {
193
+ if (ref.includes(":")) {
258
194
  if (ref.includes("-")) {
259
- const [start, end] = ref.split("-").map((s) => s.trim())
260
- const startParts = start.split(":").map((s) => s.trim())
261
- const endParts = end.split(":").map((s) => s.trim())
195
+ const [start, end] = ref.split("-")
196
+ const startParts = start.split(":")
197
+ const endParts = end.split(":")
262
198
  type =
263
- startParts.length > 1 && endParts.length > 1 && startParts[0] !== endParts[0]
199
+ startParts.length > 1 &&
200
+ endParts.length > 1 &&
201
+ startParts[0].trim() !== endParts[0].trim()
264
202
  ? "multi_chapter_verse_range"
265
203
  : "chapter_verse_range"
266
204
  } else if (ref.includes(",")) {
@@ -270,35 +208,25 @@ class CodexParser {
270
208
  }
271
209
  } else if (ref.includes("-")) {
272
210
  type = "chapter_range"
273
- } else if (/\d/.test(ref)) {
274
- type = "single_chapter"
275
211
  } else {
276
- type = "book_only"
212
+ type = "single_chapter"
277
213
  }
278
214
 
279
- const referenceObj = {
215
+ this.found.push({
280
216
  book: foundBook,
281
217
  reference: ref,
282
- version,
218
+ startIndex: startOfReference + 1,
219
+ endIndex: endIndex + 1,
220
+ version: suffix || null,
283
221
  type,
284
- originalText,
285
- startIndex: refObj.startIndex,
286
- endIndex: refObj.endIndex,
287
- }
288
- this.found.push(referenceObj)
289
- console.log(`[Scan] Stored reference: ${JSON.stringify(referenceObj)}`)
222
+ originalText: text.slice(startOfReference, endIndex), // Use original text
223
+ })
290
224
  })
291
-
292
- // Skip any trailing spaces after the reference
293
- while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
294
- i++
295
- }
296
225
  } else {
297
226
  i++
298
227
  }
299
228
  }
300
229
 
301
- console.log("[Scan] Final found references:", JSON.stringify(this.found, null, 2))
302
230
  return this
303
231
  }
304
232