npm - codexparser - Versions diffs - 0.1.62 → 0.1.64 - Mend

codexparser 0.1.62 → 0.1.64

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (2)

package/package.json +1 -1
package/src/CodexParser.js +21 -11

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codexparser",
-  "version": "0.1.62",
+  "version": "0.1.64",
   "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
   "main": "index.js",
   "scripts": {

package/src/CodexParser.js CHANGED Viewed

@@ -74,16 +74,14 @@ class CodexParser {
         const fullNames = [...this.bible.old, ...this.bible.new]
         const abbreviations = Object.keys(this.abbreviations)
         this.found = []
-        let normalizedText = text
-            .replace(/\.(?=\d)/g, ":")
-            .replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
-            .replace(/\s+/g, " ")
+        // Minimal normalization: fix periods before numbers, remove trailing periods
+        let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
         const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
         const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
         const lowerCaseText = normalizedText.toLowerCase()
         let i = 0
-        const isValidChapterVerseChar = (char) => /[^A-Za-z]/.test(char)
+        const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
         const isNextBibleBook = (startIndex) => {
             const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
             return (
@@ -101,6 +99,12 @@ class CodexParser {
             let startIndex = -1
             let matchedLength = 0
+            // Skip whitespace and special characters before checking for book
+            while (i < lowerCaseText.length && /[\s—-]/.test(lowerCaseText[i])) {
+                i++
+            }
+            if (i >= lowerCaseText.length) break
             for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
                 const book = lowercaseBibleFullNames[j]
                 if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
@@ -145,12 +149,18 @@ class CodexParser {
                     if (formattedReference) references.push(formattedReference)
                 }
+                // Set endIndex to the current position
+                let endIndex = i
                 const suffixData = detectSuffix(i)
                 const suffix = suffixData ? suffixData.suffix : null
-                let endIndex = i // Set endIndex before suffix
                 if (suffixData) {
-                    endIndex += suffixData.length // Include suffix in endIndex
-                    i += suffixData.length // Advance i
+                    endIndex += suffixData.length
+                    i += suffixData.length
+                }
+                // Trim endIndex to exclude trailing whitespace or non-reference characters
+                while (endIndex > startOfReference && /[\s]/.test(normalizedText[endIndex - 1])) {
+                    endIndex--
                 }
                 references.forEach((ref) => {
@@ -180,11 +190,11 @@ class CodexParser {
                     this.found.push({
                         book: foundBook,
                         reference: ref,
-                        startIndex: startOfReference,
-                        endIndex: endIndex,
+                        startIndex: startOfReference + 1,
+                        endIndex: endIndex + 1,
                         version: suffix || null,
                         type,
-                        originalText: normalizedText.slice(startOfReference, endIndex),
+                        originalText: text.slice(startOfReference, endIndex), // Use original text
                     })
                 })
             } else {