npm - codexparser - Versions diffs - 0.5.2 → 0.5.4 - Mend

codexparser 0.5.2 → 0.5.4

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/CHANGELOG.md +10 -0
package/package.json +1 -1
package/src/core/ReferenceParser.js +59 -1
package/src/core/ScriptureScanner.js +29 -33

package/CHANGELOG.md CHANGED Viewed

@@ -2,6 +2,16 @@
 All notable changes to this project are documented here. For full details, see the Release Notes in README and the GitHub Releases page.
+## 0.5.3 — 2026-05-25
+### Fixed
+- **Scan character offsets (`startIndex`/`endIndex`/`originalText`) were wrong for references following punctuation or another reference.** Normalization in `ScriptureScanner.scan` deleted the period after a book abbreviation (`Ps.` → `Ps`), which shortened `normalizedText` and shifted every subsequent index out of alignment with the source `text`. The downstream `indexOf(fullRefText)` remap (which also searched for a `:`→`.` mangled form) then drifted, so e.g. scanning `… John 3:16 (cf. Lamentations 3:1)` returned `originalText: " John 3:1"` (leading space, truncated verse). Both normalization substitutions are now **length-preserving** (`Ps.` → `Ps `), and spans are taken directly from the scanner's own tracked indices with leading/trailing separator trimming. `text.slice(startIndex, endIndex) === originalText` now holds exactly, including abbreviated and numbered books (`1 Cor. 13:4`), semicolon lists (`Isa 1:1; 2:2` → `2:2` → `Isa. 2:2`), and trailing-comma cases.
+### Added
+- **En-dash / em-dash range support.** `3:22–24` and `3:22—24` (U+2013 / U+2014) are now parsed as ranges (previously only ASCII `-` was recognized, so `Lamentations 3:22–24` captured only `3:22`). Implemented as a length-preserving `–|— → -` substitution in `scan` normalization, so range hashes/abbreviations are complete (`Lam.3.22-Lam.3.24`) while `originalText` preserves the source dash.
 ## 0.5.2 — 2026-05-25
 ### Fixed

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codexparser",
-  "version": "0.5.2",
+  "version": "0.5.4",
   "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
   "main": "index.js",
   "files": [

package/src/core/ReferenceParser.js CHANGED Viewed

@@ -71,7 +71,7 @@ class ReferenceParser {
      * @returns {Array} Array of parsed passage objects
      */
     parse(foundReferences, currentVersion = null) {
-        return foundReferences.map((reference) => {
+        return this.#splitChapterSwitchingRefs(foundReferences).map((reference) => {
             const book = this.#normalizeBookName(reference.book)
             const testament = bible.old.includes(book) ? "old" : "new"
@@ -136,6 +136,64 @@ class ReferenceParser {
         })
     }
+    /**
+     * Splits a chapter-switching comma reference (e.g. "Daniel 8:16-18,9:21,23,10:8-10")
+     * into one reference per chapter group, so each is parsed by the single-chapter path.
+     * Single-chapter comma lists ("9:21,23") and bare-verse lists ("1:1,2,3") are left as-is.
+     * @private
+     */
+    #splitChapterSwitchingRefs(foundReferences) {
+        const out = []
+        for (const reference of foundReferences) {
+            const groups = this.#chapterGroups(reference.reference)
+            if (!groups) {
+                out.push(reference)
+            } else {
+                for (const groupRef of groups) {
+                    // Force the general parse path; #parseReferenceParts re-derives the real type.
+                    out.push({
+                        ...reference,
+                        reference: groupRef,
+                        type: ReferenceParser.REFERENCE_TYPES.CHAPTER_VERSE_RANGE,
+                    })
+                }
+            }
+        }
+        return out
+    }
+    /**
+     * Groups a post-book reference string by chapter. Returns one ref string per chapter group
+     * (e.g. ["8:16-18", "9:21,23", "10:8-10"]) only when the list actually switches chapters;
+     * returns null otherwise (no comma, single chapter, or a leading bare verse).
+     * @private
+     */
+    #chapterGroups(reference) {
+        if (typeof reference !== "string" || !reference.includes(",")) return null
+        const parts = reference
+            .split(",")
+            .map((p) => p.trim())
+            .filter(Boolean)
+        const groups = []
+        let current = null
+        for (const part of parts) {
+            const match = part.match(/^(\d+)\s*[:.]/)
+            if (match) {
+                const chapter = match[1]
+                if (!current || current.chapter !== chapter) {
+                    current = { chapter, parts: [] }
+                    groups.push(current)
+                }
+                current.parts.push(part)
+            } else {
+                if (!current) return null // leading bare verse — leave to normal parsing
+                current.parts.push(part)
+            }
+        }
+        if (groups.length < 2) return null
+        return groups.map((g) => g.parts.join(","))
+    }
     /**
      * Normalizes book names using abbreviations or full names
      * @private

package/src/core/ScriptureScanner.js CHANGED Viewed

@@ -33,8 +33,14 @@ class ScriptureScanner {
         const abbreviationKeys = Object.keys(this.#abbreviations)
         const found = []
-        // Minimal normalization: fix periods before numbers, remove trailing periods
-        const normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
+        // Minimal normalization: fix periods before numbers, neutralize trailing
+        // periods after book abbreviations.  Both substitutions are
+        // LENGTH-PRESERVING (1 char -> 1 char) so indices into normalizedText
+        // map 1:1 onto the original `text`, keeping startIndex/endIndex exact.
+        const normalizedText = text
+            .replace(/\.(?=\d)/g, ":")
+            .replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1 ")
+            .replace(/[–—]/g, "-")
         const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
         const lowercaseBibleAbbreviations = abbreviationKeys.map((abbr) => abbr.toLowerCase())
@@ -122,46 +128,36 @@ class ScriptureScanner {
                     }
                 }
-                // Align indices with original text
-                const originalBookText = text.slice(bookStartIndex, bookStartIndex + matchedLength)
-                const originalBookStartIndex =
-                    text.indexOf(originalBookText, bookStartIndex) !== -1
-                        ? text.indexOf(originalBookText, bookStartIndex)
-                        : bookStartIndex
                 references.forEach(({ ref, start, end }) => {
                     const type = this.#determineReferenceType(ref)
-                    const fullRefText = `${originalBookText} ${ref.replace(":", ".")}`
                     const suffixData = this.#detectSuffix(normalizedText, end)
                     const suffix = suffixData ? suffixData.suffix : null
-                    let refEndIndex = end
-                    if (suffixData) {
-                        refEndIndex += suffixData.length
-                        i += suffixData.length
+                    // Normalization is length-preserving, so the tracked scan
+                    // indices map 1:1 onto the original text. Use them directly
+                    // instead of the old indexOf remapping (which drifted and
+                    // truncated references that followed punctuation).
+                    let originalStartIndex = start
+                    let originalEndIndex = suffixData ? end + suffixData.length : end
+                    if (suffixData) i += suffixData.length
+                    // Trim leading separators/whitespace (e.g. after "(", ";", ".")
+                    while (
+                        originalStartIndex < originalEndIndex &&
+                        /[\s.,;:()[\]—-]/.test(text[originalStartIndex])
+                    ) {
+                        originalStartIndex++
                     }
-                    // Map to original text
-                    let originalStartIndex =
-                        text.indexOf(fullRefText, originalRefStartIndex) !== -1
-                            ? text.indexOf(fullRefText, originalRefStartIndex)
-                            : originalBookStartIndex
-                    let originalEndIndex = originalStartIndex + fullRefText.length
-                    let originalText = text.slice(originalStartIndex, originalEndIndex)
-                    // Adjust for suffix in original text
-                    if (suffixData) {
-                        originalEndIndex += suffixData.length
-                        originalText = text.slice(originalStartIndex, originalEndIndex)
-                    }
-                    // Trim trailing whitespace from originalText
-                    while (originalEndIndex > originalStartIndex && /[\s]/.test(text[originalEndIndex - 1])) {
+                    // Trim trailing whitespace/punctuation
+                    while (
+                        originalEndIndex > originalStartIndex &&
+                        /[\s.,;]/.test(text[originalEndIndex - 1])
+                    ) {
                         originalEndIndex--
-                        originalText = text.slice(originalStartIndex, originalEndIndex)
                     }
+                    const originalText = text.slice(originalStartIndex, originalEndIndex)
                     found.push({
                         book: foundBook,
                         reference: ref,