codexparser 0.5.2 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,16 @@
2
2
 
3
3
  All notable changes to this project are documented here. For full details, see the Release Notes in README and the GitHub Releases page.
4
4
 
5
+ ## 0.5.3 — 2026-05-25
6
+
7
+ ### Fixed
8
+
9
+ - **Scan character offsets (`startIndex`/`endIndex`/`originalText`) were wrong for references following punctuation or another reference.** Normalization in `ScriptureScanner.scan` deleted the period after a book abbreviation (`Ps.` → `Ps`), which shortened `normalizedText` and shifted every subsequent index out of alignment with the source `text`. The downstream `indexOf(fullRefText)` remap (which also searched for a `:`→`.` mangled form) then drifted, so e.g. scanning `… John 3:16 (cf. Lamentations 3:1)` returned `originalText: " John 3:1"` (leading space, truncated verse). Both normalization substitutions are now **length-preserving** (`Ps.` → `Ps `), and spans are taken directly from the scanner's own tracked indices with leading/trailing separator trimming. `text.slice(startIndex, endIndex) === originalText` now holds exactly, including abbreviated and numbered books (`1 Cor. 13:4`), semicolon lists (`Isa 1:1; 2:2` → `2:2` → `Isa. 2:2`), and trailing-comma cases.
10
+
11
+ ### Added
12
+
13
+ - **En-dash / em-dash range support.** `3:22–24` and `3:22—24` (U+2013 / U+2014) are now parsed as ranges (previously only ASCII `-` was recognized, so `Lamentations 3:22–24` captured only `3:22`). Implemented as a length-preserving `–|— → -` substitution in `scan` normalization, so range hashes/abbreviations are complete (`Lam.3.22-Lam.3.24`) while `originalText` preserves the source dash.
14
+
5
15
  ## 0.5.2 — 2026-05-25
6
16
 
7
17
  ### Fixed
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codexparser",
3
- "version": "0.5.2",
3
+ "version": "0.5.4",
4
4
  "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
5
5
  "main": "index.js",
6
6
  "files": [
@@ -71,7 +71,7 @@ class ReferenceParser {
71
71
  * @returns {Array} Array of parsed passage objects
72
72
  */
73
73
  parse(foundReferences, currentVersion = null) {
74
- return foundReferences.map((reference) => {
74
+ return this.#splitChapterSwitchingRefs(foundReferences).map((reference) => {
75
75
  const book = this.#normalizeBookName(reference.book)
76
76
  const testament = bible.old.includes(book) ? "old" : "new"
77
77
 
@@ -136,6 +136,64 @@ class ReferenceParser {
136
136
  })
137
137
  }
138
138
 
139
+ /**
140
+ * Expands every chapter-switching comma reference (e.g. "Daniel 8:16-18,9:21,23,10:8-10") into one reference per chapter group, so each is parsed by the single-chapter path; single-chapter comma lists ("9:21,23") and bare-verse lists ("1:1,2,3") are passed through as-is.
141
+ * @param {Array} foundReferences - Reference objects; each one's `reference` value is handed to #chapterGroups to decide whether it must be split.
142
+ * @returns {Array} A new array in input order: the original object when no split applies, otherwise one shallow copy per chapter group with `reference` and `type` overridden.
143
+ * @private
144
+ */
145
+ #splitChapterSwitchingRefs(foundReferences) {
146
+ const out = []
147
+ for (const reference of foundReferences) {
148
+ const groups = this.#chapterGroups(reference.reference) // null when the list does not switch chapters
149
+ if (!groups) {
150
+ out.push(reference) // nothing to split: keep the very same object
151
+ } else {
152
+ for (const groupRef of groups) {
153
+ // Force the general parse path; #parseReferenceParts re-derives the real type.
154
+ out.push({
155
+ ...reference, // shallow copy, so every other field of the found reference is carried over
156
+ reference: groupRef,
157
+ type: ReferenceParser.REFERENCE_TYPES.CHAPTER_VERSE_RANGE,
158
+ })
159
+ }
160
+ }
161
+ }
162
+ return out
163
+ }
164
+
165
+ /**
166
+ * Groups the comma-separated parts of a post-book reference string by chapter. A new group opens whenever a part carries a chapter prefix that differs from the current group's chapter; parts without a prefix join the current group.
167
+ * @param {*} reference - Post-book reference text such as "8:16-18,9:21,23,10:8-10"; non-string values and strings without a comma yield null.
168
+ * @returns {string[]|null} One ref string per chapter group (e.g. ["8:16-18", "9:21,23", "10:8-10"]) only when at least two groups are formed; null otherwise (no comma, single chapter, or a leading bare verse).
169
+ * @private
170
+ */
171
+ #chapterGroups(reference) {
172
+ if (typeof reference !== "string" || !reference.includes(",")) return null
173
+ const parts = reference
174
+ .split(",")
175
+ .map((p) => p.trim())
176
+ .filter(Boolean) // drop segments that are empty after trimming
177
+ const groups = []
178
+ let current = null
179
+ for (const part of parts) {
180
+ const match = part.match(/^(\d+)\s*[:.]/) // leading digits followed by ":" or "." are taken as an explicit chapter
181
+ if (match) {
182
+ const chapter = match[1] // kept as a string, so chapters are compared textually
183
+ if (!current || current.chapter !== chapter) { // first chapter seen, or the chapter changed: open a new group
184
+ current = { chapter, parts: [] }
185
+ groups.push(current)
186
+ }
187
+ current.parts.push(part)
188
+ } else {
189
+ if (!current) return null // leading bare verse — leave to normal parsing
190
+ current.parts.push(part) // no chapter prefix: the part stays with the current group
191
+ }
192
+ }
193
+ if (groups.length < 2) return null // fewer than two groups: nothing to split
194
+ return groups.map((g) => g.parts.join(","))
195
+ }
196
+
139
197
  /**
140
198
  * Normalizes book names using abbreviations or full names
141
199
  * @private
@@ -33,8 +33,14 @@ class ScriptureScanner {
33
33
  const abbreviationKeys = Object.keys(this.#abbreviations)
34
34
  const found = []
35
35
 
36
- // Minimal normalization: fix periods before numbers, remove trailing periods
37
- const normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
36
+ // Minimal normalization: fix periods before numbers, neutralize trailing
37
+ // periods after book abbreviations. Both substitutions are
38
+ // LENGTH-PRESERVING (1 char -> 1 char) so indices into normalizedText
39
+ // map 1:1 onto the original `text`, keeping startIndex/endIndex exact.
40
+ const normalizedText = text
41
+ .replace(/\.(?=\d)/g, ":")
42
+ .replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1 ")
43
+ .replace(/[–—]/g, "-")
38
44
 
39
45
  const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
40
46
  const lowercaseBibleAbbreviations = abbreviationKeys.map((abbr) => abbr.toLowerCase())
@@ -122,46 +128,36 @@ class ScriptureScanner {
122
128
  }
123
129
  }
124
130
 
125
- // Align indices with original text
126
- const originalBookText = text.slice(bookStartIndex, bookStartIndex + matchedLength)
127
- const originalBookStartIndex =
128
- text.indexOf(originalBookText, bookStartIndex) !== -1
129
- ? text.indexOf(originalBookText, bookStartIndex)
130
- : bookStartIndex
131
-
132
131
  references.forEach(({ ref, start, end }) => {
133
132
  const type = this.#determineReferenceType(ref)
134
- const fullRefText = `${originalBookText} ${ref.replace(":", ".")}`
135
133
  const suffixData = this.#detectSuffix(normalizedText, end)
136
134
  const suffix = suffixData ? suffixData.suffix : null
137
- let refEndIndex = end
138
135
 
139
- if (suffixData) {
140
- refEndIndex += suffixData.length
141
- i += suffixData.length
136
+ // Normalization is length-preserving, so the tracked scan
137
+ // indices map 1:1 onto the original text. Use them directly
138
+ // instead of the old indexOf remapping (which drifted and
139
+ // truncated references that followed punctuation).
140
+ let originalStartIndex = start
141
+ let originalEndIndex = suffixData ? end + suffixData.length : end
142
+ if (suffixData) i += suffixData.length
143
+
144
+ // Trim leading separators/whitespace (e.g. after "(", ";", ".")
145
+ while (
146
+ originalStartIndex < originalEndIndex &&
147
+ /[\s.,;:()[\]—-]/.test(text[originalStartIndex])
148
+ ) {
149
+ originalStartIndex++
142
150
  }
143
-
144
- // Map to original text
145
- let originalStartIndex =
146
- text.indexOf(fullRefText, originalRefStartIndex) !== -1
147
- ? text.indexOf(fullRefText, originalRefStartIndex)
148
- : originalBookStartIndex
149
-
150
- let originalEndIndex = originalStartIndex + fullRefText.length
151
- let originalText = text.slice(originalStartIndex, originalEndIndex)
152
-
153
- // Adjust for suffix in original text
154
- if (suffixData) {
155
- originalEndIndex += suffixData.length
156
- originalText = text.slice(originalStartIndex, originalEndIndex)
157
- }
158
-
159
- // Trim trailing whitespace from originalText
160
- while (originalEndIndex > originalStartIndex && /[\s]/.test(text[originalEndIndex - 1])) {
151
+ // Trim trailing whitespace/punctuation
152
+ while (
153
+ originalEndIndex > originalStartIndex &&
154
+ /[\s.,;]/.test(text[originalEndIndex - 1])
155
+ ) {
161
156
  originalEndIndex--
162
- originalText = text.slice(originalStartIndex, originalEndIndex)
163
157
  }
164
158
 
159
+ const originalText = text.slice(originalStartIndex, originalEndIndex)
160
+
165
161
  found.push({
166
162
  book: foundBook,
167
163
  reference: ref,