codexparser 0.5.2 → 0.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +10 -0
- package/package.json +1 -1
- package/src/core/ReferenceParser.js +59 -1
- package/src/core/ScriptureScanner.js +29 -33
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,16 @@
|
|
|
2
2
|
|
|
3
3
|
All notable changes to this project are documented here. For full details, see the Release Notes in README and the GitHub Releases page.
|
|
4
4
|
|
|
5
|
+
## 0.5.3 — 2026-05-25
|
|
6
|
+
|
|
7
|
+
### Fixed
|
|
8
|
+
|
|
9
|
+
- **Scan character offsets (`startIndex`/`endIndex`/`originalText`) were wrong for references following punctuation or another reference.** Normalization in `ScriptureScanner.scan` deleted the period after a book abbreviation (`Ps.` → `Ps`), which shortened `normalizedText` and shifted every subsequent index out of alignment with the source `text`. The downstream `indexOf(fullRefText)` remap (which also searched for a `:`→`.` mangled form) then drifted, so e.g. scanning `… John 3:16 (cf. Lamentations 3:1)` returned `originalText: " John 3:1"` (leading space, truncated verse). Both normalization substitutions are now **length-preserving** (`Ps.` → `Ps `), and spans are taken directly from the scanner's own tracked indices with leading/trailing separator trimming. `text.slice(startIndex, endIndex) === originalText` now holds exactly, including abbreviated and numbered books (`1 Cor. 13:4`), semicolon lists (`Isa 1:1; 2:2` → `2:2` → `Isa. 2:2`), and trailing-comma cases.
|
|
10
|
+
|
|
11
|
+
### Added
|
|
12
|
+
|
|
13
|
+
- **En-dash / em-dash range support.** `3:22–24` and `3:22—24` (U+2013 / U+2014) are now parsed as ranges (previously only ASCII `-` was recognized, so `Lamentations 3:22–24` captured only `3:22`). Implemented as a length-preserving `–|— → -` substitution in `scan` normalization, so range hashes/abbreviations are complete (`Lam.3.22-Lam.3.24`) while `originalText` preserves the source dash.
|
|
14
|
+
|
|
5
15
|
## 0.5.2 — 2026-05-25
|
|
6
16
|
|
|
7
17
|
### Fixed
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.5.
|
|
3
|
+
"version": "0.5.4",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"files": [
|
package/src/core/ReferenceParser.js
CHANGED
|
@@ -71,7 +71,7 @@ class ReferenceParser {
|
|
|
71
71
|
* @returns {Array} Array of parsed passage objects
|
|
72
72
|
*/
|
|
73
73
|
parse(foundReferences, currentVersion = null) {
|
|
74
|
-
return foundReferences.map((reference) => {
|
|
74
|
+
return this.#splitChapterSwitchingRefs(foundReferences).map((reference) => {
|
|
75
75
|
const book = this.#normalizeBookName(reference.book)
|
|
76
76
|
const testament = bible.old.includes(book) ? "old" : "new"
|
|
77
77
|
|
|
@@ -136,6 +136,64 @@ class ReferenceParser {
|
|
|
136
136
|
})
|
|
137
137
|
}
|
|
138
138
|
|
|
139
|
+
/**
|
|
140
|
+
* Splits a chapter-switching comma reference (e.g. "Daniel 8:16-18,9:21,23,10:8-10")
|
|
141
|
+
* into one reference per chapter group, so each is parsed by the single-chapter path.
|
|
142
|
+
* Single-chapter comma lists ("9:21,23") and bare-verse lists ("1:1,2,3") are left as-is.
|
|
143
|
+
* @private
|
|
144
|
+
*/
|
|
145
|
+
#splitChapterSwitchingRefs(foundReferences) {
|
|
146
|
+
const out = []
|
|
147
|
+
for (const reference of foundReferences) {
|
|
148
|
+
const groups = this.#chapterGroups(reference.reference)
|
|
149
|
+
if (!groups) {
|
|
150
|
+
out.push(reference)
|
|
151
|
+
} else {
|
|
152
|
+
for (const groupRef of groups) {
|
|
153
|
+
// Force the general parse path; #parseReferenceParts re-derives the real type.
|
|
154
|
+
out.push({
|
|
155
|
+
...reference,
|
|
156
|
+
reference: groupRef,
|
|
157
|
+
type: ReferenceParser.REFERENCE_TYPES.CHAPTER_VERSE_RANGE,
|
|
158
|
+
})
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
return out
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Groups a post-book reference string by chapter. Returns one ref string per chapter group
|
|
167
|
+
* (e.g. ["8:16-18", "9:21,23", "10:8-10"]) only when the list actually switches chapters;
|
|
168
|
+
* returns null otherwise (no comma, single chapter, or a leading bare verse).
|
|
169
|
+
* @private
|
|
170
|
+
*/
|
|
171
|
+
#chapterGroups(reference) {
|
|
172
|
+
if (typeof reference !== "string" || !reference.includes(",")) return null
|
|
173
|
+
const parts = reference
|
|
174
|
+
.split(",")
|
|
175
|
+
.map((p) => p.trim())
|
|
176
|
+
.filter(Boolean)
|
|
177
|
+
const groups = []
|
|
178
|
+
let current = null
|
|
179
|
+
for (const part of parts) {
|
|
180
|
+
const match = part.match(/^(\d+)\s*[:.]/)
|
|
181
|
+
if (match) {
|
|
182
|
+
const chapter = match[1]
|
|
183
|
+
if (!current || current.chapter !== chapter) {
|
|
184
|
+
current = { chapter, parts: [] }
|
|
185
|
+
groups.push(current)
|
|
186
|
+
}
|
|
187
|
+
current.parts.push(part)
|
|
188
|
+
} else {
|
|
189
|
+
if (!current) return null // leading bare verse — leave to normal parsing
|
|
190
|
+
current.parts.push(part)
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
if (groups.length < 2) return null
|
|
194
|
+
return groups.map((g) => g.parts.join(","))
|
|
195
|
+
}
|
|
196
|
+
|
|
139
197
|
/**
|
|
140
198
|
* Normalizes book names using abbreviations or full names
|
|
141
199
|
* @private
|
package/src/core/ScriptureScanner.js
CHANGED
|
@@ -33,8 +33,14 @@ class ScriptureScanner {
|
|
|
33
33
|
const abbreviationKeys = Object.keys(this.#abbreviations)
|
|
34
34
|
const found = []
|
|
35
35
|
|
|
36
|
-
// Minimal normalization: fix periods before numbers,
|
|
37
|
-
|
|
36
|
+
// Minimal normalization: fix periods before numbers, neutralize trailing
|
|
37
|
+
// periods after book abbreviations. Both substitutions are
|
|
38
|
+
// LENGTH-PRESERVING (1 char -> 1 char) so indices into normalizedText
|
|
39
|
+
// map 1:1 onto the original `text`, keeping startIndex/endIndex exact.
|
|
40
|
+
const normalizedText = text
|
|
41
|
+
.replace(/\.(?=\d)/g, ":")
|
|
42
|
+
.replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1 ")
|
|
43
|
+
.replace(/[–—]/g, "-")
|
|
38
44
|
|
|
39
45
|
const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
|
|
40
46
|
const lowercaseBibleAbbreviations = abbreviationKeys.map((abbr) => abbr.toLowerCase())
|
|
@@ -122,46 +128,36 @@ class ScriptureScanner {
|
|
|
122
128
|
}
|
|
123
129
|
}
|
|
124
130
|
|
|
125
|
-
// Align indices with original text
|
|
126
|
-
const originalBookText = text.slice(bookStartIndex, bookStartIndex + matchedLength)
|
|
127
|
-
const originalBookStartIndex =
|
|
128
|
-
text.indexOf(originalBookText, bookStartIndex) !== -1
|
|
129
|
-
? text.indexOf(originalBookText, bookStartIndex)
|
|
130
|
-
: bookStartIndex
|
|
131
|
-
|
|
132
131
|
references.forEach(({ ref, start, end }) => {
|
|
133
132
|
const type = this.#determineReferenceType(ref)
|
|
134
|
-
const fullRefText = `${originalBookText} ${ref.replace(":", ".")}`
|
|
135
133
|
const suffixData = this.#detectSuffix(normalizedText, end)
|
|
136
134
|
const suffix = suffixData ? suffixData.suffix : null
|
|
137
|
-
let refEndIndex = end
|
|
138
135
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
136
|
+
// Normalization is length-preserving, so the tracked scan
|
|
137
|
+
// indices map 1:1 onto the original text. Use them directly
|
|
138
|
+
// instead of the old indexOf remapping (which drifted and
|
|
139
|
+
// truncated references that followed punctuation).
|
|
140
|
+
let originalStartIndex = start
|
|
141
|
+
let originalEndIndex = suffixData ? end + suffixData.length : end
|
|
142
|
+
if (suffixData) i += suffixData.length
|
|
143
|
+
|
|
144
|
+
// Trim leading separators/whitespace (e.g. after "(", ";", ".")
|
|
145
|
+
while (
|
|
146
|
+
originalStartIndex < originalEndIndex &&
|
|
147
|
+
/[\s.,;:()[\]—-]/.test(text[originalStartIndex])
|
|
148
|
+
) {
|
|
149
|
+
originalStartIndex++
|
|
142
150
|
}
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
text
|
|
147
|
-
|
|
148
|
-
: originalBookStartIndex
|
|
149
|
-
|
|
150
|
-
let originalEndIndex = originalStartIndex + fullRefText.length
|
|
151
|
-
let originalText = text.slice(originalStartIndex, originalEndIndex)
|
|
152
|
-
|
|
153
|
-
// Adjust for suffix in original text
|
|
154
|
-
if (suffixData) {
|
|
155
|
-
originalEndIndex += suffixData.length
|
|
156
|
-
originalText = text.slice(originalStartIndex, originalEndIndex)
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
// Trim trailing whitespace from originalText
|
|
160
|
-
while (originalEndIndex > originalStartIndex && /[\s]/.test(text[originalEndIndex - 1])) {
|
|
151
|
+
// Trim trailing whitespace/punctuation
|
|
152
|
+
while (
|
|
153
|
+
originalEndIndex > originalStartIndex &&
|
|
154
|
+
/[\s.,;]/.test(text[originalEndIndex - 1])
|
|
155
|
+
) {
|
|
161
156
|
originalEndIndex--
|
|
162
|
-
originalText = text.slice(originalStartIndex, originalEndIndex)
|
|
163
157
|
}
|
|
164
158
|
|
|
159
|
+
const originalText = text.slice(originalStartIndex, originalEndIndex)
|
|
160
|
+
|
|
165
161
|
found.push({
|
|
166
162
|
book: foundBook,
|
|
167
163
|
reference: ref,
|