codexparser 0.1.62 → 0.1.63

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/CodexParser.js +35 -21
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codexparser",
3
- "version": "0.1.62",
3
+ "version": "0.1.63",
4
4
  "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -74,16 +74,14 @@ class CodexParser {
74
74
  const fullNames = [...this.bible.old, ...this.bible.new]
75
75
  const abbreviations = Object.keys(this.abbreviations)
76
76
  this.found = []
77
- let normalizedText = text
78
- .replace(/\.(?=\d)/g, ":")
79
- .replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
80
- .replace(/\s+/g, " ")
77
+ // Minimal normalization: fix periods before numbers, remove trailing periods
78
+ let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
81
79
  const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
82
80
  const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
83
81
  const lowerCaseText = normalizedText.toLowerCase()
84
82
  let i = 0
85
83
 
86
- const isValidChapterVerseChar = (char) => /[^A-Za-z]/.test(char)
84
+ const isValidChapterVerseChar = (char) => /[\d:,\-;]/.test(char)
87
85
  const isNextBibleBook = (startIndex) => {
88
86
  const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
89
87
  return (
@@ -101,6 +99,12 @@ class CodexParser {
101
99
  let startIndex = -1
102
100
  let matchedLength = 0
103
101
 
102
+ // Skip non-alphabetic characters (e.g., \n, —, spaces) before book
103
+ while (i < lowerCaseText.length && !/[A-Za-z]/.test(lowerCaseText[i])) {
104
+ i++
105
+ }
106
+ if (i >= lowerCaseText.length) break
107
+
104
108
  for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
105
109
  const book = lowercaseBibleFullNames[j]
106
110
  if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
@@ -127,10 +131,14 @@ class CodexParser {
127
131
  const references = []
128
132
  const startOfReference = startIndex
129
133
 
130
- while (i < normalizedText.length && isValidChapterVerseChar(normalizedText[i])) {
134
+ // Capture chapter/verse, allowing spaces between book and reference
135
+ while (
136
+ i < normalizedText.length &&
137
+ (isValidChapterVerseChar(normalizedText[i]) || normalizedText[i] === " ")
138
+ ) {
131
139
  if (isNextBibleBook(i)) break
132
140
  if (normalizedText[i] === ";") {
133
- const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
141
+ const formattedReference = chapterVerse.trim()
134
142
  if (formattedReference) references.push(formattedReference)
135
143
  chapterVerse = ""
136
144
  i++
@@ -141,23 +149,29 @@ class CodexParser {
141
149
  }
142
150
 
143
151
  if (chapterVerse.trim().length > 0) {
144
- const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
152
+ const formattedReference = chapterVerse.trim()
145
153
  if (formattedReference) references.push(formattedReference)
146
154
  }
147
155
 
156
+ // Set endIndex to the current position
157
+ let endIndex = i
148
158
  const suffixData = detectSuffix(i)
149
159
  const suffix = suffixData ? suffixData.suffix : null
150
- let endIndex = i // Set endIndex before suffix
151
160
  if (suffixData) {
152
- endIndex += suffixData.length // Include suffix in endIndex
153
- i += suffixData.length // Advance i
161
+ endIndex += suffixData.length
162
+ i += suffixData.length
163
+ }
164
+
165
+ // Trim endIndex to exclude trailing non-reference characters
166
+ while (endIndex > startOfReference && /[^A-Za-z0-9]/.test(normalizedText[endIndex - 1])) {
167
+ endIndex--
154
168
  }
155
169
 
156
- references.forEach((ref) => {
170
+ references.forEach((reference) => {
157
171
  let type
158
- if (ref.includes(":")) {
159
- if (ref.includes("-")) {
160
- const [start, end] = ref.split("-")
172
+ if (reference.includes(":")) {
173
+ if (reference.includes("-")) {
174
+ const [start, end] = reference.split("-")
161
175
  const startParts = start.split(":")
162
176
  const endParts = end.split(":")
163
177
  type =
@@ -166,12 +180,12 @@ class CodexParser {
166
180
  startParts[0].trim() !== endParts[0].trim()
167
181
  ? "multi_chapter_verse_range"
168
182
  : "chapter_verse_range"
169
- } else if (ref.includes(",")) {
183
+ } else if (reference.includes(",")) {
170
184
  type = "comma_separated_verses"
171
185
  } else {
172
186
  type = "chapter_verse"
173
187
  }
174
- } else if (ref.includes("-")) {
188
+ } else if (reference.includes("-")) {
175
189
  type = "chapter_range"
176
190
  } else {
177
191
  type = "single_chapter"
@@ -179,12 +193,12 @@ class CodexParser {
179
193
 
180
194
  this.found.push({
181
195
  book: foundBook,
182
- reference: ref,
183
- startIndex: startOfReference,
184
- endIndex: endIndex,
196
+ reference: reference,
197
+ startIndex: startOfReference + 1,
198
+ endIndex: endIndex + 1,
185
199
  version: suffix || null,
186
200
  type,
187
- originalText: normalizedText.slice(startOfReference, endIndex),
201
+ originalText: text.slice(startOfReference, endIndex),
188
202
  })
189
203
  })
190
204
  } else {