codexparser 0.1.62 → 0.1.64

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/CodexParser.js +21 -11
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codexparser",
3
- "version": "0.1.62",
3
+ "version": "0.1.64",
4
4
  "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -74,16 +74,14 @@ class CodexParser {
74
74
  const fullNames = [...this.bible.old, ...this.bible.new]
75
75
  const abbreviations = Object.keys(this.abbreviations)
76
76
  this.found = []
77
- let normalizedText = text
78
- .replace(/\.(?=\d)/g, ":")
79
- .replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
80
- .replace(/\s+/g, " ")
77
+ // Minimal normalization: fix periods before numbers, remove trailing periods
78
+ let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
81
79
  const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
82
80
  const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
83
81
  const lowerCaseText = normalizedText.toLowerCase()
84
82
  let i = 0
85
83
 
86
- const isValidChapterVerseChar = (char) => /[^A-Za-z]/.test(char)
84
+ const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
87
85
  const isNextBibleBook = (startIndex) => {
88
86
  const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
89
87
  return (
@@ -101,6 +99,12 @@ class CodexParser {
101
99
  let startIndex = -1
102
100
  let matchedLength = 0
103
101
 
102
+ // Skip whitespace and special characters before checking for book
103
+ while (i < lowerCaseText.length && /[\s—-]/.test(lowerCaseText[i])) {
104
+ i++
105
+ }
106
+ if (i >= lowerCaseText.length) break
107
+
104
108
  for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
105
109
  const book = lowercaseBibleFullNames[j]
106
110
  if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
@@ -145,12 +149,18 @@ class CodexParser {
145
149
  if (formattedReference) references.push(formattedReference)
146
150
  }
147
151
 
152
+ // Set endIndex to the current position
153
+ let endIndex = i
148
154
  const suffixData = detectSuffix(i)
149
155
  const suffix = suffixData ? suffixData.suffix : null
150
- let endIndex = i // Set endIndex before suffix
151
156
  if (suffixData) {
152
- endIndex += suffixData.length // Include suffix in endIndex
153
- i += suffixData.length // Advance i
157
+ endIndex += suffixData.length
158
+ i += suffixData.length
159
+ }
160
+
161
+ // Trim endIndex to exclude trailing whitespace or non-reference characters
162
+ while (endIndex > startOfReference && /[\s]/.test(normalizedText[endIndex - 1])) {
163
+ endIndex--
154
164
  }
155
165
 
156
166
  references.forEach((ref) => {
@@ -180,11 +190,11 @@ class CodexParser {
180
190
  this.found.push({
181
191
  book: foundBook,
182
192
  reference: ref,
183
- startIndex: startOfReference,
184
- endIndex: endIndex,
193
+ startIndex: startOfReference + 1,
194
+ endIndex: endIndex + 1,
185
195
  version: suffix || null,
186
196
  type,
187
- originalText: normalizedText.slice(startOfReference, endIndex),
197
+ originalText: text.slice(startOfReference, endIndex), // Use original text
188
198
  })
189
199
  })
190
200
  } else {