codexparser 0.1.61 → 0.1.63

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/CodexParser.js +35 -29
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codexparser",
3
- "version": "0.1.61",
3
+ "version": "0.1.63",
4
4
  "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -74,16 +74,14 @@ class CodexParser {
74
74
  const fullNames = [...this.bible.old, ...this.bible.new]
75
75
  const abbreviations = Object.keys(this.abbreviations)
76
76
  this.found = []
77
- let normalizedText = text
78
- .replace(/\.(?=\d)/g, ":")
79
- .replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
80
- .replace(/\s+/g, " ")
77
+ // Minimal normalization: fix periods before numbers, remove trailing periods
78
+ let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
81
79
  const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
82
80
  const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
83
81
  const lowerCaseText = normalizedText.toLowerCase()
84
82
  let i = 0
85
83
 
86
- const isValidChapterVerseChar = (char) => /[^A-Za-z]/.test(char)
84
+ const isValidChapterVerseChar = (char) => /[\d:,\-;]/.test(char)
87
85
  const isNextBibleBook = (startIndex) => {
88
86
  const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
89
87
  return (
@@ -93,13 +91,7 @@ class CodexParser {
93
91
  }
94
92
  const detectSuffix = (startIndex) => {
95
93
  const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
96
- return suffixMatch
97
- ? {
98
- suffix: suffixMatch[0].toUpperCase(),
99
- length:
100
- suffixMatch[0].length + (normalizedText[startIndex + suffixMatch[0].length] === " " ? 1 : 0),
101
- }
102
- : null
94
+ return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
103
95
  }
104
96
 
105
97
  while (i < lowerCaseText.length) {
@@ -107,6 +99,12 @@ class CodexParser {
107
99
  let startIndex = -1
108
100
  let matchedLength = 0
109
101
 
102
+ // Skip non-alphabetic characters (e.g., \n, —, spaces) before book
103
+ while (i < lowerCaseText.length && !/[A-Za-z]/.test(lowerCaseText[i])) {
104
+ i++
105
+ }
106
+ if (i >= lowerCaseText.length) break
107
+
110
108
  for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
111
109
  const book = lowercaseBibleFullNames[j]
112
110
  if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
@@ -133,10 +131,14 @@ class CodexParser {
133
131
  const references = []
134
132
  const startOfReference = startIndex
135
133
 
136
- while (i < normalizedText.length && isValidChapterVerseChar(normalizedText[i])) {
134
+ // Capture chapter/verse, allowing spaces between book and reference
135
+ while (
136
+ i < normalizedText.length &&
137
+ (isValidChapterVerseChar(normalizedText[i]) || normalizedText[i] === " ")
138
+ ) {
137
139
  if (isNextBibleBook(i)) break
138
140
  if (normalizedText[i] === ";") {
139
- const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
141
+ const formattedReference = chapterVerse.trim()
140
142
  if (formattedReference) references.push(formattedReference)
141
143
  chapterVerse = ""
142
144
  i++
@@ -147,25 +149,29 @@ class CodexParser {
147
149
  }
148
150
 
149
151
  if (chapterVerse.trim().length > 0) {
150
- const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
152
+ const formattedReference = chapterVerse.trim()
151
153
  if (formattedReference) references.push(formattedReference)
152
154
  }
153
155
 
156
+ // Set endIndex to the current position
157
+ let endIndex = i
154
158
  const suffixData = detectSuffix(i)
155
159
  const suffix = suffixData ? suffixData.suffix : null
156
- if (suffixData) i += suffixData.length
160
+ if (suffixData) {
161
+ endIndex += suffixData.length
162
+ i += suffixData.length
163
+ }
157
164
 
158
- // Adjust endIndex to exclude trailing space if present
159
- let endIndex = i
160
- if (endIndex > 0 && normalizedText[endIndex - 1] === " ") {
165
+ // Trim endIndex to exclude trailing non-reference characters
166
+ while (endIndex > startOfReference && /[^A-Za-z0-9]/.test(normalizedText[endIndex - 1])) {
161
167
  endIndex--
162
168
  }
163
169
 
164
- references.forEach((ref) => {
170
+ references.forEach((reference) => {
165
171
  let type
166
- if (ref.includes(":")) {
167
- if (ref.includes("-")) {
168
- const [start, end] = ref.split("-")
172
+ if (reference.includes(":")) {
173
+ if (reference.includes("-")) {
174
+ const [start, end] = reference.split("-")
169
175
  const startParts = start.split(":")
170
176
  const endParts = end.split(":")
171
177
  type =
@@ -174,12 +180,12 @@ class CodexParser {
174
180
  startParts[0].trim() !== endParts[0].trim()
175
181
  ? "multi_chapter_verse_range"
176
182
  : "chapter_verse_range"
177
- } else if (ref.includes(",")) {
183
+ } else if (reference.includes(",")) {
178
184
  type = "comma_separated_verses"
179
185
  } else {
180
186
  type = "chapter_verse"
181
187
  }
182
- } else if (ref.includes("-")) {
188
+ } else if (reference.includes("-")) {
183
189
  type = "chapter_range"
184
190
  } else {
185
191
  type = "single_chapter"
@@ -187,12 +193,12 @@ class CodexParser {
187
193
 
188
194
  this.found.push({
189
195
  book: foundBook,
190
- reference: ref,
191
- startIndex: startOfReference,
192
- endIndex: endIndex,
196
+ reference: reference,
197
+ startIndex: startOfReference + 1,
198
+ endIndex: endIndex + 1,
193
199
  version: suffix || null,
194
200
  type,
195
- originalText: normalizedText.slice(startOfReference, endIndex),
201
+ originalText: text.slice(startOfReference, endIndex),
196
202
  })
197
203
  })
198
204
  } else {