codexparser 0.1.78 → 0.1.79

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/CodexParser.js +99 -112
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codexparser",
3
- "version": "0.1.78",
3
+ "version": "0.1.79",
4
4
  "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -87,44 +87,24 @@ class CodexParser {
87
87
  */
88
88
  scan(text) {
89
89
  const fullNames = [...this.bible.old, ...this.bible.new]
90
- const abbreviations = Object.keys(this.abbreviations)
90
+ const abbreviations = Object.keys(this.abbreviations).filter((abbr) => abbr.length >= 3)
91
91
  this.found = []
92
92
 
93
93
  // Normalize text: remove curly quotes, replace periods before numbers with colons
94
- let normalizedText = text
95
- .replace(/[“”]/g, "") // Remove curly quotes
96
- .replace(/\.(?=\d)/g, ":")
94
+ let normalizedText = text.replace(/[“”]/g, "").replace(/\.(?=\d)/g, ":")
97
95
  const lowerCaseText = normalizedText.toLowerCase()
98
96
  let i = 0
99
97
 
100
98
  while (i < lowerCaseText.length) {
101
99
  let foundBook = null
102
- let startIndex = -1
100
+ let startIndex = i
103
101
  let matchedLength = 0
104
- let hasOpeningParen = false
105
- let parenStartIndex = -1
106
-
107
- // Skip whitespace
108
- while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
109
- i++
110
- }
111
- if (i >= lowerCaseText.length) break
112
-
113
- // Check for opening parenthesis
114
- if (i < lowerCaseText.length && lowerCaseText[i] === "(") {
115
- hasOpeningParen = true
116
- parenStartIndex = i
117
- i++
118
- }
119
-
120
- // Record potential start of reference
121
- startIndex = i
122
102
 
123
103
  // Check for book names or abbreviations
124
104
  for (let book of fullNames) {
125
105
  if (
126
106
  lowerCaseText.startsWith(book.toLowerCase(), i) &&
127
- (i + book.length >= lowerCaseText.length || /[\s:;\d]/.test(lowerCaseText[i + book.length]))
107
+ (i + book.length >= lowerCaseText.length || /[\s:;]/.test(lowerCaseText[i + book.length]))
128
108
  ) {
129
109
  foundBook = book
130
110
  matchedLength = book.length
@@ -135,7 +115,7 @@ class CodexParser {
135
115
  for (let abbr of abbreviations) {
136
116
  if (
137
117
  lowerCaseText.startsWith(abbr.toLowerCase(), i) &&
138
- (i + abbr.length >= lowerCaseText.length || /[\s:;\d]/.test(lowerCaseText[i + abbr.length]))
118
+ (i + abbr.length >= lowerCaseText.length || /[\s:;]/.test(lowerCaseText[i + abbr.length]))
139
119
  ) {
140
120
  foundBook = this.abbreviations[abbr]
141
121
  matchedLength = abbr.length
@@ -145,109 +125,116 @@ class CodexParser {
145
125
  }
146
126
 
147
127
  if (foundBook) {
148
- // Check if book is followed by a valid reference or version when booksOnly is false
149
- let isFollowedByReference = false
150
128
  let j = i + matchedLength
151
- // Skip spaces
152
- while (j < lowerCaseText.length && /\s/.test(lowerCaseText[j])) {
153
- j++
154
- }
155
- // Check for digit (chapter number) or version suffix (LXX/MT)
156
- if (
157
- j < lowerCaseText.length &&
158
- (/\d/.test(lowerCaseText[j]) || lowerCaseText.substring(j).match(/^(lxx|mt)\b/i))
159
- ) {
160
- isFollowedByReference = true
161
- }
162
-
163
- if (!this.config.booksOnly && !hasOpeningParen && !isFollowedByReference) {
164
- i++
165
- continue
166
- }
129
+ let currentBook = foundBook
130
+ let currentStartIndex = startIndex
167
131
 
168
- i += matchedLength
169
- let chapterVerse = ""
170
- let hasColon = false
171
-
172
- // Capture space after book
173
- if (i < normalizedText.length && normalizedText[i] === " ") {
174
- chapterVerse += " "
175
- i++
176
- }
132
+ // Process multiple references for the same book
133
+ while (j < lowerCaseText.length) {
134
+ let chapterVerse = ""
135
+ let hasColon = false
136
+ let version = null
137
+ let refStart = j
177
138
 
178
- // Capture chapter-verse (allow digits, colons, commas, dashes, spaces)
179
- while (i < lowerCaseText.length && (/[\d:,\-]/.test(normalizedText[i]) || normalizedText[i] === " ")) {
180
- if (normalizedText[i] === ":") hasColon = true
181
- chapterVerse += normalizedText[i]
182
- i++
183
- }
139
+ // Skip spaces
140
+ while (j < lowerCaseText.length && /\s/.test(lowerCaseText[j])) {
141
+ chapterVerse += normalizedText[j]
142
+ j++
143
+ }
144
+ refStart = j // Update start after spaces
145
+
146
+ // Next character must be a digit or version suffix
147
+ if (j < lowerCaseText.length) {
148
+ const nextChar = lowerCaseText[j]
149
+ const isVersion = lowerCaseText.substring(j).match(/^(lxx|mt)\b/i)
150
+ if (!/\d/.test(nextChar) && !isVersion && !this.config.booksOnly) {
151
+ break
152
+ }
153
+ } else if (!this.config.booksOnly) {
154
+ break
155
+ }
184
156
 
185
- // Only proceed if valid reference or booksOnly is true
186
- if (
187
- (chapterVerse.trim().length > 0 && (hasColon || /\d/.test(chapterVerse.trim()))) ||
188
- (this.config.booksOnly && !chapterVerse.trim())
189
- ) {
190
- let endIndex = i
191
- let version = null
157
+ // Capture chapter-verse
158
+ while (j < lowerCaseText.length && /\d/.test(lowerCaseText[j])) {
159
+ chapterVerse += normalizedText[j]
160
+ j++
161
+ }
162
+ while (
163
+ j < lowerCaseText.length &&
164
+ (/[\d:,\-;]/.test(normalizedText[j]) || normalizedText[j] === " ")
165
+ ) {
166
+ if (normalizedText[j] === ":") hasColon = true
167
+ chapterVerse += normalizedText[j]
168
+ if (normalizedText[j] === ";") break
169
+ j++
170
+ }
192
171
 
193
- // Detect suffix
194
- const suffixMatch = normalizedText.substring(i).match(/\b(LXX|MT)\b/i)
172
+ // Check for version suffix
173
+ let endIndex = j
174
+ const suffixMatch = normalizedText.substring(j).match(/\b(LXX|MT)\b/i)
195
175
  if (suffixMatch) {
196
176
  version = suffixMatch[0].toUpperCase()
197
177
  endIndex += suffixMatch[0].length
198
- i += suffixMatch[0].length
178
+ j += suffixMatch[0].length
199
179
  }
200
180
 
201
- // Handle closing parenthesis
202
- if (hasOpeningParen && i < lowerCaseText.length && normalizedText[i] === ")") {
203
- endIndex = i + 1
204
- i++
205
- }
206
-
207
- // Use original text for reference only (exclude parentheses)
208
- const originalText = normalizedText.slice(startIndex, hasOpeningParen ? endIndex - 1 : endIndex)
209
-
210
- // Determine type
211
- let type
181
+ // Store the reference
212
182
  const ref = chapterVerse.trim()
213
- if (this.config.booksOnly && !ref) {
214
- type = "book_only"
215
- } else if (ref.includes(":")) {
216
- if (ref.includes("-")) {
217
- const [start, end] = ref.split("-")
218
- const startParts = start.split(":")
219
- const endParts = end.split(":")
220
- type =
221
- startParts.length > 1 &&
222
- endParts.length > 1 &&
223
- startParts[0].trim() !== endParts[0].trim()
224
- ? "multi_chapter_verse_range"
225
- : "chapter_verse_range"
226
- } else if (ref.includes(",")) {
227
- type = "comma_separated_verses"
183
+ if (ref.length > 0 || version || this.config.booksOnly) {
184
+ let type
185
+ if (this.config.booksOnly && !ref) {
186
+ type = "book_only"
187
+ } else if (ref.includes(":")) {
188
+ if (ref.includes("-")) {
189
+ const [start, end] = ref.split("-")
190
+ const startParts = start.split(":")
191
+ const endParts = end.split(":")
192
+ type =
193
+ startParts.length > 1 &&
194
+ endParts.length > 1 &&
195
+ startParts[0].trim() !== endParts[0].trim()
196
+ ? "multi_chapter_verse_range"
197
+ : "chapter_verse_range"
198
+ } else if (ref.includes(",")) {
199
+ type = "comma_separated_verses"
200
+ } else {
201
+ type = "chapter_verse"
202
+ }
203
+ } else if (ref.includes("-")) {
204
+ type = "chapter_range"
205
+ } else if (/\d/.test(ref)) {
206
+ type = "single_chapter"
228
207
  } else {
229
- type = "chapter_verse"
208
+ type = "book_only"
230
209
  }
231
- } else if (ref.includes("-")) {
232
- type = "chapter_range"
233
- } else if (/\d/.test(ref)) {
234
- type = "single_chapter"
235
- } else {
236
- type = "book_only"
210
+
211
+ this.found.push({
212
+ book: currentBook,
213
+ reference: ref,
214
+ startIndex: currentStartIndex,
215
+ endIndex,
216
+ version,
217
+ type,
218
+ originalText: normalizedText.slice(currentStartIndex, endIndex),
219
+ })
237
220
  }
238
221
 
239
- this.found.push({
240
- book: foundBook,
241
- reference: ref,
242
- startIndex: hasOpeningParen ? parenStartIndex : startIndex,
243
- endIndex,
244
- version,
245
- type,
246
- originalText,
247
- })
248
- } else {
249
- i = startIndex + 1
222
+ // Handle semicolon for next reference
223
+ if (j < lowerCaseText.length && lowerCaseText[j] === ";") {
224
+ j++ // Move past semicolon
225
+ currentStartIndex = j // Reset start for next reference
226
+ // Skip spaces after semicolon
227
+ while (j < lowerCaseText.length && /\s/.test(lowerCaseText[j])) {
228
+ j++
229
+ }
230
+ continue // Process next reference
231
+ }
232
+
233
+ // Exit if no semicolon or end of reference
234
+ break
250
235
  }
236
+
237
+ i = j
251
238
  } else {
252
239
  i++
253
240
  }