codexparser 0.1.77 → 0.1.79

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/CodexParser.js +140 -113
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codexparser",
3
- "version": "0.1.77",
3
+ "version": "0.1.79",
4
4
  "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -87,44 +87,24 @@ class CodexParser {
87
87
  */
88
88
  scan(text) {
89
89
  const fullNames = [...this.bible.old, ...this.bible.new]
90
- const abbreviations = Object.keys(this.abbreviations)
90
+ const abbreviations = Object.keys(this.abbreviations).filter((abbr) => abbr.length >= 3)
91
91
  this.found = []
92
92
 
93
93
  // Normalize text: remove curly quotes, replace periods before numbers with colons
94
- let normalizedText = text
95
- .replace(/[“”]/g, "") // Remove curly quotes
96
- .replace(/\.(?=\d)/g, ":")
94
+ let normalizedText = text.replace(/[“”]/g, "").replace(/\.(?=\d)/g, ":")
97
95
  const lowerCaseText = normalizedText.toLowerCase()
98
96
  let i = 0
99
97
 
100
98
  while (i < lowerCaseText.length) {
101
99
  let foundBook = null
102
- let startIndex = -1
100
+ let startIndex = i
103
101
  let matchedLength = 0
104
- let hasOpeningParen = false
105
- let parenStartIndex = -1
106
-
107
- // Skip whitespace
108
- while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
109
- i++
110
- }
111
- if (i >= lowerCaseText.length) break
112
-
113
- // Check for opening parenthesis
114
- if (i < lowerCaseText.length && lowerCaseText[i] === "(") {
115
- hasOpeningParen = true
116
- parenStartIndex = i
117
- i++
118
- }
119
-
120
- // Record potential start of reference
121
- startIndex = i
122
102
 
123
103
  // Check for book names or abbreviations
124
104
  for (let book of fullNames) {
125
105
  if (
126
106
  lowerCaseText.startsWith(book.toLowerCase(), i) &&
127
- (i + book.length >= lowerCaseText.length || /[\s:;\d]/.test(lowerCaseText[i + book.length]))
107
+ (i + book.length >= lowerCaseText.length || /[\s:;]/.test(lowerCaseText[i + book.length]))
128
108
  ) {
129
109
  foundBook = book
130
110
  matchedLength = book.length
@@ -135,7 +115,7 @@ class CodexParser {
135
115
  for (let abbr of abbreviations) {
136
116
  if (
137
117
  lowerCaseText.startsWith(abbr.toLowerCase(), i) &&
138
- (i + abbr.length >= lowerCaseText.length || /[\s:;\d]/.test(lowerCaseText[i + abbr.length]))
118
+ (i + abbr.length >= lowerCaseText.length || /[\s:;]/.test(lowerCaseText[i + abbr.length]))
139
119
  ) {
140
120
  foundBook = this.abbreviations[abbr]
141
121
  matchedLength = abbr.length
@@ -145,105 +125,116 @@ class CodexParser {
145
125
  }
146
126
 
147
127
  if (foundBook) {
148
- // Check if book is followed by a valid reference when booksOnly is false
149
- let isFollowedByReference = false
150
- if (!this.config.booksOnly && !hasOpeningParen) {
151
- let j = i + matchedLength
128
+ let j = i + matchedLength
129
+ let currentBook = foundBook
130
+ let currentStartIndex = startIndex
131
+
132
+ // Process multiple references for the same book
133
+ while (j < lowerCaseText.length) {
134
+ let chapterVerse = ""
135
+ let hasColon = false
136
+ let version = null
137
+ let refStart = j
138
+
152
139
  // Skip spaces
153
140
  while (j < lowerCaseText.length && /\s/.test(lowerCaseText[j])) {
141
+ chapterVerse += normalizedText[j]
154
142
  j++
155
143
  }
156
- // Check for a digit (chapter number) to start a valid reference
157
- if (j < lowerCaseText.length && /\d/.test(lowerCaseText[j])) {
158
- isFollowedByReference = true
144
+ refStart = j // Update start after spaces
145
+
146
+ // Next character must be a digit or version suffix
147
+ if (j < lowerCaseText.length) {
148
+ const nextChar = lowerCaseText[j]
149
+ const isVersion = lowerCaseText.substring(j).match(/^(lxx|mt)\b/i)
150
+ if (!/\d/.test(nextChar) && !isVersion && !this.config.booksOnly) {
151
+ break
152
+ }
153
+ } else if (!this.config.booksOnly) {
154
+ break
159
155
  }
160
- } else {
161
- isFollowedByReference = true // Allow if booksOnly or in parentheses
162
- }
163
156
 
164
- if (!isFollowedByReference) {
165
- i++
166
- continue
167
- }
168
-
169
- i += matchedLength
170
- let chapterVerse = ""
171
- let hasColon = false
172
-
173
- // Capture space after book
174
- if (i < normalizedText.length && normalizedText[i] === " ") {
175
- chapterVerse += " "
176
- i++
177
- }
178
-
179
- // Capture chapter-verse (allow digits, colons, commas, dashes, spaces)
180
- while (i < lowerCaseText.length && (/[\d:,\-]/.test(normalizedText[i]) || normalizedText[i] === " ")) {
181
- if (normalizedText[i] === ":") hasColon = true
182
- chapterVerse += normalizedText[i]
183
- i++
184
- }
185
-
186
- // Only proceed if valid reference or booksOnly is true
187
- if ((hasColon && chapterVerse.trim().length > 0) || (this.config.booksOnly && !chapterVerse.trim())) {
188
- let endIndex = i
189
- let version = null
157
+ // Capture chapter-verse
158
+ while (j < lowerCaseText.length && /\d/.test(lowerCaseText[j])) {
159
+ chapterVerse += normalizedText[j]
160
+ j++
161
+ }
162
+ while (
163
+ j < lowerCaseText.length &&
164
+ (/[\d:,\-;]/.test(normalizedText[j]) || normalizedText[j] === " ")
165
+ ) {
166
+ if (normalizedText[j] === ":") hasColon = true
167
+ chapterVerse += normalizedText[j]
168
+ if (normalizedText[j] === ";") break
169
+ j++
170
+ }
190
171
 
191
- // Detect suffix
192
- const suffixMatch = normalizedText.substring(i).match(/\b(LXX|MT)\b/i)
172
+ // Check for version suffix
173
+ let endIndex = j
174
+ const suffixMatch = normalizedText.substring(j).match(/\b(LXX|MT)\b/i)
193
175
  if (suffixMatch) {
194
176
  version = suffixMatch[0].toUpperCase()
195
177
  endIndex += suffixMatch[0].length
196
- i += suffixMatch[0].length
197
- }
198
-
199
- // Handle closing parenthesis
200
- if (hasOpeningParen && i < lowerCaseText.length && normalizedText[i] === ")") {
201
- endIndex = i + 1
202
- i++
178
+ j += suffixMatch[0].length
203
179
  }
204
180
 
205
- // Use original text for reference only (exclude parentheses)
206
- const originalText = normalizedText.slice(startIndex, hasOpeningParen ? endIndex - 1 : endIndex)
207
-
208
- // Determine type
209
- let type
181
+ // Store the reference
210
182
  const ref = chapterVerse.trim()
211
- if (this.config.booksOnly && !ref) {
212
- type = "book_only"
213
- } else if (ref.includes(":")) {
214
- if (ref.includes("-")) {
215
- const [start, end] = ref.split("-")
216
- const startParts = start.split(":")
217
- const endParts = end.split(":")
218
- type =
219
- startParts.length > 1 &&
220
- endParts.length > 1 &&
221
- startParts[0].trim() !== endParts[0].trim()
222
- ? "multi_chapter_verse_range"
223
- : "chapter_verse_range"
224
- } else if (ref.includes(",")) {
225
- type = "comma_separated_verses"
183
+ if (ref.length > 0 || version || this.config.booksOnly) {
184
+ let type
185
+ if (this.config.booksOnly && !ref) {
186
+ type = "book_only"
187
+ } else if (ref.includes(":")) {
188
+ if (ref.includes("-")) {
189
+ const [start, end] = ref.split("-")
190
+ const startParts = start.split(":")
191
+ const endParts = end.split(":")
192
+ type =
193
+ startParts.length > 1 &&
194
+ endParts.length > 1 &&
195
+ startParts[0].trim() !== endParts[0].trim()
196
+ ? "multi_chapter_verse_range"
197
+ : "chapter_verse_range"
198
+ } else if (ref.includes(",")) {
199
+ type = "comma_separated_verses"
200
+ } else {
201
+ type = "chapter_verse"
202
+ }
203
+ } else if (ref.includes("-")) {
204
+ type = "chapter_range"
205
+ } else if (/\d/.test(ref)) {
206
+ type = "single_chapter"
226
207
  } else {
227
- type = "chapter_verse"
208
+ type = "book_only"
209
+ }
210
+
211
+ this.found.push({
212
+ book: currentBook,
213
+ reference: ref,
214
+ startIndex: currentStartIndex,
215
+ endIndex,
216
+ version,
217
+ type,
218
+ originalText: normalizedText.slice(currentStartIndex, endIndex),
219
+ })
220
+ }
221
+
222
+ // Handle semicolon for next reference
223
+ if (j < lowerCaseText.length && lowerCaseText[j] === ";") {
224
+ j++ // Move past semicolon
225
+ currentStartIndex = j // Reset start for next reference
226
+ // Skip spaces after semicolon
227
+ while (j < lowerCaseText.length && /\s/.test(lowerCaseText[j])) {
228
+ j++
228
229
  }
229
- } else if (ref.includes("-")) {
230
- type = "chapter_range"
231
- } else {
232
- type = "single_chapter"
230
+ continue // Process next reference
233
231
  }
234
232
 
235
- this.found.push({
236
- book: foundBook,
237
- reference: ref,
238
- startIndex: hasOpeningParen ? parenStartIndex : startIndex,
239
- endIndex,
240
- version,
241
- type,
242
- originalText,
243
- })
244
- } else {
245
- i = startIndex + 1
233
+ // Exit if no semicolon or end of reference
234
+ break
246
235
  }
236
+
237
+ i = j
247
238
  } else {
248
239
  i++
249
240
  }
@@ -296,10 +287,31 @@ class CodexParser {
296
287
  abbr: null,
297
288
  }
298
289
 
299
- this.parseReferenceParts(parsedPassage, passage.reference.split(","))
290
+ // Clean reference for parsing, removing version suffix
291
+ let cleanReference = passage.reference
292
+ if (passage.version) {
293
+ cleanReference = cleanReference.replace(/\s*(LXX|MT)$/i, "").trim()
294
+ }
295
+
296
+ // Handle chapter-only references (e.g., "113 :" or "113")
297
+ if (!cleanReference || cleanReference.match(/^\d+\s*[:;]?\s*$/)) {
298
+ const chapterMatch = cleanReference.match(/\d+/) || ["1"]
299
+ const chapter = Number(chapterMatch[0])
300
+ parsedPassage.chapter = chapter
301
+ parsedPassage.type = this.SINGLE_CHAPTER
302
+ const chapterVerses = this.getChapterVerses(book, chapter)
303
+ if (chapterVerses.length) {
304
+ const startVerse = chapterVerses[0]
305
+ const endVerse = chapterVerses[chapterVerses.length - 1]
306
+ parsedPassage.verses = [`${startVerse}-${endVerse}`]
307
+ }
308
+ } else {
309
+ this.parseReferenceParts(parsedPassage, cleanReference.split(","))
310
+ }
311
+
300
312
  parsedPassage.passages = this.populate(parsedPassage)
301
313
  parsedPassage.scripture = this.scripturize(parsedPassage)
302
- parsedPassage.valid = this._isValid(parsedPassage, passage.reference)
314
+ parsedPassage.valid = this._isValid(parsedPassage, cleanReference)
303
315
 
304
316
  // Set abbr property using SBL-style abbreviations
305
317
  const sblEntry = Object.entries(this.sblAbbreviations).find(
@@ -307,16 +319,16 @@ class CodexParser {
307
319
  )
308
320
  if (sblEntry) {
309
321
  const { value, abbr } = sblEntry[1]
322
+ const ref = passage.reference.replace(/\s*(LXX|MT)$/i, "").trim()
310
323
  parsedPassage.abbr = abbr
311
- ? `${value}. ${passage.reference}${passage.version ? " " + passage.version : ""}`
312
- : `${value} ${passage.reference}${passage.version ? " " + passage.version : ""}`
324
+ ? `${value}. ${ref}${passage.version ? " " + passage.version : ""}`
325
+ : `${value} ${ref}${passage.version ? " " + passage.version : ""}`
313
326
  } else {
314
- // Fallback to original
315
327
  parsedPassage.abbr = parsedPassage.original
316
328
  }
317
329
 
318
330
  if (parsedPassage.type === this.MULTI_CHAPTER_RANGE) {
319
- this.handleMultiChapterRange(parsedPassage, passage.reference)
331
+ this.handleMultiChapterRange(parsedPassage, cleanReference)
320
332
  } else {
321
333
  delete parsedPassage.to
322
334
  }
@@ -370,6 +382,22 @@ class CodexParser {
370
382
  if (!part) return // Skip empty parts from trailing commas
371
383
  const isFirstPart = index === 0
372
384
 
385
+ // Handle chapter-only references (e.g., "113 :" or "113")
386
+ if (!part.includes(":") && !part.includes("-") && !singleChapterBook) {
387
+ const chapter = Number(part.replace(/[^0-9]/g, "")) // Extract number, remove trailing colon
388
+ if (chapter > 0) {
389
+ passage.chapter = chapter
390
+ passage.type = this.SINGLE_CHAPTER
391
+ const chapterVerses = this.getChapterVerses(passage.book, chapter)
392
+ if (chapterVerses.length) {
393
+ const startVerse = chapterVerses[0]
394
+ const endVerse = chapterVerses[chapterVerses.length - 1]
395
+ passage.verses = [`${startVerse}-${endVerse}`]
396
+ }
397
+ return
398
+ }
399
+ }
400
+
373
401
  if (part.includes(":")) {
374
402
  this.parseChapterVerse(passage, part, isFirstPart)
375
403
  } else if (singleChapterBook) {
@@ -381,7 +409,6 @@ class CodexParser {
381
409
  }
382
410
  })
383
411
  }
384
-
385
412
  /**
386
413
  * Parses chapter-verse references (e.g., "3:16").
387
414
  * @param {Object} passage - The passage object.