codexparser 0.1.82 → 0.1.84

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.trunk/trunk.yaml CHANGED
@@ -7,7 +7,7 @@ cli:
7
7
  plugins:
8
8
  sources:
9
9
  - id: trunk
10
- ref: v1.7.0
10
+ ref: v1.7.1
11
11
  uri: https://github.com/trunk-io/plugins
12
12
  # Many linters and tools depend on runtimes - configure them here. (https://docs.trunk.io/runtimes)
13
13
  runtimes:
@@ -17,11 +17,11 @@ runtimes:
17
17
  # This is the section where you manage your linters. (https://docs.trunk.io/check/configuration)
18
18
  lint:
19
19
  enabled:
20
- - checkov@3.2.445
20
+ - checkov@3.2.446
21
21
  - git-diff-check
22
22
  - markdownlint@0.45.0
23
23
  - osv-scanner@2.0.3
24
- - prettier@3.6.0
24
+ - prettier@3.6.1
25
25
  - trufflehog@3.89.2
26
26
  actions:
27
27
  disabled:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codexparser",
3
- "version": "0.1.82",
3
+ "version": "0.1.84",
4
4
  "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -94,165 +94,132 @@ class CodexParser {
94
94
  const fullNames = [...this.bible.old, ...this.bible.new]
95
95
  const abbreviations = Object.keys(this.abbreviations)
96
96
  this.found = []
97
- // Normalize text for parsing but keep original for originalText
98
- let normalizedText = text
99
- .replace(/[“”]/g, "") // Remove curly quotes
100
- .replace(/\.(?=\d)/g, ":") // Replace periods before digits with colons (e.g., "Re13.8" -> "Re13:8")
101
- .replace(/\s+/g, " ") // Normalize multiple spaces to single
97
+ // Minimal normalization: fix periods before numbers, remove trailing periods
98
+ let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
99
+ console.log(`Input text: ${text}`)
100
+ console.log(`Normalized text: ${normalizedText}`)
102
101
  const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
103
102
  const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
104
103
  const lowerCaseText = normalizedText.toLowerCase()
105
104
  let i = 0
106
105
 
107
- const isValidChapterVerseChar = (char) => /[^A-Za-z]/.test(char) // Non-letter characters
106
+ const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
108
107
  const isNextBibleBook = (startIndex) => {
109
108
  const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
110
- // Check if the text starts with a book name or abbreviation followed by a digit
111
109
  return (
112
- lowercaseBibleFullNames.some((book) => {
113
- if (textAfterCurrentPosition.startsWith(book)) {
114
- const nextIndex = startIndex + book.length
115
- const nextChar = lowerCaseText[nextIndex]
116
- return nextChar && /\d/.test(nextChar)
117
- }
118
- return false
119
- }) ||
120
- lowercaseBibleAbbreviations.some((abbr) => {
121
- if (textAfterCurrentPosition.startsWith(abbr)) {
122
- const nextIndex = startIndex + abbr.length
123
- const nextChar = lowerCaseText[nextIndex]
124
- return nextChar && (/\d/.test(nextChar) || /\./.test(nextChar))
125
- }
126
- return false
127
- })
110
+ lowercaseBibleFullNames.some((book) => textAfterCurrentPosition.startsWith(book)) ||
111
+ lowercaseBibleAbbreviations.some((abbr) => textAfterCurrentPosition.startsWith(abbr))
128
112
  )
129
113
  }
130
- const detectSuffix = (startIndex, inputText) => {
131
- const suffixMatch = inputText.substring(startIndex).match(/\b(LXX|MT)\b/i)
132
- return suffixMatch ? { version: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
114
+ const detectSuffix = (startIndex) => {
115
+ const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
116
+ return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
133
117
  }
134
118
 
135
119
  while (i < lowerCaseText.length) {
136
120
  let foundBook = null
121
+ let bookStartIndex = -1
137
122
  let matchedLength = 0
138
- let originalBookText = ""
139
- let startIndex = i
140
123
 
141
- // Check full book names
124
+ // Skip whitespace and special characters before checking for book
125
+ while (i < lowerCaseText.length && /[\s—-]/.test(lowerCaseText[i])) {
126
+ i++
127
+ }
128
+ if (i >= lowerCaseText.length) break
129
+
130
+ console.log(`Scanning at index ${i}: ${lowerCaseText.slice(i, i + 10)}...`)
131
+
142
132
  for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
143
133
  const book = lowercaseBibleFullNames[j]
144
- if (
145
- lowerCaseText.startsWith(book, i) &&
146
- (i + book.length >= lowerCaseText.length || /\d/.test(lowerCaseText[i + book.length]))
147
- ) {
134
+ if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
148
135
  foundBook = fullNames[j]
136
+ bookStartIndex = i
149
137
  matchedLength = book.length
150
- originalBookText = text.slice(i, i + book.length)
151
138
  }
152
139
  }
153
140
 
154
- // Check abbreviations
155
141
  if (!foundBook) {
156
142
  for (let k = 0; k < lowercaseBibleAbbreviations.length; k++) {
157
143
  const abbreviation = lowercaseBibleAbbreviations[k]
158
- const abbrPattern = abbreviation.replace(/\./g, "\\.?")
159
- const regex = new RegExp(`^${abbrPattern}(\\.?\\s*\\d)`, "i")
160
- const match = lowerCaseText.slice(i).match(regex)
161
- if (match) {
144
+ if (lowerCaseText.startsWith(abbreviation, i) && abbreviation.length > matchedLength) {
162
145
  foundBook = this.abbreviations[abbreviations[k]]
163
- matchedLength = match[0].length - match[1].length // Exclude chapter-verse part
164
- originalBookText = text.slice(i, i + matchedLength)
146
+ bookStartIndex = i
147
+ matchedLength = abbreviation.length
165
148
  }
166
149
  }
167
150
  }
168
151
 
169
152
  if (foundBook) {
153
+ console.log(`Found book: ${foundBook} at index ${bookStartIndex}, length ${matchedLength}`)
170
154
  i += matchedLength
171
155
  let chapterVerse = ""
172
- let originalChapterVerseText = ""
173
156
  const references = []
157
+ let refStartIndex = bookStartIndex // Start of reference (including book) in normalizedText
158
+ let originalRefStartIndex = bookStartIndex // Start in original text
174
159
 
175
- // Capture chapter-verse until a letter (potential new book) or semicolon
176
160
  while (i < normalizedText.length && isValidChapterVerseChar(normalizedText[i])) {
177
161
  if (isNextBibleBook(i)) {
162
+ console.log(`Next book detected at index ${i}, stopping reference parsing`)
178
163
  break
179
164
  }
180
165
  if (normalizedText[i] === ";") {
181
- const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9:,\-]+$/g, "")
166
+ const formattedReference = chapterVerse.trim()
182
167
  if (formattedReference) {
183
- // Find the last digit in the reference
184
- const lastDigitMatch = formattedReference.match(/\d(?=[^0-9]*$)/)
185
- let endIndex = i - 1 // Default to position before semicolon
186
- if (lastDigitMatch) {
187
- const lastDigitIndex = formattedReference.lastIndexOf(lastDigitMatch[0])
188
- endIndex = startIndex + matchedLength + lastDigitIndex
189
- }
168
+ const refEndIndex = i
190
169
  references.push({
191
- reference: formattedReference,
192
- originalText: (originalBookText + originalChapterVerseText).trim(),
193
- startIndex,
194
- endIndex,
170
+ ref: formattedReference,
171
+ start: refStartIndex,
172
+ end: refEndIndex,
195
173
  })
174
+ console.log(
175
+ `Reference found: ${formattedReference}, normalized indices ${refStartIndex}-${refEndIndex}`
176
+ )
196
177
  }
197
178
  chapterVerse = ""
198
- originalChapterVerseText = ""
199
- originalBookText = foundBook // Reuse book for semicolon-separated references
200
- startIndex = i + 1 // Start of next reference
179
+ refStartIndex = i + 1
180
+ const semicolonIndex = text.indexOf(";", originalRefStartIndex)
181
+ originalRefStartIndex = semicolonIndex !== -1 ? semicolonIndex + 1 : refStartIndex
201
182
  i++
202
183
  continue
203
184
  }
204
185
  chapterVerse += normalizedText[i]
205
- originalChapterVerseText += text[i]
206
186
  i++
207
187
  }
208
188
 
209
- // Add any remaining reference
210
189
  if (chapterVerse.trim().length > 0) {
211
- const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9:,\-]+$/g, "")
190
+ const formattedReference = chapterVerse.trim()
212
191
  if (formattedReference) {
213
- // Find the last digit in the reference
214
- const lastDigitMatch = formattedReference.match(/\d(?=[^0-9]*$)/)
215
- let endIndex = i - 1 // Default to last character
216
- if (lastDigitMatch) {
217
- const lastDigitIndex = formattedReference.lastIndexOf(lastDigitMatch[0])
218
- endIndex = startIndex + matchedLength + lastDigitIndex
219
- }
192
+ const refEndIndex = i
220
193
  references.push({
221
- reference: formattedReference,
222
- originalText: (originalBookText + originalChapterVerseText).trim(),
223
- startIndex,
224
- endIndex,
194
+ ref: formattedReference,
195
+ start: refStartIndex,
196
+ end: refEndIndex,
225
197
  })
198
+ console.log(
199
+ `Final reference found: ${formattedReference}, normalized indices ${refStartIndex}-${refEndIndex}`
200
+ )
226
201
  }
227
202
  }
228
203
 
229
- // Process each reference
230
- references.forEach((refObj) => {
231
- // Detect version suffix (LXX or MT)
232
- let version = null
233
- let originalText = refObj.originalText
234
- const suffix = detectSuffix(i, text)
235
- if (suffix) {
236
- version = suffix.version
237
- originalText += ` ${version}`
238
- i += suffix.length
239
- // Update endIndex if version suffix follows a digit
240
- if (refObj.endIndex === i - suffix.length - 1) {
241
- refObj.endIndex = i - 1
242
- }
243
- }
204
+ // Align indices with original text
205
+ const originalBookText = text.slice(bookStartIndex, bookStartIndex + matchedLength)
206
+ const originalBookStartIndex =
207
+ text.indexOf(originalBookText, bookStartIndex) !== -1
208
+ ? text.indexOf(originalBookText, bookStartIndex)
209
+ : bookStartIndex
210
+ console.log(`Original book text: ${originalBookText}, original start index: ${originalBookStartIndex}`)
244
211
 
212
+ references.forEach(({ ref, start, end }, refIndex) => {
245
213
  let type
246
- let ref = refObj.reference.replace(/^\.\s*/, "") // Remove leading period and space
247
- if (this.config.booksOnly && !ref) {
248
- type = "book_only"
249
- } else if (ref.includes(":")) {
214
+ if (ref.includes(":")) {
250
215
  if (ref.includes("-")) {
251
- const [start, end] = ref.split("-").map((s) => s.trim())
252
- const startParts = start.split(":").map((s) => s.trim())
253
- const endParts = end.split(":").map((s) => s.trim())
216
+ const [start, end] = ref.split("-")
217
+ const startParts = start.split(":")
218
+ const endParts = end.split(":")
254
219
  type =
255
- startParts.length > 1 && endParts.length > 1 && startParts[0] !== endParts[0]
220
+ startParts.length > 1 &&
221
+ endParts.length > 1 &&
222
+ startParts[0].trim() !== endParts[0].trim()
256
223
  ? "multi_chapter_verse_range"
257
224
  : "chapter_verse_range"
258
225
  } else if (ref.includes(",")) {
@@ -262,32 +229,68 @@ class CodexParser {
262
229
  }
263
230
  } else if (ref.includes("-")) {
264
231
  type = "chapter_range"
265
- } else if (/\d/.test(ref)) {
266
- type = "single_chapter"
267
232
  } else {
268
- type = "book_only"
233
+ type = "single_chapter"
234
+ }
235
+
236
+ // Construct full reference text for original text
237
+ const fullRefText = `${originalBookText} ${ref.replace(":", ".")}`
238
+ const suffixData = detectSuffix(end)
239
+ const suffix = suffixData ? suffixData.suffix : null
240
+ let refEndIndex = end
241
+ if (suffixData) {
242
+ refEndIndex += suffixData.length
243
+ i += suffixData.length // Skip suffix
269
244
  }
270
245
 
271
- const referenceObj = {
246
+ // Map to original text
247
+ let originalStartIndex =
248
+ text.indexOf(fullRefText, originalRefStartIndex) !== -1
249
+ ? text.indexOf(fullRefText, originalRefStartIndex)
250
+ : originalBookStartIndex
251
+ console.log(
252
+ `Searching for fullRefText: ${fullRefText} at index ${originalRefStartIndex}, found at ${originalStartIndex}`
253
+ )
254
+
255
+ let originalEndIndex = originalStartIndex + fullRefText.length
256
+ let originalText = text.slice(originalStartIndex, originalEndIndex)
257
+
258
+ // Adjust for suffix in original text
259
+ if (suffixData) {
260
+ originalEndIndex += suffixData.length
261
+ originalText = text.slice(originalStartIndex, originalEndIndex)
262
+ }
263
+
264
+ // Trim trailing whitespace from originalText
265
+ while (originalEndIndex > originalStartIndex && /[\s]/.test(text[originalEndIndex - 1])) {
266
+ originalEndIndex--
267
+ originalText = text.slice(originalStartIndex, originalEndIndex)
268
+ }
269
+
270
+ console.log(
271
+ `Reference ${
272
+ refIndex + 1
273
+ }: ${originalText}, original indices ${originalStartIndex}-${originalEndIndex}, type: ${type}, suffix: ${
274
+ suffix || "none"
275
+ }, search text: ${fullRefText}`
276
+ )
277
+
278
+ this.found.push({
272
279
  book: foundBook,
273
280
  reference: ref,
274
- version,
281
+ startIndex: originalStartIndex,
282
+ endIndex: originalEndIndex,
283
+ version: suffix || null,
275
284
  type,
276
- originalText,
277
- startIndex: refObj.startIndex,
278
- endIndex: refObj.endIndex,
279
- }
280
- this.found.push(referenceObj)
285
+ originalText: originalText,
286
+ })
281
287
  })
282
-
283
- // Skip any trailing spaces after the reference
284
- while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
285
- i++
286
- }
287
288
  } else {
288
289
  i++
289
290
  }
290
291
  }
292
+
293
+ console.log(`Found references: ${JSON.stringify(this.found, null, 2)}`)
291
294
  return this
292
295
  }
293
296
 
@@ -337,6 +340,7 @@ class CodexParser {
337
340
 
338
341
  // Clean reference for parsing
339
342
  let cleanReference = passage.reference.replace(/\s*(LXX|MT)$/i, "").trim()
343
+ console.log(`Parsing reference: ${cleanReference}, type: ${passage.type}`)
340
344
  if (cleanReference.endsWith(",")) {
341
345
  cleanReference = cleanReference.slice(0, -1).trim()
342
346
  }
@@ -344,6 +348,7 @@ class CodexParser {
344
348
  // Handle book-only or empty references
345
349
  if (!cleanReference && this.config.booksOnly) {
346
350
  parsedPassage.type = "book_only"
351
+ console.log(`Book-only reference: ${book}`)
347
352
  } else if (!cleanReference || cleanReference.match(/^\d+\s*[:;]?\s*$/)) {
348
353
  const chapterMatch = cleanReference.match(/\d+/) || ["1"]
349
354
  const chapter = Number(chapterMatch[0])
@@ -355,8 +360,18 @@ class CodexParser {
355
360
  const endVerse = chapterVerses[chapterVerses.length - 1]
356
361
  parsedPassage.verses = [`${startVerse}-${endVerse}`]
357
362
  }
363
+ console.log(`Single chapter: ${chapter}, verses: ${parsedPassage.verses}`)
364
+ } else if (passage.type === "comma_separated_verses") {
365
+ // Handle comma-separated verses (e.g., "1:7,18")
366
+ const [chapter, verses] = cleanReference.split(":")
367
+ parsedPassage.chapter = Number(chapter)
368
+ parsedPassage.verses = verses.split(",").map((v) => v.trim())
369
+ console.log(`Comma-separated verses: chapter ${chapter}, verses ${parsedPassage.verses}`)
358
370
  } else {
359
371
  this.parseReferenceParts(parsedPassage, cleanReference)
372
+ console.log(
373
+ `Parsed with parseReferenceParts: chapter ${parsedPassage.chapter}, verses ${parsedPassage.verses}`
374
+ )
360
375
  }
361
376
 
362
377
  parsedPassage.passages = this.populate(parsedPassage)
@@ -376,6 +391,7 @@ class CodexParser {
376
391
  } else {
377
392
  parsedPassage.abbr = parsedPassage.original
378
393
  }
394
+ console.log(`Abbreviation set: ${parsedPassage.abbr}`)
379
395
 
380
396
  if (parsedPassage.type === this.MULTI_CHAPTER_RANGE) {
381
397
  this.handleMultiChapterRange(parsedPassage, cleanReference)
@@ -401,6 +417,7 @@ class CodexParser {
401
417
  chapter: lastPassage.chapter,
402
418
  verse: lastPassage.verse,
403
419
  }
420
+ console.log(`Start: ${JSON.stringify(parsedPassage.start)}, End: ${JSON.stringify(parsedPassage.end)}`)
404
421
  }
405
422
 
406
423
  if (!parsedPassage.version) {
@@ -415,9 +432,9 @@ class CodexParser {
415
432
  })
416
433
 
417
434
  this.versification()
435
+ console.log(`Final passages: ${JSON.stringify(this.passages, null, 2)}`)
418
436
  return this
419
437
  }
420
-
421
438
  /**
422
439
  * Parses reference parts into chapter and verse components.
423
440
  * @param {Object} passage - The passage object to populate.