npm - codexparser - Versions diffs - 0.1.82 → 0.1.84 - Mend

codexparser 0.1.82 → 0.1.84

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (3) hide show

package/.trunk/trunk.yaml CHANGED Viewed

@@ -7,7 +7,7 @@ cli:
 plugins:
   sources:
     - id: trunk
-      ref: v1.7.0
+      ref: v1.7.1
       uri: https://github.com/trunk-io/plugins
 # Many linters and tools depend on runtimes - configure them here. (https://docs.trunk.io/runtimes)
 runtimes:
@@ -17,11 +17,11 @@ runtimes:
 # This is the section where you manage your linters. (https://docs.trunk.io/check/configuration)
 lint:
   enabled:
-    - checkov@3.2.445
+    - checkov@3.2.446
     - git-diff-check
     - markdownlint@0.45.0
     - osv-scanner@2.0.3
-    - prettier@3.6.0
+    - prettier@3.6.1
     - trufflehog@3.89.2
 actions:
   disabled:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "codexparser",
-  "version": "0.1.82",
+  "version": "0.1.84",
   "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
   "main": "index.js",
   "scripts": {

package/src/CodexParser.js CHANGED Viewed

@@ -94,165 +94,132 @@ class CodexParser {
         const fullNames = [...this.bible.old, ...this.bible.new]
         const abbreviations = Object.keys(this.abbreviations)
         this.found = []
-        // Normalize text for parsing but keep original for originalText
-        let normalizedText = text
-            .replace(/[“”]/g, "") // Remove curly quotes
-            .replace(/\.(?=\d)/g, ":") // Replace periods before digits with colons (e.g., "Re13.8" -> "Re13:8")
-            .replace(/\s+/g, " ") // Normalize multiple spaces to single
+        // Minimal normalization: fix periods before numbers, remove trailing periods
+        let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
+        console.log(`Input text: ${text}`)
+        console.log(`Normalized text: ${normalizedText}`)
         const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
         const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
         const lowerCaseText = normalizedText.toLowerCase()
         let i = 0
-        const isValidChapterVerseChar = (char) => /[^A-Za-z]/.test(char) // Non-letter characters
+        const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
         const isNextBibleBook = (startIndex) => {
             const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
-            // Check if the text starts with a book name or abbreviation followed by a digit
             return (
-                lowercaseBibleFullNames.some((book) => {
-                    if (textAfterCurrentPosition.startsWith(book)) {
-                        const nextIndex = startIndex + book.length
-                        const nextChar = lowerCaseText[nextIndex]
-                        return nextChar && /\d/.test(nextChar)
-                    }
-                    return false
-                }) ||
-                lowercaseBibleAbbreviations.some((abbr) => {
-                    if (textAfterCurrentPosition.startsWith(abbr)) {
-                        const nextIndex = startIndex + abbr.length
-                        const nextChar = lowerCaseText[nextIndex]
-                        return nextChar && (/\d/.test(nextChar) || /\./.test(nextChar))
-                    }
-                    return false
-                })
+                lowercaseBibleFullNames.some((book) => textAfterCurrentPosition.startsWith(book)) ||
+                lowercaseBibleAbbreviations.some((abbr) => textAfterCurrentPosition.startsWith(abbr))
             )
         }
-        const detectSuffix = (startIndex, inputText) => {
-            const suffixMatch = inputText.substring(startIndex).match(/\b(LXX|MT)\b/i)
-            return suffixMatch ? { version: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
+        const detectSuffix = (startIndex) => {
+            const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
+            return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
         }
         while (i < lowerCaseText.length) {
             let foundBook = null
+            let bookStartIndex = -1
             let matchedLength = 0
-            let originalBookText = ""
-            let startIndex = i
-            // Check full book names
+            // Skip whitespace and special characters before checking for book
+            while (i < lowerCaseText.length && /[\s—-]/.test(lowerCaseText[i])) {
+                i++
+            }
+            if (i >= lowerCaseText.length) break
+            console.log(`Scanning at index ${i}: ${lowerCaseText.slice(i, i + 10)}...`)
             for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
                 const book = lowercaseBibleFullNames[j]
-                if (
-                    lowerCaseText.startsWith(book, i) &&
-                    (i + book.length >= lowerCaseText.length || /\d/.test(lowerCaseText[i + book.length]))
-                ) {
+                if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
                     foundBook = fullNames[j]
+                    bookStartIndex = i
                     matchedLength = book.length
-                    originalBookText = text.slice(i, i + book.length)
                 }
             }
-            // Check abbreviations
             if (!foundBook) {
                 for (let k = 0; k < lowercaseBibleAbbreviations.length; k++) {
                     const abbreviation = lowercaseBibleAbbreviations[k]
-                    const abbrPattern = abbreviation.replace(/\./g, "\\.?")
-                    const regex = new RegExp(`^${abbrPattern}(\\.?\\s*\\d)`, "i")
-                    const match = lowerCaseText.slice(i).match(regex)
-                    if (match) {
+                    if (lowerCaseText.startsWith(abbreviation, i) && abbreviation.length > matchedLength) {
                         foundBook = this.abbreviations[abbreviations[k]]
-                        matchedLength = match[0].length - match[1].length // Exclude chapter-verse part
-                        originalBookText = text.slice(i, i + matchedLength)
+                        bookStartIndex = i
+                        matchedLength = abbreviation.length
                     }
                 }
             }
             if (foundBook) {
+                console.log(`Found book: ${foundBook} at index ${bookStartIndex}, length ${matchedLength}`)
                 i += matchedLength
                 let chapterVerse = ""
-                let originalChapterVerseText = ""
                 const references = []
+                let refStartIndex = bookStartIndex // Start of reference (including book) in normalizedText
+                let originalRefStartIndex = bookStartIndex // Start in original text
-                // Capture chapter-verse until a letter (potential new book) or semicolon
                 while (i < normalizedText.length && isValidChapterVerseChar(normalizedText[i])) {
                     if (isNextBibleBook(i)) {
+                        console.log(`Next book detected at index ${i}, stopping reference parsing`)
                         break
                     }
                     if (normalizedText[i] === ";") {
-                        const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9:,\-]+$/g, "")
+                        const formattedReference = chapterVerse.trim()
                         if (formattedReference) {
-                            // Find the last digit in the reference
-                            const lastDigitMatch = formattedReference.match(/\d(?=[^0-9]*$)/)
-                            let endIndex = i - 1 // Default to position before semicolon
-                            if (lastDigitMatch) {
-                                const lastDigitIndex = formattedReference.lastIndexOf(lastDigitMatch[0])
-                                endIndex = startIndex + matchedLength + lastDigitIndex
-                            }
+                            const refEndIndex = i
                             references.push({
-                                reference: formattedReference,
-                                originalText: (originalBookText + originalChapterVerseText).trim(),
-                                startIndex,
-                                endIndex,
+                                ref: formattedReference,
+                                start: refStartIndex,
+                                end: refEndIndex,
                             })
+                            console.log(
+                                `Reference found: ${formattedReference}, normalized indices ${refStartIndex}-${refEndIndex}`
+                            )
                         }
                         chapterVerse = ""
-                        originalChapterVerseText = ""
-                        originalBookText = foundBook // Reuse book for semicolon-separated references
-                        startIndex = i + 1 // Start of next reference
+                        refStartIndex = i + 1
+                        const semicolonIndex = text.indexOf(";", originalRefStartIndex)
+                        originalRefStartIndex = semicolonIndex !== -1 ? semicolonIndex + 1 : refStartIndex
                         i++
                         continue
                     }
                     chapterVerse += normalizedText[i]
-                    originalChapterVerseText += text[i]
                     i++
                 }
-                // Add any remaining reference
                 if (chapterVerse.trim().length > 0) {
-                    const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9:,\-]+$/g, "")
+                    const formattedReference = chapterVerse.trim()
                     if (formattedReference) {
-                        // Find the last digit in the reference
-                        const lastDigitMatch = formattedReference.match(/\d(?=[^0-9]*$)/)
-                        let endIndex = i - 1 // Default to last character
-                        if (lastDigitMatch) {
-                            const lastDigitIndex = formattedReference.lastIndexOf(lastDigitMatch[0])
-                            endIndex = startIndex + matchedLength + lastDigitIndex
-                        }
+                        const refEndIndex = i
                         references.push({
-                            reference: formattedReference,
-                            originalText: (originalBookText + originalChapterVerseText).trim(),
-                            startIndex,
-                            endIndex,
+                            ref: formattedReference,
+                            start: refStartIndex,
+                            end: refEndIndex,
                         })
+                        console.log(
+                            `Final reference found: ${formattedReference}, normalized indices ${refStartIndex}-${refEndIndex}`
+                        )
                     }
                 }
-                // Process each reference
-                references.forEach((refObj) => {
-                    // Detect version suffix (LXX or MT)
-                    let version = null
-                    let originalText = refObj.originalText
-                    const suffix = detectSuffix(i, text)
-                    if (suffix) {
-                        version = suffix.version
-                        originalText += ` ${version}`
-                        i += suffix.length
-                        // Update endIndex if version suffix follows a digit
-                        if (refObj.endIndex === i - suffix.length - 1) {
-                            refObj.endIndex = i - 1
-                        }
-                    }
+                // Align indices with original text
+                const originalBookText = text.slice(bookStartIndex, bookStartIndex + matchedLength)
+                const originalBookStartIndex =
+                    text.indexOf(originalBookText, bookStartIndex) !== -1
+                        ? text.indexOf(originalBookText, bookStartIndex)
+                        : bookStartIndex
+                console.log(`Original book text: ${originalBookText}, original start index: ${originalBookStartIndex}`)
+                references.forEach(({ ref, start, end }, refIndex) => {
                     let type
-                    let ref = refObj.reference.replace(/^\.\s*/, "") // Remove leading period and space
-                    if (this.config.booksOnly && !ref) {
-                        type = "book_only"
-                    } else if (ref.includes(":")) {
+                    if (ref.includes(":")) {
                         if (ref.includes("-")) {
-                            const [start, end] = ref.split("-").map((s) => s.trim())
-                            const startParts = start.split(":").map((s) => s.trim())
-                            const endParts = end.split(":").map((s) => s.trim())
+                            const [start, end] = ref.split("-")
+                            const startParts = start.split(":")
+                            const endParts = end.split(":")
                             type =
-                                startParts.length > 1 && endParts.length > 1 && startParts[0] !== endParts[0]
+                                startParts.length > 1 &&
+                                endParts.length > 1 &&
+                                startParts[0].trim() !== endParts[0].trim()
                                     ? "multi_chapter_verse_range"
                                     : "chapter_verse_range"
                         } else if (ref.includes(",")) {
@@ -262,32 +229,68 @@ class CodexParser {
                         }
                     } else if (ref.includes("-")) {
                         type = "chapter_range"
-                    } else if (/\d/.test(ref)) {
-                        type = "single_chapter"
                     } else {
-                        type = "book_only"
+                        type = "single_chapter"
+                    }
+                    // Construct full reference text for original text
+                    const fullRefText = `${originalBookText} ${ref.replace(":", ".")}`
+                    const suffixData = detectSuffix(end)
+                    const suffix = suffixData ? suffixData.suffix : null
+                    let refEndIndex = end
+                    if (suffixData) {
+                        refEndIndex += suffixData.length
+                        i += suffixData.length // Skip suffix
                     }
-                    const referenceObj = {
+                    // Map to original text
+                    let originalStartIndex =
+                        text.indexOf(fullRefText, originalRefStartIndex) !== -1
+                            ? text.indexOf(fullRefText, originalRefStartIndex)
+                            : originalBookStartIndex
+                    console.log(
+                        `Searching for fullRefText: ${fullRefText} at index ${originalRefStartIndex}, found at ${originalStartIndex}`
+                    )
+                    let originalEndIndex = originalStartIndex + fullRefText.length
+                    let originalText = text.slice(originalStartIndex, originalEndIndex)
+                    // Adjust for suffix in original text
+                    if (suffixData) {
+                        originalEndIndex += suffixData.length
+                        originalText = text.slice(originalStartIndex, originalEndIndex)
+                    }
+                    // Trim trailing whitespace from originalText
+                    while (originalEndIndex > originalStartIndex && /[\s]/.test(text[originalEndIndex - 1])) {
+                        originalEndIndex--
+                        originalText = text.slice(originalStartIndex, originalEndIndex)
+                    }
+                    console.log(
+                        `Reference ${
+                            refIndex + 1
+                        }: ${originalText}, original indices ${originalStartIndex}-${originalEndIndex}, type: ${type}, suffix: ${
+                            suffix || "none"
+                        }, search text: ${fullRefText}`
+                    )
+                    this.found.push({
                         book: foundBook,
                         reference: ref,
-                        version,
+                        startIndex: originalStartIndex,
+                        endIndex: originalEndIndex,
+                        version: suffix || null,
                         type,
-                        originalText,
-                        startIndex: refObj.startIndex,
-                        endIndex: refObj.endIndex,
-                    }
-                    this.found.push(referenceObj)
+                        originalText: originalText,
+                    })
                 })
-                // Skip any trailing spaces after the reference
-                while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
-                    i++
-                }
             } else {
                 i++
             }
         }
+        console.log(`Found references: ${JSON.stringify(this.found, null, 2)}`)
         return this
     }
@@ -337,6 +340,7 @@ class CodexParser {
             // Clean reference for parsing
             let cleanReference = passage.reference.replace(/\s*(LXX|MT)$/i, "").trim()
+            console.log(`Parsing reference: ${cleanReference}, type: ${passage.type}`)
             if (cleanReference.endsWith(",")) {
                 cleanReference = cleanReference.slice(0, -1).trim()
             }
@@ -344,6 +348,7 @@ class CodexParser {
             // Handle book-only or empty references
             if (!cleanReference && this.config.booksOnly) {
                 parsedPassage.type = "book_only"
+                console.log(`Book-only reference: ${book}`)
             } else if (!cleanReference || cleanReference.match(/^\d+\s*[:;]?\s*$/)) {
                 const chapterMatch = cleanReference.match(/\d+/) || ["1"]
                 const chapter = Number(chapterMatch[0])
@@ -355,8 +360,18 @@ class CodexParser {
                     const endVerse = chapterVerses[chapterVerses.length - 1]
                     parsedPassage.verses = [`${startVerse}-${endVerse}`]
                 }
+                console.log(`Single chapter: ${chapter}, verses: ${parsedPassage.verses}`)
+            } else if (passage.type === "comma_separated_verses") {
+                // Handle comma-separated verses (e.g., "1:7,18")
+                const [chapter, verses] = cleanReference.split(":")
+                parsedPassage.chapter = Number(chapter)
+                parsedPassage.verses = verses.split(",").map((v) => v.trim())
+                console.log(`Comma-separated verses: chapter ${chapter}, verses ${parsedPassage.verses}`)
             } else {
                 this.parseReferenceParts(parsedPassage, cleanReference)
+                console.log(
+                    `Parsed with parseReferenceParts: chapter ${parsedPassage.chapter}, verses ${parsedPassage.verses}`
+                )
             }
             parsedPassage.passages = this.populate(parsedPassage)
@@ -376,6 +391,7 @@ class CodexParser {
             } else {
                 parsedPassage.abbr = parsedPassage.original
             }
+            console.log(`Abbreviation set: ${parsedPassage.abbr}`)
             if (parsedPassage.type === this.MULTI_CHAPTER_RANGE) {
                 this.handleMultiChapterRange(parsedPassage, cleanReference)
@@ -401,6 +417,7 @@ class CodexParser {
                     chapter: lastPassage.chapter,
                     verse: lastPassage.verse,
                 }
+                console.log(`Start: ${JSON.stringify(parsedPassage.start)}, End: ${JSON.stringify(parsedPassage.end)}`)
             }
             if (!parsedPassage.version) {
@@ -415,9 +432,9 @@ class CodexParser {
         })
         this.versification()
+        console.log(`Final passages: ${JSON.stringify(this.passages, null, 2)}`)
         return this
     }
     /**
      * Parses reference parts into chapter and verse components.
      * @param {Object} passage - The passage object to populate.