codexparser 0.1.65 → 0.1.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/CodexParser.js +111 -72
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codexparser",
3
- "version": "0.1.65",
3
+ "version": "0.1.66",
4
4
  "description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -74,53 +74,57 @@ class CodexParser {
74
74
  const fullNames = [...this.bible.old, ...this.bible.new]
75
75
  const abbreviations = Object.keys(this.abbreviations)
76
76
  this.found = []
77
- // Minimal normalization: fix periods before numbers, remove trailing periods
78
- let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
79
- const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
80
- const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
77
+
78
+ // Normalize text: remove curly quotes, replace periods before numbers with colons
79
+ let normalizedText = text
80
+ .replace(/[“”]/g, "") // Remove curly quotes
81
+ .replace(/\.(?=\d)/g, ":")
81
82
  const lowerCaseText = normalizedText.toLowerCase()
82
83
  let i = 0
83
84
 
84
- const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
85
- const isNextBibleBook = (startIndex) => {
86
- const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
87
- return (
88
- lowercaseBibleFullNames.some((book) => textAfterCurrentPosition.startsWith(book)) ||
89
- lowercaseBibleAbbreviations.some((abbr) => textAfterCurrentPosition.startsWith(abbr))
90
- )
91
- }
92
- const detectSuffix = (startIndex) => {
93
- const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
94
- return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
95
- }
96
-
97
85
  while (i < lowerCaseText.length) {
98
86
  let foundBook = null
99
87
  let startIndex = -1
100
88
  let matchedLength = 0
89
+ let hasOpeningParen = false
90
+ let parenStartIndex = -1
101
91
 
102
- // Skip whitespace and special characters before checking for book
103
- while (i < lowerCaseText.length && /[\s—-]/.test(lowerCaseText[i])) {
92
+ // Skip whitespace
93
+ while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
104
94
  i++
105
95
  }
106
96
  if (i >= lowerCaseText.length) break
107
97
 
108
- for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
109
- const book = lowercaseBibleFullNames[j]
110
- if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
111
- foundBook = fullNames[j]
112
- startIndex = i
98
+ // Check for opening parenthesis
99
+ if (i < lowerCaseText.length && lowerCaseText[i] === "(") {
100
+ hasOpeningParen = true
101
+ parenStartIndex = i
102
+ i++
103
+ }
104
+
105
+ // Record potential start of reference
106
+ startIndex = i
107
+
108
+ // Check for book names or abbreviations
109
+ for (let book of fullNames) {
110
+ if (
111
+ lowerCaseText.startsWith(book.toLowerCase(), i) &&
112
+ (i + book.length >= lowerCaseText.length || /\s|:|\d/.test(lowerCaseText[i + book.length]))
113
+ ) {
114
+ foundBook = book
113
115
  matchedLength = book.length
116
+ break
114
117
  }
115
118
  }
116
-
117
119
  if (!foundBook) {
118
- for (let k = 0; k < lowercaseBibleAbbreviations.length; k++) {
119
- const abbreviation = lowercaseBibleAbbreviations[k]
120
- if (lowerCaseText.startsWith(abbreviation, i) && abbreviation.length > matchedLength) {
121
- foundBook = this.abbreviations[abbreviations[k]]
122
- startIndex = i
123
- matchedLength = abbreviation.length
120
+ for (let abbr of abbreviations) {
121
+ if (
122
+ lowerCaseText.startsWith(abbr.toLowerCase(), i) &&
123
+ (i + abbr.length >= lowerCaseText.length || /\s|:|\d/.test(lowerCaseText[i + abbr.length]))
124
+ ) {
125
+ foundBook = this.abbreviations[abbr]
126
+ matchedLength = abbr.length
127
+ break
124
128
  }
125
129
  }
126
130
  }
@@ -128,43 +132,52 @@ class CodexParser {
128
132
  if (foundBook) {
129
133
  i += matchedLength
130
134
  let chapterVerse = ""
131
- const references = []
132
- const startOfReference = startIndex
133
-
134
- while (i < normalizedText.length && isValidChapterVerseChar(normalizedText[i])) {
135
- if (isNextBibleBook(i)) break
136
- if (normalizedText[i] === ";") {
137
- const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
138
- if (formattedReference) references.push(formattedReference)
139
- chapterVerse = ""
140
- i++
141
- continue
142
- }
143
- chapterVerse += normalizedText[i]
135
+ let hasColon = false
136
+
137
+ // Capture space after book
138
+ if (i < normalizedText.length && normalizedText[i] === " ") {
139
+ chapterVerse += " "
144
140
  i++
145
141
  }
146
142
 
147
- if (chapterVerse.trim().length > 0) {
148
- const formattedReference = chapterVerse.trim().replace(/[^a-zA-Z0-9]+$/, "")
149
- if (formattedReference) references.push(formattedReference)
143
+ // Capture chapter-verse
144
+ while (
145
+ i < lowerCaseText.length &&
146
+ (/[\d]/.test(normalizedText[i]) ||
147
+ normalizedText[i] === ":" ||
148
+ normalizedText[i] === "," ||
149
+ normalizedText[i] === "-")
150
+ ) {
151
+ if (normalizedText[i] === ":") hasColon = true
152
+ chapterVerse += normalizedText[i]
153
+ i++
150
154
  }
151
155
 
152
- // Set endIndex to the current position
153
- let endIndex = i
154
- const suffixData = detectSuffix(i)
155
- const suffix = suffixData ? suffixData.suffix : null
156
- if (suffixData) {
157
- endIndex += suffixData.length
158
- i += suffixData.length
159
- }
156
+ // Only proceed if valid reference
157
+ if (hasColon && chapterVerse.trim().length > 0) {
158
+ let endIndex = i
159
+ let version = null
160
+
161
+ // Detect suffix
162
+ const suffixMatch = normalizedText.substring(i).match(/\b(LXX|MT)\b/i)
163
+ if (suffixMatch) {
164
+ version = suffixMatch[0].toUpperCase()
165
+ endIndex += suffixMatch[0].length
166
+ i += suffixMatch[0].length
167
+ }
160
168
 
161
- // Trim endIndex to exclude trailing whitespace or non-reference characters
162
- while (endIndex > startOfReference && /[\s]/.test(normalizedText[endIndex - 1])) {
163
- endIndex--
164
- }
169
+ // Handle closing parenthesis
170
+ if (hasOpeningParen && i < lowerCaseText.length && normalizedText[i] === ")") {
171
+ endIndex = i + 1
172
+ i++
173
+ }
174
+
175
+ // Use original text for reference only (exclude parentheses)
176
+ const originalText = normalizedText.slice(startIndex, hasOpeningParen ? endIndex - 1 : endIndex)
165
177
 
166
- references.forEach((ref) => {
178
+ // Determine type
167
179
  let type
180
+ const ref = chapterVerse.trim()
168
181
  if (ref.includes(":")) {
169
182
  if (ref.includes("-")) {
170
183
  const [start, end] = ref.split("-")
@@ -190,18 +203,21 @@ class CodexParser {
190
203
  this.found.push({
191
204
  book: foundBook,
192
205
  reference: ref,
193
- startIndex: startOfReference + 1,
194
- endIndex: endIndex + 1,
195
- version: suffix || null,
206
+ startIndex: hasOpeningParen ? parenStartIndex : startIndex,
207
+ endIndex,
208
+ version,
196
209
  type,
197
- originalText: text.slice(startOfReference, endIndex), // Use original text
210
+ originalText,
198
211
  })
199
- })
212
+ } else {
213
+ i = startIndex + 1
214
+ }
200
215
  } else {
201
216
  i++
202
217
  }
203
218
  }
204
219
 
220
+ console.log("Found references:", JSON.stringify(this.found, null, 2)) // Debug
205
221
  return this
206
222
  }
207
223
 
@@ -268,7 +284,7 @@ class CodexParser {
268
284
  // Use abbreviation with period for abbreviated books
269
285
  parsedPassage.abbr = `${abbrKey}. ${passage.reference}${passage.version ? " " + passage.version : ""}`
270
286
  } else {
271
- // Fallback to original if no abbreviation (shouldn't occur with proper data)
287
+ // Fallback to original if no abbreviation
272
288
  parsedPassage.abbr = parsedPassage.original
273
289
  }
274
290
 
@@ -1074,6 +1090,12 @@ class CodexParser {
1074
1090
  return { name: "English", value: "ENG", abbreviation: "eng" }
1075
1091
  }
1076
1092
 
1093
+ /**
1094
+ * Replaces scripture references in text with formatted references.
1095
+ * @param {string} text - The original text.
1096
+ * @param {boolean} useAbbreviations - Whether to use abbreviated book names.
1097
+ * @returns {string} Text with replaced references.
1098
+ */
1077
1099
  replace(text, useAbbreviations = true) {
1078
1100
  if (!this.passages.length) {
1079
1101
  console.log("No parsed passages to replace")
@@ -1081,15 +1103,32 @@ class CodexParser {
1081
1103
  }
1082
1104
 
1083
1105
  let result = text
1084
- // Process replacements in reverse order to avoid index shifting
1085
1106
  for (let i = this.passages.length - 1; i >= 0; i--) {
1086
1107
  const passage = this.passages[i]
1087
- const { startIndex, endIndex, originalText, abbr, original } = passage
1088
-
1089
- // Use abbreviated or full reference
1108
+ const { originalText, abbr, original } = passage
1090
1109
  const newReference = useAbbreviations ? abbr : original
1091
- console.log(`Replacing "${originalText}" with "${newReference}" at [${startIndex}, ${endIndex}]`) // Debug
1092
- result = result.slice(0, startIndex) + newReference + result.slice(endIndex)
1110
+
1111
+ // Create regex to match originalText with optional parentheses
1112
+ const escapedOriginalText = originalText.replace(/([:.])/g, "\\$1").replace(/\s+/g, "\\s*")
1113
+ const regex = new RegExp(`(\\()?\\s*${escapedOriginalText}\\s*(\\))?`, "g")
1114
+
1115
+ // Find all matches
1116
+ const matches = [...result.matchAll(regex)]
1117
+ if (matches.length > 0) {
1118
+ // Process matches in reverse to avoid index shifting
1119
+ for (let j = matches.length - 1; j >= 0; j--) {
1120
+ const match = matches[j]
1121
+ const startIndex = match.index
1122
+ const endIndex = startIndex + match[0].length
1123
+ // Preserve parentheses if present in the match
1124
+ const hasParens = match[1] === "(" && match[2] === ")"
1125
+ const replacement = hasParens ? `(${newReference})` : newReference
1126
+ console.log(`Replacing "${match[0]}" with "${replacement}" at [${startIndex}, ${endIndex}]`)
1127
+ result = result.slice(0, startIndex) + replacement + result.slice(endIndex)
1128
+ }
1129
+ } else {
1130
+ console.log(`No match found for originalText "${originalText}"`)
1131
+ }
1093
1132
  }
1094
1133
 
1095
1134
  return result