codexparser 0.1.82 → 0.1.84
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.trunk/trunk.yaml +3 -3
- package/package.json +1 -1
- package/src/CodexParser.js +128 -111
package/.trunk/trunk.yaml
CHANGED
|
@@ -7,7 +7,7 @@ cli:
|
|
|
7
7
|
plugins:
|
|
8
8
|
sources:
|
|
9
9
|
- id: trunk
|
|
10
|
-
ref: v1.7.
|
|
10
|
+
ref: v1.7.1
|
|
11
11
|
uri: https://github.com/trunk-io/plugins
|
|
12
12
|
# Many linters and tools depend on runtimes - configure them here. (https://docs.trunk.io/runtimes)
|
|
13
13
|
runtimes:
|
|
@@ -17,11 +17,11 @@ runtimes:
|
|
|
17
17
|
# This is the section where you manage your linters. (https://docs.trunk.io/check/configuration)
|
|
18
18
|
lint:
|
|
19
19
|
enabled:
|
|
20
|
-
- checkov@3.2.
|
|
20
|
+
- checkov@3.2.446
|
|
21
21
|
- git-diff-check
|
|
22
22
|
- markdownlint@0.45.0
|
|
23
23
|
- osv-scanner@2.0.3
|
|
24
|
-
- prettier@3.6.
|
|
24
|
+
- prettier@3.6.1
|
|
25
25
|
- trufflehog@3.89.2
|
|
26
26
|
actions:
|
|
27
27
|
disabled:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "codexparser",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.84",
|
|
4
4
|
"description": "This is a Javascript Bible parser and text scanner. It will search through texts and collate all scripture references into an array and parse them into objects, and it will parse passages into objects by book, chapter, verse, and testament. ",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
package/src/CodexParser.js
CHANGED
|
@@ -94,165 +94,132 @@ class CodexParser {
|
|
|
94
94
|
const fullNames = [...this.bible.old, ...this.bible.new]
|
|
95
95
|
const abbreviations = Object.keys(this.abbreviations)
|
|
96
96
|
this.found = []
|
|
97
|
-
//
|
|
98
|
-
let normalizedText = text
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
.replace(/\s+/g, " ") // Normalize multiple spaces to single
|
|
97
|
+
// Minimal normalization: fix periods before numbers, remove trailing periods
|
|
98
|
+
let normalizedText = text.replace(/\.(?=\d)/g, ":").replace(/(\b[A-Za-z]+)\.(?=\s|$)/g, "$1")
|
|
99
|
+
console.log(`Input text: ${text}`)
|
|
100
|
+
console.log(`Normalized text: ${normalizedText}`)
|
|
102
101
|
const lowercaseBibleFullNames = fullNames.map((book) => book.toLowerCase())
|
|
103
102
|
const lowercaseBibleAbbreviations = abbreviations.map((abbr) => abbr.toLowerCase())
|
|
104
103
|
const lowerCaseText = normalizedText.toLowerCase()
|
|
105
104
|
let i = 0
|
|
106
105
|
|
|
107
|
-
const isValidChapterVerseChar = (char) => /[
|
|
106
|
+
const isValidChapterVerseChar = (char) => /[\d:,\-;\s]/.test(char)
|
|
108
107
|
const isNextBibleBook = (startIndex) => {
|
|
109
108
|
const textAfterCurrentPosition = lowerCaseText.substring(startIndex).trim()
|
|
110
|
-
// Check if the text starts with a book name or abbreviation followed by a digit
|
|
111
109
|
return (
|
|
112
|
-
lowercaseBibleFullNames.some((book) =>
|
|
113
|
-
|
|
114
|
-
const nextIndex = startIndex + book.length
|
|
115
|
-
const nextChar = lowerCaseText[nextIndex]
|
|
116
|
-
return nextChar && /\d/.test(nextChar)
|
|
117
|
-
}
|
|
118
|
-
return false
|
|
119
|
-
}) ||
|
|
120
|
-
lowercaseBibleAbbreviations.some((abbr) => {
|
|
121
|
-
if (textAfterCurrentPosition.startsWith(abbr)) {
|
|
122
|
-
const nextIndex = startIndex + abbr.length
|
|
123
|
-
const nextChar = lowerCaseText[nextIndex]
|
|
124
|
-
return nextChar && (/\d/.test(nextChar) || /\./.test(nextChar))
|
|
125
|
-
}
|
|
126
|
-
return false
|
|
127
|
-
})
|
|
110
|
+
lowercaseBibleFullNames.some((book) => textAfterCurrentPosition.startsWith(book)) ||
|
|
111
|
+
lowercaseBibleAbbreviations.some((abbr) => textAfterCurrentPosition.startsWith(abbr))
|
|
128
112
|
)
|
|
129
113
|
}
|
|
130
|
-
const detectSuffix = (startIndex
|
|
131
|
-
const suffixMatch =
|
|
132
|
-
return suffixMatch ? {
|
|
114
|
+
const detectSuffix = (startIndex) => {
|
|
115
|
+
const suffixMatch = normalizedText.substring(startIndex).match(/\b(LXX|MT)\b/i)
|
|
116
|
+
return suffixMatch ? { suffix: suffixMatch[0].toUpperCase(), length: suffixMatch[0].length } : null
|
|
133
117
|
}
|
|
134
118
|
|
|
135
119
|
while (i < lowerCaseText.length) {
|
|
136
120
|
let foundBook = null
|
|
121
|
+
let bookStartIndex = -1
|
|
137
122
|
let matchedLength = 0
|
|
138
|
-
let originalBookText = ""
|
|
139
|
-
let startIndex = i
|
|
140
123
|
|
|
141
|
-
//
|
|
124
|
+
// Skip whitespace and special characters before checking for book
|
|
125
|
+
while (i < lowerCaseText.length && /[\s—-]/.test(lowerCaseText[i])) {
|
|
126
|
+
i++
|
|
127
|
+
}
|
|
128
|
+
if (i >= lowerCaseText.length) break
|
|
129
|
+
|
|
130
|
+
console.log(`Scanning at index ${i}: ${lowerCaseText.slice(i, i + 10)}...`)
|
|
131
|
+
|
|
142
132
|
for (let j = 0; j < lowercaseBibleFullNames.length; j++) {
|
|
143
133
|
const book = lowercaseBibleFullNames[j]
|
|
144
|
-
if (
|
|
145
|
-
lowerCaseText.startsWith(book, i) &&
|
|
146
|
-
(i + book.length >= lowerCaseText.length || /\d/.test(lowerCaseText[i + book.length]))
|
|
147
|
-
) {
|
|
134
|
+
if (lowerCaseText.startsWith(book, i) && book.length > matchedLength) {
|
|
148
135
|
foundBook = fullNames[j]
|
|
136
|
+
bookStartIndex = i
|
|
149
137
|
matchedLength = book.length
|
|
150
|
-
originalBookText = text.slice(i, i + book.length)
|
|
151
138
|
}
|
|
152
139
|
}
|
|
153
140
|
|
|
154
|
-
// Check abbreviations
|
|
155
141
|
if (!foundBook) {
|
|
156
142
|
for (let k = 0; k < lowercaseBibleAbbreviations.length; k++) {
|
|
157
143
|
const abbreviation = lowercaseBibleAbbreviations[k]
|
|
158
|
-
|
|
159
|
-
const regex = new RegExp(`^${abbrPattern}(\\.?\\s*\\d)`, "i")
|
|
160
|
-
const match = lowerCaseText.slice(i).match(regex)
|
|
161
|
-
if (match) {
|
|
144
|
+
if (lowerCaseText.startsWith(abbreviation, i) && abbreviation.length > matchedLength) {
|
|
162
145
|
foundBook = this.abbreviations[abbreviations[k]]
|
|
163
|
-
|
|
164
|
-
|
|
146
|
+
bookStartIndex = i
|
|
147
|
+
matchedLength = abbreviation.length
|
|
165
148
|
}
|
|
166
149
|
}
|
|
167
150
|
}
|
|
168
151
|
|
|
169
152
|
if (foundBook) {
|
|
153
|
+
console.log(`Found book: ${foundBook} at index ${bookStartIndex}, length ${matchedLength}`)
|
|
170
154
|
i += matchedLength
|
|
171
155
|
let chapterVerse = ""
|
|
172
|
-
let originalChapterVerseText = ""
|
|
173
156
|
const references = []
|
|
157
|
+
let refStartIndex = bookStartIndex // Start of reference (including book) in normalizedText
|
|
158
|
+
let originalRefStartIndex = bookStartIndex // Start in original text
|
|
174
159
|
|
|
175
|
-
// Capture chapter-verse until a letter (potential new book) or semicolon
|
|
176
160
|
while (i < normalizedText.length && isValidChapterVerseChar(normalizedText[i])) {
|
|
177
161
|
if (isNextBibleBook(i)) {
|
|
162
|
+
console.log(`Next book detected at index ${i}, stopping reference parsing`)
|
|
178
163
|
break
|
|
179
164
|
}
|
|
180
165
|
if (normalizedText[i] === ";") {
|
|
181
|
-
const formattedReference = chapterVerse.trim()
|
|
166
|
+
const formattedReference = chapterVerse.trim()
|
|
182
167
|
if (formattedReference) {
|
|
183
|
-
|
|
184
|
-
const lastDigitMatch = formattedReference.match(/\d(?=[^0-9]*$)/)
|
|
185
|
-
let endIndex = i - 1 // Default to position before semicolon
|
|
186
|
-
if (lastDigitMatch) {
|
|
187
|
-
const lastDigitIndex = formattedReference.lastIndexOf(lastDigitMatch[0])
|
|
188
|
-
endIndex = startIndex + matchedLength + lastDigitIndex
|
|
189
|
-
}
|
|
168
|
+
const refEndIndex = i
|
|
190
169
|
references.push({
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
endIndex,
|
|
170
|
+
ref: formattedReference,
|
|
171
|
+
start: refStartIndex,
|
|
172
|
+
end: refEndIndex,
|
|
195
173
|
})
|
|
174
|
+
console.log(
|
|
175
|
+
`Reference found: ${formattedReference}, normalized indices ${refStartIndex}-${refEndIndex}`
|
|
176
|
+
)
|
|
196
177
|
}
|
|
197
178
|
chapterVerse = ""
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
179
|
+
refStartIndex = i + 1
|
|
180
|
+
const semicolonIndex = text.indexOf(";", originalRefStartIndex)
|
|
181
|
+
originalRefStartIndex = semicolonIndex !== -1 ? semicolonIndex + 1 : refStartIndex
|
|
201
182
|
i++
|
|
202
183
|
continue
|
|
203
184
|
}
|
|
204
185
|
chapterVerse += normalizedText[i]
|
|
205
|
-
originalChapterVerseText += text[i]
|
|
206
186
|
i++
|
|
207
187
|
}
|
|
208
188
|
|
|
209
|
-
// Add any remaining reference
|
|
210
189
|
if (chapterVerse.trim().length > 0) {
|
|
211
|
-
const formattedReference = chapterVerse.trim()
|
|
190
|
+
const formattedReference = chapterVerse.trim()
|
|
212
191
|
if (formattedReference) {
|
|
213
|
-
|
|
214
|
-
const lastDigitMatch = formattedReference.match(/\d(?=[^0-9]*$)/)
|
|
215
|
-
let endIndex = i - 1 // Default to last character
|
|
216
|
-
if (lastDigitMatch) {
|
|
217
|
-
const lastDigitIndex = formattedReference.lastIndexOf(lastDigitMatch[0])
|
|
218
|
-
endIndex = startIndex + matchedLength + lastDigitIndex
|
|
219
|
-
}
|
|
192
|
+
const refEndIndex = i
|
|
220
193
|
references.push({
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
endIndex,
|
|
194
|
+
ref: formattedReference,
|
|
195
|
+
start: refStartIndex,
|
|
196
|
+
end: refEndIndex,
|
|
225
197
|
})
|
|
198
|
+
console.log(
|
|
199
|
+
`Final reference found: ${formattedReference}, normalized indices ${refStartIndex}-${refEndIndex}`
|
|
200
|
+
)
|
|
226
201
|
}
|
|
227
202
|
}
|
|
228
203
|
|
|
229
|
-
//
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
version = suffix.version
|
|
237
|
-
originalText += ` ${version}`
|
|
238
|
-
i += suffix.length
|
|
239
|
-
// Update endIndex if version suffix follows a digit
|
|
240
|
-
if (refObj.endIndex === i - suffix.length - 1) {
|
|
241
|
-
refObj.endIndex = i - 1
|
|
242
|
-
}
|
|
243
|
-
}
|
|
204
|
+
// Align indices with original text
|
|
205
|
+
const originalBookText = text.slice(bookStartIndex, bookStartIndex + matchedLength)
|
|
206
|
+
const originalBookStartIndex =
|
|
207
|
+
text.indexOf(originalBookText, bookStartIndex) !== -1
|
|
208
|
+
? text.indexOf(originalBookText, bookStartIndex)
|
|
209
|
+
: bookStartIndex
|
|
210
|
+
console.log(`Original book text: ${originalBookText}, original start index: ${originalBookStartIndex}`)
|
|
244
211
|
|
|
212
|
+
references.forEach(({ ref, start, end }, refIndex) => {
|
|
245
213
|
let type
|
|
246
|
-
|
|
247
|
-
if (this.config.booksOnly && !ref) {
|
|
248
|
-
type = "book_only"
|
|
249
|
-
} else if (ref.includes(":")) {
|
|
214
|
+
if (ref.includes(":")) {
|
|
250
215
|
if (ref.includes("-")) {
|
|
251
|
-
const [start, end] = ref.split("-")
|
|
252
|
-
const startParts = start.split(":")
|
|
253
|
-
const endParts = end.split(":")
|
|
216
|
+
const [start, end] = ref.split("-")
|
|
217
|
+
const startParts = start.split(":")
|
|
218
|
+
const endParts = end.split(":")
|
|
254
219
|
type =
|
|
255
|
-
startParts.length > 1 &&
|
|
220
|
+
startParts.length > 1 &&
|
|
221
|
+
endParts.length > 1 &&
|
|
222
|
+
startParts[0].trim() !== endParts[0].trim()
|
|
256
223
|
? "multi_chapter_verse_range"
|
|
257
224
|
: "chapter_verse_range"
|
|
258
225
|
} else if (ref.includes(",")) {
|
|
@@ -262,32 +229,68 @@ class CodexParser {
|
|
|
262
229
|
}
|
|
263
230
|
} else if (ref.includes("-")) {
|
|
264
231
|
type = "chapter_range"
|
|
265
|
-
} else if (/\d/.test(ref)) {
|
|
266
|
-
type = "single_chapter"
|
|
267
232
|
} else {
|
|
268
|
-
type = "
|
|
233
|
+
type = "single_chapter"
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
// Construct full reference text for original text
|
|
237
|
+
const fullRefText = `${originalBookText} ${ref.replace(":", ".")}`
|
|
238
|
+
const suffixData = detectSuffix(end)
|
|
239
|
+
const suffix = suffixData ? suffixData.suffix : null
|
|
240
|
+
let refEndIndex = end
|
|
241
|
+
if (suffixData) {
|
|
242
|
+
refEndIndex += suffixData.length
|
|
243
|
+
i += suffixData.length // Skip suffix
|
|
269
244
|
}
|
|
270
245
|
|
|
271
|
-
|
|
246
|
+
// Map to original text
|
|
247
|
+
let originalStartIndex =
|
|
248
|
+
text.indexOf(fullRefText, originalRefStartIndex) !== -1
|
|
249
|
+
? text.indexOf(fullRefText, originalRefStartIndex)
|
|
250
|
+
: originalBookStartIndex
|
|
251
|
+
console.log(
|
|
252
|
+
`Searching for fullRefText: ${fullRefText} at index ${originalRefStartIndex}, found at ${originalStartIndex}`
|
|
253
|
+
)
|
|
254
|
+
|
|
255
|
+
let originalEndIndex = originalStartIndex + fullRefText.length
|
|
256
|
+
let originalText = text.slice(originalStartIndex, originalEndIndex)
|
|
257
|
+
|
|
258
|
+
// Adjust for suffix in original text
|
|
259
|
+
if (suffixData) {
|
|
260
|
+
originalEndIndex += suffixData.length
|
|
261
|
+
originalText = text.slice(originalStartIndex, originalEndIndex)
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
// Trim trailing whitespace from originalText
|
|
265
|
+
while (originalEndIndex > originalStartIndex && /[\s]/.test(text[originalEndIndex - 1])) {
|
|
266
|
+
originalEndIndex--
|
|
267
|
+
originalText = text.slice(originalStartIndex, originalEndIndex)
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
console.log(
|
|
271
|
+
`Reference ${
|
|
272
|
+
refIndex + 1
|
|
273
|
+
}: ${originalText}, original indices ${originalStartIndex}-${originalEndIndex}, type: ${type}, suffix: ${
|
|
274
|
+
suffix || "none"
|
|
275
|
+
}, search text: ${fullRefText}`
|
|
276
|
+
)
|
|
277
|
+
|
|
278
|
+
this.found.push({
|
|
272
279
|
book: foundBook,
|
|
273
280
|
reference: ref,
|
|
274
|
-
|
|
281
|
+
startIndex: originalStartIndex,
|
|
282
|
+
endIndex: originalEndIndex,
|
|
283
|
+
version: suffix || null,
|
|
275
284
|
type,
|
|
276
|
-
originalText,
|
|
277
|
-
|
|
278
|
-
endIndex: refObj.endIndex,
|
|
279
|
-
}
|
|
280
|
-
this.found.push(referenceObj)
|
|
285
|
+
originalText: originalText,
|
|
286
|
+
})
|
|
281
287
|
})
|
|
282
|
-
|
|
283
|
-
// Skip any trailing spaces after the reference
|
|
284
|
-
while (i < lowerCaseText.length && /\s/.test(lowerCaseText[i])) {
|
|
285
|
-
i++
|
|
286
|
-
}
|
|
287
288
|
} else {
|
|
288
289
|
i++
|
|
289
290
|
}
|
|
290
291
|
}
|
|
292
|
+
|
|
293
|
+
console.log(`Found references: ${JSON.stringify(this.found, null, 2)}`)
|
|
291
294
|
return this
|
|
292
295
|
}
|
|
293
296
|
|
|
@@ -337,6 +340,7 @@ class CodexParser {
|
|
|
337
340
|
|
|
338
341
|
// Clean reference for parsing
|
|
339
342
|
let cleanReference = passage.reference.replace(/\s*(LXX|MT)$/i, "").trim()
|
|
343
|
+
console.log(`Parsing reference: ${cleanReference}, type: ${passage.type}`)
|
|
340
344
|
if (cleanReference.endsWith(",")) {
|
|
341
345
|
cleanReference = cleanReference.slice(0, -1).trim()
|
|
342
346
|
}
|
|
@@ -344,6 +348,7 @@ class CodexParser {
|
|
|
344
348
|
// Handle book-only or empty references
|
|
345
349
|
if (!cleanReference && this.config.booksOnly) {
|
|
346
350
|
parsedPassage.type = "book_only"
|
|
351
|
+
console.log(`Book-only reference: ${book}`)
|
|
347
352
|
} else if (!cleanReference || cleanReference.match(/^\d+\s*[:;]?\s*$/)) {
|
|
348
353
|
const chapterMatch = cleanReference.match(/\d+/) || ["1"]
|
|
349
354
|
const chapter = Number(chapterMatch[0])
|
|
@@ -355,8 +360,18 @@ class CodexParser {
|
|
|
355
360
|
const endVerse = chapterVerses[chapterVerses.length - 1]
|
|
356
361
|
parsedPassage.verses = [`${startVerse}-${endVerse}`]
|
|
357
362
|
}
|
|
363
|
+
console.log(`Single chapter: ${chapter}, verses: ${parsedPassage.verses}`)
|
|
364
|
+
} else if (passage.type === "comma_separated_verses") {
|
|
365
|
+
// Handle comma-separated verses (e.g., "1:7,18")
|
|
366
|
+
const [chapter, verses] = cleanReference.split(":")
|
|
367
|
+
parsedPassage.chapter = Number(chapter)
|
|
368
|
+
parsedPassage.verses = verses.split(",").map((v) => v.trim())
|
|
369
|
+
console.log(`Comma-separated verses: chapter ${chapter}, verses ${parsedPassage.verses}`)
|
|
358
370
|
} else {
|
|
359
371
|
this.parseReferenceParts(parsedPassage, cleanReference)
|
|
372
|
+
console.log(
|
|
373
|
+
`Parsed with parseReferenceParts: chapter ${parsedPassage.chapter}, verses ${parsedPassage.verses}`
|
|
374
|
+
)
|
|
360
375
|
}
|
|
361
376
|
|
|
362
377
|
parsedPassage.passages = this.populate(parsedPassage)
|
|
@@ -376,6 +391,7 @@ class CodexParser {
|
|
|
376
391
|
} else {
|
|
377
392
|
parsedPassage.abbr = parsedPassage.original
|
|
378
393
|
}
|
|
394
|
+
console.log(`Abbreviation set: ${parsedPassage.abbr}`)
|
|
379
395
|
|
|
380
396
|
if (parsedPassage.type === this.MULTI_CHAPTER_RANGE) {
|
|
381
397
|
this.handleMultiChapterRange(parsedPassage, cleanReference)
|
|
@@ -401,6 +417,7 @@ class CodexParser {
|
|
|
401
417
|
chapter: lastPassage.chapter,
|
|
402
418
|
verse: lastPassage.verse,
|
|
403
419
|
}
|
|
420
|
+
console.log(`Start: ${JSON.stringify(parsedPassage.start)}, End: ${JSON.stringify(parsedPassage.end)}`)
|
|
404
421
|
}
|
|
405
422
|
|
|
406
423
|
if (!parsedPassage.version) {
|
|
@@ -415,9 +432,9 @@ class CodexParser {
|
|
|
415
432
|
})
|
|
416
433
|
|
|
417
434
|
this.versification()
|
|
435
|
+
console.log(`Final passages: ${JSON.stringify(this.passages, null, 2)}`)
|
|
418
436
|
return this
|
|
419
437
|
}
|
|
420
|
-
|
|
421
438
|
/**
|
|
422
439
|
* Parses reference parts into chapter and verse components.
|
|
423
440
|
* @param {Object} passage - The passage object to populate.
|