pericope 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.mdown +13 -0
- data/data/book_abbreviations.txt +69 -0
- data/data/chapter_verse_count.txt +1189 -0
- data/data/chapter_verse_count.yml +1190 -0
- data/lib/pericope.rb +443 -0
- data/lib/pericope/version.rb +3 -0
- data/lib/tasks/pericope_tasks.rake +4 -0
- metadata +89 -0
data/lib/pericope.rb
ADDED
@@ -0,0 +1,443 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
require 'pericope/version'
|
3
|
+
|
4
|
+
# Recognizes Bible references ("pericopes") such as "John 3:16-18, 21" in free
# text and turns them into ranges of numeric verse ids.
#
# A verse id packs book, chapter and verse into one Integer:
#
#   id = book * 1_000_000 + chapter * 1_000 + verse
#
# so Genesis 1:1 is 1001001 and John 3:16 is 43003016.
class Pericope
  attr_reader :original_string, :book, :book_name, :book_chapter_count, :book_has_chapters, :ranges, :index


  # Builds a Pericope from either a String (the first reference found by
  # Pericope.match_one is used) or a MatchData produced by Pericope.match_all
  # (which tags the match with the ordinal of the matched book).
  #
  # Raises RuntimeError when no reference is found in the input.
  def initialize(match)
    match = Pericope.match_one(match) if match.is_a?(String)
    raise "no pericope found in input" unless match.is_a?(MatchData)

    @original_string = match.to_s
    @index = match.begin(0)
    @book = match.instance_variable_get("@book") # set by Pericope.match_all
    @book_name = Pericope.book_names[@book - 1]
    @book_chapter_count = Pericope.book_chapter_counts[@book - 1]
    @book_has_chapters = (book_chapter_count > 1)
    reference = normalize_reference(match[1])
    @ranges = parse_ranges(reference.split(/[,;]/))
  end


  # Display names of the 66 books, indexed by (book ordinal - 1).
  def self.book_names
    @book_names ||= ["Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Joshua", "Judges", "Ruth", "1 Samuel", "2 Samuel", "1 Kings", "2 Kings", "1 Chronicles", "2 Chronicles", "Ezra", "Nehemiah", "Esther", "Job", "Psalm", "Proverbs", "Ecclesiastes", "Song of Songs", "Isaiah", "Jeremiah", "Lamentations", "Ezekiel", "Daniel", "Hosea", "Joel", "Amos", "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk", "Zephaniah", "Haggai", "Zechariah", "Malachi", "Matthew", "Mark", "Luke", "John", "Acts", "Romans", "1 Corinthians", "2 Corinthians", "Galatians", "Ephesians", "Philippians", "Colossians", "1 Thessalonians", "2 Thessalonians", "1 Timothy", "2 Timothy", "Titus", "Philemon", "Hebrews", "James", "1 Peter", "2 Peter", "1 John", "2 John", "3 John", "Jude", "Revelation"]
  end


  # Returns a Pericope for the reference reported by match_one, or nil when
  # the text contains no reference.
  def self.parse_one(text)
    match = match_one(text)
    match ? Pericope.new(match) : nil
  end


  # Finds every reference in +text+.
  #
  # With a block, yields each Pericope and returns +text+; without a block,
  # returns an Array of Pericopes. Results follow the order in which
  # match_all reports them, which is not necessarily the order of occurrence.
  def self.parse(text, &block)
    pericopes = []
    match_all(text) do |match|
      pericope = Pericope.new(match)
      if block_given?
        yield pericope
      else
        pericopes << pericope
      end
    end
    block_given? ? text : pericopes
  end


  # Splits +text+ into an Array that alternates plain-text Strings and
  # Pericopes, in order of occurrence. When +pattern+ is given, each plain-text
  # stretch is further split on it and empty pieces are dropped.
  def self.split(text, pattern=nil)
    # match_all does not guarantee order, so sort by position first
    matches = match_all(text).sort_by { |match| match.begin(0) }

    segments = []
    start = 0
    matches.each do |match|
      segments.concat(split_plain_text(text.slice(start...match.begin(0)), pattern))
      segments << Pericope.new(match)
      start = match.end(0)
    end
    segments.concat(split_plain_text(text.slice(start...text.length), pattern))

    segments
  end


  # Helper for split: turns one stretch of non-reference text into zero or
  # more segments (nothing for an empty stretch).
  def self.split_plain_text(text, pattern)
    return [] if text.length == 0
    pattern ? text.split(pattern).delete_if { |piece| piece.length == 0 } : [text]
  end
  private_class_method :split_plain_text


  # Separates +text+ into its plain text (references removed) and the
  # Pericopes found in it.
  #
  # Returns {:text => String, :pericopes => Array of Pericope}.
  def self.extract(text)
    strings, pericopes = split(text).partition { |segment| segment.is_a?(String) }
    {:text => strings.join, :pericopes => pericopes}
  end


  # Canonical form of the reference, e.g. "John 3:16-18, 21".
  def to_s
    "#{book_name} #{well_formatted_reference}"
  end


  # Debugging aid: shows the matched text next to its canonical form.
  def report
    " #{original_string} => #{self}"
  end


  # Returns every verse id covered by this pericope, in range order.
  #
  # Ranges that cross a chapter boundary are expanded chapter by chapter so
  # that ids between the last verse of one chapter and the first verse of the
  # next are not emitted.
  def to_a
    ranges.flat_map do |range|
      # Range#begin/#end are used instead of #min/#max, which return nil for
      # an inverted range such as the one produced by "3:16-4".
      first_id = range.begin
      last_id = range.end
      next [] if last_id < first_id

      first_chapter = Pericope.get_chapter(first_id)
      last_chapter = Pericope.get_chapter(last_id)

      if first_chapter == last_chapter
        range.to_a
      else
        per_chapter = [Range.new(first_id, Pericope.get_last_verse(book, first_chapter))]
        ((first_chapter + 1)...last_chapter).each do |chapter|
          per_chapter << Range.new(
            Pericope.get_first_verse(book, chapter),
            Pericope.get_last_verse(book, chapter))
        end
        per_chapter << Range.new(Pericope.get_first_verse(book, last_chapter), last_id)
        per_chapter.flat_map { |chapter_range| chapter_range.to_a }
      end
    end
  end


  # Formats the ranges as "chapter:verse" text without the book name.
  #
  # Whole chapters are written as "3" or "3-5"; the chapter number is omitted
  # when it repeats the chapter most recently written ("3:16-18, 21").
  def well_formatted_reference
    # e.g. in 12:1-8, remember that 12 is the chapter when we format the 8;
    # books with a single chapter behave as if chapter 1 were already written
    recent_chapter = book_has_chapters ? nil : 1

    ranges.map do |range|
      min_chapter = Pericope.get_chapter(range.begin)
      min_verse = Pericope.get_verse(range.begin)
      max_chapter = Pericope.get_chapter(range.end)
      max_verse = Pericope.get_verse(range.end)
      s = +""

      if min_verse == 1 && max_verse >= Pericope.get_max_verse(book, max_chapter)
        # the range covers whole chapters
        s << min_chapter.to_s
        s << "-#{max_chapter}" if max_chapter > min_chapter
      else
        if recent_chapter == min_chapter
          s << min_verse.to_s
        else
          recent_chapter = min_chapter
          s << "#{min_chapter}:#{min_verse}"
        end

        # more than one verse in the range (cheaper than counting the range)
        if range.end > range.begin
          s << "-"
          if min_chapter == max_chapter
            s << max_verse.to_s
          else
            recent_chapter = max_chapter
            s << "#{max_chapter}:#{max_verse}"
          end
        end
      end

      s
    end.join(", ")
  end


  # True when +pericope+ is a Pericope in the same book and at least one of
  # its ranges overlaps one of this pericope's ranges.
  def intersects?(pericope)
    return false unless pericope.is_a?(Pericope)
    return false unless book == pericope.book

    ranges.any? do |self_range|
      pericope.ranges.any? do |other_range|
        self_range.max >= other_range.min && self_range.min <= other_range.max
      end
    end
  end


  # Number of verses in the given chapter, looked up in the verse-count table.
  # Returns nil when the table has no entry for that book/chapter.
  def self.get_max_verse(book, chapter)
    chapter_verse_counts[(book * 1_000_000) + (chapter * 1_000)]
  end


  # NOTE: `protected` only affects the instance methods defined below
  # (parse_ranges, normalize_reference). Methods defined with `def self.` are
  # not affected by it and remain publicly callable.
  protected


  # Id of the first verse of the chapter.
  def self.get_first_verse(book, chapter)
    get_id(book, chapter, 1)
  end

  # Id of the last verse of the chapter.
  def self.get_last_verse(book, chapter)
    get_id(book, chapter, get_max_verse(book, chapter))
  end

  # Builds a verse id, clamping each component into its valid range:
  # book to 1..66, chapter to 1..(chapters in the book) and verse to
  # 1..(verses in the chapter). Arguments are coerced with to_i.
  def self.get_id(book, chapter, verse)
    book = [[book.to_i, 1].max, 66].min

    max_chapter = book_chapter_counts[book - 1]
    chapter = [[chapter.to_i, 1].max, max_chapter].min

    max_verse = get_max_verse(book, chapter)
    verse = [[verse.to_i, 1].max, max_verse].min

    (book * 1_000_000) + (chapter * 1_000) + verse
  end

  def self.get_book(id)
    id / 1_000_000 # the book is everything left of the least significant 6 digits
  end

  def self.get_chapter(id)
    (id % 1_000_000) / 1_000 # the chapter is the 4th through 6th least significant digits
  end

  def self.get_verse(id)
    id % 1_000 # the verse is the 3 least significant digits
  end


  # Returns the first match reported by match_all (a MatchData tagged with the
  # book ordinal), or nil. Because match_all works book by book, this is the
  # first reference in book-table order, not necessarily the leftmost one.
  def self.match_one(text)
    match_all(text) do |match|
      return match
    end
    nil
  end


  # Matches all valid Bible references in the supplied string.
  # ! will not necessarily return references in order !
  #
  # Each MatchData is tagged with the book ordinal in its @book instance
  # variable. With a block, yields each match and returns +text+; without a
  # block, returns the Array of matches.
  def self.match_all(text, &block)
    matches = []
    unmatched = text
    book_abbreviations.each do |book_number, abbreviations|
      rx = Regexp.new("\\b#{abbreviations}\\b.? (#{ValidReference})", Regexp::IGNORECASE)
      while (match = unmatched.match(rx)) # find all occurrences of pericopes in this book
        length = match.end(0) - match.begin(0)

        # after matching "2 Peter" don't match "Peter" again as "1 Peter"
        # but keep the same number of characters in the string so indices work
        unmatched = match.pre_match + ("*" * length) + match.post_match
        match.instance_variable_set("@book", book_number)
        if block_given?
          yield match
        else
          matches << match
        end
      end
    end
    block_given? ? text : matches
  end


  # Converts normalized range strings ("3:16-18", "21", "4-5") into Ranges of
  # verse ids for this pericope's book.
  def parse_ranges(ranges)
    # e.g. in 12:1-8, remember that 12 is the chapter when we parse the 8;
    # in a book without chapters every bare number is a verse of chapter 1
    recent_chapter = book_has_chapters ? nil : 1

    ranges.map do |range|
      range = range.split('-') # parse the low end of a verse range and the high end separately
      range << range[0] if range.length < 2 # treat 12:4 as 12:4-12:4
      lower_chapter_verse = range[0].split(':').map { |n| n.to_i } # parse "3:28" to [3,28]
      upper_chapter_verse = range[1].split(':').map { |n| n.to_i } # parse "3:28" to [3,28]

      # make sure the low end of the range and the high end of the range
      # are composed of arrays with two appropriate values: [chapter, verse]
      chapter_range = false
      if lower_chapter_verse.length < 2
        if recent_chapter
          lower_chapter_verse.unshift recent_chapter # e.g. parsing 11 in 12:1-8,11 => remember that 12 is the chapter
        else
          lower_chapter_verse << 1 # no verse specified; this is a range of chapters, start with verse 1
          chapter_range = true
        end
      end
      if upper_chapter_verse.length < 2
        if chapter_range
          # this is a range of chapters, end with the last verse. 999 is the
          # largest verse the pattern can match; get_id clamps it to the last
          # verse of the (clamped) chapter, which also works when the chapter
          # number is past the end of the book.
          upper_chapter_verse << 999
        else
          upper_chapter_verse.unshift lower_chapter_verse[0] # e.g. parsing 8 in 12:1-8 => remember that 12 is the chapter
        end
      end
      recent_chapter = upper_chapter_verse[0] # remember the last chapter

      Range.new(
        Pericope.get_id(book, lower_chapter_verse[0], lower_chapter_verse[1]),
        Pericope.get_id(book, upper_chapter_verse[0], upper_chapter_verse[1]))
    end
  end


  # Reads the verse-count table (a YAML document) from the gem's data
  # directory. get_max_verse indexes the result by book * 1_000_000 +
  # chapter * 1_000.
  def self.load_chapter_verse_counts
    path = File.expand_path(File.dirname(__FILE__) + "/../data/chapter_verse_count.yml")
    File.open(path) { |file| YAML.load(file) }
  end


  # Reads the book abbreviation table from the gem's data directory.
  #
  # Returns an Array of [book ordinal, regexp source] pairs in file order;
  # match_all tries the books in this order.
  def self.load_book_abbreviations
    path = File.expand_path(File.dirname(__FILE__) + "/../data/book_abbreviations.txt")
    book_abbreviations = []
    File.open(path) do |file|
      file.each do |text|
        next if text.start_with?("#") # skip comments

        # the file contains tab-separated values.
        # the first value is the ordinal of the book, subsequent values
        # represent abbreviations and misspellings that should be recognized
        # as the aforementioned book.
        segments = text.chomp.split("\t")

        # a blank line or a line without abbreviations would otherwise become
        # book 0 with the pattern "(?:)", which matches the empty string
        next if segments.length < 2

        book_abbreviations << [segments.shift.to_i, "(?:#{segments.join("|")})"]
      end
    end
    book_abbreviations
  end


  # Reduces the matched reference text to digits and the separators , ; : -
  # Returns a new String; the argument is not modified.
  #
  # The substitutions are order-dependent, so they are chained explicitly.
  def normalize_reference(reference)
    reference.
      gsub(%r{(\d+)\s?[".]\s?(\d+)}, '\1:\2'). # 12"5 and 12.5 -> 12:5 (ValidReference allows a space on either side)
      gsub(%r{\s?&\s?}, ',').                  # 16 & 17 -> 16,17 (otherwise the digits would be glued together)
      gsub(%r{[^0-9,:;-]}, '')                 # remove everything but [0-9,;:-]
  end


  # Memoized verse-count table.
  def self.chapter_verse_counts
    @chapter_verse_counts ||= load_chapter_verse_counts
  end


  # Memoized book abbreviation table.
  def self.book_abbreviations
    @book_abbreviations ||= load_book_abbreviations
  end


  # Number of chapters in each book, indexed by (book ordinal - 1).
  def self.book_chapter_counts
    @book_chapter_counts ||= [
      # Chapters    Book Name         Book Number
      50,         # Genesis           1
      40,         # Exodus            2
      27,         # Leviticus         3
      36,         # Numbers           4
      34,         # Deuteronomy       5
      24,         # Joshua            6
      21,         # Judges            7
      4,          # Ruth              8
      31,         # 1 Samuel          9
      24,         # 2 Samuel          10
      22,         # 1 Kings           11
      25,         # 2 Kings           12
      29,         # 1 Chronicles      13
      36,         # 2 Chronicles      14
      10,         # Ezra              15
      13,         # Nehemiah          16
      10,         # Esther            17
      42,         # Job               18
      150,        # Psalm             19
      31,         # Proverbs          20
      12,         # Ecclesiastes      21
      8,          # Song of Songs     22
      66,         # Isaiah            23
      52,         # Jeremiah          24
      5,          # Lamentations      25
      48,         # Ezekiel           26
      12,         # Daniel            27
      14,         # Hosea             28
      3,          # Joel              29
      9,          # Amos              30
      1,          # Obadiah           31
      4,          # Jonah             32
      7,          # Micah             33
      3,          # Nahum             34
      3,          # Habakkuk          35
      3,          # Zephaniah         36
      2,          # Haggai            37
      14,         # Zechariah         38
      4,          # Malachi           39
      28,         # Matthew           40
      16,         # Mark              41
      24,         # Luke              42
      21,         # John              43
      28,         # Acts              44
      16,         # Romans            45
      16,         # 1 Corinthians     46
      13,         # 2 Corinthians     47
      6,          # Galatians         48
      6,          # Ephesians         49
      4,          # Philippians       50
      4,          # Colossians        51
      5,          # 1 Thessalonians   52
      3,          # 2 Thessalonians   53
      6,          # 1 Timothy         54
      4,          # 2 Timothy         55
      3,          # Titus             56
      1,          # Philemon          57
      13,         # Hebrews           58
      5,          # James             59
      5,          # 1 Peter           60
      3,          # 2 Peter           61
      5,          # 1 John            62
      1,          # 2 John            63
      1,          # 3 John            64
      1,          # Jude              65
      22]         # Revelation        66
  end


  # Regexp source (a String, interpolated by match_all) for the numeric part
  # of a reference: one or more chapter[:verse] ranges separated by , or ;
  ValidReference = begin
    chapter = '\d{1,3}' # matches 0-999
    verse = '\d{1,3}[ab]?' # matches 0-999 with or without 'a' or 'b' afterward
    chapter_verse = "#{chapter}(?:\\s?[.:\"]\\s?#{verse})?" # matches chapter with or without verse reference
    chapter_verse_range = "(?:#{chapter_verse}(?:\\s?[\\&\\-]\\s?(?:(?:#{chapter_verse})|(?:#{verse})))?)"
    "#{chapter_verse_range}(?:\\s?[,;]\\s?#{chapter_verse_range})*" # multiple chapter:verse references
  end


end
|