pericope 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/pericope.rb ADDED
@@ -0,0 +1,443 @@
1
+ require 'yaml'
2
+ require 'pericope/version'
3
+
4
+ class Pericope
5
+ attr_reader :original_string, :book, :book_name, :book_chapter_count, :book_has_chapters, :ranges, :index
6
+
7
+
8
+
9
# Builds a Pericope from either a String (the first reference found in it is
# used) or a MatchData produced by Pericope.match_all, which tags the match
# with an "@book" instance variable holding the book number.
#
# Raises a RuntimeError when no reference can be found in the input.
def initialize(match)
  found = match.is_a?(String) ? Pericope.match_one(match) : match
  raise "no pericope found in input" unless found.is_a?(MatchData)

  @original_string = found.to_s
  @index = found.begin(0)
  @book = found.instance_variable_get("@book")

  # book numbers are 1-based; the lookup tables are 0-based
  table_index = @book - 1
  @book_name = Pericope.book_names[table_index]
  @book_chapter_count = Pericope.book_chapter_counts[table_index]

  # must be set before parse_ranges runs, which reads book_has_chapters
  @book_has_chapters = (@book_chapter_count > 1)
  @ranges = parse_ranges(normalize_reference(found[1]).split(/[,;]/))
end
22
+
23
+
24
+
25
# Returns the names of the 66 books ordered by book number (index 0 holds
# book 1, Genesis). The array is built once and memoized in a class-level
# instance variable, the same way chapter_verse_counts is memoized.
#
# Fix: the entry for book 62 used to be "1 John " (trailing space), which
# leaked a double space into #to_s output.
def self.book_names
  @book_names ||= [
    "Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy",
    "Joshua", "Judges", "Ruth", "1 Samuel", "2 Samuel",
    "1 Kings", "2 Kings", "1 Chronicles", "2 Chronicles", "Ezra",
    "Nehemiah", "Esther", "Job", "Psalm", "Proverbs",
    "Ecclesiastes", "Song of Songs", "Isaiah", "Jeremiah", "Lamentations",
    "Ezekiel", "Daniel", "Hosea", "Joel", "Amos",
    "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk",
    "Zephaniah", "Haggai", "Zechariah", "Malachi",
    "Matthew", "Mark", "Luke", "John", "Acts",
    "Romans", "1 Corinthians", "2 Corinthians", "Galatians", "Ephesians",
    "Philippians", "Colossians", "1 Thessalonians", "2 Thessalonians", "1 Timothy",
    "2 Timothy", "Titus", "Philemon", "Hebrews", "James",
    "1 Peter", "2 Peter", "1 John", "2 John", "3 John",
    "Jude", "Revelation"
  ]
end
28
+
29
+
30
+
31
# Returns a Pericope for the first reference that match_one finds in +text+,
# or nil when match_one finds nothing.
def self.parse_one(text)
  found = match_one(text)
  return nil unless found
  Pericope.new(found)
end
35
+
36
+
37
+
38
# Finds every reference in +text+ (via match_all) and wraps each one in a
# Pericope.
#
# With a block: yields each Pericope as it is found and returns +text+.
# Without a block: returns an array of the Pericopes.
def self.parse(text, &block)
  collected = []
  match_all(text) do |match|
    pericope = Pericope.new(match)
    if block
      block.call(pericope)
    else
      collected.push(pericope)
    end
  end
  block ? text : collected
end
50
+
51
+
52
+
53
# Splits +text+ into an ordered array of segments: plain Strings for the text
# between references and Pericope objects for the references themselves.
#
# When +pattern+ is given, each plain-text stretch is further split on it and
# empty pieces are dropped. Empty stretches are never added.
def self.split(text, pattern=nil)
  # match_all does not promise document order, so sort by start offset
  ordered = match_all(text).sort { |x, y| x.begin(0) <=> y.begin(0) }

  pieces = []
  add_plain_text = lambda do |chunk|
    if chunk.length > 0
      pieces.concat(pattern ? chunk.split(pattern).delete_if { |s| s.length == 0 } : [chunk])
    end
  end

  cursor = 0
  ordered.each do |match|
    add_plain_text.call(text.slice(cursor...match.begin(0)))
    pieces << Pericope.new(match)
    cursor = match.end(0)
  end
  add_plain_text.call(text.slice(cursor...text.length))

  pieces
end
70
+
71
+
72
+
73
# Separates +text+ into its plain text and its references.
#
# Returns a Hash with :text (the non-reference segments from split,
# concatenated in order) and :pericopes (the remaining segments, in order).
def self.extract(text)
  strings, pericopes = split(text).partition { |segment| segment.is_a?(String) }
  remaining_text = strings.inject("") { |buffer, piece| buffer << piece }
  {:text => remaining_text, :pericopes => pericopes}
end
87
+
88
+
89
+
90
# Canonical form of the reference: the book name, a space, and the
# well-formatted chapter/verse reference.
def to_s
  [book_name, well_formatted_reference].join(" ")
end
93
+
94
+
95
+
96
# One-line summary showing the text that was originally matched and the
# canonical form (#to_s) it was parsed into.
def report
  format(" %s => %s", original_string, self)
end
99
+
100
+
101
+
102
# Expands the pericope into a flat array of verse ids.
#
# A range that crosses chapters is first cut into one range per chapter
# (rest of the first chapter, every chapter in between, start of the last
# chapter) so that the ids in the gaps between chapters are not produced.
def to_a
  per_chapter = ranges.map do |range|
    first_chapter = Pericope.get_chapter(range.min)
    last_chapter = Pericope.get_chapter(range.max)

    if first_chapter == last_chapter
      [range]
    else
      head = Range.new(range.min, Pericope.get_last_verse(book, first_chapter))
      middle = ((first_chapter + 1)...last_chapter).map do |chapter|
        Range.new(
          Pericope.get_first_verse(book, chapter),
          Pericope.get_last_verse(book, chapter))
      end
      tail = Range.new(Pericope.get_first_verse(book, last_chapter), range.max)
      [head] + middle + [tail]
    end
  end

  per_chapter.flatten.map { |chapter_range| chapter_range.to_a }.flatten
end
123
+
124
+
125
+
126
# Renders the ranges as a comma-separated chapter/verse string such as
# "3:16-18, 20" or "3-4".
#
# * A range that starts at verse 1 and ends at or after the last verse of its
#   final chapter is written as whole chapters ("3" or "3-4").
# * Otherwise the chapter is written only when it differs from the chapter
#   most recently written ("3:16-18, 20" rather than "3:16-18, 3:20").
# * For books without chapters the chapter starts out as 1, so only verses
#   are written.
def well_formatted_reference
  current_chapter = book_has_chapters ? nil : 1

  pieces = ranges.map do |range|
    first_chapter = Pericope.get_chapter(range.min)
    first_verse = Pericope.get_verse(range.min)
    last_chapter = Pericope.get_chapter(range.max)
    last_verse = Pericope.get_verse(range.max)

    if first_verse == 1 && last_verse >= Pericope.get_max_verse(book, last_chapter)
      # whole chapters: no verse numbers at all
      if last_chapter > first_chapter
        "#{first_chapter}-#{last_chapter}"
      else
        first_chapter.to_s
      end
    else
      if current_chapter == first_chapter
        text = first_verse.to_s
      else
        current_chapter = first_chapter
        text = "#{first_chapter}:#{first_verse}"
      end

      if range.count > 1
        if first_chapter == last_chapter
          text = text + "-#{last_verse}"
        else
          current_chapter = last_chapter
          text = text + "-#{last_chapter}:#{last_verse}"
        end
      end

      text
    end
  end

  pieces.join(", ")
end
164
+
165
+
166
+
167
# True when +pericope+ is a Pericope in the same book and at least one of its
# ranges overlaps (shares a verse id with) one of this pericope's ranges.
# Anything that is not a Pericope never intersects.
def intersects?(pericope)
  return false unless pericope.is_a?(Pericope) && book == pericope.book

  ranges.any? do |mine|
    pericope.ranges.any? do |theirs|
      mine.max >= theirs.min && mine.min <= theirs.max
    end
  end
end
177
+
178
+
179
+
180
# Looks up the number of verses in +chapter+ of +book+.
#
# chapter_verse_counts is keyed by the chapter's id with a verse part of
# zero: book * 1_000_000 + chapter * 1_000. Returns whatever that table holds
# for the key.
def self.get_max_verse(book, chapter)
  chapter_key = (book * 1_000_000) + (chapter * 1_000)
  chapter_verse_counts[chapter_key]
end
185
+
186
+
187
+
188
+ protected
189
+
190
+
191
+
192
# Id of the first verse (verse 1) of +chapter+ in +book+, built with get_id.
def self.get_first_verse(book, chapter)
  first_verse = 1
  get_id(book, chapter, first_verse)
end
195
+
196
# Id of the last verse of +chapter+ in +book+: the verse number comes from
# get_max_verse and the id is built with get_id.
def self.get_last_verse(book, chapter)
  last_verse = get_max_verse(book, chapter)
  get_id(book, chapter, last_verse)
end
199
+
200
# Packs a book, chapter and verse into a single integer id:
#
#   book * 1_000_000 + chapter * 1_000 + verse
#
# Each part is converted with to_i and clamped into its valid range first:
# the book to 1..66, the chapter to 1..(chapters in that book) and the verse
# to 1..(verses in that chapter).
def self.get_id(book, chapter, verse)
  book_number = [[book.to_i, 1].max, 66].min

  chapter_limit = book_chapter_counts[book_number - 1]
  chapter_number = [[chapter.to_i, 1].max, chapter_limit].min

  verse_limit = get_max_verse(book_number, chapter_number)
  verse_number = [[verse.to_i, 1].max, verse_limit].min

  (book_number * 1_000_000) + (chapter_number * 1_000) + verse_number
end
219
+
220
# Extracts the book number from a verse id: everything to the left of the six
# least significant digits.
def self.get_book(id)
  book_multiplier = 1_000_000
  id / book_multiplier
end
223
+
224
# Extracts the chapter number from a verse id: the fourth through sixth least
# significant digits (the thousands within the book's million).
def self.get_chapter(id)
  within_book = id % 1_000_000
  within_book / 1_000
end
227
+
228
# Extracts the verse number from a verse id: the three least significant
# digits.
def self.get_verse(id)
  verse_modulus = 1_000
  id % verse_modulus
end
231
+
232
+
233
+
234
# Returns the first match that match_all yields for +text+, or nil when it
# yields none. The block returns out of this method on the first match, so
# match_all stops searching right there.
def self.match_one(text)
  match_all(text) { |first_match| return first_match }
  nil
end
241
+
242
+
243
+
244
# Finds every Bible reference in +text+.
#
# Each entry of book_abbreviations is tried in turn. Matches are therefore
# grouped by book and are NOT necessarily in the order they occur in +text+.
# Every MatchData gets an "@book" instance variable holding the book number;
# capture group 1 is the chapter/verse part of the reference.
#
# With a block: yields each MatchData and returns +text+.
# Without a block: returns an array of the MatchData objects.
#
# NOTE(review): the `.?` after the book name matches any single character;
# presumably it is meant to allow the period of an abbreviation ("Gen.").
def self.match_all(text, &block)
  found = []
  remaining = text

  book_abbreviations.each do |book_number, abbreviations|
    rx = Regexp.new("\\b#{abbreviations}\\b.? (#{ValidReference})", Regexp::IGNORECASE)

    # find all occurrences of pericopes in this book
    while (match = remaining.match(rx))
      width = match.end(0) - match.begin(0)

      # Blank out what was just matched so that, e.g., "Peter" in "2 Peter"
      # is not matched again as "1 Peter". The replacement has the same
      # length so the offsets of later matches still refer to +text+.
      remaining = match.pre_match + ("*" * width) + match.post_match
      match.instance_variable_set("@book", book_number)

      if block_given?
        yield match
      else
        found << match
      end
    end
  end

  block_given? ? text : found
end
268
+
269
+
270
+
271
# Converts normalized range strings such as "3:16-18", "20" or "3-4" into
# Ranges of verse ids (built with Pericope.get_id).
#
# A bare number is a verse when a chapter is already known (from an earlier
# range, or chapter 1 for books without chapters) and a chapter otherwise.
# A chapter-only range runs from verse 1 of the first chapter to the last
# verse of the final chapter.
def parse_ranges(ranges)
  # e.g. in 12:1-8,11 remember that 12 is the chapter when we reach the 11
  current_chapter = book_has_chapters ? nil : 1

  ranges.map do |text|
    # parse the low end and the high end separately; treat 12:4 as 12:4-12:4
    ends = text.split('-')
    ends << ends[0] if ends.length < 2
    low = ends[0].split(':').map(&:to_i)  # "3:28" => [3, 28]
    high = ends[1].split(':').map(&:to_i)

    # make both ends into [chapter, verse] pairs
    whole_chapters = false
    if low.length < 2
      if current_chapter
        low.unshift(current_chapter)      # a verse in the remembered chapter
      else
        low << 1                          # a chapter: start at its first verse
        whole_chapters = true
      end
    end

    if high.length < 2
      if whole_chapters
        high << Pericope.get_max_verse(book, high[0])  # end at the chapter's last verse
      else
        high.unshift(low[0])              # e.g. the 8 in 12:1-8 is in chapter 12
      end
    end

    current_chapter = high[0]

    Range.new(
      Pericope.get_id(book, low[0], low[1]),
      Pericope.get_id(book, high[0], high[1]))
  end
end
305
+
306
+
307
+
308
# Reads ../data/chapter_verse_count.yml (relative to this file's directory)
# and returns the parsed YAML document.
def self.load_chapter_verse_counts
  path = File.expand_path("../data/chapter_verse_count.yml", File.dirname(__FILE__))
  File.open(path) { |file| YAML.load(file) }
end
314
+
315
+
316
+
317
# Reads ../data/book_abbreviations.txt (relative to this file's directory)
# and returns an array of [book_number, pattern] pairs, one per usable line.
#
# The file contains tab-separated values. The first value on a line is the
# ordinal of the book; the remaining values are abbreviations and
# misspellings that should be recognized as that book. They are joined into
# a non-capturing alternation, e.g. [1, "(?:Genesis|Gen)"]. Lines starting
# with "#" are comments.
#
# Fix: blank lines and lines that list no abbreviations are now skipped.
# They used to produce the empty pattern "(?:)", which matches in front of
# any reference and attributes it to the wrong book (book 0 for a blank line).
def self.load_book_abbreviations
  path = File.expand_path("../data/book_abbreviations.txt", File.dirname(__FILE__))
  book_abbreviations = []
  File.open(path) do |file|
    file.each do |line|
      next if line.start_with?("#") # skip comments

      fields = line.chomp.split("\t")
      next if fields.length < 2     # need a book number plus at least one abbreviation

      book_number = fields.shift.to_i
      book_abbreviations << [book_number, "(?:#{fields.join("|")})"]
    end
  end
  book_abbreviations
end
335
+
336
+
337
+
338
# Reduces the chapter/verse part of a matched reference to the characters
# parse_ranges understands: digits and , ; : -
#
#   12"5 and 12.5  ->  12:5
#   3:16 & 18      ->  3:16,18
#   3:16a - 18b    ->  3:16-18
#
# Returns a new String; +reference+ itself is left untouched.
#
# Fixes:
# * no longer mutates the caller's string (gsub! also raised on frozen input);
# * the rules are applied in a fixed order instead of relying on Hash
#   iteration order;
# * "&", which ValidReference accepts between verses, is turned into a list
#   separator. It used to be stripped, fusing "16 & 18" into "1618".
def normalize_reference(reference)
  normalized = reference.gsub(%r{(\d+)[".](\d+)}, '\1:\2') # 12"5 and 12.5 -> 12:5
  normalized = normalized.gsub(%r{&}, ',')                  # "and" is a list separator
  normalized.gsub(%r{[^0-9,:;-]}, '')                       # remove everything but [0-9,;:-]
end
344
+
345
+
346
+
347
# Returns the table loaded by load_chapter_verse_counts, loading it on first
# use and reusing the same object afterwards.
def self.chapter_verse_counts
  @chapter_verse_counts = load_chapter_verse_counts unless @chapter_verse_counts
  @chapter_verse_counts
end
350
+
351
+
352
+
353
# Returns the [book_number, pattern] pairs produced by
# load_book_abbreviations, loading them on first use and reusing the same
# array afterwards.
#
# The cache lives in a class-level instance variable, like
# chapter_verse_counts, rather than in a class variable that is shared with
# (and can be clobbered by) every subclass.
def self.book_abbreviations
  @book_abbreviations ||= load_book_abbreviations
end
356
+
357
+
358
+
359
# Returns the number of chapters in each of the 66 books, ordered by book
# number (index 0 holds book 1, Genesis). Built once and memoized.
#
# The cache lives in a class-level instance variable, like
# chapter_verse_counts, rather than in a class variable that is shared with
# (and can be clobbered by) every subclass.
def self.book_chapter_counts
  @book_chapter_counts ||= [
    # Chapters   Book Name         Book Number
    50,        # Genesis           1
    40,        # Exodus            2
    27,        # Leviticus         3
    36,        # Numbers           4
    34,        # Deuteronomy       5
    24,        # Joshua            6
    21,        # Judges            7
    4,         # Ruth              8
    31,        # 1 Samuel          9
    24,        # 2 Samuel          10
    22,        # 1 Kings           11
    25,        # 2 Kings           12
    29,        # 1 Chronicles      13
    36,        # 2 Chronicles      14
    10,        # Ezra              15
    13,        # Nehemiah          16
    10,        # Esther            17
    42,        # Job               18
    150,       # Psalm             19
    31,        # Proverbs          20
    12,        # Ecclesiastes      21
    8,         # Song of Songs     22
    66,        # Isaiah            23
    52,        # Jeremiah          24
    5,         # Lamentations      25
    48,        # Ezekiel           26
    12,        # Daniel            27
    14,        # Hosea             28
    3,         # Joel              29
    9,         # Amos              30
    1,         # Obadiah           31
    4,         # Jonah             32
    7,         # Micah             33
    3,         # Nahum             34
    3,         # Habakkuk          35
    3,         # Zephaniah         36
    2,         # Haggai            37
    14,        # Zechariah         38
    4,         # Malachi           39
    28,        # Matthew           40
    16,        # Mark              41
    24,        # Luke              42
    21,        # John              43
    28,        # Acts              44
    16,        # Romans            45
    16,        # 1 Corinthians     46
    13,        # 2 Corinthians     47
    6,         # Galatians         48
    6,         # Ephesians         49
    4,         # Philippians       50
    4,         # Colossians        51
    5,         # 1 Thessalonians   52
    3,         # 2 Thessalonians   53
    6,         # 1 Timothy         54
    4,         # 2 Timothy         55
    3,         # Titus             56
    1,         # Philemon          57
    13,        # Hebrews           58
    5,         # James             59
    5,         # 1 Peter           60
    3,         # 2 Peter           61
    5,         # 1 John            62
    1,         # 2 John            63
    1,         # 3 John            64
    1,         # Jude              65
    22         # Revelation        66
  ]
end
429
+
430
+
431
+
432
# Regexp source (a String, interpolated into a Regexp by match_all) for the
# chapter/verse part of a reference: one or more chapter[:verse] items, each
# optionally extended into a range with "-" or "&", separated by "," or ";".
ValidReference = begin
  chapter_rx = '\d{1,3}'                 # matches 0-999
  verse_rx = chapter_rx + '[ab]?'        # matches 0-999 with or without 'a' or 'b' afterward
  verse_separator = '\s?[.:"]\s?'

  # a chapter with or without a verse
  chapter_verse_rx = "#{chapter_rx}(?:#{verse_separator}#{verse_rx})?"

  # a chapter[:verse] optionally followed by the end of a range, which is
  # either another chapter[:verse] or just a verse
  range_rx = "(?:#{chapter_verse_rx}(?:\\s?[\\&\\-]\\s?(?:(?:#{chapter_verse_rx})|(?:#{verse_rx})))?)"

  # one or more ranges
  "#{range_rx}(?:\\s?[,;]\\s?#{range_rx})*"
end
440
+
441
+
442
+
443
+ end
@@ -0,0 +1,3 @@
1
+ class Pericope
2
+ VERSION = "0.1.2" unless defined?(::Pericope::Version)
3
+ end
@@ -0,0 +1,4 @@
1
+ # desc "Explaining what the task does"
2
+ # task :pericope do
3
+ # # Task goes here
4
+ # end