pericope 0.5.4 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.mdown CHANGED
@@ -1,13 +1,62 @@
1
- Pericope
2
- ========
1
+ # Pericope
3
2
 
4
- Introduction goes here.
3
+ Pericope is a gem for parsing Bible references.
5
4
 
5
+ It recognizes common abbreviations and misspellings for names of the books of the Bible and a variety of ways of denoting ranges of chapters and verses. It can parse pericopes singly or out of a block of text. It's useful for comparing two pericopes for intersection and normalizing them into a well-formatted string.
6
6
 
7
- Example
8
- =======
7
+ # Examples
9
8
 
10
- Example goes here.
9
+ ### Recognize common abbreviations and misspellings for names of the books of the Bible
11
10
 
11
+ ```ruby
12
+ Pericope.new("ps 118:17").to_s # => Psalm 118:17
13
+ Pericope.new("jas 3:1-5").to_s # => James 3:1-5
14
+ Pericope.new("1 jn 4:4").to_s # => 1 John 4:4
15
+ ```
12
16
 
13
- Copyright (c) 2009 [name of plugin creator], released under the MIT license
17
+ ### Compare two pericopes to see if they intersect
18
+
19
+ ```ruby
20
+ a = Pericope.new("Mark 13:1-6")
21
+ b = Pericope.new("Mark 13:5")
22
+ c = Pericope.new("Mark 13:6, 7")
23
+
24
+ a.intersects?(b) # => true
25
+ a.intersects?(c) # => true
26
+ b.intersects?(c) # => false
27
+ ```
28
+
29
+ ### Parse pericopes out of a block of text
30
+
31
+ ```ruby
32
+ text = <<-TEXT
33
+ If then, the Word is so significant, great importance attaches to its exact form. It has the form of a promise as in Isaiah 43:1: "Do not fear, for I have redeemed you; I have called you by name, you are mine," or as in Luke 2:10-11, "Do not be afraid...to you is born this day...a Savior." (Bayer, p51)
34
+ TEXT
35
+
36
+ Pericope.parse(text) # => [Isaiah 43:1, Luke 2:10-11]
37
+
38
+ Pericope.split(text) # => [" If then, the Word is so significant, great importance attaches to its exact form. It has the form of a promise as in ", Isaiah 43:1, ": \"Do not fear, for I have redeemed you; I have called you by name, you are mine,\" or as in ", Luke 2:10-11, ", \"Do not be afraid...to you is born this day...a Savior.\" (Bayer, p51)\n"]
39
+ ```
40
+
41
+ ### Convert pericopes into arrays of verses and reconstruct them from arrays of verses
42
+
43
+ ```ruby
44
+ array = Pericope.new("gen 1:1-3").to_a # => [1001001, 1001002, 1001003]
45
+ Pericope.new(array) # => Genesis 1:1-3
46
+ ```
47
+
48
+
49
+ # History
50
+
51
+ ## 0.6.0
52
+
53
+ - Removed the `index` attribute
54
+ - Deprecated the `pattern` argument to the method `Pericope.split(text, pattern=nil)`
55
+ - Improved performance by 2x
56
+ - Added this README
57
+ - Fixed a bug with parsing inverted [invalid] ranges (e.g. Mark 3-1)
58
+
59
+
60
+ # License
61
+
62
+ Copyright (c) 2012 Bob Lail, released under the MIT license
@@ -1,5 +1,5 @@
1
1
  1 genesis gen? gn
2
- 2 exodus exo?d? ex
2
+ 2 exodus exo?d? ex
3
3
  3 leviticus le?v levi le
4
4
  4 numbers? nu?m nu numb
5
5
  5 deuteronomy deut? dt
data/lib/pericope.rb CHANGED
@@ -1,70 +1,75 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require 'yaml'
4
3
  require 'pericope/version'
5
4
 
6
5
  class Pericope
7
6
  attr_reader :book,
8
7
  :book_chapter_count,
9
8
  :book_name,
10
- :index,
11
9
  :original_string,
12
10
  :ranges
13
11
 
14
12
 
15
13
  def self.book_names
16
- @@book_names ||= ["Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Joshua", "Judges", "Ruth", "1 Samuel", "2 Samuel", "1 Kings", "2 Kings", "1 Chronicles", "2 Chronicles", "Ezra", "Nehemiah", "Esther", "Job", "Psalm", "Proverbs", "Ecclesiastes", "Song of Songs", "Isaiah", "Jeremiah", "Lamentations", "Ezekiel", "Daniel", "Hosea", "Joel", "Amos", "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk", "Zephaniah", "Haggai", "Zechariah", "Malachi", "Matthew", "Mark", "Luke", "John", "Acts", "Romans", "1 Corinthians", "2 Corinthians", "Galatians", "Ephesians", "Philippians", "Colossians", "1 Thessalonians", "2 Thessalonians", "1 Timothy", "2 Timothy", "Titus", "Philemon", "Hebrews", "James", "1 Peter", "2 Peter", "1 John ", "2 John", "3 John", "Jude", "Revelation"]
14
+ @book_names ||= ["Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Joshua", "Judges", "Ruth", "1 Samuel", "2 Samuel", "1 Kings", "2 Kings", "1 Chronicles", "2 Chronicles", "Ezra", "Nehemiah", "Esther", "Job", "Psalm", "Proverbs", "Ecclesiastes", "Song of Songs", "Isaiah", "Jeremiah", "Lamentations", "Ezekiel", "Daniel", "Hosea", "Joel", "Amos", "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk", "Zephaniah", "Haggai", "Zechariah", "Malachi", "Matthew", "Mark", "Luke", "John", "Acts", "Romans", "1 Corinthians", "2 Corinthians", "Galatians", "Ephesians", "Philippians", "Colossians", "1 Thessalonians", "2 Thessalonians", "1 Timothy", "2 Timothy", "Titus", "Philemon", "Hebrews", "James", "1 Peter", "2 Peter", "1 John ", "2 John", "3 John", "Jude", "Revelation"]
17
15
  end
18
16
 
19
17
  def self.book_name_regexes
20
- @@book_name_regexes ||= begin
21
- book_abbreviations.map do |book|
22
- [book, Regexp.new("\\b#{book[1]}\\b.? (#{ValidReference})", true)]
23
- end
24
- end
18
+ @book_name_regexes ||= book_abbreviations.map { |book_number, book_regex| [book_number, /\b#{book_regex}\b/] }
25
19
  end
26
20
 
27
21
 
28
22
 
29
- def initialize(string_or_array)
30
- case string_or_array
31
- when String, MatchData
32
- match = string_or_array.is_a?(String) ? Pericope.match_one(string_or_array) : string_or_array
33
- raise "no pericope found in #{string_or_array} (#{string_or_array.class})" if match.nil?
23
+ def initialize(arg)
24
+ case arg
25
+ when String
26
+ attributes = Pericope.match_one(arg)
27
+ raise "no pericope found in #{arg} (#{arg.class})" if attributes.nil?
34
28
 
35
- @original_string = match.to_s
36
- @index = match.begin(0)
37
- set_book match.instance_variable_get('@book')
38
- @ranges = parse_reference(match[1])
29
+ @original_string = attributes[:original_string]
30
+ set_book attributes[:book]
31
+ @ranges = attributes[:ranges]
39
32
 
40
33
  when Array
41
- set_book Pericope.get_book(string_or_array.first)
42
- @ranges = Pericope.group_array_into_ranges(string_or_array)
34
+ set_book Pericope.get_book(arg.first)
35
+ @ranges = Pericope.group_array_into_ranges(arg)
43
36
 
44
37
  else
45
- raise ArgumentError, "#{string_or_array.class} is not a recognized input for pericope"
38
+ attributes = arg
39
+ @original_string = attributes[:original_string]
40
+ set_book attributes[:book]
41
+ @ranges = attributes[:ranges]
42
+
46
43
  end
47
44
  end
48
45
 
49
46
 
50
47
 
48
+ def self.book_has_chapters?(book)
49
+ book_chapter_counts[book - 1] > 1
50
+ end
51
+
51
52
  def book_has_chapters?
52
- (book_chapter_count > 1)
53
+ book_chapter_count > 1
53
54
  end
54
55
 
55
56
 
56
57
 
58
+ # Differs from Pericope.new in that it won't raise an exception
59
+ # if text does not contain a pericope but will return nil instead.
57
60
  def self.parse_one(text)
58
- match = match_one(text)
59
- match ? Pericope.new(match) : nil
61
+ parse(text) do |pericope|
62
+ return pericope
63
+ end
64
+ nil
60
65
  end
61
66
 
62
67
 
63
68
 
64
- def self.parse(text, &block)
69
+ def self.parse(text)
65
70
  pericopes = []
66
- match_all text do |match|
67
- pericope = Pericope.new(match)
71
+ match_all(text) do |attributes|
72
+ pericope = Pericope.new(attributes)
68
73
  if block_given?
69
74
  yield pericope
70
75
  else
@@ -77,23 +82,47 @@ class Pericope
77
82
 
78
83
 
79
84
  def self.split(text, pattern=nil)
80
- matches = match_all(text) # find every pericope in the text
81
- matches = matches.sort {|a,b| a.begin(0) <=> b.begin(0)} # put them in the order of their occurrence
82
-
85
+ puts "DEPRECATION NOTICE: split will no longer accept a 'pattern' argument in Pericope 0.7.0" if pattern
83
86
  segments = []
84
87
  start = 0
85
- for match in matches
88
+
89
+ match_all(text) do |attributes, match|
90
+
86
91
  pretext = text.slice(start...match.begin(0))
87
- segments.concat(pattern ? pretext.split(pattern).delete_if{|s| s.length==0} : [pretext]) if (pretext.length>0)
88
- segments << Pericope.new(match)
92
+ if pretext.length > 0
93
+ segments << pretext
94
+ yield pretext if block_given?
95
+ end
96
+
97
+ pericope = Pericope.new(attributes)
98
+ segments << pericope
99
+ yield pericope if block_given?
100
+
89
101
  start = match.end(0)
90
102
  end
103
+
91
104
  pretext = text.slice(start...text.length)
92
- segments.concat(pattern ? pretext.split(pattern).delete_if{|s| s.length==0} : [pretext]) if (pretext.length>0)
105
+ if pretext.length > 0
106
+ segments << pretext
107
+ yield pretext if block_given?
108
+ end
93
109
 
110
+ segments = ___split_segments_by_pattern(segments, pattern) if pattern
94
111
  segments
95
112
  end
96
113
 
114
+ def self.___split_segments_by_pattern(segments, pattern)
115
+ segments2 = []
116
+ segments.each do |segment|
117
+ if segment.is_a? Pericope
118
+ segments2 << segment
119
+ else
120
+ segments2.concat(segment.split(pattern))
121
+ end
122
+ end
123
+ segments2
124
+ end
125
+
97
126
 
98
127
 
99
128
  def self.extract(text)
@@ -149,19 +178,19 @@ class Pericope
149
178
  def to_a
150
179
  # one range per chapter
151
180
  chapter_ranges = []
152
- for range in ranges
153
- min_chapter = Pericope.get_chapter(range.min)
154
- max_chapter = Pericope.get_chapter(range.max)
155
- if (min_chapter==max_chapter)
181
+ ranges.each do |range|
182
+ min_chapter = Pericope.get_chapter(range.begin)
183
+ max_chapter = Pericope.get_chapter(range.end)
184
+ if min_chapter == max_chapter
156
185
  chapter_ranges << range
157
186
  else
158
- chapter_ranges << Range.new(range.min, Pericope.get_last_verse(book, min_chapter))
187
+ chapter_ranges << Range.new(range.begin, Pericope.get_last_verse(book, min_chapter))
159
188
  for chapter in (min_chapter+1)...max_chapter
160
189
  chapter_ranges << Range.new(
161
190
  Pericope.get_first_verse(book, chapter),
162
191
  Pericope.get_last_verse(book, chapter))
163
192
  end
164
- chapter_ranges << Range.new(Pericope.get_first_verse(book, max_chapter), range.max)
193
+ chapter_ranges << Range.new(Pericope.get_first_verse(book, max_chapter), range.end)
165
194
  end
166
195
  end
167
196
 
@@ -172,19 +201,17 @@ class Pericope
172
201
 
173
202
  def well_formatted_reference
174
203
  recent_chapter = nil # e.g. in 12:1-8, remember that 12 is the chapter when we parse the 8
175
- recent_chapter = 1 if !self.book_has_chapters?
204
+ recent_chapter = 1 unless book_has_chapters?
176
205
  ranges.map do |range|
177
- min_chapter = Pericope.get_chapter(range.min)
178
- min_verse = Pericope.get_verse(range.min)
179
- max_chapter = Pericope.get_chapter(range.max)
180
- max_verse = Pericope.get_verse(range.max)
206
+ min_chapter = Pericope.get_chapter(range.begin)
207
+ min_verse = Pericope.get_verse(range.begin)
208
+ max_chapter = Pericope.get_chapter(range.end)
209
+ max_verse = Pericope.get_verse(range.end)
181
210
  s = ""
182
211
 
183
- if (min_verse==1) and (max_verse>=Pericope.get_max_verse(book, max_chapter))
212
+ if min_verse == 1 and max_verse >= Pericope.get_max_verse(book, max_chapter)
184
213
  s << min_chapter.to_s
185
- if max_chapter > min_chapter
186
- s << "-#{max_chapter}"
187
- end
214
+ s << "-#{max_chapter}" if max_chapter > min_chapter
188
215
  else
189
216
  if recent_chapter == min_chapter
190
217
  s << min_verse.to_s
@@ -214,12 +241,14 @@ class Pericope
214
241
  def intersects?(pericope)
215
242
  return false unless pericope.is_a?(Pericope)
216
243
  return false unless (self.book == pericope.book)
217
- for self_range in self.ranges
218
- for other_range in pericope.ranges
219
- return true if (self_range.max >= other_range.min) and (self_range.min <= other_range.max)
244
+
245
+ self.ranges.each do |self_range|
246
+ pericope.ranges.each do |other_range|
247
+ return true if (self_range.end >= other_range.begin) and (self_range.begin <= other_range.end)
220
248
  end
221
249
  end
222
- return false
250
+
251
+ false
223
252
  end
224
253
 
225
254
 
@@ -241,23 +270,8 @@ private
241
270
 
242
271
  def set_book(value)
243
272
  @book = value || raise(ArgumentError, "must specify book")
244
- @book_name = Pericope.book_names[@book-1]
245
- @book_chapter_count = Pericope.book_chapter_counts[@book-1]
246
- end
247
-
248
-
249
-
250
- def parse_reference(reference)
251
- reference = normalize_reference(reference)
252
- (reference.nil? || reference.empty?) ? [] : parse_ranges(reference.split(/[,;]/))
253
- end
254
-
255
- def normalize_reference(reference)
256
- [ [%r{(\d+)[".](\d+)},'\1:\2'], # 12"5 and 12.5 -> 12:5
257
- [%r{(–|—)},'-'], # convert em dash and en dash to -
258
- [%r{[^0-9,:;\-–—]},''] # remove everything but [0-9,;:-]
259
- ].each { |pattern, replacement| reference.gsub!(pattern, replacement) }
260
- reference
273
+ @book_name = Pericope.book_names[@book - 1]
274
+ @book_chapter_count = Pericope.book_chapter_counts[@book - 1]
261
275
  end
262
276
 
263
277
 
@@ -352,8 +366,8 @@ private
352
366
 
353
367
  # matches the first valid Bible reference in the supplied string
354
368
  def self.match_one(text)
355
- match_all(text) do |match|
356
- return match
369
+ match_all(text) do |attributes|
370
+ return attributes
357
371
  end
358
372
  nil
359
373
  end
@@ -361,41 +375,63 @@ private
361
375
 
362
376
 
363
377
  # matches all valid Bible references in the supplied string
364
- # ! will not necessarily return references in order !
365
- def self.match_all(text, &block)
366
- matches = []
367
- unmatched = text
368
-
369
- for book_regex in book_name_regexes
370
- rx = book_regex[1]
371
- while (match = unmatched.match rx) # find all occurrences of pericopes in this book
372
- length = match.end(0) - match.begin(0)
373
-
374
- # after matching "2 Peter" don't match "Peter" again as "1 Peter"
375
- # but keep the same number of characters in the string so indices work
376
- unmatched = match.pre_match + ("*" * length) + match.post_match
377
- match.instance_variable_set('@book', book_regex[0][0])
378
- if block_given?
379
- yield match
380
- else
381
- matches << match
382
- end
383
- end
378
+ def self.match_all(text)
379
+ text.scan(Pericope::PERICOPE_PATTERN) do
380
+ match = Regexp.last_match
381
+
382
+ book = recognize_book(match[1])
383
+ next unless book
384
+
385
+ ranges = parse_reference(book, match[2])
386
+ next if ranges.empty?
387
+
388
+ attributes = {
389
+ :original_string => match.to_s,
390
+ :book => book,
391
+ :ranges => ranges
392
+ }
393
+
394
+ yield attributes, match
384
395
  end
385
- block_given? ? text : matches
386
396
  end
387
397
 
398
+ def self.recognize_book(book)
399
+ book = book.to_s.downcase
400
+ book_name_regexes.each do |book_regex|
401
+ return book_regex[0] if book =~ book_regex[1]
402
+ end
403
+ nil
404
+ end
405
+
406
+ def self.parse_reference(book, reference)
407
+ reference = normalize_reference(reference)
408
+ (reference.nil? || reference.empty?) ? [] : parse_ranges(book, reference.split(/[,;]/))
409
+ end
388
410
 
411
+ def self.normalize_reference(reference)
412
+ [ [%r{(\d+)[".](\d+)},'\1:\2'], # 12"5 and 12.5 -> 12:5
413
+ [%r{(–|—)},'-'], # convert em dash and en dash to -
414
+ [%r{[^0-9,:;\-–—]},''] # remove everything but [0-9,;:-]
415
+ ].each { |pattern, replacement| reference.gsub!(pattern, replacement) }
416
+ reference
417
+ end
389
418
 
390
- def parse_ranges(ranges)
419
+ def self.parse_ranges(book, ranges)
391
420
  recent_chapter = nil # e.g. in 12:1-8, remember that 12 is the chapter when we parse the 8
392
- recent_chapter = 1 if !self.book_has_chapters?
421
+ recent_chapter = 1 if !book_has_chapters?(book)
393
422
  ranges.map do |range|
394
423
  range = range.split('-') # parse the low end of a verse range and the high end separately
395
424
  range << range[0] if (range.length < 2) # treat 12:4 as 12:4-12:4
396
425
  lower_chapter_verse = range[0].split(':').map(&:to_i) # parse "3:28" to [3,28]
397
426
  upper_chapter_verse = range[1].split(':').map(&:to_i) # parse "3:28" to [3,28]
398
427
 
428
+ # treat Mark 3-1 as Mark 3-3 and, eventually, Mark 3:1-35
429
+ if (lower_chapter_verse.length == 1) &&
430
+ (upper_chapter_verse.length == 1) &&
431
+ (upper_chapter_verse[0] < lower_chapter_verse[0])
432
+ upper_chapter_verse = lower_chapter_verse.dup
433
+ end
434
+
399
435
  # make sure the low end of the range and the high end of the range
400
436
  # are composed of arrays with two appropriate values: [chapter, verse]
401
437
  chapter_range = false
@@ -435,10 +471,14 @@ private
435
471
 
436
472
 
437
473
  def self.load_chapter_verse_counts
438
- path = File.expand_path(File.dirname(__FILE__) + "/../data/chapter_verse_count.yml")
474
+ path = File.expand_path(File.dirname(__FILE__) + "/../data/chapter_verse_count.txt")
475
+ chapter_verse_counts = []
439
476
  File.open(path) do |file|
440
- return YAML.load(file)
477
+ file.each do |text|
478
+ chapter_verse_counts << text.chomp.split("\t")[0..1].map(&:to_i)
479
+ end
441
480
  end
481
+ Hash[chapter_verse_counts]
442
482
  end
443
483
 
444
484
 
@@ -459,7 +499,7 @@ private
459
499
  end
460
500
  end
461
501
  end
462
- book_abbreviations
502
+ Hash[book_abbreviations]
463
503
  end
464
504
 
465
505
 
@@ -471,13 +511,13 @@ private
471
511
 
472
512
 
473
513
  def self.book_abbreviations
474
- @@book_abbreviations ||= load_book_abbreviations
514
+ @book_abbreviations ||= load_book_abbreviations
475
515
  end
476
516
 
477
517
 
478
518
 
479
519
  def self.book_chapter_counts
480
- @@book_chapter_counts ||= [
520
+ @book_chapter_counts ||= [
481
521
  # Chapters Book Name Book Number
482
522
  50, # Genesis 1
483
523
  40, # Exodus 2
@@ -547,9 +587,12 @@ private
547
587
  22] # Revelation 66
548
588
  end
549
589
 
550
- ValidReference = begin
551
- reference = '((\s*\d{1,3})(\s*[:\"\.]\s*\d{1,3}(a|b)?(\s*(,|;)\s*(\d{1,3}[:\"\.])?\s*\d{1,3}(a|b)?)*)?(\s*(-|–|—)\s*(\d{1,3}\s*[:\"\.])?(\d{1,3}(a|b)?)(\s*(,|;)\s*(\d{1,3}\s*[:\"\.])?\s*\d{1,3}(a|b)?)*)*)'
552
- end
590
+
591
+ BOOK_PATTERN = /\b(?:(?:1|2|3|i+|first|second|third|1st|2nd|3rd) )?(?:\w+| of )\b/
592
+
593
+ REFERENCE_PATTERN = '(?:\s*\d{1,3})(?:\s*[:\"\.]\s*\d{1,3}[ab]?(?:\s*[,;]\s*(?:\d{1,3}[:\"\.])?\s*\d{1,3}[ab]?)*)?(?:\s*[-–—]\s*(?:\d{1,3}\s*[:\"\.])?(?:\d{1,3}[ab]?)(?:\s*[,;]\s*(?:\d{1,3}\s*[:\"\.])?\s*\d{1,3}[ab]?)*)*'
594
+
595
+ PERICOPE_PATTERN = /(#{BOOK_PATTERN})\.? (#{REFERENCE_PATTERN})/i
553
596
 
554
597
 
555
598