pericope 0.5.4 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.mdown +56 -7
- data/data/book_abbreviations.txt +1 -1
- data/lib/pericope.rb +145 -102
- data/lib/pericope/version.rb +1 -1
- metadata +8 -6
- data/data/chapter_verse_count.yml +0 -1190
data/README.mdown
CHANGED
@@ -1,13 +1,62 @@
|
|
1
|
-
Pericope
|
2
|
-
========
|
1
|
+
# Pericope
|
3
2
|
|
4
|
-
|
3
|
+
Pericope is a gem for parsing Bible references.
|
5
4
|
|
5
|
+
It recognizes common abbreviations and misspellings for names of the books of the Bible and a variety of ways of denoting ranges of chapters and verses. It can parse pericopes singly or out of a block of text. It's useful for comparing two pericopes for intersection and normalizing them into a well-formatted string.
|
6
6
|
|
7
|
-
|
8
|
-
=======
|
7
|
+
# Examples
|
9
8
|
|
10
|
-
|
9
|
+
### Recognize common abbreviations and misspellings for names of the books of the Bible
|
11
10
|
|
11
|
+
```ruby
|
12
|
+
Pericope.new("ps 118:17").to_s # => Psalm 118:17
|
13
|
+
Pericope.new("jas 3:1-5").to_s # => James 3:1-5
|
14
|
+
Pericope.new("1 jn 4:4").to_s # => 1 John 4:4
|
15
|
+
```
|
12
16
|
|
13
|
-
|
17
|
+
### Compare two pericopes to see if they intersect
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
a = Pericope.new("Mark 13:1-6")
|
21
|
+
b = Pericope.new("Mark 13:5")
|
22
|
+
c = Pericope.new("Mark 13:6, 7")
|
23
|
+
|
24
|
+
a.intersects?(b) # => true
|
25
|
+
a.intersects?(c) # => true
|
26
|
+
b.intersects?(c) # => false
|
27
|
+
```
|
28
|
+
|
29
|
+
### Parse pericopes out of a block of text
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
text = <<-TEXT
|
33
|
+
If then, the Word is so significant, great importance attaches to its exact form. It has the form of a promise as in Isaiah 43:1: "Do not fear, for I have redeemed you; I have called you by name, you are mine," or as in Luke 2:10-11, "Do not be afraid..to you is born this day...a Savior." (Bayer, p51)
|
34
|
+
TEXT
|
35
|
+
|
36
|
+
Pericope.parse(text) # => [Isaiah 43:1, Luke 2:10-11]
|
37
|
+
|
38
|
+
Pericope.split(text) # => [" If then, the Word is so significant, great importance attaches to its exact form. It has the form of a promise as in ", Isaiah 43:1, ": \"Do not fear, for I have redeemed you; I have called you by name, you are mine,\" or as in ", Luke 2:10-11, ", \"Do not be afraid..to you is born this day...a Savior.\" (Bayer, p51)\n"]
|
39
|
+
```
|
40
|
+
|
41
|
+
### Convert pericopes into arrays of verses and reconstruct them from arrays of verses
|
42
|
+
|
43
|
+
```ruby
|
44
|
+
array = Pericope.new("gen 1:1-3").to_a # => [1001001, 1001002, 1001003]
|
45
|
+
Pericope.new(array) # => Genesis 1:1-3
|
46
|
+
```
|
47
|
+
|
48
|
+
|
49
|
+
# History
|
50
|
+
|
51
|
+
### 0.6.0
|
52
|
+
|
53
|
+
- Removed the `index` attribute
|
54
|
+
- Deprecated the `pattern` argument to the method `Pericope.split(text, pattern=nil)`
|
55
|
+
- Improved performance by 2x
|
56
|
+
- Added this README
|
57
|
+
- Fixed a bug with parsing inverted [invalid] ranges (e.g. Mark 3-1)
|
58
|
+
|
59
|
+
|
60
|
+
# License
|
61
|
+
|
62
|
+
Copyright (c) 2012 Bob Lail, released under the MIT license
|
data/data/book_abbreviations.txt
CHANGED
data/lib/pericope.rb
CHANGED
@@ -1,70 +1,75 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require 'yaml'
|
4
3
|
require 'pericope/version'
|
5
4
|
|
6
5
|
class Pericope
|
7
6
|
attr_reader :book,
|
8
7
|
:book_chapter_count,
|
9
8
|
:book_name,
|
10
|
-
:index,
|
11
9
|
:original_string,
|
12
10
|
:ranges
|
13
11
|
|
14
12
|
|
15
13
|
def self.book_names
|
16
|
-
|
14
|
+
@book_names ||= ["Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Joshua", "Judges", "Ruth", "1 Samuel", "2 Samuel", "1 Kings", "2 Kings", "1 Chronicles", "2 Chronicles", "Ezra", "Nehemiah", "Esther", "Job", "Psalm", "Proverbs", "Ecclesiastes", "Song of Songs", "Isaiah", "Jeremiah", "Lamentations", "Ezekiel", "Daniel", "Hosea", "Joel", "Amos", "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk", "Zephaniah", "Haggai", "Zechariah", "Malachi", "Matthew", "Mark", "Luke", "John", "Acts", "Romans", "1 Corinthians", "2 Corinthians", "Galatians", "Ephesians", "Philippians", "Colossians", "1 Thessalonians", "2 Thessalonians", "1 Timothy", "2 Timothy", "Titus", "Philemon", "Hebrews", "James", "1 Peter", "2 Peter", "1 John ", "2 John", "3 John", "Jude", "Revelation"]
|
17
15
|
end
|
18
16
|
|
19
17
|
def self.book_name_regexes
|
20
|
-
|
21
|
-
book_abbreviations.map do |book|
|
22
|
-
[book, Regexp.new("\\b#{book[1]}\\b.? (#{ValidReference})", true)]
|
23
|
-
end
|
24
|
-
end
|
18
|
+
@book_name_regexes ||= book_abbreviations.map { |book_number, book_regex| [book_number, /\b#{book_regex}\b/] }
|
25
19
|
end
|
26
20
|
|
27
21
|
|
28
22
|
|
29
|
-
def initialize(
|
30
|
-
case
|
31
|
-
when String
|
32
|
-
|
33
|
-
raise "no pericope found in #{
|
23
|
+
def initialize(arg)
|
24
|
+
case arg
|
25
|
+
when String
|
26
|
+
attributes = Pericope.match_one(arg)
|
27
|
+
raise "no pericope found in #{arg} (#{arg.class})" if attributes.nil?
|
34
28
|
|
35
|
-
@original_string =
|
36
|
-
|
37
|
-
|
38
|
-
@ranges = parse_reference(match[1])
|
29
|
+
@original_string = attributes[:original_string]
|
30
|
+
set_book attributes[:book]
|
31
|
+
@ranges = attributes[:ranges]
|
39
32
|
|
40
33
|
when Array
|
41
|
-
set_book Pericope.get_book(
|
42
|
-
@ranges = Pericope.group_array_into_ranges(
|
34
|
+
set_book Pericope.get_book(arg.first)
|
35
|
+
@ranges = Pericope.group_array_into_ranges(arg)
|
43
36
|
|
44
37
|
else
|
45
|
-
|
38
|
+
attributes = arg
|
39
|
+
@original_string = attributes[:original_string]
|
40
|
+
set_book attributes[:book]
|
41
|
+
@ranges = attributes[:ranges]
|
42
|
+
|
46
43
|
end
|
47
44
|
end
|
48
45
|
|
49
46
|
|
50
47
|
|
48
|
+
def self.book_has_chapters?(book)
|
49
|
+
book_chapter_counts[book - 1] > 1
|
50
|
+
end
|
51
|
+
|
51
52
|
def book_has_chapters?
|
52
|
-
|
53
|
+
book_chapter_count > 1
|
53
54
|
end
|
54
55
|
|
55
56
|
|
56
57
|
|
58
|
+
# Differs from Pericope.new in that it won't raise an exception
|
59
|
+
# if text does not contain a pericope but will return nil instead.
|
57
60
|
def self.parse_one(text)
|
58
|
-
|
59
|
-
|
61
|
+
parse(text) do |pericope|
|
62
|
+
return pericope
|
63
|
+
end
|
64
|
+
nil
|
60
65
|
end
|
61
66
|
|
62
67
|
|
63
68
|
|
64
|
-
def self.parse(text
|
69
|
+
def self.parse(text)
|
65
70
|
pericopes = []
|
66
|
-
match_all
|
67
|
-
pericope = Pericope.new(
|
71
|
+
match_all(text) do |attributes|
|
72
|
+
pericope = Pericope.new(attributes)
|
68
73
|
if block_given?
|
69
74
|
yield pericope
|
70
75
|
else
|
@@ -77,23 +82,47 @@ class Pericope
|
|
77
82
|
|
78
83
|
|
79
84
|
def self.split(text, pattern=nil)
|
80
|
-
|
81
|
-
matches = matches.sort {|a,b| a.begin(0) <=> b.begin(0)} # put them in the order of their occurrence
|
82
|
-
|
85
|
+
puts "DEPRECATION NOTICE: split will no longer accept a 'pattern' argument in Pericope 0.7.0" if pattern
|
83
86
|
segments = []
|
84
87
|
start = 0
|
85
|
-
|
88
|
+
|
89
|
+
match_all(text) do |attributes, match|
|
90
|
+
|
86
91
|
pretext = text.slice(start...match.begin(0))
|
87
|
-
|
88
|
-
|
92
|
+
if pretext.length > 0
|
93
|
+
segments << pretext
|
94
|
+
yield pretext if block_given?
|
95
|
+
end
|
96
|
+
|
97
|
+
pericope = Pericope.new(attributes)
|
98
|
+
segments << pericope
|
99
|
+
yield pericope if block_given?
|
100
|
+
|
89
101
|
start = match.end(0)
|
90
102
|
end
|
103
|
+
|
91
104
|
pretext = text.slice(start...text.length)
|
92
|
-
|
105
|
+
if pretext.length > 0
|
106
|
+
segments << pretext
|
107
|
+
yield pretext if block_given?
|
108
|
+
end
|
93
109
|
|
110
|
+
segments = ___split_segments_by_pattern(segments, pattern) if pattern
|
94
111
|
segments
|
95
112
|
end
|
96
113
|
|
114
|
+
def self.___split_segments_by_pattern(segments, pattern)
|
115
|
+
segments2 = []
|
116
|
+
segments.each do |segment|
|
117
|
+
if segment.is_a? Pericope
|
118
|
+
segments2 << segment
|
119
|
+
else
|
120
|
+
segments2.concat(segment.split(pattern))
|
121
|
+
end
|
122
|
+
end
|
123
|
+
segments2
|
124
|
+
end
|
125
|
+
|
97
126
|
|
98
127
|
|
99
128
|
def self.extract(text)
|
@@ -149,19 +178,19 @@ class Pericope
|
|
149
178
|
def to_a
|
150
179
|
# one range per chapter
|
151
180
|
chapter_ranges = []
|
152
|
-
|
153
|
-
min_chapter = Pericope.get_chapter(range.
|
154
|
-
max_chapter = Pericope.get_chapter(range.
|
155
|
-
if
|
181
|
+
ranges.each do |range|
|
182
|
+
min_chapter = Pericope.get_chapter(range.begin)
|
183
|
+
max_chapter = Pericope.get_chapter(range.end)
|
184
|
+
if min_chapter == max_chapter
|
156
185
|
chapter_ranges << range
|
157
186
|
else
|
158
|
-
chapter_ranges << Range.new(range.
|
187
|
+
chapter_ranges << Range.new(range.begin, Pericope.get_last_verse(book, min_chapter))
|
159
188
|
for chapter in (min_chapter+1)...max_chapter
|
160
189
|
chapter_ranges << Range.new(
|
161
190
|
Pericope.get_first_verse(book, chapter),
|
162
191
|
Pericope.get_last_verse(book, chapter))
|
163
192
|
end
|
164
|
-
chapter_ranges << Range.new(Pericope.get_first_verse(book, max_chapter), range.
|
193
|
+
chapter_ranges << Range.new(Pericope.get_first_verse(book, max_chapter), range.end)
|
165
194
|
end
|
166
195
|
end
|
167
196
|
|
@@ -172,19 +201,17 @@ class Pericope
|
|
172
201
|
|
173
202
|
def well_formatted_reference
|
174
203
|
recent_chapter = nil # e.g. in 12:1-8, remember that 12 is the chapter when we parse the 8
|
175
|
-
recent_chapter = 1
|
204
|
+
recent_chapter = 1 unless book_has_chapters?
|
176
205
|
ranges.map do |range|
|
177
|
-
min_chapter = Pericope.get_chapter(range.
|
178
|
-
min_verse = Pericope.get_verse(range.
|
179
|
-
max_chapter = Pericope.get_chapter(range.
|
180
|
-
max_verse = Pericope.get_verse(range.
|
206
|
+
min_chapter = Pericope.get_chapter(range.begin)
|
207
|
+
min_verse = Pericope.get_verse(range.begin)
|
208
|
+
max_chapter = Pericope.get_chapter(range.end)
|
209
|
+
max_verse = Pericope.get_verse(range.end)
|
181
210
|
s = ""
|
182
211
|
|
183
|
-
if
|
212
|
+
if min_verse == 1 and max_verse >= Pericope.get_max_verse(book, max_chapter)
|
184
213
|
s << min_chapter.to_s
|
185
|
-
if max_chapter > min_chapter
|
186
|
-
s << "-#{max_chapter}"
|
187
|
-
end
|
214
|
+
s << "-#{max_chapter}" if max_chapter > min_chapter
|
188
215
|
else
|
189
216
|
if recent_chapter == min_chapter
|
190
217
|
s << min_verse.to_s
|
@@ -214,12 +241,14 @@ class Pericope
|
|
214
241
|
def intersects?(pericope)
|
215
242
|
return false unless pericope.is_a?(Pericope)
|
216
243
|
return false unless (self.book == pericope.book)
|
217
|
-
|
218
|
-
|
219
|
-
|
244
|
+
|
245
|
+
self.ranges.each do |self_range|
|
246
|
+
pericope.ranges.each do |other_range|
|
247
|
+
return true if (self_range.end >= other_range.begin) and (self_range.begin <= other_range.end)
|
220
248
|
end
|
221
249
|
end
|
222
|
-
|
250
|
+
|
251
|
+
false
|
223
252
|
end
|
224
253
|
|
225
254
|
|
@@ -241,23 +270,8 @@ private
|
|
241
270
|
|
242
271
|
def set_book(value)
|
243
272
|
@book = value || raise(ArgumentError, "must specify book")
|
244
|
-
@book_name = Pericope.book_names[@book-1]
|
245
|
-
@book_chapter_count = Pericope.book_chapter_counts[@book-1]
|
246
|
-
end
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
def parse_reference(reference)
|
251
|
-
reference = normalize_reference(reference)
|
252
|
-
(reference.nil? || reference.empty?) ? [] : parse_ranges(reference.split(/[,;]/))
|
253
|
-
end
|
254
|
-
|
255
|
-
def normalize_reference(reference)
|
256
|
-
[ [%r{(\d+)[".](\d+)},'\1:\2'], # 12"5 and 12.5 -> 12:5
|
257
|
-
[%r{(–|—)},'-'], # convert em dash and en dash to -
|
258
|
-
[%r{[^0-9,:;\-–—]},''] # remove everything but [0-9,;:-]
|
259
|
-
].each { |pattern, replacement| reference.gsub!(pattern, replacement) }
|
260
|
-
reference
|
273
|
+
@book_name = Pericope.book_names[@book - 1]
|
274
|
+
@book_chapter_count = Pericope.book_chapter_counts[@book - 1]
|
261
275
|
end
|
262
276
|
|
263
277
|
|
@@ -352,8 +366,8 @@ private
|
|
352
366
|
|
353
367
|
# matches the first valid Bible reference in the supplied string
|
354
368
|
def self.match_one(text)
|
355
|
-
match_all(text) do |
|
356
|
-
return
|
369
|
+
match_all(text) do |attributes|
|
370
|
+
return attributes
|
357
371
|
end
|
358
372
|
nil
|
359
373
|
end
|
@@ -361,41 +375,63 @@ private
|
|
361
375
|
|
362
376
|
|
363
377
|
# matches all valid Bible references in the supplied string
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
matches << match
|
382
|
-
end
|
383
|
-
end
|
378
|
+
def self.match_all(text)
|
379
|
+
text.scan(Pericope::PERICOPE_PATTERN) do
|
380
|
+
match = Regexp.last_match
|
381
|
+
|
382
|
+
book = recognize_book(match[1])
|
383
|
+
next unless book
|
384
|
+
|
385
|
+
ranges = parse_reference(book, match[2])
|
386
|
+
next if ranges.empty?
|
387
|
+
|
388
|
+
attributes = {
|
389
|
+
:original_string => match.to_s,
|
390
|
+
:book => book,
|
391
|
+
:ranges => ranges
|
392
|
+
}
|
393
|
+
|
394
|
+
yield attributes, match
|
384
395
|
end
|
385
|
-
block_given? ? text : matches
|
386
396
|
end
|
387
397
|
|
398
|
+
def self.recognize_book(book)
|
399
|
+
book = book.to_s.downcase
|
400
|
+
book_name_regexes.each do |book_regex|
|
401
|
+
return book_regex[0] if book =~ book_regex[1]
|
402
|
+
end
|
403
|
+
nil
|
404
|
+
end
|
405
|
+
|
406
|
+
def self.parse_reference(book, reference)
|
407
|
+
reference = normalize_reference(reference)
|
408
|
+
(reference.nil? || reference.empty?) ? [] : parse_ranges(book, reference.split(/[,;]/))
|
409
|
+
end
|
388
410
|
|
411
|
+
def self.normalize_reference(reference)
|
412
|
+
[ [%r{(\d+)[".](\d+)},'\1:\2'], # 12"5 and 12.5 -> 12:5
|
413
|
+
[%r{(–|—)},'-'], # convert em dash and en dash to -
|
414
|
+
[%r{[^0-9,:;\-–—]},''] # remove everything but [0-9,;:-]
|
415
|
+
].each { |pattern, replacement| reference.gsub!(pattern, replacement) }
|
416
|
+
reference
|
417
|
+
end
|
389
418
|
|
390
|
-
def parse_ranges(ranges)
|
419
|
+
def self.parse_ranges(book, ranges)
|
391
420
|
recent_chapter = nil # e.g. in 12:1-8, remember that 12 is the chapter when we parse the 8
|
392
|
-
recent_chapter = 1 if !
|
421
|
+
recent_chapter = 1 if !book_has_chapters?(book)
|
393
422
|
ranges.map do |range|
|
394
423
|
range = range.split('-') # parse the low end of a verse range and the high end separately
|
395
424
|
range << range[0] if (range.length < 2) # treat 12:4 as 12:4-12:4
|
396
425
|
lower_chapter_verse = range[0].split(':').map(&:to_i) # parse "3:28" to [3,28]
|
397
426
|
upper_chapter_verse = range[1].split(':').map(&:to_i) # parse "3:28" to [3,28]
|
398
427
|
|
428
|
+
# treat Mark 3-1 as Mark 3-3 and, eventually, Mark 3:1-35
|
429
|
+
if (lower_chapter_verse.length == 1) &&
|
430
|
+
(upper_chapter_verse.length == 1) &&
|
431
|
+
(upper_chapter_verse[0] < lower_chapter_verse[0])
|
432
|
+
upper_chapter_verse = lower_chapter_verse.dup
|
433
|
+
end
|
434
|
+
|
399
435
|
# make sure the low end of the range and the high end of the range
|
400
436
|
# are composed of arrays with two appropriate values: [chapter, verse]
|
401
437
|
chapter_range = false
|
@@ -435,10 +471,14 @@ private
|
|
435
471
|
|
436
472
|
|
437
473
|
def self.load_chapter_verse_counts
|
438
|
-
path = File.expand_path(File.dirname(__FILE__) + "/../data/chapter_verse_count.
|
474
|
+
path = File.expand_path(File.dirname(__FILE__) + "/../data/chapter_verse_count.txt")
|
475
|
+
chapter_verse_counts = []
|
439
476
|
File.open(path) do |file|
|
440
|
-
|
477
|
+
file.each do |text|
|
478
|
+
chapter_verse_counts << text.chomp.split("\t")[0..1].map(&:to_i)
|
479
|
+
end
|
441
480
|
end
|
481
|
+
Hash[chapter_verse_counts]
|
442
482
|
end
|
443
483
|
|
444
484
|
|
@@ -459,7 +499,7 @@ private
|
|
459
499
|
end
|
460
500
|
end
|
461
501
|
end
|
462
|
-
book_abbreviations
|
502
|
+
Hash[book_abbreviations]
|
463
503
|
end
|
464
504
|
|
465
505
|
|
@@ -471,13 +511,13 @@ private
|
|
471
511
|
|
472
512
|
|
473
513
|
def self.book_abbreviations
|
474
|
-
|
514
|
+
@book_abbreviations ||= load_book_abbreviations
|
475
515
|
end
|
476
516
|
|
477
517
|
|
478
518
|
|
479
519
|
def self.book_chapter_counts
|
480
|
-
|
520
|
+
@book_chapter_counts ||= [
|
481
521
|
# Chapters Book Name Book Number
|
482
522
|
50, # Genesis 1
|
483
523
|
40, # Exodus 2
|
@@ -547,9 +587,12 @@ private
|
|
547
587
|
22] # Revelation 66
|
548
588
|
end
|
549
589
|
|
550
|
-
|
551
|
-
|
552
|
-
|
590
|
+
|
591
|
+
BOOK_PATTERN = /\b(?:(?:1|2|3|i+|first|second|third|1st|2nd|3rd) )?(?:\w+| of )\b/
|
592
|
+
|
593
|
+
REFERENCE_PATTERN = '(?:\s*\d{1,3})(?:\s*[:\"\.]\s*\d{1,3}[ab]?(?:\s*[,;]\s*(?:\d{1,3}[:\"\.])?\s*\d{1,3}[ab]?)*)?(?:\s*[-–—]\s*(?:\d{1,3}\s*[:\"\.])?(?:\d{1,3}[ab]?)(?:\s*[,;]\s*(?:\d{1,3}\s*[:\"\.])?\s*\d{1,3}[ab]?)*)*'
|
594
|
+
|
595
|
+
PERICOPE_PATTERN = /(#{BOOK_PATTERN})\.? (#{REFERENCE_PATTERN})/i
|
553
596
|
|
554
597
|
|
555
598
|
|