mericope 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/mericope.rb ADDED
@@ -0,0 +1,550 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'mericope/version'
4
+
5
+ class Mericope
6
+ attr_reader :book,
7
+ :book_chapter_count,
8
+ :book_name,
9
+ :original_string,
10
+ :ranges
11
+
12
+
13
# Returns the distinct book names (third column of the chapter/verse table)
# in the order they first appear. The result is memoized.
def self.book_names
  @book_names ||= chapter_verse_count_books.group_by { |row| row[2] }.keys
end
17
+
18
# Returns an array of [book_number, Regexp] pairs, one per entry of
# book_abbreviations. Each regexp wraps the entry's alternation in word
# boundaries. The result is memoized.
def self.book_name_regexes
  @book_name_regexes ||= book_abbreviations.map do |(number, alternatives)|
    [number, /\b#{alternatives}\b/]
  end
end
22
+
23
+
24
+
25
# Builds a Mericope from one of three inputs:
#   * a String  - parsed with Mericope.match_one; raises if nothing is found
#   * an Array  - verse ids (coerced with to_i); the book is taken from the
#                 first id and the ids are grouped into ranges
#   * otherwise - treated as an attributes hash with the keys
#                 :original_string, :book and :ranges
def initialize(arg)
  if arg.is_a?(String)
    attributes = Mericope.match_one(arg)
    raise "no mericope found in #{arg} (#{arg.class})" if attributes.nil?
  elsif arg.is_a?(Array)
    verse_ids = arg.map(&:to_i)
    set_book Mericope.get_book(verse_ids.first)
    @ranges = Mericope.group_array_into_ranges(verse_ids)
    return
  else
    attributes = arg
  end

  @original_string = attributes[:original_string]
  set_book attributes[:book]
  @ranges = attributes[:ranges]
end
48
+
49
+
50
+
51
# True when the book with the given 1-based number has more than one chapter.
def self.book_has_chapters?(book)
  chapter_total = book_chapter_counts[book - 1]
  chapter_total > 1
end
54
+
55
# True when this reference's book has more than one chapter.
def book_has_chapters?
  chapter_total = book_chapter_count
  chapter_total > 1
end
58
+
59
+
60
+
61
# Returns the first mericope found in +text+, or nil when there is none.
# Unlike Mericope.new, it does not raise when the text contains no reference.
def self.parse_one(text)
  # Returning from inside the block stops the scan at the first hit.
  parse(text) { |first_found| return first_found }
  nil
end
69
+
70
+
71
+
72
# Finds every reference in +text+.
# With a block: yields each Mericope as it is found and returns +text+.
# Without a block: returns an array of the Mericopes found.
def self.parse(text)
  if block_given?
    match_all(text) { |attributes| yield Mericope.new(attributes) }
    return text
  end

  found = []
  match_all(text) { |attributes| found << Mericope.new(attributes) }
  found
end
84
+
85
+
86
+
87
# Splits +text+ into an array that alternates plain String segments and
# Mericope objects, in their original order. Empty String segments are
# dropped. When a block is given, each segment is also yielded as it is
# produced. The optional +pattern+ (deprecated) further splits the String
# segments.
def self.split(text, pattern=nil)
  puts "DEPRECATION NOTICE: split will no longer accept a 'pattern' argument in Mericope 0.7.0" if pattern
  pieces = []
  cursor = 0 # index just past the previous match

  match_all(text) do |attributes, match|
    leading = text[cursor...match.begin(0)]
    unless leading.length == 0
      pieces << leading
      yield leading if block_given?
    end

    reference = Mericope.new(attributes)
    pieces << reference
    yield reference if block_given?

    cursor = match.end(0)
  end

  # whatever follows the last match
  trailing = text[cursor...text.length]
  unless trailing.length == 0
    pieces << trailing
    yield trailing if block_given?
  end

  pattern ? ___split_segments_by_pattern(pieces, pattern) : pieces
end
116
+
117
# Returns a new array in which every non-Mericope segment has been replaced
# by the pieces of segment.split(pattern). Mericope segments pass through
# untouched.
def self.___split_segments_by_pattern(segments, pattern)
  segments.flat_map do |segment|
    segment.is_a?(Mericope) ? [segment] : segment.split(pattern)
  end
end
128
+
129
+
130
+
131
# Deprecated. Separates +text+ into its plain text and its references.
# Returns a hash with :text (the String segments concatenated) and
# :mericopes (the Mericope segments, in order).
def self.extract(text)
  puts "DEPRECATION NOTICE: the 'extract' method will be removed in Mericope 0.7.0"
  strings, references = split(text).partition { |segment| segment.is_a?(String) }
  {:text => strings.join, :mericopes => references}
end
145
+
146
+
147
+
148
# Returns a copy of +text+ in which every recognized reference is replaced
# by "{{id id ...}}", the space-separated verse ids from the reference's to_a.
def self.sub(text)
  rendered = split(text).map do |segment|
    segment.is_a?(String) ? segment : "{{#{segment.to_a.join(" ")}}}"
  end
  rendered.join
end
158
+
159
+
160
+
161
# Inverse of sub: replaces each "{{id id ...}}" token (ids of 7-8 digits)
# with the string form of the Mericope built from those ids.
def self.rsub(text)
  text.gsub(/\{\{(\d{7,8} ?)+\}\}/) do |token|
    verse_ids = token[2...-2].split.map(&:to_i) # strip the surrounding braces
    Mericope.new(verse_ids).to_s
  end
end
167
+
168
+
169
+
170
+ def to_s
171
+ "#{book_name} #{self.well_formatted_reference}"
172
+ end
173
+
174
+
175
+
176
+ def report
177
+ puts "DEPRECATION NOTICE: the 'report' method will be removed in Mericope 0.7.0"
178
+ " #{self.original_string} => #{self}"
179
+ end
180
+
181
+
182
+
183
# Expands the reference into a flat array of verse ids.
# A range that spans several chapters is first cut into one range per
# chapter, so that only ids between a chapter's first and last verse are
# produced.
def to_a
  per_chapter = ranges.flat_map do |range|
    first_chapter = Mericope.get_chapter(range.begin)
    last_chapter = Mericope.get_chapter(range.end)
    next [range] if first_chapter == last_chapter

    head = Range.new(range.begin, Mericope.get_last_verse(book, first_chapter))
    # whole chapters strictly between the first and the last one
    middle = ((first_chapter + 1)...last_chapter).map do |chapter|
      Range.new(
        Mericope.get_first_verse(book, chapter),
        Mericope.get_last_verse(book, chapter))
    end
    tail = Range.new(Mericope.get_first_verse(book, last_chapter), range.end)
    [head, *middle, tail]
  end

  per_chapter.flat_map(&:to_a)
end
204
+
205
+
206
+
207
# Formats the ranges as a comma-separated chapter/verse string, e.g.
# "3:5-8, 10, 4-5".
#
# * A range that starts at verse 1 and reaches the last verse of its final
#   chapter is written as whole chapters ("4" or "4-5").
# * Otherwise the chapter is written only when it differs from the chapter
#   most recently written ("3:5-8, 10" rather than "3:5-8, 3:10").
# * For a book without chapters the chapter is treated as already known (1),
#   so only verse numbers are written.
def well_formatted_reference
  # e.g. in 12:1-8, remember that 12 is the chapter when we format the 8
  recent_chapter = book_has_chapters? ? nil : 1

  ranges.map do |range|
    min_chapter = Mericope.get_chapter(range.begin)
    min_verse = Mericope.get_verse(range.begin)
    max_chapter = Mericope.get_chapter(range.end)
    max_verse = Mericope.get_verse(range.end)
    text = ""

    if min_verse == 1 && max_verse >= Mericope.get_max_verse(book, max_chapter)
      text << min_chapter.to_s
      text << "-#{max_chapter}" if max_chapter > min_chapter
    else
      if recent_chapter == min_chapter
        text << min_verse.to_s
      else
        recent_chapter = min_chapter
        text << "#{min_chapter}:#{min_verse}"
      end

      # Range#size answers without enumerating every id in the range
      if range.size > 1
        text << "-"
        if min_chapter == max_chapter
          text << max_verse.to_s
        else
          recent_chapter = max_chapter
          text << "#{max_chapter}:#{max_verse}"
        end
      end
    end

    text
  end.join(", ")
end
243
+
244
+
245
+
246
# True when +mericope+ is a Mericope for the same book and at least one of
# its ranges overlaps one of this reference's ranges (shared endpoints count
# as overlap).
def intersects?(mericope)
  return false unless mericope.is_a?(Mericope)
  return false unless book == mericope.book

  ranges.any? do |mine|
    mericope.ranges.any? do |theirs|
      mine.end >= theirs.begin && mine.begin <= theirs.end
    end
  end
end
258
+
259
+
260
+
261
# Looks up the verse count stored for the given book and chapter.
# The lookup key is the chapter's id with the verse digits set to 000.
# Returns nil when the table has no entry for that key.
def self.get_max_verse(book, chapter)
  chapter_key = book * 1000000 + chapter * 1000
  chapter_verse_counts[chapter_key]
end
265
+
266
# Returns the chapter count recorded for the book with the given 1-based
# number.
def self.get_max_chapter(book)
  index = book - 1
  book_chapter_counts[index]
end
269
+
270
+
271
+
272
+ private
273
+
274
+
275
+
276
# Stores the book number and caches the matching book name and chapter count.
# Raises ArgumentError when no book is given.
def set_book(value)
  raise(ArgumentError, "must specify book") unless value

  @book = value
  index = @book - 1
  @book_name = Mericope.book_names[index]
  @book_chapter_count = Mericope.book_chapter_counts[index]
end
281
+
282
+
283
+
284
# Id of verse 1 of the given chapter.
def self.get_first_verse(book, chapter)
  first_verse = 1
  get_id(book, chapter, first_verse)
end
287
+
288
# Id of the last verse of the given chapter, using the chapter's verse count.
def self.get_last_verse(book, chapter)
  final_verse = get_max_verse(book, chapter)
  get_id(book, chapter, final_verse)
end
291
+
292
# Id that directly follows +id+ (simply id + 1; no chapter-end check is made).
def self.get_next_verse(id)
  id.succ
end
295
+
296
# Id of verse 1 of the chapter after the one +id+ belongs to, as computed by
# get_id.
def self.get_start_of_next_chapter(id)
  following_chapter = get_chapter(id) + 1
  get_id(get_book(id), following_chapter, 1)
end
302
+
303
# Clamps a book number to the books that are present in the loaded data.
#
# The upper bound is the number of books in book_chapter_counts rather than
# a hard-coded constant, so a clamped book number always has chapter data.
def self.to_valid_book(book)
  coerce_to_range(book, 1..book_chapter_counts.length)
end
306
+
307
# Clamps +chapter+ to 1..(number of chapters in +book+).
def self.to_valid_chapter(book, chapter)
  highest = get_max_chapter(book)
  coerce_to_range(chapter, 1..highest)
end
310
+
311
# Clamps +verse+ to 1..(number of verses in the given chapter).
def self.to_valid_verse(book, chapter, verse)
  highest = get_max_verse(book, chapter)
  coerce_to_range(verse, 1..highest)
end
314
+
315
# Clamps +number+ to +range+: values below range.begin become range.begin,
# values above range.end become range.end, anything else is returned as is.
def self.coerce_to_range(number, range)
  if number < range.begin
    range.begin
  elsif number > range.end
    range.end
  else
    number
  end
end
320
+
321
# Builds a verse id from book, chapter and verse after clamping each to its
# valid range. The id is book * 1,000,000 + chapter * 1,000 + verse.
def self.get_id(book, chapter, verse)
  valid_book = to_valid_book(book)
  valid_chapter = to_valid_chapter(valid_book, chapter)
  valid_verse = to_valid_verse(valid_book, valid_chapter, verse)

  valid_book * 1000000 + valid_chapter * 1000 + valid_verse
end
328
+
329
# Extracts the book number: everything left of the six least significant
# digits of the id.
def self.get_book(id)
  digits_per_book = 1_000_000
  id / digits_per_book
end
332
+
333
# Extracts the chapter number: the 4th through 6th least significant digits
# of the id (the three digits directly left of the verse digits).
def self.get_chapter(id)
  within_book = id % 1_000_000 # drop the book digits
  within_book / 1_000          # drop the verse digits
end
336
+
337
# Extracts the verse number: the three least significant digits of the id.
def self.get_verse(id)
  id.modulo(1_000)
end
340
+
341
+
342
+
343
# Groups verse ids into an array of inclusive Ranges of contiguous verses.
#
# Two ids are contiguous when the second is the next id after the first, or
# is the first verse of the following chapter (as reported by
# get_start_of_next_chapter).
#
# The input may be nested and may contain nils or duplicates; it is
# flattened, compacted, de-duplicated and sorted on a copy, so the caller's
# array is left untouched. Returns [] for nil input or when no ids remain
# after cleaning.
def self.group_array_into_ranges(array)
  return [] if array.nil?

  ids = array.flatten.compact.uniq.sort
  return [] if ids.empty?

  ranges = []
  range_start = range_end = ids.first
  ids.drop(1).each do |id|
    contiguous = (id == get_next_verse(range_end)) ||
                 (id == get_start_of_next_chapter(range_end))
    if contiguous
      range_end = id
    else
      # gap found: close the current range and start a new one
      ranges << (range_start..range_end)
      range_start = range_end = id
    end
  end
  ranges << (range_start..range_end)

  ranges
end
369
+
370
+
371
+
372
# Returns the attributes hash of the first valid reference that match_all
# finds in +text+, or nil when it finds none.
def self.match_one(text)
  # Returning from inside the block stops the scan at the first hit.
  match_all(text) { |first_attributes| return first_attributes }
  nil
end
379
+
380
+
381
+
382
# Scans +text+ with MERICOPE_PATTERN and yields every valid reference.
# A candidate is skipped when its book is not recognized or when its
# reference part yields no ranges. Yields two values: an attributes hash
# (:original_string, :book, :ranges) and the MatchData of the candidate.
def self.match_all(text)
  text.scan(Mericope::MERICOPE_PATTERN) do
    found = Regexp.last_match

    book_number = recognize_book(found[1])
    next unless book_number

    verse_ranges = parse_reference(book_number, found[2])
    next if verse_ranges.empty?

    yield({
      :original_string => found.to_s,
      :book => book_number,
      :ranges => verse_ranges
    }, found)
  end
end
402
+
403
# Returns the number of the first book whose regexp matches the downcased
# +book+ text, or nil when none matches.
def self.recognize_book(book)
  name = book.to_s.downcase
  number, _regex = book_name_regexes.find { |(_number, regex)| name =~ regex }
  number
end
410
+
411
# Normalizes the chapter/verse text, splits it on commas and semicolons, and
# converts the pieces into ranges of verse ids for +book+.
def self.parse_reference(book, reference)
  pieces = normalize_reference(reference).split(/[,;]/)
  parse_ranges(book, pieces)
end
415
+
416
# Applies every [regex, replacement] pair of NORMALIZATIONS, in order, to the
# string form of +reference+ and returns the result.
#
# Non-destructive substitution is used so the caller's string is never
# modified (and frozen strings are accepted).
def self.normalize_reference(reference)
  NORMALIZATIONS.inject(reference.to_s) do |text, (regex, replacement)|
    text.gsub(regex, replacement)
  end
end
421
+
422
# Converts normalized range strings (e.g. "3:5-8", "11", "3-4") into Ranges
# of verse ids for +book+.
#
# A bare number is read as a verse of the most recently seen chapter when
# there is one (or of chapter 1 for a book without chapters); otherwise it
# is read as a whole chapter. Chapters and verses are clamped to valid
# values.
def self.parse_ranges(book, ranges)
  # e.g. in 12:1-8, remember that 12 is the chapter when we parse the 8
  recent_chapter = book_has_chapters?(book) ? nil : 1

  ranges.map do |range|
    # parse the low end and the high end of the range separately;
    # treat 12:4 as 12:4-12:4
    parts = range.split('-')
    low_text = parts[0]
    high_text = parts.length < 2 ? parts[0] : parts[1]
    low = low_text.split(':').map(&:to_i)   # parse "3:28" to [3,28]
    high = high_text.split(':').map(&:to_i) # parse "3:28" to [3,28]

    # treat Mark 3-1 as Mark 3-3 and, eventually, Mark 3:1-35
    if low.length == 1 && high.length == 1 && high[0] < low[0]
      high = low.dup
    end

    # make both ends two-element arrays: [chapter, verse]
    chapter_range = false
    if low.length >= 2
      low[0] = Mericope.to_valid_chapter(book, low[0])
    elsif recent_chapter
      low.unshift recent_chapter # e.g. the 11 in 12:1-8,11 belongs to chapter 12
    else
      low[0] = Mericope.to_valid_chapter(book, low[0])
      low << 1 # no verse specified; this is a range of chapters, start with verse 1
      chapter_range = true
    end
    low[1] = Mericope.to_valid_verse(book, *low)

    if high.length >= 2
      high[0] = Mericope.to_valid_chapter(book, high[0])
    elsif chapter_range
      high[0] = Mericope.to_valid_chapter(book, high[0])
      high << Mericope.get_max_verse(book, high[0]) # range of chapters: end with the last verse
    else
      high.unshift low[0] # e.g. the 8 in 12:1-8 belongs to chapter 12
    end
    high[1] = Mericope.to_valid_verse(book, *high)

    recent_chapter = high[0] # remember the last chapter

    Range.new(
      Mericope.get_id(book, *low),
      Mericope.get_id(book, *high))
  end
end
473
+
474
+
475
+
476
# Reads ../data/chapter_verse_count.txt (relative to this file) and returns
# one [id, count, book, chapter] array per line. Lines are tab-separated;
# the first, second and fourth fields are converted with to_i, the third is
# kept as a string.
def self.load_chapter_verse_count_books
  path = File.expand_path(File.dirname(__FILE__) + "/../data/chapter_verse_count.txt")
  File.open(path) do |file|
    file.map do |line|
      fields = line.chomp.split("\t")
      [fields[0].to_i, fields[1].to_i, fields[2], fields[3].to_i]
    end
  end
end
488
+
489
+
490
+
491
# Reads ../data/book_abbreviations.txt (relative to this file) and returns a
# hash of book number => regexp source string "(?:abbr1|abbr2|...)".
#
# The file contains tab-separated values. The first value is the ordinal of
# the book; subsequent values are abbreviations and misspellings that should
# be recognized as that book. Lines starting with "#" are comments.
#
# Blank lines and rows without any abbreviation are skipped: they would
# otherwise produce the empty alternation "(?:)", which matches any text.
def self.load_book_abbreviations
  path = File.expand_path(File.dirname(__FILE__) + "/../data/book_abbreviations.txt")
  pairs = []
  File.open(path) do |file|
    file.each do |line|
      next if line.start_with?("#") # skip comments

      fields = line.chomp.split("\t")
      next if fields.empty? # blank line

      book_number = fields.shift.to_i
      next if fields.empty? # no abbreviations listed for this row

      pairs << [book_number, "(?:#{fields.join("|")})"]
    end
  end
  Hash[pairs]
end
509
+
510
+
511
+
512
# Memoized rows of the chapter/verse count table; loaded on first use.
def self.chapter_verse_count_books
  @chapter_verse_count_books || (@chapter_verse_count_books = load_chapter_verse_count_books)
end
515
+
516
+
517
+
518
# Memoized hash mapping each row's id (first column) to its count (second
# column).
def self.chapter_verse_counts
  @chapter_verse_counts ||= chapter_verse_count_books.each_with_object({}) do |row, counts|
    counts[row[0]] = row[1]
  end
end
521
+
522
+
523
+
524
# Memoized hash of book number => abbreviation pattern; loaded on first use.
def self.book_abbreviations
  @book_abbreviations || (@book_abbreviations = load_book_abbreviations)
end
527
+
528
+
529
+
530
# Memoized array with one entry per book, in order of first appearance:
# the chapter number of the book's last row in the chapter/verse table.
# NOTE(review): this is the highest chapter only if each book's rows are
# listed in ascending chapter order - confirm against the data file.
def self.book_chapter_counts
  @book_chapter_counts ||= begin
    rows_by_book = chapter_verse_count_books.group_by { |row| row[2] }
    rows_by_book.map { |_name, rows| rows.last.last }
  end
end
537
+
538
+
539
# Candidate book name: an optional ordinal prefix ("1 ", "ii ", "first ",
# "2nd ", ...) followed by a single word.
BOOK_PATTERN = /\b(?:(?:1|2|3|i+|first|second|third|1st|2nd|3rd) )?(?:\w+| of )\b/.freeze

# Chapter/verse part of a reference: a number, optionally followed by
# verses (separated from the chapter by ':', '"' or '.'), comma/semicolon
# lists, and hyphen/dash ranges. Kept as a String so it can be interpolated.
REFERENCE_PATTERN = '(?:\s*\d{1,3})(?:\s*[:\"\.]\s*\d{1,3}[ab]?(?:\s*[,;]\s*(?:\d{1,3}[:\"\.])?\s*\d{1,3}[ab]?)*)?(?:\s*[-–—]\s*(?:\d{1,3}\s*[:\"\.])?(?:\d{1,3}[ab]?)(?:\s*[,;]\s*(?:\d{1,3}\s*[:\"\.])?\s*\d{1,3}[ab]?)*)*'.freeze

# Full reference: capture 1 is the book text, capture 2 the chapter/verse text.
MERICOPE_PATTERN = /(#{BOOK_PATTERN})\.? (#{REFERENCE_PATTERN})/i.freeze

# [regex, replacement] pairs applied in order to the chapter/verse text.
NORMALIZATIONS = [
  [/(\d+)[".](\d+)/, '\1:\2'], # 12"5 and 12.5 -> 12:5
  [/[–—]/, '-'], # convert em dash and en dash to -
  [/[^0-9,:;\-–—]/, ''] # remove everything but [0-9,;:-]
].freeze
550
+ end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mericope
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Bob Lail
8
+ - Duane Johnson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-04-15 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: activesupport
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '4.0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '4.0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '10.4'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '10.4'
42
+ - !ruby/object:Gem::Dependency
43
+ name: turn
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '0.9'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '0.9'
56
+ - !ruby/object:Gem::Dependency
57
+ name: pry
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '0.10'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '0.10'
70
+ description: It recognizes common abbreviations of the books of the Book of Mormon
71
+ and a variety of ways of denoting ranges of chapters and verses. Based on Pericope.
72
+ email:
73
+ - bob.lailfamily@gmail.com
74
+ - duane.johnson@gmail.com
75
+ executables:
76
+ - mericope
77
+ extensions: []
78
+ extra_rdoc_files: []
79
+ files:
80
+ - README.mdown
81
+ - bin/mericope
82
+ - data/book_abbreviations.txt
83
+ - data/chapter_verse_count.txt
84
+ - lib/cli/base.rb
85
+ - lib/cli/command.rb
86
+ - lib/mericope.rb
87
+ - lib/mericope/cli.rb
88
+ - lib/mericope/version.rb
89
+ homepage: http://github.com/wordtreefoundation/mericope
90
+ licenses: []
91
+ metadata: {}
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 2.2.2
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: Mericope is a gem for parsing Book of Mormon references.
112
+ test_files: []