mericope 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/mericope.rb ADDED
@@ -0,0 +1,550 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'mericope/version'
4
+
5
+ class Mericope
6
+ attr_reader :book,
7
+ :book_chapter_count,
8
+ :book_name,
9
+ :original_string,
10
+ :ranges
11
+
12
+
13
# Distinct book names, taken from the third column of the chapter/verse
# table in the order they first appear. Memoized.
def self.book_names
  @book_names ||= begin
    names = chapter_verse_count_books.map { |row| row[2] }
    names.uniq
  end
end
17
+
18
# Array of [book_number, Regexp] pairs built from book_abbreviations; each
# abbreviation pattern is wrapped in word boundaries. Memoized.
def self.book_name_regexes
  @book_name_regexes ||= book_abbreviations.map do |number, pattern|
    [number, /\b#{pattern}\b/]
  end
end
22
+
23
+
24
+
25
# Builds a Mericope from one of three kinds of input:
#
#   String - parsed with Mericope.match_one; raises a RuntimeError when no
#            reference is found in it
#   Array  - verse ids (each coerced with to_i); the book is derived from
#            the first id and the ids are grouped into ranges
#   other  - used as an attributes hash with the keys :original_string,
#            :book and :ranges
#
# Raises ArgumentError ("must specify book", from set_book) when no book can
# be determined. That now includes an empty Array, which previously failed
# with a NoMethodError inside get_book(nil).
def initialize(arg)
  case arg
  when Array
    ids = arg.map(&:to_i)
    # An empty list has no first id to read the book from; hand nil to
    # set_book so the caller gets the documented ArgumentError.
    set_book(ids.empty? ? nil : Mericope.get_book(ids.first))
    @ranges = Mericope.group_array_into_ranges(ids)
  else
    attributes = arg
    if arg.is_a?(String)
      attributes = Mericope.match_one(arg)
      raise "no mericope found in #{arg} (#{arg.class})" if attributes.nil?
    end

    @original_string = attributes[:original_string]
    set_book attributes[:book]
    @ranges = attributes[:ranges]
  end
end
48
+
49
+
50
+
51
# True when the given book (1-based number) has more than one chapter
# according to book_chapter_counts.
def self.book_has_chapters?(book)
  chapter_total = book_chapter_counts[book - 1]
  chapter_total > 1
end
54
+
55
# True when this reference's book has more than one chapter.
def book_has_chapters?
  chapter_total = book_chapter_count
  chapter_total > 1
end
58
+
59
+
60
+
61
# Returns the first Mericope found in text, or nil when there is none.
# Unlike Mericope.new, a text without any reference does not raise.
def self.parse_one(text)
  # Returning from inside the block stops parsing at the first hit.
  parse(text) { |found| return found }
  nil
end
69
+
70
+
71
+
72
# Finds every reference in text.
#
# With a block: yields a Mericope for each match and returns text.
# Without a block: returns an Array of the Mericope objects found.
def self.parse(text)
  unless block_given?
    found = []
    match_all(text) { |attributes| found << Mericope.new(attributes) }
    return found
  end

  match_all(text) { |attributes| yield Mericope.new(attributes) }
  text
end
84
+
85
+
86
+
87
# Splits text into an Array of plain-text Strings and Mericope objects, in
# the order they occur. When a block is given, each piece is also yielded
# as it is found.
#
# pattern - deprecated. When supplied, a notice is printed and the String
#           pieces of the returned Array are further split on it (pieces
#           already yielded to the block are not re-split).
def self.split(text, pattern=nil)
  puts "DEPRECATION NOTICE: split will no longer accept a 'pattern' argument in Mericope 0.7.0" if pattern
  pieces = []
  cursor = 0 # index just past the most recent match

  match_all(text) do |attributes, match|
    leading = text[cursor...match.begin(0)]
    unless leading.empty?
      pieces << leading
      yield leading if block_given?
    end

    reference = Mericope.new(attributes)
    pieces << reference
    yield reference if block_given?

    cursor = match.end(0)
  end

  # Whatever follows the last match (or the whole text when nothing matched).
  trailing = text[cursor...text.length]
  unless trailing.empty?
    pieces << trailing
    yield trailing if block_given?
  end

  pattern ? ___split_segments_by_pattern(pieces, pattern) : pieces
end
116
+
117
# Returns a new Array in which every non-Mericope segment has been replaced
# by the pieces obtained from splitting it on pattern; Mericope segments
# are kept as they are.
def self.___split_segments_by_pattern(segments, pattern)
  segments.flat_map do |segment|
    segment.is_a?(Mericope) ? [segment] : segment.split(pattern)
  end
end
128
+
129
+
130
+
131
# Deprecated (prints a notice on every call). Separates text into its plain
# text and its references.
#
# Returns a Hash: :text is the concatenation of all non-reference pieces,
# :mericopes is the Array of Mericope objects, both in document order.
def self.extract(text)
  puts "DEPRECATION NOTICE: the 'extract' method will be removed in Mericope 0.7.0"
  plain = ""
  references = []
  split(text).each do |segment|
    (segment.is_a?(String) ? plain : references) << segment
  end
  {:text => plain, :mericopes => references}
end
145
+
146
+
147
+
148
# Returns a copy of text in which every reference is replaced by a token of
# the form {{id id ...}}, listing the reference's verse ids (its to_a)
# separated by single spaces. Plain text is copied through unchanged.
def self.sub(text)
  split(text).inject("") do |output, segment|
    output << (segment.is_a?(String) ? segment : "{{#{segment.to_a.join(" ")}}}")
  end
end
158
+
159
+
160
+
161
# Reverses sub: every {{id id ...}} token (ids of 7 or 8 digits) in text is
# replaced by the to_s of a Mericope built from those ids.
def self.rsub(text)
  text.gsub(/\{\{(\d{7,8} ?)+\}\}/) do |token|
    # Drop the surrounding braces, then read the space-separated ids.
    verse_ids = token[2...-2].split.map(&:to_i)
    Mericope.new(verse_ids).to_s
  end
end
167
+
168
+
169
+
170
# The book name followed by the formatted chapter/verse reference,
# separated by a single space.
def to_s
  reference = well_formatted_reference
  "#{book_name} #{reference}"
end
173
+
174
+
175
+
176
# Deprecated (prints a notice on every call). Returns a one-line summary
# showing the original string and the normalized reference.
def report
  puts "DEPRECATION NOTICE: the 'report' method will be removed in Mericope 0.7.0"
  source = original_string
  " #{source} => #{self}"
end
180
+
181
+
182
+
183
# Returns every verse id covered by this reference as a flat Array.
#
# A range that crosses chapter boundaries is first cut into one range per
# chapter, so that the ids between the last verse of one chapter and the
# first verse of the next are not included.
def to_a
  per_chapter = ranges.flat_map do |range|
    first_chapter = Mericope.get_chapter(range.begin)
    last_chapter = Mericope.get_chapter(range.end)

    if first_chapter == last_chapter
      [range]
    else
      # start of the range through the end of its first chapter
      head = Range.new(range.begin, Mericope.get_last_verse(book, first_chapter))
      # every chapter strictly in between, in full
      middle = ((first_chapter + 1)...last_chapter).map do |chapter|
        Range.new(
          Mericope.get_first_verse(book, chapter),
          Mericope.get_last_verse(book, chapter))
      end
      # start of the last chapter through the end of the range
      tail = Range.new(Mericope.get_first_verse(book, last_chapter), range.end)
      [head] + middle + [tail]
    end
  end

  per_chapter.flat_map(&:to_a)
end
204
+
205
+
206
+
207
# Formats the ranges as a human-readable reference such as
# "3:16-18, 20, 4-5": ranges are joined with ", ", a range covering whole
# chapters is written as chapter numbers only, and the chapter is omitted
# from a verse when it equals the chapter most recently written out.
def well_formatted_reference
  # Chapter most recently written with a verse; in a single-chapter book
  # the chapter is always taken to be 1.
  recent_chapter = book_has_chapters? ? nil : 1

  formatted = ranges.map do |range|
    min_chapter = Mericope.get_chapter(range.begin)
    min_verse = Mericope.get_verse(range.begin)
    max_chapter = Mericope.get_chapter(range.end)
    max_verse = Mericope.get_verse(range.end)

    if min_verse == 1 && max_verse >= Mericope.get_max_verse(book, max_chapter)
      # whole chapter(s): "4" or "4-5"
      max_chapter > min_chapter ? "#{min_chapter}-#{max_chapter}" : min_chapter.to_s
    else
      part =
        if recent_chapter == min_chapter
          min_verse.to_s
        else
          recent_chapter = min_chapter
          "#{min_chapter}:#{min_verse}"
        end

      if range.count > 1
        part << "-"
        if min_chapter == max_chapter
          part << max_verse.to_s
        else
          recent_chapter = max_chapter
          part << "#{max_chapter}:#{max_verse}"
        end
      end

      part
    end
  end

  formatted.join(", ")
end
243
+
244
+
245
+
246
# True when the argument is a Mericope for the same book and at least one
# of its ranges overlaps one of this reference's ranges (shared endpoints
# count as overlap). Returns false for anything else.
def intersects?(mericope)
  return false unless mericope.is_a?(Mericope) && book == mericope.book

  ranges.any? do |mine|
    mericope.ranges.any? do |theirs|
      mine.end >= theirs.begin && mine.begin <= theirs.end
    end
  end
end
258
+
259
+
260
+
261
# Looks up the verse count stored for the given book and chapter in
# chapter_verse_counts, keyed by the chapter id (book and chapter digits
# with verse 000). Returns nil when the table has no such key.
def self.get_max_verse(book, chapter)
  chapter_id = (book * 1_000_000) + (chapter * 1_000)
  chapter_verse_counts[chapter_id]
end
265
+
266
# Chapter count recorded for the given book (1-based number) in
# book_chapter_counts.
def self.get_max_chapter(book)
  index = book - 1
  book_chapter_counts[index]
end
269
+
270
+
271
+
272
+ private
273
+
274
+
275
+
276
# Stores the book number and caches the matching book name and chapter
# count. Raises ArgumentError when value is nil or false.
def set_book(value)
  raise(ArgumentError, "must specify book") unless value

  @book = value
  index = @book - 1
  @book_name = Mericope.book_names[index]
  @book_chapter_count = Mericope.book_chapter_counts[index]
end
281
+
282
+
283
+
284
# Verse id of verse 1 of the given chapter, as built by get_id.
def self.get_first_verse(book, chapter)
  first_verse = 1
  get_id(book, chapter, first_verse)
end
287
+
288
# Verse id of the last verse of the given chapter: get_id applied to the
# chapter's verse count from get_max_verse.
def self.get_last_verse(book, chapter)
  last_verse = get_max_verse(book, chapter)
  get_id(book, chapter, last_verse)
end
291
+
292
# The id immediately after the given one (id + 1). No chapter rollover is
# applied here; see get_start_of_next_chapter for that case.
def self.get_next_verse(id)
  id + 1
end
295
+
296
# Verse id for verse 1 of the chapter following the one id belongs to,
# built through get_id (which coerces its arguments into valid ranges).
def self.get_start_of_next_chapter(id)
  next_chapter = get_chapter(id) + 1
  get_id(get_book(id), next_chapter, 1)
end
302
+
303
# Coerces a book number into the range of books that are actually present
# in the loaded chapter table (1..number of books).
#
# The upper bound used to be a hard-coded 66. A book number beyond the
# loaded table would then pass through unchanged and get_max_chapter would
# return nil for it; taking the bound from book_chapter_counts keeps the
# result usable for whatever set of books the data file defines.
def self.to_valid_book(book)
  coerce_to_range(book, 1..book_chapter_counts.length)
end
306
+
307
# Coerces chapter into 1..(chapter count of book).
def self.to_valid_chapter(book, chapter)
  valid_chapters = Range.new(1, get_max_chapter(book))
  coerce_to_range(chapter, valid_chapters)
end
310
+
311
# Coerces verse into 1..(verse count of the given book and chapter).
def self.to_valid_verse(book, chapter, verse)
  valid_verses = Range.new(1, get_max_verse(book, chapter))
  coerce_to_range(verse, valid_verses)
end
314
+
315
# Clamps number to range: values below range.begin become range.begin,
# values above range.end become range.end, anything else is returned as is.
def self.coerce_to_range(number, range)
  if number < range.begin
    range.begin
  elsif number > range.end
    range.end
  else
    number
  end
end
320
+
321
# Builds a verse id from book, chapter and verse. Each part is first
# coerced into its valid range (the chapter against the coerced book, the
# verse against the coerced book and chapter); the id is then
# book * 1,000,000 + chapter * 1,000 + verse.
def self.get_id(book, chapter, verse)
  valid_book = to_valid_book(book)
  valid_chapter = to_valid_chapter(valid_book, chapter)
  valid_verse = to_valid_verse(valid_book, valid_chapter, verse)

  (valid_book * 1_000_000) + (valid_chapter * 1_000) + valid_verse
end
328
+
329
# Book number of a verse id: everything to the left of the six least
# significant digits.
def self.get_book(id)
  id / 1_000_000
end
332
+
333
# Chapter number of a verse id: the three digits just above the verse,
# i.e. the 4th through 6th least significant digits.
def self.get_chapter(id)
  (id % 1_000_000) / 1_000
end
336
+
337
# Verse number of a verse id: its three least significant digits.
def self.get_verse(id)
  id % 1_000
end
340
+
341
+
342
+
343
# Groups verse ids into an Array of inclusive Ranges of consecutive verses.
#
# array - verse ids; may be nested and may contain nils. It is NOT modified
#         (the previous implementation flattened, sorted and emptied the
#         caller's array in place).
#
# Two ids are consecutive when the second is the id right after the first
# or the first verse of the following chapter. Returns [] for nil, for an
# empty array and for an array holding nothing but nils.
def self.group_array_into_ranges(array)
  return [] if array.nil? || array.empty?

  # flatten/compact/sort each return a new array, leaving the input intact
  ids = array.flatten.compact.sort
  # only nils were supplied; without this guard a nil..nil range was built
  return [] if ids.empty?

  ranges = []
  range_start = range_end = ids.first
  ids.drop(1).each do |id|
    if id == get_next_verse(range_end) || id == get_start_of_next_chapter(range_end)
      range_end = id
    else
      ranges << (range_start..range_end)
      range_start = range_end = id
    end
  end
  ranges << (range_start..range_end)

  ranges
end
369
+
370
+
371
+
372
# Returns the attributes of the first valid reference found in the
# supplied string, or nil when match_all yields nothing.
def self.match_one(text)
  # Returning from inside the block stops the scan at the first match.
  match_all(text) { |attributes| return attributes }
  nil
end
379
+
380
+
381
+
382
# Scans the supplied string for every valid reference. For each one the
# block receives an attributes Hash (:original_string, :book, :ranges) and
# the MatchData of the match. Candidates whose book is not recognized, or
# whose reference parses to no ranges, are skipped.
def self.match_all(text)
  text.scan(Mericope::MERICOPE_PATTERN) do
    match = Regexp.last_match

    book = recognize_book(match[1])
    ranges = book ? parse_reference(book, match[2]) : []
    next if ranges.empty?

    yield({
      :original_string => match.to_s,
      :book => book,
      :ranges => ranges
    }, match)
  end
end
402
+
403
# Returns the number of the first entry in book_name_regexes whose pattern
# matches the lower-cased book string, or nil when none matches.
def self.recognize_book(book)
  name = book.to_s.downcase
  hit = book_name_regexes.find { |_number, regex| name =~ regex }
  hit && hit[0]
end
410
+
411
# Normalizes the reference text, cuts it at commas and semicolons, and
# hands the pieces to parse_ranges for the given book.
def self.parse_reference(book, reference)
  pieces = normalize_reference(reference).split(/[,;]/)
  parse_ranges(book, pieces)
end
415
+
416
# Returns a normalized copy of the reference text with every
# [regex, replacement] pair from NORMALIZATIONS applied in order.
#
# The argument is converted with to_s and is never modified. The previous
# implementation used gsub! on the result of to_s, which for a String
# argument is the caller's own object.
def self.normalize_reference(reference)
  NORMALIZATIONS.inject(reference.to_s) do |normalized, (regex, replacement)|
    normalized.gsub(regex, replacement)
  end
end
421
+
422
# Turns normalized range strings (e.g. "3:16-18", "20", "4-5") into Ranges
# of verse ids for the given book.
#
# A bare number is read as a verse of the most recently seen chapter when
# there is one (always chapter 1 in a single-chapter book); otherwise it is
# read as a whole chapter. Chapter and verse numbers are coerced into their
# valid ranges before the ids are built.
def self.parse_ranges(book, ranges)
  # e.g. in 12:1-8, remember that 12 is the chapter when we parse the 8
  recent_chapter = book_has_chapters?(book) ? nil : 1

  ranges.map do |text|
    # parse the low end and the high end of the range separately
    ends = text.split('-')
    ends << ends[0] if ends.length < 2 # treat 12:4 as 12:4-12:4
    lower = ends[0].split(':').map(&:to_i) # "3:28" => [3, 28]
    upper = ends[1].split(':').map(&:to_i)

    # treat Mark 3-1 as Mark 3-3 and, eventually, Mark 3:1-35
    if lower.length == 1 && upper.length == 1 && upper[0] < lower[0]
      upper = lower.dup
    end

    # Bring both ends into the form [chapter, verse].
    chapter_range = false
    if lower.length >= 2
      lower[0] = Mericope.to_valid_chapter(book, lower[0])
    elsif recent_chapter
      # e.g. the 11 in 12:1-8,11 is a verse of chapter 12
      lower.unshift recent_chapter
    else
      # no verse given: a range of chapters, starting at verse 1
      lower[0] = Mericope.to_valid_chapter(book, lower[0])
      lower << 1
      chapter_range = true
    end
    lower[1] = Mericope.to_valid_verse(book, *lower)

    if upper.length >= 2
      upper[0] = Mericope.to_valid_chapter(book, upper[0])
    elsif chapter_range
      # a range of chapters ends with the last verse of its last chapter
      upper[0] = Mericope.to_valid_chapter(book, upper[0])
      upper << Mericope.get_max_verse(book, upper[0])
    else
      # e.g. the 8 in 12:1-8 is a verse of chapter 12
      upper.unshift lower[0]
    end
    upper[1] = Mericope.to_valid_verse(book, *upper)

    recent_chapter = upper[0] # remember the last chapter

    Range.new(
      Mericope.get_id(book, *lower),
      Mericope.get_id(book, *upper))
  end
end
473
+
474
+
475
+
476
# Reads data/chapter_verse_count.txt (tab-separated) and returns one
# [id, verse_count, book_name, chapter] row per line of the file; the id,
# count and chapter columns are converted with to_i.
def self.load_chapter_verse_count_books
  path = File.expand_path(File.dirname(__FILE__) + "/../data/chapter_verse_count.txt")
  File.open(path) do |file|
    file.map do |line|
      fields = line.chomp.split("\t")
      [fields[0].to_i, fields[1].to_i, fields[2], fields[3].to_i]
    end
  end
end
488
+
489
+
490
+
491
# Reads data/book_abbreviations.txt and returns a Hash that maps each book
# number to a non-capturing alternation pattern (a String such as
# "(?:a|b|c)") of the abbreviations listed for that book.
#
# The file contains tab-separated values. The first value is the ordinal of
# the book; subsequent values represent abbreviations and misspellings that
# should be recognized as that book. Lines starting with "#" are comments.
#
# Lines that list no abbreviations (including blank lines) are skipped:
# they would otherwise produce the empty pattern "(?:)", which matches
# every candidate book name.
def self.load_book_abbreviations
  path = File.expand_path(File.dirname(__FILE__) + "/../data/book_abbreviations.txt")
  book_abbreviations = []
  File.open(path) do |file|
    file.each do |text|
      next if text.start_with?("#") # skip comments

      segments = text.chomp.split("\t")
      book_number = segments.shift
      next if segments.empty? # blank line, or a book with nothing to match

      book_abbreviations << [book_number.to_i, "(?:#{segments.join("|")})"]
    end
  end
  Hash[book_abbreviations]
end
509
+
510
+
511
+
512
# Rows of the chapter/verse table, loaded from disk on first use and
# cached for later calls.
def self.chapter_verse_count_books
  return @chapter_verse_count_books if @chapter_verse_count_books

  @chapter_verse_count_books = load_chapter_verse_count_books
end
515
+
516
+
517
+
518
# Hash mapping each row's id (first column of the chapter/verse table) to
# its verse count (second column). Memoized.
def self.chapter_verse_counts
  @chapter_verse_counts ||= chapter_verse_count_books.each_with_object({}) do |row, counts|
    counts[row[0]] = row[1]
  end
end
521
+
522
+
523
+
524
# Book-number => abbreviation-pattern Hash, loaded from disk on first use
# and cached for later calls.
def self.book_abbreviations
  return @book_abbreviations if @book_abbreviations

  @book_abbreviations = load_book_abbreviations
end
527
+
528
+
529
+
530
# Array with one chapter count per book, in the order books first appear
# in the chapter/verse table. Memoized.
#
# The count is the chapter number of the LAST row of each book, so it is
# the highest chapter only if each book's rows are in ascending chapter
# order - presumably true of the data file; verify if the file changes.
def self.book_chapter_counts
  @book_chapter_counts ||= begin
    rows_by_book = chapter_verse_count_books.group_by { |row| row[2] }
    rows_by_book.map { |_name, rows| rows.last.last }
  end
end
537
+
538
+
539
# Candidate book name: an optional ordinal prefix followed by one word.
BOOK_PATTERN = /\b(?:(?:1|2|3|i+|first|second|third|1st|2nd|3rd) )?(?:\w+| of )\b/.freeze

# Chapter/verse part of a reference: a number, optionally followed by verse
# numbers, comma/semicolon lists and dash-separated ranges. Kept as a String
# because it is only ever interpolated into MERICOPE_PATTERN.
REFERENCE_PATTERN = '(?:\s*\d{1,3})(?:\s*[:\"\.]\s*\d{1,3}[ab]?(?:\s*[,;]\s*(?:\d{1,3}[:\"\.])?\s*\d{1,3}[ab]?)*)?(?:\s*[-–—]\s*(?:\d{1,3}\s*[:\"\.])?(?:\d{1,3}[ab]?)(?:\s*[,;]\s*(?:\d{1,3}\s*[:\"\.])?\s*\d{1,3}[ab]?)*)*'.freeze

# A full reference: capture 1 is the book candidate, capture 2 the
# chapter/verse text.
# NOTE(review): interpolating the BOOK_PATTERN Regexp embeds it together with
# its own (case-sensitive) flags, so the /i below does not reach the ordinal
# prefixes ("First", "II", ...) - confirm whether that is intended.
MERICOPE_PATTERN = /(#{BOOK_PATTERN})\.? (#{REFERENCE_PATTERN})/i.freeze

# [regex, replacement] pairs applied in order to the chapter/verse text.
NORMALIZATIONS = [
  [/(\d+)[".](\d+)/, '\1:\2'].freeze, # 12"5 and 12.5 -> 12:5
  [/[–—]/, '-'].freeze,               # convert em dash and en dash to -
  [/[^0-9,:;\-–—]/, ''].freeze        # remove everything but [0-9,;:-]
].freeze
550
+ end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mericope
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Bob Lail
8
+ - Duane Johnson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-04-15 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: activesupport
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '4.0'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - "~>"
26
+ - !ruby/object:Gem::Version
27
+ version: '4.0'
28
+ - !ruby/object:Gem::Dependency
29
+ name: rake
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '10.4'
35
+ type: :development
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '10.4'
42
+ - !ruby/object:Gem::Dependency
43
+ name: turn
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '0.9'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '0.9'
56
+ - !ruby/object:Gem::Dependency
57
+ name: pry
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '0.10'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '0.10'
70
+ description: It recognizes common abbreviations of the books of the Book of Mormon
71
+ and a variety of ways of denoting ranges of chapters and verses. Based on Pericope.
72
+ email:
73
+ - bob.lailfamily@gmail.com
74
+ - duane.johnson@gmail.com
75
+ executables:
76
+ - mericope
77
+ extensions: []
78
+ extra_rdoc_files: []
79
+ files:
80
+ - README.mdown
81
+ - bin/mericope
82
+ - data/book_abbreviations.txt
83
+ - data/chapter_verse_count.txt
84
+ - lib/cli/base.rb
85
+ - lib/cli/command.rb
86
+ - lib/mericope.rb
87
+ - lib/mericope/cli.rb
88
+ - lib/mericope/version.rb
89
+ homepage: http://github.com/wordtreefoundation/mericope
90
+ licenses: []
91
+ metadata: {}
92
+ post_install_message:
93
+ rdoc_options: []
94
+ require_paths:
95
+ - lib
96
+ required_ruby_version: !ruby/object:Gem::Requirement
97
+ requirements:
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: '0'
101
+ required_rubygems_version: !ruby/object:Gem::Requirement
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ version: '0'
106
+ requirements: []
107
+ rubyforge_project:
108
+ rubygems_version: 2.2.2
109
+ signing_key:
110
+ specification_version: 4
111
+ summary: Mericope is a gem for parsing Book of Mormon references.
112
+ test_files: []