scripref 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 605d0d995b512d6af69e3b2152228dc201085b2b
4
- data.tar.gz: a400e265c6d777d287170fbef3edb56c0143fcc6
3
+ metadata.gz: 223ae6028e566b369b6848eef6374fe2bc52706a
4
+ data.tar.gz: 6b9f27eee78bc4c6ca03254851e80911a9846da6
5
5
  SHA512:
6
- metadata.gz: 37506e93da2d733beaa800db74ef5d1f16fe7aee313312298752a3f442549434b5360646b62d7d754dc189efa6edf1b09372be04b0a85e8f39dca290f2082b1a
7
- data.tar.gz: 1e74214d77ed1f74256c102ea448eba2ba703d90269dc697360494dee102900d9f2503c8310f95ce783d1c92512a9eacba62e734adb8e411729d28dacccf3f98
6
+ metadata.gz: 2f85ed0888401fb880980fd31552720e23494ae879cc6dbc9834dc36d2ffaca33fdbaf62fa4f8856c9bc2754820c53e5701cf6ca44fb73172d452f139916d927
7
+ data.tar.gz: 6261f6a48f837d69701270e0083758ded7b3a47c26eb7a4e48f5b345ce9a8ddfc1ca228afea8ac692c2ad208eaa7c625aae87f469a33d0a0c804859ad9b0383a
data/Changelog CHANGED
@@ -1,3 +1,9 @@
1
+ 0.11.0
2
+ Add class Bookname and methods Passage#+ and Passage#-.
3
+ Make processor work for complex contexts.
4
+ Use autoload.
5
+ Lots of refactorings and internal improvements.
6
+
1
7
  0.10.0
2
8
  Add methods Passage#make_comparable, #make_comparable!, #comparable?,
3
9
  #overlap?, #start, #end and #<=>.
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010-2016 Jan Friedrich
1
+ Copyright (c) 2010-2017 Jan Friedrich
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person obtaining a copy
4
4
  of this software and associated documentation files (the "Software"), to deal
data/Rakefile CHANGED
@@ -12,4 +12,8 @@ Rim.setup do
12
12
  homepage 'https://github.com/janfri/scripref'
13
13
  version Scripref::VERSION
14
14
  summary 'Library for parsing scripture references in real texts.'
15
+ if feature_loaded? 'rim/irb'
16
+ irb_requires %w(scripref scripref/include scripref/pipelining)
17
+ end
18
+ test_warning false
15
19
  end
@@ -0,0 +1,32 @@
1
+ # - encoding: utf-8 -
2
+ module Scripref
3
+
4
+ # Mixin with methods of basic functionality
5
+ module BasicMethods
6
+
7
+ NUMBER_RE = /\d+\s*/o
8
+
9
+ # Regular expression to match a chapter
10
+ def chapter_re
11
+ NUMBER_RE
12
+ end
13
+
14
+ # Regular expression to match a verse
15
+ def verse_re
16
+ NUMBER_RE
17
+ end
18
+
19
+ # Regular expression to match a book.
20
+ def book_re
21
+ return @book_re if @book_re
22
+ books_res_as_strings = book_names.map do |bn|
23
+ bn.names.map do |n|
24
+ (n.gsub(/([^\dA-Z])/, '\1?').gsub('.', '\.'))
25
+ end
26
+ end.flatten
27
+ @book_re = Regexp.compile(books_res_as_strings.map {|s| '(\b' << s << '\b\s*)' }.join('|'), nil)
28
+ end
29
+
30
+ end
31
+
32
+ end
@@ -0,0 +1,30 @@
1
+ # - encoding: utf-8 -
2
+
3
+ module Scripref
4
+
5
+ class Bookname
6
+
7
+ attr_reader :names, :abbrevs
8
+
9
+ def initialize names, abbrevs
10
+ @names = Array(names)
11
+ @abbrevs = Array(abbrevs)
12
+ end
13
+
14
+ def name
15
+ @names.first
16
+ end
17
+
18
+ def abbrev level=0
19
+ @abbrevs[level] || @abbrevs[-1]
20
+ end
21
+
22
+ def to_s
23
+ @names.first
24
+ end
25
+
26
+ alias to_str to_s
27
+
28
+ end
29
+
30
+ end
@@ -1,4 +1,5 @@
1
1
  # - encoding: utf-8 -
2
+ require 'scripref/bookname'
2
3
  require 'scripref/const_reader'
3
4
 
4
5
  module Scripref
@@ -6,8 +7,7 @@ module Scripref
6
7
  # Mixin for parsing references in English.
7
8
  module English
8
9
 
9
- # Array of book names.
10
- BOOK_NAMES = <<-END.strip.split(/,\s*/)
10
+ book_names = <<-END.strip.split(/,\s*/)
11
11
  Genesis, Exodus, Leviticus, Numbers, Deuteronomy, Joshua, Judges,
12
12
  Ruth, 1 Samuel, 2 Samuel, 1 Kings, 2 Kings, 1 Chronicles,
13
13
  2 Chronicles, Ezra, Nehemiah, Esther, Job, Psalms, Proverbs,
@@ -21,6 +21,18 @@ module Scripref
21
21
  Jude, Revelation
22
22
  END
23
23
 
24
+ book_abbrevs = <<-END.strip.split(/,\s*/)
25
+ Gen, Ex, Lev, Num, Deut, Josh, Judg, Rth, 1 Sam, 2 Sam, 1 Kgs, 2 Kgs,
26
+ 1 Chron, 2 Chron, Ezr, Neh, Esth, Job, Ps, Prov, Eccles, Song, Isa,
27
+ Jer, Lam, Ezek, Dan, Hos, Joel, Am, Obad, Jon, Mic, Nah, Hab, Zeph, Hag,
28
+ Zech, Mal, Matt, Mrk, Luk, John, Acts, Rom, 1 Cor, 2 Cor, Gal, Eph, Phil,
29
+ Col, 1 Thess, 2 Thess, 1 Tim, 2 Tim, Tit, Philem, Heb, Jas, 1 Pet, 2 Pet,
30
+ 1 Joh, 2 Joh, 3 Joh, Jud, Rev
31
+ END
32
+
33
+ # Array of Bookname objects (full name and abbreviation of each book).
34
+ BOOK_NAMES = book_names.zip(book_abbrevs).map {|name, abbrev| Bookname.new(name, abbrev)}
35
+
24
36
  # Separator between chapter and verse.
25
37
  CV_SEPARATOR = ':'
26
38
 
@@ -60,11 +72,6 @@ module Scripref
60
72
  # Regular expression to match postfix for more following verses
61
73
  POSTFIX_MORE_FOLLOWING_VERSES_RE = /ff\b\s*/o
62
74
 
63
- pass = '([1-3]\s*)?[A-Z][a-z]+\.?\s*(\d+\s*[,.\-]\s*)*\d+\s*'
64
-
65
- # Regular expression to parse a reference
66
- REFERENCE_RE = /#{pass}(;\s*#{pass})*/o
67
-
68
75
  # Check if book has only one chapter
69
76
  # @param book book as number
70
77
  def book_has_only_one_chapter? book
@@ -4,16 +4,17 @@ module Scripref
4
4
 
5
5
  class Formatter
6
6
 
7
- attr_accessor :cv_separator, :hyphen_separator, :pass_separator
8
-
7
+ attr_accessor :bookformat, :cv_separator, :hyphen_separator, :pass_separator
9
8
 
10
9
  # @param mods one or more modules to include
11
- def initialize *mods
10
+ # @param bookformat method of Bookname used to format a book (:name use full names of books, :abbrev use abbreviations)
11
+ def initialize *mods, bookformat: :name
12
12
  @mods = mods
13
13
  mods.each {|m| extend m}
14
+ @bookformat = bookformat
14
15
  end
15
16
 
16
- # Formats a reference (array of passages)
17
+ # Formats a reference (array of passages and maybe separators)
17
18
  def format *reference
18
19
  @last_b = @last_c = @last_v = :undefined
19
20
  @result = []
@@ -25,22 +26,30 @@ module Scripref
25
26
  @result.join
26
27
  end
27
28
 
29
+ # Formats a book
30
+ # @param num number of book (starting at 1)
28
31
  def format_book num
29
- Array(book_names[num - 1]).first
32
+ book_names[num - 1].send @bookformat
30
33
  end
31
34
 
35
+ # Formats a chapter
36
+ # @param num number of chapter
32
37
  def format_chapter num
33
38
  num.to_s
34
39
  end
35
40
 
41
+ # Formats a verse
42
+ # @param num number of verse
36
43
  def format_verse num
37
44
  num.to_s
38
45
  end
39
46
 
47
+ # Formats a verse addon
40
48
  def format_addon a
41
49
  a.to_s
42
50
  end
43
51
 
52
+ # Formats a verse postfix
44
53
  def format_postfix p
45
54
  p.to_s
46
55
  end
@@ -1,4 +1,5 @@
1
1
  # - encoding: utf-8 -
2
+ require 'scripref/bookname'
2
3
  require 'scripref/const_reader'
3
4
 
4
5
  module Scripref
@@ -6,8 +7,7 @@ module Scripref
6
7
  # Mixin for parsing references in German.
7
8
  module German
8
9
 
9
- # Array of book names.
10
- BOOK_NAMES = <<-END.strip.split(/,\s*/).map {|e| e.split('|')}
10
+ book_names = <<-END.strip.split(/,\s*/).map {|e| e.split('|')}
11
11
  1. Mose, 2. Mose, 3. Mose, 4. Mose, 5. Mose, Josua, Richter, Ruth, 1. Samuel, 2. Samuel,
12
12
  1. Könige, 2. Könige, 1. Chronika, 2. Chronika, Esra, Nehemia, Esther, Hiob, Psalm|Psalmen,
13
13
  Sprüche, Prediger, Hohelied, Jesaja, Jeremia, Klagelieder, Hesekiel, Daniel, Hosea, Joel,
@@ -18,6 +18,17 @@ module Scripref
18
18
  2. Johannes, 3. Johannes, Judas, Offenbarung
19
19
  END
20
20
 
21
+ book_abbrevs = <<-END.strip.split(/,\s*/).map {|e| e.split('|')}
22
+ 1.Mo, 2.Mo, 3.Mo, 4.Mo, 5.Mo, Jos, Ri, Ruth, 1.Sam, 2.Sam, 1.Kön, 2.Kön, 1.Chr, 2.Chr,
23
+ Esr, Neh, Est, Hi, Ps, Spr, Pred, Hohel, Jes, Jer, Klag, Hes, Dan, Hos, Joel, Amos, Obad,
24
+ Jona, Mich, Nah, Hab, Zef, Hag, Sach, Mal, Mat, Mar, Luk, Joh, Apg, Röm, 1.Ko, 2.Ko,
25
+ Gal, Eph, Phil, Kol, 1.Thes, 2.Thes, 1.Tim, 2.Tim, Tit, Philem, Heb, Jak, 1.Pet, 2.Pet,
26
+ 1.Joh, 2.Joh, 3.Joh, Jud, Off
27
+ END
28
+
29
+ # Array of Bookname objects (full names and abbreviations of each book).
30
+ BOOK_NAMES = book_names.zip(book_abbrevs).map {|name, abbrev| Bookname.new(name, abbrev)}
31
+
21
32
  # Separator between chapter and verse.
22
33
  CV_SEPARATOR = ','
23
34
 
@@ -43,7 +54,7 @@ module Scripref
43
54
  VERSE_SEPARATOR = '.'
44
55
 
45
56
  # Regular expression to match addons for a verse.
46
- VERSE_ADDON_RE = /[ab]\s*/o
57
+ VERSE_ADDON_RE = /[ab]\b\s*/o
47
58
 
48
59
  # Postfix for one following verse
49
60
  POSTFIX_ONE_FOLLOWING_VERSE = 'f'
@@ -57,20 +68,12 @@ module Scripref
57
68
  # Regular expression to match postfix for more following verses
58
69
  POSTFIX_MORE_FOLLOWING_VERSES_RE = /ff\b\s*/o
59
70
 
60
- pass = '([1-5]\.?\s*)?[A-Z][a-zäöü]+\.?\s*(\d+\s*[,.\-]\s*)*\d+\s*'
61
-
62
- # Regular expression to parse a reference
63
- REFERENCE_RE = /#{pass}(;\s*#{pass})*/o
64
-
65
71
  # Check if book has only one chapter
66
72
  # @param book book as number
67
73
  def book_has_only_one_chapter? book
68
74
  [31, 57, 63, 64, 65].include?(book)
69
75
  end
70
76
 
71
- # Hash with special abbreviations
72
- SPECIAL_ABBREV_MAPPING = {'Phil' => 'Philipper'}
73
-
74
77
  # Generate attr_reader methods for all constants
75
78
  extend ConstReader
76
79
  const_reader constants
@@ -0,0 +1,4 @@
1
+ # - encoding: utf-8 -
2
+ require 'scripref'
3
+
4
+ include Scripref
@@ -1,4 +1,5 @@
1
1
  # - encoding: utf-8 -
2
+ require 'scripref/basic_methods'
2
3
  require 'strscan'
3
4
 
4
5
  module Scripref
@@ -7,7 +8,7 @@ module Scripref
7
8
 
8
9
  attr_reader :error
9
10
 
10
- NUMBER_RE = /\d+\s*/o
11
+ include BasicMethods
11
12
 
12
13
  # @param mods one or more modules to include
13
14
  def initialize *mods
@@ -15,23 +16,6 @@ module Scripref
15
16
  mods.each {|m| extend m}
16
17
  end
17
18
 
18
- # Regular expression to match a chapter
19
- def chapter_re
20
- NUMBER_RE
21
- end
22
-
23
- # Regular expression to match a verse
24
- def verse_re
25
- NUMBER_RE
26
- end
27
-
28
- # Hash with special abbreviations
29
- # for example {'Phil' => 'Philipper'}
30
- # usual overwritten in Mixins
31
- def special_abbrev_mapping
32
- @special_abbrev_mapping ||= {}
33
- end
34
-
35
19
  # Parsing a string of a scripture reference
36
20
  # @param str string to parse
37
21
  def parse str
@@ -253,25 +237,14 @@ module Scripref
253
237
  @a1 = @a2 = nil
254
238
  end
255
239
 
256
- # Regular expression to match a book.
257
- def book_re
258
- return @book_re if @book_re
259
- books_res_as_strings = book_names.flatten.map do |bn|
260
- (bn.gsub(/([^\dA-Z])/, '\1?').gsub('.', '\.')) << '\b\s*'
261
- end
262
- @book_re = Regexp.compile(books_res_as_strings.map {|s| '(^' << s << ')' }.join('|'), nil)
263
- end
264
-
265
240
  def abbrev2num str
266
- book2num(abbrev2book(str))
241
+ s = str.strip
242
+ str2book_num(s) or str2book_num(abbrev2book(s))
267
243
  end
268
244
 
269
245
  def abbrev2book str
270
246
  s = str.strip
271
- if name = special_abbrev_mapping[s]
272
- return name
273
- end
274
- @books_str ||= ('#' << book_names.flatten.join('#') << '#')
247
+ @books_str ||= ('#' << book_names.map(&:names).flatten.join('#') << '#')
275
248
  pattern = s.chars.map {|c| Regexp.escape(c) << '[^#]*'}.join
276
249
  re = /(?<=#)#{pattern}(?=#)/
277
250
  names = @books_str.scan(re)
@@ -282,16 +255,23 @@ module Scripref
282
255
  names.first
283
256
  end
284
257
 
285
- def book2num str
286
- unless @book2num
287
- @book2num = {}
288
- book_names.each_with_index do |bns, i|
289
- Array(bns).each do |bn|
290
- @book2num[bn] = i+1
258
+ def init_str2book_num
259
+ unless @str2book_num
260
+ @str2book_num = {}
261
+ book_names.each_with_index do |bn, i|
262
+ bn.names.each do |n|
263
+ @str2book_num[n] = i+1
264
+ end
265
+ bn.abbrevs.each do |n|
266
+ @str2book_num[n] = i+1
291
267
  end
292
268
  end
293
269
  end
294
- @book2num[str]
270
+ end
271
+
272
+ def str2book_num str
273
+ init_str2book_num
274
+ @str2book_num[str]
295
275
  end
296
276
 
297
277
  def inspect
@@ -9,6 +9,14 @@ module Scripref
9
9
  super text, b1, c1, v1, b2, c2, v2, a1, a2
10
10
  end
11
11
 
12
+ def + other
13
+ to_a.zip(other.to_a).map {|a, b| a + b}
14
+ end
15
+
16
+ def - other
17
+ to_a.zip(other.to_a).map {|a, b| b - a}
18
+ end
19
+
12
20
  def to_a
13
21
  [b1, c1, v1, b2, c2, v2]
14
22
  end
@@ -1,5 +1,6 @@
1
1
  # - encoding: utf-8 -
2
2
  require 'strscan'
3
+ require 'scripref/basic_methods'
3
4
  require 'scripref/parser'
4
5
 
5
6
  module Scripref
@@ -7,6 +8,7 @@ module Scripref
7
8
  class Processor
8
9
 
9
10
  include Enumerable
11
+ include BasicMethods
10
12
 
11
13
  attr_accessor :text
12
14
 
@@ -38,11 +40,11 @@ module Scripref
38
40
  def each
39
41
  if block_given?
40
42
  scanner = StringScanner.new(text)
41
- while scanner.scan(/(.*?)(#{Regexp.new(reference_re.to_s)})/)
42
- yield scanner[1]
43
+ while scanner.scan(/(.*?)(#{reference_re.source})/)
44
+ yield scanner[1] unless scanner[1].empty?
43
45
  yield @parser.parse(scanner[2])
44
46
  end
45
- yield scanner.rest if scanner.rest
47
+ yield scanner.rest if scanner.rest?
46
48
  self
47
49
  else
48
50
  enum_for :each
@@ -53,6 +55,25 @@ module Scripref
53
55
  "#<#{self.class} #{@mods.inspect}>"
54
56
  end
55
57
 
58
+ # private
59
+
60
+ # Regular expression to heuristically identify a reference
61
+ def reference_re
62
+ return @reference_re if @reference_re
63
+ verse_with_optional_addon_or_postfix = '(' << [postfix_one_following_verse_re, postfix_more_following_verses_re, verse_addon_re].map {|e| verse_re.source << e.source}.join(')|(') << ')'
64
+ re_parts = [
65
+ '(', book_re, ')', '((', verse_with_optional_addon_or_postfix, ')|(', chapter_re, ')|(', verse_re, '))',
66
+ '(',
67
+ '(', book_re, ')',
68
+ '|',
69
+ verse_with_optional_addon_or_postfix,
70
+ '|',
71
+ '(', [chapter_re, cv_sep_re, verse_re, verse_sep_re, hyphen_re, pass_sep_re].map(&:source).join(')|('), ')',
72
+ ')*'
73
+ ].map {|e| Regexp === e ? e.source : e}
74
+ @reference_re = Regexp.compile(re_parts.join, nil)
75
+ end
76
+
56
77
  end
57
78
 
58
79
  end
data/lib/scripref.rb CHANGED
@@ -1,15 +1,17 @@
1
1
  # - encoding: utf-8 -
2
2
  require 'delegate'
3
- require 'scripref/parser'
4
- require 'scripref/passage'
5
- require 'scripref/processor'
6
- require 'scripref/formatter'
7
- require 'scripref/english'
8
- require 'scripref/german'
9
3
 
10
4
  module Scripref
11
5
 
12
- VERSION = '0.10.0'
6
+ VERSION = '0.11.0'
7
+
8
+ autoload :Bookname, 'scripref/bookname'
9
+ autoload :English, 'scripref/english'
10
+ autoload :Formatter, 'scripref/formatter'
11
+ autoload :German, 'scripref/german'
12
+ autoload :Parser, 'scripref/parser'
13
+ autoload :Passage, 'scripref/passage'
14
+ autoload :Processor, 'scripref/processor'
13
15
 
14
16
  class Token < DelegateClass(String)
15
17
  def initialize *args
data/regtest/parser.yml CHANGED
@@ -99,7 +99,7 @@ sample: Ruth 2,1a11.15a; 3,7b.9-12b; Markus 4; 5,3a.18b-21a
99
99
  exception: |-
100
100
  EOS or passage separator or verse separator or hyphen expected!
101
101
  Ruth 2,1a11.15a; 3,7b.9-12b; Markus 4; 5,3a.18b-21a
102
- ^
102
+ ^
103
103
  ---
104
104
  sample: Ruth 2,1a-.15a; 3,7b.9-12b; Markus 4; 5,3a.18b-21a
105
105
  exception: |-
@@ -179,7 +179,7 @@ sample: Ruth 2,1a-11.15a3,7b.9-12b; Markus 4; 5,3a.18b-21a
179
179
  exception: |-
180
180
  EOS or passage separator or verse separator or hyphen expected!
181
181
  Ruth 2,1a-11.15a3,7b.9-12b; Markus 4; 5,3a.18b-21a
182
- ^
182
+ ^
183
183
  ---
184
184
  sample: Ruth 2,1a-11.15a; ,7b.9-12b; Markus 4; 5,3a.18b-21a
185
185
  exception: |-
@@ -203,7 +203,7 @@ sample: Ruth 2,1a-11.15a; 3,7b9-12b; Markus 4; 5,3a.18b-21a
203
203
  exception: |-
204
204
  EOS or passage separator or verse separator or hyphen expected!
205
205
  Ruth 2,1a-11.15a; 3,7b9-12b; Markus 4; 5,3a.18b-21a
206
- ^
206
+ ^
207
207
  ---
208
208
  sample: Ruth 2,1a-11.15a; 3,7b.-12b; Markus 4; 5,3a.18b-21a
209
209
  exception: |-
@@ -293,7 +293,7 @@ sample: Ruth 2,1a-11.15a; 3,7b.9-12bMarkus 4; 5,3a.18b-21a
293
293
  exception: |-
294
294
  EOS or verse separator or passage separator expected!
295
295
  Ruth 2,1a-11.15a; 3,7b.9-12bMarkus 4; 5,3a.18b-21a
296
- ^
296
+ ^
297
297
  ---
298
298
  sample: Ruth 2,1a-11.15a; 3,7b.9-12b; 4; 5,3a.18b-21a
299
299
  result:
@@ -457,7 +457,7 @@ sample: Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a18b-21a
457
457
  exception: |-
458
458
  EOS or passage separator or verse separator or hyphen expected!
459
459
  Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a18b-21a
460
- ^
460
+ ^
461
461
  ---
462
462
  sample: Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a.-21a
463
463
  exception: |-
@@ -469,7 +469,7 @@ sample: Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a.18b21a
469
469
  exception: |-
470
470
  EOS or passage separator or verse separator or hyphen expected!
471
471
  Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a.18b21a
472
- ^
472
+ ^
473
473
  ---
474
474
  sample: Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a.18b-
475
475
  exception: |-
@@ -0,0 +1,25 @@
1
+ require 'regtest'
2
+ require 'scripref'
3
+
4
+ include Regtest
5
+ include Scripref
6
+
7
+ def s text
8
+ sample text do
9
+ p = Processor.new(text, German)
10
+ p.each.to_a
11
+ end
12
+ end
13
+
14
+ text = 'Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a.18b-21a'
15
+
16
+ s text
17
+
18
+ a = text.split(/\b/)
19
+ a.each_with_index do |e, i|
20
+ next if e =~ /^\s+$/
21
+ a2 = a.clone
22
+ a2.delete_at(i)
23
+ t = a2.join
24
+ s t
25
+ end