RubyGems - scripref - Versions diffs - 0.10.0 → 0.11.0 - Mend

scripref 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28)

checksums.yaml +4 -4
data/Changelog +6 -0
data/LICENSE +1 -1
data/Rakefile +4 -0
data/lib/scripref/basic_methods.rb +32 -0
data/lib/scripref/bookname.rb +30 -0
data/lib/scripref/english.rb +14 -7
data/lib/scripref/formatter.rb +14 -5
data/lib/scripref/german.rb +14 -11
data/lib/scripref/include.rb +4 -0
data/lib/scripref/parser.rb +19 -39
data/lib/scripref/passage.rb +8 -0
data/lib/scripref/processor.rb +24 -3
data/lib/scripref.rb +9 -7
data/regtest/parser.yml +6 -6
data/regtest/processor.rb +25 -0
data/regtest/processor.yml +1012 -0
data/test/test_bookname.rb +20 -0
data/test/test_english.rb +6 -4
data/test/test_formatter.rb +20 -10
data/test/test_german.rb +6 -4
data/test/test_helper.rb +4 -31
data/test/test_integration.rb +4 -10
data/test/test_parser.rb +37 -5
data/test/test_passage.rb +21 -8
data/test/test_pipelining.rb +1 -3
data/test/test_processor.rb +80 -6
metadata +11 -5

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 605d0d995b512d6af69e3b2152228dc201085b2b
-  data.tar.gz: a400e265c6d777d287170fbef3edb56c0143fcc6
+  metadata.gz: 223ae6028e566b369b6848eef6374fe2bc52706a
+  data.tar.gz: 6b9f27eee78bc4c6ca03254851e80911a9846da6
 SHA512:
-  metadata.gz: 37506e93da2d733beaa800db74ef5d1f16fe7aee313312298752a3f442549434b5360646b62d7d754dc189efa6edf1b09372be04b0a85e8f39dca290f2082b1a
-  data.tar.gz: 1e74214d77ed1f74256c102ea448eba2ba703d90269dc697360494dee102900d9f2503c8310f95ce783d1c92512a9eacba62e734adb8e411729d28dacccf3f98
+  metadata.gz: 2f85ed0888401fb880980fd31552720e23494ae879cc6dbc9834dc36d2ffaca33fdbaf62fa4f8856c9bc2754820c53e5701cf6ca44fb73172d452f139916d927
+  data.tar.gz: 6261f6a48f837d69701270e0083758ded7b3a47c26eb7a4e48f5b345ce9a8ddfc1ca228afea8ac692c2ad208eaa7c625aae87f469a33d0a0c804859ad9b0383a

data/Changelog CHANGED Viewed

@@ -1,3 +1,9 @@
+0.11.0
+Add class Bookname and methods Passage#+ and Passage#-.
+Make processor work for complex contexts.
+Use autoload.
+Lots of refactorings and internal improvements.
 0.10.0
 Add methods Passage#make_comparable, #make_comparable!, #comparable?,
 #overlap?, #start, #end and #<=>.

data/LICENSE CHANGED Viewed

@@ -1,4 +1,4 @@
-Copyright (c) 2010-2016 Jan Friedrich
+Copyright (c) 2010-2017 Jan Friedrich
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

data/Rakefile CHANGED Viewed

@@ -12,4 +12,8 @@ Rim.setup do
   homepage 'https://github.com/janfri/scripref'
   version Scripref::VERSION
   summary 'Library for parsing scripture references in real texts.'
+  if feature_loaded? 'rim/irb'
+    irb_requires %w(scripref scripref/include scripref/pipelining)
+  end
+  test_warning false
 end

data/lib/scripref/basic_methods.rb ADDED Viewed

@@ -0,0 +1,32 @@
+# - encoding: utf-8 -
+module Scripref
+  # Mixin with methods of basic functionality
+  module BasicMethods
+    NUMBER_RE = /\d+\s*/o
+    # Regular expression to match a chapter
+    def chapter_re
+      NUMBER_RE
+    end
+    # Regular expression to match a verse
+    def verse_re
+      NUMBER_RE
+    end
+    # Regular expression to match a book.
+    def book_re
+      return @book_re if @book_re
+      books_res_as_strings = book_names.map do |bn|
+        bn.names.map do |n|
+          (n.gsub(/([^\dA-Z])/, '\1?').gsub('.', '\.'))
+        end
+      end.flatten
+      @book_re = Regexp.compile(books_res_as_strings.map {|s| '(\b' << s << '\b\s*)' }.join('|'), nil)
+    end
+  end
+end

data/lib/scripref/bookname.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# - encoding: utf-8 -
+module Scripref
+  class Bookname
+    attr_reader :names, :abbrevs
+    def initialize names, abbrevs
+      @names = Array(names)
+      @abbrevs = Array(abbrevs)
+    end
+    def name
+      @names.first
+    end
+    def abbrev level=0
+      @abbrevs[level] || @abbrevs[-1]
+    end
+    def to_s
+      @names.first
+    end
+    alias to_str to_s
+  end
+end

data/lib/scripref/english.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 # - encoding: utf-8 -
+require 'scripref/bookname'
 require 'scripref/const_reader'
 module Scripref
@@ -6,8 +7,7 @@ module Scripref
   # Mixin for parsing references in English.
   module English
-    # Array of book names.
-    BOOK_NAMES = <<-END.strip.split(/,\s*/)
+    book_names = <<-END.strip.split(/,\s*/)
       Genesis, Exodus, Leviticus, Numbers, Deuteronomy, Joshua, Judges,
       Ruth, 1 Samuel, 2 Samuel, 1 Kings, 2 Kings, 1 Chronicles,
       2 Chronicles, Ezra, Nehemiah, Esther, Job, Psalms, Proverbs,
@@ -21,6 +21,18 @@ module Scripref
       Jude, Revelation
     END
+    book_abbrevs = <<-END.strip.split(/,\s*/)
+      Gen, Ex, Lev, Num, Deut, Josh, Judg, Rth, 1 Sam, 2 Sam, 1 Kgs, 2 Kgs,
+      1 Chron, 2 Chron, Ezr, Neh, Esth, Job, Ps, Prov, Eccles, Song, Isa,
+      Jer, Lam, Ezek, Dan, Hos, Joel, Am, Obad, Jon, Mic, Nah, Hab, Zeph, Hag,
+      Zech, Mal, Matt, Mrk, Luk, John, Acts, Rom, 1 Cor, 2 Cor, Gal, Eph, Phil,
+      Col, 1 Thess, 2 Thess, 1 Tim, 2 Tim, Tit, Philem, Heb, Jas, 1 Pet, 2 Pet,
+      1 Joh, 2 Joh, 3 Joh, Jud, Rev
+    END
+    # Array of book names.
+    BOOK_NAMES = book_names.zip(book_abbrevs).map {|name, abbrev| Bookname.new(name, abbrev)}
     # Separator between chapter and verse.
     CV_SEPARATOR = ':'
@@ -60,11 +72,6 @@ module Scripref
     # Regular expression to match postfix for more following verses
     POSTFIX_MORE_FOLLOWING_VERSES_RE = /ff\b\s*/o
-    pass = '([1-3]\s*)?[A-Z][a-z]+\.?\s*(\d+\s*[,.\-]\s*)*\d+\s*'
-    # Regular expression to parse a reference
-    REFERENCE_RE = /#{pass}(;\s*#{pass})*/o
     # Check if book has only one chapter
     # @param book book as number
     def book_has_only_one_chapter? book

data/lib/scripref/formatter.rb CHANGED Viewed

@@ -4,16 +4,17 @@ module Scripref
   class Formatter
-    attr_accessor :cv_separator, :hyphen_separator, :pass_separator
+    attr_accessor :bookformat, :cv_separator, :hyphen_separator, :pass_separator
     # @param mods one or more modules to include
-    def initialize *mods
+    # @param bookformat (:name use full names of books, :abbrev use abbreviations)
+    def initialize *mods, bookformat: :name
       @mods = mods
       mods.each {|m| extend m}
+      @bookformat = bookformat
     end
-    # Formats a reference (array of passages)
+    # Formats a reference (array of passages and maybe separators)
     def format *reference
       @last_b = @last_c = @last_v = :undefined
       @result = []
@@ -25,22 +26,30 @@ module Scripref
       @result.join
     end
+    # Formats a book
+    # @param num number of book (starting at 1)
     def format_book num
-      Array(book_names[num - 1]).first
+      book_names[num - 1].send @bookformat
     end
+    # Formats a chapter
+    # @param num number of chapter
     def format_chapter num
       num.to_s
     end
+    # Formats a verse
+    # @param num number of verse
     def format_verse num
       num.to_s
     end
+    # Formats a verse addon
     def format_addon a
       a.to_s
     end
+    # Formats a verse postfix
     def format_postfix p
       p.to_s
     end

data/lib/scripref/german.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 # - encoding: utf-8 -
+require 'scripref/bookname'
 require 'scripref/const_reader'
 module Scripref
@@ -6,8 +7,7 @@ module Scripref
   # Mixin for parsing references in German.
   module German
-    # Array of book names.
-    BOOK_NAMES = <<-END.strip.split(/,\s*/).map {|e| e.split('|')}
+    book_names = <<-END.strip.split(/,\s*/).map {|e| e.split('|')}
       1. Mose, 2. Mose, 3. Mose, 4. Mose, 5. Mose, Josua, Richter, Ruth, 1. Samuel, 2. Samuel,
       1. Könige, 2. Könige, 1. Chronika, 2. Chronika, Esra, Nehemia, Esther, Hiob, Psalm|Psalmen,
       Sprüche, Prediger, Hohelied, Jesaja, Jeremia, Klagelieder, Hesekiel, Daniel, Hosea, Joel,
@@ -18,6 +18,17 @@ module Scripref
       2. Johannes, 3. Johannes, Judas, Offenbarung
     END
+    book_abbrevs = <<-END.strip.split(/,\s*/).map {|e| e.split('|')}
+      1.Mo, 2.Mo, 3.Mo, 4.Mo, 5.Mo, Jos, Ri, Ruth, 1.Sam, 2.Sam, 1.Kön, 2.Kön, 1.Chr, 2.Chr,
+      Esr, Neh, Est, Hi, Ps, Spr, Pred, Hohel, Jes, Jer, Klag, Hes, Dan, Hos, Joel, Amos, Obad,
+      Jona, Mich, Nah, Hab, Zef, Hag, Sach, Mal, Mat, Mar, Luk, Joh, Apg, Röm, 1.Ko, 2.Ko,
+      Gal, Eph, Phil, Kol, 1.Thes, 2.Thes, 1.Tim, 2.Tim, Tit, Philem, Heb, Jak, 1.Pet, 2.Pet,
+      1.Joh, 2.Joh, 3.Joh, Jud, Off
+    END
+    # Array of book names.
+    BOOK_NAMES = book_names.zip(book_abbrevs).map {|name, abbrev| Bookname.new(name, abbrev)}
     # Separator between chapter and verse.
     CV_SEPARATOR = ','
@@ -43,7 +54,7 @@ module Scripref
     VERSE_SEPARATOR = '.'
     # Regular expression to match addons for a verse.
-    VERSE_ADDON_RE = /[ab]\s*/o
+    VERSE_ADDON_RE = /[ab]\b\s*/o
     # Postfix for one following verse
     POSTFIX_ONE_FOLLOWING_VERSE = 'f'
@@ -57,20 +68,12 @@ module Scripref
     # Regular expression to match postfix for more following verses
     POSTFIX_MORE_FOLLOWING_VERSES_RE = /ff\b\s*/o
-    pass = '([1-5]\.?\s*)?[A-Z][a-zäöü]+\.?\s*(\d+\s*[,.\-]\s*)*\d+\s*'
-    # Regular expression to parse a reference
-    REFERENCE_RE = /#{pass}(;\s*#{pass})*/o
     # Check if book has only one chapter
     # @param book book as number
     def book_has_only_one_chapter? book
       [31, 57, 63, 64, 65].include?(book)
     end
-    # Hash with special abbreviations
-    SPECIAL_ABBREV_MAPPING = {'Phil' => 'Philipper'}
     # Generate attr_reader methods for all constants
     extend ConstReader
     const_reader constants

data/lib/scripref/include.rb ADDED Viewed

@@ -0,0 +1,4 @@
+# - encoding: utf-8 -
+require 'scripref'
+include Scripref

data/lib/scripref/parser.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 # - encoding: utf-8 -
+require 'scripref/basic_methods'
 require 'strscan'
 module Scripref
@@ -7,7 +8,7 @@ module Scripref
     attr_reader :error
-    NUMBER_RE = /\d+\s*/o
+    include BasicMethods
     # @param mods one or more modules to include
     def initialize *mods
@@ -15,23 +16,6 @@ module Scripref
       mods.each {|m| extend m}
     end
-    # Regular expression to match a chapter
-    def chapter_re
-      NUMBER_RE
-    end
-    # Regular expression to match a verse
-    def verse_re
-      NUMBER_RE
-    end
-    # Hash with special abbreviations
-    # for example {'Phil' => 'Philipper'}
-    # usual overwritten in Mixins
-    def special_abbrev_mapping
-      @special_abbrev_mapping ||= {}
-    end
     # Parsing a string of a scripture reference
     # @param str string to parse
     def parse str
@@ -253,25 +237,14 @@ module Scripref
       @a1 = @a2 = nil
     end
-    # Regular expression to match a book.
-    def book_re
-      return @book_re if @book_re
-      books_res_as_strings = book_names.flatten.map do |bn|
-        (bn.gsub(/([^\dA-Z])/, '\1?').gsub('.', '\.')) << '\b\s*'
-      end
-      @book_re = Regexp.compile(books_res_as_strings.map {|s| '(^' << s << ')' }.join('|'), nil)
-    end
     def abbrev2num str
-      book2num(abbrev2book(str))
+      s = str.strip
+      str2book_num(s) or str2book_num(abbrev2book(s))
     end
     def abbrev2book str
       s = str.strip
-      if name = special_abbrev_mapping[s]
-        return name
-      end
-      @books_str ||= ('#' << book_names.flatten.join('#') << '#')
+      @books_str ||= ('#' << book_names.map(&:names).flatten.join('#') << '#')
       pattern = s.chars.map {|c| Regexp.escape(c) << '[^#]*'}.join
       re = /(?<=#)#{pattern}(?=#)/
       names = @books_str.scan(re)
@@ -282,16 +255,23 @@ module Scripref
       names.first
     end
-    def book2num str
-      unless @book2num
-        @book2num = {}
-        book_names.each_with_index do |bns, i|
-          Array(bns).each do |bn|
-            @book2num[bn] = i+1
+    def init_str2book_num
+      unless @str2book_num
+        @str2book_num = {}
+        book_names.each_with_index do |bn, i|
+          bn.names.each do |n|
+            @str2book_num[n] = i+1
+          end
+          bn.abbrevs.each do |n|
+            @str2book_num[n] = i+1
           end
         end
       end
-      @book2num[str]
+    end
+    def str2book_num str
+      init_str2book_num
+      @str2book_num[str]
     end
     def inspect

data/lib/scripref/passage.rb CHANGED Viewed

@@ -9,6 +9,14 @@ module Scripref
       super text, b1, c1, v1, b2, c2, v2, a1, a2
     end
+    def + other
+      to_a.zip(other.to_a).map {|a, b| a + b}
+    end
+    def - other
+      to_a.zip(other.to_a).map {|a, b| b - a}
+    end
     def to_a
       [b1, c1, v1, b2, c2, v2]
     end

data/lib/scripref/processor.rb CHANGED Viewed

@@ -1,5 +1,6 @@
 # - encoding: utf-8 -
 require 'strscan'
+require 'scripref/basic_methods'
 require 'scripref/parser'
 module Scripref
@@ -7,6 +8,7 @@ module Scripref
   class Processor
     include Enumerable
+    include BasicMethods
     attr_accessor :text
@@ -38,11 +40,11 @@ module Scripref
     def each
       if block_given?
         scanner = StringScanner.new(text)
-        while scanner.scan(/(.*?)(#{Regexp.new(reference_re.to_s)})/)
-          yield scanner[1]
+        while scanner.scan(/(.*?)(#{reference_re.source})/)
+          yield scanner[1] unless scanner[1].empty?
           yield @parser.parse(scanner[2])
         end
-        yield scanner.rest if scanner.rest
+        yield scanner.rest if scanner.rest?
         self
       else
         enum_for :each
@@ -53,6 +55,25 @@ module Scripref
       "#<#{self.class} #{@mods.inspect}>"
     end
+#    private
+    # Regular expression to heuristically identify a reference
+    def reference_re
+      return @reference_re if @reference_re
+      verse_with_optional_addon_or_postfix = '(' << [postfix_one_following_verse_re, postfix_more_following_verses_re, verse_addon_re].map {|e| verse_re.source << e.source}.join(')|(') << ')'
+      re_parts = [
+        '(', book_re, ')', '((', verse_with_optional_addon_or_postfix, ')|(', chapter_re, ')|(', verse_re, '))',
+        '(',
+        '(', book_re, ')',
+        '|',
+        verse_with_optional_addon_or_postfix,
+        '|',
+        '(', [chapter_re, cv_sep_re, verse_re, verse_sep_re, hyphen_re, pass_sep_re].map(&:source).join(')|('), ')',
+        ')*'
+      ].map {|e| Regexp === e ? e.source : e}
+      @reference_re = Regexp.compile(re_parts.join, nil)
+    end
   end
 end

data/lib/scripref.rb CHANGED Viewed

@@ -1,15 +1,17 @@
 # - encoding: utf-8 -
 require 'delegate'
-require 'scripref/parser'
-require 'scripref/passage'
-require 'scripref/processor'
-require 'scripref/formatter'
-require 'scripref/english'
-require 'scripref/german'
 module Scripref
-  VERSION = '0.10.0'
+  VERSION = '0.11.0'
+  autoload :Bookname, 'scripref/bookname'
+  autoload :English, 'scripref/english'
+  autoload :Formatter, 'scripref/formatter'
+  autoload :German, 'scripref/german'
+  autoload :Parser, 'scripref/parser'
+  autoload :Passage, 'scripref/passage'
+  autoload :Processor, 'scripref/processor'
   class Token < DelegateClass(String)
     def initialize *args

data/regtest/parser.yml CHANGED Viewed

@@ -99,7 +99,7 @@ sample: Ruth 2,1a11.15a; 3,7b.9-12b; Markus 4; 5,3a.18b-21a
 exception: |-
   EOS or passage separator or verse separator or hyphen expected!
   Ruth 2,1a11.15a; 3,7b.9-12b; Markus 4; 5,3a.18b-21a
-           ^
+          ^
 ---
 sample: Ruth 2,1a-.15a; 3,7b.9-12b; Markus 4; 5,3a.18b-21a
 exception: |-
@@ -179,7 +179,7 @@ sample: Ruth 2,1a-11.15a3,7b.9-12b; Markus 4; 5,3a.18b-21a
 exception: |-
   EOS or passage separator or verse separator or hyphen expected!
   Ruth 2,1a-11.15a3,7b.9-12b; Markus 4; 5,3a.18b-21a
-                  ^
+                 ^
 ---
 sample: Ruth 2,1a-11.15a; ,7b.9-12b; Markus 4; 5,3a.18b-21a
 exception: |-
@@ -203,7 +203,7 @@ sample: Ruth 2,1a-11.15a; 3,7b9-12b; Markus 4; 5,3a.18b-21a
 exception: |-
   EOS or passage separator or verse separator or hyphen expected!
   Ruth 2,1a-11.15a; 3,7b9-12b; Markus 4; 5,3a.18b-21a
-                        ^
+                       ^
 ---
 sample: Ruth 2,1a-11.15a; 3,7b.-12b; Markus 4; 5,3a.18b-21a
 exception: |-
@@ -293,7 +293,7 @@ sample: Ruth 2,1a-11.15a; 3,7b.9-12bMarkus 4; 5,3a.18b-21a
 exception: |-
   EOS or verse separator or passage separator expected!
   Ruth 2,1a-11.15a; 3,7b.9-12bMarkus 4; 5,3a.18b-21a
-                              ^
+                             ^
 ---
 sample: Ruth 2,1a-11.15a; 3,7b.9-12b;  4; 5,3a.18b-21a
 result:
@@ -457,7 +457,7 @@ sample: Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a18b-21a
 exception: |-
   EOS or passage separator or verse separator or hyphen expected!
   Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a18b-21a
-                                              ^
+                                             ^
 ---
 sample: Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a.-21a
 exception: |-
@@ -469,7 +469,7 @@ sample: Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a.18b21a
 exception: |-
   EOS or passage separator or verse separator or hyphen expected!
   Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a.18b21a
-                                                  ^
+                                                 ^
 ---
 sample: Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a.18b-
 exception: |-

data/regtest/processor.rb ADDED Viewed

@@ -0,0 +1,25 @@
+require 'regtest'
+require 'scripref'
+include Regtest
+include Scripref
+def s text
+  sample text do
+    p = Processor.new(text, German)
+    p.each.to_a
+  end
+end
+text = 'Ruth 2,1a-11.15a; 3,7b.9-12b; Markus 4; 5,3a.18b-21a'
+s text
+a = text.split(/\b/)
+a.each_with_index do |e, i|
+  next if e =~ /^\s+$/
+  a2 = a.clone
+  a2.delete_at(i)
+  t = a2.join
+  s t
+end