RubyGems - anystyle-parser - Versions diffs - 0.6.9 → 0.6.10 - Mend

anystyle-parser 0.6.9 → 0.6.10

Files changed (12) hide show

checksums.yaml +4 -4
data/Gemfile +1 -1
data/README.md +3 -1
data/anystyle-parser.gemspec +1 -1
data/lib/anystyle/parser/normalizer.rb +21 -14
data/lib/anystyle/parser/parser.rb +18 -6
data/lib/anystyle/parser/version.rb +1 -1
data/spec/anystyle/parser/dictionary_spec.rb +6 -6
data/spec/anystyle/parser/features_spec.rb +3 -3
data/spec/anystyle/parser/normalizer_spec.rb +58 -48
data/spec/anystyle/parser/parser_spec.rb +40 -32
metadata +4 -10

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: b75781230fa07b3d6d751a7222913923193f173f
-  data.tar.gz: 762ecbac1a452a771c8b1eb6670cbce7a9a86ed6
+  metadata.gz: 978c32592fd4e4edade7f5648a1b3095d652a68b
+  data.tar.gz: e310a276de3bb0e4c94ca4df72ed8dda378a9fd6
 SHA512:
-  metadata.gz: d9b61a7966fd8cf7024180ecfe19c055b0d2bdeb1dde5f2a5a727d8e7c18ff65e11b607bdf4b164d6b6e8bb0eb651ea5459e3a3e96835c3fe32337e07e0eebd6
-  data.tar.gz: b1b0ef651aba9e2aa2ace965c8cff1fa90ed9168682fe5078ad94a784051d3b3594a575a1adaf3065094193646a842cac3203736d5a9321c77299cc6dc0df806
+  metadata.gz: 8ef818cc5486835a51de69f4cc30d46975e162c0b0b8a7cd2d08ab3e94a3956e7ffecaa1f6e4755a29b186d3f9df56b89bd6e554e8defc64f8da7e2a1fc50c8f
+  data.tar.gz: ae731f1ace948eafa16fe20bf921b48debe52f4656368f4da3ae16b2b8a6612bd2d7b09b728209558b9a27fd2f734e65752012fd714d6521983d98ff0ba96245

data/Gemfile CHANGED

@@ -4,7 +4,7 @@ gemspec
 group :development, :test do
   gem 'rake'
   gem 'cucumber'
-  gem 'rspec'
+  gem 'rspec', '~>3.0'
   gem 'simplecov', '~>0.8', :require => false
   gem 'rubinius-coverage', :platform => :rbx
   gem 'coveralls', :require => false

data/README.md CHANGED

@@ -17,7 +17,9 @@ CSL/CiteProc JSON format).
 Web Application and Web Service
 -------------------------------
 Anystyle-Parser is avaialble as a web application and a web service at
-[http://anystyle.io](http://anystyle.io).
+[http://anystyle.io](http://anystyle.io). For example Ruby code using
+the anystyle.io API, see this [prototype](https://gist.github.com/inukshuk/f1d47aeab1f778bca8ce)
+for a style predictor.
 Installation
 ------------

data/anystyle-parser.gemspec CHANGED

@@ -20,7 +20,7 @@ Gem::Specification.new do |s|
   s.add_runtime_dependency('bibtex-ruby', '~>3.0')
   s.add_runtime_dependency('builder', '>=3.0', '<4.0')
   s.add_runtime_dependency('wapiti', '~>0.1')
-  s.add_runtime_dependency('namae', '~>0.8', '>=0.8.7')
+  s.add_runtime_dependency('namae', '~>0.9')
   s.files        = `git ls-files`.split("\n").reject { |path|
     path.start_with?('.')

data/lib/anystyle/parser/normalizer.rb CHANGED

@@ -58,6 +58,16 @@ module Anystyle
         hash
       end
+      def normalize_accessed(hash)
+        token, *dangling =  hash[:accessed]
+        unmatched(:accessed, hash, dangling) unless dangling.empty?
+        token.gsub!(/(accessed|retrieved):?\s*/i, '')
+        hash[:accessed] = token
+        hash
+      end
       def normalize_key(hash)
         token, *dangling =  hash[:key]
         unmatched(:key, hash, dangling) unless dangling.empty?
@@ -114,8 +124,8 @@ module Anystyle
         editors.gsub!(/^\W+|\W+$/, '')
         editors.gsub!(/^in:?\s+/i, '')
-        editors.gsub!(/[^[:alpha:]]*[Ee]d(s|itors?|ited)?\b[^[:alpha:]]*/, '')
-        editors.gsub!(/[^[:alpha:]]*([Hh]rsg|Herausgeber)\b[^[:alpha:]]*/, '')
+        editors.gsub!(/\W*\b[Ee]d(s|itors?|ited)?\b\W*/, '')
+        editors.gsub!(/\W*\b([Hh]rsg|gg?|Herausgeber)\b\W*/, '')
         editors.gsub!(/\b[Hh]erausgegeben von\b/, '')
         editors.gsub!(/\bby\b/i, '')
@@ -134,8 +144,9 @@ module Anystyle
       def normalize_translator(hash)
         translators = hash[:translator]
+        translators.gsub!(/\b([Ii]n (d|ein)er )?[Üü]ber(s\.|setzt|setzung|tragen|tragung) v(\.|on\b)/, '')
         translators.gsub!(/^\W+|\W+$/, '')
-        translators.gsub!(/[^[:alpha:]]*trans(lated)?\b[^[:alpha:]]*/i, '')
+        translators.gsub!(/[^[:alpha:]]*\btrans(l(ated)?)?\b[^[:alpha:]]*/i, '')
         translators.gsub!(/\bby\b/i, '')
         hash[:translator] = normalize_names(translators)
@@ -170,10 +181,7 @@ module Anystyle
         names.gsub!(/;|:/, ',')
         Namae.parse!(names).map { |name|
-          unless name.given.nil? || name.family.nil?
-            name.given.gsub!(/\b([[:upper:]])(\s|$)/, '\1.\2')
-          end
+          name.normalize_initials
           name.sort_order
         }.join(' and ')
@@ -270,21 +278,20 @@ module Anystyle
       end
       def normalize_date(hash)
-        date, *dangling = hash[:date]
-        unmatched(:date, hash, dangling) unless dangling.empty?
+        date = Array(hash[:date]).join(' ')
         unless (month = MONTH[date]).nil?
-          hash[:month] = month
+          month = '%02d' % month
         end
         if date =~ /(\d{4})/
-          hash[:year] = $1.to_i
+          year = $1
-          if hash.key?(:month) && date =~ /\b(\d{1,2})\b/
-            hash[:day] = $1.to_i
+          if month && date =~ /\b(\d{1,2})\b/
+            day = '%02d' % $1.to_i
           end
-          hash.delete(:date)
+          hash[:date] = [year, month, day].compact.join('-')
         end
         hash

data/lib/anystyle/parser/parser.rb CHANGED

@@ -3,17 +3,17 @@ module Anystyle
     class Parser
-      @formats = [:bibtex, :hash, :citeproc, :xml, :tags, :raw].freeze
+      @formats = [:bibtex, :hash, :normalized, :citeproc, :xml, :tags, :raw].freeze
       @defaults = {
         :model => File.expand_path('../support/anystyle.mod', __FILE__),
         :pattern => File.expand_path('../support/anystyle.pat', __FILE__),
         :compact => true,
         :threads => 4,
-        :separator => /\s+|\b(\d+:)/,
+        :separator => /\s+|\b(\d\S*:)/,
         :tagged_separator => /\s+|(<\/?[^>]+>)/,
         :strip => /[^[:alnum:]]/,
-        :format => :hash,
+        :format => :normalized,
         :xml_entities => Hash[*%w{ &amp; & &lt; < &gt; > &apos; ' &quot; " }],
         :training_data => File.expand_path('../../../../resources/train.txt', __FILE__)
       }.freeze
@@ -258,8 +258,17 @@ module Anystyle
       def format_bibtex(labels)
         b = BibTeX::Bibliography.new
-        format_hash(labels).each do |hash|
+        format_normalized(labels).each do |hash|
           hash[:address] = hash.delete :location if hash.key?(:location)
+          hash[:urldate] = hash.delete :accessed if hash.key?(:accessed)
+          if hash.key?(:authority)
+            if [:techreport,:thesis].include?(hash[:type])
+              hash[:institution] = hash.delete :authority
+            else
+              hash[:organization] = hash.delete :authority
+            end
+          end
           b << BibTeX::Entry.new(hash)
         end
@@ -276,7 +285,7 @@ module Anystyle
       def format_hash(labels)
         labels.map do |line|
-          hash = line.inject({}) do |h, (label, token)|
+          line.inject({}) do |h, (label, token)|
             if h.has_key?(label)
               h[label] = [h[label]].flatten << token
             else
@@ -284,10 +293,13 @@ module Anystyle
             end
             h
           end
-          normalize hash
         end
       end
+      def format_normalized(labels)
+        format_hash(labels).map { |h| normalize h }
+      end
       def format_citeproc(labels)
         format_bibtex(labels).to_citeproc
       end

data/lib/anystyle/parser/version.rb CHANGED

@@ -1,5 +1,5 @@
 module Anystyle
   module Parser
-    VERSION = '0.6.9'.freeze
+    VERSION = '0.6.10'.freeze
   end
 end

data/spec/anystyle/parser/dictionary_spec.rb CHANGED

@@ -7,16 +7,16 @@ module Anystyle
 			let(:dict) { Dictionary.instance }
-			it { Dictionary.should_not respond_to(:new) }
-			it { dict.should_not be nil }
+			it { expect(Dictionary).not_to respond_to(:new) }
+			it { expect(dict).not_to be nil }
       describe '.modes' do
         it 'returns an array' do
-          Dictionary.modes.should be_a(Array)
+          expect(Dictionary.modes).to be_a(Array)
         end
         it 'contains at least :hash' do
-          Dictionary.modes.should include(:hash)
+          expect(Dictionary.modes).to include(:hash)
         end
       end
@@ -24,12 +24,12 @@ module Anystyle
 				%w{ philippines italy }.each do |place|
 					it "#{place.inspect} should be a place name" do
-						dict[place].should == Dictionary.code[:place]
+						expect(dict[place]).to eq(Dictionary.code[:place])
 					end
 				end
 				it "accepts unicode strins like 'çela' (surname)" do
-					(dict['çela'] & Dictionary.code[:surname]).should > 0
+					expect(dict['çela'] & Dictionary.code[:surname]).to be > 0
 				end
 			end

data/spec/anystyle/parser/features_spec.rb CHANGED

@@ -8,19 +8,19 @@ module Anystyle::Parser
       %w{ (1992) 1992 2011 1776 }.each do |year|
         it "returns :year for #{year.inspect}" do
-          f.match(year).should == :year
+          expect(f.match(year)).to eq(:year)
         end
       end
       %w{ (1) (12) (123) }.each do |year|
         it "returns :year for #{year.inspect}" do
-          f.match(year).should == :numeric
+          expect(f.match(year)).to eq(:numeric)
         end
       end
       ['pp', 'pp.', '23-4', '6124--19', '48 - 9', '19–27'].each do |page|
         it "returns :page for #{page.inspect}" do
-          f.match(page).should == :page
+          expect(f.match(page)).to eq(:page)
         end
       end

data/spec/anystyle/parser/normalizer_spec.rb CHANGED

@@ -9,52 +9,56 @@ module Anystyle
       describe "#tokenize_names" do
         it "tokenizes 'A B'" do
-          Normalizer.instance.normalize_names('A B').should == 'B, A.'
+          expect(Normalizer.instance.normalize_names('A B')).to eq('B, A.')
         end
         it "tokenizes 'A, B'" do
-          Normalizer.instance.normalize_names('A, B').should == 'A, B.'
+          expect(Normalizer.instance.normalize_names('A, B')).to eq('A, B.')
         end
         it "tokenizes 'A, jr., Bbb'" do
-          Normalizer.instance.normalize_names('A, jr., B').should == 'A, jr., B.'
+          expect(Normalizer.instance.normalize_names('A, jr., B')).to eq('A, jr., B.')
         end
         it "tokenizes 'A, B, jr.'" do
-          Normalizer.instance.normalize_names('A, B, jr.').should == 'A, jr., B.'
+          expect(Normalizer.instance.normalize_names('A, B, jr.')).to eq('A, jr., B.')
         end
         it "tokenizes 'A, B, C, D'" do
-          Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B. and C, D.'
+          expect(Normalizer.instance.normalize_names('A, B, C, D')).to eq('A, B. and C, D.')
         end
         it "tokenizes 'A, B, C'" do
-          Normalizer.instance.normalize_names('A, B, C').should == 'A, B. and C'
+          expect(Normalizer.instance.normalize_names('A, B, C')).to eq('A, B. and C.')
         end
         it "tokenizes 'Aa Bb, C.'" do
-          Normalizer.instance.normalize_names('Aa Bb, C.').should == 'Aa Bb, C.'
+          expect(Normalizer.instance.normalize_names('Aa Bb, C.')).to eq('Aa Bb, C.')
         end
         it "tokenizes 'Plath, L.C., Asgaard, G., ... Botros, N.'" do
-          Normalizer.instance.normalize_names('Plath, L.C., Asgaard, G., ... Botros, N.').should == 'Plath, L.C. and Asgaard, G. and Botros, N.'
-          Normalizer.instance.normalize_names('Plath, L.C., Asgaard, G., … Botros, N.').should == 'Plath, L.C. and Asgaard, G. and Botros, N.'
+          expect(Normalizer.instance.normalize_names('Plath, L.C., Asgaard, G., ... Botros, N.')).to eq('Plath, L.C. and Asgaard, G. and Botros, N.')
+          expect(Normalizer.instance.normalize_names('Plath, L.C., Asgaard, G., … Botros, N.')).to eq('Plath, L.C. and Asgaard, G. and Botros, N.')
         end
         it "tokenizes 'Aa Bb, Cc Dd, and E F G'" do
-          Normalizer.instance.normalize_names('Aa Bb, Cc Dd, and E F G').should == 'Bb, Aa and Dd, Cc and G, E. F.'
+          expect(Normalizer.instance.normalize_names('Aa Bb, Cc Dd, and E F G')).to eq('Bb, Aa and Dd, Cc and G, E.F.')
         end
         [
           ['Poe, Edgar A.', 'Poe, Edgar A.'],
           ['Edgar A. Poe', 'Poe, Edgar A.'],
+          ['J Doe', 'Doe, J.'],
+          ['Doe, J', 'Doe, J.'],
+          ['JE Doe', 'Doe, J.E.'],
+          ['Doe, JE', 'Doe, J.E.'],
           ['Edgar A. Poe, Herman Melville', 'Poe, Edgar A. and Melville, Herman'],
           ['Edgar A. Poe; Herman Melville', 'Poe, Edgar A. and Melville, Herman'],
           ['Poe, Edgar A., Melville, Herman', 'Poe, Edgar A. and Melville, Herman'],
           ['Aeschlimann Magnin, E.', 'Aeschlimann Magnin, E.']
         ].each do |name, normalized|
           it "tokenizes #{name.inspect}" do
-            Normalizer.instance.normalize_names(name).should == normalized
+            expect(Normalizer.instance.normalize_names(name)).to eq(normalized)
           end
         end
@@ -62,75 +66,81 @@ module Anystyle
       describe '#normalize_editor' do
         it "strips in from beginning" do
-          n.normalize_editor(:editor => 'In D. Knuth (ed.)').should == { :editor => 'Knuth, D.' }
-          n.normalize_editor(:editor => 'In: D. Knuth (ed.)').should == { :editor => 'Knuth, D.' }
-          n.normalize_editor(:editor => 'in: D. Knuth ed.').should == { :editor => 'Knuth, D.' }
-          n.normalize_editor(:editor => 'in D. Knuth (ed)').should == { :editor => 'Knuth, D.' }
+          expect(n.normalize_editor(:editor => 'In D. Knuth (ed.)')).to eq({ :editor => 'Knuth, D.' })
+          expect(n.normalize_editor(:editor => 'In: D. Knuth (ed.)')).to eq({ :editor => 'Knuth, D.' })
+          expect(n.normalize_editor(:editor => 'in: D. Knuth ed.')).to eq({ :editor => 'Knuth, D.' })
+          expect(n.normalize_editor(:editor => 'in D. Knuth (ed)')).to eq({ :editor => 'Knuth, D.' })
         end
         it "does not strip ed from name" do
-          n.normalize_editor(:editor => 'In Edward Wood').should == { :editor => 'Wood, Edward' }
-          n.normalize_editor(:editor => 'ed by Edward Wood').should == { :editor => 'Wood, Edward' }
-          n.normalize_editor(:editor => 'ed. by Edward Wood').should == { :editor => 'Wood, Edward' }
-          n.normalize_editor(:editor => 'ed by Edward Wood').should == { :editor => 'Wood, Edward' }
+          expect(n.normalize_editor(:editor => 'In Edward Wood')).to eq({ :editor => 'Wood, Edward' })
+          expect(n.normalize_editor(:editor => 'ed by Edward Wood')).to eq({ :editor => 'Wood, Edward' })
+          expect(n.normalize_editor(:editor => 'ed. by Edward Wood')).to eq({ :editor => 'Wood, Edward' })
+          expect(n.normalize_editor(:editor => 'ed by Edward Wood')).to eq({ :editor => 'Wood, Edward' })
+          expect(n.normalize_editor(:editor => 'In Alfred Wood')).to eq({ :editor => 'Wood, Alfred' })
+          expect(n.normalize_editor(:editor => 'ed by Alfred Wood')).to eq({ :editor => 'Wood, Alfred' })
+          expect(n.normalize_editor(:editor => 'ed. by Alfred Wood')).to eq({ :editor => 'Wood, Alfred' })
+          expect(n.normalize_editor(:editor => 'ed by Alfred Wood')).to eq({ :editor => 'Wood, Alfred' })
         end
         it "strips et al" do
-          n.normalize_editor(:editor => 'Edward Wood et al')[:editor].should == 'Wood, Edward'
-          n.normalize_editor(:editor => 'Edward Wood et al.')[:editor].should == 'Wood, Edward'
-          n.normalize_editor(:editor => 'Edward Wood u.a.')[:editor].should == 'Wood, Edward'
-          n.normalize_editor(:editor => 'Edward Wood u. a.')[:editor].should == 'Wood, Edward'
-          n.normalize_editor(:editor => 'Edward Wood and others')[:editor].should == 'Wood, Edward'
-          n.normalize_editor(:editor => 'Edward Wood & others')[:editor].should == 'Wood, Edward'
+          expect(n.normalize_editor(:editor => 'Edward Wood et al')[:editor]).to eq('Wood, Edward')
+          expect(n.normalize_editor(:editor => 'Edward Wood et al.')[:editor]).to eq('Wood, Edward')
+          expect(n.normalize_editor(:editor => 'Edward Wood u.a.')[:editor]).to eq('Wood, Edward')
+          expect(n.normalize_editor(:editor => 'Edward Wood u. a.')[:editor]).to eq('Wood, Edward')
+          expect(n.normalize_editor(:editor => 'Edward Wood and others')[:editor]).to eq('Wood, Edward')
+          expect(n.normalize_editor(:editor => 'Edward Wood & others')[:editor]).to eq('Wood, Edward')
+        end
+      end
+      describe '#normalize_translator' do
+        it "strips in from beginning" do
+          expect(n.normalize_translator(:translator => 'Translated by J Doe')).to eq({ :translator => 'Doe, J.' })
+          expect(n.normalize_translator(:translator => 'Trans by J Doe')).to eq({ :translator => 'Doe, J.' })
+          expect(n.normalize_translator(:translator => 'Trans. by J Doe')).to eq({ :translator => 'Doe, J.' })
+          expect(n.normalize_translator(:translator => 'Transl. J Doe')).to eq({ :translator => 'Doe, J.' })
+          expect(n.normalize_translator(:translator => 'übersetzt von J Doe')).to eq({ :translator => 'Doe, J.' })
+          expect(n.normalize_translator(:translator => 'übers. v. J Doe')).to eq({ :translator => 'Doe, J.' })
+          expect(n.normalize_translator(:translator => 'Übersetzung v. J Doe')).to eq({ :translator => 'Doe, J.' })
+          expect(n.normalize_translator(:translator => 'In der Übersetzung von J Doe')).to eq({ :translator => 'Doe, J.' })
         end
       end
       describe 'editors extraction' do
         it 'recognizes editors in the author field' do
-          n.normalize_author(:author => 'D. Knuth (ed.)').should == { :editor => 'Knuth, D.' }
+          expect(n.normalize_author(:author => 'D. Knuth (ed.)')).to eq({ :editor => 'Knuth, D.' })
         end
       end
       describe 'URL extraction' do
         it 'recognizes full URLs' do
-          n.normalize_url(:url => 'Available at: https://www.example.org/x.pdf').should == { :url => 'https://www.example.org/x.pdf' }
-          n.normalize_url(:url => 'Available at: https://www.example.org/x.pdf [Retrieved today]').should == { :url => 'https://www.example.org/x.pdf' }
+          expect(n.normalize_url(:url => 'Available at: https://www.example.org/x.pdf')).to eq({ :url => 'https://www.example.org/x.pdf' })
+          expect(n.normalize_url(:url => 'Available at: https://www.example.org/x.pdf [Retrieved today]')).to eq({ :url => 'https://www.example.org/x.pdf' })
         end
         it 'tries to detect URLs without protocol' do
-          n.normalize_url(:url => 'Available at: www.example.org/x.pdf').should == { :url => 'www.example.org/x.pdf' }
-          n.normalize_url(:url => 'Available at: example.org/x.pdf [Retrieved today]').should == { :url => 'example.org/x.pdf' }
+          expect(n.normalize_url(:url => 'Available at: www.example.org/x.pdf')).to eq({ :url => 'www.example.org/x.pdf' })
+          expect(n.normalize_url(:url => 'Available at: example.org/x.pdf [Retrieved today]')).to eq({ :url => 'example.org/x.pdf' })
         end
       end
       describe 'date extraction' do
         it 'extracts month and year from a string like "(July 2009)"' do
-          h = Normalizer.instance.normalize_date(:date => '(July 2009)')
-          h[:year].should == 2009
-          h[:month].should == 7
-          h.should_not have_key(:date)
-          h.should_not have_key(:day)
+          h = Normalizer.instance.normalize_date(:date => ['(July 2009)'])
+          expect(h[:date]).to eq('2009-07')
         end
         it 'extracts month and year from a string like "(1997 Sept.)"' do
           h = Normalizer.instance.normalize_date(:date => '(1997 Sept.)')
-          h[:year].should == 1997
-          h[:month].should == 9
-          h.should_not have_key(:date)
-          h.should_not have_key(:day)
+          expect(h[:date]).to eq('1997-09')
-          h = Normalizer.instance.normalize_date(:date => '(1997 Okt.)')
-          h[:year].should == 1997
-          h[:month].should == 10
-          h.should_not have_key(:day)
+          h = Normalizer.instance.normalize_date(:date => ['(1997 Okt.)'])
+          expect(h[:date]).to eq('1997-10')
         end
         it 'extracts days if month and year are present' do
-          h = n.normalize_date(:date => '(15 May 1984)')
-          h[:year].should == 1984
-          h[:month].should == 5
-          h[:day].should == 15
+          h = n.normalize_date(:date => ['(15 May 1984)'])
+          expect(h[:date]).to eq('1984-05-15')
         end
       end

data/spec/anystyle/parser/parser_spec.rb CHANGED

@@ -7,40 +7,40 @@ module Anystyle::Parser
     describe "#tokenize" do
       it "returns [] when given an empty string" do
-        subject.tokenize('').should == []
+        expect(subject.tokenize('')).to eq([])
       end
       it "takes a single line and returns an array of token sequences" do
-        subject.tokenize('hello, world!').should == [%w{ hello, world! }]
+        expect(subject.tokenize('hello, world!')).to eq([%w{ hello, world! }])
       end
       it "tokenizes volume/page-range exception" do
-        subject.tokenize('hello:world! http://abc.com 3:45 3:1-2 23:1').should == [%w{ hello:world! http://abc.com 3: 45 3: 1-2 23: 1 }]
+        expect(subject.tokenize('hello:world! http://abc.com 3:45 3:1-2 23:1 45(3):23–7')).to eq([%w{ hello:world! http://abc.com 3: 45 3: 1-2 23: 1 45(3): 23–7}])
       end
       it "takes two lines and returns an array of token sequences" do
-        subject.tokenize("hello, world!\ngoodbye!").should == [%w{ hello, world! }, %w{ goodbye! }]
+        expect(subject.tokenize("hello, world!\ngoodbye!")).to eq([%w{ hello, world! }, %w{ goodbye! }])
       end
       context "when passing a string marked as tagged" do
         it "returns [] when given an empty string" do
-          subject.tokenize('', true).should == []
+          expect(subject.tokenize('', true)).to eq([])
         end
         it "returns an array of :unknown token sequences when given an untagged single line" do
-          subject.tokenize('hello, world!', true).should == [[['hello,', :unknown], ['world!', :unknown]]]
+          expect(subject.tokenize('hello, world!', true)).to eq([[['hello,', :unknown], ['world!', :unknown]]])
         end
         it "returns an array of :unknown token sequences when given two untagged lines" do
-          subject.tokenize("hello,\nworld!", true).should == [[['hello,', :unknown]], [['world!', :unknown]]]
+          expect(subject.tokenize("hello,\nworld!", true)).to eq([[['hello,', :unknown]], [['world!', :unknown]]])
         end
         it "returns an array of token/tag pair for each line when given a single tagged string" do
-          subject.tokenize('<a>hello</a>', true).should == [[['hello', :a]]]
+          expect(subject.tokenize('<a>hello</a>', true)).to eq([[['hello', :a]]])
         end
         it "returns an array of token/tag pair for each line when given a string with multiple tags" do
-          subject.tokenize('<a>hello world</a> <b> !</b>', true).should == [[['hello',:a], ['world', :a], ['!', :b]]]
+          expect(subject.tokenize('<a>hello world</a> <b> !</b>', true)).to eq([[['hello',:a], ['world', :a], ['!', :b]]])
         end
         it "raises an argument error if the string contains mismatched tags" do
@@ -53,22 +53,22 @@ module Anystyle::Parser
     describe "#prepare" do
       it 'returns an array of expanded token sequences' do
-        subject.prepare('hello, world!').should == [['hello, , h he hel hell , o, lo, llo, hello other none 0 no-male no-female no-surname no-month no-place no-publisher no-journal no-editors 0 internal other none', 'world! ! w wo wor worl ! d! ld! rld! world other none 36 no-male no-female surname no-month no-place publisher no-journal no-editors 5 terminal other none']]
+        expect(subject.prepare('hello, world!')).to eq([['hello, , h he hel hell , o, lo, llo, hello other none 0 no-male no-female no-surname no-month no-place no-publisher no-journal no-editors 0 internal other none', 'world! ! w wo wor worl ! d! ld! rld! world other none 36 no-male no-female surname no-month no-place publisher no-journal no-editors 5 terminal other none']])
       end
       context 'when marking the input as being tagged' do
         let(:input) { %{<author> A. Cau, R. Kuiper, and W.-P. de Roever. </author> <title> Formalising Dijkstra's development strategy within Stark's formalism. </title> <editor> In C. B. Jones, R. C. Shaw, and T. Denvir, editors, </editor> <booktitle> Proc. 5th. BCS-FACS Refinement Workshop, </booktitle> <date> 1992. </date>} }
         it 'returns an array of expaned and labelled token sequences for a tagged string' do
-          subject.prepare(input, true)[0].map { |t| t[/\S+$/] }.should == %w{ author author author author author author author author title title title title title title title editor editor editor editor editor editor editor editor editor editor editor booktitle booktitle booktitle booktitle booktitle date }
+          expect(subject.prepare(input, true)[0].map { |t| t[/\S+$/] }).to eq(%w{ author author author author author author author author title title title title title title title editor editor editor editor editor editor editor editor editor editor editor booktitle booktitle booktitle booktitle booktitle date })
         end
         it 'returns an array of expanded and labelled :unknown token sequences for an untagged input' do
-          subject.prepare('hello, world!', true)[0].map { |t| t[/\S+$/] }.should == %w{ unknown unknown }
+          expect(subject.prepare('hello, world!', true)[0].map { |t| t[/\S+$/] }).to eq(%w{ unknown unknown })
         end
         it 'converts xml entitites' do
-          subject.prepare("<note>&gt;&gt; &amp; foo</note>", true)[0].map { |t| t[/\S+/] }.should == %w{ >> & foo }
+          expect(subject.prepare("<note>&gt;&gt; &amp; foo</note>", true)[0].map { |t| t[/\S+/] }).to eq(%w{ >> & foo })
         end
       end
     end
@@ -77,31 +77,31 @@ module Anystyle::Parser
       let(:citation) { 'Perec, Georges. A Void. London: The Harvill Press, 1995. p.108.' }
       it 'returns an array of labelled segments' do
-        subject.label(citation)[0].map(&:first).should == [:author, :title, :location, :publisher, :date, :pages]
+        expect(subject.label(citation)[0].map(&:first)).to eq([:author, :title, :location, :publisher, :date, :pages])
       end
       describe 'when passed more than one line' do
         it 'returns two arrays' do
-          subject.label("foo\nbar").should have(2).elements
+          expect(subject.label("foo\nbar").size).to eq(2)
         end
       end
       describe 'when passed invalid input' do
         it 'returns an empty array for an empty string' do
-          subject.label('').should == []
+          expect(subject.label('')).to eq([])
         end
         it 'returns an empty array for empty lines' do
-          subject.label("\n").should == []
-          subject.label("\n ").should == []
-          subject.label(" \n ").should == []
-          subject.label(" \n").should == []
+          expect(subject.label("\n")).to eq([])
+          expect(subject.label("\n ")).to eq([])
+          expect(subject.label(" \n ")).to eq([])
+          expect(subject.label(" \n")).to eq([])
         end
         it 'does not fail for unrecognizable input' do
-          lambda { subject.label("@misc{70213094902020,\n") }.should_not raise_error
-          lambda { subject.label("doi = {DOI:10.1503/jpn.100140}\n}\n") }.should_not raise_error
-          lambda { subject.label("\n doi ") }.should_not raise_error
+          expect { subject.label("@misc{70213094902020,\n") }.not_to raise_error
+          expect { subject.label("doi = {DOI:10.1503/jpn.100140}\n}\n") }.not_to raise_error
+          expect { subject.label("\n doi ") }.not_to raise_error
         end
       end
@@ -112,28 +112,36 @@ module Anystyle::Parser
       let(:citation) { 'Perec, Georges. A Void. London: The Harvill Press, 1995. p.108.' }
       it 'returns a hash of label/segment pairs by default' do
-        subject.parse(citation)[0].should == { :author => 'Perec, Georges', :title => 'A Void', :location => 'London', :publisher => 'The Harvill Press', :year => 1995, :pages => '108', :type => :book }
+        expect(subject.parse(citation)[0]).to eq({
+          :author => 'Perec, Georges',
+          :title => 'A Void',
+          :location => 'London',
+          :publisher => 'The Harvill Press',
+          :date => '1995',
+          :pages => '108',
+          :type => :book
+        })
       end
       describe 'using output format "tags"' do
         it 'returns a tagged string' do
-          subject.parse(citation, :tags)[0].should == '<author>Perec, Georges.</author> <title>A Void.</title> <location>London:</location> <publisher>The Harvill Press,</publisher> <date>1995.</date> <pages>p.108.</pages>'
+          expect(subject.parse(citation, :tags)[0]).to eq('<author>Perec, Georges.</author> <title>A Void.</title> <location>London:</location> <publisher>The Harvill Press,</publisher> <date>1995.</date> <pages>p.108.</pages>')
         end
       end
       it 'returns the label/token arrays for format "raw"' do
-        subject.parse(citation, :raw)[0][0].should == [:author, 'Perec,']
+        expect(subject.parse(citation, :raw)[0][0]).to eq([:author, 'Perec,'])
       end
       it 'returns the token in original order for format "raw"' do
-        subject.parse(citation, :raw)[0].map(&:last).join(' ').should == citation
+        expect(subject.parse(citation, :raw)[0].map(&:last).join(' ')).to eq(citation)
         difference = 'Derrida, J. (1967). L’écriture et la différence (1 éd.). Paris: Éditions du Seuil.'
-        subject.parse(difference, :raw)[0].map(&:last).join(' ').should == difference
+        expect(subject.parse(difference, :raw)[0].map(&:last).join(' ')).to eq(difference)
       end
       it 'returns xml document for format "raw"' do
-        subject.parse(citation, :xml).should == '<?xml version="1.0" encoding="UTF-8"?><references><reference><author>Perec, Georges.</author><title>A Void.</title><location>London:</location><publisher>The Harvill Press,</publisher><date>1995.</date><pages>p.108.</pages></reference></references>'
+        expect(subject.parse(citation, :xml)).to eq('<?xml version="1.0" encoding="UTF-8"?><references><reference><author>Perec, Georges.</author><title>A Void.</title><location>London:</location><publisher>The Harvill Press,</publisher><date>1995.</date><pages>p.108.</pages></reference></references>')
       end
     end
@@ -145,14 +153,14 @@ module Anystyle::Parser
         it 'recognizes trained references' do
           subject.learn dps[0]
-          subject.parse(strip_tags(dps[0]), :tags)[0].should == dps[0]
+          expect(subject.parse(strip_tags(dps[0]), :tags)[0]).to eq(dps[0])
         end
         it 'recognizes trained references when learnt in one go' do
           subject.learn dps
           dps.each do |d|
-            subject.parse(strip_tags(d), :tags)[0].should == d
+            expect(subject.parse(strip_tags(d), :tags)[0]).to eq(d)
           end
         end
@@ -164,7 +172,7 @@ module Anystyle::Parser
           end
           dps.each do |d|
-            subject.parse(strip_tags(d), :tags)[0].should == d
+            expect(subject.parse(strip_tags(d), :tags)[0]).to eq(d)
           end
         end
       end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: anystyle-parser
 version: !ruby/object:Gem::Version
-  version: 0.6.9
+  version: 0.6.10
 platform: ruby
 authors:
 - Sylvester Keil
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-05-16 00:00:00.000000000 Z
+date: 2014-07-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bibtex-ruby
@@ -64,20 +64,14 @@ dependencies:
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        version: '0.8'
-    - - '>='
-      - !ruby/object:Gem::Version
-        version: 0.8.7
+        version: '0.9'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        version: '0.8'
-    - - '>='
-      - !ruby/object:Gem::Version
-        version: 0.8.7
+        version: '0.9'
 description: A sophisticated parser for academic reference lists and bibliographies
   based on machine learning algorithms using conditional random fields.
 email: