anystyle-parser 0.6.5 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4124e6f53dcd7d11c94dd91f118ad72bbc1d3397
4
- data.tar.gz: c7687a359f621cd4071b05b644dc980ad893d040
3
+ metadata.gz: f1f0652bc06c8a041ac31417116c39b1bdb750f0
4
+ data.tar.gz: 694b97ca3f9686f6ef06d374f883ff210d8f7f04
5
5
  SHA512:
6
- metadata.gz: 782106a15036dd28d7120f22228d79fc44c22cacec68b3faab1f4040ef57abe1bda63dd6d20d8e6bc710a24e7d82764a9ebf314bf2c9794f5eaa3f3e90758817
7
- data.tar.gz: 2ad81353224b8e811508ad415fae29c137c38585288bad1dbe3e3fe1c9dadba6ad0653a5deb68d57f52eef211dd29af32751c4b3b826380e2db69de38703b4f9
6
+ metadata.gz: 733772f8b7418e6652cfe8eb28702eb7c20e4fda0ceec8ea6c02e093e548b21fd3d1c6d7e98f58dd32a053946b77c9f1a60ee8e1be74bd8621265836ade4882a
7
+ data.tar.gz: bf608a4165adfe1351c9396e5722217e2a2c79ea0e9ee83e22f731bc04a9df8e9063828805905b7e47b46c72074f4e11d431a89bc1a555fd2c65682c77fe25af
@@ -85,7 +85,7 @@ module Anystyle
85
85
  hash[:editor] = hash.delete(:author)
86
86
  hash = normalize_editor(hash)
87
87
  else
88
- hash[:'more-authors'] = true if !!authors.sub!(/\bet\.?\s*al.*$/i, '')
88
+ hash[:'more-authors'] = true if strip_et_al(authors)
89
89
  authors.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
90
90
  hash[:author] = normalize_names(authors)
91
91
  end
@@ -110,12 +110,13 @@ module Anystyle
110
110
  end
111
111
  end
112
112
 
113
- hash[:'more-editors'] = true if !!editors.sub!(/\bet\.?\s*al.*$/i, '')
113
+ hash[:'more-editors'] = true if strip_et_al(editors)
114
114
 
115
115
  editors.gsub!(/^\W+|\W+$/, '')
116
116
  editors.gsub!(/^in:?\s+/i, '')
117
117
  editors.gsub!(/[^[:alpha:]]*[Ee]d(s|itors?|ited)?\b[^[:alpha:]]*/, '')
118
118
  editors.gsub!(/[^[:alpha:]]*([Hh]rsg|Herausgeber)\b[^[:alpha:]]*/, '')
119
+ editors.gsub!(/\b[Hh]erausgeben von\b/, '')
119
120
  editors.gsub!(/\bby\b/i, '')
120
121
 
121
122
  is_trans = !!editors.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
@@ -126,6 +127,10 @@ module Anystyle
126
127
  hash
127
128
  end
128
129
 
130
+ def strip_et_al(names)
131
+ !!names.sub!(/(\bet\s+al\b|\bu\.\s*a\.|(\band|\&)\s+others).*$/, '')
132
+ end
133
+
129
134
  def normalize_translator(hash)
130
135
  translators = hash[:translator]
131
136
 
@@ -10,7 +10,7 @@ module Anystyle
10
10
  :pattern => File.expand_path('../support/anystyle.pat', __FILE__),
11
11
  :compact => true,
12
12
  :threads => 4,
13
- :separator => /\s+/,
13
+ :separator => /\s+|\b(\d+:)/,
14
14
  :tagged_separator => /\s+|(<\/?[^>]+>)/,
15
15
  :strip => /[^[:alnum:]]/,
16
16
  :format => :hash,
@@ -107,7 +107,7 @@ module Anystyle
107
107
  tokens
108
108
  end
109
109
  else
110
- lines(string).map { |s| s.split(options[:separator]) }
110
+ lines(string).map { |s| s.split(options[:separator]).reject(&:empty?) }
111
111
  end
112
112
  end
113
113
 
@@ -1,5 +1,5 @@
1
1
  module Anystyle
2
2
  module Parser
3
- VERSION = '0.6.5'.freeze
3
+ VERSION = '0.6.6'.freeze
4
4
  end
5
5
  end
@@ -74,6 +74,15 @@ module Anystyle
74
74
  n.normalize_editor(:editor => 'ed. by Edward Wood').should == { :editor => 'Wood, Edward' }
75
75
  n.normalize_editor(:editor => 'ed by Edward Wood').should == { :editor => 'Wood, Edward' }
76
76
  end
77
+
78
+ it "strips et al" do
79
+ n.normalize_editor(:editor => 'Edward Wood et al')[:editor].should == 'Wood, Edward'
80
+ n.normalize_editor(:editor => 'Edward Wood et al.')[:editor].should == 'Wood, Edward'
81
+ n.normalize_editor(:editor => 'Edward Wood u.a.')[:editor].should == 'Wood, Edward'
82
+ n.normalize_editor(:editor => 'Edward Wood u. a.')[:editor].should == 'Wood, Edward'
83
+ n.normalize_editor(:editor => 'Edward Wood and others')[:editor].should == 'Wood, Edward'
84
+ n.normalize_editor(:editor => 'Edward Wood & others')[:editor].should == 'Wood, Edward'
85
+ end
77
86
  end
78
87
 
79
88
  describe 'editors extraction' do
@@ -14,6 +14,10 @@ module Anystyle::Parser
14
14
  subject.tokenize('hello, world!').should == [%w{ hello, world! }]
15
15
  end
16
16
 
17
+ it "tokenizes volume/page-range exception" do
18
+ subject.tokenize('hello:world! http://abc.com 3:45 3:1-2 23:1').should == [%w{ hello:world! http://abc.com 3: 45 3: 1-2 23: 1 }]
19
+ end
20
+
17
21
  it "takes two lines and returns an array of token sequences" do
18
22
  subject.tokenize("hello, world!\ngoodbye!").should == [%w{ hello, world! }, %w{ goodbye! }]
19
23
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anystyle-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.5
4
+ version: 0.6.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-14 00:00:00.000000000 Z
11
+ date: 2014-05-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bibtex-ruby