anystyle-parser 0.6.5 → 0.6.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4124e6f53dcd7d11c94dd91f118ad72bbc1d3397
4
- data.tar.gz: c7687a359f621cd4071b05b644dc980ad893d040
3
+ metadata.gz: f1f0652bc06c8a041ac31417116c39b1bdb750f0
4
+ data.tar.gz: 694b97ca3f9686f6ef06d374f883ff210d8f7f04
5
5
  SHA512:
6
- metadata.gz: 782106a15036dd28d7120f22228d79fc44c22cacec68b3faab1f4040ef57abe1bda63dd6d20d8e6bc710a24e7d82764a9ebf314bf2c9794f5eaa3f3e90758817
7
- data.tar.gz: 2ad81353224b8e811508ad415fae29c137c38585288bad1dbe3e3fe1c9dadba6ad0653a5deb68d57f52eef211dd29af32751c4b3b826380e2db69de38703b4f9
6
+ metadata.gz: 733772f8b7418e6652cfe8eb28702eb7c20e4fda0ceec8ea6c02e093e548b21fd3d1c6d7e98f58dd32a053946b77c9f1a60ee8e1be74bd8621265836ade4882a
7
+ data.tar.gz: bf608a4165adfe1351c9396e5722217e2a2c79ea0e9ee83e22f731bc04a9df8e9063828805905b7e47b46c72074f4e11d431a89bc1a555fd2c65682c77fe25af
@@ -85,7 +85,7 @@ module Anystyle
85
85
  hash[:editor] = hash.delete(:author)
86
86
  hash = normalize_editor(hash)
87
87
  else
88
- hash[:'more-authors'] = true if !!authors.sub!(/\bet\.?\s*al.*$/i, '')
88
+ hash[:'more-authors'] = true if strip_et_al(authors)
89
89
  authors.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
90
90
  hash[:author] = normalize_names(authors)
91
91
  end
@@ -110,12 +110,13 @@ module Anystyle
110
110
  end
111
111
  end
112
112
 
113
- hash[:'more-editors'] = true if !!editors.sub!(/\bet\.?\s*al.*$/i, '')
113
+ hash[:'more-editors'] = true if strip_et_al(editors)
114
114
 
115
115
  editors.gsub!(/^\W+|\W+$/, '')
116
116
  editors.gsub!(/^in:?\s+/i, '')
117
117
  editors.gsub!(/[^[:alpha:]]*[Ee]d(s|itors?|ited)?\b[^[:alpha:]]*/, '')
118
118
  editors.gsub!(/[^[:alpha:]]*([Hh]rsg|Herausgeber)\b[^[:alpha:]]*/, '')
119
+ editors.gsub!(/\b[Hh]erausgeben von\b/, '')
119
120
  editors.gsub!(/\bby\b/i, '')
120
121
 
121
122
  is_trans = !!editors.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
@@ -126,6 +127,10 @@ module Anystyle
126
127
  hash
127
128
  end
128
129
 
130
+ def strip_et_al(names)
131
+ !!names.sub!(/(\bet\s+al\b|\bu\.\s*a\.|(\band|\&)\s+others).*$/, '')
132
+ end
133
+
129
134
  def normalize_translator(hash)
130
135
  translators = hash[:translator]
131
136
 
@@ -10,7 +10,7 @@ module Anystyle
10
10
  :pattern => File.expand_path('../support/anystyle.pat', __FILE__),
11
11
  :compact => true,
12
12
  :threads => 4,
13
- :separator => /\s+/,
13
+ :separator => /\s+|\b(\d+:)/,
14
14
  :tagged_separator => /\s+|(<\/?[^>]+>)/,
15
15
  :strip => /[^[:alnum:]]/,
16
16
  :format => :hash,
@@ -107,7 +107,7 @@ module Anystyle
107
107
  tokens
108
108
  end
109
109
  else
110
- lines(string).map { |s| s.split(options[:separator]) }
110
+ lines(string).map { |s| s.split(options[:separator]).reject(&:empty?) }
111
111
  end
112
112
  end
113
113
 
@@ -1,5 +1,5 @@
1
1
  module Anystyle
2
2
  module Parser
3
- VERSION = '0.6.5'.freeze
3
+ VERSION = '0.6.6'.freeze
4
4
  end
5
5
  end
@@ -74,6 +74,15 @@ module Anystyle
74
74
  n.normalize_editor(:editor => 'ed. by Edward Wood').should == { :editor => 'Wood, Edward' }
75
75
  n.normalize_editor(:editor => 'ed by Edward Wood').should == { :editor => 'Wood, Edward' }
76
76
  end
77
+
78
+ it "strips et al" do
79
+ n.normalize_editor(:editor => 'Edward Wood et al')[:editor].should == 'Wood, Edward'
80
+ n.normalize_editor(:editor => 'Edward Wood et al.')[:editor].should == 'Wood, Edward'
81
+ n.normalize_editor(:editor => 'Edward Wood u.a.')[:editor].should == 'Wood, Edward'
82
+ n.normalize_editor(:editor => 'Edward Wood u. a.')[:editor].should == 'Wood, Edward'
83
+ n.normalize_editor(:editor => 'Edward Wood and others')[:editor].should == 'Wood, Edward'
84
+ n.normalize_editor(:editor => 'Edward Wood & others')[:editor].should == 'Wood, Edward'
85
+ end
77
86
  end
78
87
 
79
88
  describe 'editors extraction' do
@@ -14,6 +14,10 @@ module Anystyle::Parser
14
14
  subject.tokenize('hello, world!').should == [%w{ hello, world! }]
15
15
  end
16
16
 
17
+ it "tokenizes volume/page-range exception" do
18
+ subject.tokenize('hello:world! http://abc.com 3:45 3:1-2 23:1').should == [%w{ hello:world! http://abc.com 3: 45 3: 1-2 23: 1 }]
19
+ end
20
+
17
21
  it "takes two lines and returns an array of token sequences" do
18
22
  subject.tokenize("hello, world!\ngoodbye!").should == [%w{ hello, world! }, %w{ goodbye! }]
19
23
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anystyle-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.5
4
+ version: 0.6.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sylvester Keil
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-14 00:00:00.000000000 Z
11
+ date: 2014-05-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bibtex-ruby