anystyle-parser 0.6.5 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f1f0652bc06c8a041ac31417116c39b1bdb750f0
|
|
4
|
+
data.tar.gz: 694b97ca3f9686f6ef06d374f883ff210d8f7f04
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 733772f8b7418e6652cfe8eb28702eb7c20e4fda0ceec8ea6c02e093e548b21fd3d1c6d7e98f58dd32a053946b77c9f1a60ee8e1be74bd8621265836ade4882a
|
|
7
|
+
data.tar.gz: bf608a4165adfe1351c9396e5722217e2a2c79ea0e9ee83e22f731bc04a9df8e9063828805905b7e47b46c72074f4e11d431a89bc1a555fd2c65682c77fe25af
|
|
@@ -85,7 +85,7 @@ module Anystyle
|
|
|
85
85
|
hash[:editor] = hash.delete(:author)
|
|
86
86
|
hash = normalize_editor(hash)
|
|
87
87
|
else
|
|
88
|
-
hash[:'more-authors'] = true if
|
|
88
|
+
hash[:'more-authors'] = true if strip_et_al(authors)
|
|
89
89
|
authors.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
|
|
90
90
|
hash[:author] = normalize_names(authors)
|
|
91
91
|
end
|
|
@@ -110,12 +110,13 @@ module Anystyle
|
|
|
110
110
|
end
|
|
111
111
|
end
|
|
112
112
|
|
|
113
|
-
hash[:'more-editors'] = true if
|
|
113
|
+
hash[:'more-editors'] = true if strip_et_al(editors)
|
|
114
114
|
|
|
115
115
|
editors.gsub!(/^\W+|\W+$/, '')
|
|
116
116
|
editors.gsub!(/^in:?\s+/i, '')
|
|
117
117
|
editors.gsub!(/[^[:alpha:]]*[Ee]d(s|itors?|ited)?\b[^[:alpha:]]*/, '')
|
|
118
118
|
editors.gsub!(/[^[:alpha:]]*([Hh]rsg|Herausgeber)\b[^[:alpha:]]*/, '')
|
|
119
|
+
editors.gsub!(/\b[Hh]erausgeben von\b/, '')
|
|
119
120
|
editors.gsub!(/\bby\b/i, '')
|
|
120
121
|
|
|
121
122
|
is_trans = !!editors.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
|
|
@@ -126,6 +127,10 @@ module Anystyle
|
|
|
126
127
|
hash
|
|
127
128
|
end
|
|
128
129
|
|
|
130
|
+
def strip_et_al(names)
|
|
131
|
+
!!names.sub!(/(\bet\s+al\b|\bu\.\s*a\.|(\band|\&)\s+others).*$/, '')
|
|
132
|
+
end
|
|
133
|
+
|
|
129
134
|
def normalize_translator(hash)
|
|
130
135
|
translators = hash[:translator]
|
|
131
136
|
|
|
@@ -10,7 +10,7 @@ module Anystyle
|
|
|
10
10
|
:pattern => File.expand_path('../support/anystyle.pat', __FILE__),
|
|
11
11
|
:compact => true,
|
|
12
12
|
:threads => 4,
|
|
13
|
-
:separator => /\s
|
|
13
|
+
:separator => /\s+|\b(\d+:)/,
|
|
14
14
|
:tagged_separator => /\s+|(<\/?[^>]+>)/,
|
|
15
15
|
:strip => /[^[:alnum:]]/,
|
|
16
16
|
:format => :hash,
|
|
@@ -107,7 +107,7 @@ module Anystyle
|
|
|
107
107
|
tokens
|
|
108
108
|
end
|
|
109
109
|
else
|
|
110
|
-
lines(string).map { |s| s.split(options[:separator]) }
|
|
110
|
+
lines(string).map { |s| s.split(options[:separator]).reject(&:empty?) }
|
|
111
111
|
end
|
|
112
112
|
end
|
|
113
113
|
|
|
@@ -74,6 +74,15 @@ module Anystyle
|
|
|
74
74
|
n.normalize_editor(:editor => 'ed. by Edward Wood').should == { :editor => 'Wood, Edward' }
|
|
75
75
|
n.normalize_editor(:editor => 'ed by Edward Wood').should == { :editor => 'Wood, Edward' }
|
|
76
76
|
end
|
|
77
|
+
|
|
78
|
+
it "strips et al" do
|
|
79
|
+
n.normalize_editor(:editor => 'Edward Wood et al')[:editor].should == 'Wood, Edward'
|
|
80
|
+
n.normalize_editor(:editor => 'Edward Wood et al.')[:editor].should == 'Wood, Edward'
|
|
81
|
+
n.normalize_editor(:editor => 'Edward Wood u.a.')[:editor].should == 'Wood, Edward'
|
|
82
|
+
n.normalize_editor(:editor => 'Edward Wood u. a.')[:editor].should == 'Wood, Edward'
|
|
83
|
+
n.normalize_editor(:editor => 'Edward Wood and others')[:editor].should == 'Wood, Edward'
|
|
84
|
+
n.normalize_editor(:editor => 'Edward Wood & others')[:editor].should == 'Wood, Edward'
|
|
85
|
+
end
|
|
77
86
|
end
|
|
78
87
|
|
|
79
88
|
describe 'editors extraction' do
|
|
@@ -14,6 +14,10 @@ module Anystyle::Parser
|
|
|
14
14
|
subject.tokenize('hello, world!').should == [%w{ hello, world! }]
|
|
15
15
|
end
|
|
16
16
|
|
|
17
|
+
it "tokenizes volume/page-range exception" do
|
|
18
|
+
subject.tokenize('hello:world! http://abc.com 3:45 3:1-2 23:1').should == [%w{ hello:world! http://abc.com 3: 45 3: 1-2 23: 1 }]
|
|
19
|
+
end
|
|
20
|
+
|
|
17
21
|
it "takes two lines and returns an array of token sequences" do
|
|
18
22
|
subject.tokenize("hello, world!\ngoodbye!").should == [%w{ hello, world! }, %w{ goodbye! }]
|
|
19
23
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: anystyle-parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.6.5
|
|
4
|
+
version: 0.6.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sylvester Keil
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2014-05-
|
|
11
|
+
date: 2014-05-16 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bibtex-ruby
|