anystyle-parser 0.6.5 → 0.6.6
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f1f0652bc06c8a041ac31417116c39b1bdb750f0
|
4
|
+
data.tar.gz: 694b97ca3f9686f6ef06d374f883ff210d8f7f04
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 733772f8b7418e6652cfe8eb28702eb7c20e4fda0ceec8ea6c02e093e548b21fd3d1c6d7e98f58dd32a053946b77c9f1a60ee8e1be74bd8621265836ade4882a
|
7
|
+
data.tar.gz: bf608a4165adfe1351c9396e5722217e2a2c79ea0e9ee83e22f731bc04a9df8e9063828805905b7e47b46c72074f4e11d431a89bc1a555fd2c65682c77fe25af
|
@@ -85,7 +85,7 @@ module Anystyle
|
|
85
85
|
hash[:editor] = hash.delete(:author)
|
86
86
|
hash = normalize_editor(hash)
|
87
87
|
else
|
88
|
-
hash[:'more-authors'] = true if
|
88
|
+
hash[:'more-authors'] = true if strip_et_al(authors)
|
89
89
|
authors.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
|
90
90
|
hash[:author] = normalize_names(authors)
|
91
91
|
end
|
@@ -110,12 +110,13 @@ module Anystyle
|
|
110
110
|
end
|
111
111
|
end
|
112
112
|
|
113
|
-
hash[:'more-editors'] = true if
|
113
|
+
hash[:'more-editors'] = true if strip_et_al(editors)
|
114
114
|
|
115
115
|
editors.gsub!(/^\W+|\W+$/, '')
|
116
116
|
editors.gsub!(/^in:?\s+/i, '')
|
117
117
|
editors.gsub!(/[^[:alpha:]]*[Ee]d(s|itors?|ited)?\b[^[:alpha:]]*/, '')
|
118
118
|
editors.gsub!(/[^[:alpha:]]*([Hh]rsg|Herausgeber)\b[^[:alpha:]]*/, '')
|
119
|
+
editors.gsub!(/\b[Hh]erausgeben von\b/, '')
|
119
120
|
editors.gsub!(/\bby\b/i, '')
|
120
121
|
|
121
122
|
is_trans = !!editors.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
|
@@ -126,6 +127,10 @@ module Anystyle
|
|
126
127
|
hash
|
127
128
|
end
|
128
129
|
|
130
|
+
def strip_et_al(names)
|
131
|
+
!!names.sub!(/(\bet\s+al\b|\bu\.\s*a\.|(\band|\&)\s+others).*$/, '')
|
132
|
+
end
|
133
|
+
|
129
134
|
def normalize_translator(hash)
|
130
135
|
translators = hash[:translator]
|
131
136
|
|
@@ -10,7 +10,7 @@ module Anystyle
|
|
10
10
|
:pattern => File.expand_path('../support/anystyle.pat', __FILE__),
|
11
11
|
:compact => true,
|
12
12
|
:threads => 4,
|
13
|
-
:separator => /\s
|
13
|
+
:separator => /\s+|\b(\d+:)/,
|
14
14
|
:tagged_separator => /\s+|(<\/?[^>]+>)/,
|
15
15
|
:strip => /[^[:alnum:]]/,
|
16
16
|
:format => :hash,
|
@@ -107,7 +107,7 @@ module Anystyle
|
|
107
107
|
tokens
|
108
108
|
end
|
109
109
|
else
|
110
|
-
lines(string).map { |s| s.split(options[:separator]) }
|
110
|
+
lines(string).map { |s| s.split(options[:separator]).reject(&:empty?) }
|
111
111
|
end
|
112
112
|
end
|
113
113
|
|
@@ -74,6 +74,15 @@ module Anystyle
|
|
74
74
|
n.normalize_editor(:editor => 'ed. by Edward Wood').should == { :editor => 'Wood, Edward' }
|
75
75
|
n.normalize_editor(:editor => 'ed by Edward Wood').should == { :editor => 'Wood, Edward' }
|
76
76
|
end
|
77
|
+
|
78
|
+
it "strips et al" do
|
79
|
+
n.normalize_editor(:editor => 'Edward Wood et al')[:editor].should == 'Wood, Edward'
|
80
|
+
n.normalize_editor(:editor => 'Edward Wood et al.')[:editor].should == 'Wood, Edward'
|
81
|
+
n.normalize_editor(:editor => 'Edward Wood u.a.')[:editor].should == 'Wood, Edward'
|
82
|
+
n.normalize_editor(:editor => 'Edward Wood u. a.')[:editor].should == 'Wood, Edward'
|
83
|
+
n.normalize_editor(:editor => 'Edward Wood and others')[:editor].should == 'Wood, Edward'
|
84
|
+
n.normalize_editor(:editor => 'Edward Wood & others')[:editor].should == 'Wood, Edward'
|
85
|
+
end
|
77
86
|
end
|
78
87
|
|
79
88
|
describe 'editors extraction' do
|
@@ -14,6 +14,10 @@ module Anystyle::Parser
|
|
14
14
|
subject.tokenize('hello, world!').should == [%w{ hello, world! }]
|
15
15
|
end
|
16
16
|
|
17
|
+
it "tokenizes volume/page-range exception" do
|
18
|
+
subject.tokenize('hello:world! http://abc.com 3:45 3:1-2 23:1').should == [%w{ hello:world! http://abc.com 3: 45 3: 1-2 23: 1 }]
|
19
|
+
end
|
20
|
+
|
17
21
|
it "takes two lines and returns an array of token sequences" do
|
18
22
|
subject.tokenize("hello, world!\ngoodbye!").should == [%w{ hello, world! }, %w{ goodbye! }]
|
19
23
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anystyle-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.5
|
4
|
+
version: 0.6.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-05-
|
11
|
+
date: 2014-05-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bibtex-ruby
|