anystyle-parser 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.md +4 -0
- data/lib/anystyle/parser/dictionary.rb +2 -2
- data/lib/anystyle/parser/normalizer.rb +20 -54
- data/lib/anystyle/parser/parser.rb +6 -1
- data/lib/anystyle/parser/version.rb +1 -1
- data/spec/anystyle/parser/normalizer_spec.rb +5 -5
- metadata +16 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c9fa55f1ed5e371c241610ef5d77ae1f9696131
|
4
|
+
data.tar.gz: 7e6e3d024694dc1e8947bca12a9eb248f356c3cd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c6ab99a896fbe1cbc5e7ab2a9ab729c289a052ff1b98a7e870ace9260519ea9500284a05e89acf8752548a7972bf629529fb17e2e967bb4d87f75a36615638ad
|
7
|
+
data.tar.gz: 4f75412d5bfd364e492ffc6c83b2e77d4853d75359f5824fc556daa658138db46c3aa391600f03d247c15b272088ba300eb354cafeac57e6e1f088885930334f
|
data/HISTORY.md
CHANGED
@@ -63,14 +63,14 @@ module Anystyle
|
|
63
63
|
|
64
64
|
begin
|
65
65
|
require 'redis'
|
66
|
-
@modes.
|
66
|
+
@modes.push :redis
|
67
67
|
rescue LoadError
|
68
68
|
# info 'no redis support detected'
|
69
69
|
end
|
70
70
|
|
71
71
|
begin
|
72
72
|
require 'kyotocabinet'
|
73
|
-
@modes.
|
73
|
+
@modes.push :kyoto
|
74
74
|
rescue LoadError
|
75
75
|
# info 'no kyoto-cabinet support detected'
|
76
76
|
end
|
@@ -55,9 +55,6 @@ module Anystyle
|
|
55
55
|
token.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
|
56
56
|
hash[key] = token
|
57
57
|
hash
|
58
|
-
rescue => e
|
59
|
-
warn e.message
|
60
|
-
hash
|
61
58
|
end
|
62
59
|
|
63
60
|
def normalize_author(hash)
|
@@ -68,14 +65,11 @@ module Anystyle
|
|
68
65
|
hash[:editor] = hash.delete(:author)
|
69
66
|
hash = normalize_editor(hash)
|
70
67
|
else
|
71
|
-
hash['more-authors'] = true if !!authors.sub!(/\bet\.?\s*al.*$/i, '')
|
68
|
+
hash[:'more-authors'] = true if !!authors.sub!(/\bet\.?\s*al.*$/i, '')
|
72
69
|
authors.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
|
73
70
|
hash[:author] = normalize_names(authors)
|
74
71
|
end
|
75
72
|
|
76
|
-
hash
|
77
|
-
rescue => e
|
78
|
-
warn e.message
|
79
73
|
hash
|
80
74
|
end
|
81
75
|
|
@@ -96,9 +90,9 @@ module Anystyle
|
|
96
90
|
end
|
97
91
|
end
|
98
92
|
|
99
|
-
hash['more-editors'] = true if !!editors.sub!(/\bet\.?\s*al.*$/i, '')
|
93
|
+
hash[:'more-editors'] = true if !!editors.sub!(/\bet\.?\s*al.*$/i, '')
|
100
94
|
|
101
|
-
editors.gsub!(
|
95
|
+
editors.gsub!(/^\W+|\W+$/, '')
|
102
96
|
editors.gsub!(/^in\s+/i, '')
|
103
97
|
editors.gsub!(/[^[:alpha:]]*[Ee]d(s|itors?|ited)?[^[:alpha:]]*/, '')
|
104
98
|
editors.gsub!(/[^[:alpha:]]*([Hh]rsg|Herausgeber)[^[:alpha:]]*/, '')
|
@@ -107,37 +101,39 @@ module Anystyle
|
|
107
101
|
is_trans = !!editors.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
|
108
102
|
|
109
103
|
hash[:editor] = normalize_names(editors)
|
110
|
-
hash[:translator] = hash
|
104
|
+
hash[:translator] = hash.delete :editor if is_trans
|
111
105
|
|
112
|
-
hash
|
113
|
-
rescue => e
|
114
|
-
warn e.message
|
115
106
|
hash
|
116
107
|
end
|
117
108
|
|
118
109
|
def normalize_translator(hash)
|
119
110
|
translators = hash[:translator]
|
120
111
|
|
121
|
-
|
112
|
+
editors.gsub!(/^\W+|\W+$/, '')
|
122
113
|
translators.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
|
123
114
|
translators.gsub!(/\bby\b/i, '')
|
124
115
|
|
125
116
|
hash[:translator] = normalize_names(translators)
|
126
117
|
hash
|
127
|
-
rescue => e
|
128
|
-
warn e.message
|
129
|
-
hash
|
130
118
|
end
|
131
119
|
|
132
|
-
Namae::Parser.instance.options[:prefer_comma_as_separator] = true
|
133
|
-
|
134
120
|
def normalize_names(names)
|
135
|
-
Namae.parse!(names).map
|
121
|
+
Namae.parse!(names).map { |name|
|
122
|
+
unless name.given.nil? || name.family.nil?
|
123
|
+
name.given.gsub!(/\b([[:upper:]])(\s|$)/, '\1.\2')
|
124
|
+
end
|
125
|
+
|
126
|
+
name.sort_order
|
127
|
+
|
128
|
+
}.join(' and ')
|
129
|
+
|
136
130
|
rescue => e
|
137
131
|
warn e.message
|
138
|
-
|
132
|
+
names
|
139
133
|
end
|
140
134
|
|
135
|
+
Namae.options[:prefer_comma_as_separator] = true
|
136
|
+
|
141
137
|
def normalize_title(hash)
|
142
138
|
title, container = hash[:title]
|
143
139
|
|
@@ -148,14 +144,11 @@ module Anystyle
|
|
148
144
|
|
149
145
|
extract_edition(title, hash)
|
150
146
|
|
151
|
-
title.gsub!(
|
147
|
+
title.gsub!(/^\s+|[\.,:;\s]+$/, '')
|
152
148
|
title.gsub!(/^["'”’´‘“`]|["'”’´‘“`]$/, '')
|
153
149
|
|
154
150
|
hash[:title] = title
|
155
151
|
|
156
|
-
hash
|
157
|
-
rescue => e
|
158
|
-
warn e.message
|
159
152
|
hash
|
160
153
|
end
|
161
154
|
|
@@ -189,16 +182,13 @@ module Anystyle
|
|
189
182
|
booktitle, *dangling = hash[:booktitle]
|
190
183
|
unmatched(:booktitle, hash, dangling) unless dangling.empty?
|
191
184
|
|
192
|
-
booktitle.gsub!(/^in\s
|
185
|
+
booktitle.gsub!(/^in\s+/i, '')
|
193
186
|
|
194
187
|
extract_edition(booktitle, hash)
|
195
188
|
|
196
|
-
booktitle.gsub!(
|
189
|
+
booktitle.gsub!(/^\s+|[\.,:;\s]+$/, '')
|
197
190
|
hash[:booktitle] = booktitle
|
198
191
|
|
199
|
-
hash
|
200
|
-
rescue => e
|
201
|
-
warn e.message
|
202
192
|
hash
|
203
193
|
end
|
204
194
|
|
@@ -209,9 +199,6 @@ module Anystyle
|
|
209
199
|
journal.gsub!(/^[\s]+|[\.,:;\s]+$/, '')
|
210
200
|
hash[:journal] = journal
|
211
201
|
|
212
|
-
hash
|
213
|
-
rescue => e
|
214
|
-
warn e.message
|
215
202
|
hash
|
216
203
|
end
|
217
204
|
|
@@ -228,9 +215,6 @@ module Anystyle
|
|
228
215
|
|
229
216
|
hash[:container] = container
|
230
217
|
hash
|
231
|
-
rescue => e
|
232
|
-
warn e.message
|
233
|
-
hash
|
234
218
|
end
|
235
219
|
|
236
220
|
def normalize_date(hash)
|
@@ -246,9 +230,6 @@ module Anystyle
|
|
246
230
|
hash.delete(:date)
|
247
231
|
end
|
248
232
|
|
249
|
-
hash
|
250
|
-
rescue => e
|
251
|
-
warn e.message
|
252
233
|
hash
|
253
234
|
end
|
254
235
|
|
@@ -276,9 +257,6 @@ module Anystyle
|
|
276
257
|
end
|
277
258
|
end
|
278
259
|
|
279
|
-
hash
|
280
|
-
rescue => e
|
281
|
-
warn e.message
|
282
260
|
hash
|
283
261
|
end
|
284
262
|
|
@@ -302,9 +280,6 @@ module Anystyle
|
|
302
280
|
hash[:pages] = $1
|
303
281
|
end
|
304
282
|
|
305
|
-
hash
|
306
|
-
rescue => e
|
307
|
-
warn e.message
|
308
283
|
hash
|
309
284
|
end
|
310
285
|
|
@@ -321,9 +296,6 @@ module Anystyle
|
|
321
296
|
|
322
297
|
hash[:location] = location
|
323
298
|
hash
|
324
|
-
rescue => e
|
325
|
-
warn e.message
|
326
|
-
hash
|
327
299
|
end
|
328
300
|
|
329
301
|
def normalize_isbn(hash)
|
@@ -333,9 +305,6 @@ module Anystyle
|
|
333
305
|
isbn = isbn[/[\d-]+/]
|
334
306
|
hash[:isbn] = isbn
|
335
307
|
|
336
|
-
hash
|
337
|
-
rescue => e
|
338
|
-
warn e.message
|
339
308
|
hash
|
340
309
|
end
|
341
310
|
|
@@ -346,9 +315,6 @@ module Anystyle
|
|
346
315
|
url.gsub!(/^\s+|[,\s]+$/, '')
|
347
316
|
hash[:isbn] = isbn
|
348
317
|
hash
|
349
|
-
rescue => e
|
350
|
-
warn e.message
|
351
|
-
hash
|
352
318
|
end
|
353
319
|
|
354
320
|
private
|
@@ -163,8 +163,13 @@ module Anystyle
|
|
163
163
|
|
164
164
|
def normalize(hash)
|
165
165
|
hash.keys.each do |label|
|
166
|
-
|
166
|
+
begin
|
167
|
+
normalizer.send("normalize_#{label}", hash)
|
168
|
+
rescue => e
|
169
|
+
warn e.message
|
170
|
+
end
|
167
171
|
end
|
172
|
+
|
168
173
|
classify hash
|
169
174
|
end
|
170
175
|
|
@@ -7,11 +7,11 @@ module Anystyle
|
|
7
7
|
describe "#tokenize_names" do
|
8
8
|
|
9
9
|
it "tokenizes 'A B'" do
|
10
|
-
Normalizer.instance.normalize_names('A B').should == 'B, A'
|
10
|
+
Normalizer.instance.normalize_names('A B').should == 'B, A.'
|
11
11
|
end
|
12
12
|
|
13
13
|
it "tokenizes 'A, B'" do
|
14
|
-
Normalizer.instance.normalize_names('A, B').should == 'A, B'
|
14
|
+
Normalizer.instance.normalize_names('A, B').should == 'A, B.'
|
15
15
|
end
|
16
16
|
|
17
17
|
# it "tokenizes 'A, jr., B'" do
|
@@ -23,11 +23,11 @@ module Anystyle
|
|
23
23
|
# end
|
24
24
|
|
25
25
|
it "tokenizes 'A, B, C, D'" do
|
26
|
-
Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B and C, D'
|
26
|
+
Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B. and C, D.'
|
27
27
|
end
|
28
28
|
|
29
29
|
it "tokenizes 'A, B, C'" do
|
30
|
-
Normalizer.instance.normalize_names('A, B, C').should == 'A, B and C'
|
30
|
+
Normalizer.instance.normalize_names('A, B, C').should == 'A, B. and C'
|
31
31
|
end
|
32
32
|
|
33
33
|
it "tokenizes 'Aa Bb, C.'" do
|
@@ -35,7 +35,7 @@ module Anystyle
|
|
35
35
|
end
|
36
36
|
|
37
37
|
it "tokenizes 'Aa Bb, Cc Dd, and E F G'" do
|
38
|
-
Normalizer.instance.normalize_names('Aa Bb, C D, and E F G').should == 'Bb, Aa and D, C and G, E F'
|
38
|
+
Normalizer.instance.normalize_names('Aa Bb, C D, and E F G').should == 'Bb, Aa and D, C. and G, E. F.'
|
39
39
|
end
|
40
40
|
|
41
41
|
[
|
metadata
CHANGED
@@ -1,55 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anystyle-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-03-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bibtex-ruby
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ~>
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '3.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '3.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: wapiti
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ~>
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0.1'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0.1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: namae
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ~>
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0.8'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0.8'
|
55
55
|
description: A sophisticated parser for academic reference lists and bibliographies
|
@@ -96,27 +96,27 @@ licenses:
|
|
96
96
|
metadata: {}
|
97
97
|
post_install_message:
|
98
98
|
rdoc_options:
|
99
|
-
-
|
100
|
-
-
|
101
|
-
-
|
102
|
-
- "
|
103
|
-
-
|
99
|
+
- --line-numbers
|
100
|
+
- --inline-source
|
101
|
+
- --title
|
102
|
+
- '"Anystyle Parser"'
|
103
|
+
- --main
|
104
104
|
- README.md
|
105
105
|
require_paths:
|
106
106
|
- lib
|
107
107
|
required_ruby_version: !ruby/object:Gem::Requirement
|
108
108
|
requirements:
|
109
|
-
- -
|
109
|
+
- - '>='
|
110
110
|
- !ruby/object:Gem::Version
|
111
111
|
version: 1.9.3
|
112
112
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
113
|
requirements:
|
114
|
-
- -
|
114
|
+
- - '>='
|
115
115
|
- !ruby/object:Gem::Version
|
116
116
|
version: '0'
|
117
117
|
requirements: []
|
118
118
|
rubyforge_project:
|
119
|
-
rubygems_version: 2.
|
119
|
+
rubygems_version: 2.0.14
|
120
120
|
signing_key:
|
121
121
|
specification_version: 4
|
122
122
|
summary: Smart and fast academic bibliography parser.
|