anystyle-parser 0.4.0 → 0.4.1
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +4 -0
- data/lib/anystyle/parser/dictionary.rb +2 -2
- data/lib/anystyle/parser/normalizer.rb +20 -54
- data/lib/anystyle/parser/parser.rb +6 -1
- data/lib/anystyle/parser/version.rb +1 -1
- data/spec/anystyle/parser/normalizer_spec.rb +5 -5
- metadata +16 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0c9fa55f1ed5e371c241610ef5d77ae1f9696131
|
4
|
+
data.tar.gz: 7e6e3d024694dc1e8947bca12a9eb248f356c3cd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c6ab99a896fbe1cbc5e7ab2a9ab729c289a052ff1b98a7e870ace9260519ea9500284a05e89acf8752548a7972bf629529fb17e2e967bb4d87f75a36615638ad
|
7
|
+
data.tar.gz: 4f75412d5bfd364e492ffc6c83b2e77d4853d75359f5824fc556daa658138db46c3aa391600f03d247c15b272088ba300eb354cafeac57e6e1f088885930334f
|
data/HISTORY.md
CHANGED
@@ -63,14 +63,14 @@ module Anystyle
|
|
63
63
|
|
64
64
|
begin
|
65
65
|
require 'redis'
|
66
|
-
@modes.
|
66
|
+
@modes.push :redis
|
67
67
|
rescue LoadError
|
68
68
|
# info 'no redis support detected'
|
69
69
|
end
|
70
70
|
|
71
71
|
begin
|
72
72
|
require 'kyotocabinet'
|
73
|
-
@modes.
|
73
|
+
@modes.push :kyoto
|
74
74
|
rescue LoadError
|
75
75
|
# info 'no kyoto-cabinet support detected'
|
76
76
|
end
|
@@ -55,9 +55,6 @@ module Anystyle
|
|
55
55
|
token.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
|
56
56
|
hash[key] = token
|
57
57
|
hash
|
58
|
-
rescue => e
|
59
|
-
warn e.message
|
60
|
-
hash
|
61
58
|
end
|
62
59
|
|
63
60
|
def normalize_author(hash)
|
@@ -68,14 +65,11 @@ module Anystyle
|
|
68
65
|
hash[:editor] = hash.delete(:author)
|
69
66
|
hash = normalize_editor(hash)
|
70
67
|
else
|
71
|
-
hash['more-authors'] = true if !!authors.sub!(/\bet\.?\s*al.*$/i, '')
|
68
|
+
hash[:'more-authors'] = true if !!authors.sub!(/\bet\.?\s*al.*$/i, '')
|
72
69
|
authors.gsub!(/^[^[:alnum:]]+|[^[:alnum:]]+$/, '')
|
73
70
|
hash[:author] = normalize_names(authors)
|
74
71
|
end
|
75
72
|
|
76
|
-
hash
|
77
|
-
rescue => e
|
78
|
-
warn e.message
|
79
73
|
hash
|
80
74
|
end
|
81
75
|
|
@@ -96,9 +90,9 @@ module Anystyle
|
|
96
90
|
end
|
97
91
|
end
|
98
92
|
|
99
|
-
hash['more-editors'] = true if !!editors.sub!(/\bet\.?\s*al.*$/i, '')
|
93
|
+
hash[:'more-editors'] = true if !!editors.sub!(/\bet\.?\s*al.*$/i, '')
|
100
94
|
|
101
|
-
editors.gsub!(
|
95
|
+
editors.gsub!(/^\W+|\W+$/, '')
|
102
96
|
editors.gsub!(/^in\s+/i, '')
|
103
97
|
editors.gsub!(/[^[:alpha:]]*[Ee]d(s|itors?|ited)?[^[:alpha:]]*/, '')
|
104
98
|
editors.gsub!(/[^[:alpha:]]*([Hh]rsg|Herausgeber)[^[:alpha:]]*/, '')
|
@@ -107,37 +101,39 @@ module Anystyle
|
|
107
101
|
is_trans = !!editors.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
|
108
102
|
|
109
103
|
hash[:editor] = normalize_names(editors)
|
110
|
-
hash[:translator] = hash
|
104
|
+
hash[:translator] = hash.delete :editor if is_trans
|
111
105
|
|
112
|
-
hash
|
113
|
-
rescue => e
|
114
|
-
warn e.message
|
115
106
|
hash
|
116
107
|
end
|
117
108
|
|
118
109
|
def normalize_translator(hash)
|
119
110
|
translators = hash[:translator]
|
120
111
|
|
121
|
-
|
112
|
+
editors.gsub!(/^\W+|\W+$/, '')
|
122
113
|
translators.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
|
123
114
|
translators.gsub!(/\bby\b/i, '')
|
124
115
|
|
125
116
|
hash[:translator] = normalize_names(translators)
|
126
117
|
hash
|
127
|
-
rescue => e
|
128
|
-
warn e.message
|
129
|
-
hash
|
130
118
|
end
|
131
119
|
|
132
|
-
Namae::Parser.instance.options[:prefer_comma_as_separator] = true
|
133
|
-
|
134
120
|
def normalize_names(names)
|
135
|
-
Namae.parse!(names).map
|
121
|
+
Namae.parse!(names).map { |name|
|
122
|
+
unless name.given.nil? || name.family.nil?
|
123
|
+
name.given.gsub!(/\b([[:upper:]])(\s|$)/, '\1.\2')
|
124
|
+
end
|
125
|
+
|
126
|
+
name.sort_order
|
127
|
+
|
128
|
+
}.join(' and ')
|
129
|
+
|
136
130
|
rescue => e
|
137
131
|
warn e.message
|
138
|
-
|
132
|
+
names
|
139
133
|
end
|
140
134
|
|
135
|
+
Namae.options[:prefer_comma_as_separator] = true
|
136
|
+
|
141
137
|
def normalize_title(hash)
|
142
138
|
title, container = hash[:title]
|
143
139
|
|
@@ -148,14 +144,11 @@ module Anystyle
|
|
148
144
|
|
149
145
|
extract_edition(title, hash)
|
150
146
|
|
151
|
-
title.gsub!(
|
147
|
+
title.gsub!(/^\s+|[\.,:;\s]+$/, '')
|
152
148
|
title.gsub!(/^["'”’´‘“`]|["'”’´‘“`]$/, '')
|
153
149
|
|
154
150
|
hash[:title] = title
|
155
151
|
|
156
|
-
hash
|
157
|
-
rescue => e
|
158
|
-
warn e.message
|
159
152
|
hash
|
160
153
|
end
|
161
154
|
|
@@ -189,16 +182,13 @@ module Anystyle
|
|
189
182
|
booktitle, *dangling = hash[:booktitle]
|
190
183
|
unmatched(:booktitle, hash, dangling) unless dangling.empty?
|
191
184
|
|
192
|
-
booktitle.gsub!(/^in\s
|
185
|
+
booktitle.gsub!(/^in\s+/i, '')
|
193
186
|
|
194
187
|
extract_edition(booktitle, hash)
|
195
188
|
|
196
|
-
booktitle.gsub!(
|
189
|
+
booktitle.gsub!(/^\s+|[\.,:;\s]+$/, '')
|
197
190
|
hash[:booktitle] = booktitle
|
198
191
|
|
199
|
-
hash
|
200
|
-
rescue => e
|
201
|
-
warn e.message
|
202
192
|
hash
|
203
193
|
end
|
204
194
|
|
@@ -209,9 +199,6 @@ module Anystyle
|
|
209
199
|
journal.gsub!(/^[\s]+|[\.,:;\s]+$/, '')
|
210
200
|
hash[:journal] = journal
|
211
201
|
|
212
|
-
hash
|
213
|
-
rescue => e
|
214
|
-
warn e.message
|
215
202
|
hash
|
216
203
|
end
|
217
204
|
|
@@ -228,9 +215,6 @@ module Anystyle
|
|
228
215
|
|
229
216
|
hash[:container] = container
|
230
217
|
hash
|
231
|
-
rescue => e
|
232
|
-
warn e.message
|
233
|
-
hash
|
234
218
|
end
|
235
219
|
|
236
220
|
def normalize_date(hash)
|
@@ -246,9 +230,6 @@ module Anystyle
|
|
246
230
|
hash.delete(:date)
|
247
231
|
end
|
248
232
|
|
249
|
-
hash
|
250
|
-
rescue => e
|
251
|
-
warn e.message
|
252
233
|
hash
|
253
234
|
end
|
254
235
|
|
@@ -276,9 +257,6 @@ module Anystyle
|
|
276
257
|
end
|
277
258
|
end
|
278
259
|
|
279
|
-
hash
|
280
|
-
rescue => e
|
281
|
-
warn e.message
|
282
260
|
hash
|
283
261
|
end
|
284
262
|
|
@@ -302,9 +280,6 @@ module Anystyle
|
|
302
280
|
hash[:pages] = $1
|
303
281
|
end
|
304
282
|
|
305
|
-
hash
|
306
|
-
rescue => e
|
307
|
-
warn e.message
|
308
283
|
hash
|
309
284
|
end
|
310
285
|
|
@@ -321,9 +296,6 @@ module Anystyle
|
|
321
296
|
|
322
297
|
hash[:location] = location
|
323
298
|
hash
|
324
|
-
rescue => e
|
325
|
-
warn e.message
|
326
|
-
hash
|
327
299
|
end
|
328
300
|
|
329
301
|
def normalize_isbn(hash)
|
@@ -333,9 +305,6 @@ module Anystyle
|
|
333
305
|
isbn = isbn[/[\d-]+/]
|
334
306
|
hash[:isbn] = isbn
|
335
307
|
|
336
|
-
hash
|
337
|
-
rescue => e
|
338
|
-
warn e.message
|
339
308
|
hash
|
340
309
|
end
|
341
310
|
|
@@ -346,9 +315,6 @@ module Anystyle
|
|
346
315
|
url.gsub!(/^\s+|[,\s]+$/, '')
|
347
316
|
hash[:isbn] = isbn
|
348
317
|
hash
|
349
|
-
rescue => e
|
350
|
-
warn e.message
|
351
|
-
hash
|
352
318
|
end
|
353
319
|
|
354
320
|
private
|
@@ -163,8 +163,13 @@ module Anystyle
|
|
163
163
|
|
164
164
|
def normalize(hash)
|
165
165
|
hash.keys.each do |label|
|
166
|
-
|
166
|
+
begin
|
167
|
+
normalizer.send("normalize_#{label}", hash)
|
168
|
+
rescue => e
|
169
|
+
warn e.message
|
170
|
+
end
|
167
171
|
end
|
172
|
+
|
168
173
|
classify hash
|
169
174
|
end
|
170
175
|
|
@@ -7,11 +7,11 @@ module Anystyle
|
|
7
7
|
describe "#tokenize_names" do
|
8
8
|
|
9
9
|
it "tokenizes 'A B'" do
|
10
|
-
Normalizer.instance.normalize_names('A B').should == 'B, A'
|
10
|
+
Normalizer.instance.normalize_names('A B').should == 'B, A.'
|
11
11
|
end
|
12
12
|
|
13
13
|
it "tokenizes 'A, B'" do
|
14
|
-
Normalizer.instance.normalize_names('A, B').should == 'A, B'
|
14
|
+
Normalizer.instance.normalize_names('A, B').should == 'A, B.'
|
15
15
|
end
|
16
16
|
|
17
17
|
# it "tokenizes 'A, jr., B'" do
|
@@ -23,11 +23,11 @@ module Anystyle
|
|
23
23
|
# end
|
24
24
|
|
25
25
|
it "tokenizes 'A, B, C, D'" do
|
26
|
-
Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B and C, D'
|
26
|
+
Normalizer.instance.normalize_names('A, B, C, D').should == 'A, B. and C, D.'
|
27
27
|
end
|
28
28
|
|
29
29
|
it "tokenizes 'A, B, C'" do
|
30
|
-
Normalizer.instance.normalize_names('A, B, C').should == 'A, B and C'
|
30
|
+
Normalizer.instance.normalize_names('A, B, C').should == 'A, B. and C'
|
31
31
|
end
|
32
32
|
|
33
33
|
it "tokenizes 'Aa Bb, C.'" do
|
@@ -35,7 +35,7 @@ module Anystyle
|
|
35
35
|
end
|
36
36
|
|
37
37
|
it "tokenizes 'Aa Bb, Cc Dd, and E F G'" do
|
38
|
-
Normalizer.instance.normalize_names('Aa Bb, C D, and E F G').should == 'Bb, Aa and D, C and G, E F'
|
38
|
+
Normalizer.instance.normalize_names('Aa Bb, C D, and E F G').should == 'Bb, Aa and D, C. and G, E. F.'
|
39
39
|
end
|
40
40
|
|
41
41
|
[
|
metadata
CHANGED
@@ -1,55 +1,55 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anystyle-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sylvester Keil
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-03-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bibtex-ruby
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ~>
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '3.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ~>
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '3.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: wapiti
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ~>
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0.1'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ~>
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0.1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: namae
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ~>
|
46
46
|
- !ruby/object:Gem::Version
|
47
47
|
version: '0.8'
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0.8'
|
55
55
|
description: A sophisticated parser for academic reference lists and bibliographies
|
@@ -96,27 +96,27 @@ licenses:
|
|
96
96
|
metadata: {}
|
97
97
|
post_install_message:
|
98
98
|
rdoc_options:
|
99
|
-
-
|
100
|
-
-
|
101
|
-
-
|
102
|
-
- "
|
103
|
-
-
|
99
|
+
- --line-numbers
|
100
|
+
- --inline-source
|
101
|
+
- --title
|
102
|
+
- '"Anystyle Parser"'
|
103
|
+
- --main
|
104
104
|
- README.md
|
105
105
|
require_paths:
|
106
106
|
- lib
|
107
107
|
required_ruby_version: !ruby/object:Gem::Requirement
|
108
108
|
requirements:
|
109
|
-
- -
|
109
|
+
- - '>='
|
110
110
|
- !ruby/object:Gem::Version
|
111
111
|
version: 1.9.3
|
112
112
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
113
113
|
requirements:
|
114
|
-
- -
|
114
|
+
- - '>='
|
115
115
|
- !ruby/object:Gem::Version
|
116
116
|
version: '0'
|
117
117
|
requirements: []
|
118
118
|
rubyforge_project:
|
119
|
-
rubygems_version: 2.
|
119
|
+
rubygems_version: 2.0.14
|
120
120
|
signing_key:
|
121
121
|
specification_version: 4
|
122
122
|
summary: Smart and fast academic bibliography parser.
|