anystyle-parser 0.5.2 → 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.md +5 -0
- data/lib/anystyle/parser/normalizer.rb +66 -11
- data/lib/anystyle/parser/parser.rb +3 -1
- data/lib/anystyle/parser/support/anystyle.mod +9314 -30605
- data/lib/anystyle/parser/version.rb +1 -1
- data/spec/anystyle/parser/normalizer_spec.rb +8 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71ded6c875b62e8abc138caa27b2fa9c4d423ca0
|
4
|
+
data.tar.gz: 011cb817c6dfde3ed323b34fc7196bc3e43fa47c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8e7787ab5548ef848e7045c8c340630ccda3158884471bf40c3a408543f7367eeb0f6b8cecb005a57e08b578100c87ccf96a583a130c82a8c9b1e308e9894f3c
|
7
|
+
data.tar.gz: 368d50c41245e8456e5937a3b2cd81e9b6f2b2b2ac655d026e9c4716c5c8c031a363b73d49674b6d7d3ad6733cf945db84cdb8a4b37e64c2d5f681c64d224ab4
|
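data/HISTORY.md
CHANGED
(the hunks for this file were not captured in this listing)
data/lib/anystyle/parser/normalizer.rb
CHANGED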
@@ -109,7 +109,7 @@ module Anystyle
|
|
109
109
|
def normalize_translator(hash)
|
110
110
|
translators = hash[:translator]
|
111
111
|
|
112
|
-
|
112
|
+
translators.gsub!(/^\W+|\W+$/, '')
|
113
113
|
translators.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
|
114
114
|
translators.gsub!(/\bby\b/i, '')
|
115
115
|
|
@@ -117,7 +117,31 @@ module Anystyle
|
|
117
117
|
hash
|
118
118
|
end
|
119
119
|
|
120
|
+
# Cleans up the raw :director value and replaces it with parsed names.
#
# Steps, in order:
# 1. trim leading/trailing non-word characters on each line,
# 2. drop the role label ("direct", "director(s)", "directed") together
#    with any adjacent non-letter characters,
# 3. drop a standalone "by",
# 4. hand the remainder to normalize_names (defined elsewhere in this
#    class) and store its result under :director.
#
# @param hash [Hash] reference fields; hash[:director] is expected to be
#   a String
# @return [Hash] the same hash, with :director replaced
def normalize_director(hash)
  # [[:word:]] is Unicode-aware, whereas \W is ASCII-only in Ruby and would
  # strip the accented first/last letter of names such as "Émile" or "André".
  directors = hash[:director].gsub(/^[^[:word:]]+|[^[:word:]]+$/, '')

  # `ors?` also consumes the plural label; otherwise "Directors: ..." would
  # leave a stray "s" in front of the names.
  directors = directors.gsub(/[^[:alpha:]]*direct(ors?|ed)?[^[:alpha:]]*/i, '')
  directors = directors.gsub(/\bby\b/i, '')

  hash[:director] = normalize_names(directors)
  hash
end
|
130
|
+
|
131
|
+
# Cleans up the raw :producer value and replaces it with parsed names.
#
# Steps, in order:
# 1. trim leading/trailing non-word characters on each line,
# 2. drop the role label ("produc", "producer(s)", "produced") together
#    with any adjacent non-letter characters,
# 3. drop a standalone "by",
# 4. hand the remainder to normalize_names (defined elsewhere in this
#    class) and store its result under :producer.
#
# @param hash [Hash] reference fields; hash[:producer] is expected to be
#   a String
# @return [Hash] the same hash, with :producer replaced
def normalize_producer(hash)
  # [[:word:]] is Unicode-aware, whereas \W is ASCII-only in Ruby and would
  # strip the accented first/last letter of a name.
  producers = hash[:producer].gsub(/^[^[:word:]]+|[^[:word:]]+$/, '')

  # `ers?` also consumes the plural label; otherwise "Producers: ..." would
  # leave a stray "s" in front of the names.
  producers = producers.gsub(/[^[:alpha:]]*produc(ers?|ed)?[^[:alpha:]]*/i, '')
  producers = producers.gsub(/\bby\b/i, '')

  # Store under :producer. Writing to :director here would overwrite the
  # director field and leave :producer without normalized names.
  hash[:producer] = normalize_names(producers)
  hash
end
|
141
|
+
|
120
142
|
def normalize_names(names)
|
143
|
+
names.gsub!(/\s*(\.\.\.|…)\s*/, '')
|
144
|
+
|
121
145
|
Namae.parse!(names).map { |name|
|
122
146
|
unless name.given.nil? || name.family.nil?
|
123
147
|
name.given.gsub!(/\b([[:upper:]])(\s|$)/, '\1.\2')
|
@@ -135,11 +159,11 @@ module Anystyle
|
|
135
159
|
Namae.options[:prefer_comma_as_separator] = true
|
136
160
|
|
137
161
|
def normalize_title(hash)
|
138
|
-
title,
|
162
|
+
title, source = hash[:title]
|
139
163
|
|
140
|
-
unless
|
141
|
-
hash[:
|
142
|
-
normalize(:
|
164
|
+
unless source.nil?
|
165
|
+
hash[:source] = source
|
166
|
+
normalize(:source, hash)
|
143
167
|
end
|
144
168
|
|
145
169
|
extract_edition(title, hash)
|
@@ -202,18 +226,18 @@ module Anystyle
|
|
202
226
|
hash
|
203
227
|
end
|
204
228
|
|
205
|
-
def
|
206
|
-
|
207
|
-
unmatched(:
|
229
|
+
# Normalizes the :source field.
#
# Only the first :source value is kept; any further values are passed to
# `unmatched` (a private helper defined elsewhere in this class). When the
# source mentions "dissertation abstracts", a trailing
# "section X: <category>..." part is cut off, the category is stored under
# :category, and the entry type is set to :phdthesis.
#
# @param hash [Hash] reference fields
# @return [Hash] the same hash, with :source (and possibly :category and
#   :type) updated
def normalize_source(hash)
  source, *dangling = hash[:source]
  unmatched(:source, hash, dangling) unless dangling.empty?

  case source
  when /dissertation abstracts/i
    category = nil

    # Capture the category through the block instead of reading the `$1`
    # global after the call; like the global, this keeps the capture of the
    # last match when there are several.
    source = source.gsub(/\s*section \w: ([[:alnum:]\s]+).*$/i) do
      category = Regexp.last_match(1)
      ''
    end

    hash[:category] = category unless category.nil?
    hash[:type] = :phdthesis
  end

  hash[:source] = source
  hash
end
|
219
243
|
|
@@ -227,6 +251,11 @@ module Anystyle
|
|
227
251
|
|
228
252
|
if date =~ /(\d{4})/
|
229
253
|
hash[:year] = $1.to_i
|
254
|
+
|
255
|
+
if hash.key?(:month) && date =~ /(\d{1,2})\b/
|
256
|
+
hash[:day] = $1.to_i
|
257
|
+
end
|
258
|
+
|
230
259
|
hash.delete(:date)
|
231
260
|
end
|
232
261
|
|
@@ -260,6 +289,24 @@ module Anystyle
|
|
260
289
|
hash
|
261
290
|
end
|
262
291
|
|
292
|
+
# Normalizes the :publisher field and resolves role placeholders.
#
# The field is first passed through `normalize` (defined elsewhere in this
# class). If the resulting publisher is just a role word -- "producer(s)",
# "author(s)" or "editor(s)", case-insensitive -- it is replaced by the value
# of the corresponding field of the same reference.
#
# @param hash [Hash] reference fields
# @return [Hash] the same hash, with :publisher possibly replaced
def normalize_publisher(hash)
  normalize :publisher, hash

  case hash[:publisher]
  when /^producers?$/i
    hash[:publisher] = hash[:producer]
  when /^authors?$/i
    hash[:publisher] = hash[:author]
  when /^editors?$/i
    # The optional character must be the plural "s" (as in the branches
    # above), not the final "r" of "editor".
    hash[:publisher] = hash[:editor]
  end

  hash
end
|
309
|
+
|
263
310
|
def normalize_pages(hash)
|
264
311
|
pages, *dangling = hash[:pages]
|
265
312
|
unmatched(:pages, hash, dangling) unless dangling.empty?
|
@@ -316,6 +363,14 @@ module Anystyle
|
|
316
363
|
hash
|
317
364
|
end
|
318
365
|
|
366
|
+
# Normalizes the :medium field into a hyphen-joined token string.
#
# Only the first :medium value is kept; any further values are passed to
# `unmatched` (a private helper defined elsewhere in this class). The kept
# value is split on runs of non-word characters and the non-empty pieces are
# joined with "-", e.g. "[Video cassette]" becomes "Video-cassette".
#
# @param hash [Hash] reference fields
# @return [Hash] the same hash, with :medium replaced
def normalize_medium(hash)
  medium, *dangling = hash[:medium]
  unmatched(:medium, hash, dangling) unless dangling.empty?

  # [[:word:]] is Unicode-aware; \W is ASCII-only in Ruby and would treat
  # accented letters as separators ("Vidéo" -> "Vid-o").
  words = medium.split(/[^[:word:]]+/).reject(&:empty?)

  hash[:medium] = words.join('-')
  hash
end
|
373
|
+
|
319
374
|
private
|
320
375
|
|
321
376
|
def unmatched(label, hash, tokens)
|
data/lib/anystyle/parser/parser.rb
CHANGED
@@ -180,11 +180,13 @@ module Anystyle
|
|
180
180
|
text = hash.values.flatten.join
|
181
181
|
|
182
182
|
case
|
183
|
+
when keys.include?(:medium)
|
184
|
+
hash[:type] = hash[:medium]
|
183
185
|
when keys.include?(:journal)
|
184
186
|
hash[:type] = :article
|
185
187
|
when text =~ /proceedings/i
|
186
188
|
hash[:type] = :inproceedings
|
187
|
-
when keys.include?(:booktitle), keys.include?(:
|
189
|
+
when keys.include?(:booktitle), keys.include?(:source)
|
188
190
|
hash[:type] = :incollection
|
189
191
|
when keys.include?(:publisher)
|
190
192
|
hash[:type] = :book
|