anystyle-parser 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +5 -0
- data/lib/anystyle/parser/normalizer.rb +66 -11
- data/lib/anystyle/parser/parser.rb +3 -1
- data/lib/anystyle/parser/support/anystyle.mod +9314 -30605
- data/lib/anystyle/parser/version.rb +1 -1
- data/spec/anystyle/parser/normalizer_spec.rb +8 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71ded6c875b62e8abc138caa27b2fa9c4d423ca0
|
4
|
+
data.tar.gz: 011cb817c6dfde3ed323b34fc7196bc3e43fa47c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8e7787ab5548ef848e7045c8c340630ccda3158884471bf40c3a408543f7367eeb0f6b8cecb005a57e08b578100c87ccf96a583a130c82a8c9b1e308e9894f3c
|
7
|
+
data.tar.gz: 368d50c41245e8456e5937a3b2cd81e9b6f2b2b2ac655d026e9c4716c5c8c031a363b73d49674b6d7d3ad6733cf945db84cdb8a4b37e64c2d5f681c64d224ab4
|
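data/HISTORY.md
CHANGED
[hunks for this file are not included in this extract]
data/lib/anystyle/parser/normalizer.rb
CHANGED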
@@ -109,7 +109,7 @@ module Anystyle
|
|
109
109
|
def normalize_translator(hash)
|
110
110
|
translators = hash[:translator]
|
111
111
|
|
112
|
-
|
112
|
+
translators.gsub!(/^\W+|\W+$/, '')
|
113
113
|
translators.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
|
114
114
|
translators.gsub!(/\bby\b/i, '')
|
115
115
|
|
@@ -117,7 +117,31 @@ module Anystyle
|
|
117
117
|
hash
|
118
118
|
end
|
119
119
|
|
120
|
+
def normalize_director(hash)
|
121
|
+
directors = hash[:director]
|
122
|
+
|
123
|
+
directors.gsub!(/^\W+|\W+$/, '')
|
124
|
+
directors.gsub!(/[^[:alpha:]]*direct(or|ed)?[^[:alpha:]]*/i, '')
|
125
|
+
directors.gsub!(/\bby\b/i, '')
|
126
|
+
|
127
|
+
hash[:director] = normalize_names(directors)
|
128
|
+
hash
|
129
|
+
end
|
130
|
+
|
131
|
+
def normalize_producer(hash)
|
132
|
+
producers = hash[:producer]
|
133
|
+
|
134
|
+
producers.gsub!(/^\W+|\W+$/, '')
|
135
|
+
producers.gsub!(/[^[:alpha:]]*produc(er|ed)?[^[:alpha:]]*/i, '')
|
136
|
+
producers.gsub!(/\bby\b/i, '')
|
137
|
+
|
138
|
+
hash[:director] = normalize_names(producers)
|
139
|
+
hash
|
140
|
+
end
|
141
|
+
|
120
142
|
def normalize_names(names)
|
143
|
+
names.gsub!(/\s*(\.\.\.|…)\s*/, '')
|
144
|
+
|
121
145
|
Namae.parse!(names).map { |name|
|
122
146
|
unless name.given.nil? || name.family.nil?
|
123
147
|
name.given.gsub!(/\b([[:upper:]])(\s|$)/, '\1.\2')
|
@@ -135,11 +159,11 @@ module Anystyle
|
|
135
159
|
Namae.options[:prefer_comma_as_separator] = true
|
136
160
|
|
137
161
|
def normalize_title(hash)
|
138
|
-
title,
|
162
|
+
title, source = hash[:title]
|
139
163
|
|
140
|
-
unless
|
141
|
-
hash[:
|
142
|
-
normalize(:
|
164
|
+
unless source.nil?
|
165
|
+
hash[:source] = source
|
166
|
+
normalize(:source, hash)
|
143
167
|
end
|
144
168
|
|
145
169
|
extract_edition(title, hash)
|
@@ -202,18 +226,18 @@ module Anystyle
|
|
202
226
|
hash
|
203
227
|
end
|
204
228
|
|
205
|
-
def
|
206
|
-
|
207
|
-
unmatched(:
|
229
|
+
def normalize_source(hash)
|
230
|
+
source, *dangling = hash[:source]
|
231
|
+
unmatched(:source, hash, dangling) unless dangling.empty?
|
208
232
|
|
209
|
-
case
|
233
|
+
case source
|
210
234
|
when /dissertation abstracts/i
|
211
|
-
|
235
|
+
source.gsub!(/\s*section \w: ([[:alnum:]\s]+).*$/i, '')
|
212
236
|
hash[:category] = $1 unless $1.nil?
|
213
237
|
hash[:type] = :phdthesis
|
214
238
|
end
|
215
239
|
|
216
|
-
hash[:
|
240
|
+
hash[:source] = source
|
217
241
|
hash
|
218
242
|
end
|
219
243
|
|
@@ -227,6 +251,11 @@ module Anystyle
|
|
227
251
|
|
228
252
|
if date =~ /(\d{4})/
|
229
253
|
hash[:year] = $1.to_i
|
254
|
+
|
255
|
+
if hash.key?(:month) && date =~ /(\d{1,2})\b/
|
256
|
+
hash[:day] = $1.to_i
|
257
|
+
end
|
258
|
+
|
230
259
|
hash.delete(:date)
|
231
260
|
end
|
232
261
|
|
@@ -260,6 +289,24 @@ module Anystyle
|
|
260
289
|
hash
|
261
290
|
end
|
262
291
|
|
292
|
+
def normalize_publisher(hash)
|
293
|
+
normalize :publisher, hash
|
294
|
+
|
295
|
+
case hash[:publisher]
|
296
|
+
when /^producers?$/i
|
297
|
+
hash[:publisher] = hash[:producer]
|
298
|
+
|
299
|
+
when /^authors?$/i
|
300
|
+
hash[:publisher] = hash[:author]
|
301
|
+
|
302
|
+
when /^editor?$/i
|
303
|
+
hash[:publisher] = hash[:editor]
|
304
|
+
|
305
|
+
end
|
306
|
+
|
307
|
+
hash
|
308
|
+
end
|
309
|
+
|
263
310
|
def normalize_pages(hash)
|
264
311
|
pages, *dangling = hash[:pages]
|
265
312
|
unmatched(:pages, hash, dangling) unless dangling.empty?
|
@@ -316,6 +363,14 @@ module Anystyle
|
|
316
363
|
hash
|
317
364
|
end
|
318
365
|
|
366
|
+
def normalize_medium(hash)
|
367
|
+
medium, *dangling = hash[:medium]
|
368
|
+
unmatched(:medium, hash, dangling) unless dangling.empty?
|
369
|
+
|
370
|
+
hash[:medium] = medium.split(/\W+/).reject(&:empty?).join('-')
|
371
|
+
hash
|
372
|
+
end
|
373
|
+
|
319
374
|
private
|
320
375
|
|
321
376
|
def unmatched(label, hash, tokens)
|
data/lib/anystyle/parser/parser.rb
CHANGED
@@ -180,11 +180,13 @@ module Anystyle
|
|
180
180
|
text = hash.values.flatten.join
|
181
181
|
|
182
182
|
case
|
183
|
+
when keys.include?(:medium)
|
184
|
+
hash[:type] = hash[:medium]
|
183
185
|
when keys.include?(:journal)
|
184
186
|
hash[:type] = :article
|
185
187
|
when text =~ /proceedings/i
|
186
188
|
hash[:type] = :inproceedings
|
187
|
-
when keys.include?(:booktitle), keys.include?(:
|
189
|
+
when keys.include?(:booktitle), keys.include?(:source)
|
188
190
|
hash[:type] = :incollection
|
189
191
|
when keys.include?(:publisher)
|
190
192
|
hash[:type] = :book
|