anystyle-parser 0.5.2 → 0.5.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c5471f4465579c6aec53ffee37f8e337075934fd
4
- data.tar.gz: e32a68bbbe093abd3c44df90df5524325afc9f7e
3
+ metadata.gz: 71ded6c875b62e8abc138caa27b2fa9c4d423ca0
4
+ data.tar.gz: 011cb817c6dfde3ed323b34fc7196bc3e43fa47c
5
5
  SHA512:
6
- metadata.gz: cf520db7b3e31bfe1bd02b03b3d8a951a6288e6d10055ceeccf0b515d3f45e1a83abce911cb698839169c75cf9cac8d2e4169ea54d5db900b25b134b7caffd39
7
- data.tar.gz: ebc4173949674553f396dbba6631fae165493f47f748288edad5ccf4fdee33b4e0a2fe7b6a54c5fc0c95e79f8bbbf991a03f62dff202fb6ec949a1098ec1ab24
6
+ metadata.gz: 8e7787ab5548ef848e7045c8c340630ccda3158884471bf40c3a408543f7367eeb0f6b8cecb005a57e08b578100c87ccf96a583a130c82a8c9b1e308e9894f3c
7
+ data.tar.gz: 368d50c41245e8456e5937a3b2cd81e9b6f2b2b2ac655d026e9c4716c5c8c031a363b73d49674b6d7d3ad6733cf945db84cdb8a4b37e64c2d5f681c64d224ab4
data/HISTORY.md CHANGED
@@ -1,3 +1,8 @@
1
+ 0.5.3 / 2014-03-14
2
+ ==================
3
+ * Added source, director, producer and section labels
4
+ * Updated model
5
+
1
6
  0.5.2 / 2014-03-13
2
7
  ==================
3
8
  * Add XML output
@@ -109,7 +109,7 @@ module Anystyle
109
109
  def normalize_translator(hash)
110
110
  translators = hash[:translator]
111
111
 
112
- editors.gsub!(/^\W+|\W+$/, '')
112
+ translators.gsub!(/^\W+|\W+$/, '')
113
113
  translators.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
114
114
  translators.gsub!(/\bby\b/i, '')
115
115
 
@@ -117,7 +117,31 @@ module Anystyle
117
117
  hash
118
118
  end
119
119
 
120
+ def normalize_director(hash) # Cleans the :director string in place, stores normalize_names' result under :director, returns hash.
121
+ directors = hash[:director]
122
+
123
+ directors.gsub!(/^\W+|\W+$/, '') # trim non-word characters at line starts and ends
124
+ directors.gsub!(/[^[:alpha:]]*direct(or|ed)?[^[:alpha:]]*/i, '') # drop "direct"/"director"/"directed" plus adjacent non-letters
125
+ directors.gsub!(/\bby\b/i, '') # drop the standalone word "by"
126
+
127
+ hash[:director] = normalize_names(directors)
128
+ hash
129
+ end
130
+
131
+ def normalize_producer(hash)
132
+ producers = hash[:producer]
133
+
134
+ producers.gsub!(/^\W+|\W+$/, '')
135
+ producers.gsub!(/[^[:alpha:]]*produc(er|ed)?[^[:alpha:]]*/i, '')
136
+ producers.gsub!(/\bby\b/i, '')
137
+
138
+ hash[:director] = normalize_names(producers)
139
+ hash
140
+ end
141
+
120
142
  def normalize_names(names)
143
+ names.gsub!(/\s*(\.\.\.|…)\s*/, '')
144
+
121
145
  Namae.parse!(names).map { |name|
122
146
  unless name.given.nil? || name.family.nil?
123
147
  name.given.gsub!(/\b([[:upper:]])(\s|$)/, '\1.\2')
@@ -135,11 +159,11 @@ module Anystyle
135
159
  Namae.options[:prefer_comma_as_separator] = true
136
160
 
137
161
  def normalize_title(hash)
138
- title, container = hash[:title]
162
+ title, source = hash[:title]
139
163
 
140
- unless container.nil?
141
- hash[:container] = container
142
- normalize(:container, hash)
164
+ unless source.nil?
165
+ hash[:source] = source
166
+ normalize(:source, hash)
143
167
  end
144
168
 
145
169
  extract_edition(title, hash)
@@ -202,18 +226,18 @@ module Anystyle
202
226
  hash
203
227
  end
204
228
 
205
- def normalize_container(hash)
206
- container, *dangling = hash[:container]
207
- unmatched(:container, hash, dangling) unless dangling.empty?
229
+ def normalize_source(hash)
230
+ source, *dangling = hash[:source]
231
+ unmatched(:source, hash, dangling) unless dangling.empty?
208
232
 
209
- case container
233
+ case source
210
234
  when /dissertation abstracts/i
211
- container.gsub!(/\s*section \w: ([[:alnum:]\s]+).*$/i, '')
235
+ source.gsub!(/\s*section \w: ([[:alnum:]\s]+).*$/i, '')
212
236
  hash[:category] = $1 unless $1.nil?
213
237
  hash[:type] = :phdthesis
214
238
  end
215
239
 
216
- hash[:container] = container
240
+ hash[:source] = source
217
241
  hash
218
242
  end
219
243
 
@@ -227,6 +251,11 @@ module Anystyle
227
251
 
228
252
  if date =~ /(\d{4})/
229
253
  hash[:year] = $1.to_i
254
+
255
+ if hash.key?(:month) && date =~ /(\d{1,2})\b/
256
+ hash[:day] = $1.to_i
257
+ end
258
+
230
259
  hash.delete(:date)
231
260
  end
232
261
 
@@ -260,6 +289,24 @@ module Anystyle
260
289
  hash
261
290
  end
262
291
 
292
+ def normalize_publisher(hash)
293
+ normalize :publisher, hash
294
+
295
+ case hash[:publisher]
296
+ when /^producers?$/i
297
+ hash[:publisher] = hash[:producer]
298
+
299
+ when /^authors?$/i
300
+ hash[:publisher] = hash[:author]
301
+
302
+ when /^editor?$/i
303
+ hash[:publisher] = hash[:editor]
304
+
305
+ end
306
+
307
+ hash
308
+ end
309
+
263
310
  def normalize_pages(hash)
264
311
  pages, *dangling = hash[:pages]
265
312
  unmatched(:pages, hash, dangling) unless dangling.empty?
@@ -316,6 +363,14 @@ module Anystyle
316
363
  hash
317
364
  end
318
365
 
366
+ def normalize_medium(hash) # Rewrites hash[:medium] as its word tokens joined by '-'; returns hash.
367
+ medium, *dangling = hash[:medium] # first value is kept; any further values land in dangling
368
+ unmatched(:medium, hash, dangling) unless dangling.empty?
369
+
370
+ hash[:medium] = medium.split(/\W+/).reject(&:empty?).join('-') # split on non-word runs, drop empty pieces, join with hyphens
371
+ hash
372
+ end
373
+
319
374
  private
320
375
 
321
376
  def unmatched(label, hash, tokens)
@@ -180,11 +180,13 @@ module Anystyle
180
180
  text = hash.values.flatten.join
181
181
 
182
182
  case
183
+ when keys.include?(:medium)
184
+ hash[:type] = hash[:medium]
183
185
  when keys.include?(:journal)
184
186
  hash[:type] = :article
185
187
  when text =~ /proceedings/i
186
188
  hash[:type] = :inproceedings
187
- when keys.include?(:booktitle), keys.include?(:container)
189
+ when keys.include?(:booktitle), keys.include?(:source)
188
190
  hash[:type] = :incollection
189
191
  when keys.include?(:publisher)
190
192
  hash[:type] = :book