anystyle-parser 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c5471f4465579c6aec53ffee37f8e337075934fd
4
- data.tar.gz: e32a68bbbe093abd3c44df90df5524325afc9f7e
3
+ metadata.gz: 71ded6c875b62e8abc138caa27b2fa9c4d423ca0
4
+ data.tar.gz: 011cb817c6dfde3ed323b34fc7196bc3e43fa47c
5
5
  SHA512:
6
- metadata.gz: cf520db7b3e31bfe1bd02b03b3d8a951a6288e6d10055ceeccf0b515d3f45e1a83abce911cb698839169c75cf9cac8d2e4169ea54d5db900b25b134b7caffd39
7
- data.tar.gz: ebc4173949674553f396dbba6631fae165493f47f748288edad5ccf4fdee33b4e0a2fe7b6a54c5fc0c95e79f8bbbf991a03f62dff202fb6ec949a1098ec1ab24
6
+ metadata.gz: 8e7787ab5548ef848e7045c8c340630ccda3158884471bf40c3a408543f7367eeb0f6b8cecb005a57e08b578100c87ccf96a583a130c82a8c9b1e308e9894f3c
7
+ data.tar.gz: 368d50c41245e8456e5937a3b2cd81e9b6f2b2b2ac655d026e9c4716c5c8c031a363b73d49674b6d7d3ad6733cf945db84cdb8a4b37e64c2d5f681c64d224ab4
data/HISTORY.md CHANGED
@@ -1,3 +1,8 @@
1
+ 0.5.3 / 2014-03-14
2
+ ==================
3
+ * Added source, director, producer and section labels
4
+ * Updated model
5
+
1
6
  0.5.2 / 2014-03-13
2
7
  ==================
3
8
  * Add XML output
@@ -109,7 +109,7 @@ module Anystyle
109
109
  def normalize_translator(hash)
110
110
  translators = hash[:translator]
111
111
 
112
- editors.gsub!(/^\W+|\W+$/, '')
112
+ translators.gsub!(/^\W+|\W+$/, '')
113
113
  translators.gsub!(/[^[:alpha:]]*trans(lated)?[^[:alpha:]]*/i, '')
114
114
  translators.gsub!(/\bby\b/i, '')
115
115
 
@@ -117,7 +117,31 @@ module Anystyle
117
117
  hash
118
118
  end
119
119
 
120
# Cleans up the :director value of a parsed reference and replaces it with
# the result of normalize_names.
#
# Before the names are parsed, the following is removed from the text:
# leading/trailing non-word characters, a "direct" / "director(s)" /
# "directed" marker together with any adjacent non-letters, and the
# word "by".
#
# hash - the reference Hash being normalized. A missing (nil) :director is
#        left untouched.
#
# Returns the same hash.
def normalize_director(hash)
  # First/rest destructuring as used by the other normalizers in this file:
  # a plain String yields no dangling parts, while surplus segments of an
  # Array value are handed to unmatched instead of breaking the gsub calls.
  directors, *dangling = hash[:director]
  unmatched(:director, hash, dangling) unless dangling.empty?
  return hash if directors.nil?

  # Non-destructive gsub: the caller's original string is not modified.
  cleaned = directors.gsub(/^\W+|\W+$/, '')
  # `ors?` also consumes the plural, so "Directors:" leaves no stray "s".
  cleaned = cleaned.gsub(/[^[:alpha:]]*direct(ors?|ed)?[^[:alpha:]]*/i, '')
  cleaned = cleaned.gsub(/\bby\b/i, '')

  hash[:director] = normalize_names(cleaned)
  hash
end
130
+
131
# Cleans up the :producer value of a parsed reference and replaces it with
# the result of normalize_names.
#
# Before the names are parsed, the following is removed from the text:
# leading/trailing non-word characters, a "produc" / "producer(s)" /
# "produced" marker together with any adjacent non-letters, and the
# word "by".
#
# hash - the reference Hash being normalized. A missing (nil) :producer is
#        left untouched.
#
# Returns the same hash.
def normalize_producer(hash)
  # First/rest destructuring as used by the other normalizers in this file:
  # a plain String yields no dangling parts, while surplus segments of an
  # Array value are handed to unmatched instead of breaking the gsub calls.
  producers, *dangling = hash[:producer]
  unmatched(:producer, hash, dangling) unless dangling.empty?
  return hash if producers.nil?

  # Non-destructive gsub: the caller's original string is not modified.
  cleaned = producers.gsub(/^\W+|\W+$/, '')
  # `ers?` also consumes the plural, so "Producers:" leaves no stray "s".
  cleaned = cleaned.gsub(/[^[:alpha:]]*produc(ers?|ed)?[^[:alpha:]]*/i, '')
  cleaned = cleaned.gsub(/\bby\b/i, '')

  # Store under :producer; writing to :director here would overwrite the
  # director and leave the producer text unnormalized.
  hash[:producer] = normalize_names(cleaned)
  hash
end
141
+
120
142
  def normalize_names(names)
143
+ names.gsub!(/\s*(\.\.\.|…)\s*/, '')
144
+
121
145
  Namae.parse!(names).map { |name|
122
146
  unless name.given.nil? || name.family.nil?
123
147
  name.given.gsub!(/\b([[:upper:]])(\s|$)/, '\1.\2')
@@ -135,11 +159,11 @@ module Anystyle
135
159
  Namae.options[:prefer_comma_as_separator] = true
136
160
 
137
161
  def normalize_title(hash)
138
- title, container = hash[:title]
162
+ title, source = hash[:title]
139
163
 
140
- unless container.nil?
141
- hash[:container] = container
142
- normalize(:container, hash)
164
+ unless source.nil?
165
+ hash[:source] = source
166
+ normalize(:source, hash)
143
167
  end
144
168
 
145
169
  extract_edition(title, hash)
@@ -202,18 +226,18 @@ module Anystyle
202
226
  hash
203
227
  end
204
228
 
205
- def normalize_container(hash)
206
- container, *dangling = hash[:container]
207
- unmatched(:container, hash, dangling) unless dangling.empty?
229
+ def normalize_source(hash)
230
+ source, *dangling = hash[:source]
231
+ unmatched(:source, hash, dangling) unless dangling.empty?
208
232
 
209
- case container
233
+ case source
210
234
  when /dissertation abstracts/i
211
- container.gsub!(/\s*section \w: ([[:alnum:]\s]+).*$/i, '')
235
+ source.gsub!(/\s*section \w: ([[:alnum:]\s]+).*$/i, '')
212
236
  hash[:category] = $1 unless $1.nil?
213
237
  hash[:type] = :phdthesis
214
238
  end
215
239
 
216
- hash[:container] = container
240
+ hash[:source] = source
217
241
  hash
218
242
  end
219
243
 
@@ -227,6 +251,11 @@ module Anystyle
227
251
 
228
252
  if date =~ /(\d{4})/
229
253
  hash[:year] = $1.to_i
254
+
255
+ if hash.key?(:month) && date =~ /(\d{1,2})\b/
256
+ hash[:day] = $1.to_i
257
+ end
258
+
230
259
  hash.delete(:date)
231
260
  end
232
261
 
@@ -260,6 +289,24 @@ module Anystyle
260
289
  hash
261
290
  end
262
291
 
292
# Normalizes the :publisher value and resolves role placeholders.
#
# After the generic normalize step, a publisher that consists solely of a
# role word ("producer(s)", "author(s)" or "editor(s)", case-insensitive)
# is replaced by the value stored under that role in the same hash.
#
# hash - the reference Hash being normalized.
#
# Returns the same hash.
def normalize_publisher(hash)
  # NOTE(review): normalize is defined outside this view; presumably it
  # applies the default clean-up for the given label -- confirm.
  normalize :publisher, hash

  case hash[:publisher]
  when /^producers?$/i
    hash[:publisher] = hash[:producer]
  when /^authors?$/i
    hash[:publisher] = hash[:author]
  when /^editors?$/i
    # `editors?` accepts the plural like the two patterns above; the former
    # `editor?` made the final "r" optional and never matched "Editors".
    hash[:publisher] = hash[:editor]
  end

  hash
end
309
+
263
310
  def normalize_pages(hash)
264
311
  pages, *dangling = hash[:pages]
265
312
  unmatched(:pages, hash, dangling) unless dangling.empty?
@@ -316,6 +363,14 @@ module Anystyle
316
363
  hash
317
364
  end
318
365
 
366
# Normalizes the :medium value into a dash-separated token string, e.g.
# "Motion picture" becomes "Motion-picture".
#
# Only the first segment of the value is used; any further segments are
# passed to unmatched.
#
# hash - the reference Hash being normalized. A nil or empty :medium is
#        left untouched.
#
# Returns the same hash.
def normalize_medium(hash)
  medium, *dangling = hash[:medium]
  unmatched(:medium, hash, dangling) unless dangling.empty?

  # A nil or empty-array value destructures to nil; there is nothing to
  # tokenize then.
  return hash if medium.nil?

  # Collect the runs of word characters and join them with dashes.
  hash[:medium] = medium.scan(/\w+/).join('-')
  hash
end
373
+
319
374
  private
320
375
 
321
376
  def unmatched(label, hash, tokens)
@@ -180,11 +180,13 @@ module Anystyle
180
180
  text = hash.values.flatten.join
181
181
 
182
182
  case
183
+ when keys.include?(:medium)
184
+ hash[:type] = hash[:medium]
183
185
  when keys.include?(:journal)
184
186
  hash[:type] = :article
185
187
  when text =~ /proceedings/i
186
188
  hash[:type] = :inproceedings
187
- when keys.include?(:booktitle), keys.include?(:container)
189
+ when keys.include?(:booktitle), keys.include?(:source)
188
190
  hash[:type] = :incollection
189
191
  when keys.include?(:publisher)
190
192
  hash[:type] = :book