bpl_enrich 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 41994a22b0ef539c9938525b17ad475e6eaafce7
4
- data.tar.gz: 9c16b1cbb137f1c770d72190b3916a2ac6f703ae
3
+ metadata.gz: 63ca261fe49ac43d3aad9df73b3caa2ce33dce29
4
+ data.tar.gz: 70f1ae1ef1a0b9650b75093cff8d3916023cc3cf
5
5
  SHA512:
6
- metadata.gz: 06560416487c4a58d16c0ebed6716429b44aa73bd92c62f6d99093ce4d130a416adf8fa8f5eaeae0e91f2f371fed67535288109265c1a1c8ea44c3690b15d78c
7
- data.tar.gz: 54f601dd8de34b18f9c018c09ea48fe1ac4de300e719c07aa34b6f595cbab50c337eecf24d19cde70c2cde2c5f786af492346581027fc30872a1cb3de31db311
6
+ metadata.gz: 48f5af0440326806c69d638ee8a3cb4b2cb5c28e9b7d4e17128a48b413542a59fb8e063b6d31521089334adb1791b90216e3f10b68b86b176dca0f289b9132ec
7
+ data.tar.gz: cedfcdcc85a1bb2d55e7f97117081f45fd5fc77c1b031fe108a4aded7ea258d7e6f35c12e1a0139f4828264369d52bfb1baaa4e0b1933aad71177d18db793aae
@@ -1,6 +1,43 @@
1
1
  module BplEnrich
2
2
  class Dates
3
3
 
4
+ def self.is_numeric? (string)
5
+ true if Float(string) rescue false
6
+ end
7
+
8
+ def self.convert_month_words(date_string)
9
+ return_date_string = date_string.clone
10
+
11
+ date_string = date_string.gsub(/[,\/\.]/, ' ').squeeze #switch periods, slashes, and commas that can seperate dates with spaces
12
+ if date_string.split(' ').any? { |word| Date::MONTHNAMES.include?(word.humanize) || Date::ABBR_MONTHNAMES.include?(word.gsub('.', '').humanize) }
13
+ return_date_string = ''
14
+ was_numeric = false
15
+
16
+ date_string.split(' ').each do |date_word|
17
+ if Date::MONTHNAMES.include?(date_word.humanize)
18
+ current_value = Date::MONTHNAMES.index(date_word).to_s.rjust(2, '0')
19
+ elsif Date::ABBR_MONTHNAMES.include?(date_word.humanize)
20
+ current_value = Date::ABBR_MONTHNAMES.index(date_word).to_s.rjust(2, '0')
21
+ else
22
+ current_value = date_word
23
+ end
24
+ if is_numeric?(current_value)
25
+ if was_numeric
26
+ return_date_string += "/#{current_value.to_s.rjust(2, '0')}"
27
+ else
28
+ was_numeric = true
29
+ return_date_string += " #{current_value.to_s.rjust(2, '0')}"
30
+ end
31
+ else
32
+ was_numeric = false
33
+ return_date_string += " #{current_value}"
34
+ end
35
+ end
36
+ end
37
+
38
+ return return_date_string
39
+ end
40
+
4
41
  # a function to convert date data from OAI feeds into MODS-usable date data
5
42
  # assumes date values containing ";" have already been split
6
43
  # returns hash with :single_date, :date_range, :date_qualifier, and/or :date_note values
@@ -9,6 +46,8 @@ module BplEnrich
9
46
  date_data = {} # create the hash to hold all the data
10
47
  source_date_string = value.strip # variable to hold original value
11
48
 
49
+ value = convert_month_words(value) #Stuff like April 7, 1983
50
+
12
51
  # weed out obvious bad dates before processing
13
52
  if (value.match(/([Pp]re|[Pp]ost|[Bb]efore|[Aa]fter|[Uu]nknown|[Uu]ndated|n\.d\.)/)) ||
14
53
  (value.match(/\d\d\d\d-\z/)) || # 1975-
@@ -194,22 +233,26 @@ module BplEnrich
194
233
  # try to automatically parse single dates with YYYY && MM && DD values
195
234
  if Timeliness.parse(value).nil?
196
235
  # start further processing
197
- if value.match(/\A[12]\d\d\d-[01][0-9]\z/) # yyyy-mm
198
- date_data[:single_date] = value
199
- elsif value.match(/\A[01]?[1-9][-\/][12]\d\d\d\z/) # mm-yyyy || m-yyyy || mm/yyyy
200
- value = '0' + value if value.match(/\A[1-9][-\/][12]\d\d\d\z/) # m-yyyy || m/yyyy
201
- date_data[:single_date] = value[3..6] + '-' + value[0..1]
202
- elsif value.match(/\A[A-Za-z]{3,} [12]\d\d\d\z/) # April 1987 || Apr. 1987
203
- value = value.split(' ')
204
- if value[0].length == 3
205
- value_month = '%02d' % Date::ABBR_MONTHNAMES.index(value[0])
206
- else
207
- value_month = '%02d' % Date::MONTHNAMES.index(value[0])
236
+ value.split(' ').each do |split_value|
237
+ if split_value.match(/\A[12]\d\d\d[-\/\.][01][0-9]\z/) # yyyy-mm || yyyy/mm || yyyy.mm
238
+ split_value = split_value.gsub(/[,\/\.]/, '-').squeeze
239
+ date_data[:single_date] = split_value
240
+ elsif split_value.match(/\A[12]\d\d\d[-\/\.][01][0-9][-\/\.][01][0-9]\z/) # yyyy-mm-dd || yyyy/mm/dd || yyyy.mm.dd
241
+ split_value = split_value.gsub(/[,\/\.]/, '-').squeeze
242
+ date_data[:single_date] = split_value
243
+ elsif split_value.match(/\A[01]?[1-9][-\/][12]\d\d\d\z/) # mm-yyyy || m-yyyy || mm/yyyy
244
+ split_value = '0' + split_value if split_value.match(/\A[1-9][-\/\.][12]\d\d\d\z/) # m-yyyy || m/yyyy
245
+ date_data[:single_date] = split_value[3..6] + '-' + split_value[0..1]
246
+ elsif split_value.match(/\A[12]\d\d\d\z/) # 1999
247
+ date_data[:single_date] = split_value
248
+ elsif split_value.match(/\A[01]?[1-9][-\/\.][01]?[1-9][-\/\.][12]\d\d\d\z/) # mm-dd-yyyy || m-dd-yyyy || mm/dd/yyyy
249
+ split_value = split_value.gsub(/[,\/\.]/, '/').squeeze
250
+ date_data[:single_date] = "#{split_value.split('/')[2]}-#{split_value.split('/')[0]}-#{split_value.split('/')[1]}"
208
251
  end
209
- date_data[:single_date] = value_month ? value[1] + '-' + value_month : value[1]
210
- elsif value.match(/\A[12]\d\d\d\z/) # 1999
211
- date_data[:single_date] = value
212
- else
252
+
253
+ end
254
+
255
+ if value.split(' ').length > 1 || date_data[:single_date].blank?
213
256
  date_data[:date_note] = source_date_string
214
257
  end
215
258
  else
@@ -1,3 +1,3 @@
1
1
  module BplEnrich
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/bpl_enrich.rb CHANGED
@@ -8,6 +8,11 @@ module BplEnrich
8
8
  require "htmlentities"
9
9
  require "qa"
10
10
 
11
+ # add some formats to Timeliness gem for better parsing
12
+ Timeliness.add_formats(:date, 'm-d-yy', :before => 'd-m-yy')
13
+ Timeliness.add_formats(:date, 'mmm[\.]? d[a-z]?[a-z]?[,]? yyyy')
14
+ Timeliness.add_formats(:date, 'yyyy mmm d')
15
+
11
16
  def self.strip_value(value)
12
17
  if(value.blank?)
13
18
  return nil
data/test/dates_test.rb CHANGED
@@ -2,10 +2,33 @@ require 'test_helper'
2
2
 
3
3
  class DatesTest < ActiveSupport::TestCase
4
4
  def test_date_standardizer
5
+
6
+ #Month dates
5
7
  result = BplEnrich::Dates.standardize('April 1983')
6
8
  assert_equal '1983-04', result[:single_date]
7
9
  assert_equal nil, result[:date_range]
8
10
  assert_equal nil, result[:date_note]
11
+
12
+ result = BplEnrich::Dates.standardize('April 7, 1983')
13
+ assert_equal '1983-04-07', result[:single_date]
14
+ assert_equal nil, result[:date_range]
15
+ assert_equal nil, result[:date_note]
16
+
17
+ result = BplEnrich::Dates.standardize('April 7.1983 (Easter)')
18
+ assert_equal '1983-04-07', result[:single_date]
19
+ assert_equal nil, result[:date_range]
20
+ assert_equal 'April 7.1983 (Easter)', result[:date_note]
21
+
22
+ result = BplEnrich::Dates.standardize('1983.April.7 (Easter)')
23
+ assert_equal '1983-04-07', result[:single_date]
24
+ assert_equal nil, result[:date_range]
25
+ assert_equal '1983.April.7 (Easter)', result[:date_note]
26
+
27
+ result = BplEnrich::Dates.standardize('between April 2014 and May 2014')
28
+ assert_equal nil, result[:single_date]
29
+ assert_equal '2014-04', result[:date_range][:start]
30
+ assert_equal '2014-05', result[:date_range][:end]
31
+ assert_equal nil, result[:date_note]
9
32
  end
10
33
 
11
34
 
@@ -176,5 +176,125 @@ DatesTest: test_date_standardizer
176
176
   (0.1ms) begin transaction
177
177
  --------------------------------
178
178
  LCSHTest: test_lcsh_standardizer
179
+ --------------------------------
180
+  (0.1ms) rollback transaction
181
+  (0.7ms) begin transaction
182
+ ------------------------------------
183
+ AuthoritiesTest: test_parse_language
184
+ ------------------------------------
185
+  (0.3ms) rollback transaction
186
+  (0.2ms) begin transaction
187
+ -----------------------------------------
188
+ AuthoritiesTest: test_parse_name_for_role
189
+ -----------------------------------------
190
+  (0.3ms) rollback transaction
191
+  (0.2ms) begin transaction
192
+ --------------------------------
193
+ AuthoritiesTest: test_parse_role
194
+ --------------------------------
195
+  (0.3ms) rollback transaction
196
+  (0.2ms) begin transaction
197
+ -------------------------------
198
+ BplEnrichTest: test_strip_value
199
+ -------------------------------
200
+  (0.1ms) rollback transaction
201
+  (0.2ms) begin transaction
202
+ ---------------------------------
203
+ DatesTest: test_date_standardizer
204
+ ---------------------------------
205
+  (0.2ms) rollback transaction
206
+  (0.1ms) begin transaction
207
+ --------------------------------
208
+ LCSHTest: test_lcsh_standardizer
209
+ --------------------------------
210
+  (0.1ms) rollback transaction
211
+  (0.5ms) begin transaction
212
+ ------------------------------------
213
+ AuthoritiesTest: test_parse_language
214
+ ------------------------------------
215
+  (0.3ms) rollback transaction
216
+  (0.2ms) begin transaction
217
+ -----------------------------------------
218
+ AuthoritiesTest: test_parse_name_for_role
219
+ -----------------------------------------
220
+  (0.3ms) rollback transaction
221
+  (0.2ms) begin transaction
222
+ --------------------------------
223
+ AuthoritiesTest: test_parse_role
224
+ --------------------------------
225
+  (0.3ms) rollback transaction
226
+  (0.2ms) begin transaction
227
+ -------------------------------
228
+ BplEnrichTest: test_strip_value
229
+ -------------------------------
230
+  (0.2ms) rollback transaction
231
+  (0.1ms) begin transaction
232
+ ---------------------------------
233
+ DatesTest: test_date_standardizer
234
+ ---------------------------------
235
+  (0.2ms) rollback transaction
236
+  (0.1ms) begin transaction
237
+ --------------------------------
238
+ LCSHTest: test_lcsh_standardizer
239
+ --------------------------------
240
+  (0.1ms) rollback transaction
241
+  (0.4ms) begin transaction
242
+ ------------------------------------
243
+ AuthoritiesTest: test_parse_language
244
+ ------------------------------------
245
+  (0.3ms) rollback transaction
246
+  (0.2ms) begin transaction
247
+ -----------------------------------------
248
+ AuthoritiesTest: test_parse_name_for_role
249
+ -----------------------------------------
250
+  (0.3ms) rollback transaction
251
+  (0.2ms) begin transaction
252
+ --------------------------------
253
+ AuthoritiesTest: test_parse_role
254
+ --------------------------------
255
+  (0.3ms) rollback transaction
256
+  (0.2ms) begin transaction
257
+ -------------------------------
258
+ BplEnrichTest: test_strip_value
259
+ -------------------------------
260
+  (0.2ms) rollback transaction
261
+  (0.5ms) begin transaction
262
+ ---------------------------------
263
+ DatesTest: test_date_standardizer
264
+ ---------------------------------
265
+  (0.1ms) rollback transaction
266
+  (0.1ms) begin transaction
267
+ --------------------------------
268
+ LCSHTest: test_lcsh_standardizer
269
+ --------------------------------
270
+  (0.1ms) rollback transaction
271
+  (0.8ms) begin transaction
272
+ ------------------------------------
273
+ AuthoritiesTest: test_parse_language
274
+ ------------------------------------
275
+  (0.3ms) rollback transaction
276
+  (0.2ms) begin transaction
277
+ -----------------------------------------
278
+ AuthoritiesTest: test_parse_name_for_role
279
+ -----------------------------------------
280
+  (0.3ms) rollback transaction
281
+  (0.2ms) begin transaction
282
+ --------------------------------
283
+ AuthoritiesTest: test_parse_role
284
+ --------------------------------
285
+  (0.1ms) rollback transaction
286
+  (0.1ms) begin transaction
287
+ -------------------------------
288
+ BplEnrichTest: test_strip_value
289
+ -------------------------------
290
+  (0.1ms) rollback transaction
291
+  (0.2ms) begin transaction
292
+ ---------------------------------
293
+ DatesTest: test_date_standardizer
294
+ ---------------------------------
295
+  (0.3ms) rollback transaction
296
+  (0.1ms) begin transaction
297
+ --------------------------------
298
+ LCSHTest: test_lcsh_standardizer
179
299
  --------------------------------
180
300
   (0.1ms) rollback transaction
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bpl_enrich
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boston Public Library
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-11 00:00:00.000000000 Z
11
+ date: 2014-06-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails