bpl_enrich 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 41994a22b0ef539c9938525b17ad475e6eaafce7
4
- data.tar.gz: 9c16b1cbb137f1c770d72190b3916a2ac6f703ae
3
+ metadata.gz: 63ca261fe49ac43d3aad9df73b3caa2ce33dce29
4
+ data.tar.gz: 70f1ae1ef1a0b9650b75093cff8d3916023cc3cf
5
5
  SHA512:
6
- metadata.gz: 06560416487c4a58d16c0ebed6716429b44aa73bd92c62f6d99093ce4d130a416adf8fa8f5eaeae0e91f2f371fed67535288109265c1a1c8ea44c3690b15d78c
7
- data.tar.gz: 54f601dd8de34b18f9c018c09ea48fe1ac4de300e719c07aa34b6f595cbab50c337eecf24d19cde70c2cde2c5f786af492346581027fc30872a1cb3de31db311
6
+ metadata.gz: 48f5af0440326806c69d638ee8a3cb4b2cb5c28e9b7d4e17128a48b413542a59fb8e063b6d31521089334adb1791b90216e3f10b68b86b176dca0f289b9132ec
7
+ data.tar.gz: cedfcdcc85a1bb2d55e7f97117081f45fd5fc77c1b031fe108a4aded7ea258d7e6f35c12e1a0139f4828264369d52bfb1baaa4e0b1933aad71177d18db793aae
@@ -1,6 +1,43 @@
1
1
  module BplEnrich
2
2
  class Dates
3
3
 
4
+ def self.is_numeric? (string)
5
+ true if Float(string) rescue false
6
+ end
7
+
8
+ def self.convert_month_words(date_string)
9
+ return_date_string = date_string.clone
10
+
11
+ date_string = date_string.gsub(/[,\/\.]/, ' ').squeeze #replace periods, slashes, and commas (which can separate the parts of a date) with spaces
12
+ if date_string.split(' ').any? { |word| Date::MONTHNAMES.include?(word.humanize) || Date::ABBR_MONTHNAMES.include?(word.gsub('.', '').humanize) }
13
+ return_date_string = ''
14
+ was_numeric = false
15
+
16
+ date_string.split(' ').each do |date_word|
17
+ if Date::MONTHNAMES.include?(date_word.humanize)
18
+ current_value = Date::MONTHNAMES.index(date_word).to_s.rjust(2, '0')
19
+ elsif Date::ABBR_MONTHNAMES.include?(date_word.humanize)
20
+ current_value = Date::ABBR_MONTHNAMES.index(date_word).to_s.rjust(2, '0')
21
+ else
22
+ current_value = date_word
23
+ end
24
+ if is_numeric?(current_value)
25
+ if was_numeric
26
+ return_date_string += "/#{current_value.to_s.rjust(2, '0')}"
27
+ else
28
+ was_numeric = true
29
+ return_date_string += " #{current_value.to_s.rjust(2, '0')}"
30
+ end
31
+ else
32
+ was_numeric = false
33
+ return_date_string += " #{current_value}"
34
+ end
35
+ end
36
+ end
37
+
38
+ return return_date_string
39
+ end
40
+
4
41
  # a function to convert date data from OAI feeds into MODS-usable date data
5
42
  # assumes date values containing ";" have already been split
6
43
  # returns hash with :single_date, :date_range, :date_qualifier, and/or :date_note values
@@ -9,6 +46,8 @@ module BplEnrich
9
46
  date_data = {} # create the hash to hold all the data
10
47
  source_date_string = value.strip # variable to hold original value
11
48
 
49
+ value = convert_month_words(value) #Stuff like April 7, 1983
50
+
12
51
  # weed out obvious bad dates before processing
13
52
  if (value.match(/([Pp]re|[Pp]ost|[Bb]efore|[Aa]fter|[Uu]nknown|[Uu]ndated|n\.d\.)/)) ||
14
53
  (value.match(/\d\d\d\d-\z/)) || # 1975-
@@ -194,22 +233,26 @@ module BplEnrich
194
233
  # try to automatically parse single dates with YYYY && MM && DD values
195
234
  if Timeliness.parse(value).nil?
196
235
  # start further processing
197
- if value.match(/\A[12]\d\d\d-[01][0-9]\z/) # yyyy-mm
198
- date_data[:single_date] = value
199
- elsif value.match(/\A[01]?[1-9][-\/][12]\d\d\d\z/) # mm-yyyy || m-yyyy || mm/yyyy
200
- value = '0' + value if value.match(/\A[1-9][-\/][12]\d\d\d\z/) # m-yyyy || m/yyyy
201
- date_data[:single_date] = value[3..6] + '-' + value[0..1]
202
- elsif value.match(/\A[A-Za-z]{3,} [12]\d\d\d\z/) # April 1987 || Apr. 1987
203
- value = value.split(' ')
204
- if value[0].length == 3
205
- value_month = '%02d' % Date::ABBR_MONTHNAMES.index(value[0])
206
- else
207
- value_month = '%02d' % Date::MONTHNAMES.index(value[0])
236
+ value.split(' ').each do |split_value|
237
+ if split_value.match(/\A[12]\d\d\d[-\/\.][01][0-9]\z/) # yyyy-mm || yyyy/mm || yyyy.mm
238
+ split_value = split_value.gsub(/[,\/\.]/, '-').squeeze
239
+ date_data[:single_date] = split_value
240
+ elsif split_value.match(/\A[12]\d\d\d[-\/\.][01][0-9][-\/\.][01][0-9]\z/) # yyyy-mm-dd || yyyy/mm/dd || yyyy.mm.dd
241
+ split_value = split_value.gsub(/[,\/\.]/, '-').squeeze
242
+ date_data[:single_date] = split_value
243
+ elsif split_value.match(/\A[01]?[1-9][-\/][12]\d\d\d\z/) # mm-yyyy || m-yyyy || mm/yyyy
244
+ split_value = '0' + split_value if split_value.match(/\A[1-9][-\/\.][12]\d\d\d\z/) # m-yyyy || m/yyyy
245
+ date_data[:single_date] = split_value[3..6] + '-' + split_value[0..1]
246
+ elsif split_value.match(/\A[12]\d\d\d\z/) # 1999
247
+ date_data[:single_date] = split_value
248
+ elsif split_value.match(/\A[01]?[1-9][-\/\.][01]?[1-9][-\/\.][12]\d\d\d\z/) # mm-dd-yyyy || m-dd-yyyy || mm/dd/yyyy
249
+ split_value = split_value.gsub(/[,\/\.]/, '/').squeeze
250
+ date_data[:single_date] = "#{split_value.split('/')[2]}-#{split_value.split('/')[0]}-#{split_value.split('/')[1]}"
208
251
  end
209
- date_data[:single_date] = value_month ? value[1] + '-' + value_month : value[1]
210
- elsif value.match(/\A[12]\d\d\d\z/) # 1999
211
- date_data[:single_date] = value
212
- else
252
+
253
+ end
254
+
255
+ if value.split(' ').length > 1 || date_data[:single_date].blank?
213
256
  date_data[:date_note] = source_date_string
214
257
  end
215
258
  else
@@ -1,3 +1,3 @@
1
1
  module BplEnrich
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
data/lib/bpl_enrich.rb CHANGED
@@ -8,6 +8,11 @@ module BplEnrich
8
8
  require "htmlentities"
9
9
  require "qa"
10
10
 
11
+ # add some formats to Timeliness gem for better parsing
12
+ Timeliness.add_formats(:date, 'm-d-yy', :before => 'd-m-yy')
13
+ Timeliness.add_formats(:date, 'mmm[\.]? d[a-z]?[a-z]?[,]? yyyy')
14
+ Timeliness.add_formats(:date, 'yyyy mmm d')
15
+
11
16
  def self.strip_value(value)
12
17
  if(value.blank?)
13
18
  return nil
data/test/dates_test.rb CHANGED
@@ -2,10 +2,33 @@ require 'test_helper'
2
2
 
3
3
  class DatesTest < ActiveSupport::TestCase
4
4
  def test_date_standardizer
5
+
6
+ #Month dates
5
7
  result = BplEnrich::Dates.standardize('April 1983')
6
8
  assert_equal '1983-04', result[:single_date]
7
9
  assert_equal nil, result[:date_range]
8
10
  assert_equal nil, result[:date_note]
11
+
12
+ result = BplEnrich::Dates.standardize('April 7, 1983')
13
+ assert_equal '1983-04-07', result[:single_date]
14
+ assert_equal nil, result[:date_range]
15
+ assert_equal nil, result[:date_note]
16
+
17
+ result = BplEnrich::Dates.standardize('April 7.1983 (Easter)')
18
+ assert_equal '1983-04-07', result[:single_date]
19
+ assert_equal nil, result[:date_range]
20
+ assert_equal 'April 7.1983 (Easter)', result[:date_note]
21
+
22
+ result = BplEnrich::Dates.standardize('1983.April.7 (Easter)')
23
+ assert_equal '1983-04-07', result[:single_date]
24
+ assert_equal nil, result[:date_range]
25
+ assert_equal '1983.April.7 (Easter)', result[:date_note]
26
+
27
+ result = BplEnrich::Dates.standardize('between April 2014 and May 2014')
28
+ assert_equal nil, result[:single_date]
29
+ assert_equal '2014-04', result[:date_range][:start]
30
+ assert_equal '2014-05', result[:date_range][:end]
31
+ assert_equal nil, result[:date_note]
9
32
  end
10
33
 
11
34
 
@@ -176,5 +176,125 @@ DatesTest: test_date_standardizer
176
176
   (0.1ms) begin transaction
177
177
  --------------------------------
178
178
  LCSHTest: test_lcsh_standardizer
179
+ --------------------------------
180
+  (0.1ms) rollback transaction
181
+  (0.7ms) begin transaction
182
+ ------------------------------------
183
+ AuthoritiesTest: test_parse_language
184
+ ------------------------------------
185
+  (0.3ms) rollback transaction
186
+  (0.2ms) begin transaction
187
+ -----------------------------------------
188
+ AuthoritiesTest: test_parse_name_for_role
189
+ -----------------------------------------
190
+  (0.3ms) rollback transaction
191
+  (0.2ms) begin transaction
192
+ --------------------------------
193
+ AuthoritiesTest: test_parse_role
194
+ --------------------------------
195
+  (0.3ms) rollback transaction
196
+  (0.2ms) begin transaction
197
+ -------------------------------
198
+ BplEnrichTest: test_strip_value
199
+ -------------------------------
200
+  (0.1ms) rollback transaction
201
+  (0.2ms) begin transaction
202
+ ---------------------------------
203
+ DatesTest: test_date_standardizer
204
+ ---------------------------------
205
+  (0.2ms) rollback transaction
206
+  (0.1ms) begin transaction
207
+ --------------------------------
208
+ LCSHTest: test_lcsh_standardizer
209
+ --------------------------------
210
+  (0.1ms) rollback transaction
211
+  (0.5ms) begin transaction
212
+ ------------------------------------
213
+ AuthoritiesTest: test_parse_language
214
+ ------------------------------------
215
+  (0.3ms) rollback transaction
216
+  (0.2ms) begin transaction
217
+ -----------------------------------------
218
+ AuthoritiesTest: test_parse_name_for_role
219
+ -----------------------------------------
220
+  (0.3ms) rollback transaction
221
+  (0.2ms) begin transaction
222
+ --------------------------------
223
+ AuthoritiesTest: test_parse_role
224
+ --------------------------------
225
+  (0.3ms) rollback transaction
226
+  (0.2ms) begin transaction
227
+ -------------------------------
228
+ BplEnrichTest: test_strip_value
229
+ -------------------------------
230
+  (0.2ms) rollback transaction
231
+  (0.1ms) begin transaction
232
+ ---------------------------------
233
+ DatesTest: test_date_standardizer
234
+ ---------------------------------
235
+  (0.2ms) rollback transaction
236
+  (0.1ms) begin transaction
237
+ --------------------------------
238
+ LCSHTest: test_lcsh_standardizer
239
+ --------------------------------
240
+  (0.1ms) rollback transaction
241
+  (0.4ms) begin transaction
242
+ ------------------------------------
243
+ AuthoritiesTest: test_parse_language
244
+ ------------------------------------
245
+  (0.3ms) rollback transaction
246
+  (0.2ms) begin transaction
247
+ -----------------------------------------
248
+ AuthoritiesTest: test_parse_name_for_role
249
+ -----------------------------------------
250
+  (0.3ms) rollback transaction
251
+  (0.2ms) begin transaction
252
+ --------------------------------
253
+ AuthoritiesTest: test_parse_role
254
+ --------------------------------
255
+  (0.3ms) rollback transaction
256
+  (0.2ms) begin transaction
257
+ -------------------------------
258
+ BplEnrichTest: test_strip_value
259
+ -------------------------------
260
+  (0.2ms) rollback transaction
261
+  (0.5ms) begin transaction
262
+ ---------------------------------
263
+ DatesTest: test_date_standardizer
264
+ ---------------------------------
265
+  (0.1ms) rollback transaction
266
+  (0.1ms) begin transaction
267
+ --------------------------------
268
+ LCSHTest: test_lcsh_standardizer
269
+ --------------------------------
270
+  (0.1ms) rollback transaction
271
+  (0.8ms) begin transaction
272
+ ------------------------------------
273
+ AuthoritiesTest: test_parse_language
274
+ ------------------------------------
275
+  (0.3ms) rollback transaction
276
+  (0.2ms) begin transaction
277
+ -----------------------------------------
278
+ AuthoritiesTest: test_parse_name_for_role
279
+ -----------------------------------------
280
+  (0.3ms) rollback transaction
281
+  (0.2ms) begin transaction
282
+ --------------------------------
283
+ AuthoritiesTest: test_parse_role
284
+ --------------------------------
285
+  (0.1ms) rollback transaction
286
+  (0.1ms) begin transaction
287
+ -------------------------------
288
+ BplEnrichTest: test_strip_value
289
+ -------------------------------
290
+  (0.1ms) rollback transaction
291
+  (0.2ms) begin transaction
292
+ ---------------------------------
293
+ DatesTest: test_date_standardizer
294
+ ---------------------------------
295
+  (0.3ms) rollback transaction
296
+  (0.1ms) begin transaction
297
+ --------------------------------
298
+ LCSHTest: test_lcsh_standardizer
179
299
  --------------------------------
180
300
   (0.1ms) rollback transaction
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bpl_enrich
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boston Public Library
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-11 00:00:00.000000000 Z
11
+ date: 2014-06-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails