bpl_enrich 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bpl_enrich/dates.rb +58 -15
- data/lib/bpl_enrich/version.rb +1 -1
- data/lib/bpl_enrich.rb +5 -0
- data/test/dates_test.rb +23 -0
- data/test/dummy/log/test.log +120 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63ca261fe49ac43d3aad9df73b3caa2ce33dce29
|
4
|
+
data.tar.gz: 70f1ae1ef1a0b9650b75093cff8d3916023cc3cf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 48f5af0440326806c69d638ee8a3cb4b2cb5c28e9b7d4e17128a48b413542a59fb8e063b6d31521089334adb1791b90216e3f10b68b86b176dca0f289b9132ec
|
7
|
+
data.tar.gz: cedfcdcc85a1bb2d55e7f97117081f45fd5fc77c1b031fe108a4aded7ea258d7e6f35c12e1a0139f4828264369d52bfb1baaa4e0b1933aad71177d18db793aae
|
data/lib/bpl_enrich/dates.rb
CHANGED
@@ -1,6 +1,43 @@
|
|
1
1
|
module BplEnrich
|
2
2
|
class Dates
|
3
3
|
|
4
|
+
def self.is_numeric? (string)
|
5
|
+
true if Float(string) rescue false
|
6
|
+
end
|
7
|
+
|
8
|
+
def self.convert_month_words(date_string)
|
9
|
+
return_date_string = date_string.clone
|
10
|
+
|
11
|
+
date_string = date_string.gsub(/[,\/\.]/, ' ').squeeze #switch periods, slashes, and commas that can separate dates with spaces
|
12
|
+
if date_string.split(' ').any? { |word| Date::MONTHNAMES.include?(word.humanize) || Date::ABBR_MONTHNAMES.include?(word.gsub('.', '').humanize) }
|
13
|
+
return_date_string = ''
|
14
|
+
was_numeric = false
|
15
|
+
|
16
|
+
date_string.split(' ').each do |date_word|
|
17
|
+
if Date::MONTHNAMES.include?(date_word.humanize)
|
18
|
+
current_value = Date::MONTHNAMES.index(date_word).to_s.rjust(2, '0')
|
19
|
+
elsif Date::ABBR_MONTHNAMES.include?(date_word.humanize)
|
20
|
+
current_value = Date::ABBR_MONTHNAMES.index(date_word).to_s.rjust(2, '0')
|
21
|
+
else
|
22
|
+
current_value = date_word
|
23
|
+
end
|
24
|
+
if is_numeric?(current_value)
|
25
|
+
if was_numeric
|
26
|
+
return_date_string += "/#{current_value.to_s.rjust(2, '0')}"
|
27
|
+
else
|
28
|
+
was_numeric = true
|
29
|
+
return_date_string += " #{current_value.to_s.rjust(2, '0')}"
|
30
|
+
end
|
31
|
+
else
|
32
|
+
was_numeric = false
|
33
|
+
return_date_string += " #{current_value}"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
return return_date_string
|
39
|
+
end
|
40
|
+
|
4
41
|
# a function to convert date data from OAI feeds into MODS-usable date data
|
5
42
|
# assumes date values containing ";" have already been split
|
6
43
|
# returns hash with :single_date, :date_range, :date_qualifier, and/or :date_note values
|
@@ -9,6 +46,8 @@ module BplEnrich
|
|
9
46
|
date_data = {} # create the hash to hold all the data
|
10
47
|
source_date_string = value.strip # variable to hold original value
|
11
48
|
|
49
|
+
value = convert_month_words(value) #Stuff like April 7, 1983
|
50
|
+
|
12
51
|
# weed out obvious bad dates before processing
|
13
52
|
if (value.match(/([Pp]re|[Pp]ost|[Bb]efore|[Aa]fter|[Uu]nknown|[Uu]ndated|n\.d\.)/)) ||
|
14
53
|
(value.match(/\d\d\d\d-\z/)) || # 1975-
|
@@ -194,22 +233,26 @@ module BplEnrich
|
|
194
233
|
# try to automatically parse single dates with YYYY && MM && DD values
|
195
234
|
if Timeliness.parse(value).nil?
|
196
235
|
# start further processing
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
236
|
+
value.split(' ').each do |split_value|
|
237
|
+
if split_value.match(/\A[12]\d\d\d[-\/\.][01][0-9]\z/) # yyyy-mm || yyyy/mm || yyyy.mm
|
238
|
+
split_value = split_value.gsub(/[,\/\.]/, '-').squeeze
|
239
|
+
date_data[:single_date] = split_value
|
240
|
+
elsif split_value.match(/\A[12]\d\d\d[-\/\.][01][0-9][-\/\.][01][0-9]\z/) # yyyy-mm-dd || yyyy/mm/dd || yyyy.mm.dd
|
241
|
+
split_value = split_value.gsub(/[,\/\.]/, '-').squeeze
|
242
|
+
date_data[:single_date] = split_value
|
243
|
+
elsif split_value.match(/\A[01]?[1-9][-\/][12]\d\d\d\z/) # mm-yyyy || m-yyyy || mm/yyyy
|
244
|
+
split_value = '0' + split_value if split_value.match(/\A[1-9][-\/\.][12]\d\d\d\z/) # m-yyyy || m/yyyy
|
245
|
+
date_data[:single_date] = split_value[3..6] + '-' + split_value[0..1]
|
246
|
+
elsif split_value.match(/\A[12]\d\d\d\z/) # 1999
|
247
|
+
date_data[:single_date] = split_value
|
248
|
+
elsif split_value.match(/\A[01]?[1-9][-\/\.][01]?[1-9][-\/\.][12]\d\d\d\z/) # mm-dd-yyyy || m-dd-yyyy || mm/dd/yyyy
|
249
|
+
split_value = split_value.gsub(/[,\/\.]/, '/').squeeze
|
250
|
+
date_data[:single_date] = "#{split_value.split('/')[2]}-#{split_value.split('/')[0]}-#{split_value.split('/')[1]}"
|
208
251
|
end
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
252
|
+
|
253
|
+
end
|
254
|
+
|
255
|
+
if value.split(' ').length > 1 || date_data[:single_date].blank?
|
213
256
|
date_data[:date_note] = source_date_string
|
214
257
|
end
|
215
258
|
else
|
data/lib/bpl_enrich/version.rb
CHANGED
data/lib/bpl_enrich.rb
CHANGED
@@ -8,6 +8,11 @@ module BplEnrich
|
|
8
8
|
require "htmlentities"
|
9
9
|
require "qa"
|
10
10
|
|
11
|
+
# add some formats to Timeliness gem for better parsing
|
12
|
+
Timeliness.add_formats(:date, 'm-d-yy', :before => 'd-m-yy')
|
13
|
+
Timeliness.add_formats(:date, 'mmm[\.]? d[a-z]?[a-z]?[,]? yyyy')
|
14
|
+
Timeliness.add_formats(:date, 'yyyy mmm d')
|
15
|
+
|
11
16
|
def self.strip_value(value)
|
12
17
|
if(value.blank?)
|
13
18
|
return nil
|
data/test/dates_test.rb
CHANGED
@@ -2,10 +2,33 @@ require 'test_helper'
|
|
2
2
|
|
3
3
|
class DatesTest < ActiveSupport::TestCase
|
4
4
|
def test_date_standardizer
|
5
|
+
|
6
|
+
#Month dates
|
5
7
|
result = BplEnrich::Dates.standardize('April 1983')
|
6
8
|
assert_equal '1983-04', result[:single_date]
|
7
9
|
assert_equal nil, result[:date_range]
|
8
10
|
assert_equal nil, result[:date_note]
|
11
|
+
|
12
|
+
result = BplEnrich::Dates.standardize('April 7, 1983')
|
13
|
+
assert_equal '1983-04-07', result[:single_date]
|
14
|
+
assert_equal nil, result[:date_range]
|
15
|
+
assert_equal nil, result[:date_note]
|
16
|
+
|
17
|
+
result = BplEnrich::Dates.standardize('April 7.1983 (Easter)')
|
18
|
+
assert_equal '1983-04-07', result[:single_date]
|
19
|
+
assert_equal nil, result[:date_range]
|
20
|
+
assert_equal 'April 7.1983 (Easter)', result[:date_note]
|
21
|
+
|
22
|
+
result = BplEnrich::Dates.standardize('1983.April.7 (Easter)')
|
23
|
+
assert_equal '1983-04-07', result[:single_date]
|
24
|
+
assert_equal nil, result[:date_range]
|
25
|
+
assert_equal '1983.April.7 (Easter)', result[:date_note]
|
26
|
+
|
27
|
+
result = BplEnrich::Dates.standardize('between April 2014 and May 2014')
|
28
|
+
assert_equal nil, result[:single_date]
|
29
|
+
assert_equal '2014-04', result[:date_range][:start]
|
30
|
+
assert_equal '2014-05', result[:date_range][:end]
|
31
|
+
assert_equal nil, result[:date_note]
|
9
32
|
end
|
10
33
|
|
11
34
|
|
data/test/dummy/log/test.log
CHANGED
@@ -176,5 +176,125 @@ DatesTest: test_date_standardizer
|
|
176
176
|
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
177
177
|
--------------------------------
|
178
178
|
LCSHTest: test_lcsh_standardizer
|
179
|
+
--------------------------------
|
180
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
181
|
+
[1m[36m (0.7ms)[0m [1mbegin transaction[0m
|
182
|
+
------------------------------------
|
183
|
+
AuthoritiesTest: test_parse_language
|
184
|
+
------------------------------------
|
185
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
186
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
187
|
+
-----------------------------------------
|
188
|
+
AuthoritiesTest: test_parse_name_for_role
|
189
|
+
-----------------------------------------
|
190
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
191
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
192
|
+
--------------------------------
|
193
|
+
AuthoritiesTest: test_parse_role
|
194
|
+
--------------------------------
|
195
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
196
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
197
|
+
-------------------------------
|
198
|
+
BplEnrichTest: test_strip_value
|
199
|
+
-------------------------------
|
200
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
201
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
202
|
+
---------------------------------
|
203
|
+
DatesTest: test_date_standardizer
|
204
|
+
---------------------------------
|
205
|
+
[1m[35m (0.2ms)[0m rollback transaction
|
206
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
207
|
+
--------------------------------
|
208
|
+
LCSHTest: test_lcsh_standardizer
|
209
|
+
--------------------------------
|
210
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
211
|
+
[1m[36m (0.5ms)[0m [1mbegin transaction[0m
|
212
|
+
------------------------------------
|
213
|
+
AuthoritiesTest: test_parse_language
|
214
|
+
------------------------------------
|
215
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
216
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
217
|
+
-----------------------------------------
|
218
|
+
AuthoritiesTest: test_parse_name_for_role
|
219
|
+
-----------------------------------------
|
220
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
221
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
222
|
+
--------------------------------
|
223
|
+
AuthoritiesTest: test_parse_role
|
224
|
+
--------------------------------
|
225
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
226
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
227
|
+
-------------------------------
|
228
|
+
BplEnrichTest: test_strip_value
|
229
|
+
-------------------------------
|
230
|
+
[1m[35m (0.2ms)[0m rollback transaction
|
231
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
232
|
+
---------------------------------
|
233
|
+
DatesTest: test_date_standardizer
|
234
|
+
---------------------------------
|
235
|
+
[1m[35m (0.2ms)[0m rollback transaction
|
236
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
237
|
+
--------------------------------
|
238
|
+
LCSHTest: test_lcsh_standardizer
|
239
|
+
--------------------------------
|
240
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
241
|
+
[1m[36m (0.4ms)[0m [1mbegin transaction[0m
|
242
|
+
------------------------------------
|
243
|
+
AuthoritiesTest: test_parse_language
|
244
|
+
------------------------------------
|
245
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
246
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
247
|
+
-----------------------------------------
|
248
|
+
AuthoritiesTest: test_parse_name_for_role
|
249
|
+
-----------------------------------------
|
250
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
251
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
252
|
+
--------------------------------
|
253
|
+
AuthoritiesTest: test_parse_role
|
254
|
+
--------------------------------
|
255
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
256
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
257
|
+
-------------------------------
|
258
|
+
BplEnrichTest: test_strip_value
|
259
|
+
-------------------------------
|
260
|
+
[1m[35m (0.2ms)[0m rollback transaction
|
261
|
+
[1m[36m (0.5ms)[0m [1mbegin transaction[0m
|
262
|
+
---------------------------------
|
263
|
+
DatesTest: test_date_standardizer
|
264
|
+
---------------------------------
|
265
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
266
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
267
|
+
--------------------------------
|
268
|
+
LCSHTest: test_lcsh_standardizer
|
269
|
+
--------------------------------
|
270
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
271
|
+
[1m[36m (0.8ms)[0m [1mbegin transaction[0m
|
272
|
+
------------------------------------
|
273
|
+
AuthoritiesTest: test_parse_language
|
274
|
+
------------------------------------
|
275
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
276
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
277
|
+
-----------------------------------------
|
278
|
+
AuthoritiesTest: test_parse_name_for_role
|
279
|
+
-----------------------------------------
|
280
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
281
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
282
|
+
--------------------------------
|
283
|
+
AuthoritiesTest: test_parse_role
|
284
|
+
--------------------------------
|
285
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
286
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
287
|
+
-------------------------------
|
288
|
+
BplEnrichTest: test_strip_value
|
289
|
+
-------------------------------
|
290
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
291
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
292
|
+
---------------------------------
|
293
|
+
DatesTest: test_date_standardizer
|
294
|
+
---------------------------------
|
295
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
296
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
297
|
+
--------------------------------
|
298
|
+
LCSHTest: test_lcsh_standardizer
|
179
299
|
--------------------------------
|
180
300
|
[1m[35m (0.1ms)[0m rollback transaction
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bpl_enrich
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boston Public Library
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|