bpl_enrich 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bpl_enrich/dates.rb +58 -15
- data/lib/bpl_enrich/version.rb +1 -1
- data/lib/bpl_enrich.rb +5 -0
- data/test/dates_test.rb +23 -0
- data/test/dummy/log/test.log +120 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63ca261fe49ac43d3aad9df73b3caa2ce33dce29
|
4
|
+
data.tar.gz: 70f1ae1ef1a0b9650b75093cff8d3916023cc3cf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 48f5af0440326806c69d638ee8a3cb4b2cb5c28e9b7d4e17128a48b413542a59fb8e063b6d31521089334adb1791b90216e3f10b68b86b176dca0f289b9132ec
|
7
|
+
data.tar.gz: cedfcdcc85a1bb2d55e7f97117081f45fd5fc77c1b031fe108a4aded7ea258d7e6f35c12e1a0139f4828264369d52bfb1baaa4e0b1933aad71177d18db793aae
|
data/lib/bpl_enrich/dates.rb
CHANGED
@@ -1,6 +1,43 @@
|
|
1
1
|
module BplEnrich
|
2
2
|
class Dates
|
3
3
|
|
4
|
+
# Reports whether +string+ can be read as a number by Kernel#Float.
#
# string - the value to test (typically a single word from a date string).
#
# Returns true when Float(string) succeeds, false otherwise.
def self.is_numeric?(string)
  Float(string)
  true
rescue ArgumentError, TypeError
  # Float raises ArgumentError for text that is not a number and
  # TypeError for nil; both simply mean "not numeric" here.
  false
end
|
7
|
+
|
8
|
+
# Rewrites month names in a date string as two-digit month numbers and
# joins runs of adjacent numeric words with "/", e.g.
# "April 7, 1983" becomes " 04/07/1983".
#
# date_string - the raw date text (a String).
#
# Returns a copy of date_string, unchanged, when it contains no full or
# abbreviated English month name. Otherwise returns a rebuilt string in
# which commas, slashes and periods have been treated as word separators
# and every word is preceded by either a space or a "/"; the result
# therefore starts with a space.
def self.convert_month_words(date_string)
  # Commas, slashes and periods can separate date parts; treat them as
  # spaces. split(' ') already collapses runs of whitespace, so the words
  # themselves are never altered (years such as "2000" stay intact).
  words = date_string.gsub(/[,\/\.]/, ' ').split(' ')

  # Normalise the case once and reuse the result, so that detection and
  # lookup always agree ("april", "APRIL" and "April" all map to 4).
  month_numbers = words.map do |word|
    name = word.capitalize
    Date::MONTHNAMES.index(name) || Date::ABBR_MONTHNAMES.index(name)
  end

  # No month word anywhere: hand back the input text untouched.
  return date_string.clone if month_numbers.none?

  was_numeric = false
  pieces = words.zip(month_numbers).map do |word, month_number|
    token = month_number ? month_number.to_s : word
    numeric = is_numeric?(token)
    # Consecutive numeric words form one date and are joined with "/";
    # anything else starts a new space-separated word.
    separator = numeric && was_numeric ? '/' : ' '
    was_numeric = numeric
    # Numeric parts are padded to two digits ("7" -> "07", "4" -> "04").
    "#{separator}#{numeric ? token.rjust(2, '0') : token}"
  end

  pieces.join
end
|
40
|
+
|
4
41
|
# a function to convert date data from OAI feeds into MODS-usable date data
|
5
42
|
# assumes date values containing ";" have already been split
|
6
43
|
# returns hash with :single_date, :date_range, :date_qualifier, and/or :date_note values
|
@@ -9,6 +46,8 @@ module BplEnrich
|
|
9
46
|
date_data = {} # create the hash to hold all the data
|
10
47
|
source_date_string = value.strip # variable to hold original value
|
11
48
|
|
49
|
+
value = convert_month_words(value) #Stuff like April 7, 1983
|
50
|
+
|
12
51
|
# weed out obvious bad dates before processing
|
13
52
|
if (value.match(/([Pp]re|[Pp]ost|[Bb]efore|[Aa]fter|[Uu]nknown|[Uu]ndated|n\.d\.)/)) ||
|
14
53
|
(value.match(/\d\d\d\d-\z/)) || # 1975-
|
@@ -194,22 +233,26 @@ module BplEnrich
|
|
194
233
|
# try to automatically parse single dates with YYYY && MM && DD values
|
195
234
|
if Timeliness.parse(value).nil?
|
196
235
|
# start further processing
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
236
|
+
value.split(' ').each do |split_value|
|
237
|
+
if split_value.match(/\A[12]\d\d\d[-\/\.][01][0-9]\z/) # yyyy-mm || yyyy/mm || yyyy.mm
|
238
|
+
split_value = split_value.gsub(/[,\/\.]/, '-').squeeze
|
239
|
+
date_data[:single_date] = split_value
|
240
|
+
elsif split_value.match(/\A[12]\d\d\d[-\/\.][01][0-9][-\/\.][01][0-9]\z/) # yyyy-mm-dd || yyyy/mm/dd || yyyy.mm.dd
|
241
|
+
split_value = split_value.gsub(/[,\/\.]/, '-').squeeze
|
242
|
+
date_data[:single_date] = split_value
|
243
|
+
elsif split_value.match(/\A[01]?[1-9][-\/][12]\d\d\d\z/) # mm-yyyy || m-yyyy || mm/yyyy
|
244
|
+
split_value = '0' + split_value if split_value.match(/\A[1-9][-\/\.][12]\d\d\d\z/) # m-yyyy || m/yyyy
|
245
|
+
date_data[:single_date] = split_value[3..6] + '-' + split_value[0..1]
|
246
|
+
elsif split_value.match(/\A[12]\d\d\d\z/) # 1999
|
247
|
+
date_data[:single_date] = split_value
|
248
|
+
elsif split_value.match(/\A[01]?[1-9][-\/\.][01]?[1-9][-\/\.][12]\d\d\d\z/) # mm-dd-yyyy || m-dd-yyyy || mm/dd/yyyy
|
249
|
+
split_value = split_value.gsub(/[,\/\.]/, '/').squeeze
|
250
|
+
date_data[:single_date] = "#{split_value.split('/')[2]}-#{split_value.split('/')[0]}-#{split_value.split('/')[1]}"
|
208
251
|
end
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
252
|
+
|
253
|
+
end
|
254
|
+
|
255
|
+
if value.split(' ').length > 1 || date_data[:single_date].blank?
|
213
256
|
date_data[:date_note] = source_date_string
|
214
257
|
end
|
215
258
|
else
|
data/lib/bpl_enrich/version.rb
CHANGED
data/lib/bpl_enrich.rb
CHANGED
@@ -8,6 +8,11 @@ module BplEnrich
|
|
8
8
|
require "htmlentities"
|
9
9
|
require "qa"
|
10
10
|
|
11
|
+
# add some formats to Timeliness gem for better parsing
|
12
|
+
Timeliness.add_formats(:date, 'm-d-yy', :before => 'd-m-yy')
|
13
|
+
Timeliness.add_formats(:date, 'mmm[\.]? d[a-z]?[a-z]?[,]? yyyy')
|
14
|
+
Timeliness.add_formats(:date, 'yyyy mmm d')
|
15
|
+
|
11
16
|
def self.strip_value(value)
|
12
17
|
if(value.blank?)
|
13
18
|
return nil
|
data/test/dates_test.rb
CHANGED
@@ -2,10 +2,33 @@ require 'test_helper'
|
|
2
2
|
|
3
3
|
class DatesTest < ActiveSupport::TestCase
|
4
4
|
# Exercises BplEnrich::Dates.standardize with dates written using month
# names, checking the :single_date, :date_range and :date_note values of
# the returned hash.
def test_date_standardizer

  #Month dates
  result = BplEnrich::Dates.standardize('April 1983')
  assert_equal '1983-04', result[:single_date]
  assert_nil result[:date_range]
  assert_nil result[:date_note]

  # month name, day and year separated by a comma
  result = BplEnrich::Dates.standardize('April 7, 1983')
  assert_equal '1983-04-07', result[:single_date]
  assert_nil result[:date_range]
  assert_nil result[:date_note]

  # trailing free text: the original string is expected back as the note
  result = BplEnrich::Dates.standardize('April 7.1983 (Easter)')
  assert_equal '1983-04-07', result[:single_date]
  assert_nil result[:date_range]
  assert_equal 'April 7.1983 (Easter)', result[:date_note]

  # year-first ordering with period separators
  result = BplEnrich::Dates.standardize('1983.April.7 (Easter)')
  assert_equal '1983-04-07', result[:single_date]
  assert_nil result[:date_range]
  assert_equal '1983.April.7 (Easter)', result[:date_note]

  # a range expressed with two month-year pairs
  result = BplEnrich::Dates.standardize('between April 2014 and May 2014')
  assert_nil result[:single_date]
  assert_equal '2014-04', result[:date_range][:start]
  assert_equal '2014-05', result[:date_range][:end]
  assert_nil result[:date_note]
end
|
10
33
|
|
11
34
|
|
data/test/dummy/log/test.log
CHANGED
@@ -176,5 +176,125 @@ DatesTest: test_date_standardizer
|
|
176
176
|
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
177
177
|
--------------------------------
|
178
178
|
LCSHTest: test_lcsh_standardizer
|
179
|
+
--------------------------------
|
180
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
181
|
+
[1m[36m (0.7ms)[0m [1mbegin transaction[0m
|
182
|
+
------------------------------------
|
183
|
+
AuthoritiesTest: test_parse_language
|
184
|
+
------------------------------------
|
185
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
186
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
187
|
+
-----------------------------------------
|
188
|
+
AuthoritiesTest: test_parse_name_for_role
|
189
|
+
-----------------------------------------
|
190
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
191
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
192
|
+
--------------------------------
|
193
|
+
AuthoritiesTest: test_parse_role
|
194
|
+
--------------------------------
|
195
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
196
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
197
|
+
-------------------------------
|
198
|
+
BplEnrichTest: test_strip_value
|
199
|
+
-------------------------------
|
200
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
201
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
202
|
+
---------------------------------
|
203
|
+
DatesTest: test_date_standardizer
|
204
|
+
---------------------------------
|
205
|
+
[1m[35m (0.2ms)[0m rollback transaction
|
206
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
207
|
+
--------------------------------
|
208
|
+
LCSHTest: test_lcsh_standardizer
|
209
|
+
--------------------------------
|
210
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
211
|
+
[1m[36m (0.5ms)[0m [1mbegin transaction[0m
|
212
|
+
------------------------------------
|
213
|
+
AuthoritiesTest: test_parse_language
|
214
|
+
------------------------------------
|
215
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
216
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
217
|
+
-----------------------------------------
|
218
|
+
AuthoritiesTest: test_parse_name_for_role
|
219
|
+
-----------------------------------------
|
220
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
221
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
222
|
+
--------------------------------
|
223
|
+
AuthoritiesTest: test_parse_role
|
224
|
+
--------------------------------
|
225
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
226
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
227
|
+
-------------------------------
|
228
|
+
BplEnrichTest: test_strip_value
|
229
|
+
-------------------------------
|
230
|
+
[1m[35m (0.2ms)[0m rollback transaction
|
231
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
232
|
+
---------------------------------
|
233
|
+
DatesTest: test_date_standardizer
|
234
|
+
---------------------------------
|
235
|
+
[1m[35m (0.2ms)[0m rollback transaction
|
236
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
237
|
+
--------------------------------
|
238
|
+
LCSHTest: test_lcsh_standardizer
|
239
|
+
--------------------------------
|
240
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
241
|
+
[1m[36m (0.4ms)[0m [1mbegin transaction[0m
|
242
|
+
------------------------------------
|
243
|
+
AuthoritiesTest: test_parse_language
|
244
|
+
------------------------------------
|
245
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
246
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
247
|
+
-----------------------------------------
|
248
|
+
AuthoritiesTest: test_parse_name_for_role
|
249
|
+
-----------------------------------------
|
250
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
251
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
252
|
+
--------------------------------
|
253
|
+
AuthoritiesTest: test_parse_role
|
254
|
+
--------------------------------
|
255
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
256
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
257
|
+
-------------------------------
|
258
|
+
BplEnrichTest: test_strip_value
|
259
|
+
-------------------------------
|
260
|
+
[1m[35m (0.2ms)[0m rollback transaction
|
261
|
+
[1m[36m (0.5ms)[0m [1mbegin transaction[0m
|
262
|
+
---------------------------------
|
263
|
+
DatesTest: test_date_standardizer
|
264
|
+
---------------------------------
|
265
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
266
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
267
|
+
--------------------------------
|
268
|
+
LCSHTest: test_lcsh_standardizer
|
269
|
+
--------------------------------
|
270
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
271
|
+
[1m[36m (0.8ms)[0m [1mbegin transaction[0m
|
272
|
+
------------------------------------
|
273
|
+
AuthoritiesTest: test_parse_language
|
274
|
+
------------------------------------
|
275
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
276
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
277
|
+
-----------------------------------------
|
278
|
+
AuthoritiesTest: test_parse_name_for_role
|
279
|
+
-----------------------------------------
|
280
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
281
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
282
|
+
--------------------------------
|
283
|
+
AuthoritiesTest: test_parse_role
|
284
|
+
--------------------------------
|
285
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
286
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
287
|
+
-------------------------------
|
288
|
+
BplEnrichTest: test_strip_value
|
289
|
+
-------------------------------
|
290
|
+
[1m[35m (0.1ms)[0m rollback transaction
|
291
|
+
[1m[36m (0.2ms)[0m [1mbegin transaction[0m
|
292
|
+
---------------------------------
|
293
|
+
DatesTest: test_date_standardizer
|
294
|
+
---------------------------------
|
295
|
+
[1m[35m (0.3ms)[0m rollback transaction
|
296
|
+
[1m[36m (0.1ms)[0m [1mbegin transaction[0m
|
297
|
+
--------------------------------
|
298
|
+
LCSHTest: test_lcsh_standardizer
|
179
299
|
--------------------------------
|
180
300
|
[1m[35m (0.1ms)[0m rollback transaction
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bpl_enrich
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boston Public Library
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rails
|