bpl_enrich 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/Rakefile +32 -0
  3. data/lib/bpl_enrich/authorities.rb +79 -0
  4. data/lib/bpl_enrich/constants.rb +5 -0
  5. data/lib/bpl_enrich/dates.rb +251 -0
  6. data/lib/bpl_enrich/lcsh.rb +46 -0
  7. data/lib/bpl_enrich/version.rb +3 -0
  8. data/lib/bpl_enrich.rb +29 -0
  9. data/lib/tasks/bpl_enrich_tasks.rake +4 -0
  10. data/test/authorities_test.rb +48 -0
  11. data/test/bpl_enrich_test.rb +9 -0
  12. data/test/dates_test.rb +12 -0
  13. data/test/dummy/README.rdoc +28 -0
  14. data/test/dummy/Rakefile +6 -0
  15. data/test/dummy/app/assets/javascripts/application.js +13 -0
  16. data/test/dummy/app/assets/stylesheets/application.css +13 -0
  17. data/test/dummy/app/controllers/application_controller.rb +5 -0
  18. data/test/dummy/app/helpers/application_helper.rb +2 -0
  19. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  20. data/test/dummy/bin/bundle +3 -0
  21. data/test/dummy/bin/rails +4 -0
  22. data/test/dummy/bin/rake +4 -0
  23. data/test/dummy/config/application.rb +23 -0
  24. data/test/dummy/config/boot.rb +5 -0
  25. data/test/dummy/config/database.yml +25 -0
  26. data/test/dummy/config/environment.rb +5 -0
  27. data/test/dummy/config/environments/development.rb +29 -0
  28. data/test/dummy/config/environments/production.rb +80 -0
  29. data/test/dummy/config/environments/test.rb +36 -0
  30. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  31. data/test/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  32. data/test/dummy/config/initializers/inflections.rb +16 -0
  33. data/test/dummy/config/initializers/mime_types.rb +5 -0
  34. data/test/dummy/config/initializers/secret_token.rb +12 -0
  35. data/test/dummy/config/initializers/session_store.rb +3 -0
  36. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  37. data/test/dummy/config/locales/en.yml +23 -0
  38. data/test/dummy/config/routes.rb +56 -0
  39. data/test/dummy/config.ru +4 -0
  40. data/test/dummy/db/test.sqlite3 +0 -0
  41. data/test/dummy/log/development.log +35 -0
  42. data/test/dummy/log/test.log +180 -0
  43. data/test/dummy/public/404.html +58 -0
  44. data/test/dummy/public/422.html +58 -0
  45. data/test/dummy/public/500.html +57 -0
  46. data/test/dummy/public/favicon.ico +0 -0
  47. data/test/lcsh_test.rb +10 -0
  48. data/test/test_helper.rb +15 -0
  49. metadata +214 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 41994a22b0ef539c9938525b17ad475e6eaafce7
4
+ data.tar.gz: 9c16b1cbb137f1c770d72190b3916a2ac6f703ae
5
+ SHA512:
6
+ metadata.gz: 06560416487c4a58d16c0ebed6716429b44aa73bd92c62f6d99093ce4d130a416adf8fa8f5eaeae0e91f2f371fed67535288109265c1a1c8ea44c3690b15d78c
7
+ data.tar.gz: 54f601dd8de34b18f9c018c09ea48fe1ac4de300e719c07aa34b6f595cbab50c337eecf24d19cde70c2cde2c5f786af492346581027fc30872a1cb3de31db311
data/Rakefile ADDED
@@ -0,0 +1,32 @@
1
# Rake entry point for the bpl_enrich gem: wires up documentation, gem
# packaging and test tasks.

begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

# `rake rdoc` - builds API documentation into ./rdoc from the README and
# every Ruby file under lib/.
RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  # Title shown in the generated documentation; names this gem.
  rdoc.title    = 'BplEnrich'
  rdoc.options << '--line-numbers'
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Adds the standard Bundler gem tasks (build / install / release).
Bundler::GemHelper.install_tasks

require 'rake/testtask'

# `rake test` - runs every *_test.rb file under test/.
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

task default: :test
@@ -0,0 +1,79 @@
1
module BplEnrich
  # Resolves free-text language and role values against Library of Congress
  # vocabularies using the Qa::Authorities::Loc search client.
  #
  # Every public method returns a Hash. On a successful match it carries
  # :uri (the 'info:lc' identifier rewritten to 'http://id.loc.gov') and
  # :label; when nothing matches those keys are simply absent.
  class Authorities

    # Looks a language up in the 'iso639-2' vocabulary.
    #
    # language_value - a label ("English") or a code ("eng"); compared
    #                  case-insensitively against each result's label and
    #                  against the last path segment of its id.
    #
    # Returns {:uri, :label} for the first exact match, otherwise {}.
    def self.parse_language(language_value)
      lookup(language_value, 'iso639-2') do |entry, wanted|
        entry['label'].downcase == wanted || entry['id'].split('/').last.downcase == wanted
      end
    end

    #TODO: Research why searching the 'relators' vocabulary for a code such as 'ctb' doesn't work.
    #
    # Looks a role up in the 'relators' vocabulary by its label
    # (case-insensitive exact match).
    #
    # Returns {:uri, :label} for the first exact match, otherwise {}.
    def self.parse_role(role_value)
      lookup(role_value, 'relators')
    end

    # Tries to split a role term off a name string such as
    # "Steven Anderson (Contributor)".
    #
    # The trailing word is checked first; only if it is not a relator label is
    # the leading word checked. The matched word and the punctuation around it
    # are removed from the returned name.
    #
    # Returns a Hash that always has :name, plus :uri and :label when a role
    # was recognised.
    def self.parse_name_for_role(name)
      return_hash = {:name=>name}
      ascii_name = name.to_ascii

      #Make sure we have at least three distinct parts of 2-letter+ words. Avoid something like: Steven C. Painter or Painter, Steven C.
      #Possible Issue: Full name of Steven Carlos Painter ?
      potential_role_check = ascii_name.match(/[\(\"\',]*\w\w+[\),\"\']* [\w\.,\d\-\"]*[\w\d][\w\d][\w\.,\d\-\"]* [\(\"\',]*\w\w+[\),\"\']*$/) || name.split(/[ ]+/).length >= 4
      return return_hash unless potential_role_check

      #Check the last word of the name string...
      trailing_word = ascii_name.match(/(?<=[\(\"\', ])\w+(?=[\),\"\']*$)/).to_s
      role = lookup(trailing_word, 'relators')
      unless role.empty?
        #Remove the word and any other characters around it. $ means the end of the line.
        return_hash[:name] = name.sub(/[\(\"\', ]*\w+[\),\"\']*$/, '').gsub(/^[ ]*:/, '').strip
        return return_hash.merge(role)
      end

      #...and fall back to the first word of the name string. This lookup is
      #only made when the trailing word was not a role.
      leading_word = ascii_name.match(/\w+(?=[\),\"\']*)/).to_s
      role = lookup(leading_word, 'relators')
      unless role.empty?
        #Remove the first word and any other characters around it.
        return_hash[:name] = name.sub(/[\(\"\', ]*\w+[ \),\"\']*/, '').gsub(/^[ ]*:/, '').strip
        return return_hash.merge(role)
      end

      return_hash
    end

    # Shared search-and-filter step for the public parsers.
    #
    # term       - the text to search for; nil or whitespace-only terms
    #              return {} without calling the search client.
    # vocabulary - the sub-authority name passed to the search client.
    #
    # An optional block receives (result_entry, downcased_term) and decides
    # whether the entry is a match; without a block the entry's label must
    # equal the term case-insensitively.
    #
    # Returns {:uri, :label} for the first matching entry, otherwise {}.
    def self.lookup(term, vocabulary)
      return {} if term.nil? || term.strip.empty?

      wanted = term.downcase
      # URI.escape was removed in Ruby 3.0; the parser-level escape is the
      # same routine it delegated to.
      results = Qa::Authorities::Loc.new.search(URI::DEFAULT_PARSER.escape(term), vocabulary)
      return {} if results.nil? || results.empty?

      match = results.find do |entry|
        block_given? ? yield(entry, wanted) : entry['label'].downcase == wanted
      end
      return {} unless match

      {:uri=>match['id'].gsub('info:lc', 'http://id.loc.gov'), :label=>match['label']}
    end
    private_class_method :lookup
  end
end
@@ -0,0 +1,5 @@
1
+ module BplEnrich
2
+ class Constants
3
+
4
+ end
5
+ end
@@ -0,0 +1,251 @@
1
module BplEnrich
  # Converts free-text date strings into structured date data.
  class Dates

    # a function to convert date data from OAI feeds into MODS-usable date data
    # assumes date values containing ";" have already been split
    #
    # value - the raw date String (e.g. "April 1983", "1970s", "ca. 1950-1955").
    #
    # Returns a Hash with :single_date, :date_range ({:start, :end}),
    # :date_qualifier ('questionable', 'approximate' or 'inferred') and/or
    # :date_note values. Dates are formatted YYYY, YYYY-MM or YYYY-MM-DD.
    # Anything that cannot be interpreted is returned unchanged in :date_note.
    def self.standardize(value)

      date_data = {} # create the hash to hold all the data
      source_date_string = value.strip # variable to hold original value

      # declared up front so that the assignments made inside blocks and
      # branches below all refer to the same method-level variables
      date_data_range_start = nil
      date_data_range_end = nil

      # weed out obvious bad dates before processing
      if (value.match(/([Pp]re|[Pp]ost|[Bb]efore|[Aa]fter|[Uu]nknown|[Uu]ndated|n\.d\.)/)) ||
          (value.match(/\d\d\d\d-\z/)) || # 1975-
          (value.match(/\d\d-\d\d\/\d\d/)) || # 1975-09-09/10
          (value.match(/\d*\(\d*\)/)) || # 1975(1976)
          (value.scan(/\d\d\d\d/).length > 2) || # 1861/1869/1915
          (value.scan(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/).length > 1) ||
          # or if data does not match any of these
          (!value.match(/(\d\dth [Cc]entury|\d\d\d-\?*|\d\d\d\?|\d\d\?\?|\d\d\d\d)/))
        date_data[:date_note] = source_date_string
      else
        # find date qualifier
        if value.include? '?'
          date_data[:date_qualifier] = 'questionable'
        elsif value.match(/\A[Cc]/)
          date_data[:date_qualifier] = 'approximate'
        elsif (value.match(/[\[\]]+/)) || (value.match(/[(][A-Za-z, \d]*[\d]+[A-Za-z, \d]*[)]+/)) # if [] or ()
          date_data[:date_qualifier] = 'inferred'
        end

        # remove unnecessary chars and words
        value = value.gsub(/[\[\]\(\)\.,']/,'')
        value = value.gsub(/(\b[Bb]etween\b|\bcirca\b|\bca\b|\Aca|\Ac)/,'').strip

        # differentiate between ranges and single dates
        if (value.scan(/\d\d\d\d/).length == 2) ||
            (value.include? '0s') || # 1970s
            (value.include? 'entury') || # 20th century
            (value.match(/(\A\d\d\d\?|\A\d\d\?\?|\A\d\d\d-\?*|\d\d\d\d-\d\z|\d\d\d\d\/[\d]{1,2}\z)/)) ||
            (value.match(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/)) ||
            ((value.match(/\d\d\d\d-\d\d\z/)) && (value[-2..-1].to_i > 12)) # 1975-76 but NOT 1910-11

          # RANGES
          date_data[:date_range] = {}

          # deal with date strings with 2 4-digit year values separately
          if value.scan(/\d\d\d\d/).length == 2

            # convert weird span indicators ('or','and','||'), remove extraneous text
            value = value.gsub(/(or|and|\|\|)/,'-').gsub(/[A-Za-z\?\s]/,'')

            if value.match(/\A[12][\d]{3}-[01][\d]-[12][\d]{3}-[01][\d]\z/) # 1895-05-1898-01
              date_data_range_start = value[0..6]
              date_data_range_end = value[-7..-1]
            elsif value.match(/\A[12][\d]{3}\/[12][\d]{3}\z/) # 1987/1988
              date_data_range_start = value[0..3]
              date_data_range_end = value[-4..-1]
            else
              range_dates = value.split('-') # split the dates into an array
              range_dates.each_with_index do |range_date,index|
                value_to_insert = nil
                range_date_piece_day = nil
                # format the data properly
                if range_date.include? '/' # 11/05/1965
                  range_date_pieces = range_date.split('/')
                  range_date_piece_year = range_date_pieces.last
                  range_date_piece_month = range_date_pieces.first.length == 2 ? range_date_pieces.first : '0' + range_date_pieces.first
                  if range_date_pieces.length == 3
                    range_date_piece_day = range_date_pieces[1].length == 2 ? range_date_pieces[1] : '0' + range_date_pieces[1]
                  end
                  value_to_insert = range_date_piece_year + '-' + range_date_piece_month
                  value_to_insert << '-' + range_date_piece_day if range_date_piece_day
                elsif range_date.match(/\A[12][\d]{3}\z/)
                  value_to_insert = range_date
                end
                # add the data to the proper variable
                if value_to_insert
                  if index == 0
                    date_data_range_start = value_to_insert
                  else
                    date_data_range_end = value_to_insert
                  end
                end
              end
            end
          else
            # if there are 'natural language' range values, find, assign to var, then remove
            text_range = value.match(/([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/).to_s
            if text_range.length > 0
              date_data[:date_qualifier] ||= 'approximate' # TODO - remove this??
              value = value.gsub(/#{text_range}/,'').strip
            end

            # deal with ranges for which 'natural language' range values are ignored
            if value.match(/\A1\d\?\?\z/) # 19??
              date_data_range_start = value[0..1] + '00'
              date_data_range_end = value[0..1] + '99'
            elsif value.match(/\A[12]\d\d-*\?*\z/) # 195? || 195-? || 195-
              date_data_range_start = value[0..2] + '0'
              date_data_range_end = value[0..2] + '9'
            elsif value.match(/\A[12]\d\d\d[-\/][\d]{1,2}\z/) # 1956-57 || 1956/57 || 1956-7
              if value.length == 7 && (value[5..6].to_i > value[2..3].to_i)
                date_data_range_start = value[0..3]
                date_data_range_end = value[0..1] + value[5..6]
              elsif value.length == 6 && (value[5].to_i > value[3].to_i)
                date_data_range_start = value[0..3]
                date_data_range_end = value[0..2] + value[5]
              end
              date_data[:date_note] = source_date_string if text_range.length > 0
            end
            # deal with ranges where text range values are evaluated
            value = value.gsub(/\?/,'').strip # remove question marks

            # centuries
            if value.match(/([12][\d]{1}th [Cc]entury|[12][\d]{1}00s)/) # 19th century || 1800s
              if value.match(/[12][\d]{1}00s/)
                century_prefix_date = value.match(/[12][\d]{1}/).to_s
              else
                century_prefix_date = (value.match(/[12][\d]{1}/).to_s.to_i-1).to_s
              end
              century_suffix_dates = nil
              if text_range.match(/([Ee]arly|[Ll]ate|[Mm]id)/)
                if text_range.match(/[Ee]arly/)
                  century_suffix_dates = %w[00 39]
                elsif text_range.match(/[Mm]id/)
                  century_suffix_dates = %w[30 69]
                else
                  century_suffix_dates = %w[60 99]
                end
              end
              date_data_range_start = century_suffix_dates ? century_prefix_date + century_suffix_dates[0] : century_prefix_date + '00'
              date_data_range_end = century_suffix_dates ? century_prefix_date + century_suffix_dates[1] : century_prefix_date + '99'
            else
              # remove any remaining non-date text, but preserve decade-ness.
              # The second digit may be 0 so that decades such as 2010s are
              # recognised; x00s values were already handled as centuries.
              is_decade = !value.match(/[12]\d[1-9]0s/).nil?
              remaining_text = value.match(/\D+/).to_s
              value = value.gsub(/#{remaining_text}/,'').strip if remaining_text.length > 0

              # decades
              if is_decade
                decade_prefix_date = value.match(/\A[12]\d[1-9]/).to_s
                decade_suffix_dates = nil
                if text_range.match(/([Ee]arly|[Ll]ate|[Mm]id)/)
                  if text_range.match(/[Ee]arly/)
                    decade_suffix_dates = %w[0 3]
                  elsif text_range.match(/[Mm]id/)
                    decade_suffix_dates = %w[4 6]
                  else
                    decade_suffix_dates = %w[7 9]
                  end
                end
                date_data_range_start = decade_suffix_dates ? decade_prefix_date + decade_suffix_dates[0] : decade_prefix_date + '0'
                date_data_range_end = decade_suffix_dates ? decade_prefix_date + decade_suffix_dates[1] : decade_prefix_date + '9'
              else
                # single year ranges
                single_year_prefix = value.match(/[12][0-9]{3}/).to_s
                if text_range.length > 0
                  if text_range.match(/[Ee]arly/)
                    single_year_suffixes = %w[01 04]
                  elsif text_range.match(/[Mm]id/)
                    single_year_suffixes = %w[05 08]
                  elsif text_range.match(/[Ll]ate/)
                    single_year_suffixes = %w[09 12]
                  elsif text_range.match(/[Ww]inter/)
                    single_year_suffixes = %w[01 03]
                  elsif text_range.match(/[Ss]pring/)
                    single_year_suffixes = %w[03 05]
                  elsif text_range.match(/[Ss]ummer/)
                    single_year_suffixes = %w[06 08]
                  else # the only remaining possibility is fall
                    single_year_suffixes = %w[09 11]
                  end
                  date_data_range_start = single_year_prefix + '-' + single_year_suffixes[0]
                  date_data_range_end = single_year_prefix + '-' + single_year_suffixes[1]
                end
              end
              # if possibly significant info removed, include as note
              date_data[:date_note] = source_date_string if remaining_text.length > 1
            end
          end

          # insert the values into the date_data hash
          if date_data_range_start && date_data_range_end
            date_data[:date_range][:start] = date_data_range_start
            date_data[:date_range][:end] = date_data_range_end
          else
            date_data[:date_note] ||= source_date_string
            date_data.delete :date_range
          end

        else
          # SINGLE DATES
          value = value.gsub(/\?/,'') # remove question marks
          # fix bad spacing (e.g. December 13,1985 || December 3,1985)
          value = value.insert(-5, ' ') if value.match(/[A-Za-z]* \d{6}/) || value.match(/[A-Za-z]* \d{5}/)

          # try to automatically parse single dates with YYYY && MM && DD values
          parsed_date = Timeliness.parse(value)
          if parsed_date
            date_data[:single_date] = parsed_date.strftime("%Y-%m-%d")
          elsif value.match(/\A[12]\d\d\d-[01][0-9]\z/) # yyyy-mm
            date_data[:single_date] = value
          elsif value.match(/\A(0?[1-9]|1[0-2])[-\/][12]\d\d\d\z/) # mm-yyyy || m-yyyy || mm/yyyy
            value = '0' + value if value.match(/\A[1-9][-\/][12]\d\d\d\z/) # m-yyyy || m/yyyy
            date_data[:single_date] = value[3..6] + '-' + value[0..1]
          elsif value.match(/\A[A-Za-z]{3,} [12]\d\d\d\z/) # April 1987 || Apr. 1987
            month_word, year = value.split(' ')
            month_word = month_word.capitalize
            month_number = Date::MONTHNAMES.index(month_word) || Date::ABBR_MONTHNAMES.index(month_word)
            # an unrecognised word (e.g. "Sept", "Circa") keeps only the year
            date_data[:single_date] = month_number ? year + '-' + ('%02d' % month_number) : year
          elsif value.match(/\A[12]\d\d\d\z/) # 1999
            date_data[:single_date] = value
          else
            date_data[:date_note] = source_date_string
          end

        end

      end

      # some final validation, just in case
      date_validation_array = []
      date_validation_array << date_data[:single_date] if date_data[:single_date]
      date_validation_array << date_data[:date_range][:start] if date_data[:date_range]
      date_validation_array << date_data[:date_range][:end] if date_data[:date_range]
      bad_date = date_validation_array.any? do |date_to_val|
        if date_to_val.length == 7 # YYYY-MM: month must be 01..12
          !date_to_val[-2..-1].to_i.between?(1,12)
        elsif date_to_val.length == 10 # YYYY-MM-DD: must be a real calendar date
          Timeliness.parse(date_to_val).nil?
        else
          false
        end
      end
      if bad_date
        date_data[:date_note] ||= source_date_string
        date_data.delete :single_date
        date_data.delete :date_range
      end

      # if the date slipped by all the processing somehow!
      if date_data[:single_date].nil? && date_data[:date_range].nil? && date_data[:date_note].nil?
        date_data[:date_note] = source_date_string
      end

      date_data

    end
  end
end
@@ -0,0 +1,46 @@
1
module BplEnrich
  # Normalises Library of Congress Subject Heading strings.
  class LCSH

    #Take LCSH subjects and make them standard.
    #
    # value - the raw subject heading String (nil and whitespace-only values
    #         are accepted).
    #
    # Returns the cleaned heading, or '' when there is nothing left to clean.
    def self.standardize(value)

      return '' if value.nil? || value !~ /[^[:space:]]/

      #Remove stuff that is quoted (quotation for first and last words)..
      value = value.gsub(/^['"]/, '').gsub(/['"]$/, '').strip

      #Remove ending periods ... except when an initial or etc.
      #(value[-2] is nil when the whole value is a lone period.)
      if value.end_with?('.') && !value.end_with?('etc.')
        before_period = value[-2]
        value = value[0..-2] if before_period.nil? || before_period =~ /[^A-Z]/
      end

      #Nothing but quotes and/or a period was supplied.
      return '' if value.empty?

      #Fix when '- -' occurs
      value = value.gsub(/-\s-/,'--')

      #Fix for "em" dashes
      value = value.gsub('—','--')

      #Fix for "en" dashes
      value = value.gsub('–','--')

      #Fix for ' - ' combinations
      value = value.gsub(' - ','--')

      #Remove white space after and before '--'
      value = value.gsub(/\s+--/,'--')
      value = value.gsub(/--\s+/,'--')

      #Ensure first word is capitalized
      value[0] = value[0].capitalize[0]

      #Strip any white space; strip_value returns nil for blank input, so
      #coerce to keep the String return type.
      return BplEnrich.strip_value(value).to_s
    end


  end
end
@@ -0,0 +1,3 @@
1
+ module BplEnrich
2
+ VERSION = "0.0.1"
3
+ end
data/lib/bpl_enrich.rb ADDED
@@ -0,0 +1,29 @@
1
# Top-level namespace for the bpl_enrich gem. Loads the enrichment classes and
# their dependencies, and provides the shared string clean-up helpers.
module BplEnrich
  # Standard library pieces used by the enrichment classes (Date month-name
  # constants in Dates, URI escaping in Authorities).
  require "date"
  require "uri"

  require "bpl_enrich/lcsh"
  require "bpl_enrich/dates"
  require "bpl_enrich/constants"
  require "bpl_enrich/authorities"
  require "timeliness"
  require "unidecoder"
  require "htmlentities"
  require "qa"

  # Cleans a single metadata value.
  #
  # value - a String or a number. Floats and Integers are converted to their
  #         whole-number string form (the fractional part is dropped).
  #
  # Returns nil when the value is blank, otherwise the result of utf8Encode.
  def self.strip_value(value)
    return nil if value.blank?

    # Fixnum was merged into Integer in Ruby 2.4 and removed in Ruby 3.2, so
    # test against Integer instead.
    if value.is_a?(Float) || value.is_a?(Integer)
      value = value.to_i.to_s
    end

    # Make sure it is all UTF-8 and not character encodings or HTML tags and remove any carriage returns
    return utf8Encode(value)
  end

  #TODO: Better name for this. Should be part of an overall helper gem.
  #
  # Replaces tabs, line breaks and self-closing <br/> tags with spaces, runs
  # the text through the ActionView full sanitizer, decodes HTML entities and
  # strips surrounding whitespace.
  #
  # Returns the cleaned String.
  def self.utf8Encode(value)
    return ::HTMLEntities.new.decode(ActionView::Base.full_sanitizer.sanitize(value.to_s.gsub(/\r?\n?\t/, ' ').gsub(/\r?\n/, ' ').gsub(/<br[\s]*\/>/,' '))).strip
  end

end
@@ -0,0 +1,4 @@
1
+ # desc "Explaining what the task does"
2
+ # task :bpl_enrich do
3
+ # # Task goes here
4
+ # end
@@ -0,0 +1,48 @@
1
require 'test_helper'

# Exercises BplEnrich::Authorities. Each call goes through
# Qa::Authorities::Loc, so these tests depend on that search client returning
# the expected vocabulary entries.
class AuthoritiesTest < ActiveSupport::TestCase
  # A language can be resolved by its code or by its label.
  def test_parse_language
    result = BplEnrich::Authorities.parse_language('eng')
    assert_equal 'English', result[:label]
    assert_equal 'http://id.loc.gov/vocabulary/iso639-2/eng', result[:uri]

    result = BplEnrich::Authorities.parse_language('English')
    assert_equal 'English', result[:label]
    assert_equal 'http://id.loc.gov/vocabulary/iso639-2/eng', result[:uri]
  end

  # A role is resolved by its label.
  def test_parse_role

    result = BplEnrich::Authorities.parse_role('Contributor')
    assert_equal 'Contributor', result[:label]
    assert_equal 'http://id.loc.gov/vocabulary/relators/ctb', result[:uri]

    #FIXME: Searching by code doesn't seem to work in this vocab?
    #result = BplEnrich::Authorities.parse_role('ctb')
    #assert_equal 'Contributor', result[:label]
    #assert_equal 'http://id.loc.gov/vocabulary/relators/ctb', result[:uri]
  end

  # A trailing role term is split off the name; unknown terms are left alone.
  def test_parse_name_for_role

    result = BplEnrich::Authorities.parse_name_for_role('Steven Anderson (Contributor)')
    assert_equal 'Steven Anderson', result[:name]
    assert_equal 'Contributor', result[:label]
    assert_equal 'http://id.loc.gov/vocabulary/relators/ctb', result[:uri]

    # "Painter" is not expected to be a relator label, so nothing is removed.
    # assert_nil is used because assert_equal with a nil expectation is deprecated.
    result = BplEnrich::Authorities.parse_name_for_role('Steven Anderson (Painter)')
    assert_equal 'Steven Anderson (Painter)', result[:name]
    assert_nil result[:label]
    assert_nil result[:uri]

    #Special non-Ascii character check
    result = BplEnrich::Authorities.parse_name_for_role('Sully, François (Photographer)')
    assert_equal 'Sully, François', result[:name]
    assert_equal 'Photographer', result[:label]
    assert_equal 'http://id.loc.gov/vocabulary/relators/pht', result[:uri]


  end


end
@@ -0,0 +1,9 @@
1
+ require 'test_helper'
2
+
3
+ class BplEnrichTest < ActiveSupport::TestCase
4
+
5
+ def test_strip_value
6
+ #TODO
7
+
8
+ end
9
+ end
@@ -0,0 +1,12 @@
1
require 'test_helper'

# Exercises BplEnrich::Dates.standardize.
class DatesTest < ActiveSupport::TestCase
  # A "Month YYYY" string becomes a YYYY-MM single date with no range or note.
  def test_date_standardizer
    result = BplEnrich::Dates.standardize('April 1983')
    assert_equal '1983-04', result[:single_date]
    # assert_nil is used because assert_equal with a nil expectation is deprecated.
    assert_nil result[:date_range]
    assert_nil result[:date_note]
  end


end
@@ -0,0 +1,28 @@
1
+ == README
2
+
3
+ This README would normally document whatever steps are necessary to get the
4
+ application up and running.
5
+
6
+ Things you may want to cover:
7
+
8
+ * Ruby version
9
+
10
+ * System dependencies
11
+
12
+ * Configuration
13
+
14
+ * Database creation
15
+
16
+ * Database initialization
17
+
18
+ * How to run the test suite
19
+
20
+ * Services (job queues, cache servers, search engines, etc.)
21
+
22
+ * Deployment instructions
23
+
24
+ * ...
25
+
26
+
27
+ Please feel free to use a different markup language if you do not plan to run
28
+ <tt>rake doc:app</tt>.
@@ -0,0 +1,6 @@
1
+ # Add your own tasks in files placed in lib/tasks ending in .rake,
2
+ # for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.
3
+
4
+ require File.expand_path('../config/application', __FILE__)
5
+
6
+ Dummy::Application.load_tasks
@@ -0,0 +1,13 @@
1
+ // This is a manifest file that'll be compiled into application.js, which will include all the files
2
+ // listed below.
3
+ //
4
+ // Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
5
+ // or vendor/assets/javascripts of plugins, if any, can be referenced here using a relative path.
6
+ //
7
+ // It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
8
+ // compiled file.
9
+ //
10
+ // Read Sprockets README (https://github.com/sstephenson/sprockets#sprockets-directives) for details
11
+ // about supported directives.
12
+ //
13
+ //= require_tree .
@@ -0,0 +1,13 @@
1
+ /*
2
+ * This is a manifest file that'll be compiled into application.css, which will include all the files
3
+ * listed below.
4
+ *
5
+ * Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
6
+ * or vendor/assets/stylesheets of plugins, if any, can be referenced here using a relative path.
7
+ *
8
+ * You're free to add application-wide styles to this file and they'll appear at the top of the
9
+ * compiled file, but it's generally better to create a new file per style scope.
10
+ *
11
+ *= require_self
12
+ *= require_tree .
13
+ */
@@ -0,0 +1,5 @@
1
+ class ApplicationController < ActionController::Base
2
+ # Prevent CSRF attacks by raising an exception.
3
+ # For APIs, you may want to use :null_session instead.
4
+ protect_from_forgery with: :exception
5
+ end
@@ -0,0 +1,2 @@
1
+ module ApplicationHelper
2
+ end
@@ -0,0 +1,14 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Dummy</title>
5
+ <%= stylesheet_link_tag "application", media: "all", "data-turbolinks-track" => true %>
6
+ <%= javascript_include_tag "application", "data-turbolinks-track" => true %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+
11
+ <%= yield %>
12
+
13
+ </body>
14
+ </html>
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
3
+ load Gem.bin_path('bundler', 'bundle')
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ APP_PATH = File.expand_path('../../config/application', __FILE__)
3
+ require_relative '../config/boot'
4
+ require 'rails/commands'
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../config/boot'
3
+ require 'rake'
4
+ Rake.application.run