bpl_enrich 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/Rakefile +32 -0
  3. data/lib/bpl_enrich/authorities.rb +79 -0
  4. data/lib/bpl_enrich/constants.rb +5 -0
  5. data/lib/bpl_enrich/dates.rb +251 -0
  6. data/lib/bpl_enrich/lcsh.rb +46 -0
  7. data/lib/bpl_enrich/version.rb +3 -0
  8. data/lib/bpl_enrich.rb +29 -0
  9. data/lib/tasks/bpl_enrich_tasks.rake +4 -0
  10. data/test/authorities_test.rb +48 -0
  11. data/test/bpl_enrich_test.rb +9 -0
  12. data/test/dates_test.rb +12 -0
  13. data/test/dummy/README.rdoc +28 -0
  14. data/test/dummy/Rakefile +6 -0
  15. data/test/dummy/app/assets/javascripts/application.js +13 -0
  16. data/test/dummy/app/assets/stylesheets/application.css +13 -0
  17. data/test/dummy/app/controllers/application_controller.rb +5 -0
  18. data/test/dummy/app/helpers/application_helper.rb +2 -0
  19. data/test/dummy/app/views/layouts/application.html.erb +14 -0
  20. data/test/dummy/bin/bundle +3 -0
  21. data/test/dummy/bin/rails +4 -0
  22. data/test/dummy/bin/rake +4 -0
  23. data/test/dummy/config/application.rb +23 -0
  24. data/test/dummy/config/boot.rb +5 -0
  25. data/test/dummy/config/database.yml +25 -0
  26. data/test/dummy/config/environment.rb +5 -0
  27. data/test/dummy/config/environments/development.rb +29 -0
  28. data/test/dummy/config/environments/production.rb +80 -0
  29. data/test/dummy/config/environments/test.rb +36 -0
  30. data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
  31. data/test/dummy/config/initializers/filter_parameter_logging.rb +4 -0
  32. data/test/dummy/config/initializers/inflections.rb +16 -0
  33. data/test/dummy/config/initializers/mime_types.rb +5 -0
  34. data/test/dummy/config/initializers/secret_token.rb +12 -0
  35. data/test/dummy/config/initializers/session_store.rb +3 -0
  36. data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
  37. data/test/dummy/config/locales/en.yml +23 -0
  38. data/test/dummy/config/routes.rb +56 -0
  39. data/test/dummy/config.ru +4 -0
  40. data/test/dummy/db/test.sqlite3 +0 -0
  41. data/test/dummy/log/development.log +35 -0
  42. data/test/dummy/log/test.log +180 -0
  43. data/test/dummy/public/404.html +58 -0
  44. data/test/dummy/public/422.html +58 -0
  45. data/test/dummy/public/500.html +57 -0
  46. data/test/dummy/public/favicon.ico +0 -0
  47. data/test/lcsh_test.rb +10 -0
  48. data/test/test_helper.rb +15 -0
  49. metadata +214 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 41994a22b0ef539c9938525b17ad475e6eaafce7
4
+ data.tar.gz: 9c16b1cbb137f1c770d72190b3916a2ac6f703ae
5
+ SHA512:
6
+ metadata.gz: 06560416487c4a58d16c0ebed6716429b44aa73bd92c62f6d99093ce4d130a416adf8fa8f5eaeae0e91f2f371fed67535288109265c1a1c8ea44c3690b15d78c
7
+ data.tar.gz: 54f601dd8de34b18f9c018c09ea48fe1ac4de300e719c07aa34b6f595cbab50c337eecf24d19cde70c2cde2c5f786af492346581027fc30872a1cb3de31db311
data/Rakefile ADDED
@@ -0,0 +1,32 @@
1
# Rakefile for the bpl_enrich gem: sets up RDoc generation, the Bundler gem
# tasks and the test task (which is also the default task).
begin
  require 'bundler/setup'
rescue LoadError
  puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
end

require 'rdoc/task'

RDoc::Task.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  # The title previously read 'Bplgeo', which is not the name of this gem.
  rdoc.title    = 'BplEnrich'
  rdoc.options << '--line-numbers'
  rdoc.rdoc_files.include('README.rdoc')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

Bundler::GemHelper.install_tasks

require 'rake/testtask'

# Runs every test/**/*_test.rb file with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

task default: :test
@@ -0,0 +1,79 @@
1
module BplEnrich
  # Resolves language and role strings against Library of Congress
  # vocabularies through Qa::Authorities::Loc.
  #
  # Every public method returns a Hash. On a successful match it contains
  # :uri (the record id with 'info:lc' rewritten to 'http://id.loc.gov') and
  # :label (the authority's label). Without a match those keys are absent.
  class Authorities

    # Looks up a language by label or by the last path segment of its id
    # (case-insensitive) in the 'iso639-2' vocabulary.
    #
    # @param language_value [String] e.g. 'eng' or 'English'
    # @return [Hash] {:uri, :label} when matched, otherwise {}
    def self.parse_language(language_value)
      wanted = language_value.to_s.downcase
      return {} if wanted.empty?

      match = search_loc(language_value, 'iso639-2').find do |hit|
        hit['label'].to_s.downcase == wanted ||
          hit['id'].to_s.split('/').last.to_s.downcase == wanted
      end
      authority_hash(match)
    end

    # Looks up a role by its label (case-insensitive) in the 'relators'
    # vocabulary.
    #
    # TODO: Research why searching 'relators' for a code such as 'ctb' doesn't work.
    #
    # @param role_value [String] e.g. 'Contributor'
    # @return [Hash] {:uri, :label} when matched, otherwise {}
    def self.parse_role(role_value)
      authority_hash(find_role(role_value))
    end

    # Splits a trailing role off a name string, e.g.
    # 'Steven Anderson (Contributor)' -> name 'Steven Anderson', role Contributor.
    #
    # The lookup is only attempted when the name has at least three distinct
    # parts of 2+ letters (or four space-separated parts), to avoid treating
    # something like 'Steven C. Painter' or 'Painter, Steven C.' as name + role.
    # Possible issue: a full name such as 'Steven Carlos Painter'.
    #
    # @param name [String]
    # @return [Hash] always has :name; gains :uri and :label when the last
    #   word of the name is a relator label
    def self.parse_name_for_role(name)
      return_hash = { :name => name }
      return return_hash if name.to_s.empty?

      ascii_name = name.to_ascii
      candidate = ascii_name.match(/[\(\"\',]*\w\w+[\),\"\']* [\w\.,\d\-\"]*[\w\d][\w\d][\w\.,\d\-\"]* [\(\"\',]*\w\w+[\),\"\']*$/) ||
                  name.split(/[ ]+/).length >= 4
      return return_hash unless candidate

      # Check the last word of the name string.
      role_value = ascii_name.match(/(?<=[\(\"\', ])\w+(?=[\),\"\']*$)/).to_s
      role = find_role(role_value)
      if role
        # Remove the word and any other characters around it. $ means the end of the line.
        return_hash[:name] = name.sub(/[\(\"\', ]*\w+[\),\"\']*$/, '').gsub(/^[ ]*:/, '').strip
        return_hash.merge!(authority_hash(role))
      end

      # NOTE(review): the original also looked up the FIRST word of the name,
      # but that branch was guarded by `return_hash.blank?`, which can never be
      # true because :name is always set. It therefore never changed the result
      # and only cost an extra HTTP request, so it has been removed. Enabling it
      # would need care: surname-first names such as 'Singer, Isaac' could be
      # misread as a role.
      return_hash
    end

    # Queries the given LoC sub-authority and always returns an Array of hits.
    # The term is form-encoded because it is passed to the authority as a raw
    # query value; URI.escape (used previously) was removed in Ruby 3.0 and
    # left '&' and '=' unescaped.
    def self.search_loc(term, sub_authority)
      Array(Qa::Authorities::Loc.new.search(URI.encode_www_form_component(term.to_s), sub_authority))
    end

    # Returns the first 'relators' hit whose label equals role_value
    # (case-insensitive), or nil.
    def self.find_role(role_value)
      wanted = role_value.to_s.downcase
      return nil if wanted.empty?

      search_loc(role_value, 'relators').find { |hit| hit['label'].to_s.downcase == wanted }
    end

    # Converts an authority hit into the {:uri, :label} result shape; {} for nil.
    def self.authority_hash(hit)
      return {} if hit.nil?

      { :uri => hit['id'].to_s.gsub('info:lc', 'http://id.loc.gov'), :label => hit['label'] }
    end

    private_class_method :search_loc, :find_role, :authority_hash
  end
end
@@ -0,0 +1,5 @@
1
module BplEnrich
  # Placeholder namespace for shared constants; nothing is defined here yet.
  class Constants
  end
end
@@ -0,0 +1,251 @@
1
module BplEnrich
  # Converts free-text date values (e.g. from OAI feeds) into MODS-usable data.
  class Dates

    # Words that narrow a year, decade or century to a part of it.
    TEXT_RANGE_PATTERN = /([Ee]arly|[Ll]ate|[Mm]id|[Ww]inter|[Ss]pring|[Ss]ummer|[Ff]all)/

    # Two-digit year suffixes used for "early/mid/late 19th century".
    CENTURY_SPANS = { 'early' => %w[00 39], 'mid' => %w[30 69], 'late' => %w[60 99] }.freeze

    # Final year digits used for "early/mid/late 1920s".
    DECADE_SPANS = { 'early' => %w[0 3], 'mid' => %w[4 6], 'late' => %w[7 9] }.freeze

    # Start/end months used for "early 1975", "Summer 1975", etc.
    MONTH_SPANS = {
      'early'  => %w[01 04], 'mid'    => %w[05 08], 'late'   => %w[09 12],
      'winter' => %w[01 03], 'spring' => %w[03 05], 'summer' => %w[06 08],
      'fall'   => %w[09 11]
    }.freeze

    # Standardizes a single date string. Values containing ";" are assumed to
    # have been split by the caller already.
    #
    # @param value [String] the raw date text
    # @return [Hash] with any of:
    #   :single_date    => 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD'
    #   :date_range     => { :start => ..., :end => ... }
    #   :date_qualifier => 'questionable', 'approximate' or 'inferred'
    #   :date_note      => the stripped original text, when the value could not
    #                      be (fully) interpreted
    def self.standardize(value)
      date_data = {} # create the hash to hold all the data
      source_date_string = value.strip # variable to hold original value
      date_data_range_start = nil
      date_data_range_end = nil

      # weed out obvious bad dates before processing
      if value.match(/([Pp]re|[Pp]ost|[Bb]efore|[Aa]fter|[Uu]nknown|[Uu]ndated|n\.d\.)/) ||
         value.match(/\d\d\d\d-\z/) ||            # 1975-
         value.match(/\d\d-\d\d\/\d\d/) ||        # 1975-09-09/10
         value.match(/\d*\(\d*\)/) ||             # 1975(1976)
         value.scan(/\d\d\d\d/).length > 2 ||     # 1861/1869/1915
         value.scan(TEXT_RANGE_PATTERN).length > 1 ||
         # or if data does not match any of these
         !value.match(/(\d\dth [Cc]entury|\d\d\d-\?*|\d\d\d\?|\d\d\?\?|\d\d\d\d)/)
        date_data[:date_note] = source_date_string
      else
        # find date qualifier
        if value.include?('?')
          date_data[:date_qualifier] = 'questionable'
        elsif value.match(/\A[Cc]/)
          date_data[:date_qualifier] = 'approximate'
        elsif value.match(/[\[\]]+/) || value.match(/[(][A-Za-z, \d]*[\d]+[A-Za-z, \d]*[)]+/) # if [] or ()
          date_data[:date_qualifier] = 'inferred'
        end

        # remove unnecessary chars and words
        value = value.gsub(/[\[\]\(\)\.,']/, '')
        value = value.gsub(/(\b[Bb]etween\b|\bcirca\b|\bca\b|\Aca|\Ac)/, '').strip

        # differentiate between ranges and single dates
        year_count = value.scan(/\d\d\d\d/).length
        is_range = year_count == 2 ||
                   value.include?('0s') ||     # 1970s
                   value.include?('entury') || # 20th century
                   value.match(/(\A\d\d\d\?|\A\d\d\?\?|\A\d\d\d-\?*|\d\d\d\d-\d\z|\d\d\d\d\/[\d]{1,2}\z)/) ||
                   value.match(TEXT_RANGE_PATTERN) ||
                   (value.match(/\d\d\d\d-\d\d\z/) && value[-2..-1].to_i > 12) # 1975-76 but NOT 1910-11

        if is_range
          # RANGES
          date_data[:date_range] = {}

          # deal with date strings with 2 4-digit year values separately
          if year_count == 2
            # convert weird span indicators ('or','and','||'), remove extraneous text.
            # 'or'/'and' are only replaced when not part of a longer word, so that
            # e.g. "North" does not turn into a stray range separator.
            value = value.gsub(/(?<![A-Za-z])(or|and)(?![A-Za-z])|\|\|/, '-').gsub(/[A-Za-z\?\s]/, '')

            if value.match(/\A[12][\d]{3}-[01][\d]-[12][\d]{3}-[01][\d]\z/) # 1895-05-1898-01
              date_data_range_start = value[0..6]
              date_data_range_end = value[-7..-1]
            elsif value.match(/\A[12][\d]{3}\/[12][\d]{3}\z/) # 1987/1988
              date_data_range_start = value[0..3]
              date_data_range_end = value[-4..-1]
            else
              # left-pad a one-character month/day piece with '0'
              pad = lambda { |piece| piece.length == 2 ? piece : '0' + piece }

              value.split('-').each_with_index do |range_date, index|
                value_to_insert = nil
                # format the data properly
                if range_date.include?('/') # 11/05/1965
                  pieces = range_date.split('/')
                  next if pieces.empty? # a lone '/' carries no date
                  value_to_insert = pieces.last + '-' + pad.call(pieces.first)
                  value_to_insert << '-' + pad.call(pieces[1]) if pieces.length == 3
                elsif range_date.match(/\A[12][\d]{3}\z/)
                  value_to_insert = range_date
                end
                # add the data to the proper variable: the first piece is the
                # start, any later piece overwrites the end
                next unless value_to_insert
                if index == 0
                  date_data_range_start = value_to_insert
                else
                  date_data_range_end = value_to_insert
                end
              end
            end
          else
            # if there are 'natural language' range values, find, assign to var, then remove
            text_range = value.match(TEXT_RANGE_PATTERN).to_s
            unless text_range.empty?
              date_data[:date_qualifier] ||= 'approximate' # TODO - remove this??
              value = value.gsub(text_range, '').strip
            end
            range_word = text_range.downcase

            # deal with ranges for which 'natural language' range values are ignored
            if value.match(/\A[12]\d\?\?\z/) # 19?? || 20??
              date_data_range_start = value[0..1] + '00'
              date_data_range_end = value[0..1] + '99'
            elsif value.match(/\A[12]\d\d-*\?*\z/) # 195? || 195-? || 195-
              date_data_range_start = value[0..2] + '0'
              date_data_range_end = value[0..2] + '9'
            elsif value.match(/\A[12]\d\d\d[-\/][\d]{1,2}\z/) # 1956-57 || 1956/57 || 1956-7
              if value.length == 7 && (value[5..6].to_i > value[2..3].to_i)
                date_data_range_start = value[0..3]
                date_data_range_end = value[0..1] + value[5..6]
              elsif value.length == 6 && (value[5].to_i > value[3].to_i)
                date_data_range_start = value[0..3]
                date_data_range_end = value[0..2] + value[5]
              end
              date_data[:date_note] = source_date_string unless text_range.empty?
            end

            # deal with ranges where text range values are evaluated
            value = value.gsub(/\?/, '').strip # remove question marks

            if value.match(/([12][\d]{1}th [Cc]entury|[12][\d]{1}00s)/) # 19th century || 1800s
              # centuries: "1800s" keeps its prefix, "19th century" means 18xx
              century_prefix_date = value.match(/[12][\d]{1}/).to_s
              unless value.match(/[12][\d]{1}00s/)
                century_prefix_date = (century_prefix_date.to_i - 1).to_s
              end
              century_suffix_dates = CENTURY_SPANS[range_word] || %w[00 99]
              date_data_range_start = century_prefix_date + century_suffix_dates[0]
              date_data_range_end = century_prefix_date + century_suffix_dates[1]
            else
              # remove any remaining non-date text, but preserve decade-ness.
              # The century digit may be 0 (e.g. 2010s); only the decade digit
              # must be non-zero, since "x00s" is handled as a century above.
              is_decade = !value.match(/[12]\d[1-9]0s/).nil?
              remaining_text = value.match(/\D+/).to_s
              # String pattern => literal removal; the text is never treated as a regexp
              value = value.gsub(remaining_text, '').strip unless remaining_text.empty?

              if is_decade
                # decades
                decade_prefix_date = value.match(/\A[12]\d[1-9]/).to_s
                decade_suffix_dates = DECADE_SPANS[range_word] || %w[0 9]
                date_data_range_start = decade_prefix_date + decade_suffix_dates[0]
                date_data_range_end = decade_prefix_date + decade_suffix_dates[1]
              elsif !text_range.empty?
                # single year ranges, e.g. "Summer 1975" => 1975-06 .. 1975-08
                single_year_prefix = value.match(/[12][0-9]{3}/).to_s
                single_year_suffixes = MONTH_SPANS[range_word]
                date_data_range_start = single_year_prefix + '-' + single_year_suffixes[0]
                date_data_range_end = single_year_prefix + '-' + single_year_suffixes[1]
              end
              # if possibly significant info removed, include as note
              date_data[:date_note] = source_date_string if remaining_text.length > 1
            end
          end

          # insert the values into the date_data hash
          if date_data_range_start && date_data_range_end
            date_data[:date_range][:start] = date_data_range_start
            date_data[:date_range][:end] = date_data_range_end
          else
            date_data[:date_note] ||= source_date_string
            date_data.delete :date_range
          end
        else
          # SINGLE DATES
          value = value.gsub(/\?/, '') # remove question marks
          # fix bad spacing (e.g. December 13,1985 || December 3,1985): once the
          # comma is gone the day and year run together, so re-insert a space
          # before the last four characters
          value = value.insert(-5, ' ') if value.match(/[A-Za-z]* \d{5,6}/)

          # try to automatically parse single dates with YYYY && MM && DD values
          parsed_date = Timeliness.parse(value)
          if parsed_date
            date_data[:single_date] = parsed_date.strftime("%Y-%m-%d")
          elsif value.match(/\A[12]\d\d\d-[01][0-9]\z/) # yyyy-mm
            date_data[:single_date] = value
          elsif value.match(/\A(0?[1-9]|1[0-2])[-\/][12]\d\d\d\z/) # mm-yyyy || m-yyyy || mm/yyyy
            padded = value.length == 6 ? '0' + value : value # m-yyyy || m/yyyy
            date_data[:single_date] = padded[3..6] + '-' + padded[0..1]
          elsif value.match(/\A[A-Za-z]{3,} [12]\d\d\d\z/) # April 1987 || Apr 1987
            month_word, year = value.split(' ')
            month_word = month_word.capitalize
            # MONTHNAMES[0] is nil; a 3+ letter prefix identifies a month uniquely
            month_number = Date::MONTHNAMES.index { |month| month && month.start_with?(month_word) }
            if month_number
              date_data[:single_date] = year + '-' + ('%02d' % month_number)
            else
              # unknown word in front of the year: keep the year, note the original
              date_data[:single_date] = year
              date_data[:date_note] = source_date_string
            end
          elsif value.match(/\A[12]\d\d\d\z/) # 1999
            date_data[:single_date] = value
          else
            date_data[:date_note] = source_date_string
          end
        end
      end

      # some final validation, just in case: YYYY-MM values need a real month,
      # YYYY-MM-DD values must be parseable as a date
      dates_to_validate = [date_data[:single_date]]
      if date_data[:date_range]
        dates_to_validate << date_data[:date_range][:start] << date_data[:date_range][:end]
      end
      bad_date = dates_to_validate.compact.any? do |date_to_val|
        case date_to_val.length
        when 7  then !date_to_val[-2..-1].to_i.between?(1, 12)
        when 10 then !Timeliness.parse(date_to_val)
        else false
        end
      end
      if bad_date
        date_data[:date_note] ||= source_date_string
        date_data.delete :single_date
        date_data.delete :date_range
      end

      # if the date slipped by all the processing somehow!
      if date_data[:single_date].nil? && date_data[:date_range].nil? && date_data[:date_note].nil?
        date_data[:date_note] = source_date_string
      end

      date_data
    end
  end
end
@@ -0,0 +1,46 @@
1
module BplEnrich
  # Helpers for Library of Congress Subject Heading strings.
  class LCSH

    # Takes an LCSH subject string and normalizes its punctuation and casing:
    # strips one surrounding quote character, drops a trailing period (except
    # after an initial or 'etc.'), converts dash variants to '--' without
    # surrounding whitespace, and capitalizes the first character.
    #
    # @param value [String, nil]
    # @return [String, nil] '' for blank input; otherwise whatever
    #   BplEnrich.strip_value returns for the normalized string
    def self.standardize(value)
      return '' if value.blank?

      # Remove stuff that is quoted (quotation for first and last words).
      value = value.gsub(/^['"]/, '').gsub(/['"]$/, '').strip

      # Remove ending periods ... except when an initial or etc.
      # Length is checked first so one- to three-character values cannot
      # index off the front of the string.
      if value.length > 1 && value.end_with?('.') &&
         value[-2] !~ /[A-Z]/ && !value.end_with?('etc.')
        value = value[0..-2]
      end

      # Fix when '- -' occurs
      value = value.gsub(/-\s-/, '--')

      # Fix for em dashes (U+2014) and en dashes (U+2013)
      value = value.gsub("\u2014", '--')
      value = value.gsub("\u2013", '--')

      # Fix for ' - ' combinations
      value = value.gsub(' - ', '--')

      # Remove white space after and before '--'
      value = value.gsub(/\s+--/, '--')
      value = value.gsub(/--\s+/, '--')

      # Ensure first word is capitalized (skipped when nothing is left)
      value = value[0].capitalize[0] + value[1..-1] unless value.empty?

      # Strip any white space
      BplEnrich.strip_value(value)
    end
  end
end
@@ -0,0 +1,3 @@
1
# Gem version, kept in its own file so it can be loaded on its own.
module BplEnrich
  VERSION = '0.0.1'
end
data/lib/bpl_enrich.rb ADDED
@@ -0,0 +1,29 @@
1
+ module BplEnrich
2
+ require "bpl_enrich/lcsh"
3
+ require "bpl_enrich/dates"
4
+ require "bpl_enrich/constants"
5
+ require "bpl_enrich/authorities"
6
+ require "timeliness"
7
+ require "unidecoder"
8
+ require "htmlentities"
9
+ require "qa"
10
+
11
# Normalizes a raw metadata value to a clean string.
#
# Blank values become nil. Floats and integers are reduced to their integer
# string form (5.0 => "5"). Everything else is passed to utf8Encode.
#
# @param value [Object]
# @return [String, nil]
def self.strip_value(value)
  return nil if value.blank?

  # Integer covers what used to be Fixnum/Bignum; the Fixnum constant no
  # longer exists on Ruby 3.2+.
  value = value.to_i.to_s if value.is_a?(Float) || value.is_a?(Integer)

  # Make sure it is all UTF-8 and not character encodings or HTML tags and remove any carriage returns
  utf8Encode(value)
end
23
+
24
+ #TODO: Better name for this. Should be part of an overall helped gem.
25
# Flattens a value to a single line of plain text: tabs, line breaks and
# <br> tags become spaces, remaining markup is removed by the full
# sanitizer, HTML entities are decoded, and the result is stripped.
#
# <br> is matched in any form (<br>, <br/>, <br />, any letter case); the
# earlier pattern only matched the self-closing form, so a plain <br> was
# removed without leaving a space between the words around it.
#
# @param value [Object] converted with to_s
# @return [String]
def self.utf8Encode(value)
  text = value.to_s.gsub(/\r?\n?\t/, ' ').gsub(/\r?\n/, ' ').gsub(/<br\s*\/?>/i, ' ')
  ::HTMLEntities.new.decode(ActionView::Base.full_sanitizer.sanitize(text)).strip
end
27
+ end
28
+
29
+ end
@@ -0,0 +1,4 @@
1
+ # desc "Explaining what the task does"
2
+ # task :bpl_enrich do
3
+ # # Task goes here
4
+ # end
@@ -0,0 +1,48 @@
1
+ require 'test_helper'
2
+
3
# Exercises BplEnrich::Authorities. The assertions expect the labels and URIs
# of real id.loc.gov records.
class AuthoritiesTest < ActiveSupport::TestCase
  def test_parse_language
    # A language can be given by code or by label.
    %w[eng English].each do |language|
      result = BplEnrich::Authorities.parse_language(language)
      assert_equal 'English', result[:label]
      assert_equal 'http://id.loc.gov/vocabulary/iso639-2/eng', result[:uri]
    end
  end

  def test_parse_role
    result = BplEnrich::Authorities.parse_role('Contributor')
    assert_equal 'Contributor', result[:label]
    assert_equal 'http://id.loc.gov/vocabulary/relators/ctb', result[:uri]

    #FIXME: Using URI doesn't seem to work in this vocab?
    #result = BplEnrich::Authorities.parse_role('ctb')
    #assert_equal 'Contributor', result[:label]
    #assert_equal 'http://id.loc.gov/vocabulary/relators/ctb', result[:uri]
  end

  def test_parse_name_for_role
    result = BplEnrich::Authorities.parse_name_for_role('Steven Anderson (Contributor)')
    assert_equal 'Steven Anderson', result[:name]
    assert_equal 'Contributor', result[:label]
    assert_equal 'http://id.loc.gov/vocabulary/relators/ctb', result[:uri]

    # A trailing word that is not a relator label leaves the name untouched.
    # assert_nil replaces `assert_equal nil, ...`, which Minitest 5 deprecates.
    result = BplEnrich::Authorities.parse_name_for_role('Steven Anderson (Painter)')
    assert_equal 'Steven Anderson (Painter)', result[:name]
    assert_nil result[:label]
    assert_nil result[:uri]

    #Special non-Ascii character check
    result = BplEnrich::Authorities.parse_name_for_role('Sully, François (Photographer)')
    assert_equal 'Sully, François', result[:name]
    assert_equal 'Photographer', result[:label]
    assert_equal 'http://id.loc.gov/vocabulary/relators/pht', result[:uri]
  end
end
@@ -0,0 +1,9 @@
1
+ require 'test_helper'
2
+
3
# Covers the module-level helpers in BplEnrich.
class BplEnrichTest < ActiveSupport::TestCase

  # The test body used to be an empty TODO, which passes without checking anything.
  def test_strip_value
    # blank input yields nil
    assert_nil BplEnrich.strip_value(nil)
    assert_nil BplEnrich.strip_value('')

    # floats are reduced to their integer string form
    assert_equal '5', BplEnrich.strip_value(5.0)

    # line breaks become spaces, tags are removed, the result is stripped
    assert_equal 'one two', BplEnrich.strip_value("one\ntwo")
    assert_equal 'bold', BplEnrich.strip_value(' <b>bold</b> ')
  end
end
@@ -0,0 +1,12 @@
1
+ require 'test_helper'
2
+
3
# Exercises BplEnrich::Dates.standardize.
class DatesTest < ActiveSupport::TestCase
  def test_date_standardizer
    result = BplEnrich::Dates.standardize('April 1983')
    assert_equal '1983-04', result[:single_date]
    # assert_nil replaces `assert_equal nil, ...`, which Minitest 5 deprecates.
    assert_nil result[:date_range]
    assert_nil result[:date_note]
  end
end
@@ -0,0 +1,28 @@
1
+ == README
2
+
3
+ This README would normally document whatever steps are necessary to get the
4
+ application up and running.
5
+
6
+ Things you may want to cover:
7
+
8
+ * Ruby version
9
+
10
+ * System dependencies
11
+
12
+ * Configuration
13
+
14
+ * Database creation
15
+
16
+ * Database initialization
17
+
18
+ * How to run the test suite
19
+
20
+ * Services (job queues, cache servers, search engines, etc.)
21
+
22
+ * Deployment instructions
23
+
24
+ * ...
25
+
26
+
27
+ Please feel free to use a different markup language if you do not plan to run
28
+ <tt>rake doc:app</tt>.
@@ -0,0 +1,6 @@
1
# Rake entry point for the dummy application.
# Task files placed in lib/tasks and ending in .rake (for example
# lib/tasks/capistrano.rake) are automatically available to Rake.

require File.expand_path('../config/application', __FILE__)

Dummy::Application.load_tasks
@@ -0,0 +1,13 @@
1
+ // This is a manifest file that'll be compiled into application.js, which will include all the files
2
+ // listed below.
3
+ //
4
+ // Any JavaScript/Coffee file within this directory, lib/assets/javascripts, vendor/assets/javascripts,
5
+ // or vendor/assets/javascripts of plugins, if any, can be referenced here using a relative path.
6
+ //
7
+ // It's not advisable to add code directly here, but if you do, it'll appear at the bottom of the
8
+ // compiled file.
9
+ //
10
+ // Read Sprockets README (https://github.com/sstephenson/sprockets#sprockets-directives) for details
11
+ // about supported directives.
12
+ //
13
+ //= require_tree .
@@ -0,0 +1,13 @@
1
+ /*
2
+ * This is a manifest file that'll be compiled into application.css, which will include all the files
3
+ * listed below.
4
+ *
5
+ * Any CSS and SCSS file within this directory, lib/assets/stylesheets, vendor/assets/stylesheets,
6
+ * or vendor/assets/stylesheets of plugins, if any, can be referenced here using a relative path.
7
+ *
8
+ * You're free to add application-wide styles to this file and they'll appear at the top of the
9
+ * compiled file, but it's generally better to create a new file per style scope.
10
+ *
11
+ *= require_self
12
+ *= require_tree .
13
+ */
@@ -0,0 +1,5 @@
1
# Base controller of the dummy application.
class ApplicationController < ActionController::Base
  # CSRF protection: raise an exception when verification fails.
  # (For APIs, :null_session may be preferable.)
  protect_from_forgery with: :exception
end
@@ -0,0 +1,2 @@
1
# View helpers for the dummy application; intentionally empty.
module ApplicationHelper
end
@@ -0,0 +1,14 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Dummy</title>
5
+ <%= stylesheet_link_tag "application", media: "all", "data-turbolinks-track" => true %>
6
+ <%= javascript_include_tag "application", "data-turbolinks-track" => true %>
7
+ <%= csrf_meta_tags %>
8
+ </head>
9
+ <body>
10
+
11
+ <%= yield %>
12
+
13
+ </body>
14
+ </html>
@@ -0,0 +1,3 @@
1
#!/usr/bin/env ruby
# Binstub for Bundler: unless BUNDLE_GEMFILE is already set, point it at the
# Gemfile resolved relative to this script, then load the bundle executable.
ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
load Gem.bin_path('bundler', 'bundle')
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby
# Binstub for the rails command: records the application path in APP_PATH,
# loads config/boot, then hands control to rails/commands.
APP_PATH = File.expand_path('../../config/application', __FILE__)
require_relative '../config/boot'
require 'rails/commands'
@@ -0,0 +1,4 @@
1
#!/usr/bin/env ruby
# Binstub for rake: loads config/boot first, then runs the Rake application.
require_relative '../config/boot'
require 'rake'
Rake.application.run