stanford-mods 2.4.1 → 2.6.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.rubocop.yml +2 -2
- data/.rubocop_todo.yml +172 -102
- data/.travis.yml +13 -12
- data/Gemfile +0 -10
- data/lib/stanford-mods/coordinate.rb +3 -0
- data/lib/stanford-mods/date_parsing.rb +12 -0
- data/lib/stanford-mods/imprint.rb +55 -8
- data/lib/stanford-mods/name.rb +2 -0
- data/lib/stanford-mods/origin_info.rb +14 -0
- data/lib/stanford-mods/searchworks.rb +50 -90
- data/lib/stanford-mods/searchworks_languages.rb +2 -0
- data/lib/stanford-mods/searchworks_subjects.rb +3 -2
- data/lib/stanford-mods/version.rb +1 -1
- data/spec/date_parsing_spec.rb +2 -0
- data/spec/fixtures/searchworks_imprint_data.rb +1 -1
- data/spec/fixtures/searchworks_pub_date_data.rb +1 -1
- data/spec/imprint_spec.rb +23 -0
- data/spec/name_spec.rb +3 -0
- data/spec/origin_info_spec.rb +3 -0
- data/spec/physical_location_spec.rb +6 -0
- data/spec/searchworks_format_spec.rb +22 -285
- data/spec/searchworks_pub_dates_spec.rb +1 -0
- data/spec/searchworks_spec.rb +1 -0
- data/spec/searchworks_subject_raw_spec.rb +6 -0
- data/spec/searchworks_subject_spec.rb +2 -12
- data/spec/searchworks_title_spec.rb +36 -0
- data/spec/spec_helper.rb +2 -2
- data/stanford-mods.gemspec +4 -1
- metadata +47 -12
data/.travis.yml
CHANGED
@@ -1,16 +1,17 @@
|
|
1
1
|
language: ruby
|
2
2
|
script: rake
|
3
3
|
rvm:
|
4
|
-
- 2.
|
5
|
-
-
|
6
|
-
- 2.2.0
|
7
|
-
# we used to use jruby for merged DOR + MARC records, but no more ...
|
8
|
-
- jruby-head
|
9
|
-
# we also test against ruby-head, which may be unstable.
|
10
|
-
- ruby-head
|
11
|
-
matrix:
|
12
|
-
allow_failures:
|
13
|
-
- rvm: jruby-head
|
14
|
-
- rvm: ruby-head
|
4
|
+
- 2.7.0
|
5
|
+
- jruby-9.2.11.0
|
15
6
|
notifications: false
|
16
|
-
|
7
|
+
env:
|
8
|
+
global:
|
9
|
+
- CC_TEST_REPORTER_ID=102848d93bffaa165e4a28cf9998693634256355cd5821349afdea9792dbedaa
|
10
|
+
|
11
|
+
before_script:
|
12
|
+
- curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
|
13
|
+
- chmod +x ./cc-test-reporter
|
14
|
+
- ./cc-test-reporter before-build
|
15
|
+
|
16
|
+
after_script:
|
17
|
+
- ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT
|
data/Gemfile
CHANGED
@@ -2,13 +2,3 @@ source 'https://rubygems.org'
|
|
2
2
|
|
3
3
|
# See stanford-mods.gemspec for this gem's dependencies
|
4
4
|
gemspec
|
5
|
-
|
6
|
-
group :test, :development do
|
7
|
-
gem 'rubocop', require: false
|
8
|
-
gem 'rubocop-rspec', require: false
|
9
|
-
gem 'pry-byebug', require: false, platform: [:ruby_20, :ruby_21]
|
10
|
-
gem 'coveralls', require: false
|
11
|
-
end
|
12
|
-
|
13
|
-
# Pin to activesupport 4.x for older versions of ruby
|
14
|
-
gem 'activesupport', '~> 4.2' if RUBY_VERSION < '2.2.2'
|
data/lib/stanford-mods/coordinate.rb
CHANGED
@@ -16,12 +16,14 @@ module Stanford
|
|
16
16
|
# @return [String] the coordinate in WKT/CQL ENVELOPE representation
|
17
17
|
def as_envelope
|
18
18
|
return unless valid?
|
19
|
+
|
19
20
|
"ENVELOPE(#{bounds[:min_x]}, #{bounds[:max_x]}, #{bounds[:max_y]}, #{bounds[:min_y]})"
|
20
21
|
end
|
21
22
|
|
22
23
|
# @return [String] the coordinate in Solr 4.x+ bbox-format representation
|
23
24
|
def as_bbox
|
24
25
|
return unless valid?
|
26
|
+
|
25
27
|
"#{bounds[:min_x]} #{bounds[:min_y]} #{bounds[:max_x]} #{bounds[:max_y]}"
|
26
28
|
end
|
27
29
|
|
@@ -44,6 +46,7 @@ module Stanford
|
|
44
46
|
@bounds ||= begin
|
45
47
|
matches = cleaner_coordinate(value).match %r{\A(?<lat>[EW].+-+.+)\s*/\s*(?<lng>[NS].+-+.+)\Z}
|
46
48
|
return {} unless matches
|
49
|
+
|
47
50
|
min_x, max_x = matches['lat'].split(/-+/).map { |x| coord_to_decimal(x) }.minmax
|
48
51
|
min_y, max_y = matches['lng'].split(/-+/).map { |y| coord_to_decimal(y) }.minmax
|
49
52
|
{ min_x: min_x, min_y: min_y, max_x: max_x, max_y: max_y }
|
data/lib/stanford-mods/date_parsing.rb
CHANGED
@@ -32,6 +32,7 @@ module Stanford
|
|
32
32
|
# @return [Boolean] true if the year is between -999 and (current year + 1); false otherwise
|
33
33
|
def self.year_str_valid?(year_str)
|
34
34
|
return false unless year_str && (year_str.match(/^\d{1,4}$/) || year_str.match(/^-\d{1,3}$/))
|
35
|
+
|
35
36
|
(-1000 < year_str.to_i) && (year_str.to_i < Date.today.year + 2)
|
36
37
|
end
|
37
38
|
|
@@ -39,6 +40,7 @@ module Stanford
|
|
39
40
|
# @return [Boolean] true if the year is between -9999 and (current year + 1); false otherwise
|
40
41
|
def self.year_int_valid?(year)
|
41
42
|
return false unless year.is_a? Integer
|
43
|
+
|
42
44
|
(-1000 < year.to_i) && (year < Date.today.year + 2)
|
43
45
|
end
|
44
46
|
|
@@ -59,6 +61,7 @@ module Stanford
|
|
59
61
|
return display_str_for_bc if orig_date_str.match(BC_REGEX)
|
60
62
|
# decade next in case there are 4 digits, e.g. 1950s
|
61
63
|
return display_str_for_decade if orig_date_str.match(DECADE_4CHAR_REGEXP) || orig_date_str.match(DECADE_S_REGEXP)
|
64
|
+
|
62
65
|
result = sortable_year_for_yyyy_or_yy
|
63
66
|
unless result
|
64
67
|
# try removing brackets between digits in case we have 169[5] or [18]91
|
@@ -81,6 +84,7 @@ module Stanford
|
|
81
84
|
return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
|
82
85
|
# B.C. first in case there are 4 digits, e.g. 1600 B.C.
|
83
86
|
return sortable_year_int_for_bc if orig_date_str.match(BC_REGEX)
|
87
|
+
|
84
88
|
result = sortable_year_for_yyyy_or_yy
|
85
89
|
result ||= sortable_year_for_decade # 19xx or 20xx
|
86
90
|
result ||= sortable_year_for_century
|
@@ -102,6 +106,7 @@ module Stanford
|
|
102
106
|
return if orig_date_str == '0000-00-00' # shpc collection has these useless dates
|
103
107
|
# B.C. first in case there are 4 digits, e.g. 1600 B.C.
|
104
108
|
return sortable_year_str_for_bc if orig_date_str.match(BC_REGEX)
|
109
|
+
|
105
110
|
result = sortable_year_for_yyyy_or_yy
|
106
111
|
result ||= sortable_year_for_decade # 19xx or 20xx
|
107
112
|
result ||= sortable_year_for_century
|
@@ -144,6 +149,7 @@ module Stanford
|
|
144
149
|
# @return [String, nil] 4 digit year (e.g. 1865, 0950) if orig_date_str matches pattern, nil otherwise
|
145
150
|
def sortable_year_for_yy
|
146
151
|
return unless orig_date_str
|
152
|
+
|
147
153
|
slash_matches = orig_date_str.match(/\d{1,2}\/\d{1,2}\/\d{2}/)
|
148
154
|
if slash_matches
|
149
155
|
date_obj = Date.strptime(orig_date_str, '%m/%d/%y')
|
@@ -196,6 +202,7 @@ module Stanford
|
|
196
202
|
def sortable_year_for_century
|
197
203
|
return unless orig_date_str
|
198
204
|
return if orig_date_str =~ /B\.C\./
|
205
|
+
|
199
206
|
century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP)
|
200
207
|
if century_matches
|
201
208
|
return $1 + '00' if $1.length == 2
|
@@ -215,6 +222,7 @@ module Stanford
|
|
215
222
|
def display_str_for_century
|
216
223
|
return unless orig_date_str
|
217
224
|
return if orig_date_str =~ /B\.C\./
|
225
|
+
|
218
226
|
century_str_matches = orig_date_str.match(CENTURY_WORD_REGEXP)
|
219
227
|
return century_str_matches.to_s if century_str_matches
|
220
228
|
|
@@ -262,6 +270,7 @@ module Stanford
|
|
262
270
|
# @return [String, nil] String sortable -ddd if orig_date_str matches pattern; nil otherwise
|
263
271
|
def sortable_year_str_for_early_numeric
|
264
272
|
return unless orig_date_str.match(EARLY_NUMERIC)
|
273
|
+
|
265
274
|
if orig_date_str =~ /^\-/
|
266
275
|
# negative number becomes x - 1000 for sorting; -005 for -995
|
267
276
|
num = orig_date_str[1..-1].to_i - 1000
|
@@ -275,6 +284,7 @@ module Stanford
|
|
275
284
|
# @return [Integer, nil] Integer sortable -ddd if orig_date_str matches pattern; nil otherwise
|
276
285
|
def sortable_year_int_for_early_numeric
|
277
286
|
return orig_date_str.to_i if orig_date_str.match(EARLY_NUMERIC)
|
287
|
+
|
278
288
|
orig_date_str.to_i if orig_date_str =~ /^-\d{4}$/
|
279
289
|
end
|
280
290
|
|
@@ -290,6 +300,7 @@ module Stanford
|
|
290
300
|
return '1 B.C.' if orig_date_str == '0'
|
291
301
|
# negative number becomes B.C.
|
292
302
|
return "#{orig_date_str[1..-1].to_i + 1} B.C." if orig_date_str =~ /^\-/
|
303
|
+
|
293
304
|
# remove leading 0s from early dates
|
294
305
|
"#{orig_date_str.to_i} A.D."
|
295
306
|
end
|
@@ -304,6 +315,7 @@ module Stanford
|
|
304
315
|
# need more in string than only 2 digits
|
305
316
|
return if orig_date_str.match(/^\d\d$/) || orig_date_str.match(/^\D*\d\d\D*$/)
|
306
317
|
return if orig_date_str =~ /\d\s*B.C./ # skip B.C. dates
|
318
|
+
|
307
319
|
date_obj = Date.parse(orig_date_str)
|
308
320
|
date_obj.year.to_s
|
309
321
|
rescue ArgumentError
|
data/lib/stanford-mods/imprint.rb
CHANGED
@@ -37,12 +37,53 @@ module Stanford
|
|
37
37
|
imprint_statements.join('; ') if imprint_statements.present?
|
38
38
|
end
|
39
39
|
|
40
|
+
# @return Array<Integer> an array of publication years for the resource
|
41
|
+
def publication_date_for_slider
|
42
|
+
@originInfo_ng_nodeset.map do |origin_info_node|
|
43
|
+
date_elements = if origin_info_node.as_object.first.key_dates.any?
|
44
|
+
origin_info_node.as_object.first.key_dates.map(&:as_object).map(&:first)
|
45
|
+
else
|
46
|
+
date_field_keys.map do |date_field|
|
47
|
+
next unless origin_info_node.respond_to?(date_field)
|
48
|
+
|
49
|
+
date_elements = origin_info_node.send(date_field)
|
50
|
+
date_elements.map(&:as_object).map(&:first) if date_elements.any?
|
51
|
+
end.compact.first
|
52
|
+
end
|
53
|
+
|
54
|
+
if date_elements.nil? || date_elements.none?
|
55
|
+
[]
|
56
|
+
elsif date_elements.find(&:start?) &&
|
57
|
+
date_elements.find(&:start?).as_range &&
|
58
|
+
date_elements.find(&:end?) &&
|
59
|
+
date_elements.find(&:end?).as_range
|
60
|
+
start_date = date_elements.find(&:start?)
|
61
|
+
end_date = date_elements.find(&:end?)
|
62
|
+
|
63
|
+
(start_date.as_range.min.year..end_date.as_range.max.year).to_a
|
64
|
+
elsif date_elements.find(&:start?) && date_elements.find(&:start?).as_range
|
65
|
+
start_date = date_elements.find(&:start?)
|
66
|
+
|
67
|
+
(start_date.as_range.min.year..Time.now.year).to_a
|
68
|
+
elsif date_elements.one?
|
69
|
+
date_elements.first.to_a.map(&:year)
|
70
|
+
else
|
71
|
+
date_elements.map { |v| v.to_a.map(&:year) }.flatten
|
72
|
+
end
|
73
|
+
end.flatten
|
74
|
+
end
|
75
|
+
|
40
76
|
private
|
41
77
|
|
78
|
+
def extract_year(el)
|
79
|
+
DateParsing.year_int_from_date_str(el.text)
|
80
|
+
end
|
81
|
+
|
42
82
|
def compact_and_join_with_delimiter(values, delimiter)
|
43
83
|
compact_values = values.compact.reject { |v| v.strip.empty? }
|
44
84
|
return compact_values.join(delimiter) if compact_values.length == 1 ||
|
45
85
|
!ends_in_terminating_punctuation?(delimiter)
|
86
|
+
|
46
87
|
compact_values.each_with_index.map do |value, i|
|
47
88
|
if (compact_values.length - 1) == i || # last item?
|
48
89
|
ends_in_terminating_punctuation?(value)
|
@@ -65,6 +106,7 @@ module Stanford
|
|
65
106
|
|
66
107
|
def publisher_vals_str(origin_info_node)
|
67
108
|
return if origin_info_node.publisher.text.strip.empty?
|
109
|
+
|
68
110
|
publishers = origin_info_node.publisher.reject do |p|
|
69
111
|
p.text.strip.empty?
|
70
112
|
end.map(&:text)
|
@@ -75,6 +117,7 @@ module Stanford
|
|
75
117
|
|
76
118
|
def place_vals_str(origin_info_node)
|
77
119
|
return if origin_info_node.place.text.strip.empty?
|
120
|
+
|
78
121
|
places = place_terms(origin_info_node).reject do |p|
|
79
122
|
p.text.strip.empty?
|
80
123
|
end.map(&:text)
|
@@ -91,6 +134,7 @@ module Stanford
|
|
91
134
|
def place_terms(origin_info_element)
|
92
135
|
return [] unless origin_info_element.respond_to?(:place) &&
|
93
136
|
origin_info_element.place.respond_to?(:placeTerm)
|
137
|
+
|
94
138
|
if unencoded_place_terms?(origin_info_element)
|
95
139
|
origin_info_element.place.placeTerm.select do |term|
|
96
140
|
!term.attributes['type'].respond_to?(:value) ||
|
@@ -103,6 +147,7 @@ module Stanford
|
|
103
147
|
term.attributes['authority'].respond_to?(:value) &&
|
104
148
|
term.attributes['authority'].value == 'marccountry' &&
|
105
149
|
MARC_COUNTRIES.include?(term.text.strip)
|
150
|
+
|
106
151
|
term = term.clone
|
107
152
|
term.content = MARC_COUNTRIES[term.text.strip]
|
108
153
|
term
|
@@ -115,12 +160,14 @@ module Stanford
|
|
115
160
|
def date_str(origin_info_node)
|
116
161
|
date_vals = origin_info_date_vals(origin_info_node)
|
117
162
|
return if date_vals.empty?
|
163
|
+
|
118
164
|
date_vals.map(&:strip).join(' ')
|
119
165
|
end
|
120
166
|
|
121
167
|
def origin_info_date_vals(origin_info_node)
|
122
168
|
date_field_keys.map do |date_field|
|
123
169
|
next unless origin_info_node.respond_to?(date_field)
|
170
|
+
|
124
171
|
date_elements = origin_info_node.send(date_field)
|
125
172
|
date_elements_display_vals(date_elements) if date_elements.present?
|
126
173
|
end.compact.flatten
|
@@ -299,8 +346,8 @@ module Stanford
|
|
299
346
|
else
|
300
347
|
ng_date_element.content
|
301
348
|
end
|
302
|
-
|
303
|
-
|
349
|
+
rescue
|
350
|
+
ng_date_element.content
|
304
351
|
end
|
305
352
|
ng_date_element
|
306
353
|
end
|
@@ -313,8 +360,8 @@ module Stanford
|
|
313
360
|
else
|
314
361
|
ng_date_element.content
|
315
362
|
end
|
316
|
-
|
317
|
-
|
363
|
+
rescue
|
364
|
+
ng_date_element.content
|
318
365
|
end
|
319
366
|
ng_date_element
|
320
367
|
end
|
@@ -337,8 +384,8 @@ module Stanford
|
|
337
384
|
else
|
338
385
|
my_ng_date_element.content
|
339
386
|
end
|
340
|
-
|
341
|
-
|
387
|
+
rescue
|
388
|
+
my_ng_date_element.content
|
342
389
|
end
|
343
390
|
my_ng_date_element
|
344
391
|
end
|
@@ -359,8 +406,8 @@ module Stanford
|
|
359
406
|
else
|
360
407
|
my_ng_date_element.content
|
361
408
|
end
|
362
|
-
|
363
|
-
|
409
|
+
rescue
|
410
|
+
my_ng_date_element.content
|
364
411
|
end
|
365
412
|
my_ng_date_element
|
366
413
|
end
|
data/lib/stanford-mods/name.rb
CHANGED
@@ -45,6 +45,7 @@ module Stanford
|
|
45
45
|
result = []
|
46
46
|
mods_ng_xml.personal_name.map do |n|
|
47
47
|
next if n.role.size.zero?
|
48
|
+
|
48
49
|
n.role.each { |r|
|
49
50
|
result << n.display_value_w_date unless includes_marc_relator_collector_role?(r)
|
50
51
|
}
|
@@ -58,6 +59,7 @@ module Stanford
|
|
58
59
|
result = []
|
59
60
|
mods_ng_xml.personal_name.each do |n|
|
60
61
|
next if n.role.size.zero?
|
62
|
+
|
61
63
|
n.role.each { |r|
|
62
64
|
result << n.display_value_w_date if includes_marc_relator_collector_role?(r)
|
63
65
|
}
|
data/lib/stanford-mods/origin_info.rb
CHANGED
@@ -82,6 +82,7 @@ module Stanford
|
|
82
82
|
def year_display_str(date_el_array)
|
83
83
|
result = date_parsing_result(date_el_array, :date_str_for_display)
|
84
84
|
return result if result
|
85
|
+
|
85
86
|
_ignore, orig_str_to_parse = self.class.earliest_year_str(date_el_array)
|
86
87
|
DateParsing.date_str_for_display(orig_str_to_parse) if orig_str_to_parse
|
87
88
|
end
|
@@ -93,6 +94,7 @@ module Stanford
|
|
93
94
|
def year_int(date_el_array)
|
94
95
|
result = date_parsing_result(date_el_array, :year_int_from_date_str)
|
95
96
|
return result if result
|
97
|
+
|
96
98
|
year_int, _ignore = self.class.earliest_year_int(date_el_array)
|
97
99
|
year_int if year_int
|
98
100
|
end
|
@@ -104,6 +106,7 @@ module Stanford
|
|
104
106
|
def year_sort_str(date_el_array)
|
105
107
|
result = date_parsing_result(date_el_array, :sortable_year_string_from_date_str)
|
106
108
|
return result if result
|
109
|
+
|
107
110
|
sortable_str, _ignore = self.class.earliest_year_str(date_el_array)
|
108
111
|
sortable_str if sortable_str
|
109
112
|
end
|
@@ -115,6 +118,7 @@ module Stanford
|
|
115
118
|
def date_created_elements(ignore_approximate = false)
|
116
119
|
date_created_nodeset = mods_ng_xml.origin_info.dateCreated
|
117
120
|
return self.class.remove_approximate(date_created_nodeset) if ignore_approximate
|
121
|
+
|
118
122
|
date_created_nodeset.to_a
|
119
123
|
end
|
120
124
|
|
@@ -125,6 +129,7 @@ module Stanford
|
|
125
129
|
def date_issued_elements(ignore_approximate = false)
|
126
130
|
date_issued_nodeset = mods_ng_xml.origin_info.dateIssued
|
127
131
|
return self.class.remove_approximate(date_issued_nodeset) if ignore_approximate
|
132
|
+
|
128
133
|
date_issued_nodeset.to_a
|
129
134
|
end
|
130
135
|
|
@@ -194,6 +199,7 @@ module Stanford
|
|
194
199
|
# @return [Integer, String] year as a String or Integer, depending on method_sym
|
195
200
|
def date_parsing_result(date_el_array, method_sym)
|
196
201
|
return if date_el_array.empty?
|
202
|
+
|
197
203
|
# prefer keyDate
|
198
204
|
key_date_el = self.class.keyDate(date_el_array)
|
199
205
|
DateParsing.send(method_sym, key_date_el.content) if key_date_el
|
@@ -239,6 +245,7 @@ module Stanford
|
|
239
245
|
return nil unless pub_date
|
240
246
|
return "#{pub_date.to_i + 1000} B.C." if pub_date.start_with?('-')
|
241
247
|
return pub_date unless pub_date.include? '--'
|
248
|
+
|
242
249
|
"#{pub_date[0, 2].to_i + 1}th century"
|
243
250
|
end
|
244
251
|
|
@@ -251,6 +258,7 @@ module Stanford
|
|
251
258
|
pd = pd.gsub('--', '00')
|
252
259
|
end
|
253
260
|
fail "pub_date_sort was about to return a non 4 digit value #{pd}!" if pd && pd.length != 4
|
261
|
+
|
254
262
|
pd
|
255
263
|
end
|
256
264
|
|
@@ -261,6 +269,7 @@ module Stanford
|
|
261
269
|
def pub_date_display
|
262
270
|
return dates_no_marc_encoding.first unless dates_no_marc_encoding.empty?
|
263
271
|
return dates_marc_encoding.first unless dates_marc_encoding.empty?
|
272
|
+
|
264
273
|
nil
|
265
274
|
end
|
266
275
|
|
@@ -274,6 +283,7 @@ module Stanford
|
|
274
283
|
# use the cached year if there is one
|
275
284
|
if @pub_year
|
276
285
|
return nil if @pub_year == ''
|
286
|
+
|
277
287
|
return @pub_year
|
278
288
|
end
|
279
289
|
|
@@ -307,6 +317,7 @@ module Stanford
|
|
307
317
|
def pub_dates
|
308
318
|
return dates_marc_encoding unless dates_marc_encoding.empty?
|
309
319
|
return dates_no_marc_encoding unless dates_no_marc_encoding.empty?
|
320
|
+
|
310
321
|
nil
|
311
322
|
end
|
312
323
|
|
@@ -410,6 +421,7 @@ module Stanford
|
|
410
421
|
dates.each do |f_date|
|
411
422
|
matches = f_date.scan(/\d{1}th/)
|
412
423
|
next if matches.empty?
|
424
|
+
|
413
425
|
if matches.length == 1
|
414
426
|
@pub_year = (matches.first[0, 2].to_i - 1).to_s + '--'
|
415
427
|
return @pub_year
|
@@ -436,6 +448,7 @@ module Stanford
|
|
436
448
|
dates.each do |f_date|
|
437
449
|
matches = f_date.scan(/\d{2}th/)
|
438
450
|
next if matches.empty?
|
451
|
+
|
439
452
|
if matches.length == 1
|
440
453
|
@pub_year = (matches.first[0, 2].to_i - 1).to_s + '--'
|
441
454
|
return @pub_year
|
@@ -464,6 +477,7 @@ module Stanford
|
|
464
477
|
# Single digit u notation
|
465
478
|
matches = f_date.scan(/\d{3}u/)
|
466
479
|
return matches.first.tr('u', '0') if matches.length == 1
|
480
|
+
|
467
481
|
# Double digit u notation
|
468
482
|
matches = f_date.scan(/\d{2}u{2}/)
|
469
483
|
return matches.first.tr('u', '-') if matches.length == 1
|
data/lib/stanford-mods/searchworks.rb
CHANGED
@@ -139,26 +139,27 @@ module Stanford
|
|
139
139
|
present_title_info_nodes ? present_title_info_nodes.first : nil
|
140
140
|
end
|
141
141
|
|
142
|
-
# @return [String] the nonSort text portion of the titleInfo node as a string (if non-empty, else nil)
|
142
|
+
# @return [String] the nonSort text portion of the titleInfo node as a string (if non-empty, else nil)
|
143
143
|
def nonSort_title
|
144
144
|
return unless first_title_info_node && first_title_info_node.nonSort
|
145
145
|
|
146
146
|
first_title_info_node.nonSort.text.strip.empty? ? nil : first_title_info_node.nonSort.text.strip
|
147
147
|
end
|
148
|
-
|
148
|
+
|
149
149
|
# @return [String] the text of the titleInfo node as a string (if non-empty, else nil)
|
150
150
|
def title
|
151
151
|
return unless first_title_info_node && first_title_info_node.title
|
152
152
|
|
153
153
|
first_title_info_node.title.text.strip.empty? ? nil : first_title_info_node.title.text.strip
|
154
154
|
end
|
155
|
-
|
155
|
+
|
156
|
+
# Searchworks requires that the MODS has a '//titleInfo/title'
|
156
157
|
# @return [String] value for title_245_search, title_full_display
|
157
158
|
def sw_full_title
|
158
|
-
|
159
|
-
|
159
|
+
return nil if !first_title_info_node || !title
|
160
|
+
|
160
161
|
preSubTitle = nonSort_title ? [nonSort_title, title].compact.join(" ") : title
|
161
|
-
preSubTitle.sub!(/:$/, '')
|
162
|
+
preSubTitle.sub!(/:$/, '')
|
162
163
|
|
163
164
|
subTitle = first_title_info_node.subTitle.text.strip
|
164
165
|
preParts = subTitle.empty? ? preSubTitle : preSubTitle + " : " + subTitle
|
@@ -178,6 +179,7 @@ module Stanford
|
|
178
179
|
|
179
180
|
result = parts ? preParts + ". " + parts : preParts
|
180
181
|
return nil unless result
|
182
|
+
|
181
183
|
result += "." unless result =~ /[[:punct:]]$/
|
182
184
|
result.strip!
|
183
185
|
result = nil if result.empty?
|
@@ -191,13 +193,20 @@ module Stanford
|
|
191
193
|
def sw_title_display
|
192
194
|
result = sw_full_title
|
193
195
|
return nil unless result
|
196
|
+
|
194
197
|
result.sub(/[\.,;:\/\\]+$/, '').strip
|
195
198
|
end
|
196
199
|
|
197
200
|
# this includes all titles except
|
198
201
|
# @return [Array<String>] values for title_variant_search
|
199
202
|
def sw_addl_titles
|
200
|
-
|
203
|
+
excluded_title = sw_short_title || sw_title_display
|
204
|
+
if excluded_title.present?
|
205
|
+
title_regex = Regexp.new(Regexp.escape(excluded_title))
|
206
|
+
full_titles.reject { |s| s =~ title_regex }.reject(&:blank?)
|
207
|
+
else
|
208
|
+
full_titles.reject(&:blank?)
|
209
|
+
end
|
201
210
|
end
|
202
211
|
|
203
212
|
# Returns a sortable version of the main title
|
@@ -227,68 +236,15 @@ module Stanford
|
|
227
236
|
# see origin_info.rb (as all this information comes from top level originInfo element)
|
228
237
|
# ---- end PUBLICATION (place, year) ----
|
229
238
|
|
230
|
-
# select one or more format values from the controlled vocabulary here:
|
231
|
-
# http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format&rows=0&facet.sort=index
|
232
|
-
# @return <Array[String]> value in the SearchWorks controlled vocabulary
|
233
|
-
# @deprecated - kept for backwards compatibility but not part of SW UI redesign work Summer 2014
|
234
|
-
# @deprecated: this is no longer used in SW, Revs or Spotlight Jan 2016
|
235
|
-
def format
|
236
|
-
types = term_values(:typeOfResource)
|
237
|
-
return [] unless types
|
238
|
-
genres = term_values(:genre)
|
239
|
-
issuance = term_values([:origin_info, :issuance])
|
240
|
-
val = []
|
241
|
-
types.each do |type|
|
242
|
-
case type
|
243
|
-
when 'cartographic'
|
244
|
-
val << 'Map/Globe'
|
245
|
-
when 'mixed material'
|
246
|
-
val << 'Manuscript/Archive'
|
247
|
-
when 'moving image'
|
248
|
-
val << 'Video'
|
249
|
-
when 'notated music'
|
250
|
-
val << 'Music - Score'
|
251
|
-
when 'software, multimedia'
|
252
|
-
val << 'Computer File'
|
253
|
-
when 'sound recording-musical'
|
254
|
-
val << 'Music - Recording'
|
255
|
-
when 'sound recording-nonmusical', 'sound recording'
|
256
|
-
val << 'Sound Recording'
|
257
|
-
when 'still image'
|
258
|
-
val << 'Image'
|
259
|
-
when 'text'
|
260
|
-
val << 'Book' if issuance && issuance.include?('monographic')
|
261
|
-
book_genres = ['book chapter', 'Book chapter', 'Book Chapter',
|
262
|
-
'issue brief', 'Issue brief', 'Issue Brief',
|
263
|
-
'librettos', 'Librettos',
|
264
|
-
'project report', 'Project report', 'Project Report',
|
265
|
-
'technical report', 'Technical report', 'Technical Report',
|
266
|
-
'working paper', 'Working paper', 'Working Paper']
|
267
|
-
val << 'Book' if genres && !(genres & book_genres).empty?
|
268
|
-
conf_pub = ['conference publication', 'Conference publication', 'Conference Publication']
|
269
|
-
val << 'Conference Proceedings' if genres && !(genres & conf_pub).empty?
|
270
|
-
val << 'Journal/Periodical' if issuance && issuance.include?('continuing')
|
271
|
-
article = ['article', 'Article']
|
272
|
-
val << 'Journal/Periodical' if genres && !(genres & article).empty?
|
273
|
-
stu_proj_rpt = ['student project report', 'Student project report', 'Student Project report', 'Student Project Report']
|
274
|
-
val << 'Other' if genres && !(genres & stu_proj_rpt).empty?
|
275
|
-
thesis = ['thesis', 'Thesis']
|
276
|
-
val << 'Thesis' if genres && !(genres & thesis).empty?
|
277
|
-
when 'three dimensional object'
|
278
|
-
val << 'Other'
|
279
|
-
end
|
280
|
-
end
|
281
|
-
val.uniq
|
282
|
-
end
|
283
|
-
|
284
239
|
# select one or more format values from the controlled vocabulary per JVine Summer 2014
|
285
240
|
# http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format_main_ssim&rows=0&facet.sort=index
|
286
241
|
# https://github.com/sul-dlss/stanford-mods/issues/66 - For geodata, the
|
287
242
|
# resource type should be only Map and not include Software, multimedia.
|
288
243
|
# @return <Array[String]> value in the SearchWorks controlled vocabulary
|
289
244
|
def format_main
|
290
|
-
types =
|
245
|
+
types = typeOfResource
|
291
246
|
return [] unless types
|
247
|
+
|
292
248
|
article_genres = ['article', 'Article',
|
293
249
|
'book chapter', 'Book chapter', 'Book Chapter',
|
294
250
|
'issue brief', 'Issue brief', 'Issue Brief',
|
@@ -303,13 +259,18 @@ module Stanford
|
|
303
259
|
'thesis', 'Thesis'
|
304
260
|
]
|
305
261
|
val = []
|
306
|
-
genres = term_values(:genre)
|
307
|
-
issuance = term_values([:origin_info, :issuance])
|
262
|
+
genres = term_values(:genre) || []
|
263
|
+
issuance = term_values([:origin_info, :issuance]) || []
|
264
|
+
frequency = term_values([:origin_info, :frequency]) || []
|
265
|
+
|
266
|
+
val << 'Dataset' if genres.include?('dataset') || genres.include?('Dataset')
|
267
|
+
|
308
268
|
types.each do |type|
|
309
|
-
|
269
|
+
val << 'Archive/Manuscript' if type.manuscript == 'yes'
|
270
|
+
|
271
|
+
case type.text
|
310
272
|
when 'cartographic'
|
311
273
|
val << 'Map'
|
312
|
-
val.delete 'Software/Multimedia'
|
313
274
|
when 'mixed material'
|
314
275
|
val << 'Archive/Manuscript'
|
315
276
|
when 'moving image'
|
@@ -317,11 +278,7 @@ module Stanford
|
|
317
278
|
when 'notated music'
|
318
279
|
val << 'Music score'
|
319
280
|
when 'software, multimedia'
|
320
|
-
|
321
|
-
val << 'Dataset'
|
322
|
-
elsif !val.include?('Map')
|
323
|
-
val << 'Software/Multimedia'
|
324
|
-
end
|
281
|
+
val << 'Software/Multimedia' unless types.map(&:text).include?('cartographic') || (genres.include?('dataset') || genres.include?('Dataset'))
|
325
282
|
when 'sound recording-musical'
|
326
283
|
val << 'Music recording'
|
327
284
|
when 'sound recording-nonmusical', 'sound recording'
|
@@ -329,11 +286,14 @@ module Stanford
|
|
329
286
|
when 'still image'
|
330
287
|
val << 'Image'
|
331
288
|
when 'text'
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
val << '
|
289
|
+
is_explicitly_a_book = type.manuscript != 'yes' && (issuance.include?('monographic') || !(genres & article_genres).empty? || !(genres & book_genres).empty?)
|
290
|
+
is_periodical = issuance.include?('continuing') || issuance.include?('serial') || frequency.any? { |x| !x.empty? }
|
291
|
+
is_archived_website = genres.any? { |x| x.casecmp('archived website') == 0 }
|
292
|
+
|
293
|
+
val << 'Book' if is_explicitly_a_book
|
294
|
+
val << 'Journal/Periodical' if is_periodical
|
295
|
+
val << 'Archived website' if is_archived_website
|
296
|
+
val << 'Book' unless is_explicitly_a_book || is_periodical || is_archived_website
|
337
297
|
when 'three dimensional object'
|
338
298
|
val << 'Object'
|
339
299
|
end
|
@@ -341,24 +301,23 @@ module Stanford
|
|
341
301
|
val.uniq
|
342
302
|
end
|
343
303
|
|
344
|
-
# https://github.com/sul-dlss/stanford-mods/issues/66
|
345
|
-
# Limit genre values to Government document, Conference proceedings,
|
346
|
-
# Technical report and Thesis/Dissertation
|
347
304
|
# @return <Array[String]> values for the genre facet in SearchWorks
|
348
305
|
def sw_genre
|
349
306
|
genres = term_values(:genre)
|
350
307
|
return [] unless genres
|
351
|
-
|
352
|
-
val =
|
353
|
-
|
354
|
-
if genres
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
308
|
+
|
309
|
+
val = genres.map(&:to_s)
|
310
|
+
thesis_pub = ['thesis', 'Thesis']
|
311
|
+
val << 'Thesis/Dissertation' if (genres & thesis_pub).any?
|
312
|
+
|
313
|
+
conf_pub = ['conference publication', 'Conference publication', 'Conference Publication']
|
314
|
+
gov_pub = ['government publication', 'Government publication', 'Government Publication']
|
315
|
+
tech_rpt = ['technical report', 'Technical report', 'Technical Report']
|
316
|
+
|
317
|
+
val << 'Conference proceedings' if (genres & conf_pub).any?
|
318
|
+
val << 'Government document' if (genres & gov_pub).any?
|
319
|
+
val << 'Technical report' if (genres & tech_rpt).any?
|
320
|
+
|
362
321
|
val.uniq
|
363
322
|
end
|
364
323
|
|
@@ -366,6 +325,7 @@ module Stanford
|
|
366
325
|
def catkey
|
367
326
|
catkey = term_values([:record_info, :recordIdentifier])
|
368
327
|
return nil unless catkey && !catkey.empty?
|
328
|
+
|
369
329
|
catkey.first.tr('a', '') # ensure catkey is numeric only
|
370
330
|
end
|
371
331
|
end # class Record
|