pennmarc 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +23 -0
- data/Gemfile.lock +119 -0
- data/README.md +82 -0
- data/legacy/indexer.rb +568 -0
- data/legacy/marc.rb +2964 -0
- data/legacy/test_file_output.json +49 -0
- data/lib/pennmarc/encoding_level.rb +43 -0
- data/lib/pennmarc/enriched_marc.rb +36 -0
- data/lib/pennmarc/heading_control.rb +11 -0
- data/lib/pennmarc/helpers/citation.rb +31 -0
- data/lib/pennmarc/helpers/creator.rb +237 -0
- data/lib/pennmarc/helpers/database.rb +89 -0
- data/lib/pennmarc/helpers/date.rb +85 -0
- data/lib/pennmarc/helpers/edition.rb +90 -0
- data/lib/pennmarc/helpers/format.rb +312 -0
- data/lib/pennmarc/helpers/genre.rb +71 -0
- data/lib/pennmarc/helpers/helper.rb +11 -0
- data/lib/pennmarc/helpers/identifier.rb +134 -0
- data/lib/pennmarc/helpers/language.rb +37 -0
- data/lib/pennmarc/helpers/link.rb +12 -0
- data/lib/pennmarc/helpers/location.rb +97 -0
- data/lib/pennmarc/helpers/note.rb +132 -0
- data/lib/pennmarc/helpers/production.rb +131 -0
- data/lib/pennmarc/helpers/relation.rb +135 -0
- data/lib/pennmarc/helpers/series.rb +118 -0
- data/lib/pennmarc/helpers/subject.rb +304 -0
- data/lib/pennmarc/helpers/title.rb +197 -0
- data/lib/pennmarc/mappings/language.yml +516 -0
- data/lib/pennmarc/mappings/locations.yml +1801 -0
- data/lib/pennmarc/mappings/relator.yml +263 -0
- data/lib/pennmarc/parser.rb +177 -0
- data/lib/pennmarc/util.rb +240 -0
- data/lib/pennmarc.rb +6 -0
- data/pennmarc.gemspec +22 -0
- data/spec/fixtures/marcxml/test.xml +167 -0
- data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
- data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
- data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
- data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
- data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
- data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
- data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
- data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
- data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
- data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
- data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
- data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
- data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
- data/spec/lib/pennmarc/parser_spec.rb +13 -0
- data/spec/spec_helper.rb +104 -0
- data/spec/support/marc_spec_helpers.rb +84 -0
- metadata +171 -0
data/legacy/marc.rb
ADDED
@@ -0,0 +1,2964 @@
|
|
1
|
+
# rubocop:disable all
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'nokogiri'
|
5
|
+
|
6
|
+
module PennLib
|
7
|
+
|
8
|
+
# Constants for Alma's MARC enrichment
|
9
|
+
module EnrichedMarc
  # Tags of the enrichment fields; terminology follows the Publishing Profile screen.
  TAG_HOLDING              = 'hld'
  TAG_ITEM                 = 'itm'
  TAG_ELECTRONIC_INVENTORY = 'prt'
  TAG_DIGITAL_INVENTORY    = 'dig'

  # 852 subfield codes; terminology comes from the MARC spec.
  SUB_HOLDING_SHELVING_LOCATION   = 'c'
  SUB_HOLDING_SEQUENCE_NUMBER     = '8'
  SUB_HOLDING_CLASSIFICATION_PART = 'h'
  SUB_HOLDING_ITEM_PART           = 'i'

  # Item subfield codes.
  SUB_ITEM_CURRENT_LOCATION = 'g'
  SUB_ITEM_CALL_NUMBER_TYPE = 'h'
  SUB_ITEM_CALL_NUMBER      = 'i'
  SUB_ITEM_DATE_CREATED     = 'q'

  # Electronic inventory subfield codes.
  SUB_ELEC_PORTFOLIO_PID   = 'a'
  SUB_ELEC_ACCESS_URL      = 'b'
  SUB_ELEC_COLLECTION_NAME = 'c'
  SUB_ELEC_COVERAGE        = 'g'

  # A subfield code NOT used by the MARC 21 spec for 852 holdings records;
  # it is added during preprocessing to store boundwith record IDs.
  SUB_BOUND_WITH_ID = 'y'
end
|
36
|
+
|
37
|
+
# Single-character date type codes and the category each one maps to.
module DateType
  # Nothing
  UNSPECIFIED = '|'
  NO_DATES_OR_BC = 'b'
  UNKNOWN = 'n'

  # Single point
  DETAILED = 'e'
  SINGLE = 's'

  # Lower bound
  CONTINUING_CURRENTLY_PUBLISHED = 'c'
  CONTINUING_STATUS_UNKNOWN = 'u'

  # Range
  CONTINUING_CEASED_PUBLICATION = 'd'
  COLLECTION_INCLUSIVE = 'i'
  COLLECTION_BULK = 'k'
  MULTIPLE = 'm'
  QUESTIONABLE = 'q'

  # Separate date for content
  DISTRIBUTION_AND_PRODUCTION = 'p'
  REPRINT_AND_ORIGINAL = 'r'
  PUBLICATION_AND_COPYRIGHT = 't'

  # Code => category symbol. The "Nothing" codes are intentionally absent, so
  # looking them up yields nil. Frozen so the shared lookup table cannot be
  # mutated at runtime.
  MAP = {
    DETAILED => :single,
    SINGLE => :single,

    CONTINUING_CURRENTLY_PUBLISHED => :lower_bound,
    CONTINUING_STATUS_UNKNOWN => :lower_bound,

    CONTINUING_CEASED_PUBLICATION => :range,
    COLLECTION_INCLUSIVE => :range,
    COLLECTION_BULK => :range,
    MULTIPLE => :range,
    QUESTIONABLE => :range,

    DISTRIBUTION_AND_PRODUCTION => :separate_content,
    REPRINT_AND_ORIGINAL => :separate_content,
    PUBLICATION_AND_COPYRIGHT => :separate_content
  }.freeze
end
|
81
|
+
|
82
|
+
module SubjectConfig
|
83
|
+
|
84
|
+
# One-character prefixes identifying the type of a subject heading.
module Prefixes
  NAME      = 'n'
  TITLE     = 't'
  SUBJECT   = 's' # used for default, handled as lcsh
  FAST      = 'f'
  GEO       = 'g'
  CHILDRENS = 'c'
  MESH      = 'm'
  OTHER     = 'o'
end
|
94
|
+
|
95
|
+
# Wraps a callable that derives a heading prefix from a field.
class FieldConfig
  # @param mapper [#call] invoked with the field passed to #map_prefix
  def initialize(mapper)
    @mapper = mapper
  end

  # @param field [Object] handed unchanged to the wrapped callable
  # @return [Object] whatever the wrapped callable returns
  def map_prefix(field)
    @mapper.(field)
  end
end
|
104
|
+
|
105
|
+
# Maps a source code (compared lower-cased by map_prefix) to the prefix used
# for headings from that thesaurus. Frozen so the shared table cannot be
# mutated at runtime.
THESAURI = {
  'aat' => Prefixes::OTHER,
  'cct' => Prefixes::OTHER,
  'fast' => Prefixes::FAST,
  'homoit' => Prefixes::OTHER,
  'jlabsh' => Prefixes::OTHER,
  'lcsh' => Prefixes::SUBJECT,
  'lcstt' => Prefixes::OTHER,
  'lctgm' => Prefixes::OTHER,
  'local/osu' => Prefixes::OTHER,
  'mesh' => Prefixes::MESH,
  'ndlsh' => Prefixes::OTHER,
  'nlksh' => Prefixes::OTHER
}.freeze
|
119
|
+
|
120
|
+
# default field mapping is based only on ind2, and topic headings (as
|
121
|
+
# opposed to name/title headings) vary significantly across thesauri
|
122
|
+
default_field_mapping = FieldConfig.new(lambda { |f|
  # The prefix is chosen purely from the second indicator; any other
  # indicator value yields nil.
  case f.indicator2
  when '0' then Prefixes::SUBJECT
  when '1' then Prefixes::CHILDRENS
  when '2' then Prefixes::MESH
  when '4' then Prefixes::OTHER
  end
})
|
136
|
+
|
137
|
+
# for name/title, ind2=='0'/'1'/'2' are _all_ backed by LCNAF. See:
|
138
|
+
# https://www.loc.gov/aba/cyac/childsubjhead.html
|
139
|
+
# https://www.nlm.nih.gov/tsd/cataloging/trainingcourses/mesh/mod8_020.html
|
140
|
+
# Builds a mapper lambda: ind2 '0'/'1'/'2' all yield `base`, '4' yields
# Prefixes::OTHER, and anything else yields nil.
base_factory = lambda { |base|
  lambda { |f|
    case f.indicator2
    when '0', '1', '2' then base
    when '4' then Prefixes::OTHER
    end
  }
}
|
152
|
+
# Name, title and geographic headings share the base_factory mapping and
# differ only in the prefix used for ind2 '0'/'1'/'2'.
name_general, title_general, geo_general =
  [Prefixes::NAME, Prefixes::TITLE, Prefixes::GEO].map do |base_prefix|
    FieldConfig.new(base_factory.call(base_prefix))
  end
|
155
|
+
static_other = FieldConfig.new(lambda { |f|
  # For now, treat all of these as "other".
  # NOTE: 2nd indicator for local subject fields is inconsistently applied; map everything to "other"
  case f.indicator2
  when '0', '1', '2', '4' then Prefixes::OTHER
  end
})
|
165
|
+
|
166
|
+
# Field tag => FieldConfig used when a heading carries no explicit source.
# Frozen so the shared table cannot be mutated at runtime.
FIELDS = {
  '600' => name_general,
  '610' => name_general,
  '611' => name_general,
  '630' => title_general,
  '650' => default_field_mapping,
  '651' => geo_general,
  '690' => static_other, # topical (650)
  '691' => static_other, # geographic (651)
  #'696' => static_other # personal name (600) NOTE: not currently mapped!
  '697' => static_other # corporate name (610)
}.freeze
|
178
|
+
|
179
|
+
# Collects subject headings from the configured subject fields and from 880
# fields linked to them, then serializes them via map_to_input_fields.
#
# @param rec [#fields] record object; `fields` is called with the FIELDS tags and with '880'
# @return [Hash, nil] the result of map_to_input_fields, or nil when nothing was collected
def self.prepare_subjects(rec)
  acc = []
  rec.fields(FIELDS.keys).each { |f| filter_subject(f, f.tag, acc) }
  rec.fields('880').each do |f|
    # Subfield 6 holds a linkage such as "650-01/...", not a bare tag, so only
    # the leading three characters are compared against the configured tags.
    linkage = f.find { |sf| sf.code == '6' && FIELDS.key?(sf.value.to_s[0, 3]) }
    filter_subject(f, linkage.value[0, 3], acc) if linkage
  end
  acc.empty? ? nil : map_to_input_fields(acc)
end
|
190
|
+
|
191
|
+
# Keys passed as `:only` when serializing a heading struct; frozen because it is shared.
ONLY_KEYS = %i[val prefix append local vernacular].freeze
|
192
|
+
|
193
|
+
# Serializes the accumulated heading structs into the output hash.
# NOTE: each struct in `acc` is mutated (`:parts` may be replaced by `:val`,
# `:prefix` may be removed).
#
# @param acc [Array<Hash>] heading structs carrying at least :parts and :prefix
# @return [Hash] keys :xfacet, :stored_lcsh, :stored_childrens, :stored_mesh,
#   :stored_local; only :xfacet and :stored_local are populated here
def self.map_to_input_fields(acc)
  browse_entries = [] # provisionally instantiated; almost always needed
  result = {
    # `xfacet` entries support browse/facet, and will be mapped to stored fields solr-side
    xfacet: nil,
    # `stored_*` fields are stored only, and do _not_ support browse/facet
    stored_lcsh: nil,
    stored_childrens: nil,
    stored_mesh: nil,
    stored_local: nil
  }
  acc.each do |heading|
    tail = heading[:parts].last
    # Normalize trailing punctuation on the last component. When a comma was
    # trimmed, any `.` is treated as integral to the heading and left intact.
    Marc.trim_trailing_comma!(tail) || Marc.trim_trailing_period!(tail)
    if heading[:local] && heading[:prefix] == Prefixes::OTHER
      # Local subjects without a specified source bypass xfacet processing and
      # go directly into the stored field, for display only.
      heading[:val] = heading.delete(:parts).join('--')
      heading.delete(:prefix)
      result[:stored_local] ||= []
      result[:stored_local] << heading.to_json(:only => ONLY_KEYS)
    elsif heading.size == 2
      # Only `parts` and `prefix` (the required keys) are present: use the
      # legacy plain-string format.
      browse_entries << (heading[:prefix] + heading[:parts].join('--'))
    else
      # Simply map `parts` to `val`.
      heading[:val] = heading.delete(:parts).join('--')
      browse_entries << heading.to_json(:only => ONLY_KEYS)
    end
  end
  result[:xfacet] = browse_entries unless browse_entries.empty?
  result
end
|
233
|
+
|
234
|
+
# Builds a heading struct for `field` and appends it to `acc` when it survives
# struct building, prefix mapping and post-processing.
#
# @param field [Object] the source field
# @param tag [String] the tag the field should be treated as
# @param acc [Array<Hash>] accumulator, appended to in place
# @return [Array<Hash>, nil] `acc` when the heading was appended, otherwise nil
def self.filter_subject(field, tag, acc)
  heading = build_subject_struct(field, tag)
  return nil unless heading
  return nil unless map_prefix(heading, tag, field)

  post_process(heading) ? acc << heading : nil
end
|
240
|
+
|
241
|
+
# Determines the heading prefix and stores it under `ret[:prefix]`.
#
# @param ret [Hash] heading struct; `:source_specified` is read, `:prefix` is written
# @param tag [String] key into FIELDS, used when no source is specified
# @param field [Object] passed to the FieldConfig mapper
# @return [String, nil] the assigned prefix, or nil when none could be determined
def self.map_prefix(ret, tag, field)
  source = ret[:source_specified]
  prefix =
    if source
      # source_specified takes priority. NOTE: this holds even if ind2 != 7
      # (i.e., source_specified shouldn't even apply): parsing is lenient, the
      # principle being to defer to the _most explicit_ heading type declaration.
      THESAURI[source.downcase]
    else
      # Without `source_specified`, handling depends on the field. Fields should
      # be pre-filtered to valid codes, so safe navigation is intentionally not used.
      FIELDS[tag].map_prefix(field)
    end
  return nil unless prefix

  ret[:prefix] = prefix
end
|
254
|
+
|
255
|
+
# Builds the intermediate heading struct for one field.
#
# @param field [#indicator2, #tag, #each] field whose subfields respond to `code` and `value`
# @param tag [String] the tag the field is treated as (differs from `field.tag` for 880s)
# @return [Hash, nil] struct with :count and :parts, plus :local, :vernacular,
#   :source_specified and :append when applicable; nil when a local heading's
#   subfield a is a PRO/CHR entry
def self.build_subject_struct(field, tag)
  # Core String#start_with? is used rather than the ActiveSupport alias.
  local = field.indicator2 == '4' || tag.start_with?('69')
  ret = {
    count: 0,
    parts: [],
  }
  ret[:local] = true if local
  ret[:vernacular] = true if field.tag == '880'
  field.each do |sf|
    case sf.code
    when '0', '6', '8', '5', '1'
      # ignore these subfields
      next
    when 'a'
      # filter out PRO/CHR entirely (only needs checking on local heading types);
      # otherwise fall through so the value is added as a new part below
      return nil if local && sf.value =~ /^%?(PRO|CHR)([ $]|$)/
    when '2'
      # use the _last_ source specified, so overriding prior values is fine
      ret[:source_specified] = sf.value.strip
      next
    when 'e', 'w'
      # 'e' is relator term; the meaning of 'w' is unclear. Appended for record-view display only
      (ret[:append] ||= []) << sf.value.strip
      next
    when 'b', 'c', 'd', 'p', 'q', 't'
      # appended to the last component if possible (joined without a delimiter)
      append_to_last_part(ret[:parts], sf.value.strip)
      ret[:count] += 1
      next
    end
    # the usual case; add a new component to `parts`
    append_new_part(ret[:parts], sf.value.strip)
    ret[:count] += 1
  end
  ret
end
|
291
|
+
|
292
|
+
# Adds `value` as a new component, first trimming trailing punctuation from
# the component that currently ends the list (if any).
#
# @param parts [Array<String>] components so far; modified in place
# @param value [String] the new component
# @return [Array<String>] `parts`
def self.append_new_part(parts, value)
  unless parts.empty?
    previous = parts.last
    Marc.trim_trailing_comma!(previous) || Marc.trim_trailing_period!(previous)
  end
  parts << value
end
|
301
|
+
|
302
|
+
# Extends the last component with a space plus `value`; when there is no
# component yet, `value` becomes the first one.
#
# @param parts [Array<String>] components so far; modified in place
# @param value [String] text to attach
# @return [Array<String>, String] `parts` when it was empty, otherwise the extended last component
def self.append_to_last_part(parts, value)
  return parts << value if parts.empty?

  parts.last.concat(' ' + value)
end
|
309
|
+
|
310
|
+
# Final clean-up of a heading struct. Removes the :count bookkeeping key.
#
# @param ret [Hash] heading struct with :count and :parts
# @return [Hash, nil] `ret`, or nil when no subfield contributed to the heading
def self.post_process(ret)
  subfield_count = ret.delete(:count)
  return nil if subfield_count == 0

  if subfield_count == 1
    # A single subfield may be a poorly-coded record with several subdivisions
    # mashed together; try to rewrite the delimiters to a consistent `--`.
    # The count is tracked separately (rather than using `parts.size`) because
    # the subfield count serves as a heuristic for the heading's quality.
    sole = ret[:parts].first
    sole.gsub!(/([[[:alnum:]])])(\s+--\s*|\s*--\s+)([[[:upper:]][[:digit:]]])/, '\1--\3')
    sole.gsub!(/([[[:alpha:]])])\s+-\s+([[:upper:]]|[[:digit:]]{2,})/, '\1--\2')
    sole.gsub!(/([[[:alnum:]])])\s+-\s+([[:upper:]])/, '\1--\2')
  end
  ret
end
|
326
|
+
end
|
327
|
+
|
328
|
+
# Encoding level codes and a rank for each (lower ranks are listed first as "good" records).
module EncodingLevel
  # Official MARC codes (https://www.loc.gov/marc/bibliographic/bdleader.html)
  FULL = ' '
  FULL_NOT_EXAMINED = '1'
  UNFULL_NOT_EXAMINED = '2'
  ABBREVIATED = '3'
  CORE = '4'
  PRELIMINARY = '5'
  MINIMAL = '7'
  PREPUBLICATION = '8'
  UNKNOWN = 'u'
  NOT_APPLICABLE = 'z'

  # OCLC extension codes (https://www.oclc.org/bibformats/en/fixedfield/elvl.html)
  OCLC_FULL = 'I'
  OCLC_MINIMAL = 'K'
  OCLC_BATCH_LEGACY = 'L'
  OCLC_BATCH = 'M'
  OCLC_SOURCE_DELETED = 'J'

  # Code => rank. PREPUBLICATION, UNKNOWN and NOT_APPLICABLE have no entry, so
  # looking them up yields nil. Frozen so the shared table cannot be mutated.
  RANK = {
    # top 4 (per nelsonrr), do not differentiate among "good" records
    FULL => 0,
    FULL_NOT_EXAMINED => 0, # 1
    OCLC_FULL => 0, # 2
    CORE => 0, # 3
    UNFULL_NOT_EXAMINED => 4,
    ABBREVIATED => 5,
    PRELIMINARY => 6,
    MINIMAL => 7,
    OCLC_MINIMAL => 8,
    OCLC_BATCH => 9,
    OCLC_BATCH_LEGACY => 10,
    OCLC_SOURCE_DELETED => 11
  }.freeze
end
|
364
|
+
|
365
|
+
# Genre/Form
|
366
|
+
# display field selector logic
|
367
|
+
# reference: https://www.loc.gov/marc/bibliographic/bd655.html
|
368
|
+
#
|
369
|
+
# We display Genre/Term values if they fulfill the following criteria
|
370
|
+
# - The field is in MARC 655, or the field is in MARC 880 and its subfield 6 includes '655'.
|
371
|
+
# AND
|
372
|
+
# - Above fields have an indicator 2 value of: 0 (LCSH) or 4 (No source specified).
|
373
|
+
# OR
|
374
|
+
# - Above fields have a subfield 2 (ontology code) in the list of allowed values.
|
375
|
+
# Selector logic deciding which fields are displayed as Genre/Form values.
class GenreTools
  GENRE_FIELD_TAG = '655'
  ALT_GENRE_FIELD_TAG = '880'
  ALLOWED_INDICATOR2_VALUES = %w[0 4].freeze

  class << self
    # A genre field is allowed when it has an allowed source code or an
    # allowed second indicator.
    # @param [MARC::DataField] field
    # @return [Boolean]
    def allowed_genre_field?(field)
      return false unless genre_field?(field)

      allowed_code?(field) || allowed_ind2?(field)
    end

    # True for a 655 field, or an 880 field whose subfield 6 matches 655.
    # @param [MARC::DataField] field
    # @return [Boolean]
    def genre_field?(field)
      field.tag == GENRE_FIELD_TAG ||
        (field.tag == ALT_GENRE_FIELD_TAG && MarcUtil.has_subfield_value?(field, '6', /#{GENRE_FIELD_TAG}/))
    end

    # True when subfield 2 holds one of the allowed ontology codes.
    # @param [MARC::DataField] field
    # @return [Boolean]
    def allowed_code?(field)
      MarcUtil.subfield_value_in?(field, '2', PennLib::Marc::ALLOWED_SUBJ_GENRE_ONTOLOGIES)
    end

    # 0 in ind2 means LCSH
    # 4 in ind2 means "Source not specified"
    # Uses core Array#include? so no ActiveSupport extension is needed.
    # @param [MARC::DataField] field
    # @return [Boolean]
    def allowed_ind2?(field)
      ALLOWED_INDICATOR2_VALUES.include?(field.indicator2)
    end
  end
end
|
411
|
+
|
412
|
+
# class to hold "utility" methods used by other methods in the main Marc class and the new *Tools classes
|
413
|
+
# for now, leave methods as also defined in Marc class to avoid unexpected issues
|
414
|
+
# Stateless subfield-matching helpers.
class MarcUtil
  class << self
    # Returns true if the field has a subfield with the given code whose value
    # matches the passed-in regex.
    # @param [MARC::DataField] field
    # @param [String|Integer|Symbol] subf subfield code; compared via to_s
    # @param [Regexp] regex
    # @return [Boolean]
    def has_subfield_value?(field, subf, regex)
      field.any? { |sf| sf.code == subf.to_s && sf.value =~ regex }
    end

    # Returns true if the field has a subfield with the given code whose value
    # is contained in `array`. Uses core #include? so no ActiveSupport
    # extension is needed.
    # @param [MARC::DataField] field
    # @param [String|Integer|Symbol] subf subfield code; compared via to_s
    # @param [Array] array
    # @return [Boolean]
    def subfield_value_in?(field, subf, array)
      field.any? { |sf| sf.code == subf.to_s && array.include?(sf.value) }
    end
  end
end
|
435
|
+
|
436
|
+
# Class for doing extraction and processing on MARC::Record objects.
|
437
|
+
# This is intended to be used in both indexing code and front-end templating code
|
438
|
+
# (since MARC is stored in Solr). As such, there should NOT be any traject-specific
|
439
|
+
# things here.
|
440
|
+
#
|
441
|
+
# For a slight performance increase (~5%?) we use frozen_string_literal for immutable strings.
|
442
|
+
#
|
443
|
+
# Method naming conventions:
|
444
|
+
#
|
445
|
+
# *_values = indicates method returns an Array of values
|
446
|
+
#
|
447
|
+
# *_display = indicates method is intended to be used for
|
448
|
+
# individual record view (we should name things more meaningfully, according to
|
449
|
+
# the logic by which the values are generated, but I don't always know what this
|
450
|
+
# logic is, necessarily - JC)
|
451
|
+
#
|
452
|
+
class Marc
|
453
|
+
include BlacklightSolrplugins::Indexer
|
454
|
+
|
455
|
+
attr_accessor :code_mappings
|
456
|
+
|
457
|
+
DATABASES_FACET_VALUE = 'Database & Article Index'
|
458
|
+
# Subfield 2 source codes accepted for subject and genre headings. Frozen
# because the list is shared with other classes.
ALLOWED_SUBJ_GENRE_ONTOLOGIES = %w[aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
                                   local/osu mesh ndlsh nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp].freeze
|
460
|
+
|
461
|
+
# @param [PennLib::CodeMappings]
|
462
|
+
# Stores the code mappings object that the lookup readers below delegate to.
def initialize(code_mappings)
  @code_mappings = code_mappings
end
|
465
|
+
|
466
|
+
# @return [Integer] today's year, computed once and then memoized
def current_year
  return @current_year if @current_year

  @current_year = Date.today.year
end
|
469
|
+
|
470
|
+
# @return [Object] the `relator_codes` of the code mappings object
def relator_codes
  @code_mappings.relator_codes
end
|
473
|
+
|
474
|
+
# @return [Object] the `locations` of the code mappings object
def locations
  @code_mappings.locations
end
|
477
|
+
|
478
|
+
# @return [Object] the `loc_classifications` of the code mappings object
def loc_classifications
  @code_mappings.loc_classifications
end
|
481
|
+
|
482
|
+
# @return [Object] the `dewey_classifications` of the code mappings object
def dewey_classifications
  @code_mappings.dewey_classifications
end
|
485
|
+
|
486
|
+
# @return [Object] the `languages` of the code mappings object
def languages
  @code_mappings.languages
end
|
489
|
+
|
490
|
+
# @param s [String]
# @return [String] copy of `s` without a colon (and surrounding whitespace) at a line end
def trim_trailing_colon(s)
  trailing_colon = /\s*:\s*$/
  s.sub(trailing_colon, '')
end
|
493
|
+
|
494
|
+
# @param s [String]
# @return [String] copy of `s` without a semicolon (and surrounding whitespace) at a line end
def trim_trailing_semicolon(s)
  trailing_semicolon = /\s*;\s*$/
  s.sub(trailing_semicolon, '')
end
|
497
|
+
|
498
|
+
# @param s [String]
# @return [String] copy of `s` without an equals sign at a line end (whitespace is not touched)
def trim_trailing_equal(s)
  trailing_equal = /=$/
  s.sub(trailing_equal, '')
end
|
501
|
+
|
502
|
+
# @param s [String]
# @return [String] copy of `s` without a slash (and surrounding whitespace) at a line end
def trim_trailing_slash(s)
  trailing_slash = /\s*\/\s*$/
  s.sub(trailing_slash, '')
end
|
505
|
+
|
506
|
+
# Non-destructive instance-level wrapper around the class-level implementation.
# @param s [String]
# @return [String] copy of `s` without a trailing comma
def trim_trailing_comma(s)
  inplace = false
  self.class.trim_trailing_comma(s, inplace)
end
|
509
|
+
|
510
|
+
# Destructive variant: trims a trailing comma from `s` itself.
# @param s [String] modified in place
# @return [String, nil] `s` when a comma was removed, nil when nothing changed
def self.trim_trailing_comma!(s)
  inplace = true
  trim_trailing_comma(s, inplace)
end
|
513
|
+
|
514
|
+
# Removes a comma (and surrounding whitespace) at a line end.
# @param s [String]
# @param inplace [Boolean] when truthy `s` itself is modified
# @return [String, nil] the trimmed string; with `inplace`, nil when nothing changed
def self.trim_trailing_comma(s, inplace)
  trailing_comma = /\s*,\s*$/
  return s.sub!(trailing_comma, '') if inplace

  s.sub(trailing_comma, '')
end
|
518
|
+
|
519
|
+
# Non-destructive instance-level wrapper around the class-level implementation.
# @param s [String]
# @return [String] `s`, or a copy without its trailing period
def trim_trailing_period(s)
  inplace = false
  self.class.trim_trailing_period(s, inplace)
end
|
522
|
+
|
523
|
+
# Destructive variant: trims a trailing period from `s` itself.
# @param s [String] modified in place
# @return [String, nil] `s` when a period was removed, nil when nothing changed
def self.trim_trailing_period!(s)
  inplace = true
  trim_trailing_period(s, inplace)
end
|
526
|
+
|
527
|
+
# Removes a trailing period, except after "etc." or after a single capital
# letter that is not preceded by another ASCII letter (e.g. an initial).
# @param s [String]
# @param inplace [Boolean] when truthy `s` itself is modified
# @return [String, nil] the trimmed string; with `inplace`, nil when nothing changed
def self.trim_trailing_period(s, inplace)
  if s.end_with?('etc.') || s =~ /(^|[^a-zA-Z])[A-Z]\.$/
    # nil if unchanged, for consistency with standard `inplace` semantics
    return inplace ? nil : s
  end

  trailing_period = /\.\s*$/
  return s.sub!(trailing_period, '') if inplace

  s.sub(trailing_period, '')
end
|
535
|
+
|
536
|
+
# squish in ActiveSupport
|
537
|
+
# Strips the ends and collapses each run of two or more whitespace characters
# into one space.
# @param s [String]
# @return [String]
def normalize_space(s)
  stripped = s.strip
  stripped.gsub(/\s{2,}/, ' ')
end
|
540
|
+
|
541
|
+
# this logic matches substring-before in XSLT: if no match for sub, returns an empty string
|
542
|
+
# Matches substring-before in XSLT: returns the part of `s` before the first
# occurrence of `sub`, or an empty string when `sub` does not occur.
# String#index stops at the first match, so the whole string is not scanned
# just to test for presence and no ActiveSupport extension is needed.
# @param s [String]
# @param sub [String, Regexp]
# @return [String]
def substring_before(s, sub)
  s.index(sub) ? s.split(sub, 2)[0] : ''
end
|
545
|
+
|
546
|
+
# this logic matches substring-after in XSLT: if no match for sub, returns an empty string
|
547
|
+
# Matches substring-after in XSLT: returns the part of `s` after the first
# occurrence of `sub`, or an empty string when `sub` does not occur.
# String#index stops at the first match, so the whole string is not scanned
# just to test for presence and no ActiveSupport extension is needed.
# @param s [String]
# @param sub [String, Regexp]
# @return [String]
def substring_after(s, sub)
  s.index(sub) ? s.split(sub, 2)[1] : ''
end
|
550
|
+
|
551
|
+
# Joins the elements with single spaces and normalizes the whitespace of the result.
# @param array [Array<String>]
# @return [String]
def join_and_trim_whitespace(array)
  joined = array.join(' ')
  normalize_space(joined)
end
|
554
|
+
|
555
|
+
# join subfield values together (as selected using passed-in block)
|
556
|
+
# Joins, with single spaces, the non-blank values of the subfields for which
# the passed-in block returns a truthy value.
# @param field [Enumerable] subfields responding to `value`
# @return [String]
def join_subfields(field, &block)
  chosen = field.select { |subfield| block.call(subfield) }
  values = chosen.map(&:value)
  values.select { |value| value.present? }.join(' ')
end
|
559
|
+
|
560
|
+
# this is used for filtering in a lot of places
|
561
|
+
# returns a lambda that can be passed to Enumerable#select
|
562
|
+
# using the & syntax
|
563
|
+
# @return [Proc] memoized lambda that is true for subfields whose code is neither '6' nor '8'
def subfield_not_6_or_8
  @subfield_not_6_or_8 ||= lambda do |subfield|
    !%w{6 8}.member?(subfield.code)
  end
end
|
568
|
+
|
569
|
+
# returns a lambda checking if passed-in subfield's code
|
570
|
+
# is a member of array
|
571
|
+
# @param array [Array<String>] subfield codes to accept
# @return [Proc] lambda that is true when a subfield's code is a member of `array`
def subfield_in(array)
  lambda do |subfield|
    array.member?(subfield.code)
  end
end
|
574
|
+
|
575
|
+
# returns a lambda checking if passed-in subfield's code
|
576
|
+
# is NOT a member of array
|
577
|
+
# @param array [Array<String>] subfield codes to reject
# @return [Proc] lambda that is true when a subfield's code is NOT a member of `array`
def subfield_not_in(array)
  lambda do |subfield|
    !array.member?(subfield.code)
  end
end
|
580
|
+
|
581
|
+
|
582
|
+
# 11/2018 kms: eventually should deprecate has_subfield6_value and use this for all
|
583
|
+
# returns true if field has a value that matches
|
584
|
+
# passed-in regex and passed in subfield
|
585
|
+
# True if the field has a subfield with code `subf` whose value matches `regex`.
# @param field [Enumerable] subfields responding to `code` and `value`
# @param subf [String] subfield code, compared with ==
# @param regex [Regexp]
# @return [Boolean]
def has_subfield_value(field, subf, regex)
  field.any? do |sf|
    next false unless sf.code == subf

    sf.value =~ regex
  end
end
|
588
|
+
|
589
|
+
# True if the field has a subfield with code `subf` whose value is contained
# in `array`. Uses core #include? so no ActiveSupport extension is needed.
# @param field [Enumerable] subfields responding to `code` and `value`
# @param subf [String] subfield code, compared with ==
# @param array [Array]
# @return [Boolean]
def subfield_value_in(field, subf, array)
  field.any? { |sf| sf.code == subf && array.include?(sf.value) }
end
|
592
|
+
|
593
|
+
# common case of wanting to extract subfields as selected by passed-in block,
|
594
|
+
# from 880 datafield that has a particular subfield 6 value
|
595
|
+
# @param subf6_value [String|Array] either a single str value to look for in sub6 or an array of them
|
596
|
+
# @param block [Proc] takes a subfield as argument, returns a boolean
|
597
|
+
# Extracts, from every 880 field whose subfield 6 starts with the given
# value(s), the values of the subfields selected by the block, joined with
# single spaces (one string per matching field).
# @param rec [#fields] record object
# @param subf6_value [String|Array] a single value to look for in sub6, or an array of them
# @param block [Proc] takes a subfield as argument, returns a boolean
# @return [Array<String>]
def get_880(rec, subf6_value, &block)
  pattern = subf6_value.is_a?(Array) ? "(#{subf6_value.join('|')})" : subf6_value
  linked = rec.fields('880').select { |f| has_subfield6_value(f, /^#{pattern}/) }
  linked.map do |field|
    wanted = field.select { |sf| block.call(sf) }
    wanted.map(&:value).join(' ')
  end
end
|
609
|
+
|
610
|
+
# common case of wanting to extract all the subfields besides 6 or 8,
|
611
|
+
# from 880 datafield that has a particular subfield 6 value
|
612
|
+
# Convenience form of get_880 that keeps every subfield except codes 6 and 8.
# @param rec [#fields] record object
# @param subf6_value [String|Array] value(s) to look for in subfield 6
# @return [Array<String>]
def get_880_subfield_not_6_or_8(rec, subf6_value)
  get_880(rec, subf6_value) { |sf| !%w{6 8}.member?(sf.code) }
end
|
617
|
+
|
618
|
+
# returns the non-6,8 subfields from a datafield and its 880 link
|
619
|
+
# Returns the joined non-6/8 subfields of every `tag` field, followed by those
# of the 880 fields linked to `tag`.
# @param rec [#fields] record object
# @param tag [String]
# @return [Array<String>]
def get_datafield_and_880(rec, tag)
  direct = rec.fields(tag).map do |field|
    join_subfields(field, &subfield_not_in(%w{6 8}))
  end
  direct + get_880_subfield_not_6_or_8(rec, tag)
end
|
627
|
+
|
628
|
+
# Appends cumulative title variants to `acc`: the first value, then that value
# extended by each following non-nil value in turn. When `non_filing` contains
# a digit 1-9, that many leading characters are cut off the first value and
# every variant is added both without and with that prefix.
# NOTE: `subfields` is consumed and its first string is modified in place.
# @param acc [Array<String>] receives the variants
# @param non_filing [String, nil] count of non-filing characters
# @param subfields [Array<String, nil>] values; the first one is required
# @return [nil]
def append_title_variant_field(acc, non_filing, subfields)
  base = subfields.shift
  return if base.nil? # there's something wrong; first is always required

  prefix = base.slice!(0, non_filing.to_i) if non_filing =~ /[1-9]/
  loop do
    acc << base
    acc << prefix + base unless prefix.nil?
    return if subfields.empty?

    next_part = subfields.shift
    while next_part.nil?
      return if subfields.empty?

      next_part = subfields.shift
    end
    base = "#{base} #{next_part}"
  end
end
|
646
|
+
|
647
|
+
# returns true if field's subfield 6 has a value that matches
|
648
|
+
# passed-in regex
|
649
|
+
# True if any subfield 6 of the field has a value matching `regex`.
# @param field [Enumerable] subfields responding to `code` and `value`
# @param regex [Regexp]
# @return [Boolean]
def has_subfield6_value(field, regex)
  field.any? do |sf|
    next false unless sf.code == '6'

    sf.value =~ regex
  end
end
|
652
|
+
|
653
|
+
# for a string 's', return a hash of ref_type => Array of references,
|
654
|
+
# where a reference is a String or a Hash representing a multipart string
|
655
|
+
# For a string `s`, returns a hash of ref_type => Array of references, or nil
# when `s` has no entry.
# TODO: just simple test data for now; hook up to actual cross ref data
# @param s [String]
# @return [Hash{String => Array<String>}, nil]
def get_subject_references(s)
  case s
  when 'Cyberspace' then { 'see_also' => [ 'Internet', 'Computer networks' ] }
  when 'Internet' then { 'see_also' => [ 'Cyberspace', 'Computer networks' ] }
  when 'Computer networks' then { 'see_also' => [ 'Cyberspace', 'Internet' ] }
  # one way
  when 'Programming Languages' then { 'use_instead' => [ 'Computer programming' ] }
  end
end
|
669
|
+
|
670
|
+
# @return [Array<String>] memoized list of subject field tags
def subject_codes
  return @subject_codes if @subject_codes

  @subject_codes = %w(600 610 611 630 650 651)
end
|
673
|
+
|
674
|
+
# @return [Hash{Integer => String}] memoized map from numeric subject tag to xfacet prefix
def subject_codes_to_xfacet_prefixes
  return @subject_codes_to_xfacet_prefixes if @subject_codes_to_xfacet_prefixes

  @subject_codes_to_xfacet_prefixes = {
    600 => 'n', 610 => 'n', 611 => 'n',
    630 => 't',
    650 => 's',
    651 => 'g'
  }
end
|
684
|
+
|
685
|
+
# True for a field with a subject tag whose second indicator is 0, 2 or 4, or
# is 7 together with a subfield 2 naming an allowed ontology.
# @param field [#tag, #indicator2, #any?]
# @return [Boolean]
def is_subject_field(field)
  return false unless subject_codes.member?(field.tag)
  return true if %w(0 2 4).member?(field.indicator2)

  # 10/2018 kms: add 2nd Ind 7
  field.indicator2 == '7' && field.any? do |sf|
    sf.code == '2' && ALLOWED_SUBJ_GENRE_ONTOLOGIES.member?(sf.value)
  end
end
|
692
|
+
|
693
|
+
# Truthy when the subfield has code 'a' or '%' and its value starts with an
# (optionally %-prefixed) PRO or CHR token.
# @param sf [#code, #value]
# @return [Integer, nil, false] match position, nil on no match, false for other codes
def reject_pro_chr(sf)
  relevant_code = %w{a %}.member?(sf.code)
  relevant_code && sf.value =~ /^%?(PRO|CHR)([ $]|$)/
end
|
696
|
+
|
697
|
+
# True when any 944 field has a subfield a equal to the databases facet value.
# The DATABASES_FACET_VALUE constant is used instead of repeating its literal,
# so this check and get_db_types cannot drift apart.
# @param rec [#fields] record object
# @return [Boolean]
def is_curated_database(rec)
  rec.fields('944').any? do |field|
    field.any? { |sf| sf.code == 'a' && sf.value == DATABASES_FACET_VALUE }
  end
end
|
704
|
+
|
705
|
+
# Returns the distinct subfield a values of the 944 fields, skipping fields
# without a subfield a and values that are plain integer strings.
# @param rec [#fields] record object
# @return [Array<String>]
def get_curated_format(rec)
  formats = rec.fields('944').map do |field|
    sub_a = field.find { |sf| sf.code == 'a' }
    next if sub_a.nil?

    sub_a.value == sub_a.value.to_i.to_s ? nil : sub_a.value
  end
  formats.compact.uniq
end
|
711
|
+
|
712
|
+
# For a curated database record, returns the subfield b values of the 944
# fields whose subfield a equals the databases facet value.
# @param rec [#fields] record object
# @return [Array<String>] empty unless the record is a curated database
def get_db_types(rec)
  return [] unless is_curated_database(rec)

  database_fields = rec.fields('944').select do |field|
    field.any? { |sf| sf.code == 'a' && sf.value == PennLib::Marc::DATABASES_FACET_VALUE }
  end
  database_fields.map { |field| field.find { |sf| sf.code == 'b' }&.value }.compact
end
|
721
|
+
|
722
|
+
# For a curated database record, returns the subfield a values of the 943
# fields whose subfield 2 is 'penncoi'.
# @param rec [#fields] record object
# @return [Array<String>] empty unless the record is a curated database
def get_db_categories(rec)
  return [] unless is_curated_database(rec)

  coi_fields = rec.fields('943').select do |field|
    field.any? { |sf| sf.code == '2' && sf.value == 'penncoi' }
  end
  coi_fields.map { |field| field.find { |sf| sf.code == 'a' }&.value }.compact
end
|
731
|
+
|
732
|
+
# For a curated database record, returns one string per 943 'penncoi' field
# that has a subfield a: "category--subcategory" when subfield b is present,
# otherwise just the category value.
# @param rec [#fields] record object
# @return [Array<String>] empty unless the record is a curated database
def get_db_subcategories(rec)
  return [] unless is_curated_database(rec)

  rec.fields('943').map do |field|
    next unless field.any? { |sf| sf.code == '2' && sf.value == 'penncoi' }

    category = field.find { |sf| sf.code == 'a' }
    next if category.nil?

    sub_category = field.find { |sf| sf.code == 'b' }
    # Return the category's string value (not the subfield object) when there
    # is no subcategory, so the result is uniformly an array of strings.
    sub_category.nil? ? category.value : "#{category.value}--#{sub_category.value}"
  end.compact
end
|
744
|
+
|
745
|
+
# TODO: MG removed the join_subject_parts method when adding in the SubjectConfig module here. This method still
|
746
|
+
# appears to be in use in the FranklinIndexer even though many subject fields are now processed differently
|
747
|
+
# Work should be done to remove all usages of join_subject_parts. Perhaps functionality from SubjectConfig could
|
748
|
+
# be used instead
|
749
|
+
# Returns facet values for every subject field: the joined full heading
# (skipped when `toplevel_only`) and, when applicable, the joined subfield a
# values, each with its trailing period trimmed.
# NOTE(review): relies on `join_subject_parts`, which is not defined in the
# visible part of this file (see the TODO above) - confirm it still exists.
# @param rec [#fields] record object
# @param toplevel_only [Boolean] when true only the subfield a values are returned
# @return [Array<String>]
def get_subject_facet_values(rec, toplevel_only = false)
  per_field = rec.fields.find_all { |f| is_subject_field(f) }.map do |field|
    just_a = nil
    has_a = field.any? { |sf| sf.code == 'a' }
    if has_a && (toplevel_only || field.any? { |sf| sf.code != 'a' })
      a_values = field.find_all(&subfield_in(%w{a})).map(&:value)
      just_a = a_values.select { |v| v !~ /^%?(PRO|CHR)/ }.join(' ')
    end
    full_heading = toplevel_only ? nil : join_subject_parts(field)
    [ full_heading, just_a ].compact.map { |v| trim_trailing_period(v) }
  end
  per_field.flatten(1).select { |v| v.present? }
end
|
759
|
+
|
760
|
+
# Returns, for every subject field with a non-blank joined heading, the xfacet
# prefix for the field's tag followed by that heading.
# Cross references are handled Solr-side, so values are no longer wrapped in
# #references.
# NOTE(review): relies on `join_subject_parts`, which is not defined in the
# visible part of this file - confirm it still exists.
# @param rec [#fields] record object
# @return [Array<String>]
def get_subject_xfacet_values(rec)
  rec.fields.find_all { |f| is_subject_field(f) }.each_with_object([]) do |field, out|
    value = trim_trailing_period(join_subject_parts(field, double_dash: true))
    next unless value.present?

    out << subject_codes_to_xfacet_prefixes[field.tag.to_i] + value
  end
end
|
769
|
+
|
770
|
+
# @return [Array<String>] memoized list of tags examined for subject search values
def subject_search_tags
  return @subject_search_tags if @subject_search_tags

  @subject_search_tags = %w{541 561 600 610 611 630 650 651 653}
end
|
773
|
+
|
774
|
+
# Decides whether a field contributes to subject search.
#
# A field qualifies when it exposes indicator2 with a value of 0, 1, 2, 4 or 7
# and either its own tag is in subject_search_tags or starts with '69', or it
# is an 880 whose first $6 begins with such a tag.
#
# @param field a MARC field (control fields lack indicator2 and return false)
# @return [TrueClass, FalseClass]
def is_subject_search_field(field)
  # 11/2018 kms: add 2nd Ind 7
  return false unless field.respond_to?(:indicator2) && %w[0 1 2 4 7].member?(field.indicator2)

  tag = field.tag
  return true if subject_search_tags.member?(tag) || tag.start_with?('69')
  return false unless tag == '880'

  # the first three characters of $6 name the tag this 880 is linked to
  linked_tag = (field.find { |sf| sf.code == '6' }&.value || '')[0..2]
  subject_search_tags.member?(linked_tag) || linked_tag.start_with?('69')
end
|
787
|
+
|
788
|
+
# Produces one search string per subject search field.
#
# Subfield handling: $a has a leading (optional '%') PRO/CHR marker and a
# trailing '?' removed; $4 is emitted as "code, <relator_codes lookup>";
# $5, $6 and $8 are skipped; every other subfield is emitted as-is. Fields
# that yield no parts are dropped.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_subject_search_values(rec)
  # this has been completely migrated
  rec.fields.select { |f| is_subject_search_field(f) }.map do |field|
    parts = field.each_with_object([]) do |sf, collected|
      case sf.code
      when 'a'
        collected << " #{sf.value.gsub(/^%?(PRO|CHR)/, '').gsub(/\?$/, '')}"
      when '4'
        collected << "#{sf.value}, #{relator_codes[sf.value]}"
      when '5', '6', '8'
        next
      else
        collected << " #{sf.value}"
      end
    end
    join_and_trim_whitespace(parts) if parts.present?
  end.compact
end
|
805
|
+
|
806
|
+
# @returns [Array] of string field tags to examine for subjects
|
807
|
+
# Memoized list of the 6xx tags treated as subject headings.
#
# @return [Array<String>]
def subject_600s
  @subject_600s ||= %w[600 610 611 630 650 651]
end
|
810
|
+
|
811
|
+
# 11/2018 kms: add local subj fields- always Local no matter the 2nd Ind
|
812
|
+
# Memoized list of the local (69x) subject tags.
#
# @return [Array<String>]
def subject_69X
  @subject_69X ||= %w[690 691 697]
end
|
815
|
+
|
816
|
+
# 11/2018: add 69x as local subj, add 650 _7 as subj
|
817
|
+
# Builds subject display hashes for one "kind" of subject, selected by the
# second indicator value passed in.
#
# indicator2 '0', '1' or '2': fields tagged in subject_600s, or 880 fields
# whose $6 starts with one of those tags (note: 880, despite the method name),
# whose indicator2 equals the requested one. When '0' is requested, fields with
# indicator2 '7' are also included if a $2 is in ALLOWED_SUBJ_GENRE_ONTOLOGIES.
# Result hashes carry :value, :value_for_link, :value_append and
# link_type 'subject_xfacet2'.
#
# indicator2 '4' (local subjects): subject_600s fields with indicator2 '4',
# plus any subject_69X field unless its $2 matches /penncoi/. Result hashes
# carry :value, :value_for_link and link_type 'subject_search'.
#
# Any other indicator2 yields an empty array. Duplicate hashes are removed,
# keeping the first occurrence.
#
# @param [MARC::Record] rec
# @param [String] indicator2
# @return [Array<Hash>]
def get_subjects_from_600s_and_800(rec, indicator2)
  seen = Set.new

  if %w[0 1 2].member?(indicator2)
    # Subjects, Childrens subjects, and Medical Subjects all share this code
    # also 650 _7, subjs w/ source specified in $2. These display as Subjects along w/ the ind2==0 650s
    linked_subject_tag = /^(#{subject_600s.join('|')})/
    by_tag = rec.fields.select do |f|
      subject_600s.member?(f.tag) || (f.tag == '880' && has_subfield6_value(f, linked_subject_tag))
    end
    by_indicator = by_tag.select do |f|
      f.indicator2 == indicator2 ||
        (f.indicator2 == '7' && indicator2 == '0' &&
          f.any? { |sf| sf.code == '2' && ALLOWED_SUBJ_GENRE_ONTOLOGIES.member?(sf.value) })
    end
    built = by_indicator.map do |field|
      # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
      value_for_link = join_subfields(field, &subfield_not_in(%w[0 6 8 2 e w]))
      sub_with_hyphens = field.select(&subfield_not_in(%w[0 6 8 2 e w])).map do |sf|
        pre = %w[a b c d p q t].member?(sf.code) ? ' ' : ' -- '
        pre + sf.value + (sf.code == 'p' ? '.' : '')
      end.join(' ')
      eandw_with_hyphens = field.select(&subfield_in(%w[e w])).map { |sf| ' -- ' + sf.value }.join(' ')
      next unless sub_with_hyphens.present?

      {
        value: sub_with_hyphens,
        value_for_link: value_for_link,
        value_append: eandw_with_hyphens,
        link_type: 'subject_xfacet2'
      }
    end
    built.compact.select { |val| seen.add?(val) }
  elsif indicator2 == '4'
    # Local subjects
    # either a tag in subject_600s list with ind2==4, or a tag in subject_69X list with any ind2.
    # but NOT a penn community of interest 690 (which have $2 penncoi )
    local_fields = rec.fields.select do |f|
      (subject_600s.member?(f.tag) && f.indicator2 == '4') ||
        (subject_69X.member?(f.tag) && !has_subfield_value(f, '2', /penncoi/))
    end
    built = local_fields.map do |field|
      suba = field.select(&subfield_in(%w[a]))
                  .select { |sf| sf.value !~ /^%?(PRO|CHR)/ }
                  .map(&:value).join(' ')
      # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
      # 11/2018 kms: also do not display subf 5 or 2
      sub_oth = field.select(&subfield_not_in(%w[0 a 6 8 5 2])).map do |sf|
        pre = %w[b c d p q t].member?(sf.code) ? ' ' : ' -- '
        pre + sf.value + (sf.code == 'p' ? '.' : '')
      end
      subj_display = [suba, sub_oth].join(' ')
      sub_oth_no_hyphens = join_subfields(field, &subfield_not_in(%w[0 a 6 8 5 2]))
      subj_search = [suba, sub_oth_no_hyphens].join(' ')
      next unless subj_display.present?

      {
        value: subj_display,
        value_for_link: subj_search,
        link_type: 'subject_search'
      }
    end
    built.compact.select { |val| seen.add?(val) }
  else
    []
  end
end
|
881
|
+
|
882
|
+
# 11/2018: 650 _7 is also handled here
|
883
|
+
# Subject display values: delegates to get_subjects_from_600s_and_800 with
# indicator2 '0'.
#
# @param [MARC::Record] rec
# @return [Array<Hash>]
def get_subject_display(rec)
  get_subjects_from_600s_and_800(rec, '0')
end
|
886
|
+
|
887
|
+
# Children's subject display values: delegates to
# get_subjects_from_600s_and_800 with indicator2 '1'.
#
# @param [MARC::Record] rec
# @return [Array<Hash>]
def get_children_subject_display(rec)
  get_subjects_from_600s_and_800(rec, '1')
end
|
890
|
+
|
891
|
+
# Medical subject display values: delegates to
# get_subjects_from_600s_and_800 with indicator2 '2'.
#
# @param [MARC::Record] rec
# @return [Array<Hash>]
def get_medical_subject_display(rec)
  get_subjects_from_600s_and_800(rec, '2')
end
|
894
|
+
|
895
|
+
# Local subject display values: delegates to
# get_subjects_from_600s_and_800 with indicator2 '4'.
#
# @param [MARC::Record] rec
# @return [Array<Hash>]
def get_local_subject_display(rec)
  get_subjects_from_600s_and_800(rec, '4')
end
|
898
|
+
|
899
|
+
# Reads the stored default subjects from an indexed document.
#
# @param doc an object indexable by symbol key
# @return the value stored under :default_subject_stored_a
def get_subject_solrdoc_display(doc)
  doc[:default_subject_stored_a]
end
|
902
|
+
|
903
|
+
# Reads the stored children's subjects from an indexed document.
#
# @param doc an object indexable by symbol key
# @return the value stored under :childrens_subject_stored_a
def get_children_subject_solrdoc_display(doc)
  doc[:childrens_subject_stored_a]
end
|
906
|
+
|
907
|
+
# Reads the stored MeSH subjects from an indexed document.
#
# @param doc an object indexable by symbol key
# @return the value stored under :mesh_subject_stored_a
def get_medical_subject_solrdoc_display(doc)
  doc[:mesh_subject_stored_a]
end
|
910
|
+
|
911
|
+
# Reads the stored local subjects from an indexed document.
#
# @param doc an object indexable by symbol key
# @return the value stored under :local_subject_stored_a
def get_local_subject_solrdoc_display(doc)
  doc[:local_subject_stored_a]
end
|
914
|
+
|
915
|
+
# Derives the format facet values for a record.
#
# Location-driven formats win outright and are mutually exclusive:
# 'Manuscript', then 'Archive' (unless a location mentions cajs or nursing),
# then 'Microformat' (location, 245 $h, holding call number or 337 $a).
# Otherwise the record may receive any of the additive formats
# (Thesis/Dissertation, Conference/Event, Government document, Newspaper)
# plus exactly one format chosen from the two-character leader code.
# Whatever get_curated_format returns is appended in every case.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_format(rec)
  formats = []

  format_code = get_format_from_leader(rec)
  f008 = rec.fields('008').map(&:value).first || ''
  f007 = rec.fields('007').map(&:value)
  f260press = rec.fields('260').any? do |field|
    field.any? { |sf| sf.code == 'b' && sf.value =~ /press/i }
  end
  # first letter of every 006
  f006firsts = rec.fields('006').map { |field| field.value[0] }
  # all values of one subfield code across every field with the given tag
  subfield_values = lambda do |tag, code|
    rec.fields(tag).flat_map { |field| field.select { |sf| sf.code == code }.map(&:value) }
  end
  f245k = subfield_values.call('245', 'k')
  f245h = subfield_values.call('245', 'h')
  f337a = subfield_values.call('337', 'a')
  call_nums = rec.fields(EnrichedMarc::TAG_HOLDING).map do |field|
    # h gives us the 'Classification part' which contains strings like 'Microfilm'
    join_subfields(field, &subfield_in([EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART,
                                        EnrichedMarc::SUB_HOLDING_ITEM_PART]))
  end
  specific_locations = get_specific_location_values(rec)

  if specific_locations.any? { |loc| loc =~ /manuscripts/i }
    formats << 'Manuscript'
  elsif specific_locations.any? { |loc| loc =~ /archives/i } &&
        specific_locations.none? { |loc| loc =~ /cajs/i } &&
        specific_locations.none? { |loc| loc =~ /nursing/i }
    formats << 'Archive'
  elsif specific_locations.any? { |loc| loc =~ /micro/i } ||
        f245h.any? { |val| val =~ /micro/i } ||
        call_nums.any? { |val| val =~ /micro/i } ||
        f337a.any? { |val| val =~ /microform/i }
    formats << 'Microformat'
  else
    # these next 4 can have this format plus ONE of the formats down farther below
    formats << 'Thesis/Dissertation' if rec.fields('502').any? && format_code == 'tm'
    formats << 'Conference/Event' if rec.fields('111').any? || rec.fields('711').any?
    if !%w[c d i j].member?(format_code[0]) && %w[f i o].member?(f008[28]) && !f260press
      formats << 'Government document'
    end
    formats << 'Newspaper' if format_code == 'as' && (f008[21] == 'n' || f008[22] == 'e')

    # only one of these
    primary =
      if format_code.end_with?('i') || (format_code == 'am' && f006firsts.member?('m') && f006firsts.member?('s'))
        'Website/Database'
      elsif %w[aa ac am tm].member?(format_code) &&
            f245k.none? { |v| v =~ /kit/i } &&
            f245h.none? { |v| v =~ /micro/i }
        'Book'
      elsif %w[ca cb cd cm cs dm].member?(format_code)
        'Musical score'
      elsif format_code.start_with?('e') || format_code == 'fm'
        'Map/Atlas'
      elsif format_code == 'gm'
        # a 007 starting with 'v' wins; otherwise 'g' means projected graphic; default is video
        if f007.none? { |v| v.start_with?('v') } && f007.any? { |v| v.start_with?('g') }
          'Projected graphic'
        else
          'Video'
        end
      elsif %w[im jm jc jd js].member?(format_code)
        'Sound recording'
      elsif %w[km kd].member?(format_code)
        'Image'
      elsif format_code == 'mm'
        'Datafile'
      elsif %w[as gs].member?(format_code)
        'Journal/Periodical'
      elsif format_code.start_with?('r')
        '3D object'
      else
        'Other'
      end
    formats << primary
  end
  formats.concat(get_curated_format(rec))
end
|
1004
|
+
|
1005
|
+
# returns two-char format code from MARC leader, representing two fields:
|
1006
|
+
# "Type of record" and "Bibliographic level"
|
1007
|
+
# Extracts leader positions 6 and 7 as a two-character string.
#
# @param [MARC::Record] rec
# @return [String, nil] nil when the leader is shorter than six characters
def get_format_from_leader(rec)
  rec.leader[6, 2]
end
|
1010
|
+
|
1011
|
+
# Gathers format display strings from 300, 254/255/310/342/352/362 and 340
# fields, followed by the 880 fields linked (via $6) to any of those tags.
# Each group excludes its own set of subfields before joining; blank results
# are dropped.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_format_display(rec)
  physical = rec.fields('300').map do |field|
    join_subfields(field, &subfield_not_in(%w[3 6 8]))
  end
  descriptive = rec.fields(%w[254 255 310 342 352 362]).map do |field|
    join_subfields(field, &subfield_not_in(%w[6 8]))
  end
  carrier = rec.fields('340').map do |field|
    join_subfields(field, &subfield_not_in(%w[0 2 6 8]))
  end
  linked = rec.fields('880').map do |field|
    if has_subfield6_value(field, /^300/)
      join_subfields(field, &subfield_not_in(%w[3 6 8]))
    elsif has_subfield6_value(field, /^(254|255|310|342|352|362)/)
      join_subfields(field, &subfield_not_in(%w[6 8]))
    elsif has_subfield6_value(field, /^340/)
      join_subfields(field, &subfield_not_in(%w[0 2 6 8]))
    else
      # unrelated 880: blank placeholder, filtered out below
      []
    end
  end
  (physical + descriptive + carrier + linked).select(&:present?)
end
|
1035
|
+
|
1036
|
+
# Counts the record's EnrichedMarc::TAG_ITEM fields.
#
# @param [MARC::Record] rec
# @return [Integer, nil] nil rather than 0 when there are none
def get_itm_count(rec)
  count = rec.fields(EnrichedMarc::TAG_ITEM).size
  count.zero? ? nil : count
end
|
1040
|
+
|
1041
|
+
# Counts the record's EnrichedMarc::TAG_HOLDING fields.
#
# @param [MARC::Record] rec
# @return [Integer, nil] nil rather than 0 when there are none
def get_hld_count(rec)
  count = rec.fields(EnrichedMarc::TAG_HOLDING).size
  count.zero? ? nil : count
end
|
1045
|
+
|
1046
|
+
# Counts holding fields that no item field points at.
#
# Item fields reference their holding through the first $r; a holding is
# identified by its first $8. A holding without a $8 is counted as empty
# unless... no item can reference it, so it always counts.
#
# @param [MARC::Record] rec
# @return [Integer]
def get_empty_hld_count(rec)
  item_holding_ids = Set.new
  rec.each_by_tag(EnrichedMarc::TAG_ITEM) do |item|
    linked = item.find { |sf| sf.code == 'r' }
    item_holding_ids << linked.value if linked
  end

  orphaned = 0
  rec.each_by_tag(EnrichedMarc::TAG_HOLDING) do |holding|
    holding_id = holding.find { |sf| sf.code == '8' }&.value
    orphaned += 1 unless item_holding_ids.include?(holding_id)
  end
  orphaned
end
|
1065
|
+
|
1066
|
+
# Counts the record's EnrichedMarc::TAG_ELECTRONIC_INVENTORY fields.
#
# @param [MARC::Record] rec
# @return [Integer, nil] nil rather than 0 when there are none
def get_prt_count(rec)
  count = rec.fields(EnrichedMarc::TAG_ELECTRONIC_INVENTORY).size
  count.zero? ? nil : count
end
|
1070
|
+
|
1071
|
+
# Determines the access facet values ('At the library' / 'Online').
#
# Sources: every holding field ('At the library'), every electronic inventory
# field ('Online'), 856 fields with indicators 4 and not-2 whose $u contains
# 'hdl.library.upenn.edu' and whose $z does not mention 'Finding aid'
# ('Online'), and ETAS records per is_etas ('Online').
#
# @param [MARC::Record] rec
# @return [Array<String>] de-duplicated
def get_access_values(rec)
  from_inventory = rec.map do |f|
    case f.tag
    when EnrichedMarc::TAG_HOLDING then 'At the library'
    when EnrichedMarc::TAG_ELECTRONIC_INVENTORY then 'Online'
    end
  end.compact

  link_fields = rec.fields('856').select { |f| f.indicator1 == '4' && f.indicator2 != '2' }
  from_links = link_fields.flat_map do |field|
    subz = join_subfields(field, &subfield_in(%w[z]))
    field.find_all(&subfield_in(%w[u]))
         .select { |sf| !subz.include?('Finding aid') && sf.value.include?('hdl.library.upenn.edu') }
         .map { 'Online' }
  end

  values = from_inventory + from_links
  values << 'Online' if is_etas(rec)
  values.uniq
end
|
1093
|
+
|
1094
|
+
# True when any 977 field carries a $e whose value is exactly 'ETAS'.
#
# @param [MARC::Record] rec
# @return [TrueClass, FalseClass]
def is_etas(rec)
  rec.fields('977').any? do |field|
    field.any? { |sf| sf.code == 'e' && sf.value == 'ETAS' }
  end
end
|
1101
|
+
|
1102
|
+
# examines a 1xx datafield and constructs a string out of select
|
1103
|
+
# subfields, including expansion of 'relator' code
|
1104
|
+
# Renders a name field as a single string.
#
# Subfields 0, 1, 6 and 8 are ignored; $4 is replaced by ", " plus its
# relator_codes lookup; every other subfield value is appended after a space.
# A period is added unless the text already ends in '.' or '-', and the result
# is passed through normalize_space.
#
# @param field the field to render
# @return the normalize_space result
def get_name_1xx_field(field)
  joined = field.map do |sf|
    case sf.code
    when '4'
      ", #{relator_codes[sf.value]}"
    when '0', '1', '6', '8'
      # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
      # added 2022/08/04: filter out 1 (URIs) added by MARCive project
      nil
    else
      " #{sf.value}"
    end
  end.compact.join
  joined += '.' unless %w[. -].member?(joined[-1])
  normalize_space(joined)
end
|
1117
|
+
|
1118
|
+
# Renders a series field as a single string.
#
# Subfields 0, 5, 6 and 8 are ignored; $4 is replaced by ", " plus its
# relator_codes lookup; every other subfield value is appended after a space.
# A period is added unless the text already ends in '.' or '-', and the result
# is passed through normalize_space.
#
# @param field the field to render
# @return the normalize_space result
def get_series_8xx_field(field)
  joined = field.map do |sf|
    case sf.code
    when '4'
      ", #{relator_codes[sf.value]}"
    when '0', '5', '6', '8'
      # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
      nil
    else
      " #{sf.value}"
    end
  end.compact.join
  joined += '.' unless %w[. -].member?(joined[-1])
  normalize_space(joined)
end
|
1130
|
+
|
1131
|
+
# Renders a series statement field as a single string.
#
# Subfields 0, 6 and 8 are ignored; $4 is replaced by ", " plus its
# relator_codes lookup; every other subfield value is appended after a space.
# A period is added unless the text already ends in '.' or '-', and the result
# is passed through normalize_space.
#
# @param field the field to render
# @return the normalize_space result
def get_series_4xx_field(field)
  joined = field.map do |sf|
    case sf.code
    when '4'
      ", #{relator_codes[sf.value]}"
    when '0', '6', '8'
      # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
      nil
    else
      " #{sf.value}"
    end
  end.compact.join
  joined += '.' unless %w[. -].member?(joined[-1])
  normalize_space(joined)
end
|
1143
|
+
|
1144
|
+
# Collects publication strings for indexing.
#
# Always includes every 245 $f. Then either the first 260/261/262 field (all
# subfields except $6), or, when no such field exists, a single string built
# from the first 264 with indicator2 '1' ($a/$b, then $c) and the $c of the
# first 264 with indicator2 '4' (prefixed with ', ' when the former had a $c).
# Values are stripped and blanks removed.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_publication_values(rec)
  values = rec.fields('245').flat_map do |field|
    field.select { |sf| sf.code == 'f' }.map(&:value)
  end

  imprint = rec.fields(%w[260 261 262]).first
  if imprint
    values << join_and_trim_whitespace(imprint.select { |sf| sf.code != '6' }.map(&:value))
  else
    publication = rec.fields.find { |field| field.tag == '264' && field.indicator2 == '1' }
    copyright = rec.fields.find { |field| field.tag == '264' && field.indicator2 == '4' }

    sf_ab264 = publication ? publication.find_all(&subfield_in(%w[a b])).map(&:value) : []
    sf_c264_1 = publication ? publication.find_all(&subfield_in(['c'])).map(&:value) : []
    separator = sf_c264_1.present? ? ', ' : ''
    sf_c264_4 =
      if copyright
        copyright.select { |sf| sf.code == 'c' }.map { |sf| separator + sf.value }
      else
        []
      end

    values << [sf_ab264, sf_c264_1, sf_c264_4].join(' ')
  end

  values.map(&:strip).select(&:present?)
end
|
1184
|
+
|
1185
|
+
# Collects publication strings for display, in this order: $f of the first
# 245; the first 260/261/262; the first 880 linked to 260/261/262; $f of every
# 880 linked to 245; and whatever get_264_or_880_fields(rec, '1') returns.
# Blank entries are removed.
#
# @param [MARC::Record] rec
# @return [Array]
def get_publication_display(rec)
  values = rec.fields('245').take(1).flat_map do |field|
    field.select { |sf| sf.code == 'f' }.map(&:value)
  end
  values += rec.fields(%w[260 261 262]).take(1).map do |field|
    join_subfields(field, &subfield_not_6_or_8)
  end
  linked_imprints = rec.fields('880').select { |f| has_subfield6_value(f, /^(260|261|262)/) }
  values += linked_imprints.take(1).map do |field|
    join_subfields(field, &subfield_not_6_or_8)
  end
  linked_titles = rec.fields('880').select { |f| has_subfield6_value(f, /^245/) }
  values += linked_titles.map do |field|
    join_subfields(field, &subfield_in(['f']))
  end
  values += get_264_or_880_fields(rec, '1')
  values.select(&:present?)
end
|
1209
|
+
|
1210
|
+
# Maps the three characters at 008 positions 35-37 through the `languages`
# lookup, for every 008 field. Fields too short to have that slice, and codes
# with no mapping, are skipped.
#
# @param [MARC::Record] rec
# @return [Array]
def get_language_values(rec)
  rec.fields('008')
     .map { |field| field.value[35, 3] }
     .compact
     .map { |lang_code| languages[lang_code] }
     .compact
end
|
1218
|
+
|
1219
|
+
# fieldname = name of field in the locations data structure to use
|
1220
|
+
# Maps the record's location codes to one attribute of the `locations` data.
#
# In holdings records, the shelving location is always the permanent location.
# In item records, the current location takes into account temporary and
# permanent locations. Since item records may reflect locations more
# accurately, they are used when any exist; otherwise the holdings are used.
#
# @param [MARC::Record] rec
# @param display_fieldname name of the field in the locations data structure to use
# @return [Array] mapped values, plus 'Online library' when the record has
#   electronic inventory
def holdings_location_mappings(rec, display_fieldname)
  tag, subfield_code =
    if rec.fields(EnrichedMarc::TAG_ITEM).size > 0
      [EnrichedMarc::TAG_ITEM, EnrichedMarc::SUB_ITEM_CURRENT_LOCATION]
    else
      [EnrichedMarc::TAG_HOLDING, EnrichedMarc::SUB_HOLDING_SHELVING_LOCATION]
    end

  mapped = rec.fields(tag).flat_map do |field|
    # we don't facet for 'web' which is the 'Penn Library Web' location used in Voyager.
    # this location should eventually go away completely with data cleanup in Alma.
    codes = field.select { |sf| sf.code == subfield_code && sf.value != 'web' }
    # sometimes "happening locations" are mistakenly used in holdings records.
    # that's a data problem that should be fixed.
    # here, if we encounter a code we can't map, we ignore it, for faceting purposes.
    looked_up = codes.map do |sf|
      locations[sf.value][display_fieldname] if locations[sf.value].present?
    end
    # flatten multiple 'library' values
    looked_up.select(&:present?).flatten
  end.uniq

  mapped << 'Online library' if rec.fields(EnrichedMarc::TAG_ELECTRONIC_INVENTORY).any?
  mapped
end
|
1260
|
+
|
1261
|
+
# Summarizes how many of the record's items sit in the 'vanpNocirc' current
# location.
#
# @param [MARC::Record] rec
# @return [String] 'na' when there are no items, otherwise 'all', 'none' or
#   'partial'
def items_nocirc(rec)
  items = rec.fields(EnrichedMarc::TAG_ITEM)
  return 'na' if items.empty?

  nocirc_flags = items.map do |item|
    item.any? do |sf|
      sf.code == EnrichedMarc::SUB_ITEM_CURRENT_LOCATION && sf.value == 'vanpNocirc'
    end
  end

  return 'all' if nocirc_flags.all?
  return 'none' if nocirc_flags.none?

  'partial'
end
|
1284
|
+
|
1285
|
+
# Library facet values: holdings_location_mappings using the 'library'
# attribute of each location.
#
# @param [MARC::Record] rec
# @return [Array]
def get_library_values(rec)
  holdings_location_mappings(rec, 'library')
end
|
1288
|
+
|
1289
|
+
# Specific-location facet values: holdings_location_mappings using the
# 'specific_location' attribute of each location.
#
# @param [MARC::Record] rec
# @return [Array]
def get_specific_location_values(rec)
  holdings_location_mappings(rec, 'specific_location')
end
|
1292
|
+
|
1293
|
+
# Looks up the character at leader position 17 in EncodingLevel::RANK.
#
# @param [MARC::Record] rec
# @return the RANK entry for that character (nil if RANK has no such key —
#   assuming RANK is a plain Hash; confirm in EncodingLevel)
def get_encoding_level_rank(rec)
  EncodingLevel::RANK[rec.leader[17]]
end
|
1296
|
+
|
1297
|
+
# Builds the date hash for a record from its first 008 field.
#
# Position 6 is the date type, positions 7-10 the first date and 11-14 the
# second. DateType::MAP translates the type into the way the two dates are
# handed to build_dates_hash.
#
# @param [MARC::Record] rec
# @return [Hash, nil] nil when there is no 008, the 008 is too short, or the
#   date type is not one of the mapped kinds
def prepare_dates(rec)
  f008 = rec.fields('008').first
  return nil unless f008

  fixed = f008.value
  date_type = fixed[6]
  return nil unless date_type

  date1 = fixed[7, 4]
  return nil unless date1

  date2 = fixed[11, 4]
  case DateType::MAP[date_type]
  when :single then build_dates_hash(date1)
  when :lower_bound then build_dates_hash(date1, '9999')
  when :range then build_dates_hash(date1, date2)
  when :separate_content then build_dates_hash(date1, nil, date2)
  end
end
|
1317
|
+
|
1318
|
+
# Turns raw date strings into the hash of date values used for indexing.
#
# Unknown digits ('u') are resolved through sanitize_date: with '0' for range
# starts and '9' for range ends. When the end date is missing or invalid, the
# start date (with 'u' -> '9') serves as the end. When the start sorts after
# the end, the date type is assumed to be mis-coded: the end falls back to the
# start and the raw end date is treated as the content date.
#
# @param raw_pub_date_start raw first date
# @param raw_pub_date_end raw second date, optional
# @param content_date raw content date, optional
# @return [Hash, nil] nil when the start date is invalid
def build_dates_hash(raw_pub_date_start, raw_pub_date_end = nil, content_date = nil)
  pub_date_start = sanitize_date(raw_pub_date_start, '0')
  return nil if pub_date_start.nil?

  fallback_end = sanitize_date(raw_pub_date_start, '9')
  pub_date_end = sanitize_date(raw_pub_date_end, '9') if raw_pub_date_end
  if pub_date_end
    if pub_date_start > pub_date_end
      # assume date type coded incorrectly; use date2 as content_date
      pub_date_end = fallback_end
      content_date = raw_pub_date_end
    end
  else
    pub_date_end = fallback_end
  end

  content_date_start = pub_date_start
  content_date_end = pub_date_end
  unless content_date.nil?
    if content_date =~ /^[0-9]{4}$/
      content_date_start = content_date_end = content_date
    else
      sanitized_start = sanitize_date(content_date, '0')
      # an invalid separate content date leaves the pub_date fallback in place
      if sanitized_start
        content_date_start = sanitized_start
        content_date_end = sanitize_date(content_date, '9')
      end
    end
  end

  {
    pub_date_sort: pub_date_start,
    pub_date_decade: current_year + 15 > pub_date_start.to_i ? pub_date_start[0, 3] + '0s' : nil,
    pub_date_range: "[#{pub_date_start} TO #{pub_date_end}]",
    content_date_range: "[#{content_date_start} TO #{content_date_end}]",
    pub_date_minsort: "#{pub_date_start}-01-01T00:00:00Z",
    pub_date_maxsort: "#{pub_date_end.to_i + 1}-01-01T00:00:00Z",
    content_date_minsort: "#{content_date_start}-01-01T00:00:00Z",
    content_date_maxsort: "#{content_date_end.to_i + 1}-01-01T00:00:00Z"
  }
end
|
1356
|
+
|
1357
|
+
# Validates a raw date string and resolves its unknown digits.
#
# A valid input consists of digits optionally followed by 'u' placeholders
# (e.g. "19uu"); each 'u' is replaced with `replace`. The pattern is anchored
# with \A and \z so the WHOLE string must match: the previous ^/$ anchors
# matched per line and let multi-line input through when any one line was
# valid.
#
# @param [String, nil] input raw date
# @param [String] replace replacement for each 'u'
# @return [String, nil] nil when input is nil or not a valid date pattern
def sanitize_date(input, replace)
  return nil if input !~ /\A[0-9]*u*\z/

  input.gsub(/u/, replace)
end
|
1361
|
+
|
1362
|
+
# Extracts positions 7-10 of every 008 field, drops blank slices, and
# replaces each non-digit character with '0'.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def publication_date_digits(rec)
  raw_years = rec.fields('008').map { |field| field.value[7, 4] }
  raw_years.select(&:present?).map { |year| year.gsub(/\D/, '0') }
end
|
1367
|
+
|
1368
|
+
# Decade labels (e.g. "1990s") for the record's publication years.
#
# Years must start with a non-zero digit followed by a digit and be less than
# fifteen years past current_year.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_publication_date_values(rec)
  plausible = publication_date_digits(rec).select do |year|
    year =~ /^[1-9][0-9]/ && current_year + 15 > year.to_i
  end
  plausible.map { |year| year[0, 3] + '0s' }
end
|
1373
|
+
|
1374
|
+
# Sortable publication years: the unfiltered output of publication_date_digits.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_publication_date_sort_values(rec)
  publication_date_digits(rec)
end
|
1377
|
+
|
1378
|
+
# Builds "<class> - <description>" labels from the first character of each
# item call number.
#
# Not sure whether it's better to use 'item' or 'holding' records here; 'item'
# is used only because it has a helpful call number type subfield, which the
# holding doesn't. Type '0' is looked up in loc_classifications; type '1' in
# dewey_classifications, with '00' appended to the digit for the label. Other
# types, and characters without a description, produce nothing.
#
# @param [MARC::Record] rec
# @return [Array<String>] de-duplicated
def get_classification_values(rec)
  labels = []
  rec.fields(EnrichedMarc::TAG_ITEM).each do |item|
    cn_type = item.find { |sf| sf.code == EnrichedMarc::SUB_ITEM_CALL_NUMBER_TYPE }&.value

    initials = item.select { |sf| sf.code == EnrichedMarc::SUB_ITEM_CALL_NUMBER }
                   .map(&:value)
                   .select(&:present?)
                   .map { |call_num| call_num[0] }
                   .compact

    initials.each do |initial|
      label, verbose =
        case cn_type
        when '0' then [initial, loc_classifications[initial]]
        when '1' then [initial + '00', dewey_classifications[initial]]
        end
      labels << [label, verbose].join(' - ') if verbose
    end
  end
  labels.uniq
end
|
1408
|
+
|
1409
|
+
# Genre facet values, produced only for videos and manuscripts.
#
# A record counts as a manuscript when any item's current location maps to a
# specific_location matching /manuscript/, and as a video when its first 007
# starts with 'v'. For such records each 655 is rendered as its subfield
# values (minus 0, 2, 5 and c) joined with spaces.
#
# @param [MARC::Record] rec
# @return [Array<String>] empty for all other records
def get_genre_values(rec)
  is_manuscript = rec.fields(EnrichedMarc::TAG_ITEM).any? do |item|
    loc = item[EnrichedMarc::SUB_ITEM_CURRENT_LOCATION]
    locations[loc].present? && (locations[loc]['specific_location'] =~ /manuscript/)
  end
  is_video = rec['007'].try { |r| r.value.start_with?('v') }
  return [] unless is_video || is_manuscript

  rec.fields('655').map do |field|
    field.find_all(&subfield_not_in(%w[0 2 5 c])).map(&:value).join(' ')
  end
end
|
1427
|
+
|
1428
|
+
# Genre search strings: every 655 joined via join_subfields, leaving out
# subfields 0, 2, 5 and c.
#
# @param [MARC::Record] rec
# @return [Array]
def get_genre_search_values(rec)
  rec.fields('655').map { |field| join_subfields(field, &subfield_not_in(%w[0 2 5 c])) }
end
|
1433
|
+
|
1434
|
+
# @param [MARC::Record] rec
|
1435
|
+
# @param [TrueClass, FalseClass] should_link
|
1436
|
+
# Builds genre display hashes for every field GenreTools accepts.
#
# :value is the field's subfields (minus 0, 2, 5, 6, 8, c, e, w) where $a and
# $b are separated by a space and all others by ' -- '. :value_append holds
# the $e/$w values joined by ' -- '.
#
# Fix: :value_append used to join the subfield objects themselves, so the text
# depended on their #to_s rendering rather than on their values; the values
# are now extracted first, as the subject display code does.
#
# @param [MARC::Record] rec
# @param [TrueClass, FalseClass] should_link
# @return [Array<Hash>] de-duplicated hashes with :value, :value_append,
#   :link and :link_type keys
def get_genre_display(rec, should_link)
  genre_fields = rec.fields.select { |field| GenreTools.allowed_genre_field? field }
  genre_fields.map do |field|
    sub_with_hyphens = field.find_all(&subfield_not_in(%w[0 2 5 6 8 c e w])).map do |sf|
      sep = %w[a b].member?(sf.code) ? ' ' : ' -- '
      sep + sf.value
    end.join.lstrip
    eandw_with_hyphens = field.find_all(&subfield_in(%w[e w])).map(&:value).join(' -- ')
    { value: sub_with_hyphens, value_append: eandw_with_hyphens, link: should_link, link_type: 'genre_search' }
  end.uniq
end
|
1449
|
+
|
1450
|
+
# Builds the title string from the first 245 field.
#
# The main part is the first $a or $k (trailing slash, whitespace and comma
# trimmed); the remainder is $b, $n and $p joined by spaces. If the main part
# or the first $h ends in '=' (or else ':'), that mark is re-inserted between
# the two parts after trimming it from the main part.
#
# @param [MARC::Record] rec
# @return [Array<String>] zero or one element
def get_title_values(rec)
  rec.fields('245').take(1).map do |field|
    a_or_k = field.find_all(&subfield_in(%w[a k]))
                  .map { |sf| trim_trailing_comma(trim_trailing_slash(sf.value).rstrip) }
                  .first || ''
    joined = field.find_all(&subfield_in(%w[b n p]))
                  .map { |sf| trim_trailing_slash(sf.value) }
                  .join(' ')

    hpunct = field.find_all { |sf| sf.code == 'h' }.map { |sf| sf.value[-1] }.first
    trailing_marks = [a_or_k[-1], hpunct]
    punct =
      if trailing_marks.member?('=')
        '='
      elsif trailing_marks.member?(':')
        ':'
      end

    [trim_trailing_colon(trim_trailing_equal(a_or_k)), punct, joined].select(&:present?).join(' ')
  end
end
|
1475
|
+
|
1476
|
+
# Builds one title string per 880 field linked (via $6) to a 245.
#
# The main part is the first $a, or the first $k when $a is blank. It is
# followed by '=' or ':' when the main part or the joined $h ends in that mark,
# then by the first $b, $n and $p (trailing slashes trimmed). Blank pieces are
# left out.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_title_880_values(rec)
  linked_titles = rec.fields('880').select { |f| has_subfield6_value(f, /^245/) }
  linked_titles.map do |field|
    first_value = ->(code) { field.find_all(&subfield_in([code])).first.try(:value) }

    suba_value = first_value.call('a')
    subk_value = first_value.call('k') || ''
    title_with_slash = suba_value.present? ? suba_value : (subk_value + ' ')
    title_ak = trim_trailing_comma(join_and_trim_whitespace([trim_trailing_slash(title_with_slash)]))

    subh = join_and_trim_whitespace(field.find_all(&subfield_in(%w[h])).map(&:value))

    trailing_marks = [title_ak[-1], subh[-1]]
    punct =
      if trailing_marks.member?('=')
        '='
      elsif trailing_marks.member?(':')
        ':'
      end

    pieces = [
      trim_trailing_equal(title_ak),
      punct,
      trim_trailing_slash(first_value.call('b') || ''),
      trim_trailing_slash(first_value.call('n') || ''),
      trim_trailing_slash(first_value.call('p') || '')
    ]
    pieces.select(&:present?).join(' ')
  end
end
|
1506
|
+
|
1507
|
+
# Splits a leading '[' off a string.
#
# @param [String] s
# @return [Hash] 'prefix' is '[' or '', 'filing' is the rest of the string
def separate_leading_bracket_into_prefix_and_filing_hash(s)
  return { 'prefix' => '', 'filing' => s } unless s.start_with?('[')

  { 'prefix' => '[', 'filing' => s[1..-1] }
end
|
1514
|
+
|
1515
|
+
# Splits each title field into a non-filing 'prefix' and a 'filing' part.
#
# A single-digit indicator2 gives the number of leading $a characters that
# form the prefix. With an offset of 0, a leading '[' of $a (or of $k when $a
# is blank) becomes the prefix instead. $b, $n and $p are appended to the
# filing part.
#
# Fix: a field with a non-digit indicator2 (when support_invalid_indicator2 is
# false) is now skipped with `next`. The previous `return []` sat inside the
# block and therefore aborted the whole method, throwing away the titles of
# every other field; the trailing `compact` shows skipping was the intent.
#
# @param fields title fields (245, or 880 linked to 245)
# @param [TrueClass, FalseClass] support_invalid_indicator2 when true, a
#   non-digit indicator2 is treated as offset 0; when false, the field is skipped
# @return [Array<Hash>] hashes with 'prefix' and 'filing' keys
def get_title_from_245_or_880(fields, support_invalid_indicator2 = true)
  fields.map do |field|
    if field.indicator2 =~ /^[0-9]$/
      offset = field.indicator2.to_i
    elsif support_invalid_indicator2
      offset = 0 # default to 0
    else
      next
    end

    suba = join_subfields(field, &subfield_in(%w[a]))
    value =
      if offset > 0
        { 'prefix' => suba[0..offset - 1], 'filing' => suba[offset..-1] }
      elsif suba.present?
        separate_leading_bracket_into_prefix_and_filing_hash(suba)
      else
        subk = join_subfields(field, &subfield_in(%w[k]))
        separate_leading_bracket_into_prefix_and_filing_hash(subk)
      end
    value['filing'] = [value['filing'], join_subfields(field, &subfield_in(%w[b n p]))].join(' ')
    value
  end.compact
end
|
1542
|
+
|
1543
|
+
# Prefix/filing breakdown of the record's first 245 field.
#
# @param [MARC::Record] rec
# @param [TrueClass, FalseClass] support_invalid_indicator2 passed through to
#   get_title_from_245_or_880
# @return [Array<Hash>]
def get_title_245(rec, support_invalid_indicator2 = true)
  first_title = rec.fields('245').take(1)
  get_title_from_245_or_880(first_title, support_invalid_indicator2)
end
|
1546
|
+
|
1547
|
+
# Prefix/filing breakdown of every 880 field linked (via $6) to a 245.
#
# @param [MARC::Record] rec
# @return [Array<Hash>]
def get_title_880_for_xfacet(rec)
  linked_titles = rec.fields('880').select { |f| has_subfield6_value(f, /^245/) }
  get_title_from_245_or_880(linked_titles)
end
|
1550
|
+
|
1551
|
+
# Title xfacet values: the 245 title followed by the linked 880 titles, each
# passed through `references`.
#
# @param [MARC::Record] rec
# @return [Array]
def get_title_xfacet_values(rec)
  # 6/16/2017: added 880 to this field for non-roman char handling
  titles = get_title_245(rec) + get_title_880_for_xfacet(rec)
  titles.map { |title| references(title) }
end
|
1559
|
+
|
1560
|
+
# Title sort keys: the filing part of the 245 title with the non-filing
# prefix moved to the end.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_title_sort_values(rec)
  get_title_245(rec).map { |title| title['filing'] + title['prefix'] }
end
|
1565
|
+
|
1566
|
+
# Filing part of the 245 title, without the non-filing prefix.
#
# @param [MARC::Record] rec
# @param [TrueClass, FalseClass] support_invalid_indicator2 passed through to
#   get_title_245
# @return [Array<String>]
def get_title_sort_filing_parts(rec, support_invalid_indicator2 = true)
  get_title_245(rec, support_invalid_indicator2).map { |title| title['filing'] }
end
|
1571
|
+
|
1572
|
+
# Feeds the variant-title fields through do_title_variant_field, in order:
# 130, 240, 210, 222, 246. Each entry names the tag, which indicator (1, 2 or
# none) is handed on as the non-filing indicator, and the subfield codes to
# read.
#
# @param [MARC::Record] rec
# @param acc accumulator handed to do_title_variant_field
# @return the result of the last do_title_variant_field call
def append_title_variants(rec, acc)
  variant_specs = [
    ['130', 1, %w[a]],
    ['240', 2, %w[a]],
    ['210', nil, %w[a b]],
    ['222', 2, %w[a b]],
    ['246', nil, %w[a b]]
  ]
  variant_specs.map do |tag, non_filing_indicator, codes|
    do_title_variant_field(rec, acc, tag, non_filing_indicator, *codes)
  end.last
end
|
1579
|
+
|
1580
|
+
# Passes the requested subfields of every `field_id` field to
# append_title_variant_field.
#
# For each field, the first subfield matching each requested code is read.
# Fields lacking the first requested code are skipped; other missing codes
# are simply left out.
#
# @param [MARC::Record] rec
# @param acc accumulator handed to append_title_variant_field
# @param [String] field_id tag of the fields to process
# @param [Integer, nil] non_filing_indicator 1 or 2 selects the indicator
#   whose value is passed along; anything else passes nil
# @param [Array<String>] subfields_spec subfield codes, first one mandatory
def do_title_variant_field(rec, acc, field_id, non_filing_indicator, *subfields_spec)
  rec.fields(field_id).each do |field|
    parts = subfields_spec.map do |code|
      field.find { |subfield| subfield.code == code }&.value
    end
    next if parts.first.nil?

    parts.compact!
    non_filing =
      case non_filing_indicator
      when 1 then field.indicator1
      when 2 then field.indicator2
      end
    append_title_variant_field(acc, non_filing, parts)
  end
end
|
1599
|
+
|
1600
|
+
# Primary title-search values: the 245 fields followed by the 880 fields
# that has_subfield6_value matches against /^245/. Subfields c, 6, 8 and h
# are left out; blank results are dropped.
#
# @param rec [#fields] MARC record
# @param format_filter [Boolean] when true, values are produced only if the
#   format from get_format_from_leader ends with 's'
# @return [Array<String>]
def get_title_1_search_main_values(rec, format_filter: false)
  format = get_format_from_leader(rec)

  # Same extraction for the 245s and their linked 880s.
  title_text = lambda do |field|
    next if format_filter && !format.end_with?('s')

    kept = field.find_all(&subfield_not_in(%w{c 6 8 h}))
    join_and_trim_whitespace(kept.map(&:value))
  end

  direct = rec.fields('245').map(&title_text).select(&:present?)
  linked_fields = rec.fields('880').select { |f| has_subfield6_value(f, /^245/) }
  direct + linked_fields.map(&title_text).select(&:present?)
end
|
1616
|
+
|
1617
|
+
# Primary title-search values without the format filter.
#
# @param rec [#fields] MARC record
# @return [Array<String>] see get_title_1_search_main_values
def get_title_1_search_values(rec)
  unfiltered = get_title_1_search_main_values(rec)
  unfiltered
end
|
1620
|
+
|
1621
|
+
# Primary title-search values with the format filter switched on.
#
# @param rec [#fields] MARC record
# @return [Array<String>] see get_title_1_search_main_values
def get_journal_title_1_search_values(rec)
  journal_only = true
  get_title_1_search_main_values(rec, format_filter: journal_only)
end
|
1624
|
+
|
1625
|
+
# Memoized list of the "main" tags used by the secondary title search.
#
# @return [Array<String>]
def title_2_search_main_tags
  return @title_2_search_main_tags if @title_2_search_main_tags

  @title_2_search_main_tags = %w[130 210 240 245 246 247 440 490 730 740 830]
end
|
1628
|
+
|
1629
|
+
# Memoized list of the "aux" tags used by the secondary title search.
#
# @return [Array<String>]
def title_2_search_aux_tags
  return @title_2_search_aux_tags if @title_2_search_aux_tags

  @title_2_search_aux_tags = %w[773 774 780 785]
end
|
1632
|
+
|
1633
|
+
# Memoized list of the 7xx tags used by the secondary title search.
#
# @return [Array<String>]
def title_2_search_7xx_tags
  return @title_2_search_7xx_tags if @title_2_search_7xx_tags

  @title_2_search_7xx_tags = %w[700 710 711]
end
|
1636
|
+
|
1637
|
+
# Secondary title-search values from the tags in title_2_search_main_tags,
# leaving out subfields c, 6 and 8. Blank results are dropped.
#
# @param rec [#fields] MARC record
# @param format_filter [Boolean] when true, values are produced only if the
#   format from get_format_from_leader ends with 's'
# @return [Array<String>]
def get_title_2_search_main_values(rec, format_filter: false)
  format = get_format_from_leader(rec)
  values = rec.fields(title_2_search_main_tags).map do |field|
    next if format_filter && !format.end_with?('s')

    kept = field.find_all(&subfield_not_in(%w{c 6 8}))
    join_and_trim_whitespace(kept.map(&:value))
  end
  values.select(&:present?)
end
|
1645
|
+
|
1646
|
+
# Secondary title-search values from the tags in title_2_search_aux_tags
# (773, 774, 780, 785). Blank results are dropped.
#
# Only subfields s and t are indexed. The previous implementation used
# subfield_not_in(%w{s t}), which kept every subfield *except* those two;
# that is the inverse of how get_title_2_search_800_values treats the 880
# fields linked to these same tags (subfield_in(%w{s t})).
#
# @param rec [#fields] MARC record
# @param format_filter [Boolean] when true, values are produced only if the
#   format from get_format_from_leader ends with 's'
# @return [Array<String>]
def get_title_2_search_aux_values(rec, format_filter: false)
  format = get_format_from_leader(rec)
  values = rec.fields(title_2_search_aux_tags).map do |field|
    next if format_filter && !format.end_with?('s')

    title_subfields = field.find_all(&subfield_in(%w{s t}))
    join_and_trim_whitespace(title_subfields.map(&:value))
  end
  values.select(&:present?)
end
|
1654
|
+
|
1655
|
+
# Secondary title-search values from the tags in title_2_search_7xx_tags,
# using subfield t only. Blank results are dropped.
#
# @param rec [#fields] MARC record
# @param format_filter [Boolean] when true, values are produced only if the
#   format from get_format_from_leader ends with 's'
# @return [Array<String>]
def get_title_2_search_7xx_values(rec, format_filter: false)
  format = get_format_from_leader(rec)
  values = rec.fields(title_2_search_7xx_tags).map do |field|
    next if format_filter && !format.end_with?('s')

    title_subfields = field.find_all(&subfield_in(%w{t}))
    join_and_trim_whitespace(title_subfields.map(&:value))
  end
  values.select(&:present?)
end
|
1663
|
+
|
1664
|
+
# Secondary title-search values from 505 fields whose two indicators are
# both '0', using subfield t only. Blank results are dropped.
#
# @param rec [#fields] MARC record
# @param format_filter [Boolean] when true, values are produced only if the
#   format from get_format_from_leader ends with 's'
# @return [Array<String>]
def get_title_2_search_505_values(rec, format_filter: false)
  format = get_format_from_leader(rec)
  eligible = rec.fields('505').select do |field|
    field.indicator1 == '0' && field.indicator2 == '0'
  end
  values = eligible.map do |field|
    next if format_filter && !format.end_with?('s')

    title_subfields = field.find_all(&subfield_in(%w{t}))
    join_and_trim_whitespace(title_subfields.map(&:value))
  end
  values.select(&:present?)
end
|
1674
|
+
|
1675
|
+
# Secondary title-search values from 880 fields, grouped by the tag prefix
# found in their subfield 6:
#   * main tags (130 ... 830): every subfield except c, 6, 8, h
#   * 773/774/780/785:         subfields s and t
#   * 700/710/711:             subfield t
#   * 505 (both indicators '0'): subfield t
# Results are concatenated in that order; blank values are dropped.
#
# @param rec [#fields] MARC record
# @param format_filter [Boolean] when true, values are produced only if the
#   format from get_format_from_leader ends with 's'
# @return [Array<String>]
def get_title_2_search_800_values(rec, format_filter: false)
  format = get_format_from_leader(rec)

  # 880 fields having a subfield 6 whose value matches the pattern.
  linked_to = lambda do |pattern|
    rec.fields('880').select do |f|
      f.any? { |sf| sf.code == '6' && sf.value =~ pattern }
    end
  end

  # Joined subfield values for each field, restricted by the picker.
  extract = lambda do |fields, picker|
    texts = fields.map do |field|
      next if format_filter && !format.end_with?('s')

      join_and_trim_whitespace(field.find_all(&picker).map(&:value))
    end
    texts.select(&:present?)
  end

  linked_main = linked_to.call(/^(130|210|240|245|246|247|440|490|730|740|830)/)
  linked_aux = linked_to.call(/^(773|774|780|785)/)
  linked_7xx = linked_to.call(/^(700|710|711)/)
  linked_505 = rec.fields('880')
                  .select { |f| f.indicator1 == '0' && f.indicator2 == '0' }
                  .select { |f| f.any? { |sf| sf.code == '6' && sf.value =~ /^505/ } }

  extract.call(linked_main, subfield_not_in(%w{c 6 8 h})) +
    extract.call(linked_aux, subfield_in(%w{s t})) +
    extract.call(linked_7xx, subfield_in(%w{t})) +
    extract.call(linked_505, subfield_in(%w{t}))
end
|
1709
|
+
|
1710
|
+
# All secondary title-search values, unfiltered: main, aux, 7xx, 505 and
# 880-derived values, concatenated in that order.
#
# @param rec [#fields] MARC record
# @return [Array<String>]
def get_title_2_search_values(rec)
  values = get_title_2_search_main_values(rec)
  values += get_title_2_search_aux_values(rec)
  values += get_title_2_search_7xx_values(rec)
  values += get_title_2_search_505_values(rec)
  values += get_title_2_search_800_values(rec)
  values
end
|
1717
|
+
|
1718
|
+
# All secondary title-search values with the format filter switched on:
# main, aux, 7xx, 505 and 880-derived values, concatenated in that order.
#
# @param rec [#fields] MARC record
# @return [Array<String>]
def get_journal_title_2_search_values(rec)
  values = get_title_2_search_main_values(rec, format_filter: true)
  values += get_title_2_search_aux_values(rec, format_filter: true)
  values += get_title_2_search_7xx_values(rec, format_filter: true)
  values += get_title_2_search_505_values(rec, format_filter: true)
  values += get_title_2_search_800_values(rec, format_filter: true)
  values
end
|
1725
|
+
|
1726
|
+
# this gets called directly by ShowPresenter rather than via
|
1727
|
+
# Blacklight's show field definition plumbing, so we return a single string
|
1728
|
+
# Builds the display title as ONE string: each 245 (minus subfields 6 and 8)
# followed by each value get_880 returns for tag 245, the latter prefixed
# with " = ". All pieces are joined with a single space.
#
# @param rec [#fields] MARC record
# @return [String]
def get_title_display(rec)
  main_titles = rec.fields('245').map do |field|
    join_subfields(field, &subfield_not_in(%w{6 8}))
  end
  linked_titles = get_880(rec, '245', &subfield_not_in(%w{6 8})).map do |value|
    " = #{value}"
  end
  (main_titles + linked_titles).join(' ')
end
|
1737
|
+
|
1738
|
+
# Memoized list of the tags treated as author/creator fields.
#
# @return [Array<String>]
def author_creator_tags
  return @author_creator_tags if @author_creator_tags

  @author_creator_tags = %w[100 110]
end
|
1741
|
+
|
1742
|
+
# Author/creator values: get_name_1xx_field applied to every field whose
# tag is in author_creator_tags.
#
# @param rec [#fields] MARC record
# @return [Array]
def get_author_creator_values(rec)
  creator_fields = rec.fields(author_creator_tags)
  creator_fields.map { |field| get_name_1xx_field(field) }
end
|
1747
|
+
|
1748
|
+
# Author values from the 880 fields that has_subfield6_value matches against
# /^(100|110)/, joining every subfield except 4, 6 and 8.
#
# @param rec [#fields] MARC record
# @return [Array] one joined value per matching 880 field
def get_author_880_values(rec)
  linked_authors = rec.fields('880').select do |field|
    has_subfield6_value(field, /^(100|110)/)
  end
  linked_authors.map do |field|
    kept = field.find_all(&subfield_not_in(%w{4 6 8}))
    join_and_trim_whitespace(kept.map(&:value))
  end
end
|
1755
|
+
|
1756
|
+
# Primary author-search values. Three groups are concatenated, in order:
#   1. each 100/110 with subfield a rearranged around its ", " separator
#      (text after it first, then text before it);
#   2. each 100/110 with its subfields as entered;
#   3. each 880 whose subfield 6 starts with 100 or 110, with the first
#      subfield a rearranged around "," and the remaining subfields appended.
# In groups 1 and 2 a subfield 4 is replaced by ", " plus its relator_codes
# lookup, and a '.' is appended unless the value already ends in '.' or '-'.
#
# @param rec [#fields] MARC record
# @return [Array<String>]
def get_author_creator_1_search_values(rec)
  finish = ->(text) { text.end_with?('.', '-') ? text : text + '.' }

  inverted = rec.fields(%w{100 110}).map do |field|
    pieces = field.map do |sf|
      case sf.code
      when 'a'
        tail = join_and_trim_whitespace([ trim_trailing_comma(substring_after(sf.value, ', ')) ])
        head = substring_before(sf.value, ', ')
        " #{tail} #{head}"
      when '4'
        ", #{relator_codes[sf.value]}"
      when '1', '6', '8'
        nil
      else
        " #{sf.value}"
      end
    end
    finish.call(join_and_trim_whitespace(pieces.compact))
  end

  as_entered = rec.fields(%w{100 110}).map do |field|
    pieces = field.map do |sf|
      case sf.code
      when '4' then ", #{relator_codes[sf.value]}"
      when '6', '8' then nil
      else " #{sf.value}"
      end
    end
    finish.call(join_and_trim_whitespace(pieces.compact))
  end

  linked_fields = rec.fields(%w{880}).select do |f|
    f.any? { |sf| sf.code == '6' && sf.value =~ /^(100|110)/ }
  end
  linked = linked_fields.map do |field|
    rearranged_names = field.find_all(&subfield_in(%w{a})).map do |sf|
      tail = join_and_trim_whitespace([ trim_trailing_comma(substring_after(sf.value, ',')) ])
      head = substring_before(sf.value, ',')
      "#{tail} #{head}"
    end
    remainder = join_and_trim_whitespace(field.find_all(&subfield_not_in(%w{6 8 a t})).map(&:value))
    [rearranged_names.first, remainder].join(' ')
  end

  inverted + as_entered + linked
end
|
1805
|
+
|
1806
|
+
# Memoized list of the tags used by the secondary author search.
#
# @return [Array<String>]
def author_creator_2_tags
  return @author_creator_2_tags if @author_creator_2_tags

  @author_creator_2_tags = %w[100 110 111 400 410 411 700 710 711 800 810 811]
end
|
1809
|
+
|
1810
|
+
# Secondary author-search values. For every field in author_creator_2_tags,
# and then for every 880 whose subfield 6 starts with one of those tags, two
# values are emitted: the name as entered, and the name with subfield a
# rearranged (text after the comma first, then the text before it).
#
# NOTE(review): the previous version contained two
# `if value.end_with?('.') || value.end_with?('-') ... else value + '.' end`
# expressions whose results were never assigned, so no trailing period was
# ever added here (unlike get_author_creator_1_search_values). That dead code
# is removed and the emitted values are unchanged. If a trailing period was
# actually intended, it has to be added deliberately.
#
# @param rec [#fields] MARC record
# @return [Array<String>] two values per matching field, in field order
def get_author_creator_2_search_values(rec)
  direct = rec.fields(author_creator_2_tags).flat_map do |field|
    as_entered = field.map do |sf|
      if !%W{1 4 5 6 8 t}.member?(sf.code)
        " #{sf.value}"
      elsif sf.code == '4'
        ", #{relator_codes[sf.value]}"
      end
    end.compact

    rearranged = field.map do |sf|
      if sf.code == 'a'
        # The two separators differ on purpose of fidelity: ', ' for the
        # tail and ',' for the head, exactly as before.
        after_comma = join_and_trim_whitespace([ trim_trailing_comma(substring_after(sf.value, ', ')) ])
        before_comma = substring_before(sf.value, ',')
        " #{after_comma} #{before_comma}"
      elsif !%W{a 4 5 6 8 t}.member?(sf.code)
        " #{sf.value}"
      elsif sf.code == '4'
        ", #{relator_codes[sf.value]}"
      end
    end.compact

    [ join_and_trim_whitespace(as_entered), join_and_trim_whitespace(rearranged) ]
  end

  linked_fields = rec.fields(%w{880}).select do |f|
    f.any? { |sf| sf.code == '6' && sf.value =~ /^(100|110|111|400|410|411|700|710|711|800|810|811)/ }
  end
  linked = linked_fields.flat_map do |field|
    as_entered = join_and_trim_whitespace(field.find_all(&subfield_not_in(%w{5 6 8 t})).map(&:value))

    suba = field.find_all(&subfield_in(%w{a})).map do |sf|
      after_comma = join_and_trim_whitespace([ trim_trailing_comma(substring_after(sf.value, ',')) ])
      before_comma = substring_before(sf.value, ',')
      "#{after_comma} #{before_comma}"
    end.first
    oth = join_and_trim_whitespace(field.find_all(&subfield_not_in(%w{5 6 8 a t})).map(&:value))

    [ as_entered, [ suba, oth ].join(' ') ]
  end

  direct + linked
end
|
1864
|
+
|
1865
|
+
# Author sort value: the first field in author_creator_tags (if any), joined
# without subfields 1, 4, 6, 8 and e.
#
# @param rec [#fields] MARC record
# @return [Array] empty, or a single joined value
def get_author_creator_sort_values(rec)
  leading = rec.fields(author_creator_tags).take(1)
  leading.map { |field| join_subfields(field, &subfield_not_in(%w[1 4 6 8 e])) }
end
|
1870
|
+
|
1871
|
+
# Display entries for authors: one hash per 100/110 field, followed by one
# per 880 field that has_subfield6_value matches against /^(100|110)/.
#
# Each hash has :value (selected subfield values joined by a space),
# :value_append (from get_subfield_4ew) and :link_type.
#
# @param rec [#fields] MARC record
# @return [Array<Hash>]
def get_author_display(rec)
  build_entry = lambda do |field, parts|
    {
      value: parts.join(' '),
      value_append: get_subfield_4ew(field),
      link_type: 'author_creator_xfacet2'
    }
  end

  direct = rec.fields(%w{100 110}).map do |field|
    # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
    # added 2022/08/04: filter out 1 (URIs) added by MARCive project
    shown = field.reject { |sf| %W{0 1 4 6 8 e w}.member?(sf.code) }
    build_entry.call(field, shown.map(&:value))
  end

  linked_fields = rec.fields('880').select { |field| has_subfield6_value(field, /^(100|110)/) }
  linked = linked_fields.map do |field|
    # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
    shown = field.reject { |sf| %W{0 4 6 8 e w}.member?(sf.code) }
    # a single trailing '?' is stripped from each linked value
    build_entry.call(field, shown.map { |sf| sf.value.gsub(/\?$/, '') })
  end

  direct + linked
end
|
1906
|
+
|
1907
|
+
# Corporate-author search values: subfields a, b, c and d of every 110, 710
# and 810 field, joined per field.
#
# @param rec [#fields] MARC record
# @return [Array] one joined value per field
def get_corporate_author_search_values(rec)
  rec.fields(%w{110 710 810}).map do |field|
    name_parts = field.select(&subfield_in(%w{a b c d}))
    join_and_trim_whitespace(name_parts.map(&:value))
  end
end
|
1912
|
+
|
1913
|
+
# Standardized-title values: every 130 and 240 field joined without
# subfields 0, 6 and 8.
#
# @param rec [#fields] MARC record
# @return [Array] one joined value per field
def get_standardized_title_values(rec)
  rec.fields(%w{130 240}).map do |field|
    # added 2017/05/15: filter out 0 (authority record numbers) added by Alma
    kept = field.find_all(&subfield_not_in(%W{0 6 8}))
    join_and_trim_whitespace(kept.map(&:value))
  end
end
|
1920
|
+
|
1921
|
+
# Display entries for standardized titles, in this order:
#   1. every 130/240 (these also carry :value_for_link, which additionally
#      drops subfield 5);
#   2. 730 fields without a subfield i whose indicator1 or indicator2 is '';
#   3. 880 fields matching /^(130|240|730)/ without a subfield i.
#
# NOTE(review): group 2 compares the indicators with the empty string only;
# get_conference_display accepts both '' and ' '. Confirm which form the
# indicators take for this data before relying on group 2.
#
# @param rec [#fields] MARC record
# @return [Array<Hash>]
def get_standardized_title_display(rec)
  uniform = rec.fields(%w{130 240}).map do |field|
    # added 2017/05/15: filter out 0 (authority record numbers) added by Alma
    {
      value: join_subfields(field, &subfield_not_in(%W{0 6 8 e w})),
      value_for_link: join_subfields(field, &subfield_not_in(%W{0 5 6 8 e w})),
      value_append: get_title_extra(field),
      link_type: 'title_search'
    }
  end

  plain_entry = lambda do |field|
    {
      value: join_subfields(field, &subfield_not_in(%w{5 6 8 e w})),
      value_append: get_title_extra(field),
      link_type: 'title_search'
    }
  end

  added = rec.fields('730')
             .select { |f| f.indicator1 == '' || f.indicator2 == '' }
             .reject { |f| f.any? { |sf| sf.code == 'i' } }
             .map(&plain_entry)

  linked = rec.fields('880')
              .select { |f| has_subfield6_value(f, /^(130|240|730)/) }
              .reject { |f| f.any? { |sf| sf.code == 'i' } }
              .map(&plain_entry)

  uniform + added + linked
end
|
1958
|
+
|
1959
|
+
# Edition value from the first 250 field only, joined without subfields 6
# and 8.
#
# @param rec [#fields] MARC record
# @return [Array] empty, or a single joined value
def get_edition_values(rec)
  rec.fields('250').take(1).map do |field|
    kept = field.find_all(&subfield_not_in(%w{6 8}))
    join_and_trim_whitespace(kept.map(&:value))
  end
end
|
1965
|
+
|
1966
|
+
# Edition display values: every 250, then every 880 matching /^250/, each
# joined without subfields 6 and 8.
#
# @param rec [#fields] MARC record
# @return [Array]
def get_edition_display(rec)
  render = ->(field) { join_subfields(field, &subfield_not_in(%W{6 8})) }

  direct = rec.fields('250').map(&render)
  linked = rec.fields('880').select { |f| has_subfield6_value(f, /^250/) }.map(&render)
  direct + linked
end
|
1978
|
+
|
1979
|
+
# Conference values: get_name_1xx_field applied to every 111 field.
#
# @param rec [#fields] MARC record
# @return [Array]
def get_conference_values(rec)
  conference_fields = rec.fields('111')
  conference_fields.map { |field| get_name_1xx_field(field) }
end
|
1984
|
+
|
1985
|
+
# Conference search values: subfields a, c, d and e of every 111, 711 and
# 811 field, joined per field.
#
# @param rec [#fields] MARC record
# @return [Array] one joined value per field
def get_conference_search_values(rec)
  rec.fields(%w{111 711 811}).map do |field|
    name_parts = field.select(&subfield_in(%w{a c d e}))
    join_and_trim_whitespace(name_parts.map(&:value))
  end
end
|
1990
|
+
|
1991
|
+
# Display entries for conferences.
#
# 111/711 fields with a blank indicator2 ('' or ' ') each produce an entry;
# when such a field has a subfield i, its :value is the empty string.
# 880 fields matching /^(111|711)/ that have no subfield i follow; for those
# the appended text also includes subfield 4.
#
# @param rec [#fields] MARC record
# @return [Array<Hash>] hashes with :value, :value_append and :link_type
def get_conference_display(rec)
  direct_fields = rec.fields(%w{111 711}).select { |f| ['', ' '].member?(f.indicator2) }
  direct = direct_fields.map do |field|
    name =
      if field.any? { |sf| sf.code == 'i' }
        ''
      else
        # added 2017/05/18: filter out 0 (authority record numbers) added by Alma
        join_subfields(field, &subfield_not_in(%w{0 4 5 6 8 e j w}))
      end
    suffix = join_subfields(field, &subfield_in(%w{e j w}))
    { value: name, value_append: suffix, link_type: 'author_creator_xfacet2' }
  end

  linked_fields = rec.fields('880')
                     .select { |f| has_subfield6_value(f, /^(111|711)/) }
                     .reject { |f| f.any? { |sf| sf.code == 'i' } }
  linked = linked_fields.map do |field|
    # added 2017/05/18: filter out 0 (authority record numbers) added by Alma
    name = join_subfields(field, &subfield_not_in(%w{0 4 5 6 8 e j w}))
    suffix = join_subfields(field, &subfield_in(%w{4 e j w}))
    { value: name, value_append: suffix, link_type: 'author_creator_xfacet2' }
  end

  direct + linked
end
|
2014
|
+
|
2015
|
+
# Series value from a single field: the first 800/810/811/830 if the record
# has one, otherwise the first 400/410/411/440/490.
#
# @param rec [#fields] MARC record
# @return [Array] empty, or one value from get_series_8xx_field /
#   get_series_4xx_field
def get_series_values(rec)
  first_8xx = rec.fields(%w{800 810 811 830}).first
  return [get_series_8xx_field(first_8xx)] if first_8xx

  rec.fields(%w{400 410 411 440 490}).take(1).map do |field|
    get_series_4xx_field(field)
  end
end
|
2029
|
+
|
2030
|
+
# Memoized, priority-ordered list of the series tags consulted by
# get_series_display.
#
# 410 was missing from this list although get_series_display explicitly
# handles it (both in its author-style branch and in its 880 pattern) and
# get_series_values reads it; without it a 410 series was never displayed.
#
# @return [Array<String>]
def series_tags
  @series_tags ||= %w[800 810 811 830 400 410 411 440 490]
end
|
2033
|
+
|
2034
|
+
# Display entries for series.
#
# The first tag of series_tags that is present on the record is the "lead"
# tag and its fields become linked entries: 'author_search' for
# 800/810/811/400/410/411, 'title_search' for 830/440/490. Fields of the
# remaining present tags, and 880 fields linked to any series tag, are added
# as unlinked entries (link: false).
#
# @param rec [#fields, #[]] MARC record
# @return [Array<Hash>]
def get_series_display(rec)
  tags_present = series_tags.select { |tag| rec[tag].present? }
  lead_tag = tags_present.first

  lead_entries =
    if %w{800 810 811 400 410 411}.member?(lead_tag)
      rec.fields(lead_tag).map do |field|
        # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
        series = join_subfields(field, &subfield_not_in(%w{0 5 6 8 e t w v n}))
        suffix_parts = field.map do |sf|
          if %w{e w v n t}.member?(sf.code)
            " #{sf.value}"
          elsif sf.code == '4'
            ", #{relator_codes[sf.value]}"
          end
        end
        { value: series, value_append: suffix_parts.join.strip, link_type: 'author_search' }
      end
    elsif %w{830 440 490}.member?(lead_tag)
      rec.fields(lead_tag).map do |field|
        # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
        series = join_subfields(field, &subfield_not_in(%w{0 5 6 8 c e w v n}))
        suffix = join_subfields(field, &subfield_in(%w{c e w v n}))
        { value: series, value_append: suffix, link_type: 'title_search' }
      end
    else
      []
    end

  other_entries = rec.fields(tags_present.drop(1)).map do |field|
    # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
    { value: join_subfields(field, &subfield_not_in(%w{0 5 6 8})), link: false }
  end

  linked_fields = rec.fields('880').select do |f|
    has_subfield6_value(f, /^(800|810|811|830|400|410|411|440|490)/)
  end
  linked_entries = linked_fields.map do |field|
    { value: join_subfields(field, &subfield_not_in(%W{5 6 8})), link: false }
  end

  lead_entries + other_entries + linked_entries
end
|
2077
|
+
|
2078
|
+
# Series search values, concatenated in this order:
#   * 400/410/411 with indicator2 '0' (without subfields 4, 6, 8)
#   * 400/410/411 with indicator2 '1' (additionally without subfield a)
#   * 440 (without 0, 5, 6, 8, w)
#   * 800/810/811 (without 0, 4, 5, 6, 7, 8, w)
#   * 830 (without 0, 5, 6, 7, 8, w)
#   * 533: its subfield f values with parentheses removed, space-joined
#
# @param rec [#fields] MARC record
# @return [Array]
def get_series_search_values(rec)
  joined = lambda do |fields, excluded|
    fields.map { |field| join_subfields(field, &subfield_not_in(excluded)) }
  end

  name_series_ind0 = rec.fields(%w{400 410 411}).select { |f| f.indicator2 == '0' }
  name_series_ind1 = rec.fields(%w{400 410 411}).select { |f| f.indicator2 == '1' }

  reproduction_series = rec.fields(%w{533}).map do |field|
    field.find_all { |sf| sf.code == 'f' }
         .map { |sf| sf.value.gsub(/\(|\)/, '') }
         .join(' ')
  end

  joined.call(name_series_ind0, %w{4 6 8}) +
    joined.call(name_series_ind1, %w{4 6 8 a}) +
    joined.call(rec.fields(%w{440}), %w{0 5 6 8 w}) +
    joined.call(rec.fields(%w{800 810 811}), %w{0 4 5 6 7 8 w}) +
    joined.call(rec.fields(%w{830}), %w{0 5 6 7 8 w}) +
    reproduction_series
end
|
2111
|
+
|
2112
|
+
# "Contained within" values: every 773 field joined without subfields 6, 7,
# 8 and w.
#
# @param rec [#fields] MARC record
# @return [Array] one joined value per 773 field
def get_contained_within_values(rec)
  rec.fields('773').map do |field|
    kept = field.find_all(&subfield_not_in(%w{6 7 8 w}))
    join_and_trim_whitespace(kept.map(&:value))
  end
end
|
2118
|
+
|
2119
|
+
# @return [Array] of hashes each describing a physical holding
|
2120
|
+
# Describes each physical holding found in the enriched-MARC holding fields
# (tag EnrichedMarc::TAG_HOLDING).
#
# Example of the enriched MARC this reads:
#   <datafield tag="hld" ind1="0" ind2=" ">
#     <subfield code="b">MAIN</subfield>
#     <subfield code="c">main</subfield>
#     <subfield code="h">NA2540</subfield>
#     <subfield code="i">.G63 2009</subfield>
#     <subfield code="8">226026380000541</subfield>
#   </datafield>
#
# @param rec [#fields] MARC record
# @return [Array<Hash>] hashes with :holding_id, :location,
#   :classification_part and :item_part
def get_physical_holdings(rec)
  holding_fields = rec.fields(EnrichedMarc::TAG_HOLDING)
  holding_fields.map do |holding|
    # Alma never populates subfield 'a' ('location'); it appears to store
    # the location code in 'c' and the display name in 'b'.
    {
      holding_id: holding[EnrichedMarc::SUB_HOLDING_SEQUENCE_NUMBER],
      location: holding[EnrichedMarc::SUB_HOLDING_SHELVING_LOCATION],
      classification_part: holding[EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART],
      item_part: holding[EnrichedMarc::SUB_HOLDING_ITEM_PART]
    }
  end
end
|
2141
|
+
|
2142
|
+
# @return [Array] of hashes each describing an electronic holding
|
2143
|
+
# Describes each electronic holding found in the enriched-MARC electronic
# inventory fields (tag EnrichedMarc::TAG_ELECTRONIC_INVENTORY).
#
# Example of the enriched MARC this reads:
#   <datafield tag="prt" ind1=" " ind2=" ">
#     <subfield code="pid">5310486800000521</subfield>
#     <subfield code="url">https://sandbox01-na.alma.exlibrisgroup.com/view/uresolver/01UPENN_INST/openurl?u.ignore_date_coverage=true&rft.mms_id=9926519600521</subfield>
#     <subfield code="iface">PubMed Central</subfield>
#     <subfield code="coverage"> Available from 2005 volume: 1. Most recent 1 year(s) not available.</subfield>
#     <subfield code="library">MAIN</subfield>
#     <subfield code="collection">PubMed Central (Training)</subfield>
#     <subfield code="czcolid">61111058563444000</subfield>
#     <subfield code="8">5310486800000521</subfield>
#   </datafield>
#
# @param rec [#fields] MARC record
# @return [Array<Hash>] hashes with :portfolio_pid, :url, :collection and
#   :coverage
def get_electronic_holdings(rec)
  inventory = rec.fields(EnrichedMarc::TAG_ELECTRONIC_INVENTORY)

  # Do NOT index electronic holdings whose collection name is blank: these
  # are records created from Voyager 856 fields that don't have actual links.
  with_collection = inventory.select do |item|
    item[EnrichedMarc::SUB_ELEC_COLLECTION_NAME].present?
  end

  with_collection.map do |item|
    {
      portfolio_pid: item[EnrichedMarc::SUB_ELEC_PORTFOLIO_PID],
      url: item[EnrichedMarc::SUB_ELEC_ACCESS_URL],
      collection: item[EnrichedMarc::SUB_ELEC_COLLECTION_NAME],
      coverage: item[EnrichedMarc::SUB_ELEC_COVERAGE]
    }
  end
end
|
2171
|
+
|
2172
|
+
# Collects the values of every EnrichedMarc::SUB_BOUND_WITH_ID subfield
# found on the holding fields (tag EnrichedMarc::TAG_HOLDING).
#
# @param rec [#fields] MARC record
# @return [Array] flat list of subfield values
def get_bound_with_id_values(rec)
  rec.fields(EnrichedMarc::TAG_HOLDING).flat_map do |holding|
    bound_with = holding.select(&subfield_in([ EnrichedMarc::SUB_BOUND_WITH_ID ]))
    bound_with.map(&:value)
  end
end
|
2177
|
+
|
2178
|
+
# Builds the text appended after a name from subfields 4, e and w.
#
# A subfield 4 contributes ", " plus its relator_codes lookup; e and w
# contribute a space plus their own value. Pieces are concatenated with no
# separator.
#
# @param field [Enumerable] MARC data field (its subfields are iterated)
# @return [String]
def get_subfield_4ew(field)
  pieces = field.select(&subfield_in(%W{4 e w})).map do |sf|
    if sf.code == '4'
      ", #{relator_codes[sf.value]}"
    else
      " #{sf.value}"
    end
  end
  pieces.join('')
end
|
2183
|
+
|
2184
|
+
# Text appended after a title: subfields e and w of the field, joined by
# join_subfields.
#
# @param field MARC data field
# @return the join_subfields result
def get_title_extra(field)
  extra_codes = %W{e w}
  join_subfields(field, &subfield_in(extra_codes))
end
|
2187
|
+
|
2188
|
+
# "Other title" display values, in this order:
#   * every 246 (without subfields 6, 8)
#   * 740 fields whose indicator2 is '', ' ', '0', '1' or '3' (without 5, 6, 8)
#   * 880 fields matching /^(246|740)/ (without 5, 6, 8)
#
# @param rec [#fields] MARC record
# @return [Array]
def get_other_title_display(rec)
  varying = rec.fields('246').map do |field|
    join_subfields(field, &subfield_not_in(%W{6 8}))
  end

  added_fields = rec.fields('740').select do |f|
    ['', ' ', '0', '1', '3'].member?(f.indicator2)
  end
  added = added_fields.map { |field| join_subfields(field, &subfield_not_in(%W{5 6 8})) }

  linked_fields = rec.fields('880').select { |f| has_subfield6_value(f, /^(246|740)/) }
  linked = linked_fields.map { |field| join_subfields(field, &subfield_not_in(%W{5 6 8})) }

  varying + added + linked
end
|
2205
|
+
|
2206
|
+
# distribution and manufacture share the same logic except for indicator2
|
2207
|
+
# Subfields a, b and c of every 264 with the given indicator2, followed by
# the same for 880 fields with that indicator2 that match /^264/.
#
# @param rec [#fields] MARC record
# @param indicator2 [String] required second-indicator value
# @return [Array]
def get_264_or_880_fields(rec, indicator2)
  statement = ->(field) { join_subfields(field, &subfield_in(%w{a b c})) }

  direct = rec.fields('264').select { |f| f.indicator2 == indicator2 }
  linked = rec.fields('880')
              .select { |f| f.indicator2 == indicator2 }
              .select { |f| has_subfield6_value(f, /^264/) }

  direct.map(&statement) + linked.map(&statement)
end
|
2222
|
+
|
2223
|
+
# Production display values: 264/880 statements whose indicator2 is '0'.
#
# @param rec [#fields] MARC record
# @return [Array] see get_264_or_880_fields
def get_production_display(rec)
  production_indicator = '0'
  get_264_or_880_fields(rec, production_indicator)
end
|
2226
|
+
|
2227
|
+
# Distribution display values: 264/880 statements whose indicator2 is '2'.
#
# @param rec [#fields] MARC record
# @return [Array] see get_264_or_880_fields
def get_distribution_display(rec)
  distribution_indicator = '2'
  get_264_or_880_fields(rec, distribution_indicator)
end
|
2230
|
+
|
2231
|
+
# Manufacture display values: 264/880 statements whose indicator2 is '3'.
#
# @param rec [#fields] MARC record
# @return [Array] see get_264_or_880_fields
def get_manufacture_display(rec)
  manufacture_indicator = '3'
  get_264_or_880_fields(rec, manufacture_indicator)
end
|
2234
|
+
|
2235
|
+
# Cartographic display values: every 255 and 342 field joined with the
# subfield_not_6_or_8 selector.
#
# @param rec [#fields] MARC record
# @return [Array]
def get_cartographic_display(rec)
  cartographic_fields = rec.fields(%w{255 342})
  cartographic_fields.map { |field| join_subfields(field, &subfield_not_6_or_8) }
end
|
2240
|
+
|
2241
|
+
# Fingerprint display values: every 026 field joined without subfields 2, 5,
# 6 and 8.
#
# @param rec [#fields] MARC record
# @return [Array]
def get_fingerprint_display(rec)
  fingerprint_fields = rec.fields('026')
  fingerprint_fields.map { |field| join_subfields(field, &subfield_not_in(%w{2 5 6 8})) }
end
|
2246
|
+
|
2247
|
+
# Arrangement display values: delegates to get_datafield_and_880 for tag 351.
#
# @param rec [#fields] MARC record
# @return the get_datafield_and_880 result
def get_arrangement_display(rec)
  arrangement_tag = '351'
  get_datafield_and_880(rec, arrangement_tag)
end
|
2250
|
+
|
2251
|
+
# Display entries for former titles: every 247 field and every 880 matching
# /^247/, in record order.
#
# @param rec [#fields] MARC record
# @return [Array<Hash>] hashes with :value (without subfields 6, 8, e, w),
#   :value_append (subfields e, w) and :link_type
def get_former_title_display(rec)
  former_fields = rec.fields.select do |f|
    f.tag == '247' || (f.tag == '880' && has_subfield6_value(f, /^247/))
  end
  former_fields.map do |field|
    {
      value: join_subfields(field, &subfield_not_in(%w{6 8 e w})),
      value_append: join_subfields(field, &subfield_in(%w{e w})),
      link_type: 'title_search'
    }
  end
end
|
2260
|
+
|
2261
|
+
# logic for 'Continues' and 'Continued By' is very similar
|
2262
|
+
# Shared logic for 'Continues' and 'Continued By': takes the fields with the
# given tag plus the 880 fields whose subfield 6 matches /^<tag>/, keeps
# those having at least one of subfields i, a, s, t, n, d, and joins exactly
# those subfields.
#
# @param rec [#fields] MARC record
# @param tag [String] tag to read (interpolated into the 880 pattern)
# @return [Array]
def get_continues(rec, tag)
  wanted = %w{i a s t n d}
  candidates = rec.fields.select do |f|
    f.tag == tag || (f.tag == '880' && has_subfield6_value(f, /^#{tag}/))
  end
  with_content = candidates.select { |f| f.any?(&subfield_in(wanted)) }
  with_content.map { |field| join_subfields(field, &subfield_in(wanted)) }
end
|
2270
|
+
|
2271
|
+
# 'Continues' display values: get_continues for tag 780.
#
# @param rec [#fields] MARC record
# @return [Array]
def get_continues_display(rec)
  preceding_entry_tag = '780'
  get_continues(rec, preceding_entry_tag)
end
|
2274
|
+
|
2275
|
+
# 'Continued By' display values: get_continues for tag 785.
#
# @param rec [#fields] MARC record
# @return [Array]
def get_continued_by_display(rec)
  succeeding_entry_tag = '785'
  get_continues(rec, succeeding_entry_tag)
end
|
2278
|
+
|
2279
|
+
# Display entries for place of publication: a searchable entry per 752 field
# (subfields e and w go to :value_append), followed by an unlinked entry per
# value that get_880_subfield_not_6_or_8 returns for tag 752.
#
# @param rec [#fields] MARC record
# @return [Array<Hash>]
def get_place_of_publication_display(rec)
  places = rec.fields('752').map do |field|
    {
      value: join_subfields(field, &subfield_not_in(%w{6 8 e w})),
      value_append: join_subfields(field, &subfield_in(%w{e w})),
      link_type: 'search'
    }
  end
  linked_places = get_880_subfield_not_6_or_8(rec, '752').map do |text|
    { value: text, link: false }
  end
  places + linked_places
end
|
2291
|
+
|
2292
|
+
# Language display values: delegates to get_datafield_and_880 for tag 546.
#
# @param rec [#fields] MARC record
# @return the get_datafield_and_880 result
def get_language_display(rec)
  language_note_tag = '546'
  get_datafield_and_880(rec, language_note_tag)
end
|
2295
|
+
|
2296
|
+
# for system details: extract subfield 3 plus other subfields as specified by passed-in block
|
2297
|
+
# For system details: combines the field's subfield 3 values with the
# subfields selected by the given block.
#
# Subfield 3 values are passed through trim_trailing_period and joined with
# ': '; the block-selected text is passed through trim_trailing_semicolon.
# The two parts are joined with a single space.
#
# @param field [Enumerable] MARC data field
# @yield subfield selector handed on to join_subfields
# @return [String]
def get_sub3_and_other_subs(field, &block)
  materials = field.select(&subfield_in(%w{3})).map { |sf| trim_trailing_period(sf.value) }
  details = join_subfields(field, &block)
  [ materials.join(': '), trim_trailing_semicolon(details) ].join(' ')
end
|
2302
|
+
|
2303
|
+
# System-details display values. Fields 538, 344, 345/346 and 347 are
# rendered first (in that order), then the 880 fields linked to each of
# them in the same order. Every field goes through get_sub3_and_other_subs
# with the subfield list belonging to its tag group.
#
# @param rec [#fields] MARC record
# @return [Array<String>]
def get_system_details_display(rec)
  # [tags to read, pattern for the linked 880s, subfields to show]
  groups = [
    ['538',         /^538/,       %w{a i u}],
    ['344',         /^344/,       %w{a b c d e f g h}],
    [%w{345 346},   /^(345|346)/, %w{a b}],
    ['347',         /^347/,       %w{a b c d e f}]
  ]

  direct = groups.flat_map do |tags, _pattern, codes|
    rec.fields(tags).map { |field| get_sub3_and_other_subs(field, &subfield_in(codes)) }
  end

  linked = groups.flat_map do |_tags, pattern, codes|
    rec.fields('880')
       .select { |f| has_subfield6_value(f, pattern) }
       .map { |field| get_sub3_and_other_subs(field, &subfield_in(codes)) }
  end

  direct + linked
end
|
2339
|
+
|
2340
|
+
# Biography display values: delegates to get_datafield_and_880 for tag 545.
#
# @param rec [#fields] MARC record
# @return the get_datafield_and_880 result
def get_biography_display(rec)
  biography_tag = '545'
  get_datafield_and_880(rec, biography_tag)
end
|
2343
|
+
|
2344
|
+
# Summary display values: delegates to get_datafield_and_880 for tag 520.
#
# @param rec [#fields] MARC record
# @return the get_datafield_and_880 result
def get_summary_display(rec)
  summary_tag = '520'
  get_datafield_and_880(rec, summary_tag)
end
|
2347
|
+
|
2348
|
+
# Contents display values: each 505, then each 880 matching /^505/, is
# joined with the subfield_not_6_or_8 selector and split on '--' so that
# every piece becomes its own entry.
#
# @param rec [#fields] MARC record
# @return [Array<String>]
def get_contents_display(rec)
  split_entries = ->(field) { join_subfields(field, &subfield_not_6_or_8).split('--') }

  direct = rec.fields('505').flat_map(&split_entries)
  linked = rec.fields('880')
              .select { |f| has_subfield6_value(f, /^505/) }
              .flat_map(&split_entries)
  direct + linked
end
|
2360
|
+
|
2361
|
+
# Contents-note search values: all subfield values of every 505 field,
# joined per field.
#
# @param rec [#fields] MARC record
# @return [Array] one joined value per 505 field
def get_contents_note_search_values(rec)
  rec.fields('505').map do |field|
    all_values = field.to_a.map(&:value)
    join_and_trim_whitespace(all_values)
  end
end
|
2366
|
+
|
2367
|
+
# Participant display values: delegates to get_datafield_and_880 for tag 511.
#
# @param rec [#fields] MARC record
# @return the get_datafield_and_880 result
def get_participant_display(rec)
  participant_tag = '511'
  get_datafield_and_880(rec, participant_tag)
end
|
2370
|
+
|
2371
|
+
# Credits display values: delegates to get_datafield_and_880 for tag 508.
#
# @param rec [#fields] MARC record
# @return the get_datafield_and_880 result
def get_credits_display(rec)
  credits_tag = '508'
  get_datafield_and_880(rec, credits_tag)
end
|
2374
|
+
|
2375
|
+
# 10/2018 kms: add 586
|
2376
|
+
# Notes display values from 500, 502, 504, 515, 518, 525, 533, 550, 580,
# 586 and 588, followed by the 880 fields linked to any of those tags.
#
# A 588 shows subfield a only; every other tag shows all subfields except
# 5, 6 and 8. The same rule applies to linked 880 fields.
#
# Fix: for 880 fields the linked tag was tested with `sub6 == '588'`. The
# subfield 6 value is matched by prefix everywhere else in this method
# (/^(500|...|588)/) because it carries more than the bare tag, so the
# equality could not succeed and a 588-linked 880 was rendered with all of
# its subfields. The tag is now tested with start_with?.
#
# @param rec [#fields] MARC record
# @return [Array]
def get_notes_display(rec)
  only_a = ->(field) { join_subfields(field, &subfield_in(%w{a})) }
  general = ->(field) { join_subfields(field, &subfield_not_in(%w{5 6 8})) }

  direct = rec.fields(%w{500 502 504 515 518 525 533 550 580 586 588}).map do |field|
    field.tag == '588' ? only_a.call(field) : general.call(field)
  end

  linked_fields = rec.fields('880').select do |f|
    has_subfield6_value(f, /^(500|502|504|515|518|525|533|550|580|586|588)/)
  end
  linked = linked_fields.map do |field|
    sub6 = field.select(&subfield_in(%w{6})).map(&:value).first
    # to_s guards against a field that has no subfield 6 value at all
    sub6.to_s.start_with?('588') ? only_a.call(field) : general.call(field)
  end

  direct + linked
end
|
2397
|
+
|
2398
|
+
# 10/2018 kms: add 562 563 585. Add 561 if subf a starts with Athenaeum copy:
|
2399
|
+
# non-Athenaeum 561 still displays as Penn Provenance
|
2400
|
+
# Display values for local notes, in this order:
#   1. subfield a of each 561 that has a subfield a starting with
#      "Athenaeum copy: ";
#   2. 562/563/585/590 fields, all subfields except 5, 6 and 8;
#   3. the values get_880 yields for those same four tags, with the same
#      subfield exclusion.
#
# rec - the record; must respond to #fields(tags).
# Returns an Array built by concatenating the three groups.
def get_local_notes_display(rec)
  local_tags = %w{562 563 585 590}

  athenaeum_fields = rec.fields('561').select do |candidate|
    candidate.any? { |sf| sf.code == 'a' && sf.value =~ /^Athenaeum copy: / }
  end
  athenaeum_notes = athenaeum_fields.map do |note_field|
    join_subfields(note_field, &subfield_in(%w{a}))
  end

  local_notes = rec.fields(local_tags).map do |note_field|
    join_subfields(note_field, &subfield_not_in(%w{5 6 8}))
  end

  linked_notes = get_880(rec, local_tags) { |sf| !%w{5 6 8}.member?(sf.code) }

  athenaeum_notes + local_notes + linked_notes
end
|
2415
|
+
|
2416
|
+
# Display values for tag 555 together with its linked 880 fields.
# Thin wrapper around get_datafield_and_880.
#
# rec - the record, handed through unchanged.
# Returns whatever get_datafield_and_880 returns for tag '555'.
def get_finding_aid_display(rec)
  finding_aid_tag = '555'
  get_datafield_and_880(rec, finding_aid_tag)
end
|
2419
|
+
|
2420
|
+
# get 650/880 for provenance and chronology: prefix should be 'PRO' or 'CHR'
|
2421
|
+
# 11/2018: do not display $5 in PRO or CHR subjs
|
2422
|
+
# Builds subject-search entries from 650 fields (and 880 fields linked to a
# 650) whose second indicator is '4' and which have a subfield a starting
# with +prefix+ or "%" + +prefix+.
#
# The prefix (with optional leading '%') is stripped from each subfield a;
# the remaining subfields, excluding a 6 8 e w 5, are appended. Entries whose
# combined text is blank are dropped.
#
# rec    - the record; must respond to #fields(tag).
# prefix - String interpolated into the matching regexes (callers in this
#          file pass 'PRO' or 'CHR').
# Returns an Array of { value:, link_type: 'subject_search' } Hashes,
# 650-derived entries first.
def get_650_and_880(rec, prefix)
  starts_with_prefix = /^(#{prefix}|%#{prefix})/
  leading_prefix = /^%?#{prefix}/

  has_prefixed_a = lambda do |field|
    field.any? { |sf| sf.code == 'a' && sf.value =~ starts_with_prefix }
  end

  build_entry = lambda do |field|
    heading = field.select(&subfield_in(%w{a}))
                   .map { |sf| sf.value.gsub(leading_prefix, '') }
                   .join(' ')
    remainder = join_subfields(field, &subfield_not_in(%w{a 6 8 e w 5}))
    text = [heading, remainder].join(' ')
    text.present? ? { value: text, link_type: 'subject_search' } : nil
  end

  direct_entries = rec.fields('650')
                      .select { |f| f.indicator2 == '4' }
                      .select(&has_prefixed_a)
                      .map(&build_entry)
                      .compact

  linked_entries = rec.fields('880')
                      .select { |f| f.indicator2 == '4' }
                      .select { |f| has_subfield6_value(f, /^650/) }
                      .select(&has_prefixed_a)
                      .map(&build_entry)
                      .compact

  direct_entries + linked_entries
end
|
2447
|
+
|
2448
|
+
# 11/2018 kms: a 561 starting Athenaeum copy: should not appear as Penn Provenance, display that as Local Notes
|
2449
|
+
# Provenance display entries, concatenated in this order:
#   1. 561 fields with indicator1 in ['1', '', ' '], indicator2 blank, and at
#      least one subfield a that does NOT start with "Athenaeum copy: ";
#   2. 880 fields linked to a 561 with the same indicator constraints;
#   3. the 'PRO' entries from get_650_and_880.
# Groups 1 and 2 contribute { value: <subfield a text>, link: false }.
#
# NOTE(review): the 880 branch applies no "Athenaeum copy" exclusion, unlike
# the 561 branch -- confirm whether that is intended.
# NOTE(review): entries are dropped only when join_subfields returns nil or
# false; an empty string would still be kept -- confirm against join_subfields.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array of Hashes.
def get_provenance_display(rec)
  indicators_match = lambda do |f|
    ['1', '', ' '].member?(f.indicator1) && [' ', ''].member?(f.indicator2)
  end

  to_entry = lambda do |field|
    text = join_subfields(field, &subfield_in(%w{a}))
    text ? { value: text, link: false } : nil
  end

  direct_fields = rec.fields('561').select do |f|
    indicators_match.call(f) &&
      f.any? { |sf| sf.code == 'a' && sf.value !~ /^Athenaeum copy: / }
  end
  direct_entries = direct_fields.map(&to_entry).compact

  linked_entries = rec.fields('880')
                      .select { |f| has_subfield6_value(f, /^561/) }
                      .select(&indicators_match)
                      .map(&to_entry)
                      .compact

  direct_entries + linked_entries + get_650_and_880(rec, 'PRO')
end
|
2467
|
+
|
2468
|
+
# Chronology display entries: the 'CHR'-prefixed subjects gathered by
# get_650_and_880.
#
# rec - the record, handed through unchanged.
# Returns whatever get_650_and_880 returns for prefix 'CHR'.
def get_chronology_display(rec)
  chronology_prefix = 'CHR'
  get_650_and_880(rec, chronology_prefix)
end
|
2471
|
+
|
2472
|
+
# Display values for tag 544 together with its linked 880 fields.
# Thin wrapper around get_datafield_and_880.
#
# rec - the record, handed through unchanged.
# Returns whatever get_datafield_and_880 returns for tag '544'.
def get_related_collections_display(rec)
  related_collections_tag = '544'
  get_datafield_and_880(rec, related_collections_tag)
end
|
2475
|
+
|
2476
|
+
# Display values for tag 510 together with its linked 880 fields.
# Thin wrapper around get_datafield_and_880.
#
# rec - the record, handed through unchanged.
# Returns whatever get_datafield_and_880 returns for tag '510'.
def get_cited_in_display(rec)
  cited_in_tag = '510'
  get_datafield_and_880(rec, cited_in_tag)
end
|
2479
|
+
|
2480
|
+
# Display values for tag 581 together with its linked 880 fields.
# Thin wrapper around get_datafield_and_880.
#
# rec - the record, handed through unchanged.
# Returns whatever get_datafield_and_880 returns for tag '581'.
def get_publications_about_display(rec)
  publications_about_tag = '581'
  get_datafield_and_880(rec, publications_about_tag)
end
|
2483
|
+
|
2484
|
+
# Display values for tag 524 together with its linked 880 fields.
# Thin wrapper around get_datafield_and_880.
#
# rec - the record, handed through unchanged.
# Returns whatever get_datafield_and_880 returns for tag '524'.
def get_cite_as_display(rec)
  cite_as_tag = '524'
  get_datafield_and_880(rec, cite_as_tag)
end
|
2487
|
+
|
2488
|
+
# Contributor display entries from 700/710 fields and the 880 fields linked
# to them. Fields carrying a subfield i are skipped in both groups.
#
# 700/710: indicator2 must be '', ' ' or '0'. The name is subfields
# a b c d j q; the appended text is built from subfields e u 3 4, where a
# subfield 4 is rendered as ", " + relator_codes lookup and the others as
# " " + value.
# 880: the appended text is simply subfields e u 3 joined by join_subfields
# (subfield 4 is not used and indicator2 is not checked in this branch).
#
# rec - the record; must respond to #fields(tags).
# Returns an Array of { value:, value_append:, link_type: } Hashes.
def get_contributor_display(rec)
  direct_fields = rec.fields(%w{700 710})
                     .select { |f| ['', ' ', '0'].member?(f.indicator2) }
                     .reject { |f| f.any? { |sf| sf.code == 'i' } }

  direct_entries = direct_fields.map do |field|
    name = join_subfields(field, &subfield_in(%w{a b c d j q}))
    suffix_parts = field.select(&subfield_in(%w{e u 3 4})).map do |sf|
      sf.code == '4' ? ", #{relator_codes[sf.value]}" : " #{sf.value}"
    end
    { value: name, value_append: suffix_parts.join, link_type: 'author_creator_xfacet2' }
  end

  linked_fields = rec.fields('880').select do |f|
    has_subfield6_value(f, /^(700|710)/) && !f.any? { |sf| sf.code == 'i' }
  end

  linked_entries = linked_fields.map do |field|
    name = join_subfields(field, &subfield_in(%w{a b c d j q}))
    suffix = join_subfields(field, &subfield_in(%w{e u 3}))
    { value: name, value_append: suffix, link_type: 'author_creator_xfacet2' }
  end

  direct_entries + linked_entries
end
|
2513
|
+
|
2514
|
+
# Takes the value of the field's first subfield i and, if it contains a
# parenthesised segment, removes the first such segment (parentheses
# included). The result then has its trailing period and trailing colon
# trimmed by trim_trailing_period / trim_trailing_colon.
#
# field - enumerable of subfields responding to #code and #value.
# Returns the cleaned String; '' is used when there is no subfield i value.
def remove_paren_value_from_subfield_i(field)
  first_i = field.find { |sf| sf.code == 'i' }
  raw = first_i ? first_i.value : nil

  paren = raw ? /\((.+?)\)/.match(raw) : nil
  # paren[0] is the whole "(...)" segment, parentheses included
  cleaned = paren ? raw.sub(paren[0], '') : raw

  trim_trailing_colon(trim_trailing_period(cleaned || ''))
end
|
2527
|
+
|
2528
|
+
# Related-work display strings from 700/710/711/730 fields and the 880
# fields linked to them. A field qualifies when indicator2 is blank and it
# has a subfield t.
#
# Each string is "<label>:<body>", where label is the cleaned subfield i
# (see remove_paren_value_from_subfield_i) and body concatenates every
# subfield except 0 and i: subfield 4 as ", " + relator_codes lookup, all
# others as " " + value. Blank parts are omitted before joining with ':'.
#
# rec - the record; must respond to #fields(tags).
# Returns an Array of Strings, direct fields first.
def get_related_work_display(rec)
  has_title = lambda { |f| f.any? { |sf| sf.code == 't' } }

  describe = lambda do |field|
    label = remove_paren_value_from_subfield_i(field) || ''
    pieces = field.map do |sf|
      case sf.code
      when '4' then ", #{relator_codes[sf.value]}"
      when '0', 'i' then nil
      else " #{sf.value}"
      end
    end
    [label, pieces.compact.join].select(&:present?).join(':')
  end

  direct_entries = rec.fields(%w{700 710 711 730})
                      .select { |f| ['', ' '].member?(f.indicator2) }
                      .select(&has_title)
                      .map(&describe)

  linked_entries = rec.fields('880')
                      .select { |f| ['', ' '].member?(f.indicator2) }
                      .select { |f| has_subfield6_value(f, /^(700|710|711|730)/) }
                      .select(&has_title)
                      .map(&describe)

  direct_entries + linked_entries
end
|
2561
|
+
|
2562
|
+
# "Contains" display strings from 700/710/711/730/740 fields with
# indicator2 == '2', and from 880 fields linked to those tags with the same
# indicator.
#
# Each string is "<label>:<body>" with blank parts omitted; label is the
# cleaned subfield i. For direct fields the body concatenates every subfield
# except 0 5 6 8 i (subfield 4 rendered as ", " + relator_codes lookup, the
# rest as " " + value). For linked 880 fields the body is join_subfields
# over everything except 0 5 6 8 i, so subfield 4 appears as its raw value.
#
# rec - the record; must respond to #fields(tags).
# Returns an Array of Strings, direct fields first.
def get_contains_display(rec)
  direct_entries = rec.fields(%w{700 710 711 730 740})
                      .select { |f| f.indicator2 == '2' }
                      .map do |field|
    label = remove_paren_value_from_subfield_i(field) || ''
    pieces = field.map do |sf|
      case sf.code
      when '4' then ", #{relator_codes[sf.value]}"
      when '0', '5', '6', '8', 'i' then nil
      else " #{sf.value}"
      end
    end
    [label, pieces.compact.join].select(&:present?).join(':')
  end

  linked_entries = rec.fields('880')
                      .select { |f| f.indicator2 == '2' }
                      .select { |f| has_subfield6_value(f, /^(700|710|711|730|740)/) }
                      .map do |field|
    label = remove_paren_value_from_subfield_i(field) || ''
    body = join_subfields(field, &subfield_not_in(%w{0 5 6 8 i}))
    [label, body].select(&:present?).join(':')
  end

  direct_entries + linked_entries
end
|
2587
|
+
|
2588
|
+
# Builds the display Hash for one "other edition" field.
#
#   value         - subfields s, x, z as " " + value; subfield t as
#                   " " + relator_codes lookup + ". "
#   value_prepend - cleaned subfield i, trailing period trimmed, plus ':'
#   value_append  - subfield h as " (value) "; every subfield whose code is
#                   not one of i h s t x z e f o r w y 7 as " " + value
#
# NOTE(review): subfield t is looked up in relator_codes rather than shown
# verbatim -- confirm this is intended.
#
# field - enumerable of subfields responding to #code and #value.
# Returns a Hash with keys :value, :value_prepend, :value_append, :link_type.
def get_other_edition_value(field)
  relationship = remove_paren_value_from_subfield_i(field) || ''

  main_parts = field.map do |sf|
    case sf.code
    when 's', 'x', 'z' then " #{sf.value}"
    when 't' then " #{relator_codes[sf.value]}. "
    end
  end

  extra_parts = field.map do |sf|
    if sf.code == 'h'
      " (#{sf.value}) "
    elsif !%w{i h s t x z e f o r w y 7}.member?(sf.code)
      " #{sf.value}"
    end
  end

  {
    value: main_parts.compact.join,
    value_prepend: trim_trailing_period(relationship) + ':',
    value_append: extra_parts.compact.join,
    link_type: 'author_creator_xfacet2'
  }
end
|
2611
|
+
|
2612
|
+
# "Other edition" display entries from 775 fields that have a subfield i,
# followed by 880 fields linked to a 775 that have a blank indicator2 and a
# subfield i. Each field is rendered by get_other_edition_value.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array of the Hashes produced by get_other_edition_value.
def get_other_edition_display(rec)
  has_relationship = lambda { |f| f.any? { |sf| sf.code == 'i' } }

  direct_entries = rec.fields('775')
                      .select(&has_relationship)
                      .map { |edition_field| get_other_edition_value(edition_field) }

  linked_entries = rec.fields('880')
                      .select { |f| ['', ' '].member?(f.indicator2) }
                      .select { |f| has_subfield6_value(f, /^775/) }
                      .select(&has_relationship)
                      .map { |edition_field| get_other_edition_value(edition_field) }

  direct_entries + linked_entries
end
|
2628
|
+
|
2629
|
+
# "Contained in" display values: subfields a g i s t of each 773 field
# (blank results dropped), followed by what get_880 yields for tag '773'
# with the same subfield selection.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array.
def get_contained_in_display(rec)
  host_items = rec.fields('773')
                  .map { |host_field| join_subfields(host_field, &subfield_in(%w{a g i s t})) }
                  .select { |text| text.present? }

  linked_items = get_880(rec, '773') { |sf| %w{a g i s t}.member?(sf.code) }

  host_items + linked_items
end
|
2639
|
+
|
2640
|
+
# Constituent-unit display values: subfields i a s t of each 774 field
# (blank results dropped), followed by what get_880 yields for tag '774'
# with the same subfield selection.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array.
def get_constituent_unit_display(rec)
  unit_items = rec.fields('774')
                  .map { |unit_field| join_subfields(unit_field, &subfield_in(%w{i a s t})) }
                  .select { |text| text.present? }

  linked_items = get_880(rec, '774') { |sf| %w{i a s t}.member?(sf.code) }

  unit_items + linked_items
end
|
2650
|
+
|
2651
|
+
# "Has supplement" display values: each 770 field rendered without
# subfields 6/8 (blank results dropped), followed by the result of
# get_880_subfield_not_6_or_8 for tag '770'.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array.
def get_has_supplement_display(rec)
  supplements = rec.fields('770')
                   .map { |supplement_field| join_subfields(supplement_field, &subfield_not_6_or_8) }
                   .select { |text| text.present? }

  supplements + get_880_subfield_not_6_or_8(rec, '770')
end
|
2659
|
+
|
2660
|
+
# "Other format" display values: subfields i a s t o of each 776 field
# (blank results dropped), followed by what get_880 yields for tag '776'
# with the same subfield selection.
#
# The linked-880 lookup uses '776' so that it matches the tag of the direct
# fields; it previously asked for '774', which returned the linked fields
# of a different tag.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array.
def get_other_format_display(rec)
  other_format_tag = '776'

  formats = rec.fields(other_format_tag)
               .map { |format_field| join_subfields(format_field, &subfield_in(%w{i a s t o})) }
               .select(&:present?)

  linked_formats = get_880(rec, other_format_tag) { |sf| %w{i a s t o}.member?(sf.code) }

  formats + linked_formats
end
|
2670
|
+
|
2671
|
+
# ISBN display values: subfields a and z of each 020 field (blank results
# dropped), followed by what get_880 yields for tag '020' with the same
# subfield selection.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array.
def get_isbn_display(rec)
  isbns = rec.fields('020')
             .map { |isbn_field| join_subfields(isbn_field, &subfield_in(%w{a z})) }
             .select { |text| text.present? }

  linked_isbns = get_880(rec, '020') { |sf| %w{a z}.member?(sf.code) }

  isbns + linked_isbns
end
|
2681
|
+
|
2682
|
+
# ISSN display values: subfields a and z of each 022 field (blank results
# dropped), followed by what get_880 yields for tag '022' with the same
# subfield selection.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array.
def get_issn_display(rec)
  issns = rec.fields('022')
             .map { |issn_field| join_subfields(issn_field, &subfield_in(%w{a z})) }
             .select { |text| text.present? }

  linked_issns = get_880(rec, '022') { |sf| %w{a z}.member?(sf.code) }

  issns + linked_issns
end
|
2692
|
+
|
2693
|
+
# Tests whether a subfield is a subfield a whose value begins with the
# literal text "(OCoLC)".
#
# sf - subfield responding to #code and #value.
# Returns false for any other subfield code; otherwise the result of the
# regex match (0 on match, nil on no match), so use it for truthiness only.
def subfield_a_is_oclc(sf)
  return false unless sf.code == 'a'

  sf.value =~ /^\(OCoLC\).*/
end
|
2696
|
+
|
2697
|
+
# Extracts OCLC numbers from the first 035 field that has an "(OCoLC)"
# subfield a (as judged by subfield_a_is_oclc). Later 035 fields are ignored.
#
# For each matching subfield, the captured number is the first run of digits
# that starts with a non-zero digit after "(OCoLC)"; any prefix characters
# and leading zeros are skipped. Subfields without such a number are dropped.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array of digit Strings (empty when nothing matches).
def get_oclc_id_values(rec)
  oclc_field = rec.fields('035').find do |candidate|
    candidate.any? { |sf| subfield_a_is_oclc(sf) }
  end
  return [] unless oclc_field

  oclc_subfields = oclc_field.select { |sf| subfield_a_is_oclc(sf) }
  numbers = oclc_subfields.map do |sf|
    found = /^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/.match(sf.value)
    found && found[1]
  end
  numbers.compact
end
|
2710
|
+
|
2711
|
+
# Publisher-number display values from 024/028 fields and from 880 fields
# linked to them. Every field is rendered with all subfields except 5 and 6.
# Blank results are dropped for the direct 024/028 fields only; the linked
# 880 results are kept as produced.
#
# rec - the record; must respond to #fields(tags).
# Returns an Array, direct fields first.
def get_publisher_number_display(rec)
  render = lambda do |number_field|
    join_subfields(number_field, &subfield_not_in(%w{5 6}))
  end

  direct_numbers = rec.fields(%w{024 028}).map(&render).select { |text| text.present? }

  linked_numbers = rec.fields('880')
                      .select { |f| has_subfield6_value(f, /^(024|028)/) }
                      .map(&render)

  direct_numbers + linked_numbers
end
|
2723
|
+
|
2724
|
+
# Access-restriction display values: each 506 field rendered with all
# subfields except 5 and 6; blank results are dropped.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array.
def get_access_restriction_display(rec)
  rec.fields('506').each_with_object([]) do |restriction_field, shown|
    text = join_subfields(restriction_field, &subfield_not_in(%w{5 6}))
    shown << text if text.present?
  end
end
|
2729
|
+
|
2730
|
+
# Bound-with display values: each 501 field rendered with every subfield
# except a; blank results are dropped.
#
# NOTE(review): subfield a is excluded rather than selected here -- confirm
# that this is intended and not a subfield_in / subfield_not_in mix-up.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array.
def get_bound_with_display(rec)
  rec.fields('501').each_with_object([]) do |bound_with_field, shown|
    text = join_subfields(bound_with_field, &subfield_not_in(%w{a}))
    shown << text if text.present?
  end
end
|
2735
|
+
|
2736
|
+
# some logic to extract link text and link url from an 856 field
|
2737
|
+
# Derives the link text and URL from a link field (callers in this file
# pass 856 fields).
#
# Link text is subfield 3 (joined by join_subfields), a space, and then the
# first subfield z value, falling back to the first subfield y value, or ''.
# The URL is the first subfield u value (or '') with the first occurrence of
# " target=_blank" removed.
#
# field - enumerable of subfields responding to #code and #value.
# Returns a two-element Array: [linktext, linkurl].
def linktext_and_url(field)
  first_value_of = lambda do |code|
    field.find_all(&subfield_in([code])).map(&:value).first
  end

  materials_text = join_subfields(field, &subfield_in(%w{3}))
  note_text = first_value_of.call('z') || first_value_of.call('y') || ''
  raw_url = first_value_of.call('u') || ''

  ["#{materials_text} #{note_text}", raw_url.sub(' target=_blank', '')]
end
|
2746
|
+
|
2747
|
+
# Lower-case words that get_web_link_display filters out of a link text
# when deriving a bookplate image name. The list is built once and cached
# in @words_to_remove_from_web_link.
#
# Returns an Array of Strings (the same object on every call).
def words_to_remove_from_web_link
  return @words_to_remove_from_web_link if @words_to_remove_from_web_link

  @words_to_remove_from_web_link = %w(
    fund funds collection collections endowment
    endowed trust and for of the memorial
  )
end
|
2752
|
+
|
2753
|
+
# Web-link display entries from 856 fields whose indicator2 is '2', ' ' or ''.
#
# Every such field yields { linktext:, linkurl: } (see linktext_and_url).
# When the link text looks like a bookplate link (matches the pattern
# below), a second entry { img_src:, img_alt:, linkurl: } follows it. The
# image file name is derived from the link text: the gift / "Home Page"
# phrases and the characters & and . are removed, the rest is split on
# non-word characters, the words listed by words_to_remove_from_web_link
# are dropped (case-insensitively), and what remains is concatenated.
#
# rec - the record; must respond to #fields(tag).
# Returns a flat Array of Hashes.
def get_web_link_display(rec)
  # words/phrases commonly used in bookplate links
  bookplate_pattern = /(Funds?|Collections?( +Gifts)?|Trust|Development|Endowment.*) +Home +Page|A +Penn +Libraries +Collection +Gift/

  web_link_fields = rec.fields('856').select do |f|
    ['2', ' ', ''].member?(f.indicator2)
  end

  web_link_fields.flat_map do |field|
    linktext, linkurl = linktext_and_url(field)
    entries = [{ linktext: linktext, linkurl: linkurl }]

    if linktext =~ bookplate_pattern
      # strip less-meaningful words to get the bookplate image file name
      meaningful_words = linktext.gsub(/- A Penn Libraries Collection Gift/i, '')
                                 .gsub(/ Home Page/i, '')
                                 .gsub(/[&.]/, '')
                                 .split(/\W+/)
                                 .reject { |word| words_to_remove_from_web_link.member?(word.downcase) }
      imagename = meaningful_words.join('')

      entries << {
        img_src: "https://www.library.upenn.edu/sites/default/files/images/bookplates/#{imagename}.gif",
        img_alt: "#{linktext.strip} Bookplate",
        linkurl: linkurl
      }
    end

    entries
  end
end
|
2785
|
+
|
2786
|
+
# Collects call numbers for searching from both holding and item fields.
# Some records don't have item records, only holdings, so for
# safety/comprehensiveness both are indexed and the unique values of the
# combined result are returned.
#
# Holdings: the first classification-part and first item-part subfield
# values are joined with a space; a holding with neither is skipped.
# Items: every non-blank call-number subfield value is taken as is.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array of unique Strings, holding values first.
def get_call_number_search_values(rec)
  holding_values = rec.fields(EnrichedMarc::TAG_HOLDING).map do |holding|
    classification_part =
      holding.find_all(&subfield_in([EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART])).map(&:value).first
    item_part =
      holding.find_all(&subfield_in([EnrichedMarc::SUB_HOLDING_ITEM_PART])).map(&:value).first

    [classification_part, item_part].join(' ') if classification_part || item_part
  end.compact

  item_values = rec.fields(EnrichedMarc::TAG_ITEM).flat_map do |item|
    item.find_all { |sf| sf.code == EnrichedMarc::SUB_ITEM_CALL_NUMBER }
        .map(&:value)
        .select(&:present?)
  end

  (holding_values + item_values).uniq
end
|
2815
|
+
|
2816
|
+
# Call-number xfacet values: each value from get_call_number_search_values
# passed through references().
#
# rec - the record, handed to get_call_number_search_values.
# Returns an Array with one references() result per call number.
def get_call_number_xfacet_values(rec)
  call_numbers = get_call_number_search_values(rec)
  call_numbers.map { |call_number| references(call_number) }
end
|
2821
|
+
|
2822
|
+
# Computes two epoch-second timestamps for a record.
#
#   :most_recent_add - the latest parseable "date created" value across all
#                      item fields, or 0 when there is none.
#   :last_update     - the first parseable 005 value (ignoring blank values
#                      and ones starting with '0000'); replaced by
#                      most_recent_add when absent or older than it.
#
# Unparseable item dates are reported on stdout and skipped. Only
# StandardError is rescued there, so Interrupt / SystemExit still propagate.
#
# rec - the record; must respond to #fields(tag).
# Returns a Hash with keys :most_recent_add and :last_update.
def prepare_timestamps(rec)
  most_recent_add = rec.fields(EnrichedMarc::TAG_ITEM).flat_map do |item|
    item.find_all(&subfield_in([EnrichedMarc::SUB_ITEM_DATE_CREATED])).map do |sf|
      begin
        # On 2022-05-02, this field value (as exported in enriched publishing
        # job from Alma) began truncating time to day-level granularity. We have
        # no guarantee that this won't switch back in the future, so for the
        # foreseeable future we should support both representations.
        date_format = sf.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
        DateTime.strptime(sf.value, date_format).to_time.to_i
      rescue StandardError => e
        puts "Error parsing date string for recently added field: #{sf.value} - #{e}"
        nil
      end
    end.compact
  end.max || 0

  last_update = rec.fields('005')
                   .select { |f| f.value.present? && !f.value.start_with?('0000') }
                   .map do |field|
    begin
      DateTime.iso8601(field.value).to_time.to_i
    rescue ArgumentError
      nil
    end
  end.compact.first

  last_update = most_recent_add if last_update.nil? || most_recent_add > last_update

  {
    most_recent_add: most_recent_add,
    last_update: last_update
  }
end
|
2861
|
+
|
2862
|
+
# Full-text link entries from 856 fields with indicator1 == '4' and
# indicator2 of '0' or '1'. Each entry is { linktext:, linkurl: }; when the
# derived link text is blank, the URL is used as the text.
# If is_etas(rec) is truthy, add_etas_full_text may append one more entry.
#
# rec - the record; must respond to #fields(tag).
# Returns an Array of Hashes.
def get_full_text_link_values(rec)
  full_text_fields = rec.fields('856').select do |f|
    f.indicator1 == '4' && %w{0 1}.member?(f.indicator2)
  end

  links = full_text_fields.map do |field|
    text, url = linktext_and_url(field)
    shown_text = text.present? ? text : url
    { linktext: shown_text, linkurl: url }
  end

  add_etas_full_text(rec, links) if is_etas(rec)
  links
end
|
2875
|
+
|
2876
|
+
# Text used as the :postfix of the HathiTrust link entry built by add_etas_full_text.
HATHI_POSTFIX = ' from HathiTrust during COVID-19'
|
2877
|
+
|
2878
|
+
# Appends a HathiTrust "Online access" link entry to +acc+, using the first
# OCLC id from get_oclc_id_values(rec) to build the URL.
#
# Nothing is appended when the record yields no OCLC id (defensive: e.g. a
# hathi match based on a subsequently deleted oclc id).
#
# rec - the record, handed to get_oclc_id_values.
# acc - Array of link Hashes; mutated in place.
# Returns acc after appending, or nil when nothing was appended.
def add_etas_full_text(rec, acc)
  primary_oclc_id = get_oclc_id_values(rec).first
  if primary_oclc_id
    hathi_url = 'http://catalog.hathitrust.org/api/volumes/oclc/' + primary_oclc_id + '.html'
    acc.push({ linktext: 'Online access', linkurl: hathi_url, postfix: HATHI_POSTFIX })
  end
end
|
2887
|
+
|
2888
|
+
# It's not clear whether Alma can suppress these auto-generated
|
2889
|
+
# records (Primo instances seem to show these records!) so we filter
|
2890
|
+
# them out here just in case
|
2891
|
+
# True when any 245 field's subfield a text contains the phrase
# "Host bibliographic record for boundwith".
#
# rec - the record; must respond to #fields(tag).
# Returns true or false.
def is_boundwith_record(rec)
  rec.fields('245').any? do |title_field|
    join_subfields(title_field, &subfield_in(%w{a}))
      .include?('Host bibliographic record for boundwith')
  end
end
|
2897
|
+
|
2898
|
+
# values for passed-in args come from Solr, not extracted directly from MARC.
|
2899
|
+
# TODO: this code should return more data-ish values; the HTML should be moved into a render method
|
2900
|
+
# Builds the HTML snippet for an offsite (Center for Research Libraries)
# record: a link to the CRL catalog record, a "Place request" link to the
# Atlas ILL form carrying the bibliographic details as query parameters,
# and, when the record has 590 fields, a <div> containing their text.
#
# NOTE(review): all values are interpolated into the HTML without escaping
# -- confirm that callers or the view layer sanitize them.
#
# rec     - the record; must respond to #fields(tag).
# crl_id  - id used in the CRL catalog URL and sent as :crl_id.
# title   - sent as the :title query parameter.
# author  - sent as the :author query parameter.
# oclc_id - sent as the :oclc query parameter.
# Returns a one-element Array holding the HTML String.
def get_offsite_display(rec, crl_id, title, author, oclc_id)
  crl_url = "http://catalog.crl.edu/record=#{crl_id}~S1"
  html = %Q{<a href="#{crl_url}">Center for Research Libraries Holdings</a>}

  f260 = rec.fields('260')
  joined_260 = lambda do |code|
    f260.map { |f| join_subfields(f, &subfield_in([code])) }.join(' ')
  end

  atlas_params = {
    crl_id: crl_id,
    title: title,
    author: author,
    oclc: oclc_id,
    place: joined_260.call('a'),
    publisher: joined_260.call('b'),
    pubdate: joined_260.call('c')
  }
  atlas_url = "https://atlas.library.upenn.edu/cgi-bin/forms/illcrl.cgi?#{atlas_params.to_query}"
  html += %Q{<a href="#{atlas_url}">Place request</a>}

  f590 = rec.fields('590')
  unless f590.empty?
    local_notes = f590.map { |field| field.join(' ') }.join('')
    html += '<div>' + local_notes + '</div>'
  end

  [html]
end
|
2932
|
+
|
2933
|
+
# Predicate shared by the get_ris_cy/pb/py_field helpers: true for a 260
# field, or for a 264 field whose second indicator is '1'.
@@select_pub_field = lambda { |field|
  case field.tag
  when '260' then true
  when '264' then field.indicator2 == '1'
  else false
  end
}
|
2936
|
+
|
2937
|
+
# Values for the RIS "CY" field: every subfield a value from the fields
# accepted by @@select_pub_field (260, or 264 with indicator2 '1').
#
# rec - the record; must respond to #fields.
# Returns a flat Array of subfield values.
def get_ris_cy_field(rec)
  publication_fields = rec.fields.select(&@@select_pub_field)
  publication_fields.flat_map do |pub_field|
    pub_field.find_all(&subfield_in(['a'])).map { |sf| sf.value }
  end
end
|
2942
|
+
|
2943
|
+
# Values for the RIS "PB" field: every subfield b value from the fields
# accepted by @@select_pub_field (260, or 264 with indicator2 '1').
#
# rec - the record; must respond to #fields.
# Returns a flat Array of subfield values.
def get_ris_pb_field(rec)
  publication_fields = rec.fields.select(&@@select_pub_field)
  publication_fields.flat_map do |pub_field|
    pub_field.find_all(&subfield_in(['b'])).map { |sf| sf.value }
  end
end
|
2948
|
+
|
2949
|
+
# Values for the RIS "PY" field: every subfield c value from the fields
# accepted by @@select_pub_field (260, or 264 with indicator2 '1').
#
# rec - the record; must respond to #fields.
# Returns a flat Array of subfield values.
def get_ris_py_field(rec)
  publication_fields = rec.fields.select(&@@select_pub_field)
  publication_fields.flat_map do |pub_field|
    pub_field.find_all(&subfield_in(['c'])).map { |sf| sf.value }
  end
end
|
2954
|
+
|
2955
|
+
# Values for the RIS "SN" field: every subfield a value from the record's
# 020 and 022 fields, in record order.
#
# rec - the record; must respond to #fields.
# Returns a flat Array of subfield values.
def get_ris_sn_field(rec)
  number_fields = rec.fields.select { |f| %w{020 022}.member?(f.tag) }
  number_fields.flat_map do |number_field|
    number_field.find_all(&subfield_in(['a'])).map { |sf| sf.value }
  end
end
|
2960
|
+
|
2961
|
+
end
|
2962
|
+
|
2963
|
+
end
|
2964
|
+
# rubocop:enable all
|