pennmarc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
data/legacy/marc.rb ADDED
@@ -0,0 +1,2964 @@
1
+ # rubocop:disable all
2
+ # frozen_string_literal: true
3
+
4
+ require 'nokogiri'
5
+
6
+ module PennLib
7
+
8
+ # Constants for Alma's MARC enrichment
9
module EnrichedMarc
  # Tags of the enrichment fields; terminology follows the Publishing Profile screen
  TAG_HOLDING = 'hld'
  TAG_ITEM = 'itm'
  TAG_ELECTRONIC_INVENTORY = 'prt'
  TAG_DIGITAL_INVENTORY = 'dig'

  # these are 852 subfield codes; terminology comes from MARC spec
  SUB_HOLDING_SHELVING_LOCATION = 'c'
  SUB_HOLDING_SEQUENCE_NUMBER = '8'
  SUB_HOLDING_CLASSIFICATION_PART = 'h'
  SUB_HOLDING_ITEM_PART = 'i'

  # subfield codes for item-level data (presumably read from TAG_ITEM fields -- confirm)
  SUB_ITEM_CURRENT_LOCATION = 'g'
  SUB_ITEM_CALL_NUMBER_TYPE = 'h'
  SUB_ITEM_CALL_NUMBER = 'i'
  SUB_ITEM_DATE_CREATED = 'q'

  # subfield codes for electronic inventory data (presumably read from TAG_ELECTRONIC_INVENTORY
  # fields -- confirm)
  SUB_ELEC_PORTFOLIO_PID = 'a'
  SUB_ELEC_ACCESS_URL = 'b'
  SUB_ELEC_COLLECTION_NAME = 'c'
  SUB_ELEC_COVERAGE = 'g'

  # a subfield code NOT used by the MARC 21 spec for 852 holdings records.
  # we add this subfield during preprocessing to store boundwith record IDs.
  SUB_BOUND_WITH_ID = 'y'
end
36
+
37
# Single-character date type codes, grouped by how the dates they describe should be interpreted,
# plus a lookup table (MAP) from code to interpretation.
# NOTE(review): these look like the MARC 008 "type of date" codes -- confirm.
module DateType
  # Nothing
  UNSPECIFIED = '|'
  NO_DATES_OR_BC = 'b'
  UNKNOWN = 'n'

  # Single point
  DETAILED = 'e'
  SINGLE = 's'

  # Lower bound
  CONTINUING_CURRENTLY_PUBLISHED = 'c'
  CONTINUING_STATUS_UNKNOWN = 'u'

  # Range
  CONTINUING_CEASED_PUBLICATION = 'd'
  COLLECTION_INCLUSIVE = 'i'
  COLLECTION_BULK = 'k'
  MULTIPLE = 'm'
  QUESTIONABLE = 'q'

  # Separate date for content
  DISTRIBUTION_AND_PRODUCTION = 'p'
  REPRINT_AND_ORIGINAL = 'r'
  PUBLICATION_AND_COPYRIGHT = 't'

  # Maps a date type code to one of :single, :lower_bound, :range or :separate_content.
  # The codes in the "Nothing" group have no entry, so looking them up yields nil.
  # Frozen so the shared lookup table cannot be modified at runtime.
  MAP = {
    DETAILED => :single,
    SINGLE => :single,

    CONTINUING_CURRENTLY_PUBLISHED => :lower_bound,
    CONTINUING_STATUS_UNKNOWN => :lower_bound,

    CONTINUING_CEASED_PUBLICATION => :range,
    COLLECTION_INCLUSIVE => :range,
    COLLECTION_BULK => :range,
    MULTIPLE => :range,
    QUESTIONABLE => :range,

    DISTRIBUTION_AND_PRODUCTION => :separate_content,
    REPRINT_AND_ORIGINAL => :separate_content,
    PUBLICATION_AND_COPYRIGHT => :separate_content
  }.freeze
end
81
+
82
# Builds the serialized subject values (browse/facet `xfacet` entries and display-only `stored_*`
# entries) for a record from its subject fields (see FIELDS) and any 880 fields linked to them.
module SubjectConfig

  # Single-character prefixes marking the heading type of a serialized subject.
  module Prefixes
    NAME = 'n'
    TITLE = 't'
    SUBJECT = 's' # used for default, handled as lcsh
    FAST = 'f'
    GEO = 'g'
    CHILDRENS = 'c'
    MESH = 'm'
    OTHER = 'o'
  end

  # Wraps a callable that maps a field to a prefix, or to nil when the field should be skipped.
  class FieldConfig
    # @param mapper [#call] receives a field, returns a prefix String or nil
    def initialize(mapper)
      @mapper = mapper
    end

    # @param field [MARC::DataField]
    # @return [String, nil]
    def map_prefix(field)
      @mapper.call(field)
    end
  end

  # Prefix for each recognised `$2` (source of heading) value; keys are lower case.
  THESAURI = {
    'aat' => Prefixes::OTHER,
    'cct' => Prefixes::OTHER,
    'fast' => Prefixes::FAST,
    'homoit' => Prefixes::OTHER,
    'jlabsh' => Prefixes::OTHER,
    'lcsh' => Prefixes::SUBJECT,
    'lcstt' => Prefixes::OTHER,
    'lctgm' => Prefixes::OTHER,
    'local/osu' => Prefixes::OTHER,
    'mesh' => Prefixes::MESH,
    'ndlsh' => Prefixes::OTHER,
    'nlksh' => Prefixes::OTHER
  }.freeze

  # default field mapping is based only on ind2, and topic headings (as
  # opposed to name/title headings) vary significantly across thesauri
  default_field_mapping = FieldConfig.new(lambda { |f|
    case f.indicator2
    when '0' then Prefixes::SUBJECT
    when '1' then Prefixes::CHILDRENS
    when '2' then Prefixes::MESH
    when '4' then Prefixes::OTHER
    end
  })

  # for name/title, ind2=='0'/'1'/'2' are _all_ backed by LCNAF. See:
  # https://www.loc.gov/aba/cyac/childsubjhead.html
  # https://www.nlm.nih.gov/tsd/cataloging/trainingcourses/mesh/mod8_020.html
  base_factory = lambda { |base|
    lambda { |f|
      case f.indicator2
      when '0', '1', '2' then base
      when '4' then Prefixes::OTHER
      end
    }
  }
  name_general = FieldConfig.new(base_factory.call(Prefixes::NAME))
  title_general = FieldConfig.new(base_factory.call(Prefixes::TITLE))
  geo_general = FieldConfig.new(base_factory.call(Prefixes::GEO))
  static_other = FieldConfig.new(lambda { |f|
    # For now, treat all of these as "other".
    # NOTE: 2nd indicator for local subject fields is inconsistently applied; map everything to "other"
    %w[0 1 2 4].include?(f.indicator2) ? Prefixes::OTHER : nil
  })

  # Subject field tags that are processed, each with the FieldConfig used when no `$2` is present.
  FIELDS = {
    '600' => name_general,
    '610' => name_general,
    '611' => name_general,
    '630' => title_general,
    '650' => default_field_mapping,
    '651' => geo_general,
    '690' => static_other, # topical (650)
    '691' => static_other, # geographic (651)
    #'696' => static_other # personal name (600) NOTE: not currently mapped!
    '697' => static_other # corporate name (610)
  }.freeze

  # Collects the subject headings of a record.
  # @param rec [MARC::Record]
  # @return [Hash, nil] see map_to_input_fields; nil when the record yields no usable heading
  def self.prepare_subjects(rec)
    acc = []
    rec.fields(FIELDS.keys).each { |f| filter_subject(f, f.tag, acc) }
    rec.fields('880').each do |f|
      linked_tag = linked_subject_tag(f)
      filter_subject(f, linked_tag, acc) if linked_tag
    end
    acc.empty? ? nil : map_to_input_fields(acc)
  end

  # Finds the subject tag that an 880 field is linked to. A `$6` linkage value starts with the
  # linked tag and normally continues with an occurrence number (e.g. "650-01"), so only the first
  # three characters are compared against FIELDS; a bare tag such as "650" still matches.
  # @param field [MARC::DataField]
  # @return [String, nil] a key of FIELDS, or nil when no `$6` points at a subject field
  def self.linked_subject_tag(field)
    field.each do |sf|
      next unless sf.code == '6'

      tag = sf.value.to_s[0, 3]
      return tag if FIELDS.key?(tag)
    end
    nil
  end

  # Keys that may appear in a JSON-serialized heading.
  ONLY_KEYS = [:val, :prefix, :append, :local, :vernacular].freeze

  # Serializes the accumulated heading structs.
  # @param acc [Array<Hash>] structs produced by filter_subject (mutated here)
  # @return [Hash] with keys :xfacet, :stored_lcsh, :stored_childrens, :stored_mesh, :stored_local;
  #   only :xfacet and :stored_local are ever populated by this method, the rest stay nil
  def self.map_to_input_fields(acc)
    xfacet = [] # provisionally instantiate; we'll almost always need it
    ret = {
      # `xfacets` entries support browse/facet, and will be mapped to stored fields solr-side
      xfacet: nil,
      # `stored_*` fields (below) are stored only, and do _not_ support browse/facet
      stored_lcsh: nil,
      stored_childrens: nil,
      stored_mesh: nil,
      stored_local: nil
    }
    acc.each do |struct|
      last = struct[:parts].last
      # Normalize trailing punctuation on the last heading component. If a comma is present (to be
      # normalized away), then any `.` present is integral (i.e., not ISBD punctuation), and thus
      # should be left intact as part of the heading.
      Marc.trim_trailing_comma!(last) || Marc.trim_trailing_period!(last)
      if struct[:local] && struct[:prefix] == Prefixes::OTHER
        # local subjects without source specified are really too messy, so they should bypass
        # xfacet processing and be placed directly in stored field for display only
        struct[:val] = struct.delete(:parts).join('--')
        struct.delete(:prefix)
        # slice (rather than a serializer option) so the key filter does not depend on which
        # JSON implementation provides #to_json
        (ret[:stored_local] ||= []) << struct.slice(*ONLY_KEYS).to_json
      elsif struct.size == 2
        # only `parts` and `prefix` (required keys) are present; use legacy format (for now
        # we're mainly doing this to incidentally test backward compatibility of server-side
        # parsing
        xfacet << (struct[:prefix] + struct[:parts].join('--'))
      else
        # simply map `parts` to `val`
        struct[:val] = struct.delete(:parts).join('--')
        xfacet << struct.slice(*ONLY_KEYS).to_json
      end
    end
    ret[:xfacet] = xfacet unless xfacet.empty?
    ret
  end

  # Builds a heading struct for the field and appends it to acc when it is usable.
  # @param field [MARC::DataField]
  # @param tag [String] a key of FIELDS (for 880 fields, the linked tag)
  # @param acc [Array<Hash>] accumulator, appended to in place
  def self.filter_subject(field, tag, acc)
    ret = build_subject_struct(field, tag)
    return nil unless ret
    return nil unless map_prefix(ret, tag, field)
    acc << ret if post_process(ret)
  end

  # Determines the heading prefix and stores it in ret[:prefix].
  # @return [String, nil] the prefix, or nil (ret untouched) when none applies
  def self.map_prefix(ret, tag, field)
    if ret[:source_specified]
      # source_specified takes priority. NOTE: This is true even if ind2!=7 (i.e., source_specified
      # shouldn't even apply), because we want to be lenient with our parsing, so the principle is that
      # we defer to the _most explicit_ heading type declaration
      prefix = THESAURI[ret[:source_specified].downcase]
    else
      # in the absence of `source_specified`, handling depends on field. NOTE: fields should be
      # pre-filtered to only valid codes, so intentionally don't use the safe-nav operator here
      prefix = FIELDS[tag].map_prefix(field)
    end
    prefix ? (ret[:prefix] = prefix) : nil
  end

  # Walks the subfields of a field and assembles the heading struct.
  # @param field [MARC::DataField]
  # @param tag [String] a key of FIELDS (for 880 fields, the linked tag)
  # @return [Hash, nil] struct with :count and :parts plus optional :local, :vernacular,
  #   :source_specified and :append; nil when a local heading's `$a` is a PRO/CHR entry
  def self.build_subject_struct(field, tag)
    # core String#start_with? (not the ActiveSupport-only alias) so this works without Rails
    local = field.indicator2 == '4' || tag.start_with?('69')
    ret = {
      count: 0,
      parts: [],
    }
    ret[:local] = true if local
    ret[:vernacular] = true if field.tag == '880'
    field.each do |sf|
      case sf.code
      when '0', '6', '8', '5', '1'
        # ignore these subfields
        next
      when 'a'
        # filter out PRO/CHR entirely (but only need to check on local heading types)
        return nil if local && sf.value =~ /^%?(PRO|CHR)([ $]|$)/
      when '2'
        # use the _last_ source specified, so don't worry about overriding any prior values
        ret[:source_specified] = sf.value.strip
        next
      when 'e', 'w'
        # 'e' is relator term; not sure what 'w' is. These are used to append for record-view display only
        (ret[:append] ||= []) << sf.value.strip
        next
      when 'b', 'c', 'd', 'p', 'q', 't'
        # these are appended to the last component if possible (i.e., when joined, should have no delimiter)
        append_to_last_part(ret[:parts], sf.value.strip)
        ret[:count] += 1
        next
      end
      # the usual case; add a new component to `parts`
      append_new_part(ret[:parts], sf.value.strip)
      ret[:count] += 1
    end
    ret
  end

  # Adds value as a new component, first trimming trailing punctuation from the previous component.
  def self.append_new_part(parts, value)
    unless parts.empty?
      last = parts.last
      Marc.trim_trailing_comma!(last) || Marc.trim_trailing_period!(last)
    end
    parts << value
  end

  # Appends value to the last component (space separated), or starts the first component.
  def self.append_to_last_part(parts, value)
    if parts.empty?
      parts << value
    else
      parts.last << ' ' + value
    end
  end

  # Removes the :count bookkeeping key and normalizes single-subfield headings.
  # @return [Hash, nil] ret, or nil when no heading subfield was counted
  def self.post_process(ret)
    case ret.delete(:count)
    when 0
      return nil
    when 1
      # when we've only encountered one subfield, assume that it might be a poorly-coded record
      # with a bunch of subdivisions mashed together, and attempt to convert it to a consistent
      # form. Note that we must separately track count (as opposed to simply checking `parts.size`),
      # because we're using "subdivision count" as a heuristic for the quality level of the heading.
      only = ret[:parts].first
      only.gsub!(/([[[:alnum:]])])(\s+--\s*|\s*--\s+)([[[:upper:]][[:digit:]]])/, '\1--\3')
      only.gsub!(/([[[:alpha:]])])\s+-\s+([[:upper:]]|[[:digit:]]{2,})/, '\1--\2')
      only.gsub!(/([[[:alnum:]])])\s+-\s+([[:upper:]])/, '\1--\2')
    end
    ret
  end
end
327
+
328
# Encoding level codes and a ranking of them (RANK), where a lower rank means a better record.
module EncodingLevel
  # Official MARC codes (https://www.loc.gov/marc/bibliographic/bdleader.html)
  FULL = ' '
  FULL_NOT_EXAMINED = '1'
  UNFULL_NOT_EXAMINED = '2'
  ABBREVIATED = '3'
  CORE = '4'
  PRELIMINARY = '5'
  MINIMAL = '7'
  PREPUBLICATION = '8'
  UNKNOWN = 'u'
  NOT_APPLICABLE = 'z'

  # OCLC extension codes (https://www.oclc.org/bibformats/en/fixedfield/elvl.html)
  OCLC_FULL = 'I'
  OCLC_MINIMAL = 'K'
  OCLC_BATCH_LEGACY = 'L'
  OCLC_BATCH = 'M'
  OCLC_SOURCE_DELETED = 'J'

  # Rank per encoding level. PREPUBLICATION, UNKNOWN and NOT_APPLICABLE have no entry, so looking
  # them up yields nil. Frozen so the shared lookup table cannot be modified at runtime.
  RANK = {
    # top 4 (per nelsonrr), do not differentiate among "good" records
    FULL => 0,
    FULL_NOT_EXAMINED => 0, # 1
    OCLC_FULL => 0, # 2
    CORE => 0, # 3
    UNFULL_NOT_EXAMINED => 4,
    ABBREVIATED => 5,
    PRELIMINARY => 6,
    MINIMAL => 7,
    OCLC_MINIMAL => 8,
    OCLC_BATCH => 9,
    OCLC_BATCH_LEGACY => 10,
    OCLC_SOURCE_DELETED => 11
  }.freeze
end
364
+
365
+ # Genre/Form
366
+ # display field selector logic
367
+ # reference: https://www.loc.gov/marc/bibliographic/bd655.html
368
+ #
369
+ # We display Genre/Term values if they fulfill the following criteria
370
+ # - The field is in MARC 655, or the field is in MARC 880 with a subfield 6 that includes '655'.
371
+ # AND
372
+ # - Above fields have an indicator 2 value of: 0 (LCSH) or 4 (No source specified).
373
+ # OR
374
+ # - Above fields have a subfield 2 (ontology code) in the list of allowed values.
375
# Decides whether a field is a genre/form field whose values should be displayed.
class GenreTools
  GENRE_FIELD_TAG = '655'
  ALT_GENRE_FIELD_TAG = '880'
  ALLOWED_INDICATOR2_VALUES = %w[0 4].freeze

  class << self
    # True when the field is a genre field and either its `$2` is an allowed ontology or its
    # second indicator is allowed.
    # @param [MARC::DataField] field
    # @return [TrueClass, FalseClass]
    def allowed_genre_field?(field)
      return false unless genre_field?(field)

      allowed_code?(field) || allowed_ind2?(field)
    end

    # True for a 655 field, or an 880 field whose `$6` contains '655'.
    # @param [MARC::DataField] field
    # @return [TrueClass, FalseClass]
    def genre_field?(field)
      field.tag == GENRE_FIELD_TAG ||
        (field.tag == ALT_GENRE_FIELD_TAG && MarcUtil.has_subfield_value?(field, '6', /#{GENRE_FIELD_TAG}/))
    end

    # True when a `$2` value is one of PennLib::Marc::ALLOWED_SUBJ_GENRE_ONTOLOGIES.
    # @param [MARC::DataField] field
    # @return [TrueClass, FalseClass]
    def allowed_code?(field)
      MarcUtil.subfield_value_in?(field, '2', PennLib::Marc::ALLOWED_SUBJ_GENRE_ONTOLOGIES)
    end

    # 0 in ind2 means LCSH
    # 4 in ind2 means "Source not specified"
    # Uses core Array#include? so the check does not depend on ActiveSupport's Object#in?.
    # @param [MARC::DataField] field
    # @return [TrueClass, FalseClass]
    def allowed_ind2?(field)
      ALLOWED_INDICATOR2_VALUES.include?(field.indicator2)
    end
  end
end
411
+
412
+ # class to hold "utility" methods used by other methods in the main Marc class and the new *Tools classes
413
+ # for now, leave methods as also defined in Marc class to avoid unexpected issues
414
# Stateless subfield predicates shared by the Marc class and the *Tools classes.
class MarcUtil
  class << self
    # returns true if field has a value that matches
    # passed-in regex and passed in subfield
    # @param [MARC::DataField] field
    # @param [String|Integer|Symbol] subf
    # @param [Regexp] regex
    # @return [TrueClass, FalseClass]
    def has_subfield_value?(field, subf, regex)
      code = subf.to_s # converted once rather than for every subfield
      field.any? { |sf| sf.code == code && sf.value =~ regex }
    end

    # returns true if the given subfield of field has a value that is an element of array.
    # Uses core #include? so the check does not depend on ActiveSupport's Object#in?.
    # @param [MARC::DataField] field
    # @param [String|Integer|Symbol] subf
    # @param [Array] array
    # @return [TrueClass, FalseClass]
    def subfield_value_in?(field, subf, array)
      code = subf.to_s
      field.any? { |sf| sf.code == code && array.include?(sf.value) }
    end
  end
end
435
+
436
+ # Class for doing extraction and processing on MARC::Record objects.
437
+ # This is intended to be used in both indexing code and front-end templating code
438
+ # (since MARC is stored in Solr). As such, there should NOT be any traject-specific
439
+ # things here.
440
+ #
441
+ # For a slight performance increase (~5%?) we use frozen_string_literal for immutable strings.
442
+ #
443
+ # Method naming conventions:
444
+ #
445
+ # *_values = indicates method returns an Array of values
446
+ #
447
+ # *_display = indicates method is intended to be used for
448
+ # individual record view (we should name things more meaningfully, according to
449
+ # the logic by which the values are generated, but I don't always know what this
450
+ # logic is, necessarily - JC)
451
+ #
452
+ class Marc
453
+ include BlacklightSolrplugins::Indexer
454
+
455
+ attr_accessor :code_mappings
456
+
457
# Value of 944 $a that marks a record as a curated database.
DATABASES_FACET_VALUE = 'Database & Article Index'
# `$2` source codes accepted for subject and genre headings. Frozen because the list is shared
# by several classes and must not be modified at runtime.
ALLOWED_SUBJ_GENRE_ONTOLOGIES = %w[aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
                                   local/osu mesh ndlsh nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp].freeze
460
+
461
+ # @param [PennLib::CodeMappings]
462
# Stores the code mappings that the lookup methods of this class (relator_codes, locations,
# loc_classifications, dewey_classifications, languages) delegate to.
def initialize(code_mappings)
  @code_mappings = code_mappings
end
465
+
466
# The current calendar year, computed on first use and then cached on the instance.
# @return [Integer]
def current_year
  return @current_year if @current_year

  @current_year = Date.today.year
end
469
+
470
# Relator code lookup taken from the code mappings given to the constructor.
# Used in this class as `relator_codes[code]`.
def relator_codes
  @code_mappings.relator_codes
end
473
+
474
# Locations lookup taken from the code mappings given to the constructor.
def locations
  @code_mappings.locations
end
477
+
478
# LoC classification lookup taken from the code mappings given to the constructor.
def loc_classifications
  @code_mappings.loc_classifications
end
481
+
482
# Dewey classification lookup taken from the code mappings given to the constructor.
def dewey_classifications
  @code_mappings.dewey_classifications
end
485
+
486
# Languages lookup taken from the code mappings given to the constructor.
def languages
  @code_mappings.languages
end
489
+
490
# Returns a copy of s without a trailing colon and the whitespace around it.
# @param s [String]
# @return [String]
def trim_trailing_colon(s)
  trailing_colon = /\s*:\s*$/
  s.sub(trailing_colon, '')
end
493
+
494
# Returns a copy of s without a trailing semicolon and the whitespace around it.
# @param s [String]
# @return [String]
def trim_trailing_semicolon(s)
  trailing_semicolon = /\s*;\s*$/
  s.sub(trailing_semicolon, '')
end
497
+
498
# Returns a copy of s without a final '='. Unlike the other trim_trailing_* helpers, whitespace
# next to the sign is left in place.
# @param s [String]
# @return [String]
def trim_trailing_equal(s)
  trailing_equal = /=$/
  s.sub(trailing_equal, '')
end
501
+
502
# Returns a copy of s without a trailing slash and the whitespace around it.
# @param s [String]
# @return [String]
def trim_trailing_slash(s)
  trailing_slash = /\s*\/\s*$/
  s.sub(trailing_slash, '')
end
505
+
506
# Instance-level convenience wrapper: returns a copy of s without a trailing comma.
# Delegates to the class-level method in non-destructive mode.
def trim_trailing_comma(s)
  self.class.trim_trailing_comma(s, false)
end
509
+
510
# Destructive variant: removes a trailing comma from s in place.
# Returns nil when s had no trailing comma (the result of String#sub!).
def self.trim_trailing_comma!(s)
  trim_trailing_comma(s, true)
end
513
+
514
# Removes a trailing comma and the whitespace around it.
# @param s [String]
# @param inplace [Boolean] when true, s itself is modified
# @return [String, nil] the trimmed string; in inplace mode nil when nothing was removed
def self.trim_trailing_comma(s, inplace)
  trailing_comma = /\s*,\s*$/
  return s.sub!(trailing_comma, '') if inplace

  s.sub(trailing_comma, '')
end
518
+
519
# Instance-level convenience wrapper: returns s without a trailing period.
# Delegates to the class-level method in non-destructive mode.
def trim_trailing_period(s)
  self.class.trim_trailing_period(s, false)
end
522
+
523
# Destructive variant: removes a trailing period from s in place.
# Returns nil when s was left unchanged.
def self.trim_trailing_period!(s)
  trim_trailing_period(s, true)
end
526
+
527
# Removes a trailing period, unless the string ends in 'etc.' or in a single capital letter
# followed by a period (e.g. an initial), in which case the period is kept.
# @param s [String]
# @param inplace [Boolean] when true, s itself is modified
# @return [String, nil] the resulting string; in inplace mode nil when s was left unchanged
def self.trim_trailing_period(s, inplace)
  keep_period = s.end_with?('etc.') || s =~ /(^|[^a-zA-Z])[A-Z]\.$/
  if keep_period
    # nil if unchanged, for consistency with standard `inplace` semantics
    return inplace ? nil : s
  end

  trailing_period = /\.\s*$/
  return s.sub!(trailing_period, '') if inplace

  s.sub(trailing_period, '')
end
535
+
536
+ # squish in ActiveSupport
537
# Strips leading/trailing whitespace and replaces every run of two or more whitespace
# characters with a single space. A lone tab or newline inside the string is left as is.
# @param s [String]
# @return [String]
def normalize_space(s)
  trimmed = s.strip
  trimmed.gsub(/\s{2,}/, ' ')
end
540
+
541
+ # this logic matches substring-before in XSLT: if no match for sub, returns an empty string
542
# Returns the part of s before the first occurrence of sub, matching substring-before in XSLT:
# if sub does not occur, an empty string is returned.
#
# Implemented with String#index / String#partition rather than split, because split(' ') is
# special-cased (awk-style whitespace splitting) and would give a wrong answer for a single-space
# separator; this also needs core Ruby only.
# @param s [String]
# @param sub [String, Regexp]
# @return [String]
def substring_before(s, sub)
  return '' if s.index(sub).nil?

  s.partition(sub).first
end
545
+
546
+ # this logic matches substring-after in XSLT: if no match for sub, returns an empty string
547
# Returns the part of s after the first occurrence of sub, matching substring-after in XSLT:
# if sub does not occur, an empty string is returned.
#
# String#partition yields ['whole string', '', ''] when there is no match, so its last element is
# already the empty string in that case. Unlike split(' '), it has no whitespace special case,
# so a String is always returned, never nil.
# @param s [String]
# @param sub [String, Regexp]
# @return [String]
def substring_after(s, sub)
  s.partition(sub).last
end
550
+
551
# Joins the elements of array with single spaces, then passes the result through normalize_space.
def join_and_trim_whitespace(array)
  normalize_space(array.join(' '))
end
554
+
555
+ # join subfield values together (as selected using passed-in block)
556
# Joins, with single spaces, the non-blank values of the subfields for which the block returns
# a truthy value.
# @param field [MARC::DataField]
# @yieldparam subfield [MARC::Subfield]
# @return [String]
def join_subfields(field, &block)
  chosen = field.select { |subfield| block.call(subfield) }
  values = chosen.map(&:value)
  values.select { |value| value.present? }.join(' ')
end
559
+
560
+ # this is used for filtering in lots of places
561
+ # returns a lambda that can be passed to Enumerable#select
562
+ # using the & syntax
563
# Memoized lambda that is true for any subfield whose code is neither '6' nor '8'.
# @return [Proc]
def subfield_not_6_or_8
  @subfield_not_6_or_8 ||= ->(subfield) { !%w{6 8}.include?(subfield.code) }
end
568
+
569
+ # returns a lambda checking if passed-in subfield's code
570
+ # is a member of array
571
# Builds a lambda that is true for a subfield whose code is an element of array.
# @param array [Array<String>]
# @return [Proc]
def subfield_in(array)
  ->(subfield) { array.include?(subfield.code) }
end
574
+
575
+ # returns a lambda checking if passed-in subfield's code
576
+ # is NOT a member of array
577
# Builds a lambda that is true for a subfield whose code is NOT an element of array.
# @param array [Array<String>]
# @return [Proc]
def subfield_not_in(array)
  ->(subfield) { !array.include?(subfield.code) }
end
580
+
581
+
582
+ # 11/2018 kms: eventually should deprecate has_subfield6_value and use this for all
583
+ # returns true if field has a value that matches
584
+ # passed-in regex and passed in subfield
585
# True if the field has a subfield with code subf whose value matches regex.
# @param field [MARC::DataField]
# @param subf [String] subfield code
# @param regex [Regexp]
# @return [Boolean]
def has_subfield_value(field, subf, regex)
  field.any? do |sf|
    next false unless sf.code == subf

    sf.value =~ regex
  end
end
588
+
589
# True if the field has a subfield with code subf whose value is an element of array.
# Uses #member?, as the rest of this class does, instead of ActiveSupport's Object#in?.
# @param field [MARC::DataField]
# @param subf [String] subfield code
# @param array [Array<String>]
# @return [Boolean]
def subfield_value_in(field, subf, array)
  field.any? { |sf| sf.code == subf && array.member?(sf.value) }
end
592
+
593
+ # common case of wanting to extract subfields as selected by passed-in block,
594
+ # from 880 datafield that has a particular subfield 6 value
595
+ # @param subf6_value [String|Array] either a single str value to look for in sub6 or an array of them
596
+ # @param block [Proc] takes a subfield as argument, returns a boolean
597
# For every 880 field whose subfield 6 starts with the given value(s), joins (space separated)
# the values of the subfields selected by the block.
# @param rec [MARC::Record]
# @param subf6_value [String|Array] either a single str value to look for in sub6 or an array of them
# @param block [Proc] takes a subfield as argument, returns a boolean
# @return [Array<String>] one string per matching 880 field
def get_880(rec, subf6_value, &block)
  # several candidate values become one alternation group
  regex_value = subf6_value.is_a?(Array) ? "(#{subf6_value.join('|')})" : subf6_value

  linked = rec.fields('880').select { |f| has_subfield6_value(f, /^#{regex_value}/) }
  linked.map do |field|
    wanted = field.select { |sf| block.call(sf) }
    wanted.map(&:value).join(' ')
  end
end
609
+
610
+ # common case of wanting to extract all the subfields besides 6 or 8,
611
+ # from 880 datafield that has a particular subfield 6 value
612
# Shortcut for get_880 that keeps every subfield except codes 6 and 8.
# @param rec [MARC::Record]
# @param subf6_value [String|Array] value(s) the subfield 6 of the 880 must start with
# @return [Array<String>] one string per matching 880 field
def get_880_subfield_not_6_or_8(rec, subf6_value)
  linkage_codes = %w{6 8}
  get_880(rec, subf6_value) { |sf| !linkage_codes.member?(sf.code) }
end
617
+
618
+ # returns the non-6,8 subfields from a datafield and its 880 link
619
# returns the non-6,8 subfields from a datafield and its 880 link:
# values from the fields with the given tag come first, followed by those of the linked 880s.
# @param rec [MARC::Record]
# @param tag [String]
# @return [Array<String>]
def get_datafield_and_880(rec, tag)
  from_tag = rec.fields(tag).map do |field|
    join_subfields(field, &subfield_not_in(%w{6 8}))
  end
  from_tag + get_880_subfield_not_6_or_8(rec, tag)
end
627
+
628
# Appends cumulative title variants to acc. For subfield values [v1, v2, ...] the variants are
# v1, "v1 v2", ...; when non_filing contains a digit 1-9, that many leading (non-filing)
# characters are split off v1, and each variant is added both without and with them.
#
# The first value is read with non-destructive slicing, so the caller's string (which may be the
# record's own subfield value) is neither altered nor required to be unfrozen.
# NOTE: subfields is consumed (shifted) by this method, as before.
# @param acc [Array<String>] accumulator, appended to in place
# @param non_filing [String, nil] non-filing characters indicator
# @param subfields [Array<String, nil>] subfield values; nil entries after the first are skipped
# @return [nil]
def append_title_variant_field(acc, non_filing, subfields)
  first = subfields.shift
  return if first.nil? # there's something wrong; first is always required

  base = first
  prefix = nil
  if non_filing =~ /[1-9]/
    count = non_filing.to_i
    prefix = first[0, count]
    base = first[count..-1].to_s # to_s: slicing past the end yields nil
  end
  loop do
    acc << base
    acc << prefix + base unless prefix.nil?
    return if subfields.empty?

    # skip over missing parts; stop when nothing usable is left
    while (next_part = subfields.shift).nil?
      return if subfields.empty?
    end
    base = "#{base} #{next_part}"
  end
end
646
+
647
+ # returns true if field's subfield 6 has a value that matches
648
+ # passed-in regex
649
# True if the field has a subfield 6 whose value matches regex.
# @param field [MARC::DataField]
# @param regex [Regexp]
# @return [Boolean]
def has_subfield6_value(field, regex)
  field.any? do |sf|
    next false unless sf.code == '6'

    sf.value =~ regex
  end
end
652
+
653
+ # for a string 's', return a hash of ref_type => Array of references,
654
+ # where a reference is a String or a Hash representing a multipart string
655
# For a string 's', returns a hash of ref_type => Array of references, or nil when there are
# none for it.
# TODO: just simple test data for now; hook up to actual cross ref data
# @param s [String]
# @return [Hash{String => Array<String>}, nil]
def get_subject_references(s)
  see_also = {
    'Cyberspace' => [ 'Internet', 'Computer networks' ],
    'Internet' => [ 'Cyberspace', 'Computer networks' ],
    'Computer networks' => [ 'Cyberspace', 'Internet' ]
  }
  return { 'see_also' => see_also[s] } if see_also.key?(s)

  # one way
  { 'use_instead' => [ 'Computer programming' ] } if s == 'Programming Languages'
end
669
+
670
# Memoized list of the field tags treated as subject fields.
# @return [Array<String>]
def subject_codes
  return @subject_codes if @subject_codes

  @subject_codes = %w[600 610 611 630 650 651]
end
673
+
674
# Memoized mapping from subject field tag (as an Integer) to its xfacet prefix.
# @return [Hash{Integer => String}]
def subject_codes_to_xfacet_prefixes
  return @subject_codes_to_xfacet_prefixes if @subject_codes_to_xfacet_prefixes

  @subject_codes_to_xfacet_prefixes = {
    600 => 'n', 610 => 'n', 611 => 'n',
    630 => 't',
    650 => 's',
    651 => 'g'
  }
end
684
+
685
# True when the field's tag is one of subject_codes and its second indicator is 0, 2 or 4, or
# is 7 with a subfield 2 naming an allowed ontology.
# @param field [MARC::DataField]
# @return [Boolean]
def is_subject_field(field)
  return false unless subject_codes.member?(field.tag)
  return true if %w(0 2 4).member?(field.indicator2)
  # 10/2018 kms: add 2nd Ind 7
  return false unless field.indicator2 == '7'

  field.any? { |sf| sf.code == '2' && ALLOWED_SUBJ_GENRE_ONTOLOGIES.member?(sf.value) }
end
692
+
693
# Truthy when the subfield has code 'a' or '%' and its value starts with PRO or CHR (optionally
# preceded by '%') followed by a space, a '$', or the end of the line.
# @param sf [MARC::Subfield]
# @return [Integer, nil, false] match position, nil for no match, false for other subfield codes
def reject_pro_chr(sf)
  return false unless %w{a %}.member?(sf.code)

  sf.value =~ /^%?(PRO|CHR)([ $]|$)/
end
696
+
697
# True when any 944 field of the record has a subfield a equal to 'Database & Article Index'.
# @param rec [MARC::Record]
# @return [Boolean]
def is_curated_database(rec)
  rec.fields('944').any? do |field|
    field.any? { |sf| sf.code == 'a' && sf.value == 'Database & Article Index' }
  end
end
704
+
705
# Collects the distinct values of the first subfield a of each 944 field, leaving out values
# that are purely a canonical integer (i.e. equal to their own to_i.to_s).
# @param rec [MARC::Record]
# @return [Array<String>]
def get_curated_format(rec)
  formats = rec.fields('944').map do |field|
    sub_a = field.find { |sf| sf.code == 'a' }
    next if sub_a.nil?

    value = sub_a.value
    value == value.to_i.to_s ? nil : value
  end
  formats.compact.uniq
end
711
+
712
# For a curated database record, returns the first subfield b of every 944 field that has a
# subfield a equal to PennLib::Marc::DATABASES_FACET_VALUE. Other records yield an empty array.
# @param rec [MARC::Record]
# @return [Array<String>]
def get_db_types(rec)
  return [] unless is_curated_database(rec)

  types = rec.fields('944').map do |field|
    next unless field.any? { |sf| sf.code == 'a' && sf.value == PennLib::Marc::DATABASES_FACET_VALUE }

    type_sf = field.find { |sf| sf.code == 'b' }
    type_sf.nil? ? nil : type_sf.value
  end
  types.compact
end
721
+
722
# For a curated database record, returns the first subfield a of every 943 field that has a
# subfield 2 equal to 'penncoi'. Other records yield an empty array.
# @param rec [MARC::Record]
# @return [Array<String>]
def get_db_categories(rec)
  return [] unless is_curated_database(rec)

  categories = rec.fields('943').map do |field|
    next unless field.any? { |sf| sf.code == '2' && sf.value == 'penncoi' }

    category_sf = field.find { |sf| sf.code == 'a' }
    category_sf.nil? ? nil : category_sf.value
  end
  categories.compact
end
731
+
732
# For a curated database record, returns one entry per 943 field that has a subfield 2 equal to
# 'penncoi' and a subfield a: "category--subcategory" when a subfield b is present, otherwise
# the category value alone. Other records yield an empty array.
#
# The category-only case returns the subfield's value, so every element is a String (previously
# the subfield object itself leaked into the result).
# NOTE(review): confirm whether fields without a subcategory should be reported at all.
# @param rec [MARC::Record]
# @return [Array<String>]
def get_db_subcategories(rec)
  return [] unless is_curated_database(rec)

  rec.fields('943').map do |field|
    next unless field.any? { |sf| sf.code == '2' && sf.value == 'penncoi' }

    category = field.find { |sf| sf.code == 'a' }
    next if category.nil?

    sub_category = field.find { |sf| sf.code == 'b' }
    sub_category.nil? ? category.value : "#{category.value}--#{sub_category.value}"
  end.compact
end
744
+
745
+ # TODO: MG removed the join_subject_parts method when adding in the SubjectConfig module here. This method still
746
+ # appears to be in use in the FranklinIndexer even though many subject fields are now processed differently
747
+ # Work should be done to remove all usages of join_subject_parts. Perhaps functionality from SubjectConfig could
748
+ # be used instead
749
# Builds subject facet values from the fields accepted by is_subject_field.
# For each field, up to two values are produced (both with trailing period trimmed):
#   - the result of join_subject_parts(field), unless toplevel_only is set
#   - the joined subfield a values (minus PRO/CHR entries), when the field has a subfield a and
#     either toplevel_only is set or the field also has subfields other than a
# Blank values are dropped.
# NOTE(review): join_subject_parts is not defined in the visible part of this file (see the TODO
# above this method), so the non-toplevel path depends on it being available elsewhere.
def get_subject_facet_values(rec, toplevel_only = false)
  rec.fields.find_all { |f| is_subject_field(f) }.map do |field|
    just_a = nil
    if field.any? { |sf| sf.code == 'a' } && (toplevel_only || field.any? { |sf| sf.code != 'a' })
      just_a = field.find_all(&subfield_in(%w{a})).map(&:value)
                 .select { |v| v !~ /^%?(PRO|CHR)/ }.join(' ')
    end
    [ (toplevel_only ? nil : join_subject_parts(field)), just_a ].compact.map{ |v| trim_trailing_period(v) }
  end.flatten(1).select { |v| v.present? }
end
759
+
760
# Builds prefixed subject values for the fields accepted by is_subject_field: each value is the
# prefix from subject_codes_to_xfacet_prefixes (looked up by integer tag) followed by the
# double-dash-joined subject parts with the trailing period trimmed. Fields whose joined value
# is blank are skipped.
# NOTE(review): relies on join_subject_parts, which is not defined in the visible part of this file.
def get_subject_xfacet_values(rec)
  rec.fields.find_all { |f| is_subject_field(f) }
    .map { |f| { field: f, prefix: subject_codes_to_xfacet_prefixes[f.tag.to_i] } }
    .map { |f_struct| f_struct[:value] = trim_trailing_period(join_subject_parts(f_struct[:field], double_dash: true)); f_struct }
    .select { |f_struct| f_struct[:value].present? }
    .map { |f_struct| f_struct[:prefix] + f_struct[:value] }
  # don't need to wrap data in #references anymore because cross refs are now handled Solr-side
  # .map { |s| references(s, refs: get_subject_references(s)) }
end
769
+
770
# Memoized list of the field tags whose content is searched as subjects.
# @return [Array<String>]
def subject_search_tags
  return @subject_search_tags if @subject_search_tags

  @subject_search_tags = %w[541 561 600 610 611 630 650 651 653]
end
773
+
774
# True when the field has a second indicator of 0, 1, 2, 4 or 7 and is either a subject search
# tag, a 69x field, or an 880 whose first subfield 6 starts with such a tag.
# @param field [MARC::DataField, MARC::ControlField]
# @return [Boolean]
def is_subject_search_field(field)
  # 11/2018 kms: add 2nd Ind 7
  return false unless field.respond_to?(:indicator2) && %w{0 1 2 4 7}.member?(field.indicator2)
  return true if subject_search_tags.member?(field.tag) || field.tag.start_with?('69')
  return false unless field.tag == '880'

  # the first three characters of the first subfield 6 name the linked tag
  linkage = field.find_all { |sf| sf.code == '6' }.map(&:value).first || ''
  linked_tag = linkage[0..2]
  subject_search_tags.member?(linked_tag) || linked_tag.start_with?('69')
end
787
+
788
# Builds one whitespace-normalized search string per field accepted by is_subject_search_field:
#   - subfield a: value with a leading PRO/CHR marker and a trailing '?' removed
#   - subfield 4: "<code>, <relator term looked up in relator_codes>"
#   - subfields 5, 6 and 8 are skipped; every other subfield contributes its value
# Fields that contribute nothing are left out.
# @param rec [MARC::Record]
# @return [Array<String>]
def get_subject_search_values(rec)
  # this has been completely migrated
  search_fields = rec.fields.find_all { |f| is_subject_search_field(f) }
  values = search_fields.map do |field|
    subj = field.each_with_object([]) do |sf, pieces|
      case sf.code
      when 'a'
        pieces << " #{sf.value.gsub(/^%?(PRO|CHR)/, '').gsub(/\?$/, '')}"
      when '4'
        pieces << "#{sf.value}, #{relator_codes[sf.value]}"
      when '5', '6', '8'
        next
      else
        pieces << " #{sf.value}"
      end
    end
    join_and_trim_whitespace(subj) if subj.present?
  end
  values.compact
end
805
+
806
+ # @returns [Array] of string field tags to examine for subjects
807
# Memoized list of string field tags to examine for subjects.
# @return [Array<String>]
def subject_600s
  return @subject_600s if @subject_600s

  @subject_600s = %w[600 610 611 630 650 651]
end
810
+
811
+ # 11/2018 kms: add local subj fields- always Local no matter the 2nd Ind
812
# Memoized list of the local subject field tags (always treated as local, whatever the 2nd indicator).
# @return [Array<String>]
def subject_69X
  return @subject_69X if @subject_69X

  @subject_69X = %w[690 691 697]
end
815
+
816
+ # 11/2018: add 69x as local subj, add 650 _7 as subj
817
# Builds the display structures for the subject headings of a record.
#
# indicator2 selects which headings are returned:
#   '0', '1', '2' - fields in subject_600s (or 880s whose subfield 6 starts with one of those tags)
#                   having that second indicator; for '0', fields with second indicator 7 and an
#                   allowed ontology in subfield 2 are included as well. Each result is a Hash with
#                   :value, :value_for_link, :value_append and :link_type 'subject_xfacet2'.
#   '4'           - local subjects: subject_600s fields with second indicator 4, plus subject_69X
#                   fields that have no subfield 2 matching 'penncoi'. Each result is a Hash with
#                   :value, :value_for_link and :link_type 'subject_search'.
# Any other value yields an empty array. Duplicate result hashes are dropped.
# @param rec [MARC::Record]
# @param indicator2 [String]
# @return [Array<Hash>]
def get_subjects_from_600s_and_800(rec, indicator2)
  # remembers every result hash seen so far; Set#add? returns nil for a repeat
  track_dups = Set.new
  acc = []
  if %w{0 1 2}.member?(indicator2)
    #Subjects, Childrens subjects, and Medical Subjects all share this code
    # also 650 _7, subjs w/ source specified in $2. These display as Subjects along w/ the ind2==0 650s
    acc += rec.fields
             .select { |f| subject_600s.member?(f.tag) ||
      (f.tag == '880' && has_subfield6_value(f, /^(#{subject_600s.join('|')})/)) }
             .select { |f| f.indicator2 == indicator2 || (f.indicator2 == '7' && indicator2 == '0' && f.any? do |sf|
      sf.code == '2' && ALLOWED_SUBJ_GENRE_ONTOLOGIES.member?(sf.value)
    end)}
             .map do |field|
      #added 2017/04/10: filter out 0 (authority record numbers) added by Alma
      value_for_link = join_subfields(field, &subfield_not_in(%w{0 6 8 2 e w}))
      # subfields a b c d p q t are joined with a space, all others with ' -- '
      sub_with_hyphens = field.select(&subfield_not_in(%w{0 6 8 2 e w})).map do |sf|
        pre = !%w{a b c d p q t}.member?(sf.code) ? ' -- ' : ' '
        pre + sf.value + (sf.code == 'p' ? '.' : '')
      end.join(' ')
      # subfields e and w are kept apart and returned as :value_append
      eandw_with_hyphens = field.select(&subfield_in(%w{e w})).map do |sf|
        ' -- ' + sf.value
      end.join(' ')
      if sub_with_hyphens.present?
        {
          value: sub_with_hyphens,
          value_for_link: value_for_link,
          value_append: eandw_with_hyphens,
          link_type: 'subject_xfacet2'
        }
      end
    end.compact.select { |val| track_dups.add?(val) }
  elsif indicator2 == '4'
    # Local subjects
    # either a tag in subject_600s list with ind2==4, or a tag in subject_69X list with any ind2.
    # but NOT a penn community of interest 690 (which have $2 penncoi )
    acc += rec.fields
             .select { |f| subject_600s.member?(f.tag) && f.indicator2 == '4' ||
      ( subject_69X.member?(f.tag) && !(has_subfield_value(f,'2',/penncoi/)) ) }
             .map do |field|
      # subfield a values, leaving out PRO/CHR entries
      suba = field.select(&subfield_in(%w{a}))
               .select { |sf| sf.value !~ /^%?(PRO|CHR)/ }
               .map(&:value).join(' ')
      #added 2017/04/10: filter out 0 (authority record numbers) added by Alma
      # 11/2018 kms: also do not display subf 5 or 2
      sub_oth = field.select(&subfield_not_in(%w{0 a 6 8 5 2})).map do |sf|
        pre = !%w{b c d p q t}.member?(sf.code) ? ' -- ' : ' '
        pre + sf.value + (sf.code == 'p' ? '.' : '')
      end
      # sub_oth is an Array; Array#join flattens it into the display string
      subj_display = [ suba, sub_oth ].join(' ')
      #added 2017/04/10: filter out 0 (authority record numbers) added by Alma
      # 11/2018 kms: also do not display subf 5 or 2
      sub_oth_no_hyphens = join_subfields(field, &subfield_not_in(%w{0 a 6 8 5 2}))
      subj_search = [ suba, sub_oth_no_hyphens ].join(' ')
      if subj_display.present?
        {
          value: subj_display,
          value_for_link: subj_search,
          link_type: 'subject_search'
        }
      end
    end.compact.select { |val| track_dups.add?(val) }
  end
  acc
end
881
+
882
+ # 11/2018: 650 _7 is also handled here
883
# Subject headings selected with second indicator '0'
# (see get_subjects_from_600s_and_800).
#
# @param [MARC::Record] rec
# @return [Array<Hash>]
def get_subject_display(rec)
  indicator2 = '0'
  get_subjects_from_600s_and_800(rec, indicator2)
end
886
+
887
# Subject headings selected with second indicator '1'
# (see get_subjects_from_600s_and_800).
#
# @param [MARC::Record] rec
# @return [Array<Hash>]
def get_children_subject_display(rec)
  indicator2 = '1'
  get_subjects_from_600s_and_800(rec, indicator2)
end
890
+
891
# Subject headings selected with second indicator '2'
# (see get_subjects_from_600s_and_800).
#
# @param [MARC::Record] rec
# @return [Array<Hash>]
def get_medical_subject_display(rec)
  indicator2 = '2'
  get_subjects_from_600s_and_800(rec, indicator2)
end
894
+
895
# Local subject headings: the '4' branch of get_subjects_from_600s_and_800.
#
# @param [MARC::Record] rec
# @return [Array<Hash>]
def get_local_subject_display(rec)
  indicator2 = '4'
  get_subjects_from_600s_and_800(rec, indicator2)
end
898
+
899
# Reads the :default_subject_stored_a entry of +doc+.
#
# @param doc [#[]] presumably a Solr document -- confirm against callers
# @return whatever +doc+ holds under that key
def get_subject_solrdoc_display(doc)
  stored_field = :default_subject_stored_a
  doc[stored_field]
end
902
+
903
# Reads the :childrens_subject_stored_a entry of +doc+.
#
# @param doc [#[]] presumably a Solr document -- confirm against callers
# @return whatever +doc+ holds under that key
def get_children_subject_solrdoc_display(doc)
  stored_field = :childrens_subject_stored_a
  doc[stored_field]
end
906
+
907
# Reads the :mesh_subject_stored_a entry of +doc+.
#
# @param doc [#[]] presumably a Solr document -- confirm against callers
# @return whatever +doc+ holds under that key
def get_medical_subject_solrdoc_display(doc)
  stored_field = :mesh_subject_stored_a
  doc[stored_field]
end
910
+
911
# Reads the :local_subject_stored_a entry of +doc+.
#
# @param doc [#[]] presumably a Solr document -- confirm against callers
# @return whatever +doc+ holds under that key
def get_local_subject_solrdoc_display(doc)
  stored_field = :local_subject_stored_a
  doc[stored_field]
end
914
+
915
# Derives the format labels for a record.
#
# Location-driven formats win outright: 'Manuscript', then 'Archive', then
# 'Microformat'. Otherwise up to four additive labels (thesis, conference,
# government document, newspaper) are collected, followed by exactly one
# label chosen from the two-character leader format code. The values from
# get_curated_format are appended in every case.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_format(rec)
  leader_code = get_format_from_leader(rec)
  fixed_008 = rec.fields('008').first&.value || ''
  values_007 = rec.fields('007').map(&:value)
  publisher_is_press = rec.fields('260').any? do |field|
    field.any? { |sf| sf.code == 'b' && sf.value =~ /press/i }
  end
  # first letter of every 006
  firsts_006 = rec.fields('006').map { |field| field.value[0] }
  subfield_values = ->(tag, code) do
    rec.fields(tag).flat_map { |field| field.select { |sf| sf.code == code }.map(&:value) }
  end
  values_245k = subfield_values.call('245', 'k')
  values_245h = subfield_values.call('245', 'h')
  values_337a = subfield_values.call('337', 'a')
  # the 'Classification part' of a holding contains strings like 'Microfilm'
  call_numbers = rec.fields(EnrichedMarc::TAG_HOLDING).map do |field|
    join_subfields(field, &subfield_in([ EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART, EnrichedMarc::SUB_HOLDING_ITEM_PART ]))
  end
  places = get_specific_location_values(rec)
  any_match = ->(values, pattern) { values.any? { |value| value =~ pattern } }

  formats = []
  if any_match.call(places, /manuscripts/i)
    formats << 'Manuscript'
  elsif any_match.call(places, /archives/i) &&
        !any_match.call(places, /cajs/i) &&
        !any_match.call(places, /nursing/i)
    formats << 'Archive'
  elsif any_match.call(places, /micro/i) ||
        any_match.call(values_245h, /micro/i) ||
        any_match.call(call_numbers, /micro/i) ||
        any_match.call(values_337a, /microform/i)
    formats << 'Microformat'
  else
    # these four can each be present in addition to ONE of the formats below
    formats << 'Thesis/Dissertation' if rec.fields('502').any? && leader_code == 'tm'
    formats << 'Conference/Event' if rec.fields('111').any? || rec.fields('711').any?
    if !%w{c d i j}.member?(leader_code[0]) && %w{f i o}.member?(fixed_008[28]) && !publisher_is_press
      formats << 'Government document'
    end
    formats << 'Newspaper' if leader_code == 'as' && (fixed_008[21] == 'n' || fixed_008[22] == 'e')

    # only one of these
    primary =
      if leader_code.end_with?('i') || (leader_code == 'am' && firsts_006.member?('m') && firsts_006.member?('s'))
        'Website/Database'
      elsif %w(aa ac am tm).member?(leader_code) &&
            !any_match.call(values_245k, /kit/i) &&
            !any_match.call(values_245h, /micro/i)
        'Book'
      elsif %w(ca cb cd cm cs dm).member?(leader_code)
        'Musical score'
      elsif leader_code.start_with?('e') || leader_code == 'fm'
        'Map/Atlas'
      elsif leader_code == 'gm'
        # a 007 starting with 'v' wins; 'g' applies only when no 007 starts with 'v'
        only_projected = values_007.none? { |v| v.start_with?('v') } &&
                         values_007.any? { |v| v.start_with?('g') }
        only_projected ? 'Projected graphic' : 'Video'
      elsif %w(im jm jc jd js).member?(leader_code)
        'Sound recording'
      elsif %w(km kd).member?(leader_code)
        'Image'
      elsif leader_code == 'mm'
        'Datafile'
      elsif %w(as gs).member?(leader_code)
        'Journal/Periodical'
      elsif leader_code.start_with?('r')
        '3D object'
      else
        'Other'
      end
    formats << primary
  end
  formats.concat(get_curated_format(rec))
end
1004
+
1005
+ # returns two-char format code from MARC leader, representing two fields:
1006
+ # "Type of record" and "Bibliographic level"
1007
# Two characters of the leader starting at offset 6.
#
# @param rec [#leader]
# @return [String, nil] nil when the leader is shorter than six characters
def get_format_from_leader(rec)
  rec.leader[6, 2]
end
1010
+
1011
# Collects display strings from fields 300, 254/255/310/342/352/362 and 340,
# followed by the 880 fields whose $6 starts with one of those tags. Each
# group hides its own set of subfield codes; values that are not present? are
# dropped.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_format_display(rec)
  hidden_for_300 = %w{3 6 8}
  hidden_for_misc = %w{6 8}
  hidden_for_340 = %w{0 2 6 8}

  from_300 = rec.fields('300').map { |f| join_subfields(f, &subfield_not_in(hidden_for_300)) }
  from_misc = rec.fields(%w{254 255 310 342 352 362}).map do |f|
    join_subfields(f, &subfield_not_in(hidden_for_misc))
  end
  from_340 = rec.fields('340').map { |f| join_subfields(f, &subfield_not_in(hidden_for_340)) }
  from_880 = rec.fields('880').map do |f|
    hidden =
      if has_subfield6_value(f, /^300/)
        hidden_for_300
      elsif has_subfield6_value(f, /^(254|255|310|342|352|362)/)
        hidden_for_misc
      elsif has_subfield6_value(f, /^340/)
        hidden_for_340
      end
    # an unrelated 880 contributes an empty array, which is filtered out below
    hidden ? join_subfields(f, &subfield_not_in(hidden)) : []
  end

  (from_300 + from_misc + from_340 + from_880).select(&:present?)
end
1035
+
1036
# Number of EnrichedMarc::TAG_ITEM fields in the record.
#
# @param [MARC::Record] rec
# @return [Integer, nil] nil when there are none
def get_itm_count(rec)
  items = rec.fields(EnrichedMarc::TAG_ITEM)
  items.size unless items.empty?
end
1040
+
1041
# Number of EnrichedMarc::TAG_HOLDING fields in the record.
#
# @param [MARC::Record] rec
# @return [Integer, nil] nil when there are none
def get_hld_count(rec)
  holdings = rec.fields(EnrichedMarc::TAG_HOLDING)
  holdings.size unless holdings.empty?
end
1045
+
1046
# Counts holding fields whose $8 value is not referenced by the $r of any
# item field. A holding without $8 is counted as well.
#
# @param [MARC::Record] rec
# @return [Integer]
def get_empty_hld_count(rec)
  referenced_ids = Set.new
  rec.each_by_tag(EnrichedMarc::TAG_ITEM) do |item|
    reference = item.find { |sf| sf.code == 'r' }
    referenced_ids << reference.value if reference
  end

  unreferenced = 0
  rec.each_by_tag(EnrichedMarc::TAG_HOLDING) do |holding|
    own_id = holding.find { |sf| sf.code == '8' }&.value
    unreferenced += 1 unless referenced_ids.include?(own_id)
  end
  unreferenced
end
1065
+
1066
# Number of EnrichedMarc::TAG_ELECTRONIC_INVENTORY fields in the record.
#
# @param [MARC::Record] rec
# @return [Integer, nil] nil when there are none
def get_prt_count(rec)
  portfolios = rec.fields(EnrichedMarc::TAG_ELECTRONIC_INVENTORY)
  portfolios.size unless portfolios.empty?
end
1070
+
1071
# Access labels for a record: 'At the library' for each holding field and
# 'Online' for each electronic inventory field, for an 856 (ind1 '4', ind2 not
# '2') whose $u contains 'hdl.library.upenn.edu' and whose $z does not mention
# 'Finding aid', or when is_etas(rec) is true. Duplicates are removed.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_access_values(rec)
  access = rec.each_with_object([]) do |field, found|
    if field.tag == EnrichedMarc::TAG_HOLDING
      found << 'At the library'
    elsif field.tag == EnrichedMarc::TAG_ELECTRONIC_INVENTORY
      found << 'Online'
    end
  end

  rec.fields('856').each do |field|
    next unless field.indicator1 == '4' && field.indicator2 != '2'

    note = join_subfields(field, &subfield_in(%w{z}))
    field.find_all(&subfield_in(%w{u})).each do |sf|
      access << 'Online' if !note.include?('Finding aid') && sf.value.include?('hdl.library.upenn.edu')
    end
  end

  access << 'Online' if is_etas(rec)
  access.uniq
end
1093
+
1094
# True when any 977 field has a $e whose value is exactly 'ETAS'.
#
# @param rec [#fields]
# @return [Boolean]
def is_etas(rec)
  rec.fields('977').each do |field|
    field.each do |sf|
      return true if sf.code == 'e' && sf.value == 'ETAS'
    end
  end
  false
end
1101
+
1102
+ # examines a 1xx datafield and constructs a string out of select
1103
+ # subfields, including expansion of 'relator' code
1104
# Builds a name string from a field's subfields.
#
# $0 (authority record numbers added by Alma), $1 (URIs added by the MARCive
# project), $6 and $8 are skipped. $4 contributes ", " followed by the
# relator_codes entry for its value. Every other subfield contributes its
# value preceded by a space. A '.' is appended unless the text already ends
# in '.' or '-', and the result is passed through normalize_space.
#
# @param field [MARC::DataField] presumably a 1xx field -- confirm with callers
# @return the value returned by normalize_space
def get_name_1xx_field(field)
  pieces = field.each_with_object([]) do |sf, parts|
    if sf.code == '4'
      parts << ", #{relator_codes[sf.value]}"
    elsif !%w{0 1 6 8}.member?(sf.code)
      parts << " #{sf.value}"
    end
  end
  text = pieces.join
  text += '.' unless text.end_with?('.', '-')
  normalize_space(text)
end
1117
+
1118
# Builds a series string from a field's subfields.
#
# $0 (authority record numbers added by Alma), $5, $6 and $8 are skipped. $4
# contributes ", " followed by the relator_codes entry for its value. Every
# other subfield contributes its value preceded by a space. A '.' is appended
# unless the text already ends in '.' or '-', and the result is passed
# through normalize_space.
#
# @param field [MARC::DataField] presumably an 8xx field -- confirm with callers
# @return the value returned by normalize_space
def get_series_8xx_field(field)
  pieces = field.each_with_object([]) do |sf, parts|
    if sf.code == '4'
      parts << ", #{relator_codes[sf.value]}"
    elsif !%w{0 5 6 8}.member?(sf.code)
      parts << " #{sf.value}"
    end
  end
  text = pieces.join
  text += '.' unless text.end_with?('.', '-')
  normalize_space(text)
end
1130
+
1131
# Builds a series string from a field's subfields.
#
# $0 (authority record numbers added by Alma), $6 and $8 are skipped. $4
# contributes ", " followed by the relator_codes entry for its value. Every
# other subfield contributes its value preceded by a space. A '.' is appended
# unless the text already ends in '.' or '-', and the result is passed
# through normalize_space.
#
# @param field [MARC::DataField] presumably a 4xx field -- confirm with callers
# @return the value returned by normalize_space
def get_series_4xx_field(field)
  pieces = field.each_with_object([]) do |sf, parts|
    if sf.code == '4'
      parts << ", #{relator_codes[sf.value]}"
    elsif !%w{0 6 8}.member?(sf.code)
      parts << " #{sf.value}"
    end
  end
  text = pieces.join
  text += '.' unless text.end_with?('.', '-')
  normalize_space(text)
end
1143
+
1144
# Publication strings for a record:
#   * every 245 $f value;
#   * the first 260/261/262 field (all subfields except $6), when one exists;
#   * otherwise one string built from the first 264 with second indicator '1'
#     ($a, $b, then $c) and the $c values of the first 264 with second
#     indicator '4', each preceded by ', ' when the ind2 '1' field supplied a $c.
# Values are stripped and those that are not present? are dropped.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_publication_values(rec)
  values = rec.fields('245').flat_map do |field|
    field.select { |sf| sf.code == 'f' }.map(&:value)
  end

  imprint = rec.fields(%w{260 261 262}).first
  if imprint
    values << join_and_trim_whitespace(imprint.reject { |sf| sf.code == '6' }.map(&:value))
  else
    publication = rec.fields.find { |field| field.tag == '264' && field.indicator2 == '1' }
    copyright = rec.fields.find { |field| field.tag == '264' && field.indicator2 == '4' }

    place_and_name = publication ? publication.find_all(&subfield_in(%w{a b})).map(&:value) : []
    publication_dates = publication ? publication.find_all(&subfield_in(['c'])).map(&:value) : []
    copyright_dates =
      if copyright
        copyright.select { |sf| sf.code == 'c' }
                 .map { |sf| (publication_dates.present? ? ', ' : '') + sf.value }
      else
        []
      end

    values << [place_and_name, publication_dates, copyright_dates].join(' ')
  end

  values.map(&:strip).select(&:present?)
end
1184
+
1185
# Publication statements for display, in this order: $f values of the first
# 245; the first 260/261/262; the first 880 linked to 260/261/262; $f of
# every 880 linked to 245; then get_264_or_880_fields(rec, '1'). Values that
# are not present? are dropped.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_publication_display(rec)
  title_dates = rec.fields('245').first(1).flat_map do |field|
    field.select { |sf| sf.code == 'f' }.map(&:value)
  end
  imprint = rec.fields(%w{260 261 262}).first(1).map do |field|
    join_subfields(field, &subfield_not_6_or_8)
  end
  linked_imprint = rec.fields('880')
                      .select { |f| has_subfield6_value(f, /^(260|261|262)/) }
                      .first(1)
                      .map { |field| join_subfields(field, &subfield_not_6_or_8) }
  linked_title_dates = rec.fields('880')
                          .select { |f| has_subfield6_value(f, /^245/) }
                          .map { |field| join_subfields(field, &subfield_in(['f'])) }

  statements = title_dates + imprint + linked_imprint + linked_title_dates +
               get_264_or_880_fields(rec, '1')
  statements.select(&:present?)
end
1209
+
1210
# Looks up characters 35-37 of every 008 field in the languages mapping.
# Fields too short to have those positions, and lookups that return nil, are
# skipped.
#
# @param [MARC::Record] rec
# @return [Array] the non-nil results of the languages lookup
def get_language_values(rec)
  rec.fields('008').each_with_object([]) do |field, names|
    code = field.value[35, 3]
    name = languages[code] if code
    names << name unless name.nil?
  end
end
1218
+
1219
+ # display_fieldname = key to read from each matching entry of the locations data structure
1220
# Maps the record's location codes through the locations data structure.
#
# In holdings records the shelving location is always the permanent
# location, while an item's current location also reflects temporary moves.
# Item fields are therefore preferred whenever the record has any; holdings
# are the fallback.
#
# The code 'web' (the 'Penn Library Web' location used in Voyager) is not
# faceted. Codes with no present? entry in locations are ignored for
# faceting purposes. 'Online library' is appended when the record has an
# electronic inventory field.
#
# @param [MARC::Record] rec
# @param display_fieldname key read from each locations entry
# @return [Array]
def holdings_location_mappings(rec, display_fieldname)
  tag, location_code =
    if rec.fields(EnrichedMarc::TAG_ITEM).empty?
      [EnrichedMarc::TAG_HOLDING, EnrichedMarc::SUB_HOLDING_SHELVING_LOCATION]
    else
      [EnrichedMarc::TAG_ITEM, EnrichedMarc::SUB_ITEM_CURRENT_LOCATION]
    end

  mapped = rec.fields(tag).flat_map do |field|
    per_field = field.each_with_object([]) do |sf, found|
      next unless sf.code == location_code && sf.value != 'web'

      entry = locations[sf.value]
      next unless entry.present?

      display = entry[display_fieldname]
      found << display if display.present?
    end
    # flatten multiple 'library' values
    per_field.flatten
  end.uniq

  mapped << 'Online library' if rec.fields(EnrichedMarc::TAG_ELECTRONIC_INVENTORY).any?
  mapped
end
1260
+
1261
# Summarises how many item fields have the current location 'vanpNocirc'.
#
# @param [MARC::Record] rec
# @return [String] 'na' when there are no item fields, otherwise 'all',
#   'none' or 'partial'
def items_nocirc(rec)
  items = rec.fields(EnrichedMarc::TAG_ITEM)
  return 'na' if items.empty?

  nocirc_flags = items.map do |item|
    item.any? do |sf|
      sf.code == EnrichedMarc::SUB_ITEM_CURRENT_LOCATION && sf.value == 'vanpNocirc'
    end
  end

  if nocirc_flags.all?
    'all'
  elsif nocirc_flags.none?
    'none'
  else
    'partial'
  end
end
1284
+
1285
# The 'library' values from holdings_location_mappings.
#
# @param [MARC::Record] rec
# @return [Array]
def get_library_values(rec)
  mapping_key = 'library'
  holdings_location_mappings(rec, mapping_key)
end
1288
+
1289
# The 'specific_location' values from holdings_location_mappings.
#
# @param [MARC::Record] rec
# @return [Array]
def get_specific_location_values(rec)
  mapping_key = 'specific_location'
  holdings_location_mappings(rec, mapping_key)
end
1292
+
1293
# Looks up leader character 17 in EncodingLevel::RANK.
#
# @param [MARC::Record] rec
# @return the RANK entry for that character (nil if RANK has none -- assuming
#   RANK is a plain Hash; confirm in EncodingLevel)
def get_encoding_level_rank(rec)
  level = rec.leader[17]
  EncodingLevel::RANK[level]
end
1296
+
1297
# Builds the dates hash for a record from its first 008 field.
#
# Character 6 is the date type, characters 7-10 are date1 and 11-14 are
# date2. The date type is translated through DateType::MAP, and that decides
# which arguments are passed to build_dates_hash.
#
# @param [MARC::Record] rec
# @return [Hash, nil] nil when there is no 008, the 008 is too short, or the
#   date type maps to none of the handled kinds
def prepare_dates(rec)
  f008 = rec.fields('008').first
  return nil unless f008

  fixed = f008.value
  date_type = fixed[6]
  date1 = fixed[7, 4]
  return nil unless date_type && date1

  date2 = fixed[11, 4]
  case DateType::MAP[date_type]
  when :single then build_dates_hash(date1)
  when :lower_bound then build_dates_hash(date1, '9999')
  when :range then build_dates_hash(date1, date2)
  when :separate_content then build_dates_hash(date1, nil, date2)
  end
end
1317
+
1318
# Builds the publication/content date values from raw 008 dates.
#
# Unknown digits ('u') are filled with '0' for lower bounds and '9' for upper
# bounds via sanitize_date. When the end date is absent or invalid, the start
# date supplies both bounds. When the start sorts after the end, the date
# type is assumed to be miscoded: the end is rebuilt from the start date and
# the raw end date is treated as the content date instead.
#
# @param [String] raw_pub_date_start
# @param [String, nil] raw_pub_date_end
# @param [String, nil] content_date
# @return [Hash, nil] nil when the start date is invalid
def build_dates_hash(raw_pub_date_start, raw_pub_date_end = nil, content_date = nil)
  earliest = sanitize_date(raw_pub_date_start, '0')
  return nil if earliest.nil?

  latest = raw_pub_date_end && sanitize_date(raw_pub_date_end, '9')
  if !latest
    latest = sanitize_date(raw_pub_date_start, '9')
  elsif earliest > latest
    # assume date type coded incorrectly; use date2 as content_date
    latest = sanitize_date(raw_pub_date_start, '9')
    content_date = raw_pub_date_end
  end

  content_start, content_end =
    if content_date.nil?
      [earliest, latest]
    elsif content_date =~ /^[0-9]{4}$/
      [content_date, content_date]
    elsif (content_lower = sanitize_date(content_date, '0'))
      [content_lower, sanitize_date(content_date, '9')]
    else
      # invalid separate content date provided; fall back to pub_date
      [earliest, latest]
    end

  {
    pub_date_sort: earliest,
    pub_date_decade: current_year + 15 > earliest.to_i ? earliest[0, 3] + '0s' : nil,
    pub_date_range: "[#{earliest} TO #{latest}]",
    content_date_range: "[#{content_start} TO #{content_end}]",
    pub_date_minsort: "#{earliest}-01-01T00:00:00Z",
    pub_date_maxsort: "#{latest.to_i + 1}-01-01T00:00:00Z",
    content_date_minsort: "#{content_start}-01-01T00:00:00Z",
    content_date_maxsort: "#{content_end.to_i + 1}-01-01T00:00:00Z"
  }
end
1356
+
1357
# Validates a raw date and fills in its unknown digits.
#
# A valid date is a run of digits followed by a run of 'u' placeholders
# (either run may be empty). The whole string must match: \A and \z are used
# rather than ^ and $, which anchor per line and would accept a multi-line
# value as soon as any one of its lines looked like a date.
#
# @param [String, nil] input raw date
# @param [String] replace text substituted for each 'u'
# @return [String, nil] the date with every 'u' replaced, or nil when +input+
#   is nil or not a valid date
def sanitize_date(input, replace)
  return nil if input !~ /\A[0-9]*u*\z/

  input.gsub('u', replace)
end
1361
+
1362
# Characters 7-10 of every 008 field, with each non-digit replaced by '0'.
# Values that are not present? are skipped.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def publication_date_digits(rec)
  rec.fields('008').each_with_object([]) do |field, years|
    raw_year = field.value[7, 4]
    years << raw_year.gsub(/\D/, '0') if raw_year.present?
  end
end
1367
+
1368
# Decade labels (e.g. '1990s') for the record's publication years. A year is
# kept only when it starts with a non-zero digit followed by a digit and lies
# less than 15 years after current_year.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_publication_date_values(rec)
  publication_date_digits(rec).each_with_object([]) do |year, decades|
    next unless year =~ /^[1-9][0-9]/ && current_year + 15 > year.to_i

    decades << year[0, 3] + '0s'
  end
end
1373
+
1374
# Sort values for publication date: the output of publication_date_digits,
# unchanged.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_publication_date_sort_values(rec)
  sort_years = publication_date_digits(rec)
  sort_years
end
1377
+
1378
# Classification labels built from the first character of each item call
# number.
#
# Item fields are used rather than holdings because they carry a call number
# type subfield. Type '0' is looked up in loc_classifications; type '1' is
# looked up in dewey_classifications and displayed with '00' appended to the
# character. Call numbers of any other type, or with no lookup result,
# produce nothing.
#
# @param [MARC::Record] rec
# @return [Array<String>] unique "<code> - <name>" strings
def get_classification_values(rec)
  labels = rec.fields(EnrichedMarc::TAG_ITEM).flat_map do |item|
    type_subfield = item.find { |sf| sf.code == EnrichedMarc::SUB_ITEM_CALL_NUMBER_TYPE }
    cn_type = type_subfield && type_subfield.value

    initials = item.select { |sf| sf.code == EnrichedMarc::SUB_ITEM_CALL_NUMBER }
                   .map(&:value)
                   .select(&:present?)
                   .map { |call_num| call_num[0] }
                   .compact

    initials.map do |initial|
      if cn_type == '0'
        name = loc_classifications[initial]
        [initial, name].join(' - ') if name
      elsif cn_type == '1'
        name = dewey_classifications[initial]
        [initial + '00', name].join(' - ') if name
      end
    end.compact
  end
  labels.uniq
end
1408
+
1409
# Genre values from the 655 fields, returned only when the record's first 007
# starts with 'v' or an item's current location maps to a specific_location
# matching /manuscript/. Subfields $0, $2, $5 and $c are left out.
#
# @param [MARC::Record] rec
# @return [Array<String>] empty when neither condition holds
def get_genre_values(rec)
  in_manuscript_location = rec.fields(EnrichedMarc::TAG_ITEM).any? do |item|
    entry = locations[item[EnrichedMarc::SUB_ITEM_CURRENT_LOCATION]]
    entry.present? && (entry['specific_location'] =~ /manuscript/)
  end
  starts_with_v = rec['007'].try { |f| f.value.start_with?('v') }
  return [] unless starts_with_v || in_manuscript_location

  rec.fields('655').map do |field|
    field.select(&subfield_not_in(%w{0 2 5 c})).map(&:value).join(' ')
  end
end
1427
+
1428
# One join_subfields result per 655 field, leaving out $0, $2, $5 and $c.
#
# @param [MARC::Record] rec
# @return [Array]
def get_genre_search_values(rec)
  rec.fields('655').each_with_object([]) do |field, values|
    values << join_subfields(field, &subfield_not_in(%w{0 2 5 c}))
  end
end
1433
+
1434
+ # @param [MARC::Record] rec
1435
+ # @param [TrueClass, FalseClass] should_link
1436
# Builds display hashes for the fields accepted by
# GenreTools.allowed_genre_field?.
#
# :value is the heading: $a and $b are joined with a space, every other
# displayed subfield with ' -- '. $0, $2, $5, $6, $8 and $c are never shown.
# :value_append holds the $e and $w VALUES joined with ' -- '. The subfield
# objects must be mapped to their values first: joining the objects directly
# stringifies each one through Subfield#to_s ("$e value ").
#
# @param [MARC::Record] rec
# @param [TrueClass, FalseClass] should_link stored as :link in every hash
# @return [Array<Hash>] unique hashes
def get_genre_display(rec, should_link)
  genre_fields = rec.fields.select { |field| GenreTools.allowed_genre_field?(field) }
  genre_fields.map do |field|
    heading = field.find_all(&subfield_not_in(%w{0 2 5 6 8 c e w})).map { |sf|
      (%w{a b}.member?(sf.code) ? ' ' : ' -- ') + sf.value
    }.join.lstrip
    appended = field.find_all(&subfield_in(%w{e w})).map(&:value).join(' -- ')
    { value: heading, value_append: appended, link: should_link, link_type: 'genre_search' }
  end.uniq
end
1449
+
1450
# Title string built from the first 245 field: the first $a or $k (trailing
# slash and comma trimmed), an '=' or ':' when $a/$k or $h ends with one of
# them ('=' taking precedence), then $b, $n and $p with trailing slashes
# trimmed.
#
# @param [MARC::Record] rec
# @return [Array<String>] zero or one title
def get_title_values(rec)
  rec.fields('245').first(1).map do |field|
    main_title = field.select(&subfield_in(%w{a k}))
                      .map { |sf| trim_trailing_comma(trim_trailing_slash(sf.value).rstrip) }
                      .first || ''
    remainder = field.select(&subfield_in(%w{b n p}))
                     .map { |sf| trim_trailing_slash(sf.value) }
                     .join(' ')

    medium_ending = field.select { |sf| sf.code == 'h' }.map { |sf| sf.value[-1] }.first
    endings = [main_title[-1], medium_ending]
    punct = %w{= :}.find { |mark| endings.member?(mark) }

    [trim_trailing_colon(trim_trailing_equal(main_title)), punct, remainder]
      .select(&:present?).join(' ')
  end
end
1475
+
1476
# Title strings built from each 880 field linked to 245: the first $a (or,
# when that is not present?, the first $k), an '=' or ':' when the title or
# $h ends with one of them ('=' taking precedence), then the first $b, $n and
# $p with trailing slashes trimmed.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_title_880_values(rec)
  linked_titles = rec.fields('880').select { |f| has_subfield6_value(f, /^245/) }
  linked_titles.map do |field|
    first_value = ->(code) { field.find_all(&subfield_in([code])).first.try(:value) }

    from_a = first_value.call('a')
    from_k = first_value.call('k') || ''
    raw_title = from_a.present? ? from_a : (from_k + ' ')
    title = trim_trailing_comma(join_and_trim_whitespace([ trim_trailing_slash(raw_title) ]))

    medium = join_and_trim_whitespace(field.find_all(&subfield_in(%w{h})).map(&:value))
    endings = [title[-1], medium[-1]]
    punct = %w{= :}.find { |mark| endings.member?(mark) }

    pieces = [trim_trailing_equal(title), punct]
    %w{b n p}.each { |code| pieces << trim_trailing_slash(first_value.call(code) || '') }
    pieces.select(&:present?).join(' ')
  end
end
1506
+
1507
# Splits a leading '[' off a string.
#
# @param [String] s
# @return [Hash] 'prefix' is '[' or '', 'filing' is the rest of the string
def separate_leading_bracket_into_prefix_and_filing_hash(s)
  return { 'prefix' => '', 'filing' => s } unless s.start_with?('[')

  { 'prefix' => '[', 'filing' => s[1..-1] }
end
1514
+
1515
# Splits each field's title into a 'prefix' and a 'filing' part.
#
# A second indicator of 1-9 is used as the number of leading $a characters
# that form the prefix. Otherwise only a leading '[' of $a (or of $k when $a
# is not present?) counts as prefix. $b, $n and $p are appended to the filing
# part.
#
# When the second indicator is not a single digit it is treated as 0, unless
# +support_invalid_indicator2+ is false: in that case the method returns an
# empty array immediately, discarding titles already built for earlier
# fields.
#
# @param [Array<MARC::DataField>] fields
# @param [Boolean] support_invalid_indicator2
# @return [Array<Hash>] hashes with 'prefix' and 'filing' keys
def get_title_from_245_or_880(fields, support_invalid_indicator2 = true)
  titles = []
  fields.each do |field|
    single_digit = field.indicator2 =~ /^[0-9]$/
    return [] unless single_digit || support_invalid_indicator2

    nonfiling = single_digit ? field.indicator2.to_i : 0 # default to 0
    main_title = join_subfields(field, &subfield_in(%w{a}))
    parts =
      if nonfiling.between?(1, 9)
        { 'prefix' => main_title[0, nonfiling], 'filing' => main_title[nonfiling..-1] }
      elsif main_title.present?
        separate_leading_bracket_into_prefix_and_filing_hash(main_title)
      else
        separate_leading_bracket_into_prefix_and_filing_hash(join_subfields(field, &subfield_in(%w{k})))
      end
    parts['filing'] = [ parts['filing'], join_subfields(field, &subfield_in(%w{b n p})) ].join(' ')
    titles << parts
  end
  titles
end
1542
+
1543
# Prefix/filing hash for the first 245 field
# (see get_title_from_245_or_880).
#
# @param [MARC::Record] rec
# @param [Boolean] support_invalid_indicator2
# @return [Array<Hash>] zero or one hash
def get_title_245(rec, support_invalid_indicator2 = true)
  first_title_field = rec.fields('245').first(1)
  get_title_from_245_or_880(first_title_field, support_invalid_indicator2)
end
1546
+
1547
# Prefix/filing hashes for every 880 field linked to 245
# (see get_title_from_245_or_880).
#
# @param [MARC::Record] rec
# @return [Array<Hash>]
def get_title_880_for_xfacet(rec)
  linked_titles = rec.fields('880').select { |f| has_subfield6_value(f, /^245/) }
  get_title_from_245_or_880(linked_titles)
end
1550
+
1551
# Passes the 245 title, followed by the titles of its linked 880 fields,
# through references. The 880s were added on 6/16/2017 for non-roman
# character handling.
#
# @param [MARC::Record] rec
# @return [Array] one references result per title
def get_title_xfacet_values(rec)
  titles = get_title_245(rec) + get_title_880_for_xfacet(rec)
  titles.map { |title| references(title) }
end
1559
+
1560
# Sort strings for the 245 title: the filing part followed by the prefix.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_title_sort_values(rec)
  get_title_245(rec).map do |title|
    filing, prefix = title.values_at('filing', 'prefix')
    filing + prefix
  end
end
1565
+
1566
# The 'filing' part of the 245 title (see get_title_245).
#
# @param [MARC::Record] rec
# @param [Boolean] support_invalid_indicator2
# @return [Array<String>]
def get_title_sort_filing_parts(rec, support_invalid_indicator2 = true)
  titles = get_title_245(rec, support_invalid_indicator2)
  titles.map { |title| title['filing'] }
end
1571
+
1572
# Runs do_title_variant_field for fields 130, 240, 210, 222 and 246, in that
# order. Each row gives the tag, which indicator (1, 2 or none) is passed on
# as the non-filing indicator, and the subfield codes to read.
#
# @param [MARC::Record] rec
# @param acc accumulator handed through to do_title_variant_field
# @return the result of the final (246) call
def append_title_variants(rec, acc)
  variant_specs = [
    ['130', 1, %w{a}],
    ['240', 2, %w{a}],
    ['210', nil, %w{a b}],
    ['222', 2, %w{a b}],
    ['246', nil, %w{a b}]
  ]
  variant_specs.map do |tag, non_filing_indicator, codes|
    do_title_variant_field(rec, acc, tag, non_filing_indicator, *codes)
  end.last
end
1579
+
1580
# For every +field_id+ field, reads the first subfield of each code in
# +subfields_spec+ and passes the values found to append_title_variant_field.
# A field is skipped when the FIRST requested subfield is missing.
#
# @param [MARC::Record] rec
# @param acc accumulator handed through to append_title_variant_field
# @param [String] field_id tag of the fields to read
# @param [Integer, nil] non_filing_indicator 1 or 2 to pass that indicator's
#   value on; anything else passes nil
# @param [Array<String>] subfields_spec subfield codes, in output order
# @return the fields that were iterated
def do_title_variant_field(rec, acc, field_id, non_filing_indicator, *subfields_spec)
  rec.fields(field_id).each do |field|
    values = subfields_spec.map do |code|
      field.find { |subfield| subfield.code == code }&.value
    end
    next if values.first.nil?

    non_filing =
      if non_filing_indicator == 1
        field.indicator1
      elsif non_filing_indicator == 2
        field.indicator2
      end
    append_title_variant_field(acc, non_filing, values.compact)
  end
end
1599
+
1600
# Search strings from the 245 fields followed by the 880 fields linked to
# 245, leaving out $c, $6, $8 and $h. With +format_filter+ set, values are
# produced only when the leader format code ends with 's'.
#
# @param [MARC::Record] rec
# @param [Boolean] format_filter
# @return [Array<String>] values that are present?
def get_title_1_search_main_values(rec, format_filter: false)
  leader_format = get_format_from_leader(rec)
  title_fields = rec.fields('245') +
                 rec.fields('880').select { |f| has_subfield6_value(f, /^245/) }

  title_fields.each_with_object([]) do |field, titles|
    next if format_filter && !leader_format.end_with?('s')

    title = join_and_trim_whitespace(field.select(&subfield_not_in(%w{c 6 8 h})).map(&:value))
    titles << title if title.present?
  end
end
1616
+
1617
# Title search values with the format filter switched off.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_title_1_search_values(rec)
  get_title_1_search_main_values(rec, format_filter: false)
end
1620
+
1621
# Title search values with the format filter switched on, i.e. only for
# records whose leader format code ends with 's'.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_journal_title_1_search_values(rec)
  only_formats_ending_in_s = true
  get_title_1_search_main_values(rec, format_filter: only_formats_ending_in_s)
end
1624
+
1625
# Tags read by get_title_2_search_main_values.
#
# The list is memoized, and frozen so that no caller can accidentally mutate
# the one array that every later call hands back.
#
# @return [Array<String>] frozen list of field tags
def title_2_search_main_tags
  @title_2_search_main_tags ||= %w[130 210 240 245 246 247 440 490 730 740 830].freeze
end
1628
+
1629
# Tags read by get_title_2_search_aux_values.
#
# The list is memoized, and frozen so that no caller can accidentally mutate
# the one array that every later call hands back.
#
# @return [Array<String>] frozen list of field tags
def title_2_search_aux_tags
  @title_2_search_aux_tags ||= %w[773 774 780 785].freeze
end
1632
+
1633
# Tags read by get_title_2_search_7xx_values.
#
# The list is memoized, and frozen so that no caller can accidentally mutate
# the one array that every later call hands back.
#
# @return [Array<String>] frozen list of field tags
def title_2_search_7xx_tags
  @title_2_search_7xx_tags ||= %w[700 710 711].freeze
end
1636
+
1637
# Search strings from the title_2_search_main_tags fields, leaving out $c, $6
# and $8. With +format_filter+ set, values are produced only when the leader
# format code ends with 's'.
#
# @param [MARC::Record] rec
# @param [Boolean] format_filter
# @return [Array<String>] values that are present?
def get_title_2_search_main_values(rec, format_filter: false)
  leader_format = get_format_from_leader(rec)
  rec.fields(title_2_search_main_tags).each_with_object([]) do |field, titles|
    next if format_filter && !leader_format.end_with?('s')

    title = join_and_trim_whitespace(field.select(&subfield_not_in(%w{c 6 8})).map(&:value))
    titles << title if title.present?
  end
end
1645
+
1646
# Search strings from the title_2_search_aux_tags fields, taking every
# subfield EXCEPT $s and $t. With +format_filter+ set, values are produced
# only when the leader format code ends with 's'.
#
# NOTE(review): get_title_2_search_800_values handles the 880s linked to
# these same tags by taking ONLY $s and $t. One of the two selections may be
# unintended -- confirm which before changing either.
#
# @param [MARC::Record] rec
# @param [Boolean] format_filter
# @return [Array<String>] values that are present?
def get_title_2_search_aux_values(rec, format_filter: false)
  leader_format = get_format_from_leader(rec)
  rec.fields(title_2_search_aux_tags).each_with_object([]) do |field, titles|
    next if format_filter && !leader_format.end_with?('s')

    title = join_and_trim_whitespace(field.select(&subfield_not_in(%w{s t})).map(&:value))
    titles << title if title.present?
  end
end
1654
+
1655
# Search strings from the $t subfields of the title_2_search_7xx_tags fields.
# With +format_filter+ set, values are produced only when the leader format
# code ends with 's'.
#
# @param [MARC::Record] rec
# @param [Boolean] format_filter
# @return [Array<String>] values that are present?
def get_title_2_search_7xx_values(rec, format_filter: false)
  leader_format = get_format_from_leader(rec)
  rec.fields(title_2_search_7xx_tags).each_with_object([]) do |field, titles|
    next if format_filter && !leader_format.end_with?('s')

    title = join_and_trim_whitespace(field.select(&subfield_in(%w{t})).map(&:value))
    titles << title if title.present?
  end
end
1663
+
1664
# Search strings from the $t subfields of the 505 fields whose indicators are
# both '0'. With +format_filter+ set, values are produced only when the
# leader format code ends with 's'.
#
# @param [MARC::Record] rec
# @param [Boolean] format_filter
# @return [Array<String>] values that are present?
def get_title_2_search_505_values(rec, format_filter: false)
  leader_format = get_format_from_leader(rec)
  rec.fields('505').each_with_object([]) do |field, titles|
    next unless field.indicator1 == '0' && field.indicator2 == '0'
    next if format_filter && !leader_format.end_with?('s')

    title = join_and_trim_whitespace(field.select(&subfield_in(%w{t})).map(&:value))
    titles << title if title.present?
  end
end
1674
+
1675
# Search strings from the 880 fields linked (through $6) to the tags used by
# the other title_2 search methods. Results are grouped section by section,
# in the order of the table below. With +format_filter+ set, values are
# produced only when the leader format code ends with 's'.
#
# @param [MARC::Record] rec
# @param [Boolean] format_filter
# @return [Array<String>] values that are present?
def get_title_2_search_800_values(rec, format_filter: false)
  leader_format = get_format_from_leader(rec)
  # [ pattern for $6, subfield selector, require both indicators to be '0' ]
  sections = [
    [/^(130|210|240|245|246|247|440|490|730|740|830)/, subfield_not_in(%w{c 6 8 h}), false],
    [/^(773|774|780|785)/, subfield_in(%w{s t}), false],
    [/^(700|710|711)/, subfield_in(%w{t}), false],
    [/^505/, subfield_in(%w{t}), true]
  ]

  sections.flat_map do |linked_tag, wanted, both_indicators_zero|
    rec.fields('880').each_with_object([]) do |field, titles|
      next if both_indicators_zero && !(field.indicator1 == '0' && field.indicator2 == '0')
      next unless field.any? { |sf| sf.code =='6' && sf.value =~ linked_tag }
      next if format_filter && !leader_format.end_with?('s')

      title = join_and_trim_whitespace(field.find_all(&wanted).map(&:value))
      titles << title if title.present?
    end
  end
end
1709
+
1710
# Concatenates the five title_2 search value lists (main, aux, 7xx, 505,
# linked 880s), in that order, without the format filter.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_title_2_search_values(rec)
  sources = %i[
    get_title_2_search_main_values
    get_title_2_search_aux_values
    get_title_2_search_7xx_values
    get_title_2_search_505_values
    get_title_2_search_800_values
  ]
  sources.map { |source| send(source, rec) }.reduce(:+)
end
1717
+
1718
# Concatenates the five title_2 search value lists (main, aux, 7xx, 505,
# linked 880s), in that order, with the format filter switched on.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_journal_title_2_search_values(rec)
  sources = %i[
    get_title_2_search_main_values
    get_title_2_search_aux_values
    get_title_2_search_7xx_values
    get_title_2_search_505_values
    get_title_2_search_800_values
  ]
  sources.map { |source| send(source, rec, format_filter: true) }.reduce(:+)
end
1725
+
1726
+ # this gets called directly by ShowPresenter rather than via
1727
+ # Blacklight's show field definition plumbing, so we return a single string
1728
# Joins the 245 fields (without $6 and $8) and the linked values from
# get_880, each of the latter prefixed with ' = ', into one string separated
# by spaces.
#
# @param [MARC::Record] rec
# @return [String]
def get_title_display(rec)
  titles = rec.fields('245').map do |field|
    join_subfields(field, &subfield_not_in(%w{6 8}))
  end
  linked_titles = get_880(rec, '245', &subfield_not_in(%w{6 8})).map do |value|
    " = #{value}"
  end
  (titles + linked_titles).join(' ')
end
1737
+
1738
# Tags read by get_author_creator_values.
#
# The list is memoized, and frozen so that no caller can accidentally mutate
# the one array that every later call hands back.
#
# @return [Array<String>] frozen list of field tags
def author_creator_tags
  @author_creator_tags ||= %w[100 110].freeze
end
1741
+
1742
# One get_name_1xx_field string per author_creator_tags field.
#
# @param [MARC::Record] rec
# @return [Array]
def get_author_creator_values(rec)
  rec.fields(author_creator_tags).each_with_object([]) do |field, names|
    names << get_name_1xx_field(field)
  end
end
1747
+
1748
# Values of the 880 fields linked to 100 or 110, leaving out $4, $6 and $8.
#
# @param [MARC::Record] rec
# @return [Array] one join_and_trim_whitespace result per linked field
def get_author_880_values(rec)
  rec.fields('880').each_with_object([]) do |field, names|
    next unless has_subfield6_value(field, /^(100|110)/)

    names << join_and_trim_whitespace(field.select(&subfield_not_in(%w{4 6 8})).map(&:value))
  end
end
1755
+
1756
# Author search strings, in three groups:
#   1. each 100/110 with its $a rearranged so the text after ', ' comes
#      before the text in front of it; $1, $6 and $8 are skipped;
#   2. each 100/110 with $a left as it is; only $6 and $8 are skipped;
#   3. each 880 linked to 100/110, with its first $a rearranged around ','
#      and followed by the other subfields except $6, $8 and $t.
# In groups 1 and 2, $4 becomes ", " plus the relator_codes entry for its
# value, and a '.' is appended unless the string ends in '.' or '-'.
#
# @param [MARC::Record] rec
# @return [Array<String>]
def get_author_creator_1_search_values(rec)
  finish = ->(pieces) do
    text = join_and_trim_whitespace(pieces)
    text.end_with?('.', '-') ? text : text + '.'
  end

  rearranged = rec.fields(%w{100 110}).map do |field|
    pieces = field.map do |sf|
      case sf.code
      when 'a'
        after_comma = join_and_trim_whitespace([ trim_trailing_comma(substring_after(sf.value, ', ')) ])
        before_comma = substring_before(sf.value, ', ')
        " #{after_comma} #{before_comma}"
      when '4'
        ", #{relator_codes[sf.value]}"
      when '1', '6', '8'
        nil
      else
        " #{sf.value}"
      end
    end
    finish.call(pieces.compact)
  end

  as_recorded = rec.fields(%w{100 110}).map do |field|
    pieces = field.map do |sf|
      case sf.code
      when '4'
        ", #{relator_codes[sf.value]}"
      when '6', '8'
        nil
      else
        " #{sf.value}"
      end
    end
    finish.call(pieces.compact)
  end

  linked_fields = rec.fields(%w{880}).select do |f|
    f.any? { |sf| sf.code =='6' && sf.value =~ /^(100|110)/ }
  end
  linked = linked_fields.map do |field|
    rearranged_a = field.find_all(&subfield_in(%w{a})).map { |sf|
      after_comma = join_and_trim_whitespace([ trim_trailing_comma(substring_after(sf.value, ',')) ])
      before_comma = substring_before(sf.value, ',')
      "#{after_comma} #{before_comma}"
    }.first
    others = join_and_trim_whitespace(field.find_all(&subfield_not_in(%w{6 8 a t})).map(&:value))
    [rearranged_a, others].join(' ')
  end

  rearranged + as_recorded + linked
end
1805
+
1806
# Tags consulted by get_author_creator_2_search_values (memoized).
# @return [Array<String>]
def author_creator_2_tags
  @author_creator_2_tags ||= %w[100 110 111 400 410 411 700 710 711 800 810 811]
end
1809
+
1810
# Secondary author/creator search values.
#
# For every field whose tag is in author_creator_2_tags two strings are
# produced, in this order:
#   * the subfields as entered (except 1, 5, 6, 8, t), with subfield 4
#     translated through relator_codes;
#   * the same with subfield a rewritten as "<text after ', '> <text before
#     ','>" (subfields 5, 6, 8, t skipped).
# Both strings get a trailing '.' unless they already end in '.' or '-'.
# The previous code computed that punctuated value but threw the result
# away; it is now actually used, matching get_author_creator_1_search_values.
#
# For every 880 field whose subfield 6 starts with one of the same tags, two
# unpunctuated strings are produced the same way (rewrite around ',').
# @return [Array<String>]
def get_author_creator_2_search_values(rec)
  with_final_punct = ->(text) { text.end_with?('.', '-') ? text : "#{text}." }

  from_fields = rec.fields(author_creator_2_tags).flat_map do |field|
    as_entered = field.map do |sf|
      if sf.code == '4'
        ", #{relator_codes[sf.value]}"
      elsif !%w[1 5 6 8 t].member?(sf.code)
        " #{sf.value}"
      end
    end.compact

    reordered = field.map do |sf|
      case sf.code
      when 'a'
        after_comma = join_and_trim_whitespace([trim_trailing_comma(substring_after(sf.value, ', '))])
        before_comma = substring_before(sf.value, ',')
        " #{after_comma} #{before_comma}"
      when '4'
        ", #{relator_codes[sf.value]}"
      when '5', '6', '8', 't'
        nil
      else
        " #{sf.value}"
      end
    end.compact

    [
      with_final_punct.call(join_and_trim_whitespace(as_entered)),
      with_final_punct.call(join_and_trim_whitespace(reordered))
    ]
  end

  paired_880s = rec.fields(%w[880]).select do |f|
    f.any? { |sf| sf.code == '6' && sf.value =~ /^(100|110|111|400|410|411|700|710|711|800|810|811)/ }
  end
  from_880 = paired_880s.flat_map do |field|
    value1 = join_and_trim_whitespace(field.find_all(&subfield_not_in(%w[5 6 8 t])).map(&:value))

    suba = field.find_all(&subfield_in(%w[a])).map do |sf|
      after_comma = join_and_trim_whitespace([trim_trailing_comma(substring_after(sf.value, ','))])
      before_comma = substring_before(sf.value, ',')
      "#{after_comma} #{before_comma}"
    end.first
    oth = join_and_trim_whitespace(field.find_all(&subfield_not_in(%w[5 6 8 a t])).map(&:value))

    # a missing subfield a joins as an empty string
    [value1, [suba, oth].join(' ')]
  end

  from_fields + from_880
end
1864
+
1865
# Sort value from the first field whose tag is in author_creator_tags,
# joining all subfields except 1, 4, 6, 8 and e.
# @return [Array<String>] zero or one element
def get_author_creator_sort_values(rec)
  rec.fields(author_creator_tags).first(1).map do |f|
    join_subfields(f, &subfield_not_in(%w[1 4 6 8 e]))
  end
end
1870
+
1871
# Display entries for authors: one hash per 100/110 field, followed by one
# hash per 880 field whose subfield 6 starts with 100 or 110.
#
# value        - remaining subfield values joined with a space
# value_append - result of get_subfield_4ew for the field
# link_type    - always 'author_creator_xfacet2'
# @return [Array<Hash>]
def get_author_display(rec)
  from_1xx = rec.fields(%w[100 110]).map do |field|
    appended = get_subfield_4ew(field)
    # added 2017/04/10: skip 0 (authority record numbers) added by Alma
    # added 2022/08/04: skip 1 (URIs) added by MARCive project
    name = field.reject { |sf| %w[0 1 4 6 8 e w].member?(sf.code) }
                .map(&:value)
                .join(' ')
    { value: name, value_append: appended, link_type: 'author_creator_xfacet2' }
  end

  paired_880s = rec.fields('880').select { |f| has_subfield6_value(f, /^(100|110)/) }
  from_880 = paired_880s.map do |field|
    appended = get_subfield_4ew(field)
    # added 2017/04/10: skip 0 (authority record numbers) added by Alma;
    # a single trailing '?' is stripped from each value
    name = field.reject { |sf| %w[0 4 6 8 e w].member?(sf.code) }
                .map { |sf| sf.value.gsub(/\?$/, '') }
                .join(' ')
    { value: name, value_append: appended, link_type: 'author_creator_xfacet2' }
  end

  from_1xx + from_880
end
1906
+
1907
# Search values built from subfields a, b, c and d of each 110/710/810 field.
# @return [Array] one joined, whitespace-trimmed value per field
def get_corporate_author_search_values(rec)
  rec.fields(%w[110 710 810]).map do |f|
    wanted = f.select(&subfield_in(%w[a b c d]))
    join_and_trim_whitespace(wanted.map(&:value))
  end
end
1912
+
1913
# Values from each 130/240 field, joined and whitespace-trimmed.
# Subfields 6 and 8 are skipped, as is 0 (authority record numbers added by
# Alma; filter added 2017/05/15).
# @return [Array] one value per field
def get_standardized_title_values(rec)
  rec.fields(%w[130 240]).map do |f|
    kept = f.find_all(&subfield_not_in(%w[0 6 8]))
    join_and_trim_whitespace(kept.map(&:value))
  end
end
1920
+
1921
# Display entries (hashes with link_type 'title_search') built from:
#   * every 130/240 field (these also carry :value_for_link, which
#     additionally omits subfield 5);
#   * 730 fields without a subfield i where indicator1 or indicator2 equals
#     the empty string;
#   * 880 fields without a subfield i whose subfield 6 starts with
#     130, 240 or 730.
# :value_append always comes from get_title_extra.
# @return [Array<Hash>]
def get_standardized_title_display(rec)
  # added 2017/05/15: 0 (authority record numbers added by Alma) is skipped
  from_uniform = rec.fields(%w[130 240]).map do |f|
    {
      value: join_subfields(f, &subfield_not_in(%w[0 6 8 e w])),
      value_for_link: join_subfields(f, &subfield_not_in(%w[0 5 6 8 e w])),
      value_append: get_title_extra(f),
      link_type: 'title_search'
    }
  end

  simple_entry = lambda do |f|
    {
      value: join_subfields(f, &subfield_not_in(%w[5 6 8 e w])),
      value_append: get_title_extra(f),
      link_type: 'title_search'
    }
  end
  lacks_i = ->(f) { f.none? { |sf| sf.code == 'i' } }

  # NOTE(review): indicators are compared with '' only (not ' '), unlike the
  # ['', ' '] checks used by other methods in this file - confirm intended.
  from_730 = rec.fields('730')
                .select { |f| (f.indicator1 == '' || f.indicator2 == '') && lacks_i.call(f) }
                .map(&simple_entry)

  from_880 = rec.fields('880')
                .select { |f| has_subfield6_value(f, /^(130|240|730)/) && lacks_i.call(f) }
                .map(&simple_entry)

  from_uniform + from_730 + from_880
end
1958
+
1959
# Value of the first 250 field only: all subfields except 6 and 8, joined
# and whitespace-trimmed.
# @return [Array] zero or one element
def get_edition_values(rec)
  rec.fields('250').first(1).map do |f|
    kept = f.find_all(&subfield_not_in(%w[6 8]))
    join_and_trim_whitespace(kept.map(&:value))
  end
end
1965
+
1966
# Display strings for every 250 field followed by every 880 field whose
# subfield 6 starts with 250; subfields 6 and 8 are omitted.
# @return [Array<String>]
def get_edition_display(rec)
  without_linkage = ->(f) { join_subfields(f, &subfield_not_in(%w[6 8])) }
  direct = rec.fields('250').map(&without_linkage)
  paired = rec.fields('880')
              .select { |f| has_subfield6_value(f, /^250/) }
              .map(&without_linkage)
  direct + paired
end
1978
+
1979
# Maps every 111 field through get_name_1xx_field.
# @return [Array] one entry per 111 field
def get_conference_values(rec)
  rec.fields('111').map { |f| get_name_1xx_field(f) }
end
1984
+
1985
# Search values built from subfields a, c, d and e of each 111/711/811 field.
# @return [Array] one joined, whitespace-trimmed value per field
def get_conference_search_values(rec)
  rec.fields(%w[111 711 811]).map do |f|
    wanted = f.select(&subfield_in(%w[a c d e]))
    join_and_trim_whitespace(wanted.map(&:value))
  end
end
1990
+
1991
# Display entries (link_type 'author_creator_xfacet2') for conferences.
#
# 111/711 fields with indicator2 '' or ' ' always yield an entry; its :value
# is the empty string when the field has a subfield i, and :value_append
# joins subfields e, j, w.
# 880 fields whose subfield 6 starts with 111 or 711 yield an entry only
# when they have no subfield i; their :value_append joins 4, e, j, w.
# @return [Array<Hash>]
def get_conference_display(rec)
  # added 2017/05/18: 0 (authority record numbers added by Alma) is skipped
  name_of = ->(f) { join_subfields(f, &subfield_not_in(%w[0 4 5 6 8 e j w])) }

  direct = rec.fields(%w[111 711])
              .select { |f| ['', ' '].member?(f.indicator2) }
              .map do |f|
    name = f.any? { |sf| sf.code == 'i' } ? '' : name_of.call(f)
    suffix = join_subfields(f, &subfield_in(%w[e j w]))
    { value: name, value_append: suffix, link_type: 'author_creator_xfacet2' }
  end

  paired = rec.fields('880')
              .select { |f| has_subfield6_value(f, /^(111|711)/) && f.none? { |sf| sf.code == 'i' } }
              .map do |f|
    name = name_of.call(f)
    suffix = join_subfields(f, &subfield_in(%w[4 e j w]))
    { value: name, value_append: suffix, link_type: 'author_creator_xfacet2' }
  end

  direct + paired
end
2014
+
2015
# Single series value: the first 800/810/811/830 field (through
# get_series_8xx_field) when there is one, otherwise the first
# 400/410/411/440/490 field (through get_series_4xx_field).
# @return [Array] zero or one element
def get_series_values(rec)
  first_8xx = rec.fields(%w[800 810 811 830]).first
  return [get_series_8xx_field(first_8xx)] if first_8xx

  rec.fields(%w[400 410 411 440 490]).first(1).map { |f| get_series_4xx_field(f) }
end
2029
+
2030
# Tags considered for series display, in priority order (memoized).
#
# 410 is included: get_series_display has an explicit branch for it, and
# both get_series_values and the 880 pattern in get_series_display list it.
# With 410 missing here, a record whose only series field is a 410 could
# never reach that branch.
# @return [Array<String>]
def series_tags
  @series_tags ||= %w[800 810 811 830 400 410 411 440 490]
end
2033
+
2034
# Display entries for series.
#
# The first tag of series_tags that the record has decides how its fields
# are rendered:
#   * 800/810/811/400/410/411 -> link_type 'author_search'; e, w, v, n, t and
#     the relator for subfield 4 go into :value_append;
#   * 830/440/490 -> link_type 'title_search'; c, e, w, v, n go into
#     :value_append.
# Fields of every other present series tag, and 880 fields whose subfield 6
# starts with a series tag, are added as unlinked entries (link: false).
# @return [Array<Hash>]
def get_series_display(rec)
  present_tags = series_tags.select { |tag| rec[tag].present? }
  lead_tag = present_tags.first

  # added 2017/04/10: 0 (authority record numbers added by Alma) is skipped
  primary =
    if %w[800 810 811 400 410 411].member?(lead_tag)
      rec.fields(lead_tag).map do |field|
        series = join_subfields(field, &subfield_not_in(%w[0 5 6 8 e t w v n]))
        extras = field.map do |sf|
          if %w[e w v n t].member?(sf.code)
            " #{sf.value}"
          elsif sf.code == '4'
            ", #{relator_codes[sf.value]}"
          end
        end
        { value: series, value_append: extras.join.strip, link_type: 'author_search' }
      end
    elsif %w[830 440 490].member?(lead_tag)
      rec.fields(lead_tag).map do |field|
        series = join_subfields(field, &subfield_not_in(%w[0 5 6 8 c e w v n]))
        extras = join_subfields(field, &subfield_in(%w[c e w v n]))
        { value: series, value_append: extras, link_type: 'title_search' }
      end
    else
      []
    end

  secondary = rec.fields(present_tags.drop(1)).map do |field|
    { value: join_subfields(field, &subfield_not_in(%w[0 5 6 8])), link: false }
  end

  paired = rec.fields('880')
              .select { |f| has_subfield6_value(f, /^(800|810|811|830|400|410|411|440|490)/) }
              .map { |field| { value: join_subfields(field, &subfield_not_in(%w[5 6 8])), link: false } }

  primary + secondary + paired
end
2077
+
2078
# Series search values, concatenated in this order:
#   400/410/411 with indicator2 '0' (without 4, 6, 8),
#   400/410/411 with indicator2 '1' (without 4, 6, 8, a),
#   440 (without 0, 5, 6, 8, w),
#   800/810/811 (without 0, 4, 5, 6, 7, 8, w),
#   830 (without 0, 5, 6, 7, 8, w),
#   533: subfield f values with parentheses removed, joined by a space.
# @return [Array<String>]
def get_series_search_values(rec)
  joined_without = lambda do |fields, excluded|
    fields.map { |f| join_subfields(f, &subfield_not_in(excluded)) }
  end

  ind2_zero = rec.fields(%w[400 410 411]).select { |f| f.indicator2 == '0' }
  ind2_one = rec.fields(%w[400 410 411]).select { |f| f.indicator2 == '1' }
  reproduction_series = rec.fields(%w[533]).map do |f|
    f.find_all { |sf| sf.code == 'f' }
     .map { |sf| sf.value.gsub(/\(|\)/, '') }
     .join(' ')
  end

  joined_without.call(ind2_zero, %w[4 6 8]) +
    joined_without.call(ind2_one, %w[4 6 8 a]) +
    joined_without.call(rec.fields(%w[440]), %w[0 5 6 8 w]) +
    joined_without.call(rec.fields(%w[800 810 811]), %w[0 4 5 6 7 8 w]) +
    joined_without.call(rec.fields(%w[830]), %w[0 5 6 7 8 w]) +
    reproduction_series
end
2111
+
2112
# Values from each 773 field: all subfields except 6, 7, 8 and w, joined and
# whitespace-trimmed.
# @return [Array] one value per 773 field
def get_contained_within_values(rec)
  rec.fields('773').map do |f|
    kept = f.find_all(&subfield_not_in(%w[6 7 8 w]))
    join_and_trim_whitespace(kept.map(&:value))
  end
end
2118
+
2119
# Describes each physical holding found in the enriched MARC.
#
# Enriched MARC looks like this:
#   <datafield tag="hld" ind1="0" ind2=" ">
#     <subfield code="b">MAIN</subfield>
#     <subfield code="c">main</subfield>
#     <subfield code="h">NA2540</subfield>
#     <subfield code="i">.G63 2009</subfield>
#     <subfield code="8">226026380000541</subfield>
#   </datafield>
#
# Alma never populates subfield 'a' ('location'); it appears to store the
# location code in 'c' and the display name in 'b'.
# @return [Array<Hash>] one hash per EnrichedMarc::TAG_HOLDING field with
#   keys :holding_id, :location, :classification_part and :item_part
def get_physical_holdings(rec)
  rec.fields(EnrichedMarc::TAG_HOLDING).map do |holding|
    {
      holding_id: holding[EnrichedMarc::SUB_HOLDING_SEQUENCE_NUMBER],
      location: holding[EnrichedMarc::SUB_HOLDING_SHELVING_LOCATION],
      classification_part: holding[EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART],
      item_part: holding[EnrichedMarc::SUB_HOLDING_ITEM_PART]
    }
  end
end
2141
+
2142
# Describes each electronic holding found in the enriched MARC.
#
# Enriched MARC looks like this:
#   <datafield tag="prt" ind1=" " ind2=" ">
#     <subfield code="pid">5310486800000521</subfield>
#     <subfield code="url">https://sandbox01-na.alma.exlibrisgroup.com/view/uresolver/01UPENN_INST/openurl?u.ignore_date_coverage=true&amp;rft.mms_id=9926519600521</subfield>
#     <subfield code="iface">PubMed Central</subfield>
#     <subfield code="coverage"> Available from 2005 volume: 1. Most recent 1 year(s) not available.</subfield>
#     <subfield code="library">MAIN</subfield>
#     <subfield code="collection">PubMed Central (Training)</subfield>
#     <subfield code="czcolid">61111058563444000</subfield>
#     <subfield code="8">5310486800000521</subfield>
#   </datafield>
#
# Holdings with a blank collection name are NOT indexed: those are records
# created from Voyager 856 fields that don't have actual links.
# @return [Array<Hash>] one hash per retained field with keys
#   :portfolio_pid, :url, :collection and :coverage
def get_electronic_holdings(rec)
  with_collection = rec.fields(EnrichedMarc::TAG_ELECTRONIC_INVENTORY).select do |inventory|
    inventory[EnrichedMarc::SUB_ELEC_COLLECTION_NAME].present?
  end
  with_collection.map do |inventory|
    {
      portfolio_pid: inventory[EnrichedMarc::SUB_ELEC_PORTFOLIO_PID],
      url: inventory[EnrichedMarc::SUB_ELEC_ACCESS_URL],
      collection: inventory[EnrichedMarc::SUB_ELEC_COLLECTION_NAME],
      coverage: inventory[EnrichedMarc::SUB_ELEC_COVERAGE]
    }
  end
end
2171
+
2172
# Collects the values of every EnrichedMarc::SUB_BOUND_WITH_ID subfield
# found in the EnrichedMarc::TAG_HOLDING fields.
# @return [Array] flat list of subfield values
def get_bound_with_id_values(rec)
  rec.fields(EnrichedMarc::TAG_HOLDING).flat_map do |holding|
    holding.select(&subfield_in([EnrichedMarc::SUB_BOUND_WITH_ID])).map(&:value)
  end
end
2177
+
2178
# Concatenates subfields 4, e and w of a field into one string: subfield 4
# becomes ", <relator_codes lookup>", the others become " <value>".
# @return [String]
def get_subfield_4ew(field)
  pieces = field.select(&subfield_in(%w[4 e w])).map do |sf|
    if sf.code == '4'
      ", #{relator_codes[sf.value]}"
    else
      " #{sf.value}"
    end
  end
  pieces.join
end
2183
+
2184
# Joins subfields e and w of the given field.
def get_title_extra(field)
  extra_codes = %w[e w]
  join_subfields(field, &subfield_in(extra_codes))
end
2187
+
2188
# Display strings from, in order:
#   * every 246 field (without 6, 8);
#   * 740 fields whose indicator2 is '', ' ', '0', '1' or '3' (without 5, 6, 8);
#   * 880 fields whose subfield 6 starts with 246 or 740 (without 5, 6, 8).
# @return [Array<String>]
def get_other_title_display(rec)
  from_246 = rec.fields('246').map { |f| join_subfields(f, &subfield_not_in(%w[6 8])) }
  from_740 = rec.fields('740')
                .select { |f| ['', ' ', '0', '1', '3'].member?(f.indicator2) }
                .map { |f| join_subfields(f, &subfield_not_in(%w[5 6 8])) }
  from_880 = rec.fields('880')
                .select { |f| has_subfield6_value(f, /^(246|740)/) }
                .map { |f| join_subfields(f, &subfield_not_in(%w[5 6 8])) }
  from_246 + from_740 + from_880
end
2205
+
2206
# Shared logic for the production, distribution and manufacture displays,
# which differ only in the indicator2 value they look for.
#
# Joins subfields a, b, c of every 264 field with the given indicator2,
# followed by every 880 field with that indicator2 whose subfield 6 starts
# with 264.
# @param indicator2 [String] indicator2 value to match
# @return [Array<String>]
def get_264_or_880_fields(rec, indicator2)
  abc = ->(f) { join_subfields(f, &subfield_in(%w[a b c])) }
  direct = rec.fields('264').select { |f| f.indicator2 == indicator2 }
  paired = rec.fields('880').select do |f|
    f.indicator2 == indicator2 && has_subfield6_value(f, /^264/)
  end
  (direct + paired).map(&abc)
end
2222
+
2223
# 264/880 display values where indicator2 is '0'.
def get_production_display(rec)
  indicator2 = '0'
  get_264_or_880_fields(rec, indicator2)
end
2226
+
2227
# 264/880 display values where indicator2 is '2'.
def get_distribution_display(rec)
  indicator2 = '2'
  get_264_or_880_fields(rec, indicator2)
end
2230
+
2231
# 264/880 display values where indicator2 is '3'.
def get_manufacture_display(rec)
  indicator2 = '3'
  get_264_or_880_fields(rec, indicator2)
end
2234
+
2235
# Joins the subfields selected by subfield_not_6_or_8 for each 255/342 field.
# @return [Array<String>]
def get_cartographic_display(rec)
  rec.fields(%w[255 342]).map { |f| join_subfields(f, &subfield_not_6_or_8) }
end
2240
+
2241
# Joins all subfields except 2, 5, 6 and 8 of each 026 field.
# @return [Array<String>]
def get_fingerprint_display(rec)
  rec.fields('026').map { |f| join_subfields(f, &subfield_not_in(%w[2 5 6 8])) }
end
2246
+
2247
# Delegates to get_datafield_and_880 for tag 351.
def get_arrangement_display(rec)
  tag = '351'
  get_datafield_and_880(rec, tag)
end
2250
+
2251
# Display entries (link_type 'title_search') for every 247 field and every
# 880 field whose subfield 6 starts with 247, in record order.
# :value joins all subfields except 6, 8, e, w; :value_append joins e and w.
# @return [Array<Hash>]
def get_former_title_display(rec)
  relevant = rec.fields.select do |f|
    f.tag == '247' || (f.tag == '880' && has_subfield6_value(f, /^247/))
  end
  relevant.map do |f|
    {
      value: join_subfields(f, &subfield_not_in(%w[6 8 e w])),
      value_append: join_subfields(f, &subfield_in(%w[e w])),
      link_type: 'title_search'
    }
  end
end
2260
+
2261
# Shared logic for the 'Continues' and 'Continued By' displays.
#
# Takes, in record order, every field with the given tag plus every 880
# field whose subfield 6 starts with that tag, keeps those having at least
# one of subfields i, a, s, t, n, d, and joins exactly those subfields.
# @param tag [String] tag to look for
# @return [Array<String>]
def get_continues(rec, tag)
  wanted = %w[i a s t n d]
  candidates = rec.fields.select do |f|
    tag_matches = f.tag == tag || (f.tag == '880' && has_subfield6_value(f, /^#{tag}/))
    tag_matches && f.any?(&subfield_in(wanted))
  end
  candidates.map { |f| join_subfields(f, &subfield_in(wanted)) }
end
2270
+
2271
# Delegates to get_continues for tag 780.
def get_continues_display(rec)
  tag = '780'
  get_continues(rec, tag)
end
2274
+
2275
# Delegates to get_continues for tag 785.
def get_continued_by_display(rec)
  tag = '785'
  get_continues(rec, tag)
end
2278
+
2279
# Display entries for place of publication.
# Each 752 field gives a linked entry (link_type 'search') whose :value joins
# all subfields except 6, 8, e, w and whose :value_append joins e and w.
# Each result of get_880_subfield_not_6_or_8 for tag 752 gives an unlinked
# entry (link: false).
# @return [Array<Hash>]
def get_place_of_publication_display(rec)
  linked_entries = rec.fields('752').map do |f|
    {
      value: join_subfields(f, &subfield_not_in(%w[6 8 e w])),
      value_append: join_subfields(f, &subfield_in(%w[e w])),
      link_type: 'search'
    }
  end
  plain_entries = get_880_subfield_not_6_or_8(rec, '752').map do |text|
    { value: text, link: false }
  end
  linked_entries + plain_entries
end
2291
+
2292
# Delegates to get_datafield_and_880 for tag 546.
def get_language_display(rec)
  tag = '546'
  get_datafield_and_880(rec, tag)
end
2295
+
2296
# Used for system details. Builds "<subfield 3 text> <other subfields>":
#   * subfield 3 values have a trailing period trimmed and are joined by ': ';
#   * the other subfields are chosen by the passed-in block, joined, and have
#     a trailing semicolon trimmed.
# The two parts are always joined with a single space, even when the first
# part is empty.
# @yield [subfield] predicate selecting the "other" subfields
# @return [String]
def get_sub3_and_other_subs(field, &block)
  sub3_text = field.select(&subfield_in(%w[3]))
                   .map { |sf| trim_trailing_period(sf.value) }
                   .join(': ')
  other_text = trim_trailing_semicolon(join_subfields(field, &block))
  [sub3_text, other_text].join(' ')
end
2302
+
2303
# System details display strings, each built by get_sub3_and_other_subs.
#
# The regular fields come first, in this order: 538 (a, i, u),
# 344 (a-h), 345/346 (a, b), 347 (a-f). They are followed by the 880 fields
# whose subfield 6 starts with the same tags, in the same order and with the
# same subfield selections.
# @return [Array<String>]
def get_system_details_display(rec)
  # [tags to fetch, pattern for subfield 6 of 880s, subfield codes to join]
  specs = [
    ['538', /^538/, %w[a i u]],
    ['344', /^344/, %w[a b c d e f g h]],
    [%w[345 346], /^(345|346)/, %w[a b]],
    ['347', /^347/, %w[a b c d e f]]
  ]

  direct = specs.flat_map do |tags, _pattern, codes|
    rec.fields(tags).map { |f| get_sub3_and_other_subs(f, &subfield_in(codes)) }
  end
  paired = specs.flat_map do |_tags, pattern, codes|
    rec.fields('880')
       .select { |f| has_subfield6_value(f, pattern) }
       .map { |f| get_sub3_and_other_subs(f, &subfield_in(codes)) }
  end

  direct + paired
end
2339
+
2340
# Delegates to get_datafield_and_880 for tag 545.
def get_biography_display(rec)
  tag = '545'
  get_datafield_and_880(rec, tag)
end
2343
+
2344
# Delegates to get_datafield_and_880 for tag 520.
def get_summary_display(rec)
  tag = '520'
  get_datafield_and_880(rec, tag)
end
2347
+
2348
# Contents entries: the joined text of each 505 field (subfields chosen by
# subfield_not_6_or_8) is split on '--', and the same is done for each 880
# field whose subfield 6 starts with 505. 505 pieces come first.
# @return [Array<String>]
def get_contents_display(rec)
  split_entries = ->(f) { join_subfields(f, &subfield_not_6_or_8).split('--') }
  direct = rec.fields('505').flat_map(&split_entries)
  paired = rec.fields('880')
              .select { |f| has_subfield6_value(f, /^505/) }
              .flat_map(&split_entries)
  direct + paired
end
2360
+
2361
# One search value per 505 field: the values of all of its subfields, joined
# and whitespace-trimmed.
# @return [Array]
def get_contents_note_search_values(rec)
  rec.fields('505').map do |f|
    all_values = f.to_a.map(&:value)
    join_and_trim_whitespace(all_values)
  end
end
2366
+
2367
# Delegates to get_datafield_and_880 for tag 511.
def get_participant_display(rec)
  tag = '511'
  get_datafield_and_880(rec, tag)
end
2370
+
2371
# Delegates to get_datafield_and_880 for tag 508.
def get_credits_display(rec)
  tag = '508'
  get_datafield_and_880(rec, tag)
end
2374
+
2375
# Notes display strings from fields 500, 502, 504, 515, 518, 525, 533, 550,
# 580, 586 (added 10/2018 kms) and 588, followed by the 880 fields whose
# subfield 6 starts with one of those tags.
#
# For 588 only subfield a is shown; for every other tag all subfields except
# 5, 6 and 8 are shown.
#
# An 880 counts as a 588 when its first subfield 6 value STARTS WITH '588'.
# The previous exact comparison (== '588') could not match a subfield 6 that
# carries anything after the tag, which is why the selection above uses a
# prefix pattern; such fields therefore showed every subfield.
# @return [Array<String>]
def get_notes_display(rec)
  render = lambda do |field, only_a|
    chooser = only_a ? subfield_in(%w[a]) : subfield_not_in(%w[5 6 8])
    join_subfields(field, &chooser)
  end

  direct = rec.fields(%w[500 502 504 515 518 525 533 550 580 586 588]).map do |field|
    render.call(field, field.tag == '588')
  end

  paired = rec.fields('880')
              .select { |f| has_subfield6_value(f, /^(500|502|504|515|518|525|533|550|580|586|588)/) }
              .map do |field|
    sub6 = field.select(&subfield_in(%w[6])).map(&:value).first
    render.call(field, sub6.to_s.start_with?('588'))
  end

  direct + paired
end
2397
+
2398
# Local notes display strings, in this order:
#   * subfield a of 561 fields having a subfield a that starts with
#     'Athenaeum copy: ' (other 561s still display as Penn Provenance);
#   * 562, 563, 585 (added 10/2018 kms) and 590 fields without 5, 6, 8;
#   * values returned by get_880 for those four tags, without 5, 6, 8.
# @return [Array<String>]
def get_local_notes_display(rec)
  athenaeum = rec.fields('561')
                 .select { |f| f.any? { |sf| sf.code == 'a' && sf.value =~ /^Athenaeum copy: / } }
                 .map { |f| join_subfields(f, &subfield_in(%w[a])) }
  local = rec.fields(%w[562 563 585 590]).map do |f|
    join_subfields(f, &subfield_not_in(%w[5 6 8]))
  end
  paired = get_880(rec, %w[562 563 585 590]) { |sf| !%w[5 6 8].member?(sf.code) }
  athenaeum + local + paired
end
2415
+
2416
# Delegates to get_datafield_and_880 for tag 555.
def get_finding_aid_display(rec)
  tag = '555'
  get_datafield_and_880(rec, tag)
end
2419
+
2420
# Subject entries for provenance and chronology, taken from 650 fields and
# from 880 fields whose subfield 6 starts with 650.
#
# A field qualifies when indicator2 is '4' and it has a subfield a starting
# with the prefix (optionally preceded by '%'). The prefix is stripped from
# the subfield a values; the other subfields are appended, leaving out
# 6, 8, e, w and 5 (11/2018: $5 is not displayed for PRO or CHR subjects).
# Fields producing a blank value are dropped.
# @param prefix [String] should be 'PRO' or 'CHR'
# @return [Array<Hash>] hashes with :value and link_type 'subject_search'
def get_650_and_880(rec, prefix)
  prefixed = lambda do |f|
    f.any? { |sf| sf.code == 'a' && sf.value =~ /^(#{prefix}|%#{prefix})/ }
  end
  to_entry = lambda do |f|
    stripped_a = f.select(&subfield_in(%w[a]))
                  .map { |sf| sf.value.gsub(/^%?#{prefix}/, '') }
                  .join(' ')
    remainder = join_subfields(f, &subfield_not_in(%w[a 6 8 e w 5]))
    text = [stripped_a, remainder].join(' ')
    { value: text, link_type: 'subject_search' } if text.present?
  end

  direct = rec.fields('650').select { |f| f.indicator2 == '4' && prefixed.call(f) }
  paired = rec.fields('880').select do |f|
    f.indicator2 == '4' && has_subfield6_value(f, /^650/) && prefixed.call(f)
  end
  (direct + paired).map(&to_entry).compact
end
2447
+
2448
# Provenance entries, in this order:
#   * subfield a of 561 fields whose indicator1 is '1', '' or ' ', whose
#     indicator2 is ' ' or '', and which have a subfield a NOT starting with
#     'Athenaeum copy: ' (11/2018 kms: those display as Local Notes instead);
#   * subfield a of 880 fields whose subfield 6 starts with 561 and whose
#     indicators pass the same test;
#   * the entries of get_650_and_880 for prefix 'PRO'.
# The first two groups are unlinked (link: false).
# @return [Array<Hash>]
def get_provenance_display(rec)
  indicators_ok = lambda do |f|
    ['1', '', ' '].member?(f.indicator1) && [' ', ''].member?(f.indicator2)
  end
  to_entry = lambda do |f|
    text = join_subfields(f, &subfield_in(%w[a]))
    { value: text, link: false } if text
  end

  direct = rec.fields('561').select do |f|
    indicators_ok.call(f) && f.any? { |sf| sf.code == 'a' && sf.value !~ /^Athenaeum copy: / }
  end
  paired = rec.fields('880').select do |f|
    has_subfield6_value(f, /^561/) && indicators_ok.call(f)
  end

  (direct + paired).map(&to_entry).compact + get_650_and_880(rec, 'PRO')
end
2467
+
2468
# Delegates to get_650_and_880 with prefix 'CHR'.
def get_chronology_display(rec)
  prefix = 'CHR'
  get_650_and_880(rec, prefix)
end
2471
+
2472
# Delegates to get_datafield_and_880 for tag 544.
def get_related_collections_display(rec)
  tag = '544'
  get_datafield_and_880(rec, tag)
end
2475
+
2476
# Delegates to get_datafield_and_880 for tag 510.
def get_cited_in_display(rec)
  tag = '510'
  get_datafield_and_880(rec, tag)
end
2479
+
2480
# Delegates to get_datafield_and_880 for tag 581.
def get_publications_about_display(rec)
  tag = '581'
  get_datafield_and_880(rec, tag)
end
2483
+
2484
# Delegates to get_datafield_and_880 for tag 524.
def get_cite_as_display(rec)
  tag = '524'
  get_datafield_and_880(rec, tag)
end
2487
+
2488
# Contributor entries (link_type 'author_creator_xfacet2').
#
# 700/710 fields qualify when indicator2 is '', ' ' or '0' and there is no
# subfield i; 880 fields qualify when subfield 6 starts with 700 or 710 and
# there is no subfield i.
# :value joins subfields a, b, c, d, j, q. For 700/710, :value_append strings
# together e, u, 3 (" <value>") and 4 (", <relator_codes lookup>"); for 880
# it joins e, u, 3.
# @return [Array<Hash>]
def get_contributor_display(rec)
  name_codes = %w[a b c d j q]
  lacks_i = ->(f) { f.none? { |sf| sf.code == 'i' } }

  direct = rec.fields(%w[700 710])
              .select { |f| ['', ' ', '0'].member?(f.indicator2) && lacks_i.call(f) }
              .map do |f|
    name = join_subfields(f, &subfield_in(name_codes))
    suffix = f.select(&subfield_in(%w[e u 3 4]))
              .map { |sf| sf.code == '4' ? ", #{relator_codes[sf.value]}" : " #{sf.value}" }
              .join
    { value: name, value_append: suffix, link_type: 'author_creator_xfacet2' }
  end

  paired = rec.fields('880')
              .select { |f| has_subfield6_value(f, /^(700|710)/) && lacks_i.call(f) }
              .map do |f|
    name = join_subfields(f, &subfield_in(name_codes))
    suffix = join_subfields(f, &subfield_in(%w[e u 3]))
    { value: name, value_append: suffix, link_type: 'author_creator_xfacet2' }
  end

  direct + paired
end
2513
+
2514
# Returns the text of the field's first subfield i with its first
# parenthesized portion (parentheses included) removed, then with a trailing
# period and a trailing colon trimmed. When the field has no subfield i the
# empty string is processed instead.
def remove_paren_value_from_subfield_i(field)
  cleaned = field.select { |sf| sf.code == 'i' }.map do |sf|
    paren = /\((.+?)\)/.match(sf.value)
    paren ? sf.value.sub("(#{paren[1]})", '') : sf.value
  end
  trim_trailing_colon(trim_trailing_period(cleaned.first || ''))
end
2527
+
2528
# Related-work display strings from 700/710/711/730 fields, and from 880
# fields whose subfield 6 starts with one of those tags. A field qualifies
# when indicator2 is '' or ' ' and it has a subfield t.
#
# Each string is "<subfield i text>:<rest>", where the subfield i text comes
# from remove_paren_value_from_subfield_i and the rest strings together all
# subfields except 0 and i (" <value>"), with subfield 4 rendered as
# ", <relator_codes lookup>". Blank parts are left out of the join.
# @return [Array<String>]
def get_related_work_display(rec)
  blank_ind2 = ->(f) { ['', ' '].member?(f.indicator2) }
  has_t = ->(f) { f.any? { |sf| sf.code == 't' } }
  render = lambda do |f|
    label = remove_paren_value_from_subfield_i(f) || ''
    body = f.map do |sf|
      if sf.code == '4'
        ", #{relator_codes[sf.value]}"
      elsif !%w[0 i].member?(sf.code)
        " #{sf.value}"
      end
    end.compact.join
    [label, body].select(&:present?).join(':')
  end

  direct = rec.fields(%w[700 710 711 730]).select do |f|
    blank_ind2.call(f) && has_t.call(f)
  end
  paired = rec.fields('880').select do |f|
    blank_ind2.call(f) && has_subfield6_value(f, /^(700|710|711|730)/) && has_t.call(f)
  end

  (direct + paired).map(&render)
end
2561
+
2562
# "Contains" display strings from 700/710/711/730/740 fields with
# indicator2 '2', followed by 880 fields with indicator2 '2' whose subfield 6
# starts with one of those tags.
#
# Each string is "<subfield i text>:<rest>" with blank parts left out. For
# the regular fields the rest strings together every subfield except
# 0, 5, 6, 8, i (" <value>"), rendering 4 as ", <relator_codes lookup>"; for
# 880 fields it is join_subfields without 0, 5, 6, 8, i.
# @return [Array<String>]
def get_contains_display(rec)
  direct = rec.fields(%w[700 710 711 730 740])
              .select { |f| f.indicator2 == '2' }
              .map do |f|
    label = remove_paren_value_from_subfield_i(f) || ''
    body = f.map do |sf|
      if sf.code == '4'
        ", #{relator_codes[sf.value]}"
      elsif !%w[0 5 6 8 i].member?(sf.code)
        " #{sf.value}"
      end
    end.compact.join
    [label, body].select(&:present?).join(':')
  end

  paired = rec.fields('880')
              .select { |f| f.indicator2 == '2' && has_subfield6_value(f, /^(700|710|711|730|740)/) }
              .map do |f|
    label = remove_paren_value_from_subfield_i(f) || ''
    body = join_subfields(f, &subfield_not_in(%w[0 5 6 8 i]))
    [label, body].select(&:present?).join(':')
  end

  direct + paired
end
2587
+
2588
# Builds the display hash for one "other edition" field.
#
# value         - subfields s, x, z (" <value>") and, for subfield t,
#                 " <relator_codes lookup>. "
# value_prepend - subfield i text (see remove_paren_value_from_subfield_i)
#                 with a trailing period trimmed, followed by ':'
# value_append  - every subfield not in i, h, s, t, x, z, e, f, o, r, w, y, 7
#                 (" <value>"), plus h rendered as " (<value>) "
# link_type     - 'author_creator_xfacet2'
# @return [Hash]
def get_other_edition_value(field)
  label = remove_paren_value_from_subfield_i(field) || ''

  # NOTE(review): subfield t is looked up in relator_codes, whereas other
  # methods in this file do that for subfield 4 - confirm intended.
  main_text = field.map do |sf|
    case sf.code
    when 's', 'x', 'z' then " #{sf.value}"
    when 't' then " #{relator_codes[sf.value]}. "
    end
  end.compact.join

  trailing_text = field.map do |sf|
    if sf.code == 'h'
      " (#{sf.value}) "
    elsif !%w[i s t x z e f o r w y 7].member?(sf.code)
      " #{sf.value}"
    end
  end.compact.join

  {
    value: main_text,
    value_prepend: trim_trailing_period(label) + ':',
    value_append: trailing_text,
    link_type: 'author_creator_xfacet2'
  }
end
2611
+
2612
# "Other edition" entries (see get_other_edition_value) for every 775 field
# that has a subfield i, followed by every 880 field that has indicator2 ''
# or ' ', a subfield 6 starting with 775, and a subfield i.
# @return [Array<Hash>]
def get_other_edition_display(rec)
  has_i = ->(f) { f.any? { |sf| sf.code == 'i' } }
  direct = rec.fields('775').select(&has_i)
  paired = rec.fields('880').select do |f|
    ['', ' '].member?(f.indicator2) && has_subfield6_value(f, /^775/) && has_i.call(f)
  end
  (direct + paired).map { |f| get_other_edition_value(f) }
end
2628
+
2629
# Non-blank joins of subfields a, g, i, s, t for each 773 field, followed by
# whatever get_880 returns for tag 773 with the same subfield selection.
# @return [Array<String>]
def get_contained_in_display(rec)
  wanted = %w[a g i s t]
  direct = rec.fields('773')
              .map { |f| join_subfields(f, &subfield_in(%w[a g i s t])) }
              .select(&:present?)
  direct + get_880(rec, '773') { |sf| wanted.member?(sf.code) }
end
2639
+
2640
# Non-blank joins of subfields i, a, s, t for each 774 field, followed by
# whatever get_880 returns for tag 774 with the same subfield selection.
# @return [Array<String>]
def get_constituent_unit_display(rec)
  wanted = %w[i a s t]
  direct = rec.fields('774')
              .map { |f| join_subfields(f, &subfield_in(%w[i a s t])) }
              .select(&:present?)
  direct + get_880(rec, '774') { |sf| wanted.member?(sf.code) }
end
2650
+
2651
# Non-blank joins (subfields chosen by subfield_not_6_or_8) for each 770
# field, followed by the results of get_880_subfield_not_6_or_8 for tag 770.
# @return [Array<String>]
def get_has_supplement_display(rec)
  direct = rec.fields('770')
              .map { |f| join_subfields(f, &subfield_not_6_or_8) }
              .select(&:present?)
  direct + get_880_subfield_not_6_or_8(rec, '770')
end
2659
+
2660
# Non-blank joins of subfields i, a, s, t, o for each 776 field, followed by
# whatever get_880 returns for tag 776 with the same subfield selection.
#
# The 880 lookup previously asked for tag '774' (the constituent-unit tag
# used by the method above), so the 880 values shown here belonged to 774
# rather than to 776. It now asks for '776'.
# @return [Array<String>]
def get_other_format_display(rec)
  wanted = %w[i a s t o]
  direct = rec.fields('776')
              .map { |f| join_subfields(f, &subfield_in(wanted)) }
              .select(&:present?)
  direct + get_880(rec, '776') { |sf| wanted.member?(sf.code) }
end
2670
+
2671
# Non-blank joins of subfields a and z for each 020 field, followed by
# whatever get_880 returns for tag 020 with the same subfield selection.
# @return [Array<String>]
def get_isbn_display(rec)
  wanted = %w[a z]
  direct = rec.fields('020')
              .map { |f| join_subfields(f, &subfield_in(%w[a z])) }
              .select(&:present?)
  direct + get_880(rec, '020') { |sf| wanted.member?(sf.code) }
end
2681
+
2682
# Non-blank joins of subfields a and z for each 022 field, followed by
# whatever get_880 returns for tag 022 with the same subfield selection.
# @return [Array<String>]
def get_issn_display(rec)
  wanted = %w[a z]
  direct = rec.fields('022')
              .map { |f| join_subfields(f, &subfield_in(%w[a z])) }
              .select(&:present?)
  direct + get_880(rec, '022') { |sf| wanted.member?(sf.code) }
end
2692
+
2693
# Tests whether a subfield is an "a" subfield whose value has a line that
# begins with the literal prefix "(OCoLC)".
#
# sf - subfield object responding to #code and #value.
#
# Returns false when the code is not 'a'; otherwise the result of the regex
# match (an Integer offset on success, nil on failure), so callers should
# rely on truthiness only.
def subfield_a_is_oclc(sf)
  return false unless sf.code == 'a'

  sf.value =~ /^\(OCoLC\).*/
end
2696
+
2697
# Extracts OCLC numbers from the first 035 field that carries one.
#
# Only the first 035 field containing an "(OCoLC)" subfield a is examined.
# For each such subfield, the digits following the prefix are captured with
# any leading non-1-9 characters (e.g. zero padding, letter prefixes)
# skipped; subfields with no capturable number are dropped.
#
# rec - record object; must respond to #fields(tag).
#
# Returns an Array of digit Strings (empty when no 035 field qualifies).
def get_oclc_id_values(rec)
  oclc_field = rec.fields('035').find { |f| f.any? { |sf| subfield_a_is_oclc(sf) } }
  return [] unless oclc_field

  oclc_field.select { |sf| subfield_a_is_oclc(sf) }.map do |sf|
    match = /^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/.match(sf.value)
    match && match[1]
  end.compact
end
2710
+
2711
# Display values for the 024 and 028 fields of a record.
#
# rec - record object; must respond to #fields(tag_or_tags).
#
# Returns an Array: the non-blank strings built from every subfield except
# 5 and 6 of each 024/028 field, followed by the same join applied to each
# 880 field whose subfield 6 matches /^(024|028)/. Note that the 880
# results are not filtered for blank strings.
def get_publisher_number_display(rec)
  excluded_codes = %w{5 6}

  direct = rec.fields(%w{024 028})
              .map { |field| join_subfields(field, &subfield_not_in(excluded_codes)) }
              .select(&:present?)
  linked = rec.fields('880')
              .select { |field| has_subfield6_value(field, /^(024|028)/) }
              .map { |field| join_subfields(field, &subfield_not_in(excluded_codes)) }

  direct + linked
end
2723
+
2724
# Display values for the 506 fields of a record.
#
# rec - record object; must respond to #fields(tag).
#
# Returns an Array of non-blank strings, one per 506 field, built from every
# subfield except 5 and 6.
def get_access_restriction_display(rec)
  restrictions = rec.fields('506').map do |field|
    join_subfields(field, &subfield_not_in(%w{5 6}))
  end
  restrictions.select(&:present?)
end
2729
+
2730
# Display values for the 501 fields of a record.
#
# rec - record object; must respond to #fields(tag).
#
# Returns an Array of non-blank strings, one per 501 field, built from every
# subfield EXCEPT subfield a.
#
# NOTE(review): in MARC 21 the text of a 501 note normally lives in subfield
# a, so excluding it looks inverted (subfield_in may have been intended).
# Behavior is left unchanged here; confirm against callers before altering.
def get_bound_with_display(rec)
  notes = rec.fields('501').map do |field|
    join_subfields(field, &subfield_not_in(%w{a}))
  end
  notes.select(&:present?)
end
2735
+
2736
+ # some logic to extract link text and link url from an 856 field
2737
# field - an 856 field object; must be enumerable over its subfields.
#
# The link text is the joined subfield 3 value(s), a space, then the first
# subfield z value (falling back to the first subfield y, then to ''). The
# URL is the first subfield u value (or ''), with the first occurrence of
# the literal ' target=_blank' removed.
#
# Returns a two-element Array: [linktext, linkurl].
def linktext_and_url(field)
  first_value = lambda do |code|
    field.select(&subfield_in([code])).map(&:value).first
  end

  materials = join_subfields(field, &subfield_in(%w{3}))
  note = first_value.call('z') || first_value.call('y') || ''
  url = (first_value.call('u') || '').sub(' target=_blank', '')

  [[materials, note].join(' '), url]
end
2746
+
2747
# Lowercase words that get_web_link_display strips from link text when it
# derives a bookplate image filename.
#
# Returns the same memoized Array of Strings on every call.
def words_to_remove_from_web_link
  @words_to_remove_from_web_link ||= [
    'fund', 'funds', 'collection', 'collections', 'endowment',
    'endowed', 'trust', 'and', 'for', 'of', 'the', 'memorial'
  ]
end
2752
+
2753
# Builds link entries from the 856 fields whose second indicator is '2',
# ' ' or ''.
#
# Every qualifying field yields a { linktext:, linkurl: } Hash. When the
# link text looks like a bookplate link (fund / collection / trust /
# endowment "Home Page", or "A Penn Libraries Collection Gift"), a second
# Hash { img_src:, img_alt:, linkurl: } is added that points at the
# bookplate image derived from the link text.
#
# rec - record object; must respond to #fields(tag).
#
# Returns a flat Array of Hashes.
def get_web_link_display(rec)
  # words/phrases commonly used in bookplate links
  bookplate_pattern = /(Funds?|Collections?( +Gifts)?|Trust|Development|Endowment.*) +Home +Page|A +Penn +Libraries +Collection +Gift/

  web_fields = rec.fields('856').select { |f| ['2', ' ', ''].member?(f.indicator2) }

  web_fields.flat_map do |field|
    linktext, linkurl = linktext_and_url(field)
    entries = [{ linktext: linktext, linkurl: linkurl }]
    next entries unless linktext =~ bookplate_pattern

    # Strip the less-meaningful words; what remains, concatenated, is the
    # filename used for the bookplate image.
    kept_words = linktext.gsub(/- A Penn Libraries Collection Gift/i, '')
                         .gsub(/ Home Page/i, '')
                         .gsub(/[&.]/, '')
                         .split(/\W+/)
                         .reject { |word| words_to_remove_from_web_link.member?(word.downcase) }
    imagename = kept_words.join('')

    entries << {
      img_src: "https://www.library.upenn.edu/sites/default/files/images/bookplates/#{imagename}.gif",
      img_alt: "#{linktext.strip} Bookplate",
      linkurl: linkurl
    }
    entries
  end
end
2785
+
2786
# Collects the unique call numbers found on a record's holding and item
# fields.
#
# Some records don't have item records, only holdings, so for
# safety/comprehensiveness both are indexed and the unique values of the
# combined result are returned.
#
# rec - record object; must respond to #fields(tag).
#
# Returns an Array of unique Strings: for each holding field, the first
# classification part and the first item part joined with a space (holdings
# with neither are skipped), followed by every non-blank call number
# subfield value from the item fields.
def get_call_number_search_values(rec)
  holding_values = rec.fields(EnrichedMarc::TAG_HOLDING).map do |holding|
    classification_part =
      holding.find_all(&subfield_in([EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART])).map(&:value).first
    item_part =
      holding.find_all(&subfield_in([EnrichedMarc::SUB_HOLDING_ITEM_PART])).map(&:value).first
    next unless classification_part || item_part

    # NOTE(review): when only one part is present the joined value keeps a
    # leading or trailing space; left as-is to avoid changing indexed values.
    [classification_part, item_part].join(' ')
  end.compact

  item_values = rec.fields(EnrichedMarc::TAG_ITEM).flat_map do |item|
    item.find_all { |sf| sf.code == EnrichedMarc::SUB_ITEM_CALL_NUMBER }
        .map(&:value)
        .select(&:present?)
  end

  (holding_values + item_values).uniq
end
2815
+
2816
# Passes each value from get_call_number_search_values through #references.
#
# rec - record object handed on to get_call_number_search_values.
#
# Returns an Array with one #references result per call number.
def get_call_number_xfacet_values(rec)
  get_call_number_search_values(rec).map { |call_number| references(call_number) }
end
2821
+
2822
# Computes the "recently added" and "last updated" timestamps of a record.
#
# most_recent_add is the newest creation date found on the record's item
# fields, as epoch seconds (0 when none can be parsed). last_update is the
# first parseable, non-'0000'-prefixed 005 value as epoch seconds; it is
# replaced by most_recent_add when it is missing or older.
#
# rec - record object; must respond to #fields(tag).
#
# Returns a Hash with Integer values under :most_recent_add and :last_update.
def prepare_timestamps(rec)
  most_recent_add = rec.fields(EnrichedMarc::TAG_ITEM).flat_map do |item|
    item.find_all(&subfield_in([EnrichedMarc::SUB_ITEM_DATE_CREATED])).map do |sf|
      begin
        # On 2022-05-02, this field value (as exported in enriched publishing
        # job from Alma) began truncating time to day-level granularity. We have
        # no guarantee that this won't switch back in the future, so for the
        # foreseeable future we should support both representations.
        format = sf.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
        DateTime.strptime(sf.value, format).to_time.to_i
      rescue StandardError => e
        # StandardError (not Exception): a bad value is reported and skipped,
        # but Interrupt, SystemExit, NoMemoryError etc. must still propagate.
        puts "Error parsing date string for recently added field: #{sf.value} - #{e}"
        nil
      end
    end.compact
  end.max || 0

  last_update = rec.fields('005')
                   .select { |f| f.value.present? && !f.value.start_with?('0000') }
                   .map do |field|
    begin
      DateTime.iso8601(field.value).to_time.to_i
    rescue ArgumentError
      nil
    end
  end.compact.first

  last_update = most_recent_add if last_update.nil? || most_recent_add > last_update

  {
    most_recent_add: most_recent_add,
    last_update: last_update
  }
end
2861
+
2862
# Builds full-text link entries from the 856 fields whose first indicator is
# '4' and whose second indicator is '0' or '1'.
#
# Each entry is a { linktext:, linkurl: } Hash; the URL doubles as the link
# text when no text is present. When is_etas(rec) is truthy,
# add_etas_full_text is given the chance to append one more entry.
#
# rec - record object; must respond to #fields(tag).
#
# Returns an Array of Hashes.
def get_full_text_link_values(rec)
  online_fields = rec.fields('856').select do |f|
    (f.indicator1 == '4') && %w{0 1}.member?(f.indicator2)
  end

  links = online_fields.map do |field|
    text, url = linktext_and_url(field)
    { linktext: (text.present? ? text : url), linkurl: url }
  end

  add_etas_full_text(rec, links) if is_etas(rec)
  links
end
2875
+
2876
# Text stored under :postfix on the HathiTrust link entry built by
# add_etas_full_text. Frozen so the shared constant cannot be mutated
# through one of those entries.
HATHI_POSTFIX = ' from HathiTrust during COVID-19'.freeze
2877
+
2878
# Appends a HathiTrust "Online access" link entry to acc, keyed on the
# record's first OCLC number.
#
# rec - record object handed on to get_oclc_id_values.
# acc - Array of link Hashes; mutated in place.
#
# Returns acc after appending, or nil when the record has no OCLC number
# (defensive, e.g. if the hathi match was based on a subsequently deleted
# oclc id).
def add_etas_full_text(rec, acc)
  primary_oclc_id = get_oclc_id_values(rec).first
  if primary_oclc_id
    acc << {
      linktext: 'Online access',
      linkurl: "http://catalog.hathitrust.org/api/volumes/oclc/#{primary_oclc_id}.html",
      postfix: HATHI_POSTFIX
    }
  end
end
2887
+
2888
+ # It's not clear whether Alma can suppress these auto-generated
2889
+ # records (Primo instances seem to show these records!) so we filter
2890
+ # them out here just in case
2891
# rec - record object; must respond to #fields(tag).
#
# Returns true when the joined subfield a of any 245 field contains the
# phrase 'Host bibliographic record for boundwith', false otherwise.
def is_boundwith_record(rec)
  marker = 'Host bibliographic record for boundwith'

  rec.fields('245').any? do |field|
    join_subfields(field, &subfield_in(%w{a})).include?(marker)
  end
end
2897
+
2898
+ # values for passed-in args come from Solr, not extracted directly from MARC.
2899
+ # TODO: this code should return more data-ish values; the HTML should be moved into a render method
2900
# Builds the HTML snippet for an offsite (Center for Research Libraries)
# record: a link to the CRL catalog record, a "Place request" link to the
# Atlas ILL form carrying the citation as query parameters, and, when the
# record has 590 fields, a <div> holding their joined contents.
#
# rec     - record object; must respond to #fields(tag).
# crl_id  - CRL record id; used in the CRL URL and as the crl_id parameter.
# title   - title sent as the title parameter.
# author  - author sent as the author parameter.
# oclc_id - OCLC id sent as the oclc parameter.
#
# NOTE(review): crl_id and the 590 contents are interpolated into the markup
# without HTML escaping; confirm the inputs are trusted or escape them in the
# render layer mentioned in the TODO.
#
# Returns a one-element Array containing the HTML String.
def get_offsite_display(rec, crl_id, title, author, oclc_id)
  crl_link = %Q{<a href="http://catalog.crl.edu/record=#{crl_id}~S1">Center for Research Libraries Holdings</a>}

  pub_fields = rec.fields('260')
  imprint_part = lambda do |code|
    pub_fields.map { |f| join_subfields(f, &subfield_in([code])) }.join(' ')
  end

  atlas_params = {
    crl_id: crl_id,
    title: title,
    author: author,
    oclc: oclc_id,
    place: imprint_part.call('a'),
    publisher: imprint_part.call('b'),
    pubdate: imprint_part.call('c')
  }
  request_link = %Q{<a href="https://atlas.library.upenn.edu/cgi-bin/forms/illcrl.cgi?#{atlas_params.to_query}">Place request</a>}

  local_notes = rec.fields('590')
  notes_html =
    if local_notes.size > 0
      "<div>#{local_notes.map { |field| field.join(' ') }.join}</div>"
    else
      ''
    end

  [crl_link + request_link + notes_html]
end
2932
+
2933
# Predicate shared by the get_ris_* publication helpers: accepts every 260
# field, and 264 fields whose second indicator is '1'.
@@select_pub_field = lambda do |field|
  next true if field.tag == '260'

  field.tag == '264' && field.indicator2 == '1'
end
2936
+
2937
# rec - record object; must respond to #fields.
#
# Returns an Array with every subfield a value from the fields accepted by
# @@select_pub_field.
def get_ris_cy_field(rec)
  pub_fields = rec.fields.select(&@@select_pub_field)
  pub_fields.flat_map { |field| field.select(&subfield_in(['a'])).map(&:value) }
end
2942
+
2943
# rec - record object; must respond to #fields.
#
# Returns an Array with every subfield b value from the fields accepted by
# @@select_pub_field.
def get_ris_pb_field(rec)
  pub_fields = rec.fields.select(&@@select_pub_field)
  pub_fields.flat_map { |field| field.select(&subfield_in(['b'])).map(&:value) }
end
2948
+
2949
# rec - record object; must respond to #fields.
#
# Returns an Array with every subfield c value from the fields accepted by
# @@select_pub_field.
def get_ris_py_field(rec)
  pub_fields = rec.fields.select(&@@select_pub_field)
  pub_fields.flat_map { |field| field.select(&subfield_in(['c'])).map(&:value) }
end
2954
+
2955
# rec - record object; must respond to #fields.
#
# Returns an Array with every subfield a value from the record's 020 and 022
# fields.
def get_ris_sn_field(rec)
  number_fields = rec.fields.select { |f| f.tag == '020' || f.tag == '022' }
  number_fields.flat_map { |field| field.select(&subfield_in(['a'])).map(&:value) }
end
2960
+
2961
+ end
2962
+
2963
+ end
2964
+ # rubocop:enable all