pennmarc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
data/legacy/marc.rb ADDED
@@ -0,0 +1,2964 @@
1
+ # rubocop:disable all
2
+ # frozen_string_literal: true
3
+
4
+ require 'nokogiri'
5
+
6
+ module PennLib
7
+
8
+ # Constants for Alma's MARC enrichment
9
# Tag and subfield-code constants used when reading the enrichment fields
# that are added to published MARC records.
module EnrichedMarc
  # Enrichment field tags; terminology follows the Publishing Profile screen.
  TAG_HOLDING              = 'hld'
  TAG_ITEM                 = 'itm'
  TAG_ELECTRONIC_INVENTORY = 'prt'
  TAG_DIGITAL_INVENTORY    = 'dig'

  # 852 (holdings) subfield codes; terminology comes from the MARC spec.
  SUB_HOLDING_SHELVING_LOCATION   = 'c'
  SUB_HOLDING_SEQUENCE_NUMBER     = '8'
  SUB_HOLDING_CLASSIFICATION_PART = 'h'
  SUB_HOLDING_ITEM_PART           = 'i'

  # Item subfield codes.
  SUB_ITEM_CURRENT_LOCATION = 'g'
  SUB_ITEM_CALL_NUMBER_TYPE = 'h'
  SUB_ITEM_CALL_NUMBER      = 'i'
  SUB_ITEM_DATE_CREATED     = 'q'

  # Electronic portfolio subfield codes.
  SUB_ELEC_PORTFOLIO_PID   = 'a'
  SUB_ELEC_ACCESS_URL      = 'b'
  SUB_ELEC_COLLECTION_NAME = 'c'
  SUB_ELEC_COVERAGE        = 'g'

  # A subfield code NOT used by the MARC 21 spec for 852 holdings records.
  # It is added during preprocessing to store boundwith record IDs.
  SUB_BOUND_WITH_ID = 'y'
end
36
+
37
# One-character date type codes, grouped by how the associated dates should
# be interpreted, plus a lookup (MAP) from code to interpretation symbol.
# NOTE(review): these look like MARC 008/06 "type of date" codes - confirm.
module DateType
  # Nothing (these codes have no entry in MAP)
  UNSPECIFIED = '|'
  NO_DATES_OR_BC = 'b'
  UNKNOWN = 'n'

  # Single point
  DETAILED = 'e'
  SINGLE = 's'

  # Lower bound
  CONTINUING_CURRENTLY_PUBLISHED = 'c'
  CONTINUING_STATUS_UNKNOWN = 'u'

  # Range
  CONTINUING_CEASED_PUBLICATION = 'd'
  COLLECTION_INCLUSIVE = 'i'
  COLLECTION_BULK = 'k'
  MULTIPLE = 'm'
  QUESTIONABLE = 'q'

  # Separate date for content
  DISTRIBUTION_AND_PRODUCTION = 'p'
  REPRINT_AND_ORIGINAL = 'r'
  PUBLICATION_AND_COPYRIGHT = 't'

  # code => :single | :lower_bound | :range | :separate_content.
  # Frozen so the shared lookup table cannot be modified at runtime.
  MAP = {
    DETAILED => :single,
    SINGLE => :single,

    CONTINUING_CURRENTLY_PUBLISHED => :lower_bound,
    CONTINUING_STATUS_UNKNOWN => :lower_bound,

    CONTINUING_CEASED_PUBLICATION => :range,
    COLLECTION_INCLUSIVE => :range,
    COLLECTION_BULK => :range,
    MULTIPLE => :range,
    QUESTIONABLE => :range,

    DISTRIBUTION_AND_PRODUCTION => :separate_content,
    REPRINT_AND_ORIGINAL => :separate_content,
    PUBLICATION_AND_COPYRIGHT => :separate_content
  }.freeze
end
81
+
82
# Configuration and processing for subject headings: decides which heading
# "prefix" (heading type) applies to a subject field, assembles the heading
# parts from subfields, and serializes the result into the hash returned by
# `prepare_subjects`.
module SubjectConfig

  # One-character markers identifying the heading type of a subject.
  module Prefixes
    NAME = 'n'
    TITLE = 't'
    SUBJECT = 's' # used for default, handled as lcsh
    FAST = 'f'
    GEO = 'g'
    CHILDRENS = 'c'
    MESH = 'm'
    OTHER = 'o'
  end

  # Wraps a callable that maps a field to a prefix (or nil when the field
  # should not be mapped).
  class FieldConfig
    # @param mapper [#call] receives a field, returns a prefix String or nil
    def initialize(mapper)
      @mapper = mapper
    end

    # @param field [MARC::DataField]
    # @return [String, nil]
    def map_prefix(field)
      @mapper.call(field)
    end
  end

  # Source codes (subfield 2, compared downcased) => prefix.
  THESAURI = {
    'aat' => Prefixes::OTHER,
    'cct' => Prefixes::OTHER,
    'fast' => Prefixes::FAST,
    'homoit' => Prefixes::OTHER,
    'jlabsh' => Prefixes::OTHER,
    'lcsh' => Prefixes::SUBJECT,
    'lcstt' => Prefixes::OTHER,
    'lctgm' => Prefixes::OTHER,
    'local/osu' => Prefixes::OTHER,
    'mesh' => Prefixes::MESH,
    'ndlsh' => Prefixes::OTHER,
    'nlksh' => Prefixes::OTHER
  }.freeze

  # default field mapping is based only on ind2, and topic headings (as
  # opposed to name/title headings) vary significantly across thesauri
  default_field_mapping = FieldConfig.new(lambda { |f|
    case f.indicator2
    when '0' then Prefixes::SUBJECT
    when '1' then Prefixes::CHILDRENS
    when '2' then Prefixes::MESH
    when '4' then Prefixes::OTHER
    end
  })

  # for name/title, ind2=='0'/'1'/'2' are _all_ backed by LCNAF. See:
  # https://www.loc.gov/aba/cyac/childsubjhead.html
  # https://www.nlm.nih.gov/tsd/cataloging/trainingcourses/mesh/mod8_020.html
  base_factory = lambda { |base|
    lambda { |f|
      case f.indicator2
      when '0', '1', '2' then base
      when '4' then Prefixes::OTHER
      end
    }
  }
  name_general = FieldConfig.new(base_factory.call(Prefixes::NAME))
  title_general = FieldConfig.new(base_factory.call(Prefixes::TITLE))
  geo_general = FieldConfig.new(base_factory.call(Prefixes::GEO))
  static_other = FieldConfig.new(lambda { |f|
    # For now, treat all of these as "other".
    # NOTE: 2nd indicator for local subject fields is inconsistently applied; map everything to "other"
    %w[0 1 2 4].include?(f.indicator2) ? Prefixes::OTHER : nil
  })

  # Subject field tag => FieldConfig used when no source ($2) is specified.
  FIELDS = {
    '600' => name_general,
    '610' => name_general,
    '611' => name_general,
    '630' => title_general,
    '650' => default_field_mapping,
    '651' => geo_general,
    '690' => static_other, # topical (650)
    '691' => static_other, # geographic (651)
    #'696' => static_other # personal name (600) NOTE: not currently mapped!
    '697' => static_other # corporate name (610)
  }.freeze

  # Collects subject headings from the configured fields and from 880 fields
  # linked to one of them.
  # @param rec [MARC::Record]
  # @return [Hash, nil] see `map_to_input_fields`; nil when nothing was collected
  def self.prepare_subjects(rec)
    acc = []
    rec.fields(FIELDS.keys).each do |f|
      filter_subject(f, f.tag, acc)
    end
    rec.fields('880').each do |f|
      linked_tag = linked_subject_tag(f)
      filter_subject(f, linked_tag, acc) if linked_tag
    end
    acc.empty? ? nil : map_to_input_fields(acc)
  end

  # Finds the configured subject tag an 880 field is linked to. Subfield 6
  # carries the linked tag in its first three characters (e.g. "650-01/..."),
  # so only that prefix is compared against FIELDS; comparing the whole value
  # would never match a linkage value that carries an occurrence number.
  # @param field [MARC::DataField]
  # @return [String, nil] the linked tag, or nil when no subfield 6 links to a configured tag
  def self.linked_subject_tag(field)
    field.each do |sf|
      next unless sf.code == '6'

      candidate = sf.value.to_s[0, 3]
      return candidate if FIELDS.key?(candidate)
    end
    nil
  end

  # Keys retained when a heading struct is serialized to JSON.
  ONLY_KEYS = [:val, :prefix, :append, :local, :vernacular].freeze

  # Serializes collected heading structs (mutating them in the process).
  # @param acc [Array<Hash>] structs produced by `filter_subject`
  # @return [Hash] with keys :xfacet, :stored_lcsh, :stored_childrens,
  #   :stored_mesh and :stored_local; only :xfacet and :stored_local are
  #   populated here, the rest stay nil
  def self.map_to_input_fields(acc)
    xfacet = [] # provisionally instantiate; we'll almost always need it
    ret = {
      # `xfacets` entries support browse/facet, and will be mapped to stored fields solr-side
      xfacet: nil,
      # `stored_*` fields (below) are stored only, and do _not_ support browse/facet
      stored_lcsh: nil,
      stored_childrens: nil,
      stored_mesh: nil,
      stored_local: nil
    }
    acc.each do |struct|
      last = struct[:parts].last
      # Normalize trailing punctuation on the last heading component. If a comma is present (to be
      # normalized away), then any `.` present is integral (i.e., not ISBD punctuation), and thus
      # should be left intact as part of the heading.
      Marc.trim_trailing_comma!(last) || Marc.trim_trailing_period!(last)
      if struct[:local] && struct[:prefix] == Prefixes::OTHER
        # local subjects without source specified are really too messy, so they should bypass
        # xfacet processing and be placed directly in stored field for display only
        struct[:val] = struct.delete(:parts).join('--')
        struct.delete(:prefix)
        (ret[:stored_local] ||= []) << struct.to_json(:only => ONLY_KEYS)
      elsif struct.size == 2
        # only `parts` and `prefix` (required keys) are present; use legacy format (for now
        # we're mainly doing this to incidentally test backward compatibility of server-side
        # parsing)
        xfacet << (struct[:prefix] + struct[:parts].join('--'))
      else
        # simply map `parts` to `val`
        struct[:val] = struct.delete(:parts).join('--')
        xfacet << struct.to_json(:only => ONLY_KEYS)
      end
    end
    ret[:xfacet] = xfacet unless xfacet.empty?
    ret
  end

  # Builds a heading struct for `field` and appends it to `acc` when it has a
  # mappable prefix and at least one heading subfield.
  # @param field [MARC::DataField]
  # @param tag [String] the effective subject tag (the linked tag for 880s)
  # @param acc [Array<Hash>] accumulator, appended to in place
  def self.filter_subject(field, tag, acc)
    ret = build_subject_struct(field, tag)
    return nil unless ret
    return nil unless map_prefix(ret, tag, field)
    acc << ret if post_process(ret)
  end

  # Sets ret[:prefix] from the specified source or, failing that, from the
  # per-tag field configuration.
  # @return [String, nil] the prefix assigned, or nil when none applies
  def self.map_prefix(ret, tag, field)
    if ret[:source_specified]
      # source_specified takes priority. NOTE: This is true even if ind2!=7 (i.e., source_specified
      # shouldn't even apply), because we want to be lenient with our parsing, so the principle is that
      # we defer to the _most explicit_ heading type declaration
      prefix = THESAURI[ret[:source_specified].downcase]
    else
      # in the absence of `source_specified`, handling depends on field. NOTE: fields should be
      # pre-filtered to only valid codes, so intentionally don't use the safe-nav operator here
      prefix = FIELDS[tag].map_prefix(field)
    end
    prefix ? (ret[:prefix] = prefix) : nil
  end

  # Walks the subfields of `field` and assembles the heading struct.
  # @param field [MARC::DataField]
  # @param tag [String] the effective subject tag
  # @return [Hash, nil] struct with :count and :parts (plus optional :local,
  #   :vernacular, :source_specified, :append); nil for filtered PRO/CHR local headings
  def self.build_subject_struct(field, tag)
    # core String#start_with? (rather than the ActiveSupport alias) keeps this usable outside Rails
    local = field.indicator2 == '4' || tag.start_with?('69')
    ret = {
      count: 0,
      parts: [],
    }
    ret[:local] = true if local
    ret[:vernacular] = true if field.tag == '880'
    field.each do |sf|
      case sf.code
      when '0', '6', '8', '5', '1'
        # ignore these subfields
        next
      when 'a'
        # filter out PRO/CHR entirely (but only need to check on local heading types)
        return nil if local && sf.value =~ /^%?(PRO|CHR)([ $]|$)/
      when '2'
        # use the _last_ source specified, so don't worry about overriding any prior values
        ret[:source_specified] = sf.value.strip
        next
      when 'e', 'w'
        # 'e' is relator term; not sure what 'w' is. These are used to append for record-view display only
        (ret[:append] ||= []) << sf.value.strip
        next
      when 'b', 'c', 'd', 'p', 'q', 't'
        # these are appended to the last component if possible (i.e., when joined, should have no delimiter)
        append_to_last_part(ret[:parts], sf.value.strip)
        ret[:count] += 1
        next
      end
      # the usual case; add a new component to `parts`
      append_new_part(ret[:parts], sf.value.strip)
      ret[:count] += 1
    end
    ret
  end

  # Adds `value` as a new component, first trimming trailing punctuation from
  # the previous component (if any).
  def self.append_new_part(parts, value)
    unless parts.empty?
      last = parts.last
      Marc.trim_trailing_comma!(last) || Marc.trim_trailing_period!(last)
    end
    parts << value
  end

  # Appends `value` to the last component (space-separated), or starts the
  # first component when there is none yet.
  def self.append_to_last_part(parts, value)
    if parts.empty?
      parts << value
    else
      parts.last << ' ' + value
    end
  end

  # Removes the :count bookkeeping key and normalizes single-subfield headings.
  # @return [Hash, nil] the struct, or nil when no heading subfields were seen
  def self.post_process(ret)
    case ret.delete(:count)
    when 0
      return nil
    when 1
      # when we've only encountered one subfield, assume that it might be a poorly-coded record
      # with a bunch of subdivisions mashed together, and attempt to convert it to a consistent
      # form. Note that we must separately track count (as opposed to simply checking `parts.size`),
      # because we're using "subdivision count" as a heuristic for the quality level of the heading.
      only = ret[:parts].first
      only.gsub!(/([[[:alnum:]])])(\s+--\s*|\s*--\s+)([[[:upper:]][[:digit:]]])/, '\1--\3')
      only.gsub!(/([[[:alpha:]])])\s+-\s+([[:upper:]]|[[:digit:]]{2,})/, '\1--\2')
      only.gsub!(/([[[:alnum:]])])\s+-\s+([[:upper:]])/, '\1--\2')
    end
    ret
  end
end
327
+
328
# Encoding level codes and a ranking (RANK) in which a lower number is a
# better-ranked level.
module EncodingLevel
  # Official MARC codes (https://www.loc.gov/marc/bibliographic/bdleader.html)
  FULL = ' '
  FULL_NOT_EXAMINED = '1'
  UNFULL_NOT_EXAMINED = '2'
  ABBREVIATED = '3'
  CORE = '4'
  PRELIMINARY = '5'
  MINIMAL = '7'
  PREPUBLICATION = '8'
  UNKNOWN = 'u'
  NOT_APPLICABLE = 'z'

  # OCLC extension codes (https://www.oclc.org/bibformats/en/fixedfield/elvl.html)
  OCLC_FULL = 'I'
  OCLC_MINIMAL = 'K'
  OCLC_BATCH_LEGACY = 'L'
  OCLC_BATCH = 'M'
  OCLC_SOURCE_DELETED = 'J'

  # code => rank. PREPUBLICATION, UNKNOWN and NOT_APPLICABLE have no entry.
  # Frozen so the shared lookup table cannot be modified at runtime.
  RANK = {
    # top 4 (per nelsonrr), do not differentiate among "good" records
    FULL => 0,
    FULL_NOT_EXAMINED => 0, # 1
    OCLC_FULL => 0, # 2
    CORE => 0, # 3
    UNFULL_NOT_EXAMINED => 4,
    ABBREVIATED => 5,
    PRELIMINARY => 6,
    MINIMAL => 7,
    OCLC_MINIMAL => 8,
    OCLC_BATCH => 9,
    OCLC_BATCH_LEGACY => 10,
    OCLC_SOURCE_DELETED => 11
  }.freeze
end
364
+
365
+ # Genre/Form
366
+ # display field selector logic
367
+ # reference: https://www.loc.gov/marc/bibliographic/bd655.html
368
+ #
369
+ # We display Genre/Term values if they fulfill the following criteria
370
+ # - The field is in MARC 655, or the field is in MARC 880 with a subfield 6 that includes '655'.
371
+ # AND
372
+ # - Above fields have an indicator 2 value of: 0 (LCSH) or 4 (No source specified).
373
+ # OR
374
+ # - Above fields have a subfield 2 (ontology code) in the list of allowed values.
375
# Predicates deciding whether a MARC field should be treated as a displayable
# genre/form field.
class GenreTools
  GENRE_FIELD_TAG = '655'
  ALT_GENRE_FIELD_TAG = '880'
  ALLOWED_INDICATOR2_VALUES = %w[0 4]

  class << self
    # A field qualifies when it is a genre field and either its subfield 2
    # code or its second indicator is allowed.
    # @param [MARC::DataField] field
    # @return [TrueClass, FalseClass]
    def allowed_genre_field?(field)
      genre_field?(field) && (allowed_code?(field) || allowed_ind2?(field))
    end

    # True for a 655 field, or an 880 field whose subfield 6 contains '655'.
    # @param [MARC::DataField] field
    # @return [TrueClass, FalseClass]
    def genre_field?(field)
      tag = field.tag
      return true if tag == GENRE_FIELD_TAG

      tag == ALT_GENRE_FIELD_TAG && MarcUtil.has_subfield_value?(field, '6', /#{GENRE_FIELD_TAG}/)
    end

    # True when a subfield 2 value is one of the allowed ontology codes.
    # @param [MARC::DataField] field
    # @return [TrueClass, FalseClass]
    def allowed_code?(field)
      MarcUtil.subfield_value_in?(field, '2', PennLib::Marc::ALLOWED_SUBJ_GENRE_ONTOLOGIES)
    end

    # 0 in ind2 means LCSH
    # 4 in ind2 means "Source not specified"
    # @param [MARC::DataField] field
    # @return [TrueClass, FalseClass]
    def allowed_ind2?(field)
      ALLOWED_INDICATOR2_VALUES.include?(field.indicator2)
    end
  end
end
411
+
412
+ # class to hold "utility" methods used by other methods in the main Marc class and the new *Tools classes
413
+ # for now, leave methods as also defined in Marc class to avoid unexpected issues
414
# Stateless subfield predicates.
class MarcUtil
  class << self
    # True when any subfield with code `subf` has a value matching `regex`.
    # @param [MARC::DataField] field
    # @param [String|Integer|Symbol] subf
    # @param [Regexp] regex
    # @return [TrueClass, FalseClass]
    def has_subfield_value?(field, subf, regex)
      field.any? do |subfield|
        subfield.code == subf.to_s && subfield.value =~ regex
      end
    end

    # True when any subfield with code `subf` has a value contained in `array`.
    # @param [MARC::DataField] field
    # @param [String|Integer|Symbol] subf
    # @param [Array] array
    # @return [TrueClass, FalseClass]
    def subfield_value_in?(field, subf, array)
      field.any? do |subfield|
        subfield.code == subf.to_s && array.include?(subfield.value)
      end
    end
  end
end
435
+
436
+ # Class for doing extraction and processing on MARC::Record objects.
437
+ # This is intended to be used in both indexing code and front-end templating code
438
+ # (since MARC is stored in Solr). As such, there should NOT be any traject-specific
439
+ # things here.
440
+ #
441
+ # For a slight performance increase (~5%?) we use frozen_string_literal for immutable strings.
442
+ #
443
+ # Method naming conventions:
444
+ #
445
+ # *_values = indicates method returns an Array of values
446
+ #
447
+ # *_display = indicates method is intended to be used for
448
+ # individual record view (we should name things more meaningfully, according to
449
+ # the logic by which the values are generated, but I don't always know what this
450
+ # logic is, necessarily - JC)
451
+ #
452
+ class Marc
453
+ include BlacklightSolrplugins::Indexer
454
+
455
+ attr_accessor :code_mappings
456
+
457
# Value of 944 subfield a that marks a record as a curated database.
DATABASES_FACET_VALUE = 'Database & Article Index'
# Subfield 2 source codes accepted for subject and genre headings.
# Frozen so the shared list cannot be modified at runtime.
ALLOWED_SUBJ_GENRE_ONTOLOGIES = %w[aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
                                   local/osu mesh ndlsh nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp].freeze
460
+
461
+ # @param [PennLib::CodeMappings]
462
# Stores the code mappings object that the lookup readers of this class
# delegate to.
def initialize(code_mappings)
  @code_mappings = code_mappings
end
465
+
466
# Year of today's date. The value is computed on first use and then reused
# for the lifetime of this object.
# @return [Integer]
def current_year
  @current_year ||= Date.today.year
end
469
+
470
# Relator code lookup, delegated to the code mappings object.
def relator_codes
  @code_mappings.relator_codes
end
473
+
474
# Location lookup, delegated to the code mappings object.
def locations
  @code_mappings.locations
end
477
+
478
# LOC classification lookup, delegated to the code mappings object.
def loc_classifications
  @code_mappings.loc_classifications
end
481
+
482
# Dewey classification lookup, delegated to the code mappings object.
def dewey_classifications
  @code_mappings.dewey_classifications
end
485
+
486
# Language lookup, delegated to the code mappings object.
def languages
  @code_mappings.languages
end
489
+
490
# Returns a copy of `s` without a trailing colon (and the whitespace around it).
# @param s [String]
# @return [String]
def trim_trailing_colon(s)
  trailing_colon = /\s*:\s*$/
  s.sub(trailing_colon, '')
end
493
+
494
# Returns a copy of `s` without a trailing semicolon (and the whitespace around it).
# @param s [String]
# @return [String]
def trim_trailing_semicolon(s)
  trailing_semicolon = /\s*;\s*$/
  s.sub(trailing_semicolon, '')
end
497
+
498
# Returns a copy of `s` without a trailing equals sign. Whitespace around the
# sign is left in place.
# @param s [String]
# @return [String]
def trim_trailing_equal(s)
  trailing_equal = /=$/
  s.sub(trailing_equal, '')
end
501
+
502
# Returns a copy of `s` without a trailing slash (and the whitespace around it).
# @param s [String]
# @return [String]
def trim_trailing_slash(s)
  trailing_slash = /\s*\/\s*$/
  s.sub(trailing_slash, '')
end
505
+
506
# Instance-level convenience: returns a copy of `s` with a trailing comma
# removed, via the class-level implementation in non-in-place mode.
# @param s [String]
# @return [String]
def trim_trailing_comma(s)
  in_place = false
  self.class.trim_trailing_comma(s, in_place)
end
509
+
510
# Removes a trailing comma from `s` itself, via the class-level
# implementation in in-place mode.
# @param s [String] modified in place
def self.trim_trailing_comma!(s)
  in_place = true
  trim_trailing_comma(s, in_place)
end
513
+
514
# Removes a trailing comma (and the whitespace around it) from `s`.
# @param s [String]
# @param inplace [Boolean] when truthy, `s` itself is modified
# @return [String, nil] in in-place mode, `s` or nil when nothing changed;
#   otherwise a new String
def self.trim_trailing_comma(s, inplace)
  trailing_comma = /\s*,\s*$/
  return s.sub!(trailing_comma, '') if inplace

  s.sub(trailing_comma, '')
end
518
+
519
# Instance-level convenience: returns `s` with a trailing period removed, via
# the class-level implementation in non-in-place mode.
# @param s [String]
# @return [String]
def trim_trailing_period(s)
  in_place = false
  self.class.trim_trailing_period(s, in_place)
end
522
+
523
# Removes a trailing period from `s` itself, via the class-level
# implementation in in-place mode.
# @param s [String] modified in place
def self.trim_trailing_period!(s)
  in_place = true
  trim_trailing_period(s, in_place)
end
526
+
527
# Removes a trailing period from `s`, unless the string ends in "etc." or in
# a single capital letter followed by a period (e.g. an initial).
# @param s [String]
# @param inplace [Boolean] when truthy, `s` itself is modified
# @return [String, nil] in in-place mode, `s` or nil when nothing changed;
#   otherwise the resulting String (`s` itself when the period is kept)
def self.trim_trailing_period(s, inplace)
  keep_period = s.end_with?('etc.') || s =~ /(^|[^a-zA-Z])[A-Z]\.$/
  if keep_period
    # nil if unchanged, for consistency with standard `inplace` semantics
    return inplace ? nil : s
  end

  trailing_period = /\.\s*$/
  return s.sub!(trailing_period, '') if inplace

  s.sub(trailing_period, '')
end
535
+
536
+ # squish in ActiveSupport
537
# Strips leading/trailing whitespace and collapses every internal run of
# whitespace into a single space. A lone tab or newline is also replaced by a
# space; matching only runs of two or more characters would leave those
# untouched.
# @param s [String]
# @return [String] a new String
def normalize_space(s)
  s.strip.gsub(/\s+/, ' ')
end
540
+
541
+ # this logic matches substring-before in XSLT: if no match for sub, returns an empty string
542
# Returns the part of `s` before the first occurrence of `sub`, or an empty
# string when `sub` does not occur. Uses String#partition so that a
# single-space separator is treated literally (String#split would switch to
# whitespace-run splitting and skip leading whitespace).
# @param s [String]
# @param sub [String, Regexp] separator
# @return [String] also '' when `sub` is an empty string
def substring_before(s, sub)
  before, separator, _after = s.partition(sub)
  separator.empty? ? '' : before
end
545
+
546
+ # this logic matches substring-after in XSLT: if no match for sub, returns an empty string
547
# Returns the part of `s` after the first occurrence of `sub`, or an empty
# string when `sub` does not occur. Uses String#partition so that a
# single-space separator is treated literally (String#split would switch to
# whitespace-run splitting and swallow consecutive whitespace).
# @param s [String]
# @param sub [String, Regexp] separator
# @return [String] also '' when `sub` is an empty string
def substring_after(s, sub)
  _before, separator, after = s.partition(sub)
  separator.empty? ? '' : after
end
550
+
551
# Joins the elements of `array` with single spaces and passes the result
# through `normalize_space`.
# @param array [Array<String>]
# @return [String]
def join_and_trim_whitespace(array)
  joined = array.join(' ')
  normalize_space(joined)
end
554
+
555
+ # join subfield values together (as selected using passed-in block)
556
# Joins, with single spaces, the non-blank values of the subfields of `field`
# for which the block returns a truthy value.
# @param field [MARC::DataField]
# @yieldparam subfield the subfield to test
# @return [String]
def join_subfields(field, &block)
  chosen = field.select { |subfield| block.call(subfield) }
  values = chosen.map(&:value)
  values.select { |value| value.present? }.join(' ')
end
559
+
560
+ # this is used for filtering in lots of places
561
+ # returns a lambda that can be passed to Enumerable#select
562
+ # using the & syntax
563
# Memoized lambda that returns true for any subfield whose code is neither
# '6' nor '8'. Suitable for passing to Enumerable#select with `&`.
# @return [Proc]
def subfield_not_6_or_8
  @subfield_not_6_or_8 ||= ->(subfield) { !%w{6 8}.member?(subfield.code) }
end
568
+
569
+ # returns a lambda checking if passed-in subfield's code
570
+ # is a member of array
571
# Builds a lambda that returns true when a subfield's code is a member of
# `array`.
# @param array [Array<String>] subfield codes to accept
# @return [Proc]
def subfield_in(array)
  ->(subfield) { array.member?(subfield.code) }
end
574
+
575
+ # returns a lambda checking if passed-in subfield's code
576
+ # is NOT a member of array
577
# Builds a lambda that returns true when a subfield's code is NOT a member of
# `array`.
# @param array [Array<String>] subfield codes to reject
# @return [Proc]
def subfield_not_in(array)
  ->(subfield) { !array.member?(subfield.code) }
end
580
+
581
+
582
+ # 11/2018 kms: eventually should deprecate has_subfield6_value and use this for all
583
+ # returns true if field has a value that matches
584
+ # passed-in regex and passed in subfield
585
# True when any subfield of `field` has code `subf` and a value matching `regex`.
# @param field [MARC::DataField]
# @param subf [String] subfield code (compared as-is)
# @param regex [Regexp]
# @return [Boolean]
def has_subfield_value(field, subf, regex)
  field.any? do |subfield|
    subfield.code == subf && subfield.value =~ regex
  end
end
588
+
589
# True when any subfield of `field` has code `subf` and a value contained in
# `array`.
# @param field [MARC::DataField]
# @param subf [String] subfield code (compared as-is)
# @param array [Array<String>] accepted values
# @return [Boolean]
def subfield_value_in(field, subf, array)
  field.any? do |subfield|
    subfield.code == subf && array.include?(subfield.value)
  end
end
592
+
593
+ # common case of wanting to extract subfields as selected by passed-in block,
594
+ # from 880 datafield that has a particular subfield 6 value
595
+ # @param subf6_value [String|Array] either a single str value to look for in sub6 or an array of them
596
+ # @param block [Proc] takes a subfield as argument, returns a boolean
597
# For every 880 field whose subfield 6 starts with `subf6_value` (or with any
# one of the values when an Array is given), joins with spaces the values of
# the subfields selected by the block.
# @param rec [MARC::Record]
# @param subf6_value [String|Array] interpolated into a regular expression as-is
# @param block [Proc] takes a subfield as argument, returns a boolean
# @return [Array<String>] one joined string per matching 880 field
def get_880(rec, subf6_value, &block)
  pattern_source = subf6_value.is_a?(Array) ? "(#{subf6_value.join('|')})" : subf6_value

  linked_fields = rec.fields('880').select do |f|
    has_subfield6_value(f, /^#{pattern_source}/)
  end

  linked_fields.map do |field|
    chosen = field.select { |sf| block.call(sf) }
    chosen.map(&:value).join(' ')
  end
end
609
+
610
+ # common case of wanting to extract all the subfields besides 6 or 8,
611
+ # from 880 datafield that has a particular subfield 6 value
612
# Like `get_880`, selecting every subfield except those with code 6 or 8.
# @param rec [MARC::Record]
# @param subf6_value [String|Array] subfield 6 prefix(es) to look for
# @return [Array<String>]
def get_880_subfield_not_6_or_8(rec, subf6_value)
  get_880(rec, subf6_value) { |sf| !%w{6 8}.member?(sf.code) }
end
617
+
618
+ # returns the non-6,8 subfields from a datafield and its 880 link
619
# Returns the joined non-6/8 subfield values of every `tag` field, followed by
# those of the 880 fields linked to `tag`.
# @param rec [MARC::Record]
# @param tag [String]
# @return [Array<String>]
def get_datafield_and_880(rec, tag)
  native = rec.fields(tag).map do |field|
    join_subfields(field, &subfield_not_in(%w{6 8}))
  end
  native + get_880_subfield_not_6_or_8(rec, tag)
end
627
+
628
# Appends cumulative title variants to `acc`: the first subfield value, then
# that value plus each subsequent non-nil value (space-joined). When
# `non_filing` contains a digit 1-9, that many leading characters are treated
# as a non-filing prefix, and each variant is added both without and with the
# prefix.
#
# The first value is sliced non-destructively, so the caller's string is
# never modified (and may be frozen).
#
# @param acc [Array<String>] accumulator, appended to in place
# @param non_filing [String, nil] non-filing character count indicator
# @param subfields [Array<String, nil>] values; consumed (emptied) by this method
# @return [nil]
def append_title_variant_field(acc, non_filing, subfields)
  first = subfields.shift
  return if first.nil? # there's something wrong; first is always required

  prefix = nil
  base = first
  if /[1-9]/.match?(non_filing.to_s)
    prefix_length = non_filing.to_i
    prefix = first[0, prefix_length]
    # the slice is nil when the prefix is longer than the string itself
    base = first[prefix_length..-1] || ''
  end

  loop do
    acc << base
    acc << prefix + base unless prefix.nil?
    return if subfields.empty?

    # skip over nil entries; stop when nothing but nils remain
    while (next_part = subfields.shift).nil?
      return if subfields.empty?
    end
    base = "#{base} #{next_part}"
  end
end
646
+
647
+ # returns true if field's subfield 6 has a value that matches
648
+ # passed-in regex
649
# True when any subfield 6 of `field` has a value matching `regex`.
# @param field [MARC::DataField]
# @param regex [Regexp]
# @return [Boolean]
def has_subfield6_value(field, regex)
  field.any? do |subfield|
    subfield.code == '6' && subfield.value =~ regex
  end
end
652
+
653
+ # for a string 's', return a hash of ref_type => Array of references,
654
+ # where a reference is a String or a Hash representing a multipart string
655
# Looks up cross references for the subject string `s`.
# @param s [String]
# @return [Hash, nil] ref_type => Array of references; nil when `s` has none
def get_subject_references(s)
  # TODO: just simple test data for now; hook up to actual cross ref data
  case s
  when 'Cyberspace' then { 'see_also' => ['Internet', 'Computer networks'] }
  when 'Internet' then { 'see_also' => ['Cyberspace', 'Computer networks'] }
  when 'Computer networks' then { 'see_also' => ['Cyberspace', 'Internet'] }
  # one way
  when 'Programming Languages' then { 'use_instead' => ['Computer programming'] }
  end
end
669
+
670
# Memoized list of the field tags treated as subject fields.
# @return [Array<String>]
def subject_codes
  @subject_codes ||= %w[600 610 611 630 650 651]
end
673
+
674
# Memoized mapping from subject field tag (as an Integer) to the
# one-character xfacet prefix.
# @return [Hash{Integer => String}]
def subject_codes_to_xfacet_prefixes
  @subject_codes_to_xfacet_prefixes ||= {
    600 => 'n', 610 => 'n', 611 => 'n',
    630 => 't',
    650 => 's',
    651 => 'g'
  }
end
684
+
685
# True when `field` has a subject tag and either a second indicator of 0, 2
# or 4, or a second indicator of 7 together with an allowed source code in
# subfield 2.
# @param field [MARC::DataField]
# @return [Boolean]
def is_subject_field(field)
  return false unless subject_codes.member?(field.tag)
  return true if %w(0 2 4).member?(field.indicator2)

  # 10/2018 kms: add 2nd Ind 7
  field.indicator2 == '7' && field.any? do |sf|
    sf.code == '2' && ALLOWED_SUBJ_GENRE_ONTOLOGIES.member?(sf.value)
  end
end
692
+
693
# Tests whether a subfield with code 'a' or '%' holds a value starting with
# PRO or CHR (optionally preceded by '%') followed by a space, a '$', or the
# end of a line.
# @param sf subfield responding to `code` and `value`
# @return [Integer, nil, false] match position, nil when the value does not
#   match, false when the code is neither 'a' nor '%'
def reject_pro_chr(sf)
  return false unless %w{a %}.member?(sf.code)

  sf.value =~ /^%?(PRO|CHR)([ $]|$)/
end
696
+
697
# True when any 944 field of `rec` has a subfield a whose value is exactly
# 'Database & Article Index'.
# @param rec [MARC::Record]
# @return [Boolean]
def is_curated_database(rec)
  rec.fields('944').any? do |field|
    field.any? { |subfield| subfield.code == 'a' && subfield.value == 'Database & Article Index' }
  end
end
704
+
705
# Collects the distinct values of the first subfield a of each 944 field,
# skipping values that are plain integers (i.e. equal to their own
# `to_i.to_s`).
# @param rec [MARC::Record]
# @return [Array<String>]
def get_curated_format(rec)
  formats = rec.fields('944').map do |field|
    first_a = field.find { |subfield| subfield.code == 'a' }
    next if first_a.nil?

    value = first_a.value
    value == value.to_i.to_s ? nil : value
  end
  formats.compact.uniq
end
711
+
712
# For curated database records, collects the first subfield b of every 944
# field whose subfield a equals the databases facet value.
# @param rec [MARC::Record]
# @return [Array<String>] empty when the record is not a curated database
def get_db_types(rec)
  return [] unless is_curated_database(rec)

  types = rec.fields('944').map do |field|
    next unless field.any? { |sf| sf.code == 'a' && sf.value == PennLib::Marc::DATABASES_FACET_VALUE }

    field.find { |sf| sf.code == 'b' }&.value
  end
  types.compact
end
721
+
722
# For curated database records, collects the first subfield a of every 943
# field whose subfield 2 is 'penncoi'.
# @param rec [MARC::Record]
# @return [Array<String>] empty when the record is not a curated database
def get_db_categories(rec)
  return [] unless is_curated_database(rec)

  categories = rec.fields('943').map do |field|
    next unless field.any? { |sf| sf.code == '2' && sf.value == 'penncoi' }

    field.find { |sf| sf.code == 'a' }&.value
  end
  categories.compact
end
731
+
732
# For curated database records, builds "category--subcategory" strings from
# the first subfields a and b of every 943 field whose subfield 2 is
# 'penncoi'. A field without subfield b contributes just the category value
# (the String, not the subfield object); a field without subfield a is
# skipped.
# @param rec [MARC::Record]
# @return [Array<String>] empty when the record is not a curated database
def get_db_subcategories(rec)
  return [] unless is_curated_database(rec)

  rec.fields('943').map do |field|
    next unless field.any? { |sf| sf.code == '2' && sf.value == 'penncoi' }

    category = field.find { |sf| sf.code == 'a' }
    next if category.nil?

    sub_category = field.find { |sf| sf.code == 'b' }
    sub_category.nil? ? category.value : "#{category.value}--#{sub_category.value}"
  end.compact
end
744
+
745
+ # TODO: MG removed the join_subject_parts method when adding in the SubjectConfig module here. This method still
746
+ # appears to be in use in the FranklinIndexer even though many subject fields are now processed differently
747
+ # Work should be done to remove all usages of join_subject_parts. Perhaps functionality from SubjectConfig could
748
+ # be used instead
749
# Collects facet values from the subject fields of `rec`. For each field this
# yields the joined heading (unless `toplevel_only`) and, when the field has a
# subfield a and either `toplevel_only` is set or other subfields exist, the
# subfield a values on their own (excluding values starting with PRO/CHR).
# Trailing periods are trimmed and blank values dropped.
#
# NOTE(review): `join_subject_parts` is not defined in the visible part of
# this file; the path with `toplevel_only == false` depends on it.
#
# @param rec [MARC::Record]
# @param toplevel_only [Boolean]
# @return [Array<String>]
def get_subject_facet_values(rec, toplevel_only = false)
  subject_fields = rec.fields.find_all { |f| is_subject_field(f) }

  values = subject_fields.flat_map do |field|
    just_a = nil
    has_a = field.any? { |sf| sf.code == 'a' }
    if has_a && (toplevel_only || field.any? { |sf| sf.code != 'a' })
      a_values = field.find_all(&subfield_in(%w{a})).map(&:value)
      just_a = a_values.select { |v| v !~ /^%?(PRO|CHR)/ }.join(' ')
    end
    full_heading = toplevel_only ? nil : join_subject_parts(field)
    [full_heading, just_a].compact.map { |v| trim_trailing_period(v) }
  end

  values.select { |v| v.present? }
end
759
+
760
# Builds prefixed xfacet values for the subject fields of `rec`: the
# one-character prefix for the field's tag followed by the double-dash-joined
# heading with its trailing period trimmed. Fields with a blank heading are
# skipped.
#
# NOTE(review): `join_subject_parts` is not defined in the visible part of
# this file.
#
# @param rec [MARC::Record]
# @return [Array<String>]
def get_subject_xfacet_values(rec)
  subject_fields = rec.fields.find_all { |f| is_subject_field(f) }

  subject_fields.each_with_object([]) do |field, values|
    prefix = subject_codes_to_xfacet_prefixes[field.tag.to_i]
    heading = trim_trailing_period(join_subject_parts(field, double_dash: true))
    next unless heading.present?

    # don't need to wrap data in #references anymore because cross refs are now handled Solr-side
    values << prefix + heading
  end
end
769
+
770
# Memoized list of the field tags whose content feeds subject search.
# @return [Array<String>]
def subject_search_tags
  @subject_search_tags ||= %w[541 561 600 610 611 630 650 651 653]
end
773
+
774
# True when `field` has a second indicator of 0, 1, 2, 4 or 7 and is either a
# subject search tag, a 69x tag, or an 880 whose first subfield 6 starts with
# such a tag.
# @param field [MARC::DataField, MARC::ControlField]
# @return [Boolean]
def is_subject_search_field(field)
  # 11/2018 kms: add 2nd Ind 7
  return false unless field.respond_to?(:indicator2) && %w{0 1 2 4 7}.member?(field.indicator2)

  tag = field.tag
  return true if subject_search_tags.member?(tag) || tag.start_with?('69')
  return false unless tag == '880'

  # the linked tag is the first three characters of the first subfield 6
  first_sub6 = field.find_all { |sf| sf.code == '6' }.map(&:value).first
  linked_tag = (first_sub6 || '')[0..2]
  subject_search_tags.member?(linked_tag) || linked_tag.start_with?('69')
end
787
+
788
# Builds one search string per subject search field of `rec`:
# - subfield a: value with a leading PRO/CHR marker and a trailing '?' removed
# - subfield 4: "<code>, <relator term>"
# - subfields 5, 6 and 8: skipped
# - anything else: the value as-is
# Fields that contribute nothing are omitted.
# @param rec [MARC::Record]
# @return [Array<String>]
def get_subject_search_values(rec)
  # this has been completely migrated
  search_fields = rec.fields.find_all { |f| is_subject_search_field(f) }

  strings = search_fields.map do |field|
    pieces = []
    field.each do |sf|
      case sf.code
      when 'a'
        pieces << " #{sf.value.gsub(/^%?(PRO|CHR)/, '').gsub(/\?$/, '')}"
      when '4'
        pieces << "#{sf.value}, #{relator_codes[sf.value]}"
      when '5', '6', '8'
        next
      else
        pieces << " #{sf.value}"
      end
    end
    join_and_trim_whitespace(pieces) if pieces.present?
  end

  strings.compact
end
805
+
806
+ # @return [Array] of string field tags to examine for subjects
807
# Memoized list of the 6xx field tags examined for subjects.
def subject_600s
  @subject_600s ||= %w[600 610 611 630 650 651]
end
810
+
811
+ # 11/2018 kms: add local subj fields- always Local no matter the 2nd Ind
812
# Memoized list of the local (69x) subject field tags.
# @return [Array<String>]
def subject_69X
  @subject_69X ||= %w[690 691 697]
end
815
+
816
+ # 11/2018: add 69x as local subj, add 650 _7 as subj
817
# Builds display structs for the subjects of `rec` that belong to the heading
# family selected by `indicator2`:
# - '0', '1', '2': subject fields (and 880s linked to them) whose second
#   indicator equals `indicator2`; for '0', fields with second indicator 7
#   and an allowed source in subfield 2 are included as well
# - '4': local subjects, i.e. subject fields with second indicator 4 plus 69x
#   fields that have no subfield 2 matching 'penncoi'
# Duplicate structs are dropped. Any other `indicator2` yields an empty Array.
#
# @param rec [MARC::Record]
# @param indicator2 [String]
# @return [Array<Hash>] structs with :value, :value_for_link, :link_type and,
#   for the '0'/'1'/'2' family, :value_append
def get_subjects_from_600s_and_800(rec, indicator2)
  seen = Set.new

  case indicator2
  when '0', '1', '2'
    # Subjects, Childrens subjects, and Medical Subjects all share this code
    # also 650 _7, subjs w/ source specified in $2. These display as Subjects along w/ the ind2==0 650s
    candidates = rec.fields.select do |f|
      subject_600s.member?(f.tag) ||
        (f.tag == '880' && has_subfield6_value(f, /^(#{subject_600s.join('|')})/))
    end
    matching = candidates.select do |f|
      f.indicator2 == indicator2 ||
        (f.indicator2 == '7' && indicator2 == '0' &&
          f.any? { |sf| sf.code == '2' && ALLOWED_SUBJ_GENRE_ONTOLOGIES.member?(sf.value) })
    end
    structs = matching.map do |field|
      # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
      value_for_link = join_subfields(field, &subfield_not_in(%w{0 6 8 2 e w}))
      sub_with_hyphens = field.select(&subfield_not_in(%w{0 6 8 2 e w})).map do |sf|
        pre = %w{a b c d p q t}.member?(sf.code) ? ' ' : ' -- '
        pre + sf.value + (sf.code == 'p' ? '.' : '')
      end.join(' ')
      eandw_with_hyphens = field.select(&subfield_in(%w{e w})).map { |sf| ' -- ' + sf.value }.join(' ')
      next unless sub_with_hyphens.present?

      {
        value: sub_with_hyphens,
        value_for_link: value_for_link,
        value_append: eandw_with_hyphens,
        link_type: 'subject_xfacet2'
      }
    end
    structs.compact.select { |val| seen.add?(val) }
  when '4'
    # Local subjects
    # either a tag in subject_600s list with ind2==4, or a tag in subject_69X list with any ind2.
    # but NOT a penn community of interest 690 (which have $2 penncoi )
    local_fields = rec.fields.select do |f|
      subject_600s.member?(f.tag) && f.indicator2 == '4' ||
        (subject_69X.member?(f.tag) && !has_subfield_value(f, '2', /penncoi/))
    end
    structs = local_fields.map do |field|
      suba = field.select(&subfield_in(%w{a}))
                  .select { |sf| sf.value !~ /^%?(PRO|CHR)/ }
                  .map(&:value).join(' ')
      # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
      # 11/2018 kms: also do not display subf 5 or 2
      sub_oth = field.select(&subfield_not_in(%w{0 a 6 8 5 2})).map do |sf|
        pre = %w{b c d p q t}.member?(sf.code) ? ' ' : ' -- '
        pre + sf.value + (sf.code == 'p' ? '.' : '')
      end
      subj_display = [suba, sub_oth].join(' ')
      sub_oth_no_hyphens = join_subfields(field, &subfield_not_in(%w{0 a 6 8 5 2}))
      subj_search = [suba, sub_oth_no_hyphens].join(' ')
      next unless subj_display.present?

      {
        value: subj_display,
        value_for_link: subj_search,
        link_type: 'subject_search'
      }
    end
    structs.compact.select { |val| seen.add?(val) }
  else
    []
  end
end
881
+
882
+ # 11/2018: 650 _7 is also handled here
883
# Subjects for the second-indicator '0' bucket.
# @return whatever get_subjects_from_600s_and_800 returns for '0'
def get_subject_display(rec)
  bucket = '0'
  get_subjects_from_600s_and_800(rec, bucket)
end
886
+
887
# Subjects for the second-indicator '1' bucket.
# @return whatever get_subjects_from_600s_and_800 returns for '1'
def get_children_subject_display(rec)
  bucket = '1'
  get_subjects_from_600s_and_800(rec, bucket)
end
890
+
891
# Subjects for the second-indicator '2' bucket.
# @return whatever get_subjects_from_600s_and_800 returns for '2'
def get_medical_subject_display(rec)
  bucket = '2'
  get_subjects_from_600s_and_800(rec, bucket)
end
894
+
895
# Subjects for the second-indicator '4' (local) bucket.
# @return whatever get_subjects_from_600s_and_800 returns for '4'
def get_local_subject_display(rec)
  bucket = '4'
  get_subjects_from_600s_and_800(rec, bucket)
end
898
+
899
# Reads the :default_subject_stored_a entry from the given document.
def get_subject_solrdoc_display(doc)
  stored_key = :default_subject_stored_a
  doc[stored_key]
end
902
+
903
# Reads the :childrens_subject_stored_a entry from the given document.
def get_children_subject_solrdoc_display(doc)
  stored_key = :childrens_subject_stored_a
  doc[stored_key]
end
906
+
907
# Reads the :mesh_subject_stored_a entry from the given document.
def get_medical_subject_solrdoc_display(doc)
  stored_key = :mesh_subject_stored_a
  doc[stored_key]
end
910
+
911
# Reads the :local_subject_stored_a entry from the given document.
def get_local_subject_solrdoc_display(doc)
  stored_key = :local_subject_stored_a
  doc[stored_key]
end
914
+
915
# Derives the list of format labels for a record.
#
# Location/call-number based formats ('Manuscript', 'Archive', 'Microformat')
# are exclusive. Otherwise up to four additive labels may be emitted, followed
# by exactly one primary label chosen from the leader format code. The result
# of get_curated_format(rec) is always appended.
#
# @param rec record to classify
# @return [Array<String>] format labels
def get_format(rec)
  leader_code = get_format_from_leader(rec)
  fixed_008 = rec.fields('008').map(&:value).first || ''
  fixed_007 = rec.fields('007').map(&:value)
  press_publisher = rec.fields('260').any? do |field|
    field.select { |sf| sf.code == 'b' && sf.value =~ /press/i }.any?
  end
  # first character of every 006
  first_006_chars = rec.fields('006').map { |field| field.value[0] }
  values_of = lambda do |tag, code|
    rec.fields(tag).flat_map { |field| field.select { |sf| sf.code == code }.map(&:value) }
  end
  title_k = values_of.call('245', 'k')
  title_h = values_of.call('245', 'h')
  media_type = values_of.call('337', 'a')
  call_nums = rec.fields(EnrichedMarc::TAG_HOLDING).map do |field|
    # the classification part can contain strings like 'Microfilm'
    join_subfields(field, &subfield_in([EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART,
                                        EnrichedMarc::SUB_HOLDING_ITEM_PART]))
  end
  locations = get_specific_location_values(rec)
  located = ->(pattern) { locations.any? { |loc| loc =~ pattern } }

  exclusive =
    if located.call(/manuscripts/i)
      'Manuscript'
    elsif located.call(/archives/i) && !located.call(/cajs/i) && !located.call(/nursing/i)
      'Archive'
    elsif located.call(/micro/i) ||
          title_h.any? { |val| val =~ /micro/i } ||
          call_nums.any? { |val| val =~ /micro/i } ||
          media_type.any? { |val| val =~ /microform/i }
      'Microformat'
    end

  formats = []
  if exclusive
    formats << exclusive
  else
    # these four may each accompany ONE of the primary formats chosen below
    formats << 'Thesis/Dissertation' if rec.fields('502').any? && leader_code == 'tm'
    formats << 'Conference/Event' if rec.fields('111').any? || rec.fields('711').any?
    if !%w[c d i j].include?(leader_code[0]) && %w[f i o].include?(fixed_008[28]) && !press_publisher
      formats << 'Government document'
    end
    formats << 'Newspaper' if leader_code == 'as' && (fixed_008[21] == 'n' || fixed_008[22] == 'e')

    # exactly one primary format
    formats <<
      case
      when leader_code.end_with?('i') ||
           (leader_code == 'am' && first_006_chars.member?('m') && first_006_chars.member?('s'))
        'Website/Database'
      when %w[aa ac am tm].include?(leader_code) &&
           title_k.none? { |v| v =~ /kit/i } &&
           title_h.none? { |v| v =~ /micro/i }
        'Book'
      when %w[ca cb cd cm cs dm].include?(leader_code)
        'Musical score'
      when leader_code.start_with?('e') || leader_code == 'fm'
        'Map/Atlas'
      when leader_code == 'gm'
        # 'Projected graphic' only when a 007 starts with 'g' and none starts with 'v'
        if fixed_007.any? { |v| v.start_with?('v') } || fixed_007.none? { |v| v.start_with?('g') }
          'Video'
        else
          'Projected graphic'
        end
      when %w[im jm jc jd js].include?(leader_code)
        'Sound recording'
      when %w[km kd].include?(leader_code)
        'Image'
      when leader_code == 'mm'
        'Datafile'
      when %w[as gs].include?(leader_code)
        'Journal/Periodical'
      when leader_code.start_with?('r')
        '3D object'
      else
        'Other'
      end
  end
  formats.concat(get_curated_format(rec))
end
1004
+
1005
+ # returns two-char format code from MARC leader, representing two fields:
1006
+ # "Type of record" and "Bibliographic level"
1007
# Returns the two characters at leader positions 6 and 7.
# @return [String, nil]
def get_format_from_leader(rec)
  leader = rec.leader
  leader[6, 2]
end
1010
+
1011
# Collects physical-description style values from 300, 254/255/310/342/352/362
# and 340, followed by the 880 fields whose $6 links to one of those tags.
# Each group leaves out its own set of subfield codes; blank values are dropped.
#
# @param rec record to read
# @return [Array<String>]
def get_format_display(rec)
  # [tags to read, $6 linkage pattern for 880s, subfield codes to leave out]
  rules = [
    ['300', /^300/, %w[3 6 8]],
    [%w[254 255 310 342 352 362], /^(254|255|310|342|352|362)/, %w[6 8]],
    ['340', /^340/, %w[0 2 6 8]]
  ]
  results = rules.flat_map do |tags, _linkage, excluded|
    rec.fields(tags).map { |field| join_subfields(field, &subfield_not_in(excluded)) }
  end
  results += rec.fields('880').map do |field|
    rule = rules.find { |_tags, linkage, _excluded| has_subfield6_value(field, linkage) }
    # unlinked 880s contribute an empty value that the final filter removes
    rule ? join_subfields(field, &subfield_not_in(rule.last)) : []
  end
  results.select(&:present?)
end
1035
+
1036
# Number of EnrichedMarc::TAG_ITEM fields, or nil when there are none.
# @return [Integer, nil]
def get_itm_count(rec)
  item_fields = rec.fields(EnrichedMarc::TAG_ITEM)
  return nil if item_fields.empty?

  item_fields.size
end
1040
+
1041
# Number of EnrichedMarc::TAG_HOLDING fields, or nil when there are none.
# @return [Integer, nil]
def get_hld_count(rec)
  holding_fields = rec.fields(EnrichedMarc::TAG_HOLDING)
  return nil if holding_fields.empty?

  holding_fields.size
end
1045
+
1046
# Counts holding fields whose id ($8) is not referenced by any item field ($r).
# A holding without a $8 is counted as well.
#
# @param rec record to inspect
# @return [Integer]
def get_empty_hld_count(rec)
  referenced_ids = Set.new
  rec.each_by_tag(EnrichedMarc::TAG_ITEM) do |field|
    holding_ref = field.find { |subfield| subfield.code == 'r' }
    next unless holding_ref

    referenced_ids.add(holding_ref.value)
  end

  unreferenced = 0
  rec.each_by_tag(EnrichedMarc::TAG_HOLDING) do |field|
    holding_id = field.find { |subfield| subfield.code == '8' }
    unreferenced += 1 unless referenced_ids.include?(holding_id&.value)
  end
  unreferenced
end
1065
+
1066
# Number of EnrichedMarc::TAG_ELECTRONIC_INVENTORY fields, or nil when there are none.
# @return [Integer, nil]
def get_prt_count(rec)
  inventory_fields = rec.fields(EnrichedMarc::TAG_ELECTRONIC_INVENTORY)
  return nil if inventory_fields.empty?

  inventory_fields.size
end
1070
+
1071
# Determines how a record can be accessed.
#
# Holding fields contribute 'At the library'; electronic inventory fields,
# qualifying 856 links (ind1 '4', ind2 not '2', a $u containing
# 'hdl.library.upenn.edu' and no 'Finding aid' in $z) and ETAS records
# contribute 'Online'.
#
# @param rec record to inspect
# @return [Array<String>] unique access labels
def get_access_values(rec)
  labels = rec.each_with_object([]) do |f, found|
    if f.tag == EnrichedMarc::TAG_HOLDING
      found << 'At the library'
    elsif f.tag == EnrichedMarc::TAG_ELECTRONIC_INVENTORY
      found << 'Online'
    end
  end

  rec.fields('856').each do |field|
    next unless field.indicator1 == '4' && field.indicator2 != '2'

    link_note = join_subfields(field, &subfield_in(%w[z]))
    field.select(&subfield_in(%w[u])).each do |sf|
      if !link_note.include?('Finding aid') && sf.value.include?('hdl.library.upenn.edu')
        labels << 'Online'
      end
    end
  end

  labels << 'Online' if is_etas(rec)
  labels.uniq
end
1093
+
1094
# True when any 977 field has a subfield e whose value is exactly 'ETAS'.
# @return [Boolean]
def is_etas(rec)
  rec.fields('977').each do |field|
    return true if field.any? { |sf| sf.code == 'e' && sf.value == 'ETAS' }
  end
  false
end
1101
+
1102
+ # examines a 1xx datafield and constructs a string out of select
1103
+ # subfields, including expansion of 'relator' code
1104
# Builds a display string from a name field: every subfield value except
# 0, 1, 6 and 8, with subfield 4 replaced by its relator_codes entry.
# A trailing period is added unless the text already ends in '.' or '-'.
#
# @param field field whose subfields are read
# @return the result of normalize_space on the assembled text
def get_name_1xx_field(field)
  pieces = field.map do |sf|
    next ", #{relator_codes[sf.value]}" if sf.code == '4'

    # 0 = authority record numbers, 1 = URIs; 6 and 8 are not part of the name
    " #{sf.value}" unless %w[0 1 6 8].include?(sf.code)
  end
  text = pieces.compact.join
  text += '.' unless text.end_with?('.', '-')
  normalize_space(text)
end
1117
+
1118
# Builds a display string from a series field: every subfield value except
# 0, 5, 6 and 8, with subfield 4 replaced by its relator_codes entry.
# A trailing period is added unless the text already ends in '.' or '-'.
#
# @param field field whose subfields are read
# @return the result of normalize_space on the assembled text
def get_series_8xx_field(field)
  pieces = field.map do |sf|
    next ", #{relator_codes[sf.value]}" if sf.code == '4'

    # 0 = authority record numbers; 5, 6 and 8 are left out as well
    " #{sf.value}" unless %w[0 5 6 8].include?(sf.code)
  end
  text = pieces.compact.join
  text += '.' unless text.end_with?('.', '-')
  normalize_space(text)
end
1130
+
1131
# Builds a display string from a series field: every subfield value except
# 0, 6 and 8, with subfield 4 replaced by its relator_codes entry.
# A trailing period is added unless the text already ends in '.' or '-'.
#
# @param field field whose subfields are read
# @return the result of normalize_space on the assembled text
def get_series_4xx_field(field)
  pieces = field.map do |sf|
    next ", #{relator_codes[sf.value]}" if sf.code == '4'

    # 0 = authority record numbers; 6 and 8 are left out as well
    " #{sf.value}" unless %w[0 6 8].include?(sf.code)
  end
  text = pieces.compact.join
  text += '.' unless text.end_with?('.', '-')
  normalize_space(text)
end
1143
+
1144
# Collects publication statements: every 245 $f, then either the first
# 260/261/262 field (all subfields except 6) or, when none exists, a statement
# assembled from the first 264 with ind2 '1' ($a $b $c) and the $c of the first
# 264 with ind2 '4'. Values are stripped and blank ones removed.
#
# @param rec record to read
# @return [Array<String>]
def get_publication_values(rec)
  values = rec.fields('245').flat_map do |field|
    field.select { |sf| sf.code == 'f' }.map(&:value)
  end

  imprint = rec.fields(%w[260 261 262]).first
  if imprint
    values << join_and_trim_whitespace(imprint.reject { |sf| sf.code == '6' }.map(&:value))
  else
    first_264 = ->(ind2) { rec.fields.find { |field| field.tag == '264' && field.indicator2 == ind2 } }
    publication = first_264.call('1')
    place_and_name = publication ? publication.select(&subfield_in(%w[a b])).map(&:value) : []
    publication_date = publication ? publication.select(&subfield_in(['c'])).map(&:value) : []
    copyright = first_264.call('4')
    copyright_date =
      if copyright
        # separate the copyright date with a comma only when a publication date precedes it
        copyright.select { |sf| sf.code == 'c' }
                 .map { |sf| (publication_date.present? ? ', ' : '') + sf.value }
      else
        []
      end
    values << [place_and_name, publication_date, copyright_date].join(' ')
  end

  values.map(&:strip).select(&:present?)
end
1184
+
1185
# Collects publication display strings, in order: $f of the first 245, the
# first 260/261/262, the first 880 linked to 260/261/262, $f of every 880
# linked to 245, and the values of get_264_or_880_fields(rec, '1').
# Blank values are removed.
#
# @param rec record to read
# @return [Array<String>]
def get_publication_display(rec)
  title_dates = rec.fields('245').take(1).flat_map do |field|
    field.select { |sf| sf.code == 'f' }.map(&:value)
  end
  imprint = rec.fields(%w[260 261 262]).take(1).map do |field|
    join_subfields(field, &subfield_not_6_or_8)
  end
  linked_imprint = rec.fields('880')
                      .select { |f| has_subfield6_value(f, /^(260|261|262)/) }
                      .take(1)
                      .map { |field| join_subfields(field, &subfield_not_6_or_8) }
  linked_title_dates = rec.fields('880')
                          .select { |f| has_subfield6_value(f, /^245/) }
                          .map { |field| join_subfields(field, &subfield_in(['f'])) }

  combined = title_dates + imprint + linked_imprint + linked_title_dates + get_264_or_880_fields(rec, '1')
  combined.select(&:present?)
end
1209
+
1210
# Looks up, in the languages mapping, the code found at positions 35-37 of each 008.
# Fields too short to hold a code and codes without a mapping are skipped.
#
# @param rec record to read
# @return [Array] mapped language values
def get_language_values(rec)
  rec.fields('008').each_with_object([]) do |field, found|
    code = field.value[35..37]
    next if code.nil?

    language = languages[code]
    found << language unless language.nil?
  end
end
1218
+
1219
+ # fieldname = name of field in the locations data structure to use
1220
# Maps location codes found on a record to the requested attribute of the
# locations data structure.
#
# Item fields are used when the record has any (their current-location
# subfield is read); otherwise holding fields are used (shelving-location
# subfield). The code 'web' is skipped, as are codes with no entry in
# locations. 'Online library' is appended when the record has electronic
# inventory fields.
#
# @param rec record to read
# @param display_fieldname name of the attribute to read from each locations entry
# @return [Array]
def holdings_location_mappings(rec, display_fieldname)
  if rec.fields(EnrichedMarc::TAG_ITEM).size > 0
    tag = EnrichedMarc::TAG_ITEM
    subfield_code = EnrichedMarc::SUB_ITEM_CURRENT_LOCATION
  else
    tag = EnrichedMarc::TAG_HOLDING
    subfield_code = EnrichedMarc::SUB_HOLDING_SHELVING_LOCATION
  end

  mapped = rec.fields(tag).flat_map do |field|
    field.select { |sf| sf.code == subfield_code && sf.value != 'web' }
         .map { |sf| locations[sf.value][display_fieldname] if locations[sf.value].present? }
         .select(&:present?)
         .flatten # an entry may hold several values
  end.uniq

  mapped << 'Online library' if rec.fields(EnrichedMarc::TAG_ELECTRONIC_INVENTORY).any?
  mapped
end
1260
+
1261
# Summarizes how many items sit in the 'vanpNocirc' current location.
#
# @param rec record to inspect
# @return [String] 'na' (no items), 'all', 'none' or 'partial'
def items_nocirc(rec)
  items = rec.fields(EnrichedMarc::TAG_ITEM)
  return 'na' if items.empty?

  nocirc_total = items.count do |f|
    f.any? { |sf| sf.code == EnrichedMarc::SUB_ITEM_CURRENT_LOCATION && sf.value == 'vanpNocirc' }
  end

  if nocirc_total == items.size
    'all'
  elsif nocirc_total.zero?
    'none'
  else
    'partial'
  end
end
1284
+
1285
# Location mappings using the 'library' attribute.
def get_library_values(rec)
  attribute = 'library'
  holdings_location_mappings(rec, attribute)
end
1288
+
1289
# Location mappings using the 'specific_location' attribute.
def get_specific_location_values(rec)
  attribute = 'specific_location'
  holdings_location_mappings(rec, attribute)
end
1292
+
1293
# Looks up the character at leader position 17 in EncodingLevel::RANK.
def get_encoding_level_rank(rec)
  level = rec.leader[17]
  EncodingLevel::RANK[level]
end
1296
+
1297
# Reads the date type (position 6) and the two dates (positions 7-10 and 11-14)
# from the first 008 and passes them to build_dates_hash in the arrangement
# that DateType::MAP assigns to the date type.
#
# @param rec record to read
# @return the build_dates_hash result, or nil when there is no 008, the 008 is
#   too short, or the date type has no handled mapping
def prepare_dates(rec)
  control_field = rec.fields('008').first
  return nil unless control_field

  raw = control_field.value
  date_type = raw[6]
  return nil unless date_type

  date1 = raw[7, 4]
  return nil unless date1

  date2 = raw[11, 4]
  case DateType::MAP[date_type]
  when :single then build_dates_hash(date1)
  when :lower_bound then build_dates_hash(date1, '9999')
  when :range then build_dates_hash(date1, date2)
  when :separate_content then build_dates_hash(date1, nil, date2)
  end
end
1317
+
1318
# Builds the hash of publication/content date values.
#
# Unknown digits ('u') are resolved via sanitize_date: to '0' for range starts
# and to '9' for range ends. When the end date sorts before the start date the
# date type is assumed to be miscoded: the end is recomputed from the start date
# and the raw end date becomes the content date.
#
# @param raw_pub_date_start raw start date
# @param raw_pub_date_end raw end date, if any
# @param content_date separate content date, if any
# @return [Hash, nil] nil when the start date cannot be sanitized
def build_dates_hash(raw_pub_date_start, raw_pub_date_end = nil, content_date = nil)
  start_year = sanitize_date(raw_pub_date_start, '0')
  return nil if start_year.nil?

  end_year = raw_pub_date_end ? sanitize_date(raw_pub_date_end, '9') : nil
  if end_year
    if start_year > end_year
      end_year = sanitize_date(raw_pub_date_start, '9')
      content_date = raw_pub_date_end
    end
  else
    end_year = sanitize_date(raw_pub_date_start, '9')
  end

  content_start, content_end =
    if content_date.nil?
      [start_year, end_year]
    elsif content_date =~ /^[0-9]{4}$/
      [content_date, content_date]
    else
      lower = sanitize_date(content_date, '0')
      # an unusable separate content date falls back to the publication dates
      lower ? [lower, sanitize_date(content_date, '9')] : [start_year, end_year]
    end

  decade = current_year + 15 > start_year.to_i ? start_year[0, 3] + '0s' : nil
  {
    :pub_date_sort => start_year,
    :pub_date_decade => decade,
    :pub_date_range => "[#{start_year} TO #{end_year}]",
    :content_date_range => "[#{content_start} TO #{content_end}]",
    :pub_date_minsort => "#{start_year}-01-01T00:00:00Z",
    :pub_date_maxsort => "#{end_year.to_i + 1}-01-01T00:00:00Z",
    :content_date_minsort => "#{content_start}-01-01T00:00:00Z",
    :content_date_maxsort => "#{content_end.to_i + 1}-01-01T00:00:00Z"
  }
end
1356
+
1357
# Normalizes a raw date string into digits.
#
# A usable input consists of digits optionally followed by 'u' placeholders
# (or 'u' placeholders only); every 'u' is replaced with +replace+.
# The whole string must match: \A and \z are used because ^ and $ match at
# line boundaries, which would accept values with a trailing newline or with
# extra lines. An empty string is rejected because it carries no date.
#
# @param input [String, nil] raw date, e.g. '19uu'
# @param replace [String] text substituted for each 'u'
# @return [String, nil] the sanitized date, or nil when the input is unusable
def sanitize_date(input, replace)
  return nil unless input.is_a?(String) && !input.empty?
  return nil if input !~ /\A[0-9]*u*\z/

  input.gsub(/u/, replace)
end
1361
+
1362
# Takes positions 7-10 of each 008 and replaces every non-digit with '0'.
# Blank or missing slices are skipped.
#
# @param rec record to read
# @return [Array<String>]
def publication_date_digits(rec)
  rec.fields('008').each_with_object([]) do |field, years|
    year = field.value[7, 4]
    years << year.gsub(/\D/, '0') if year.present?
  end
end
1367
+
1368
# Turns publication years into decade labels such as '1990s'.
# Years must start with a non-zero digit followed by a digit, and must be
# earlier than current_year + 15.
#
# @param rec record to read
# @return [Array<String>]
def get_publication_date_values(rec)
  publication_date_digits(rec).each_with_object([]) do |year, decades|
    next unless year =~ /^[1-9][0-9]/
    next unless current_year + 15 > year.to_i

    decades << year[0, 3] + '0s'
  end
end
1373
+
1374
# Sort values are the publication_date_digits of the record, unchanged.
def get_publication_date_sort_values(rec)
  digits = publication_date_digits(rec)
  digits
end
1377
+
1378
# Builds classification labels from item call numbers.
#
# The first character of each call number is looked up in loc_classifications
# (call number type '0') or dewey_classifications (type '1', where '00' is
# appended to the character). Characters with no mapping, and items with any
# other call number type, produce nothing.
#
# @param rec record to read
# @return [Array<String>] unique labels of the form "<class> - <description>"
def get_classification_values(rec)
  labels = rec.fields(EnrichedMarc::TAG_ITEM).flat_map do |item|
    type_subfield = item.find { |sf| sf.code == EnrichedMarc::SUB_ITEM_CALL_NUMBER_TYPE }
    cn_type = type_subfield && type_subfield.value

    initials = item.select { |sf| sf.code == EnrichedMarc::SUB_ITEM_CALL_NUMBER }
                   .map(&:value)
                   .select(&:present?)
                   .map { |call_num| call_num[0] }
                   .compact

    initials.map do |initial|
      case cn_type
      when '0'
        description = loc_classifications[initial]
        [initial, description].join(' - ') if description
      when '1'
        description = dewey_classifications[initial]
        [initial + '00', description].join(' - ') if description
      end
    end.compact
  end
  labels.uniq
end
1408
+
1409
# Returns 655 genre strings (subfields other than 0, 2, 5 and c, joined by a
# space), but only when the record's 007 starts with 'v' or one of its items is
# in a location whose 'specific_location' matches /manuscript/.
#
# @param rec record to read
# @return [Array<String>] empty when neither condition holds
def get_genre_values(rec)
  is_manuscript = rec.fields(EnrichedMarc::TAG_ITEM).any? do |item|
    loc = item[EnrichedMarc::SUB_ITEM_CURRENT_LOCATION]
    locations[loc].present? && (locations[loc]['specific_location'] =~ /manuscript/)
  end
  starts_with_v = rec['007'].try { |r| r.value.start_with?('v') }
  return [] unless starts_with_v || is_manuscript

  rec.fields('655').map do |field|
    field.find_all(&subfield_not_in(%w[0 2 5 c])).map(&:value).join(' ')
  end
end
1427
+
1428
# Joins, for each 655 field, the subfields other than 0, 2, 5 and c.
#
# @param rec record to read
# @return [Array] one joined value per 655 field
def get_genre_search_values(rec)
  rec.fields('655').each_with_object([]) do |field, values|
    values << join_subfields(field, &subfield_not_in(%w[0 2 5 c]))
  end
end
1433
+
1434
+ # @param [MARC::Record] rec
1435
+ # @param [TrueClass, FalseClass] should_link
1436
# Builds display hashes for every field accepted by
# GenreTools.allowed_genre_field?.
#
# :value holds the subfields other than 0, 2, 5, 6, 8, c, e and w; subfields a
# and b are preceded by a space, all others by ' -- '. :value_append holds the
# e and w subfield values joined by ' -- '.
#
# @return [Array<Hash>] unique hashes with :value, :value_append, :link
#   (the should_link argument) and :link_type
def get_genre_display(rec, should_link)
  rec.fields
     .select { |field| GenreTools.allowed_genre_field?(field) }
     .map do |field|
       sub_with_hyphens = field.find_all(&subfield_not_in(%w{0 2 5 6 8 c e w})).map { |sf|
         sep = %w{a b}.member?(sf.code) ? ' ' : ' -- '
         sep + sf.value
       }.join.lstrip
       # join the subfield *values*: joining the subfield objects themselves
       # would embed each object's string form instead of its text
       eandw_with_hyphens = field.find_all(&subfield_in(%w{e w})).map(&:value).join(' -- ')
       { value: sub_with_hyphens, value_append: eandw_with_hyphens, link: should_link, link_type: 'genre_search' }
     end.uniq
end
1449
+
1450
# Builds the title string from the first 245: the first a/k subfield, then '='
# or ':' when either that subfield or $h ends with it ('=' wins), then the
# b/n/p subfields. Trailing slashes, commas, colons and equals signs are
# trimmed by the trim_trailing_* helpers.
#
# @param rec record to read
# @return [Array<String>] zero or one title
def get_title_values(rec)
  rec.fields('245').take(1).map do |field|
    main_title = field.find_all(&subfield_in(%w[a k]))
                      .map { |sf| trim_trailing_comma(trim_trailing_slash(sf.value).rstrip) }
                      .first || ''
    remainder = field.find_all(&subfield_in(%w[b n p]))
                     .map { |sf| trim_trailing_slash(sf.value) }
                     .join(' ')
    medium_ending = field.find_all { |sf| sf.code == 'h' }
                         .map { |sf| sf.value[-1] }
                         .first
    endings = [main_title[-1], medium_ending]
    punct = %w[= :].find { |mark| endings.member?(mark) }

    [trim_trailing_colon(trim_trailing_equal(main_title)), punct, remainder]
      .select(&:present?).join(' ')
  end
end
1475
+
1476
# Builds a title string from each 880 linked to 245: $a (or $k when $a is
# blank), then '=' or ':' when the title or $h ends with it ('=' wins), then
# the first $b, $n and $p.
#
# @param rec record to read
# @return [Array<String>]
def get_title_880_values(rec)
  linked_titles = rec.fields('880').select { |f| has_subfield6_value(f, /^245/) }
  linked_titles.map do |field|
    first_value = ->(code) { field.find_all(&subfield_in([code])).first.try(:value) }

    suba_value = first_value.call('a')
    subk_value = first_value.call('k') || ''
    title_with_slash = suba_value.present? ? suba_value : (subk_value + ' ')
    title_ak = trim_trailing_comma(join_and_trim_whitespace([trim_trailing_slash(title_with_slash)]))

    subh = join_and_trim_whitespace(field.find_all(&subfield_in(%w[h])).map(&:value))

    endings = [title_ak[-1], subh[-1]]
    punct = %w[= :].find { |mark| endings.member?(mark) }

    parts = [trim_trailing_equal(title_ak), punct]
    %w[b n p].each { |code| parts << trim_trailing_slash(first_value.call(code) || '') }
    parts.select(&:present?).join(' ')
  end
end
1506
+
1507
# Splits a leading '[' off a string.
#
# @param s [String]
# @return [Hash] 'prefix' is '[' or '', 'filing' is the rest of the string
def separate_leading_bracket_into_prefix_and_filing_hash(s)
  bracketed = s.start_with?('[')
  {
    'prefix' => bracketed ? '[' : '',
    'filing' => bracketed ? s[1..-1] : s
  }
end
1514
+
1515
# Splits each field's title into a 'prefix' and a 'filing' part.
#
# A single-digit indicator2 from 1 to 9 gives the length of the prefix taken
# from $a. With indicator2 0 (or an invalid indicator2 when
# support_invalid_indicator2 is true) only a leading '[' is split off $a, or
# off $k when $a is blank. The b/n/p subfields are appended to 'filing'.
#
# @param fields fields to process
# @param support_invalid_indicator2 when false, an invalid indicator2 makes the
#   whole method return []
# @return [Array<Hash>]
def get_title_from_245_or_880(fields, support_invalid_indicator2 = true)
  fields.map do |field|
    nonfiling =
      if field.indicator2 =~ /^[0-9]$/
        field.indicator2.to_i
      elsif support_invalid_indicator2
        0
      else
        return []
      end

    main_title = join_subfields(field, &subfield_in(%w[a]))
    entry =
      if nonfiling.between?(1, 9)
        { 'prefix' => main_title[0, nonfiling], 'filing' => main_title[nonfiling..-1] }
      else
        source = main_title.present? ? main_title : join_subfields(field, &subfield_in(%w[k]))
        separate_leading_bracket_into_prefix_and_filing_hash(source)
      end

    entry['filing'] = [entry['filing'], join_subfields(field, &subfield_in(%w[b n p]))].join(' ')
    entry
  end.compact
end
1542
+
1543
# Prefix/filing hash for the first 245 of the record.
def get_title_245(rec, support_invalid_indicator2 = true)
  first_title = rec.fields('245').take(1)
  get_title_from_245_or_880(first_title, support_invalid_indicator2)
end
1546
+
1547
# Prefix/filing hashes for every 880 linked to 245.
def get_title_880_for_xfacet(rec)
  linked_titles = rec.fields('880').select { |f| has_subfield6_value(f, /^245/) }
  get_title_from_245_or_880(linked_titles)
end
1550
+
1551
# Applies references() to the 245 title followed by the 880 titles (the 880s
# are included for non-roman character handling).
#
# @param rec record to read
# @return [Array]
def get_title_xfacet_values(rec)
  main_refs = get_title_245(rec).map { |title| references(title) }
  linked_refs = get_title_880_for_xfacet(rec).map { |title| references(title) }
  main_refs + linked_refs
end
1559
+
1560
# Sort form of the 245 title: the filing part followed by the prefix.
#
# @param rec record to read
# @return [Array<String>]
def get_title_sort_values(rec)
  get_title_245(rec).map do |title|
    filing, prefix = title.values_at('filing', 'prefix')
    filing + prefix
  end
end
1565
+
1566
# Filing part only of the 245 title.
#
# @param rec record to read
# @param support_invalid_indicator2 passed through to get_title_245
# @return [Array]
def get_title_sort_filing_parts(rec, support_invalid_indicator2 = true)
  titles = get_title_245(rec, support_invalid_indicator2)
  titles.map { |title| title['filing'] }
end
1571
+
1572
# Runs do_title_variant_field for fields 130, 240, 210, 222 and 246, in that
# order.
#
# @param rec record to read
# @param acc accumulator handed to do_title_variant_field
# @return the result of the last do_title_variant_field call
def append_title_variants(rec, acc)
  # [tag, which indicator holds the non-filing count (or nil), subfield codes]
  variant_specs = [
    ['130', 1, %w[a]],
    ['240', 2, %w[a]],
    ['210', nil, %w[a b]],
    ['222', 2, %w[a b]],
    ['246', nil, %w[a b]]
  ]
  variant_specs.map do |tag, non_filing_indicator, codes|
    do_title_variant_field(rec, acc, tag, non_filing_indicator, *codes)
  end.last
end
1579
+
1580
# For each field with the given tag, gathers the first value of every requested
# subfield code and hands them to append_title_variant_field. Fields lacking
# the first requested subfield are skipped.
#
# @param rec record to read
# @param acc accumulator passed on to append_title_variant_field
# @param field_id tag of the fields to process
# @param non_filing_indicator 1 or 2 to pass along that indicator's value; anything else passes nil
# @param subfields_spec subfield codes to collect, in order
# @return the fields that were iterated
def do_title_variant_field(rec, acc, field_id, non_filing_indicator, *subfields_spec)
  rec.fields(field_id).each do |field|
    found = subfields_spec.map do |code|
      field.find { |subfield| subfield.code == code }&.value
    end
    next if found.first.nil?

    non_filing =
      case non_filing_indicator
      when 1 then field.indicator1
      when 2 then field.indicator2
      end
    append_title_variant_field(acc, non_filing, found.compact)
  end
end
1599
+
1600
# Title search values from the 245 fields and the 880 fields linked to 245,
# leaving out subfields c, 6, 8 and h.
#
# @param rec record to read
# @param format_filter when true, values are produced only if the leader
#   format code ends with 's'
# @return [Array<String>] non-blank values
def get_title_1_search_main_values(rec, format_filter: false)
  leader_format = get_format_from_leader(rec)
  extract = lambda do |field|
    next nil unless !format_filter || leader_format.end_with?('s')

    join_and_trim_whitespace(field.find_all(&subfield_not_in(%w[c 6 8 h])).map(&:value))
  end

  titles = rec.fields('245').map(&extract).select(&:present?)
  linked = rec.fields('880').select { |f| has_subfield6_value(f, /^245/) }
  titles + linked.map(&extract).select(&:present?)
end
1616
+
1617
# Title search values without the format filter.
def get_title_1_search_values(rec)
  unfiltered = get_title_1_search_main_values(rec)
  unfiltered
end
1620
+
1621
# Title search values with the format filter switched on.
def get_journal_title_1_search_values(rec)
  filtered = get_title_1_search_main_values(rec, format_filter: true)
  filtered
end
1624
+
1625
# Memoized list of field tags read by get_title_2_search_main_values.
# @return [Array<String>]
def title_2_search_main_tags
  return @title_2_search_main_tags if @title_2_search_main_tags

  @title_2_search_main_tags = ['130', '210', '240', '245', '246', '247', '440', '490', '730', '740', '830']
end
1628
+
1629
# Memoized list of field tags read by get_title_2_search_aux_values.
# @return [Array<String>]
def title_2_search_aux_tags
  return @title_2_search_aux_tags if @title_2_search_aux_tags

  @title_2_search_aux_tags = ['773', '774', '780', '785']
end
1632
+
1633
# Memoized list of field tags read by get_title_2_search_7xx_values.
# @return [Array<String>]
def title_2_search_7xx_tags
  return @title_2_search_7xx_tags if @title_2_search_7xx_tags

  @title_2_search_7xx_tags = ['700', '710', '711']
end
1636
+
1637
# Search values from the title_2_search_main_tags fields, leaving out
# subfields c, 6 and 8.
#
# @param rec record to read
# @param format_filter when true, values are produced only if the leader
#   format code ends with 's'
# @return [Array<String>] non-blank values
def get_title_2_search_main_values(rec, format_filter: false)
  leader_format = get_format_from_leader(rec)
  rec.fields(title_2_search_main_tags).each_with_object([]) do |field, titles|
    next if format_filter && !leader_format.end_with?('s')

    title = join_and_trim_whitespace(field.find_all(&subfield_not_in(%w[c 6 8])).map(&:value))
    titles << title if title.present?
  end
end
1645
+
1646
# Search values from the title_2_search_aux_tags fields, built from their
# s and t subfields only.
#
# The subfields are selected with subfield_in so that this agrees with the way
# get_title_2_search_800_values treats the 880 fields linked to the same tags;
# selecting with subfield_not_in would index every subfield except s and t.
#
# @param rec record to read
# @param format_filter when true, values are produced only if the leader
#   format code ends with 's'
# @return [Array<String>] non-blank values
def get_title_2_search_aux_values(rec, format_filter: false)
  format = get_format_from_leader(rec)
  rec.fields(title_2_search_aux_tags).map do |field|
    if !format_filter || format.end_with?('s')
      join_and_trim_whitespace(field.find_all(&subfield_in(%w{s t})).map(&:value))
    end
  end.select { |v| v.present? }
end
1654
+
1655
# Search values from the t subfields of the title_2_search_7xx_tags fields.
#
# @param rec record to read
# @param format_filter when true, values are produced only if the leader
#   format code ends with 's'
# @return [Array<String>] non-blank values
def get_title_2_search_7xx_values(rec, format_filter: false)
  leader_format = get_format_from_leader(rec)
  rec.fields(title_2_search_7xx_tags).each_with_object([]) do |field, titles|
    next if format_filter && !leader_format.end_with?('s')

    title = join_and_trim_whitespace(field.find_all(&subfield_in(%w[t])).map(&:value))
    titles << title if title.present?
  end
end
1663
+
1664
# Search values from the t subfields of 505 fields whose indicators are both '0'.
#
# @param rec record to read
# @param format_filter when true, values are produced only if the leader
#   format code ends with 's'
# @return [Array<String>] non-blank values
def get_title_2_search_505_values(rec, format_filter: false)
  leader_format = get_format_from_leader(rec)
  contents_notes = rec.fields('505').select { |f| f.indicator1 == '0' && f.indicator2 == '0' }
  contents_notes.each_with_object([]) do |field, titles|
    next if format_filter && !leader_format.end_with?('s')

    title = join_and_trim_whitespace(field.find_all(&subfield_in(%w[t])).map(&:value))
    titles << title if title.present?
  end
end
1674
+
1675
# Search values from 880 fields, grouped by the tag their $6 links to:
# title fields (all subfields except c, 6, 8, h), 773/774/780/785 (s and t),
# 700/710/711 (t), and 505 with both indicators '0' (t).
#
# @param rec record to read
# @param format_filter when true, values are produced only if the leader
#   format code ends with 's'
# @return [Array<String>] non-blank values, group by group
def get_title_2_search_800_values(rec, format_filter: false)
  leader_format = get_format_from_leader(rec)
  groups = [
    { linkage: /^(130|210|240|245|246|247|440|490|730|740|830)/, keep: -> { subfield_not_in(%w[c 6 8 h]) } },
    { linkage: /^(773|774|780|785)/, keep: -> { subfield_in(%w[s t]) } },
    { linkage: /^(700|710|711)/, keep: -> { subfield_in(%w[t]) } },
    { linkage: /^505/, keep: -> { subfield_in(%w[t]) }, both_indicators_zero: true }
  ]

  groups.flat_map do |group|
    candidates = rec.fields('880')
    if group[:both_indicators_zero]
      candidates = candidates.select { |f| f.indicator1 == '0' && f.indicator2 == '0' }
    end
    candidates
      .select { |f| f.any? { |sf| sf.code == '6' && sf.value =~ group[:linkage] } }
      .map do |field|
        if !format_filter || leader_format.end_with?('s')
          join_and_trim_whitespace(field.find_all(&group[:keep].call).map(&:value))
        end
      end
      .select(&:present?)
  end
end
1709
+
1710
# Concatenates the main, aux, 7xx, 505 and 880 title search values, in that
# order, without the format filter.
def get_title_2_search_values(rec)
  [
    get_title_2_search_main_values(rec),
    get_title_2_search_aux_values(rec),
    get_title_2_search_7xx_values(rec),
    get_title_2_search_505_values(rec),
    get_title_2_search_800_values(rec)
  ].reduce(:+)
end
1717
+
1718
# Concatenates the main, aux, 7xx, 505 and 880 title search values, in that
# order, with the format filter switched on.
def get_journal_title_2_search_values(rec)
  [
    get_title_2_search_main_values(rec, format_filter: true),
    get_title_2_search_aux_values(rec, format_filter: true),
    get_title_2_search_7xx_values(rec, format_filter: true),
    get_title_2_search_505_values(rec, format_filter: true),
    get_title_2_search_800_values(rec, format_filter: true)
  ].reduce(:+)
end
1725
+
1726
+ # this gets called directly by ShowPresenter rather than via
1727
+ # Blacklight's show field definition plumbing, so we return a single string
1728
# Builds the title display as a single string: each 245 (subfields other than
# 6 and 8), followed by each linked 880 value prefixed with ' = ', all joined
# by a space.
#
# @param rec record to read
# @return [String]
def get_title_display(rec)
  main_titles = rec.fields('245').map do |field|
    join_subfields(field, &subfield_not_in(%w[6 8]))
  end
  linked_titles = get_880(rec, '245', &subfield_not_in(%w[6 8])).map { |value| " = #{value}" }
  (main_titles + linked_titles).join(' ')
end
1737
+
1738
# Memoized list of field tags read by get_author_creator_values.
# @return [Array<String>]
def author_creator_tags
  return @author_creator_tags if @author_creator_tags

  @author_creator_tags = ['100', '110']
end
1741
+
1742
# Applies get_name_1xx_field to every field tagged with one of author_creator_tags.
#
# @param rec record to read
# @return [Array]
def get_author_creator_values(rec)
  rec.fields(author_creator_tags).each_with_object([]) do |field, names|
    names << get_name_1xx_field(field)
  end
end
1747
+
1748
# Joins, for each 880 linked to 100 or 110, the subfields other than 4, 6 and 8.
#
# @param rec record to read
# @return [Array]
def get_author_880_values(rec)
  linked_names = rec.fields('880').select { |f| has_subfield6_value(f, /^(100|110)/) }
  linked_names.map do |field|
    kept = field.find_all(&subfield_not_in(%w[4 6 8]))
    join_and_trim_whitespace(kept.map(&:value))
  end
end
1755
+
1756
# Builds author search strings in three groups:
#   1. each 100/110 with $a rearranged as "<text after ', '> <text before ', '>",
#      the other subfields except 1, 6 and 8, and $4 expanded via relator_codes;
#   2. each 100/110 with every subfield except 6 and 8 as-is, $4 expanded;
#   3. each 880 linked to 100/110, with $a rearranged around ',' followed by the
#      subfields other than 6, 8, a and t.
# Groups 1 and 2 get a trailing period unless they already end in '.' or '-'.
#
# @param rec record to read
# @return [Array<String>]
def get_author_creator_1_search_values(rec)
  terminate = ->(text) { text.end_with?('.', '-') ? text : text + '.' }
  relator = ->(sf) { ", #{relator_codes[sf.value]}" }

  inverted = rec.fields(%w[100 110]).map do |field|
    pieces = field.map do |sf|
      case sf.code
      when 'a'
        after_comma = join_and_trim_whitespace([trim_trailing_comma(substring_after(sf.value, ', '))])
        before_comma = substring_before(sf.value, ', ')
        " #{after_comma} #{before_comma}"
      when '4' then relator.call(sf)
      when '1', '6', '8' then nil
      else " #{sf.value}"
      end
    end
    terminate.call(join_and_trim_whitespace(pieces.compact))
  end

  as_entered = rec.fields(%w[100 110]).map do |field|
    pieces = field.map do |sf|
      case sf.code
      when '4' then relator.call(sf)
      when '6', '8' then nil
      else " #{sf.value}"
      end
    end
    terminate.call(join_and_trim_whitespace(pieces.compact))
  end

  linked_fields = rec.fields(%w[880]).select do |f|
    f.any? { |sf| sf.code == '6' && sf.value =~ /^(100|110)/ }
  end
  linked = linked_fields.map do |field|
    rearranged = field.find_all(&subfield_in(%w[a])).map do |sf|
      after_comma = join_and_trim_whitespace([trim_trailing_comma(substring_after(sf.value, ','))])
      before_comma = substring_before(sf.value, ',')
      "#{after_comma} #{before_comma}"
    end.first
    others = join_and_trim_whitespace(field.find_all(&subfield_not_in(%w[6 8 a t])).map(&:value))
    [rearranged, others].join(' ')
  end

  inverted + as_entered + linked
end
1805
+
1806
# Memoized list of the tags used by get_author_creator_2_search_values.
# @return [Array<String>] the 1xx/4xx/7xx/8xx name tags listed below
def author_creator_2_tags
  return @author_creator_2_tags if @author_creator_2_tags

  @author_creator_2_tags = %w[100 110 111 400 410 411 700 710 711 800 810 811]
end
1809
+
1810
# Builds the broader author search values from every field in
# author_creator_2_tags plus the 880 fields linked to those tags.
#
# For each regular field two values are emitted: the subfields in stored
# order, and a variant where subfield a is re-ordered around its first comma.
# Subfield 4 is rendered through relator_codes.
#
# Fix: the previous version evaluated the "append '.' unless the value ends
# with '.' or '-'" expression but threw the result away, so the punctuation
# rule that get_author_creator_1_search_values applies was never effective
# here. The punctuated value is now what gets returned.
#
# @param rec the record to read
# @return [Array<String>] two values per matching field, then two per linked 880
def get_author_creator_2_search_values(rec)
  # Ensure a trailing '.' unless the text already ends with '.' or '-'.
  punctuate = ->(text) { text.end_with?('.', '-') ? text : text + '.' }

  regular = rec.fields(author_creator_2_tags).map do |field|
    pieces1 = field.map do |sf|
      if !%w[1 4 5 6 8 t].member?(sf.code)
        " #{sf.value}"
      elsif sf.code == '4'
        ", #{relator_codes[sf.value]}"
      end
    end.compact
    value1 = punctuate.call(join_and_trim_whitespace(pieces1))

    pieces2 = field.map do |sf|
      if sf.code == 'a'
        after_comma = join_and_trim_whitespace([trim_trailing_comma(substring_after(sf.value, ', '))])
        before_comma = substring_before(sf.value, ',')
        " #{after_comma} #{before_comma}"
      elsif !%w[a 4 5 6 8 t].member?(sf.code)
        " #{sf.value}"
      elsif sf.code == '4'
        ", #{relator_codes[sf.value]}"
      end
    end.compact
    value2 = punctuate.call(join_and_trim_whitespace(pieces2))

    [value1, value2]
  end.flatten(1)

  linked_fields = rec.fields(%w[880]).select do |f|
    f.any? { |sf| sf.code == '6' && sf.value =~ /^(100|110|111|400|410|411|700|710|711|800|810|811)/ }
  end
  linked = linked_fields.map do |field|
    value1 = join_and_trim_whitespace(field.find_all(&subfield_not_in(%w[5 6 8 t])).map(&:value))

    suba = field.find_all(&subfield_in(%w[a])).map do |sf|
      after_comma = join_and_trim_whitespace([trim_trailing_comma(substring_after(sf.value, ','))])
      before_comma = substring_before(sf.value, ',')
      "#{after_comma} #{before_comma}"
    end.first
    oth = join_and_trim_whitespace(field.find_all(&subfield_not_in(%w[5 6 8 a t])).map(&:value))
    value2 = [suba, oth].join(' ')

    [value1, value2]
  end.flatten(1)

  regular + linked
end
1864
+
1865
# Sort value taken from the first field matching author_creator_tags,
# omitting subfields 1, 4, 6, 8 and e.
# @param rec the record to read
# @return [Array] zero or one joined value
def get_author_creator_sort_values(rec)
  leading = rec.fields(author_creator_tags).first(1)
  leading.map { |creator| join_subfields(creator, &subfield_not_in(%w[1 4 6 8 e])) }
end
1870
+
1871
# Builds display hashes for 100/110 fields and for 880 fields linked to them.
# Each hash carries the joined name (:value), the get_subfield_4ew text
# (:value_append) and the link type 'author_creator_xfacet2'.
# @param rec the record to read
# @return [Array<Hash>] entries for 100/110 first, linked 880s after
def get_author_display(rec)
  direct = rec.fields(%w[100 110]).map do |field|
    role_text = get_subfield_4ew(field)
    # 0 = authority record numbers added by Alma (2017/04/10),
    # 1 = URIs added by the MARCive project (2022/08/04)
    name_parts = field.reject { |sf| %w[0 1 4 6 8 e w].include?(sf.code) }.map(&:value)
    { value: name_parts.join(' '), value_append: role_text, link_type: 'author_creator_xfacet2' }
  end

  linked_fields = rec.fields('880').select { |f| has_subfield6_value(f, /^(100|110)/) }
  linked = linked_fields.map do |field|
    role_text = get_subfield_4ew(field)
    # 0 = authority record numbers added by Alma (2017/04/10); a trailing
    # '?' is stripped from each linked value
    name_parts = field.reject { |sf| %w[0 4 6 8 e w].include?(sf.code) }
                      .map { |sf| sf.value.gsub(/\?$/, '') }
    { value: name_parts.join(' '), value_append: role_text, link_type: 'author_creator_xfacet2' }
  end

  direct + linked
end
1906
+
1907
# Search values from 110/710/810 fields, using subfields a, b, c and d only.
# @param rec the record to read
# @return [Array] one whitespace-normalized value per field
def get_corporate_author_search_values(rec)
  rec.fields(%w[110 710 810]).map do |corporate|
    wanted = corporate.select(&subfield_in(%w[a b c d]))
    join_and_trim_whitespace(wanted.map(&:value))
  end
end
1912
+
1913
# Values from 130/240 fields with subfields 0, 6 and 8 removed
# (0 = authority record numbers added by Alma, filtered since 2017/05/15).
# @param rec the record to read
# @return [Array] one whitespace-normalized value per field
def get_standardized_title_values(rec)
  rec.fields(%w[130 240]).map do |title_field|
    kept = title_field.find_all(&subfield_not_in(%w[0 6 8]))
    join_and_trim_whitespace(kept.map(&:value))
  end
end
1920
+
1921
# Builds display hashes (link_type 'title_search') for standardized titles
# from three sources, in this order:
#   1. 130/240 fields (these also carry :value_for_link, which additionally
#      drops subfield 5);
#   2. 730 fields with a blank indicator and no subfield i;
#   3. 880 fields linked to 130/240/730 that have no subfield i.
#
# Fix: the 730 filter used to compare the indicators with '' only. Elsewhere
# in this file a blank indicator is matched against both '' and ' ' (see
# get_conference_display, get_related_work_display), so a 730 whose blank
# indicator is a space was silently skipped. Both spellings are now accepted.
#
# @param rec the record to read
# @return [Array<Hash>] display entries
def get_standardized_title_display(rec)
  blank = ['', ' ']
  acc = []

  rec.fields(%w[130 240]).each do |field|
    # added 2017/05/15: filter out 0 (authority record numbers) added by Alma
    title = join_subfields(field, &subfield_not_in(%w[0 6 8 e w]))
    title_param_value = join_subfields(field, &subfield_not_in(%w[0 5 6 8 e w]))
    acc << {
      value: title,
      value_for_link: title_param_value,
      value_append: get_title_extra(field),
      link_type: 'title_search'
    }
  end

  rec.fields('730')
     .select { |f| blank.member?(f.indicator1) || blank.member?(f.indicator2) }
     .select { |f| f.none? { |sf| sf.code == 'i' } }
     .each do |field|
    title = join_subfields(field, &subfield_not_in(%w[5 6 8 e w]))
    acc << { value: title, value_append: get_title_extra(field), link_type: 'title_search' }
  end

  rec.fields('880')
     .select { |f| has_subfield6_value(f, /^(130|240|730)/) }
     .select { |f| f.none? { |sf| sf.code == 'i' } }
     .each do |field|
    title = join_subfields(field, &subfield_not_in(%w[5 6 8 e w]))
    acc << { value: title, value_append: get_title_extra(field), link_type: 'title_search' }
  end

  acc
end
1958
+
1959
# Value of the first 250 field only, with subfields 6 and 8 removed.
# @param rec the record to read
# @return [Array] zero or one whitespace-normalized value
def get_edition_values(rec)
  rec.fields('250').first(1).map do |edition|
    kept = edition.find_all(&subfield_not_in(%w[6 8]))
    join_and_trim_whitespace(kept.map(&:value))
  end
end
1965
+
1966
# Display values for every 250 field followed by every 880 linked to a 250;
# subfields 6 and 8 are dropped in both cases.
# @param rec the record to read
# @return [Array] joined values, 250s first
def get_edition_display(rec)
  render = ->(field) { join_subfields(field, &subfield_not_in(%w[6 8])) }
  editions = rec.fields('250').map(&render)
  linked = rec.fields('880').select { |f| has_subfield6_value(f, /^250/) }.map(&render)
  editions + linked
end
1978
+
1979
# Runs get_name_1xx_field over every 111 field.
# @param rec the record to read
# @return [Array] one get_name_1xx_field result per 111
def get_conference_values(rec)
  meetings = rec.fields('111')
  meetings.map { |meeting| get_name_1xx_field(meeting) }
end
1984
+
1985
# Search values from 111/711/811 fields, using subfields a, c, d and e only.
# @param rec the record to read
# @return [Array] one whitespace-normalized value per field
def get_conference_search_values(rec)
  rec.fields(%w[111 711 811]).map do |meeting|
    wanted = meeting.select(&subfield_in(%w[a c d e]))
    join_and_trim_whitespace(wanted.map(&:value))
  end
end
1990
+
1991
# Display hashes for conferences (link_type 'author_creator_xfacet2').
# 111/711 fields are used when indicator2 is '' or ' '; if such a field has a
# subfield i its :value is left as an empty string. Linked 880 fields that
# contain a subfield i are skipped entirely.
# @param rec the record to read
# @return [Array<Hash>] 111/711 entries, then linked 880 entries
def get_conference_display(rec)
  eligible = rec.fields(%w[111 711]).select { |f| ['', ' '].include?(f.indicator2) }
  direct = eligible.map do |field|
    has_sub_i = field.any? { |sf| sf.code == 'i' }
    # added 2017/05/18: filter out 0 (authority record numbers) added by Alma
    name = has_sub_i ? '' : join_subfields(field, &subfield_not_in(%w[0 4 5 6 8 e j w]))
    extra = join_subfields(field, &subfield_in(%w[e j w]))
    { value: name, value_append: extra, link_type: 'author_creator_xfacet2' }
  end

  linked_fields = rec.fields('880')
                     .select { |f| has_subfield6_value(f, /^(111|711)/) }
                     .reject { |f| f.any? { |sf| sf.code == 'i' } }
  linked = linked_fields.map do |field|
    # added 2017/05/18: filter out 0 (authority record numbers) added by Alma
    name = join_subfields(field, &subfield_not_in(%w[0 4 5 6 8 e j w]))
    extra = join_subfields(field, &subfield_in(%w[4 e j w]))
    { value: name, value_append: extra, link_type: 'author_creator_xfacet2' }
  end

  direct + linked
end
2014
+
2015
# Single series value: the first 800/810/811/830 field rendered with
# get_series_8xx_field, or, when the record has none of those, the first
# 400/410/411/440/490 field rendered with get_series_4xx_field.
# @param rec the record to read
# @return [Array] zero or one value
def get_series_values(rec)
  first_8xx = rec.fields(%w[800 810 811 830]).first(1)
  return first_8xx.map { |field| get_series_8xx_field(field) } unless first_8xx.empty?

  rec.fields(%w[400 410 411 440 490]).first(1).map { |field| get_series_4xx_field(field) }
end
2029
+
2030
# Memoized, priority-ordered list of the series tags that get_series_display
# looks for.
#
# Fix: '410' was missing from the list even though get_series_display has an
# explicit branch for it (%w{800 810 811 400 410 411}), its 880 pattern
# matches 410, and get_series_values reads 410 as well. Without it a 410
# series statement could never be picked up for display.
#
# @return [Array<String>] series tags, 8xx before 4xx
def series_tags
  @series_tags ||= %w[800 810 811 830 400 410 411 440 490]
end
2033
+
2034
# Display hashes for series statements.
# The first tag from series_tags that the record actually has decides how its
# fields are rendered: name-type tags (800/810/811/400/410/411) link as
# 'author_search', title-type tags (830/440/490) as 'title_search'. Fields of
# every other present tag, and 880s linked to any series tag, are added as
# plain unlinked values.
# @param rec the record to read
# @return [Array<Hash>] display entries
def get_series_display(rec)
  tags_present = series_tags.select { |tag| rec[tag].present? }
  lead_tag = tags_present.first
  entries = []

  case lead_tag
  when *%w[800 810 811 400 410 411]
    rec.fields(lead_tag).each do |field|
      # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
      series = join_subfields(field, &subfield_not_in(%w[0 5 6 8 e t w v n]))
      fragments = field.map do |sf|
        if %w[e w v n t].include?(sf.code)
          [' ', sf.value]
        elsif sf.code == '4'
          [', ', relator_codes[sf.value]]
        end
      end
      entries << { value: series, value_append: fragments.flatten.join.strip, link_type: 'author_search' }
    end
  when *%w[830 440 490]
    rec.fields(lead_tag).each do |field|
      # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
      series = join_subfields(field, &subfield_not_in(%w[0 5 6 8 c e w v n]))
      trailer = join_subfields(field, &subfield_in(%w[c e w v n]))
      entries << { value: series, value_append: trailer, link_type: 'title_search' }
    end
  end

  rec.fields(tags_present.drop(1)).each do |field|
    # added 2017/04/10: filter out 0 (authority record numbers) added by Alma
    entries << { value: join_subfields(field, &subfield_not_in(%w[0 5 6 8])), link: false }
  end

  linked = rec.fields('880').select do |f|
    has_subfield6_value(f, /^(800|810|811|830|400|410|411|440|490)/)
  end
  linked.each do |field|
    entries << { value: join_subfields(field, &subfield_not_in(%w[5 6 8])), link: false }
  end

  entries
end
2077
+
2078
# Series search values, concatenated in this order:
#   400/410/411 with indicator2 '0' (without subfields 4 6 8),
#   400/410/411 with indicator2 '1' (without 4 6 8 a),
#   440 (without 0 5 6 8 w), 800/810/811 (without 0 4 5 6 7 8 w),
#   830 (without 0 5 6 7 8 w), and the subfield f values of 533 with
#   parentheses removed.
# @param rec the record to read
# @return [Array<String>] search values
def get_series_search_values(rec)
  without = lambda do |fields, excluded|
    fields.map { |field| join_subfields(field, &subfield_not_in(excluded)) }
  end

  values = []
  values.concat(without.call(rec.fields(%w[400 410 411]).select { |f| f.indicator2 == '0' }, %w[4 6 8]))
  values.concat(without.call(rec.fields(%w[400 410 411]).select { |f| f.indicator2 == '1' }, %w[4 6 8 a]))
  values.concat(without.call(rec.fields(%w[440]), %w[0 5 6 8 w]))
  values.concat(without.call(rec.fields(%w[800 810 811]), %w[0 4 5 6 7 8 w]))
  values.concat(without.call(rec.fields(%w[830]), %w[0 5 6 7 8 w]))

  reproduction_series = rec.fields(%w[533]).map do |field|
    field.select { |sf| sf.code == 'f' }
         .map { |sf| sf.value.gsub(/\(|\)/, '') }
         .join(' ')
  end
  values + reproduction_series
end
2111
+
2112
# Values from 773 fields with subfields 6, 7, 8 and w removed.
# @param rec the record to read
# @return [Array] one whitespace-normalized value per 773
def get_contained_within_values(rec)
  rec.fields('773').map do |host_item|
    kept = host_item.find_all(&subfield_not_in(%w[6 7 8 w]))
    join_and_trim_whitespace(kept.map(&:value))
  end
end
2118
+
2119
+ # @return [Array] of hashes each describing a physical holding
2120
# Turns each EnrichedMarc::TAG_HOLDING field into a hash.
#
# Sample enriched MARC:
#   <datafield tag="hld" ind1="0" ind2=" ">
#     <subfield code="b">MAIN</subfield>
#     <subfield code="c">main</subfield>
#     <subfield code="h">NA2540</subfield>
#     <subfield code="i">.G63 2009</subfield>
#     <subfield code="8">226026380000541</subfield>
#   </datafield>
#
# Alma never populates subfield 'a' ('location'); it appears to store the
# location code in 'c' and the display name in 'b'.
# @param rec the record to read
# @return [Array<Hash>] :holding_id, :location, :classification_part, :item_part
def get_physical_holdings(rec)
  holdings = []
  rec.fields(EnrichedMarc::TAG_HOLDING).each do |holding|
    holdings << {
      holding_id: holding[EnrichedMarc::SUB_HOLDING_SEQUENCE_NUMBER],
      location: holding[EnrichedMarc::SUB_HOLDING_SHELVING_LOCATION],
      classification_part: holding[EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART],
      item_part: holding[EnrichedMarc::SUB_HOLDING_ITEM_PART]
    }
  end
  holdings
end
2141
+
2142
+ # @return [Array] of hashes each describing an electronic holding
2143
# Turns each EnrichedMarc::TAG_ELECTRONIC_INVENTORY field that has a
# collection name into a hash.
#
# Sample enriched MARC:
#   <datafield tag="prt" ind1=" " ind2=" ">
#     <subfield code="pid">5310486800000521</subfield>
#     <subfield code="url">https://sandbox01-na.alma.exlibrisgroup.com/view/uresolver/01UPENN_INST/openurl?u.ignore_date_coverage=true&amp;rft.mms_id=9926519600521</subfield>
#     <subfield code="iface">PubMed Central</subfield>
#     <subfield code="coverage"> Available from 2005 volume: 1. Most recent 1 year(s) not available.</subfield>
#     <subfield code="library">MAIN</subfield>
#     <subfield code="collection">PubMed Central (Training)</subfield>
#     <subfield code="czcolid">61111058563444000</subfield>
#     <subfield code="8">5310486800000521</subfield>
#   </datafield>
#
# Holdings with a blank collection name are NOT indexed: those records were
# created from Voyager 856 fields and do not have actual links.
# @param rec the record to read
# @return [Array<Hash>] :portfolio_pid, :url, :collection, :coverage
def get_electronic_holdings(rec)
  rec.fields(EnrichedMarc::TAG_ELECTRONIC_INVENTORY).each_with_object([]) do |portfolio, holdings|
    next unless portfolio[EnrichedMarc::SUB_ELEC_COLLECTION_NAME].present?

    holdings << {
      portfolio_pid: portfolio[EnrichedMarc::SUB_ELEC_PORTFOLIO_PID],
      url: portfolio[EnrichedMarc::SUB_ELEC_ACCESS_URL],
      collection: portfolio[EnrichedMarc::SUB_ELEC_COLLECTION_NAME],
      coverage: portfolio[EnrichedMarc::SUB_ELEC_COVERAGE]
    }
  end
end
2171
+
2172
# Collects every EnrichedMarc::SUB_BOUND_WITH_ID subfield value found on the
# record's EnrichedMarc::TAG_HOLDING fields.
# @param rec the record to read
# @return [Array] flat list of subfield values
def get_bound_with_id_values(rec)
  rec.fields(EnrichedMarc::TAG_HOLDING).flat_map do |holding|
    matching = holding.select(&subfield_in([EnrichedMarc::SUB_BOUND_WITH_ID]))
    matching.map(&:value)
  end
end
2177
+
2178
# Concatenates subfields 4, e and w of a field into one string: subfield 4 is
# looked up in relator_codes and prefixed with ", ", the others are prefixed
# with a single space.
# @param field the field to read
# @return [String] the concatenated text ('' when no such subfields exist)
def get_subfield_4ew(field)
  fragments = field.select(&subfield_in(%w[4 e w])).map do |sf|
    if sf.code == '4'
      ", #{relator_codes[sf.value]}"
    else
      " #{sf.value}"
    end
  end
  fragments.join
end
2183
+
2184
# Joins subfields e and w of a field (used as the :value_append of title
# display entries).
# @param field the field to read
# @return the join_subfields result for subfields e and w
def get_title_extra(field)
  extra_codes = %w[e w]
  join_subfields(field, &subfield_in(extra_codes))
end
2187
+
2188
# Display values for other titles: all 246 fields (without subfields 6 8),
# 740 fields whose indicator2 is '', ' ', '0', '1' or '3' (without 5 6 8),
# and 880 fields linked to 246/740 (without 5 6 8).
# @param rec the record to read
# @return [Array] joined values in that order
def get_other_title_display(rec)
  varying = rec.fields('246').map { |f| join_subfields(f, &subfield_not_in(%w[6 8])) }
  added = rec.fields('740')
             .select { |f| ['', ' ', '0', '1', '3'].include?(f.indicator2) }
             .map { |f| join_subfields(f, &subfield_not_in(%w[5 6 8])) }
  linked = rec.fields('880')
              .select { |f| has_subfield6_value(f, /^(246|740)/) }
              .map { |f| join_subfields(f, &subfield_not_in(%w[5 6 8])) }
  varying + added + linked
end
2205
+
2206
+ # distribution and manufacture share the same logic except for indicator2
2207
# Shared logic for the production/distribution/manufacture displays: joins
# subfields a, b and c of 264 fields, and of 880 fields linked to 264, whose
# indicator2 equals the requested value.
# @param rec the record to read
# @param indicator2 [String] the indicator2 value to match
# @return [Array] 264 values first, linked 880 values after
def get_264_or_880_fields(rec, indicator2)
  render = ->(field) { join_subfields(field, &subfield_in(%w[a b c])) }
  direct = rec.fields('264').select { |f| f.indicator2 == indicator2 }.map(&render)
  linked = rec.fields('880')
              .select { |f| f.indicator2 == indicator2 }
              .select { |f| has_subfield6_value(f, /^264/) }
              .map(&render)
  direct + linked
end
2222
+
2223
# Production display: get_264_or_880_fields restricted to indicator2 '0'.
# @param rec the record to read
# @return [Array] display values
def get_production_display(rec)
  production_indicator = '0'
  get_264_or_880_fields(rec, production_indicator)
end
2226
+
2227
# Distribution display: get_264_or_880_fields restricted to indicator2 '2'.
# @param rec the record to read
# @return [Array] display values
def get_distribution_display(rec)
  distribution_indicator = '2'
  get_264_or_880_fields(rec, distribution_indicator)
end
2230
+
2231
# Manufacture display: get_264_or_880_fields restricted to indicator2 '3'.
# @param rec the record to read
# @return [Array] display values
def get_manufacture_display(rec)
  manufacture_indicator = '3'
  get_264_or_880_fields(rec, manufacture_indicator)
end
2234
+
2235
# Display values from 255 and 342 fields, filtered through
# subfield_not_6_or_8.
# @param rec the record to read
# @return [Array] one joined value per field
def get_cartographic_display(rec)
  cartographic_fields = rec.fields(%w[255 342])
  cartographic_fields.map { |field| join_subfields(field, &subfield_not_6_or_8) }
end
2240
+
2241
# Display values from 026 fields with subfields 2, 5, 6 and 8 removed.
# @param rec the record to read
# @return [Array] one joined value per 026
def get_fingerprint_display(rec)
  fingerprints = rec.fields('026')
  fingerprints.map { |field| join_subfields(field, &subfield_not_in(%w[2 5 6 8])) }
end
2246
+
2247
# Arrangement display: delegates to get_datafield_and_880 for tag 351.
# @param rec the record to read
# @return whatever get_datafield_and_880 returns
def get_arrangement_display(rec)
  arrangement_tag = '351'
  get_datafield_and_880(rec, arrangement_tag)
end
2250
+
2251
# Display hashes (link_type 'title_search') for 247 fields and 880 fields
# linked to 247, kept in record order. :value omits subfields 6, 8, e and w;
# :value_append is built from subfields e and w.
# @param rec the record to read
# @return [Array<Hash>] display entries
def get_former_title_display(rec)
  former = rec.fields.select do |f|
    f.tag == '247' || (f.tag == '880' && has_subfield6_value(f, /^247/))
  end
  former.map do |field|
    {
      value: join_subfields(field, &subfield_not_in(%w[6 8 e w])),
      value_append: join_subfields(field, &subfield_in(%w[e w])),
      link_type: 'title_search'
    }
  end
end
2260
+
2261
+ # logic for 'Continues' and 'Continued By' is very similar
2262
# Shared logic for 'Continues' and 'Continued By': takes fields with the
# given tag (or 880s linked to it), keeps those that have at least one of
# subfields i, a, s, t, n or d, and joins exactly those subfields.
# @param rec the record to read
# @param tag [String] the tag to match
# @return [Array] joined values in record order
def get_continues(rec, tag)
  candidates = rec.fields.select do |f|
    f.tag == tag || (f.tag == '880' && has_subfield6_value(f, /^#{tag}/))
  end
  usable = candidates.select { |f| f.any?(&subfield_in(%w[i a s t n d])) }
  usable.map { |field| join_subfields(field, &subfield_in(%w[i a s t n d])) }
end
2270
+
2271
# 'Continues' display: get_continues for tag 780.
# @param rec the record to read
# @return [Array] display values
def get_continues_display(rec)
  preceding_tag = '780'
  get_continues(rec, preceding_tag)
end
2274
+
2275
# 'Continued By' display: get_continues for tag 785.
# @param rec the record to read
# @return [Array] display values
def get_continued_by_display(rec)
  succeeding_tag = '785'
  get_continues(rec, succeeding_tag)
end
2278
+
2279
# Display hashes for 752 fields (link_type 'search', with subfields e and w
# moved to :value_append) followed by unlinked entries for each value that
# get_880_subfield_not_6_or_8 returns for 752.
# @param rec the record to read
# @return [Array<Hash>] display entries
def get_place_of_publication_display(rec)
  places = rec.fields('752').map do |field|
    {
      value: join_subfields(field, &subfield_not_in(%w[6 8 e w])),
      value_append: join_subfields(field, &subfield_in(%w[e w])),
      link_type: 'search'
    }
  end
  linked = get_880_subfield_not_6_or_8(rec, '752').map { |text| { value: text, link: false } }
  places + linked
end
2291
+
2292
# Language note display: delegates to get_datafield_and_880 for tag 546.
# @param rec the record to read
# @return whatever get_datafield_and_880 returns
def get_language_display(rec)
  language_note_tag = '546'
  get_datafield_and_880(rec, language_note_tag)
end
2295
+
2296
+ # for system details: extract subfield 3 plus other subfields as specified by passed-in block
2297
# For system details: combines subfield 3 with the subfields selected by the
# given block. Subfield 3 values lose their trailing period and are joined
# with ': '; the block-selected text loses its trailing semicolon; the two
# parts are then joined with a space.
# @param field the field to read
# @yield [subfield] predicate choosing the other subfields to include
# @return [String] the combined text
def get_sub3_and_other_subs(field, &block)
  materials = field.select(&subfield_in(%w[3]))
                   .map { |sf| trim_trailing_period(sf.value) }
                   .join(': ')
  details = join_subfields(field, &block)
  [materials, trim_trailing_semicolon(details)].join(' ')
end
2302
+
2303
# System details display built with get_sub3_and_other_subs.
# Regular fields come first (538, 344, 345/346, 347), then the 880 fields
# linked to each of those tags in the same order. Every tag group has its own
# list of subfields to include.
# @param rec the record to read
# @return [Array<String>] display values
def get_system_details_display(rec)
  # [tags, pattern matching the 880 linkage, subfields to include]
  specs = [
    ['538', /^538/, %w[a i u]],
    ['344', /^344/, %w[a b c d e f g h]],
    [%w[345 346], /^(345|346)/, %w[a b]],
    ['347', /^347/, %w[a b c d e f]]
  ]

  direct = specs.flat_map do |tags, _pattern, codes|
    rec.fields(tags).map { |field| get_sub3_and_other_subs(field, &subfield_in(codes)) }
  end
  linked = specs.flat_map do |_tags, pattern, codes|
    rec.fields('880')
       .select { |f| has_subfield6_value(f, pattern) }
       .map { |field| get_sub3_and_other_subs(field, &subfield_in(codes)) }
  end
  direct + linked
end
2339
+
2340
# Biography display: delegates to get_datafield_and_880 for tag 545.
# @param rec the record to read
# @return whatever get_datafield_and_880 returns
def get_biography_display(rec)
  biography_tag = '545'
  get_datafield_and_880(rec, biography_tag)
end
2343
+
2344
# Summary display: delegates to get_datafield_and_880 for tag 520.
# @param rec the record to read
# @return whatever get_datafield_and_880 returns
def get_summary_display(rec)
  summary_tag = '520'
  get_datafield_and_880(rec, summary_tag)
end
2347
+
2348
# Contents display: the joined text of each 505 field, and of each 880 linked
# to a 505, is split on '--' so every piece becomes its own entry.
# @param rec the record to read
# @return [Array<String>] pieces from 505s first, linked 880s after
def get_contents_display(rec)
  split_entries = ->(field) { join_subfields(field, &subfield_not_6_or_8).split('--') }
  direct = rec.fields('505').flat_map(&split_entries)
  linked = rec.fields('880').select { |f| has_subfield6_value(f, /^505/) }.flat_map(&split_entries)
  direct + linked
end
2360
+
2361
# Search values from 505 fields, using every subfield value of the field.
# @param rec the record to read
# @return [Array] one whitespace-normalized value per 505
def get_contents_note_search_values(rec)
  rec.fields('505').map do |contents_note|
    all_values = contents_note.to_a.map(&:value)
    join_and_trim_whitespace(all_values)
  end
end
2366
+
2367
# Participant display: delegates to get_datafield_and_880 for tag 511.
# @param rec the record to read
# @return whatever get_datafield_and_880 returns
def get_participant_display(rec)
  participant_tag = '511'
  get_datafield_and_880(rec, participant_tag)
end
2370
+
2371
# Credits display: delegates to get_datafield_and_880 for tag 508.
# @param rec the record to read
# @return whatever get_datafield_and_880 returns
def get_credits_display(rec)
  credits_tag = '508'
  get_datafield_and_880(rec, credits_tag)
end
2374
+
2375
+ # 10/2018 kms: add 586
2376
# General notes display from the 5xx note tags listed below and from 880
# fields linked to them. A 588 contributes only its subfield a; every other
# tag contributes all subfields except 5, 6 and 8.
#
# Fix: for linked 880 fields the 588 test compared the whole subfield 6 value
# with '588'. A linkage value starts with the tag but continues with an
# occurrence number (the pattern used to select these fields is anchored
# with ^ for that reason), so the comparison was never true and linked 588
# notes were rendered with the generic rule. The check now looks at the
# prefix.
#
# @param rec the record to read
# @return [Array] note values, regular fields first, linked 880s after
def get_notes_display(rec)
  note_tags = %w[500 502 504 515 518 525 533 550 580 586 588]

  direct = rec.fields(note_tags).map do |field|
    if field.tag == '588'
      join_subfields(field, &subfield_in(%w[a]))
    else
      join_subfields(field, &subfield_not_in(%w[5 6 8]))
    end
  end

  linked_fields = rec.fields('880').select do |f|
    has_subfield6_value(f, /^(500|502|504|515|518|525|533|550|580|586|588)/)
  end
  linked = linked_fields.map do |field|
    sub6 = field.select(&subfield_in(%w[6])).map(&:value).first
    if sub6.to_s.start_with?('588')
      join_subfields(field, &subfield_in(%w[a]))
    else
      join_subfields(field, &subfield_not_in(%w[5 6 8]))
    end
  end

  direct + linked
end
2397
+
2398
+ # 10/2018 kms: add 562 563 585. Add 561 if subf a starts with Athenaeum copy:
2399
+ # non-Athenaeum 561 still displays as Penn Provenance
2400
# Local notes display, in this order: subfield a of 561 fields that have a
# subfield a starting with "Athenaeum copy: "; 562/563/585/590 fields without
# subfields 5, 6 and 8; and the get_880 values for those four tags with the
# same subfields excluded.
# @param rec the record to read
# @return [Array] note values
def get_local_notes_display(rec)
  athenaeum = rec.fields('561')
                 .select { |f| f.any? { |sf| sf.code == 'a' && sf.value =~ /^Athenaeum copy: / } }
                 .map { |f| join_subfields(f, &subfield_in(%w[a])) }
  local = rec.fields(%w[562 563 585 590]).map { |f| join_subfields(f, &subfield_not_in(%w[5 6 8])) }
  linked = get_880(rec, %w[562 563 585 590]) { |sf| !%w[5 6 8].include?(sf.code) }
  athenaeum + local + linked
end
2415
+
2416
# Finding aid display: delegates to get_datafield_and_880 for tag 555.
# @param rec the record to read
# @return whatever get_datafield_and_880 returns
def get_finding_aid_display(rec)
  finding_aid_tag = '555'
  get_datafield_and_880(rec, finding_aid_tag)
end
2419
+
2420
+ # get 650/880 for provenance and chronology: prefix should be 'PRO' or 'CHR'
2421
+ # 11/2018: do not display $5 in PRO or CHR subjs
2422
# Shared logic for provenance ('PRO') and chronology ('CHR') subjects.
# Looks at 650 fields, and 880 fields linked to 650, that have indicator2 '4'
# and a subfield a starting with the prefix (optionally preceded by '%').
# The prefix is stripped from subfield a and the remaining subfields are
# appended, leaving out 6, 8, e, w and 5 (no $5 in PRO/CHR since 11/2018).
# @param rec the record to read
# @param prefix [String] 'PRO' or 'CHR'
# @return [Array<Hash>] entries with link_type 'subject_search'
def get_650_and_880(rec, prefix)
  flagged = lambda do |field|
    field.any? { |sf| sf.code == 'a' && sf.value =~ /^(#{prefix}|%#{prefix})/ }
  end
  to_entry = lambda do |field|
    heading = field.select(&subfield_in(%w[a])).map { |sf| sf.value.gsub(/^%?#{prefix}/, '') }.join(' ')
    remainder = join_subfields(field, &subfield_not_in(%w[a 6 8 e w 5]))
    value = [heading, remainder].join(' ')
    { value: value, link_type: 'subject_search' } if value.present?
  end

  direct = rec.fields('650')
              .select { |f| f.indicator2 == '4' }
              .select(&flagged)
              .map(&to_entry)
              .compact
  linked = rec.fields('880')
              .select { |f| f.indicator2 == '4' }
              .select { |f| has_subfield6_value(f,/^650/) }
              .select(&flagged)
              .map(&to_entry)
              .compact
  direct + linked
end
2447
+
2448
+ # 11/2018 kms: a 561 starting Athenaeum copy: should not appear as Penn Provenance, display that as Local Notes
2449
# Provenance display entries, in this order:
#   1. 561 fields with indicator1 '1'/''/' ' and blank indicator2 that have a
#      subfield a NOT starting with "Athenaeum copy: " (those are shown as
#      local notes instead, per the 11/2018 change);
#   2. 880 fields linked to 561 with the same indicator values;
#   3. the 'PRO' entries from get_650_and_880.
# Entries from 1 and 2 are unlinked and carry subfield a only.
# NOTE(review): the `if value` guard only drops nil/false, so an empty string
# would presumably still produce an entry; confirm against join_subfields.
# @param rec the record to read
# @return [Array<Hash>] display entries
def get_provenance_display(rec)
  indicators_match = lambda do |f|
    ['1', '', ' '].include?(f.indicator1) && [' ', ''].include?(f.indicator2)
  end
  to_entry = lambda do |field|
    value = join_subfields(field, &subfield_in(%w[a]))
    { value: value, link: false } if value
  end

  direct = rec.fields('561').select do |f|
    indicators_match.call(f) && f.any? { |sf| sf.code == 'a' && sf.value !~ /^Athenaeum copy: / }
  end
  linked = rec.fields('880')
              .select { |f| has_subfield6_value(f, /^561/) }
              .select(&indicators_match)

  direct.map(&to_entry).compact + linked.map(&to_entry).compact + get_650_and_880(rec, 'PRO')
end
2467
+
2468
# Chronology display: the 'CHR' entries from get_650_and_880.
# @param rec the record to read
# @return [Array<Hash>] display entries
def get_chronology_display(rec)
  chronology_prefix = 'CHR'
  get_650_and_880(rec, chronology_prefix)
end
2471
+
2472
# Related collections display: delegates to get_datafield_and_880 for tag 544.
# @param rec the record to read
# @return whatever get_datafield_and_880 returns
def get_related_collections_display(rec)
  related_collections_tag = '544'
  get_datafield_and_880(rec, related_collections_tag)
end
2475
+
2476
# Cited-in display: delegates to get_datafield_and_880 for tag 510.
# @param rec the record to read
# @return whatever get_datafield_and_880 returns
def get_cited_in_display(rec)
  cited_in_tag = '510'
  get_datafield_and_880(rec, cited_in_tag)
end
2479
+
2480
# Publications-about display: delegates to get_datafield_and_880 for tag 581.
# @param rec the record to read
# @return whatever get_datafield_and_880 returns
def get_publications_about_display(rec)
  publications_about_tag = '581'
  get_datafield_and_880(rec, publications_about_tag)
end
2483
+
2484
# Cite-as display: delegates to get_datafield_and_880 for tag 524.
# @param rec the record to read
# @return whatever get_datafield_and_880 returns
def get_cite_as_display(rec)
  cite_as_tag = '524'
  get_datafield_and_880(rec, cite_as_tag)
end
2487
+
2488
# Contributor display hashes (link_type 'author_creator_xfacet2').
# 700/710 fields qualify when indicator2 is '', ' ' or '0' and there is no
# subfield i; linked 880 fields qualify when they have no subfield i.
# :value joins subfields a b c d j q. For regular fields :value_append is
# built from e, u, 3 and 4 (4 goes through relator_codes); for linked 880s it
# joins e, u and 3.
# @param rec the record to read
# @return [Array<Hash>] 700/710 entries, then linked 880 entries
def get_contributor_display(rec)
  lacks_sub_i = ->(f) { f.none? { |sf| sf.code == 'i' } }

  direct = rec.fields(%w[700 710])
              .select { |f| ['', ' ', '0'].include?(f.indicator2) }
              .select(&lacks_sub_i)
              .map do |field|
    name = join_subfields(field, &subfield_in(%w[a b c d j q]))
    roles = field.select(&subfield_in(%w[e u 3 4])).map do |sf|
      sf.code == '4' ? ", #{relator_codes[sf.value]}" : " #{sf.value}"
    end
    { value: name, value_append: roles.join, link_type: 'author_creator_xfacet2' }
  end

  linked = rec.fields('880')
              .select { |f| has_subfield6_value(f, /^(700|710)/) && lacks_sub_i.call(f) }
              .map do |field|
    {
      value: join_subfields(field, &subfield_in(%w[a b c d j q])),
      value_append: join_subfields(field, &subfield_in(%w[e u 3])),
      link_type: 'author_creator_xfacet2'
    }
  end

  direct + linked
end
2513
+
2514
+ # if there's a subfield i, extract its value, and if there's something
2515
+ # in parentheses in that value, extract that.
2516
# Returns the value of the field's first subfield i with its first
# parenthesized portion (parentheses included) removed, then with a trailing
# period and a trailing colon trimmed. Yields '' when there is no subfield i.
# @param field the field to read
# @return [String] the cleaned-up subfield i text
def remove_paren_value_from_subfield_i(field)
  sub_i = field.find { |sf| sf.code == 'i' }
  text = nil
  if sub_i
    parenthesized = /\((.+?)\)/.match(sub_i.value)
    text = parenthesized ? sub_i.value.sub('(' + parenthesized[1] + ')', '') : sub_i.value
  end
  trim_trailing_colon(trim_trailing_period(text || ''))
end
2527
+
2528
# Related work display strings from 700/710/711/730 fields, and 880 fields
# linked to them, that have a blank indicator2 and a subfield t.
# Each string is the cleaned subfield i text and the remaining subfields
# (0 and i dropped, 4 rendered through relator_codes), joined with ':' when
# both parts are present.
# @param rec the record to read
# @return [Array<String>] regular fields first, linked 880s after
def get_related_work_display(rec)
  has_title = ->(f) { f.any? { |sf| sf.code == 't' } }
  describe = lambda do |field|
    label = remove_paren_value_from_subfield_i(field) || ''
    body = field.map do |sf|
      if sf.code == '4'
        ", #{relator_codes[sf.value]}"
      elsif !%w[0 i].include?(sf.code)
        " #{sf.value}"
      end
    end
    [label, body.compact.join].select(&:present?).join(':')
  end

  direct = rec.fields(%w[700 710 711 730])
              .select { |f| ['', ' '].include?(f.indicator2) }
              .select(&has_title)
              .map(&describe)
  linked = rec.fields('880')
              .select { |f| ['', ' '].include?(f.indicator2) }
              .select { |f| has_subfield6_value(f, /^(700|710|711|730)/) }
              .select(&has_title)
              .map(&describe)
  direct + linked
end
2561
+
2562
# 'Contains' display strings from 700/710/711/730/740 fields, and 880 fields
# linked to them, whose indicator2 is '2'.
# Regular fields drop subfields 0 5 6 8 i and render 4 through
# relator_codes; linked 880s join everything except 0 5 6 8 i. The cleaned
# subfield i text is put in front, separated by ':' when both parts exist.
# @param rec the record to read
# @return [Array<String>] regular fields first, linked 880s after
def get_contains_display(rec)
  direct = rec.fields(%w[700 710 711 730 740]).select { |f| f.indicator2 == '2' }.map do |field|
    label = remove_paren_value_from_subfield_i(field) || ''
    body = field.map do |sf|
      if sf.code == '4'
        ", #{relator_codes[sf.value]}"
      elsif !%w[0 5 6 8 i].include?(sf.code)
        " #{sf.value}"
      end
    end
    [label, body.compact.join].select(&:present?).join(':')
  end

  linked_fields = rec.fields('880')
                     .select { |f| f.indicator2 == '2' }
                     .select { |f| has_subfield6_value(f, /^(700|710|711|730|740)/) }
  linked = linked_fields.map do |field|
    label = remove_paren_value_from_subfield_i(field) || ''
    body = join_subfields(field, &subfield_not_in(%w[0 5 6 8 i]))
    [label, body].select(&:present?).join(':')
  end

  direct + linked
end
2587
+
2588
# Builds the display hash for one "other edition" field.
#   :value         - subfields s, x, z, plus subfield t passed through
#                    relator_codes
#   :value_prepend - cleaned subfield i text (trailing period removed) + ':'
#   :value_append  - subfield h in parentheses plus every subfield not in
#                    i h s t x z e f o r w y 7
# NOTE(review): subfield t is looked up in relator_codes, the same table the
# rest of the file uses for subfield 4 - confirm that this is intended.
# @param field the field to read
# @return [Hash] entry with link_type 'author_creator_xfacet2'
def get_other_edition_value(field)
  label = remove_paren_value_from_subfield_i(field) || ''

  main_parts = field.map do |sf|
    case sf.code
    when 's', 'x', 'z' then " #{sf.value}"
    when 't' then " #{relator_codes[sf.value]}. "
    end
  end

  extra_parts = field.map do |sf|
    if sf.code == 'h'
      " (#{sf.value}) "
    elsif !%w[i s t x z e f o r w y 7].include?(sf.code)
      " #{sf.value}"
    end
  end

  {
    value: main_parts.compact.join,
    value_prepend: trim_trailing_period(label) + ':',
    value_append: extra_parts.compact.join,
    link_type: 'author_creator_xfacet2'
  }
end
2611
+
2612
# Other edition display: get_other_edition_value for every 775 field that has
# a subfield i, then for every 880 field linked to 775 that has a blank
# indicator2 and a subfield i.
# @param rec the record to read
# @return [Array<Hash>] display entries
def get_other_edition_display(rec)
  has_sub_i = ->(f) { f.any? { |sf| sf.code == 'i' } }

  direct = rec.fields('775').select(&has_sub_i)
  linked = rec.fields('880')
              .select { |f| ['', ' '].include?(f.indicator2) }
              .select { |f| has_subfield6_value(f, /^775/) }
              .select(&has_sub_i)

  direct.map { |field| get_other_edition_value(field) } +
    linked.map { |field| get_other_edition_value(field) }
end
2628
+
2629
# 'Contained in' display: subfields a g i s t of each 773 (blank results
# dropped), followed by the get_880 values for 773 restricted to the same
# subfields.
# @param rec the record to read
# @return [Array] display values
def get_contained_in_display(rec)
  wanted = %w[a g i s t]
  direct = rec.fields('773')
              .map { |field| join_subfields(field, &subfield_in(%w[a g i s t])) }
              .select(&:present?)
  linked = get_880(rec, '773') { |sf| wanted.include?(sf.code) }
  direct + linked
end
2639
+
2640
# Constituent unit display: subfields i a s t of each 774 (blank results
# dropped), followed by the get_880 values for 774 restricted to the same
# subfields.
# @param rec the record to read
# @return [Array] display values
def get_constituent_unit_display(rec)
  wanted = %w[i a s t]
  direct = rec.fields('774')
              .map { |field| join_subfields(field, &subfield_in(%w[i a s t])) }
              .select(&:present?)
  linked = get_880(rec, '774') { |sf| wanted.include?(sf.code) }
  direct + linked
end
2650
+
2651
# 'Has supplement' display: each 770 filtered through subfield_not_6_or_8
# (blank results dropped), followed by what get_880_subfield_not_6_or_8
# returns for 770.
# @param rec the record to read
# @return [Array] display values
def get_has_supplement_display(rec)
  direct = rec.fields('770')
              .map { |field| join_subfields(field, &subfield_not_6_or_8) }
              .select(&:present?)
  direct + get_880_subfield_not_6_or_8(rec, '770')
end
2659
+
2660
# 'Other format' display: subfields i a s t o of each 776 (blank results
# dropped), followed by the get_880 values for the same tag and subfields.
#
# Fix: the linked-880 lookup asked get_880 for tag '774' (copied from
# get_constituent_unit_display), so alternate-script 776 data was never shown
# and 774 data leaked into this display. It now asks for '776'.
#
# @param rec the record to read
# @return [Array] display values, 776 fields first
def get_other_format_display(rec)
  acc = []
  acc += rec.fields('776').map do |field|
    join_subfields(field, &subfield_in(%w[i a s t o]))
  end.select(&:present?)
  acc += get_880(rec, '776') do |sf|
    %w[i a s t o].member?(sf.code)
  end
  acc
end
2670
+
2671
# ISBN display: subfields a and z of each 020 (blank results dropped),
# followed by the get_880 values for 020 restricted to the same subfields.
# @param rec the record to read
# @return [Array] display values
def get_isbn_display(rec)
  direct = rec.fields('020')
              .map { |field| join_subfields(field, &subfield_in(%w[a z])) }
              .select(&:present?)
  linked = get_880(rec, '020') { |sf| %w[a z].include?(sf.code) }
  direct + linked
end
2681
+
2682
# Collects ISSN display strings from 022 fields (subfields a and z),
# followed by the values get_880 yields for 880 fields linked to 022.
# 022 results that fail `present?` are discarded.
#
# @param rec the MARC record to read
# @return [Array] 022 strings followed by the linked 880 values
def get_issn_display(rec)
  wanted = %w[a z]

  issn_strings = rec.fields('022')
                    .map { |field| join_subfields(field, &subfield_in(wanted)) }
                    .select(&:present?)
  alternate = get_880(rec, '022') { |sf| wanted.member?(sf.code) }

  issn_strings + alternate
end
2692
+
2693
# Tests whether a subfield is an 'a' subfield whose value has a line
# beginning with "(OCoLC)".
#
# @param sf an object responding to `code` and `value`
# @return [Integer, nil, false] the match offset when it matches, nil when
#   the value does not match, false when the code is not 'a'
def subfield_a_is_oclc(sf)
  return false unless sf.code == 'a'

  sf.value =~ /^\(OCoLC\).*/
end
2696
+
2697
# Extracts OCLC numbers from the first 035 field that has at least one
# subfield accepted by subfield_a_is_oclc.
#
# For each accepted subfield in that field, the digits following "(OCoLC)"
# are captured starting at the first non-zero digit, so any prefix characters
# and leading zeros are dropped. Subfields with no such digits contribute
# nothing.
#
# @param rec the MARC record to read
# @return [Array<String>] the extracted numbers; empty when no 035 qualifies
def get_oclc_id_values(rec)
  first_oclc_field = rec.fields('035').find { |f| f.any? { |sf| subfield_a_is_oclc(sf) } }
  return [] if first_oclc_field.nil?

  first_oclc_field
    .find_all { |sf| subfield_a_is_oclc(sf) }
    .map { |sf| sf.value[/^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/, 1] }
    .compact
end
2710
+
2711
# Collects publisher number display strings from 024 and 028 fields and from
# 880 fields whose subfield 6 starts with 024 or 028.
#
# All subfields except 5 and 6 are joined. Only the 024/028 results are
# filtered with `present?`; the 880 results are returned as produced.
#
# @param rec the MARC record to read
# @return [Array] 024/028 strings followed by the linked 880 strings
def get_publisher_number_display(rec)
  render = ->(field) { join_subfields(field, &subfield_not_in(%w[5 6])) }

  native = rec.fields(%w[024 028]).map(&render).select(&:present?)
  linked = rec.fields('880')
              .select { |field| has_subfield6_value(field, /^(024|028)/) }
              .map(&render)

  native + linked
end
2723
+
2724
# Returns access restriction display strings from 506 fields: all subfields
# except 5 and 6 are joined, and results failing `present?` are discarded.
#
# @param rec the MARC record to read
# @return [Array] one string per 506 field with usable content
def get_access_restriction_display(rec)
  joined = rec.fields('506').map { |field| join_subfields(field, &subfield_not_in(%w[5 6])) }
  joined.select(&:present?)
end
2729
+
2730
# Returns "bound with" display strings from 501 fields: every subfield
# except 'a' is joined, and results failing `present?` are discarded.
#
# @param rec the MARC record to read
# @return [Array] one string per 501 field with usable content
def get_bound_with_display(rec)
  joined = rec.fields('501').map { |field| join_subfields(field, &subfield_not_in(%w[a])) }
  joined.select(&:present?)
end
2735
+
2736
# Derives link text and link URL from a field (used for 856 fields).
#
# The text is the joined subfield 3 content, a space, and then the first
# subfield z value, falling back to the first subfield y value, falling back
# to an empty string. The URL is the first subfield u value (or an empty
# string) with the first literal " target=_blank" removed.
#
# @param field the MARC field to read
# @return [Array(String, String)] the pair [linktext, linkurl]
def linktext_and_url(field)
  first_value = ->(code) { field.find_all(&subfield_in([code])).map(&:value).first }

  materials = join_subfields(field, &subfield_in(%w[3]))
  label = first_value.call('z') || first_value.call('y') || ''
  url = (first_value.call('u') || '').sub(' target=_blank', '')

  ["#{materials} #{label}", url]
end
2746
+
2747
# Lower-case words that get_web_link_display drops from link text when it
# derives a bookplate image name. The list is built once and memoized.
#
# @return [Array<String>]
def words_to_remove_from_web_link
  @words_to_remove_from_web_link ||= %w[
    fund funds collection collections endowment
    endowed trust and for of the memorial
  ]
end
2752
+
2753
# Builds web link entries from 856 fields whose second indicator is '2',
# blank or empty.
#
# Every such field yields a { linktext:, linkurl: } hash. When the link text
# looks like a bookplate link ("... Fund Home Page", "A Penn Libraries
# Collection Gift", etc.), a second hash with :img_src, :img_alt and :linkurl
# is added, pointing at the bookplate image derived from the link text.
#
# @param rec the MARC record to read
# @return [Array<Hash>] one or two hashes per qualifying 856 field
def get_web_link_display(rec)
  candidates = rec.fields('856').select { |f| ['2', ' ', ''].member?(f.indicator2) }

  candidates.flat_map do |field|
    linktext, linkurl = linktext_and_url(field)
    entries = [{ linktext: linktext, linkurl: linkurl }]

    # Link text containing words/phrases commonly used in bookplate links.
    if linktext =~ /(Funds?|Collections?( +Gifts)?|Trust|Development|Endowment.*) +Home +Page|A +Penn +Libraries +Collection +Gift/
      # The bookplate image file name is the link text with boilerplate
      # phrases, punctuation and less-meaningful words removed, and the
      # remaining words run together.
      imagename = linktext.gsub(/- A Penn Libraries Collection Gift/i, '')
                          .gsub(/ Home Page/i, '')
                          .gsub(/[&.]/, '')
                          .split(/\W+/)
                          .reject { |word| words_to_remove_from_web_link.member?(word.downcase) }
                          .join('')

      entries << {
        img_src: "https://www.library.upenn.edu/sites/default/files/images/bookplates/#{imagename}.gif",
        img_alt: "#{linktext.strip} Bookplate",
        linkurl: linkurl
      }
    end

    entries
  end
end
2785
+
2786
# Returns the unique call number strings found on a record's holding and
# item fields.
#
# Some records don't have item records, only holdings, so for
# safety/comprehensiveness both are indexed and the unique values of the
# combined result are returned.
#
# Holdings contribute the first classification part and first item part,
# joined by a single space. A part that is missing or blank is left out, so
# a one-part holding yields just that part instead of a space-padded string
# that would not de-duplicate against the same call number on an item.
# Items contribute every call number subfield value that passes `present?`.
#
# @param rec the MARC record to read
# @return [Array<String>] unique call numbers, holdings first
def get_call_number_search_values(rec)
  first_value = lambda do |field, code|
    field.find_all(&subfield_in([code])).map(&:value).first
  end

  holding_values = rec.fields(EnrichedMarc::TAG_HOLDING).map do |holding|
    parts = [
      first_value.call(holding, EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART),
      first_value.call(holding, EnrichedMarc::SUB_HOLDING_ITEM_PART)
    ].select(&:present?)

    parts.join(' ') unless parts.empty?
  end.compact

  item_values = rec.fields(EnrichedMarc::TAG_ITEM).flat_map do |item|
    item.find_all { |sf| sf.code == EnrichedMarc::SUB_ITEM_CALL_NUMBER }
        .map(&:value)
        .select(&:present?)
  end

  (holding_values + item_values).uniq
end
2815
+
2816
# Passes every value from get_call_number_search_values through `references`.
#
# @param rec the MARC record to read
# @return [Array] one `references` result per call number
def get_call_number_xfacet_values(rec)
  call_numbers = get_call_number_search_values(rec)
  call_numbers.map { |call_number| references(call_number) }
end
2821
+
2822
# Computes the "most recently added" and "last updated" timestamps for a
# record, both as integer epoch seconds.
#
# most_recent_add is the latest parseable item creation date, or 0 when there
# is none. last_update is taken from the first parseable 005 value (ignoring
# empty values and values starting with '0000'); when that is missing or
# older than most_recent_add, most_recent_add is used instead.
#
# Date parsing failures are reported with `puts` (item dates) or silently
# skipped (005). Only StandardError is rescued, so interrupts and other
# process-level exceptions are no longer swallowed here.
#
# @param rec the MARC record to read
# @return [Hash] { most_recent_add: Integer, last_update: Integer }
def prepare_timestamps(rec)
  most_recent_add = rec.fields(EnrichedMarc::TAG_ITEM).flat_map do |item|
    item.find_all(&subfield_in([EnrichedMarc::SUB_ITEM_DATE_CREATED])).map do |sf|
      begin
        # On 2022-05-02, this field value (as exported in enriched publishing
        # job from Alma) began truncating time to day-level granularity. We
        # have no guarantee that this won't switch back in the future, so for
        # the foreseeable future we support both representations.
        format = sf.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'
        DateTime.strptime(sf.value, format).to_time.to_i
      rescue StandardError => e
        puts "Error parsing date string for recently added field: #{sf.value} - #{e}"
        nil
      end
    end.compact
  end.max || 0

  usable_005 = rec.fields('005').select do |f|
    f.value.present? && !f.value.start_with?('0000')
  end
  last_update = usable_005.map do |field|
    begin
      DateTime.iso8601(field.value).to_time.to_i
    rescue ArgumentError
      nil
    end
  end.compact.first

  last_update = most_recent_add if last_update.nil? || most_recent_add > last_update

  { most_recent_add: most_recent_add, last_update: last_update }
end
2861
+
2862
# Builds full-text link hashes from 856 fields whose first indicator is '4'
# and whose second indicator is '0' or '1'.
#
# Each hash has :linktext and :linkurl; the URL doubles as the text when the
# derived link text fails `present?`. When is_etas(rec) is truthy,
# add_etas_full_text is given the chance to append one more entry.
#
# @param rec the MARC record to read
# @return [Array<Hash>]
def get_full_text_link_values(rec)
  full_text_fields = rec.fields('856').select do |f|
    f.indicator1 == '4' && %w[0 1].member?(f.indicator2)
  end

  links = full_text_fields.map do |field|
    linktext, linkurl = linktext_and_url(field)
    { linktext: (linktext.present? ? linktext : linkurl), linkurl: linkurl }
  end

  add_etas_full_text(rec, links) if is_etas(rec)
  links
end
2875
+
2876
# Text stored under :postfix on the HathiTrust link added by
# add_etas_full_text.
HATHI_POSTFIX = ' from HathiTrust during COVID-19'

# Appends a HathiTrust "Online access" link, keyed on the record's first
# OCLC number, to the given list of links.
#
# @param rec the MARC record to read
# @param acc [Array<Hash>] link list, modified in place
# @return [Array<Hash>, nil] acc, or nil when the record has no OCLC number
def add_etas_full_text(rec, acc)
  oclc_id = get_oclc_id_values(rec).first
  # Defensive: e.g. the Hathi match may be based on a subsequently deleted OCLC id.
  return unless oclc_id

  acc.push({
    linktext: 'Online access',
    linkurl: "http://catalog.hathitrust.org/api/volumes/oclc/#{oclc_id}.html",
    postfix: HATHI_POSTFIX
  })
end
2887
+
2888
# Detects auto-generated boundwith host records: true when any 245 field's
# subfield a contains "Host bibliographic record for boundwith".
#
# It's not clear whether Alma can suppress these auto-generated records
# (Primo instances seem to show them), so they are filtered out here just in
# case.
#
# @param rec the MARC record to read
# @return [Boolean]
def is_boundwith_record(rec)
  rec.fields('245').any? do |field|
    join_subfields(field, &subfield_in(%w[a])).include?('Host bibliographic record for boundwith')
  end
end
2897
+
2898
# Builds the HTML shown for offsite (Center for Research Libraries) holdings:
# a link to the CRL catalog record, a "Place request" link to the Atlas ILL
# form, and, when the record has 590 fields, a <div> containing their joined
# content.
#
# Values for the passed-in args come from Solr, not extracted directly from
# MARC. Place, publisher and date for the request form are read from 260
# subfields a, b and c.
#
# TODO: this code should return more data-ish values; the HTML should be
# moved into a render method.
# NOTE(review): crl_id and the 590 content are interpolated into the HTML
# without escaping - confirm the caller sanitizes before rendering.
#
# @param rec the MARC record to read
# @param crl_id identifier used in the CRL catalog URL and the request form
# @param title value for the request form
# @param author value for the request form
# @param oclc_id value for the request form
# @return [Array<String>] a single-element array holding the HTML
def get_offsite_display(rec, crl_id, title, author, oclc_id)
  crl_link = %Q{<a href="http://catalog.crl.edu/record=#{crl_id}~S1">Center for Research Libraries Holdings</a>}

  pub_fields = rec.fields('260')
  joined_260 = lambda do |code|
    pub_fields.map { |f| join_subfields(f, &subfield_in([code])) }.join(' ')
  end

  query = {
    crl_id: crl_id,
    title: title,
    author: author,
    oclc: oclc_id,
    place: joined_260.call('a'),
    publisher: joined_260.call('b'),
    pubdate: joined_260.call('c')
  }.to_query
  request_link = %Q{<a href="https://atlas.library.upenn.edu/cgi-bin/forms/illcrl.cgi?#{query}">Place request</a>}

  html = crl_link + request_link

  notes = rec.fields('590')
  unless notes.empty?
    html = notes.reduce(html + '<div>') { |out, field| out + field.join(' ') } + '</div>'
  end

  [html]
end
2932
+
2933
# Predicate selecting publication fields: true for 260 fields, and for 264
# fields whose second indicator is '1'.
@@select_pub_field = lambda { |field|
  case field.tag
  when '260' then true
  when '264' then field.indicator2 == '1'
  else false
  end
}
2936
+
2937
# Returns every subfield 'a' value from the record's publication fields
# (those accepted by @@select_pub_field).
#
# @param rec the MARC record to read
# @return [Array] subfield values in record order
def get_ris_cy_field(rec)
  pub_fields = rec.fields.select(&@@select_pub_field)
  pub_fields.flat_map { |field| field.find_all(&subfield_in(['a'])).map(&:value) }
end
2942
+
2943
# Returns every subfield 'b' value from the record's publication fields
# (those accepted by @@select_pub_field).
#
# @param rec the MARC record to read
# @return [Array] subfield values in record order
def get_ris_pb_field(rec)
  pub_fields = rec.fields.select(&@@select_pub_field)
  pub_fields.flat_map { |field| field.find_all(&subfield_in(['b'])).map(&:value) }
end
2948
+
2949
# Returns every subfield 'c' value from the record's publication fields
# (those accepted by @@select_pub_field).
#
# @param rec the MARC record to read
# @return [Array] subfield values in record order
def get_ris_py_field(rec)
  pub_fields = rec.fields.select(&@@select_pub_field)
  pub_fields.flat_map { |field| field.find_all(&subfield_in(['c'])).map(&:value) }
end
2954
+
2955
# Returns every subfield 'a' value from the record's 020 and 022 fields.
#
# @param rec the MARC record to read
# @return [Array] subfield values in record order
def get_ris_sn_field(rec)
  rec.fields
     .select { |field| %w[020 022].include?(field.tag) }
     .flat_map { |field| field.find_all(&subfield_in(['a'])).map(&:value) }
end
2960
+
2961
+ end
2962
+
2963
+ end
2964
+ # rubocop:enable all