pennmarc 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +23 -0
- data/Gemfile.lock +119 -0
- data/README.md +82 -0
- data/legacy/indexer.rb +568 -0
- data/legacy/marc.rb +2964 -0
- data/legacy/test_file_output.json +49 -0
- data/lib/pennmarc/encoding_level.rb +43 -0
- data/lib/pennmarc/enriched_marc.rb +36 -0
- data/lib/pennmarc/heading_control.rb +11 -0
- data/lib/pennmarc/helpers/citation.rb +31 -0
- data/lib/pennmarc/helpers/creator.rb +237 -0
- data/lib/pennmarc/helpers/database.rb +89 -0
- data/lib/pennmarc/helpers/date.rb +85 -0
- data/lib/pennmarc/helpers/edition.rb +90 -0
- data/lib/pennmarc/helpers/format.rb +312 -0
- data/lib/pennmarc/helpers/genre.rb +71 -0
- data/lib/pennmarc/helpers/helper.rb +11 -0
- data/lib/pennmarc/helpers/identifier.rb +134 -0
- data/lib/pennmarc/helpers/language.rb +37 -0
- data/lib/pennmarc/helpers/link.rb +12 -0
- data/lib/pennmarc/helpers/location.rb +97 -0
- data/lib/pennmarc/helpers/note.rb +132 -0
- data/lib/pennmarc/helpers/production.rb +131 -0
- data/lib/pennmarc/helpers/relation.rb +135 -0
- data/lib/pennmarc/helpers/series.rb +118 -0
- data/lib/pennmarc/helpers/subject.rb +304 -0
- data/lib/pennmarc/helpers/title.rb +197 -0
- data/lib/pennmarc/mappings/language.yml +516 -0
- data/lib/pennmarc/mappings/locations.yml +1801 -0
- data/lib/pennmarc/mappings/relator.yml +263 -0
- data/lib/pennmarc/parser.rb +177 -0
- data/lib/pennmarc/util.rb +240 -0
- data/lib/pennmarc.rb +6 -0
- data/pennmarc.gemspec +22 -0
- data/spec/fixtures/marcxml/test.xml +167 -0
- data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
- data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
- data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
- data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
- data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
- data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
- data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
- data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
- data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
- data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
- data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
- data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
- data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
- data/spec/lib/pennmarc/parser_spec.rb +13 -0
- data/spec/spec_helper.rb +104 -0
- data/spec/support/marc_spec_helpers.rb +84 -0
- metadata +171 -0
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Parse edition statements (250) and other-edition entries (775), plus their linked 880 alternates, for display
|
5
|
+
class Edition < Helper
|
6
|
+
class << self
|
7
|
+
# Edition values for display on a record page. Field 250 is information relating to the edition of a work as
|
8
|
+
# determined by applicable cataloging rules. For mixed materials, field 250 is used to record statements relating
|
9
|
+
# to collections that contain versions of works existing in two or more versions (or states) in single or multiple
|
10
|
+
# copies (e.g., different drafts of a film script). For continuing resources, this field is not used for
|
11
|
+
# sequential edition statements such as 1st- ed. This type of information is contained in field 362 (Dates of
|
12
|
+
# Publication and/or Volume Designation).
|
13
|
+
# https://www.loc.gov/marc/bibliographic/bd250.html
|
14
|
+
# @param [MARC::Record] record
|
15
|
+
# @return [Array<String>] array of editions and their alternates
|
16
|
+
def show(record)
  # Subfields 6 and 8 are left out of the displayed edition statement.
  displayable = subfield_not_in?(%w[6 8])
  editions = record.fields('250').map { |field| join_subfields(field, &displayable) }
  editions + linked_alternate_not_6_or_8(record, '250')
end
|
21
|
+
|
22
|
+
# Edition values for display in search results. Just grab the first 250 field.
|
23
|
+
# @param [MARC::Record] record
|
24
|
+
# @return [String, NilClass] string of all first 250 subfields, excluding 6 and 8
|
25
|
+
def values(record)
  # Only the first 250 is used for search results; nil is returned when there is none.
  first_edition = record.fields('250').first
  return if first_edition.blank?

  join_subfields(first_edition, &subfield_not_in?(%w[6 8]))
end
|
31
|
+
|
32
|
+
# Entry for another available edition of the target item (horizontal relationship). When a note is generated
|
33
|
+
# from this field, the introductory phrase Other editions available: may be generated based on the field tag for
|
34
|
+
# display.
|
35
|
+
# https://www.loc.gov/marc/bibliographic/bd775.html
|
36
|
+
# @param [MARC::Record] record
# @param [Hash] relator_mapping
|
37
|
+
# @return [Array<String>] array of other edition strings
|
38
|
+
def other_show(record, relator_mapping)
  # 775 fields count only when they carry a subfield i.
  direct = record.fields('775').select { |field| subfield_defined?(field, :i) }
  # 880 fields count when indicator 2 is blank, subfield 6 is matched against 775, and subfield i is present.
  linked = record.fields('880').select do |field|
    field.indicator2.blank? && subfield_value_in?(field, '6', %w[775]) && subfield_defined?(field, 'i')
  end
  (direct + linked).filter_map { |field| other_edition_value(field, relator_mapping) }
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
# Assemble a string of relevant edition information.
|
54
|
+
# @param [MARC::DataField] field
|
55
|
+
# @param [Hash] relator_mapping
|
56
|
+
# @return [String (frozen)] assembled other version string
|
57
|
+
def other_edition_value(field, relator_mapping)
  # Introductory label taken from subfield i, without its trailing period.
  label = trim_trailing(:period, remove_paren_value_from_subfield_i(field) || '').squish

  # First segment: subfields s, x and z verbatim, subfield t via the relator mapping.
  # NOTE(review): the 775 documentation describes subfield t as a title, yet it is passed through
  # translate_relator here - confirm this is intended.
  primary = field.filter_map do |sf|
    case sf.code
    when 's', 'x', 'z'
      " #{sf.value}"
    when 't'
      relator = translate_relator(sf.value, relator_mapping)
      " #{relator}. " unless relator.blank?
    end
  end.join

  # Second segment: subfield h in parentheses, every other subfield not listed below verbatim.
  secondary = field.filter_map do |sf|
    case sf.code
    when '6', '8', 'i', 's', 't', 'x', 'z', 'e', 'f', 'o', 'r', 'w', 'y', '7'
      nil
    when 'h'
      " (#{sf.value}) "
    else
      " #{sf.value}"
    end
  end.join

  return label if primary.blank? && secondary.blank?

  "#{label}: #{primary} #{secondary}".squish
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,312 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Handle parsing out "Format" and "Other Format" values. Special care goes into controlling the format values for
|
5
|
+
# faceting.
|
6
|
+
class Format < Helper
|
7
|
+
class << self
|
8
|
+
# These constants represent the set of desired Format values for faceting.
|
9
|
+
ARCHIVE = 'Archive'
|
10
|
+
BOOK = 'Book'
|
11
|
+
CONFERENCE_EVENT = 'Conference/Event'
|
12
|
+
DATAFILE = 'Datafile'
|
13
|
+
GOVDOC = 'Government document'
|
14
|
+
IMAGE = 'Image'
|
15
|
+
JOURNAL_PERIODICAL = 'Journal/Periodical'
|
16
|
+
MANUSCRIPT = 'Manuscript'
|
17
|
+
MAP_ATLAS = 'Map/Atlas'
|
18
|
+
MICROFORMAT = 'Microformat'
|
19
|
+
MUSICAL_SCORE = 'Musical score'
|
20
|
+
NEWSPAPER = 'Newspaper'
|
21
|
+
OTHER = 'Other'
|
22
|
+
PROJECTED_GRAPHIC = 'Projected graphic'
|
23
|
+
SOUND_RECORDING = 'Sound recording'
|
24
|
+
THESIS_DISSERTATION = 'Thesis/Dissertation'
|
25
|
+
THREE_D_OBJECT = '3D object'
|
26
|
+
VIDEO = 'Video'
|
27
|
+
WEBSITE_DATABASE = 'Website/Database'
|
28
|
+
|
29
|
+
# Get any Format values from {https://www.oclc.org/bibformats/en/3xx/300.html 300},
|
30
|
+
# 254, 255, 310, 342, 352, 362 or {https://www.oclc.org/bibformats/en/3xx/340.html 340} field. Based on the source
|
31
|
+
# field, different subfields are used.
|
32
|
+
# @note ported from get_format_display
|
33
|
+
# @param [MARC::Record] record
|
34
|
+
# @return [Array<String>] format values for display
|
35
|
+
def show(record)
  results = record.fields('300').map { |f| join_subfields(f, &subfield_not_in?(%w[3 6 8])) }
  results += record.fields(%w[254 255 310 342 352 362]).map do |f|
    join_subfields(f, &subfield_not_in?(%w[6 8]))
  end
  results += record.fields('340').map { |f| join_subfields(f, &subfield_not_in?(%w[0 2 6 8])) }

  # Linked 880 fields reuse the ignore-list of the tag named in their subfield 6. An 880 that links to none of
  # the tags above has no ignore-list and is skipped; before, nil was handed on to subfield_not_in?.
  results += record.fields('880').filter_map do |f|
    ignored = if subfield_value?(f, 6, /^300/)
                %w[3 6 8]
              elsif subfield_value?(f, 6, /^(254|255|310|342|352|362)/)
                %w[6 8]
              elsif subfield_value?(f, 6, /^340/)
                %w[0 2 6 8]
              end
    next if ignored.nil?

    join_subfields(f, &subfield_not_in?(ignored))
  end
  results.compact_blank
end
|
53
|
+
|
54
|
+
# Get Format values for faceting. Format values are determined using complex logic for each possible format value.
|
55
|
+
# The primary fields considered in determining Format facet values are:
|
56
|
+
#
|
57
|
+
# 1. "Type of Record" and "Bibliographic level" values extracted from the
|
58
|
+
# {https://www.loc.gov/marc/bibliographic/bdleader.html MARC leader}.
|
59
|
+
# 2. Location name values and "Classification part" from Alma "enhanced" MARC holding/item information
|
60
|
+
# 3. {https://www.loc.gov/marc/bibliographic/bd007.html 007} values, the first
|
61
|
+
# {https://www.loc.gov/marc/bibliographic/bd008.html 008} value, and the first character from all
|
62
|
+
# {https://www.loc.gov/marc/bibliographic/bd006.html 006} values (form)
|
63
|
+
# 4. Medium values from {https://www.oclc.org/bibformats/en/2xx/245.html#subfieldh 245 ǂh}
|
64
|
+
# 5. Media Type values from {https://www.oclc.org/bibformats/en/3xx/337.html#subfielda 337 ǂa}
|
65
|
+
# Additional fields are considered for many of the formats. Much of this logic has been moved to private methods
|
66
|
+
# to keep this method from becoming too unwieldy.
|
67
|
+
# @todo is the conditional structure here still best practice? see the "Thesis on Microfilm" case in the specs
|
68
|
+
# for this helper method
|
69
|
+
# @note ported from get_format
|
70
|
+
# @param [MARC::Record] record
|
71
|
+
# @param [Hash] location_map
|
72
|
+
# @return [Array<String>] format values for faceting
|
73
|
+
|
74
|
+
def facet(record, location_map)
  format_code = leader_format(record.leader)
  f007 = record.fields('007').map(&:value)
  f008 = record.fields('008').first&.value || ''
  f006_forms = record.fields('006').map { |field| field.value[0] }
  title_medium = subfield_values_for(tag: '245', subfield: :h, record: record)
  media_type = subfield_values_for(tag: '337', subfield: :a, record: record)

  # Call number pieces from the holding fields; the classification part can contain strings like 'Microfilm'.
  call_number_parts = subfield_in?([EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART,
                                    EnrichedMarc::SUB_HOLDING_ITEM_PART])
  call_nums = record.fields(EnrichedMarc::TAG_HOLDING).map { |field| join_subfields(field, &call_number_parts) }

  # All specific_location values from the inventory information.
  locations = Location.location(record: record, location_map: location_map, display_value: :specific_location)

  # These three formats are exclusive: when one applies, none of the leader-driven checks below are run.
  exclusive_format =
    if include_manuscripts?(locations) then MANUSCRIPT
    elsif archives_but_not_cajs_or_nursing?(locations) then ARCHIVE
    elsif micro_or_microform?(call_nums, locations, media_type, title_medium) then MICROFORMAT
    end

  formats = []
  if exclusive_format
    formats << exclusive_format
  else
    # Any number of these may apply.
    formats << THESIS_DISSERTATION if thesis_or_dissertation?(format_code, record)
    formats << CONFERENCE_EVENT if conference_event?(record)
    formats << NEWSPAPER if newspaper?(f008, format_code)
    formats << GOVDOC if government_document?(f008, record, format_code)

    # Exactly one of these is added; the first match wins.
    primary_format =
      if website_database?(f006_forms, format_code) then WEBSITE_DATABASE
      elsif book?(format_code, title_medium, record) then BOOK
      elsif musical_score?(format_code) then MUSICAL_SCORE
      elsif map_atlas?(format_code) then MAP_ATLAS
      elsif graphical_media?(format_code) then graphical_media_type(f007)
      elsif sound_recording?(format_code) then SOUND_RECORDING
      elsif image?(format_code) then IMAGE
      elsif datafile?(format_code) then DATAFILE
      elsif journal_periodical?(format_code) then JOURNAL_PERIODICAL
      elsif three_d_object?(format_code) then THREE_D_OBJECT
      else OTHER
      end
    formats << primary_format
  end

  # Curated formats from the local 944 field are always appended.
  formats.concat(curated_format(record))
end
|
133
|
+
|
134
|
+
# Show "Other Format" values from {https://www.oclc.org/bibformats/en/7xx/776.html 776} and any 880 linkage.
|
135
|
+
# @todo is 774 an error in the linked field in legacy? i changed to 776 here
|
136
|
+
# @param [MARC::Record] record
|
137
|
+
# @return [Array] other format values for display
|
138
|
+
def other_show(record)
  # Blank joined values are dropped from the 776 results.
  direct = record.fields('776').filter_map do |field|
    join_subfields(field, &subfield_in?(%w[i a s t o])).presence
  end
  # The same subfield selection is applied to the linked alternate fields.
  direct + linked_alternate(record, '776') { |sf| %w[i a s t o].include?(sf.code) }
end
|
149
|
+
|
150
|
+
# Check if a set of locations has any locations that include the term 'manuscripts'
|
151
|
+
# @param [Array<String>] locations
|
152
|
+
# @return [Boolean]
|
153
|
+
def include_manuscripts?(locations)
  # Regexp#match? skips MatchData allocation and returns false for a nil entry.
  locations.any? { |loc| /manuscripts/i.match?(loc) }
end
|
156
|
+
|
157
|
+
private
|
158
|
+
|
159
|
+
# Get 'Curated' format from
|
160
|
+
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Alma local field
|
161
|
+
# 944} ǂa, as long as it is not a numerical value.
|
162
|
+
# @param [MARC::Record] record
|
163
|
+
# @return [Array]
|
164
|
+
def curated_format(record)
  record.fields('944').filter_map do |field|
    value = field.find { |sf| sf.code == 'a' }&.value
    next if value.nil?
    # Numeric values are not format names. A pattern check also catches padded numbers such as "007",
    # which a to_i/to_s round-trip comparison lets through.
    next if value.match?(/\A\s*-?\d+\s*\z/)

    value
  end.uniq
end
|
172
|
+
|
173
|
+
# @param [String] format_code
|
174
|
+
# @return [Boolean]
|
175
|
+
def image?(format_code)
  # Leader type 'k' (two-dimensional nonprojectable graphic, per the MARC leader spec) at level 'm' or 'd'.
  %w[km kd].include?(format_code)
end
|
178
|
+
|
179
|
+
# @param [String] format_code
|
180
|
+
# @return [Boolean]
|
181
|
+
def datafile?(format_code)
  # Leader type 'm' (computer file, per the MARC leader spec) at bibliographic level 'm'.
  is_datafile = format_code == 'mm'
  is_datafile
end
|
184
|
+
|
185
|
+
# @param [String] format_code
|
186
|
+
# @return [Boolean]
|
187
|
+
def journal_periodical?(format_code)
  # Bibliographic level 's' (serial, per the MARC leader spec) with leader type 'a' or 'g'.
  %w[as gs].include?(format_code)
end
|
190
|
+
|
191
|
+
# @param [String] format_code
|
192
|
+
# @return [Boolean]
|
193
|
+
def three_d_object?(format_code)
  # Any bibliographic level counts as long as the leader type (first character) is 'r'.
  format_code[0] == 'r'
end
|
196
|
+
|
197
|
+
# @param [String] format_code
|
198
|
+
# @return [Boolean]
|
199
|
+
def sound_recording?(format_code)
  # Leader types 'i' and 'j' are sound recordings per the MARC leader spec; only these pairs are accepted.
  %w[im jm jc jd js].include?(format_code)
end
|
202
|
+
|
203
|
+
# @param [String] format_code
|
204
|
+
# @return [Boolean]
|
205
|
+
def graphical_media?(format_code)
  # Leader type 'g' (projected medium, per the MARC leader spec) at bibliographic level 'm'.
  is_graphical = format_code == 'gm'
  is_graphical
end
|
208
|
+
|
209
|
+
# @param [String] format_code
|
210
|
+
# @return [Boolean]
|
211
|
+
def map_atlas?(format_code)
  # Leader type 'e' at any level, or exactly 'fm'.
  cartographic = format_code&.start_with?('e')
  cartographic || format_code == 'fm'
end
|
214
|
+
|
215
|
+
# @param [String] format_code
|
216
|
+
# @return [Boolean]
|
217
|
+
def musical_score?(format_code)
  # Leader types 'c' and 'd' are notated music per the MARC leader spec; only these pairs are accepted.
  %w[ca cb cd cm cs dm].include?(format_code)
end
|
220
|
+
|
221
|
+
# @param [String] format_code
|
222
|
+
# @param [Array<String>] title_medium
|
223
|
+
# @param [MARC::Record] record
|
224
|
+
# @return [Boolean]
|
225
|
+
def book?(format_code, title_medium, record)
  # Check the leader code first so the 245 lookup is skipped for records that cannot be books.
  return false unless %w[aa ac am tm].include?(format_code)

  # A 245 subfield k mentioning "kit" or a title medium mentioning "micro" rules the record out.
  title_forms = subfield_values_for tag: '245', subfield: :k, record: record
  title_forms.none? { |v| /kit/i.match?(v) } && title_medium.none? { |v| /micro/i.match?(v) }
end
|
231
|
+
|
232
|
+
# @param [Array<String>] f006_forms
|
233
|
+
# @param [String] format_code
|
234
|
+
# @return [Boolean]
|
235
|
+
def website_database?(f006_forms, format_code)
  # Any leader code whose second character (bibliographic level) is 'i' qualifies outright.
  return true if format_code&.end_with?('i')

  # Otherwise the code must be 'am' and the 006 forms must contain both 'm' and 's'.
  format_code == 'am' && (%w[m s] - f006_forms).empty?
end
|
239
|
+
|
240
|
+
# @param [String] f008
|
241
|
+
# @param [MARC::Record] record
|
242
|
+
# @param [String] format_code
|
243
|
+
# @return [Boolean]
|
244
|
+
def government_document?(f008, record, format_code)
  # Cheap checks first: leader types c, d, i and j are never government documents here,
  # and 008 position 28 must be one of f, i or o.
  return false if %w[c d i j].include?(format_code[0])
  return false unless %w[f i o].include?(f008[28])

  # A 260 subfield b that mentions "press" disqualifies the record.
  record.fields('260').none? do |field|
    field.any? { |sf| sf.code == 'b' && /press/i.match?(sf.value) }
  end
end
|
251
|
+
|
252
|
+
# @param [String] f008
|
253
|
+
# @param [String] format_code
|
254
|
+
# @return [Boolean]
|
255
|
+
def newspaper?(f008, format_code)
  # Only leader code 'as' can be a newspaper.
  return false unless format_code == 'as'

  # 008 position 21 of 'n' or position 22 of 'e' marks the record as a newspaper.
  f008[21] == 'n' || f008[22] == 'e'
end
|
258
|
+
|
259
|
+
# @param [MARC::Record] record
|
260
|
+
# @return [Boolean]
|
261
|
+
def conference_event?(record)
  # True when the record has at least one 111 or 711 field.
  %w[111 711].any? { |tag| record.fields(tag).any? }
end
|
264
|
+
|
265
|
+
# @param [MARC::Record] record
|
266
|
+
# @param [String] format_code
|
267
|
+
# @return [Boolean]
|
268
|
+
def thesis_or_dissertation?(format_code, record)
  # Requires a 502 field and leader code 'tm'.
  return false unless record.fields('502').any?

  format_code == 'tm'
end
|
271
|
+
|
272
|
+
# @param [Array<String>] title_medium
|
273
|
+
# @param [Array<String>] media_type
|
274
|
+
# @param [Array<String>] locations
|
275
|
+
# @param [Array<String>] call_nums
|
276
|
+
# @return [Boolean]
|
277
|
+
def micro_or_microform?(call_nums, locations, media_type, title_medium)
  # Locations, title medium and call numbers match on "micro"; media type needs the full word "microform".
  # Regexp#match? is used because no match data is needed.
  locations.any? { |loc| /micro/i.match?(loc) } ||
    title_medium.any? { |val| /micro/i.match?(val) } ||
    call_nums.any? { |val| /micro/i.match?(val) } ||
    media_type.any? { |val| /microform/i.match?(val) }
end
|
283
|
+
|
284
|
+
# @todo "cajs" has no match in our location map, so it is not doing anything. Does this intend to catch cjsambx
|
285
|
+
# "Library at the Katz Center - Archives"?
|
286
|
+
# @param [Array<String>] locations
|
287
|
+
# @return [Boolean]
|
288
|
+
def archives_but_not_cajs_or_nursing?(locations)
  # At least one location mentions "archives" and no location mentions "cajs" or "nursing".
  # The two exclusions are checked in a single pass.
  locations.any? { |loc| /archives/i.match?(loc) } &&
    locations.none? { |loc| /cajs|nursing/i.match?(loc) }
end
|
293
|
+
|
294
|
+
# Consider {https://www.loc.gov/marc/bibliographic/bd007g.html 007} to determine graphical media format
|
295
|
+
# @param [Array<String>] f007
|
296
|
+
# @return [String (frozen)]
|
297
|
+
def graphical_media_type(f007)
  # A 007 value beginning with 'g' means projected graphic; anything else is treated as video.
  projected = f007.any? { |v| v.start_with?('g') }
  projected ? PROJECTED_GRAPHIC : VIDEO
end
|
304
|
+
|
305
|
+
# @param [String] leader
|
306
|
+
# @return [String]
|
307
|
+
def leader_format(leader)
  # Leader positions 6 and 7; falls back to a single space when the leader is too short.
  type_and_level = leader[6..7]
  type_and_level || ' '
end
|
310
|
+
end
|
311
|
+
end
|
312
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Genre field values come from the {https://www.oclc.org/bibformats/en/6xx/655.html 655}, but for some
|
5
|
+
# contexts we are only interested in a subset of the declared terms in a record.
|
6
|
+
class Genre < Helper
|
7
|
+
class << self
|
8
|
+
# Genre values for searching. We're less picky about what is included here to enable discovery via any included
|
9
|
+
# 655 data.
|
10
|
+
#
|
11
|
+
# @param [MARC::Record] record
|
12
|
+
# @return [Array<String>] array of genre values for search
|
13
|
+
def search(record)
  # Every 655 contributes, minus subfields 0, 2, 5 and c; duplicates are removed.
  searchable = subfield_not_in?(%w[0 2 5 c])
  record.fields('655').map { |field| join_subfields(field, &searchable) }.uniq
end
|
18
|
+
|
19
|
+
# Genre values for display. We display Genre/Term values if they fulfill the following criteria:
|
20
|
+
# - The field is in {https://www.oclc.org/bibformats/en/6xx/655.html MARC 655}. Or the field is in MARC 880 with
|
21
|
+
# subfield 6 including '655'.
|
22
|
+
# AND
|
23
|
+
# - Above fields have an indicator 2 value of: 0 (LCSH) or 4 (No source specified).
|
24
|
+
# OR
|
25
|
+
# - Above fields have a subfield 2 (ontology code) in the list of allowed values.
|
26
|
+
# @todo subfields e and w do not appear in the documentation for 655, but we give them special consideration here,
|
27
|
+
# what gives?
|
28
|
+
# @note legacy method returns a link object
|
29
|
+
# @param [MARC::Record] record
|
30
|
+
# @return [Array<String>] array of genre values for display
|
31
|
+
def show(record)
  record.fields(%w[655 880]).filter_map do |field|
    next unless allowed_genre_field?(field)

    # An 880 is relevant only when its linkage subfield (6) points at a 655. Linkage values carry an
    # occurrence suffix (e.g. "655-01/..."), so match on the leading tag rather than on exact equality.
    next if field.tag == '880' && Array(subfield_values(field, '6')).none? { |v| v.to_s.start_with?('655') }

    # Subfields a and b are space separated; every other displayed subfield is preceded by ' -- '.
    main_terms = field.find_all(&subfield_not_in?(%w[0 2 5 6 8 c e w])).map do |sf|
      separator = %w[a b].include?(sf.code) ? ' ' : ' -- '
      separator + sf.value
    end.join.lstrip

    # Join the values of e and w. Joining the subfield objects themselves would put their to_s
    # rendering (code marker included) into the display string.
    qualifiers = field.find_all(&subfield_in?(%w[e w])).map(&:value).join(' -- ')

    "#{main_terms} #{qualifiers}".strip
  end.uniq
end
|
44
|
+
|
45
|
+
# Genre values for faceting. We only set Genre facet values for movies (videos) and manuscripts(?)
|
46
|
+
# @todo the Genre facet in Franklin is pretty ugly. It could be cleaned up by limiting the subfields included
|
47
|
+
# here and cleaning up punctuation.
|
48
|
+
# @param [MARC::Record] record
|
49
|
+
# @param [Hash] location_map
|
50
|
+
# @return [Array<String>]
|
51
|
+
def facet(record, location_map)
  locations = Location.location(record: record, location_map: location_map, display_value: :specific_location)
  held_as_manuscript = Format.include_manuscripts?(locations)
  has_video_007 = record.fields('007').any? { |field| field.value.start_with?('v') }
  # Genre facets are only produced for manuscripts and for records with a 007 starting with 'v'.
  return [] if !held_as_manuscript && !has_video_007

  facetable = subfield_not_in?(%w[0 2 5 c])
  record.fields('655').filter_map { |field| join_subfields(field, &facetable) }.uniq
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
# @param [MARC::DataField] field
|
65
|
+
# @return [TrueClass, FalseClass]
|
66
|
+
def allowed_genre_field?(field)
  # Indicator 2 of 0 or 4 is accepted outright; otherwise subfield 2 must name an allowed source code.
  return true if %w[0 4].include?(field.indicator2)

  subfield_value_in?(field, '2', PennMARC::HeadingControl::ALLOWED_SOURCE_CODES)
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Parser methods for extracting identifier values.
|
5
|
+
class Identifier < Helper
|
6
|
+
class << self
|
7
|
+
# Get Alma MMS ID value
|
8
|
+
#
|
9
|
+
# @param [MARC::Record] record
|
10
|
+
# @return [String]
|
11
|
+
def mmsid(record)
  # Returns nil instead of raising NoMethodError when the record has no 001 field.
  record.fields('001').first&.value
end
|
14
|
+
|
15
|
+
# Get normalized ISXN values for searching of a record. Values aggregated from subfield 'a' and 'z' of the
|
16
|
+
# {https://www.oclc.org/bibformats/en/0xx/020.html 020 field}, and subfield 'a', 'l', and 'z' of the
|
17
|
+
# {https://www.oclc.org/bibformats/en/0xx/022.html 022 field}.
|
18
|
+
#
|
19
|
+
# @param [MARC::Record] record
|
20
|
+
# @return [Array<String>]
|
21
|
+
def isxn_search(record)
  isbn_codes = subfield_in?(%w[a z])
  issn_codes = subfield_in?(%w[a l z])

  found = record.fields(%w[020 022]).map do |field|
    if field.tag == '020'
      # ISBN values are normalized; values that normalize to nil are dropped.
      field.select(&isbn_codes).filter_map { |subfield| normalize_isbn(subfield.value) }
    else
      # ISSN values are used as they appear in the record.
      field.select(&issn_codes).filter_map(&:value)
    end
  end
  found.flatten.uniq
end
|
30
|
+
|
31
|
+
# Get ISBN values for display from the {https://www.oclc.org/bibformats/en/0xx/020.html 020 field}
|
32
|
+
# and related {https://www.oclc.org/bibformats/en/8xx/880.html 880 field}.
|
33
|
+
#
|
34
|
+
# @param [MARC::Record] record
|
35
|
+
# @return [Array<String>]
|
36
|
+
# @todo look into z subfield for 020 field, should we show cancelled isbn?
|
37
|
+
def isbn_show(record)
  isbn_codes = subfield_in?(%w[a z])
  # Blank joined values are dropped; linked alternates are appended after the 020 values.
  direct = record.fields('020').filter_map { |field| join_subfields(field, &isbn_codes).presence }
  direct + linked_alternate(record, '020', &isbn_codes)
end
|
45
|
+
|
46
|
+
# Get ISSN values for display from the {https://www.oclc.org/bibformats/en/0xx/022.html 022 field} and related
|
47
|
+
# {https://www.oclc.org/bibformats/en/8xx/880.html 880 field}.
|
48
|
+
#
|
49
|
+
# @param [MARC::Record] record
|
50
|
+
# @return [Array<String>]
|
51
|
+
def issn_show(record)
  issn_codes = subfield_in?(%w[a z])
  # Blank joined values are dropped; linked alternates are appended after the 022 values.
  direct = record.fields('022').filter_map { |field| join_subfields(field, &issn_codes).presence }
  direct + linked_alternate(record, '022', &issn_codes)
end
|
59
|
+
|
60
|
+
# Get numeric OCLC ID of first {https://www.oclc.org/bibformats/en/0xx/035.html 035 field}
|
61
|
+
# with an OCLC ID defined in subfield 'a'.
|
62
|
+
#
|
63
|
+
# @todo We should evaluate this to return a single value in the future since subfield a is non-repeatable
|
64
|
+
# @param [MARC::Record] record
|
65
|
+
# @return [Array<String>]
|
66
|
+
def oclc_id(record)
  # Only the first 035 that has an OCLC value in subfield a is considered.
  oclc_field = record.fields('035').find { |field| field.any? { |subfield| subfield_a_is_oclc?(subfield) } }
  return [] if oclc_field.nil?

  # Keep the numeric part of each OCLC id (e.g. '610094484' in '(OCoLC)ocn610094484');
  # values without a numeric part are dropped.
  oclc_field.select { |subfield| subfield_a_is_oclc?(subfield) }
            .filter_map { |subfield| subfield.value[/^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/, 1] }
end
|
85
|
+
|
86
|
+
# Get publisher issued identifiers from fields {https://www.oclc.org/bibformats/en/0xx/024.html 024},
|
87
|
+
# {https://www.oclc.org/bibformats/en/0xx/028.html 028}, and related
|
88
|
+
# {https://www.oclc.org/bibformats/en/8xx/880.html 880 field}.
|
89
|
+
#
|
90
|
+
# @param [MARC::Record] record
|
91
|
+
# @return [Array<String>]
|
92
|
+
def publisher_number_show(record)
  # Subfields 5 and 6 are excluded from both the direct fields and the linked alternates.
  displayable = subfield_not_in?(%w[5 6])
  direct = record.fields(%w[024 028]).filter_map { |field| join_subfields(field, &displayable).presence }
  direct + linked_alternate(record, %w[024 028], &displayable)
end
|
100
|
+
|
101
|
+
# Get publisher issued identifiers for searching of a record. Values extracted from fields
|
102
|
+
# {https://www.oclc.org/bibformats/en/0xx/024.html 024} and {https://www.oclc.org/bibformats/en/0xx/028.html 028}.
|
103
|
+
#
|
104
|
+
# @param [MARC::Record] record
|
105
|
+
# @return [Array<String>]
|
106
|
+
def publisher_number_search(record)
  # Only subfield a is searchable; fields whose joined value is blank are dropped.
  only_a = subfield_in?(%w[a])
  record.fields(%w[024 028]).filter_map { |field| join_subfields(field, &only_a).presence }
end
|
112
|
+
|
113
|
+
private
|
114
|
+
|
115
|
+
# Determine if subfield 'a' is an OCLC id.
|
116
|
+
#
|
117
|
+
# @param [MARC::Subfield] subfield
|
118
|
+
# @return [TrueClass, FalseClass]
|
119
|
+
def subfield_a_is_oclc?(subfield)
  # Regexp#match? returns a strict true/false, as the documented return type promises;
  # =~ returns a match index or nil.
  subfield.code == 'a' && /^\(OCoLC\)/.match?(subfield.value)
end
|
122
|
+
|
123
|
+
# Normalize isbn value using {https://github.com/billdueber/library_stdnums library_stdnums gem}.
|
124
|
+
# Converts ISBN10 (ten-digit) to validated ISBN13 (thirteen-digit) and returns both values. If passed
|
125
|
+
# ISBN13 parameter, only returns validated ISBN13 value.
|
126
|
+
#
|
127
|
+
# @param [String] isbn
|
128
|
+
# @return [Array<String, String>, nil]
|
129
|
+
def normalize_isbn(isbn)
  # Validation and conversion are delegated entirely to the library_stdnums gem.
  normalized = StdNum::ISBN.allNormalizedValues(isbn)
  normalized
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
|
5
|
+
# the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
|
6
|
+
# is then mapped to a display-friendly value using a provided mapping hash.
|
7
|
+
# @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
|
8
|
+
class Language < Helper
|
9
|
+
# Used when no value is present in the control field - still mapped
|
10
|
+
UNDETERMINED_CODE = :und
|
11
|
+
|
12
|
+
class << self
|
13
|
+
# Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
|
14
|
+
# related 880.
|
15
|
+
# @param [MARC::Record] record
|
16
|
+
# @return [Array<String>] language values and notes
|
17
|
+
def show(record)
  # Subfields 6 and 8 are excluded from both the 546 fields and their linked alternates.
  displayable = subfield_not_in?(%w[6 8])
  notes = record.fields('546').map { |field| join_subfields(field, &displayable) }
  notes + linked_alternate(record, '546', &displayable)
end
|
23
|
+
|
24
|
+
# Get language values for searching and faceting of a record. The value is extracted from a defined position in
|
25
|
+
# the 008 control field. Language facet and search values will typically be the same.
|
26
|
+
#
|
27
|
+
# @param [MARC::Record] record
|
28
|
+
# @param [Hash] mapping hash for language code translation
|
29
|
+
# @return [String] nice value for language
|
30
|
+
def search(record, mapping)
  # The language code sits at positions 35-37 of the 008 control field. The field may be missing,
  # too short, or blank at those positions.
  control_field = record['008']&.value
  language_code = control_field.to_s[35..37].to_s.strip

  # With no usable code, fall back to the undetermined code; String#to_sym never returns nil,
  # so an `||` fallback on its result can never fire.
  key = language_code.empty? ? UNDETERMINED_CODE : language_code.to_sym
  mapping[key]
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|