pennmarc 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +23 -0
- data/Gemfile.lock +119 -0
- data/README.md +82 -0
- data/legacy/indexer.rb +568 -0
- data/legacy/marc.rb +2964 -0
- data/legacy/test_file_output.json +49 -0
- data/lib/pennmarc/encoding_level.rb +43 -0
- data/lib/pennmarc/enriched_marc.rb +36 -0
- data/lib/pennmarc/heading_control.rb +11 -0
- data/lib/pennmarc/helpers/citation.rb +31 -0
- data/lib/pennmarc/helpers/creator.rb +237 -0
- data/lib/pennmarc/helpers/database.rb +89 -0
- data/lib/pennmarc/helpers/date.rb +85 -0
- data/lib/pennmarc/helpers/edition.rb +90 -0
- data/lib/pennmarc/helpers/format.rb +312 -0
- data/lib/pennmarc/helpers/genre.rb +71 -0
- data/lib/pennmarc/helpers/helper.rb +11 -0
- data/lib/pennmarc/helpers/identifier.rb +134 -0
- data/lib/pennmarc/helpers/language.rb +37 -0
- data/lib/pennmarc/helpers/link.rb +12 -0
- data/lib/pennmarc/helpers/location.rb +97 -0
- data/lib/pennmarc/helpers/note.rb +132 -0
- data/lib/pennmarc/helpers/production.rb +131 -0
- data/lib/pennmarc/helpers/relation.rb +135 -0
- data/lib/pennmarc/helpers/series.rb +118 -0
- data/lib/pennmarc/helpers/subject.rb +304 -0
- data/lib/pennmarc/helpers/title.rb +197 -0
- data/lib/pennmarc/mappings/language.yml +516 -0
- data/lib/pennmarc/mappings/locations.yml +1801 -0
- data/lib/pennmarc/mappings/relator.yml +263 -0
- data/lib/pennmarc/parser.rb +177 -0
- data/lib/pennmarc/util.rb +240 -0
- data/lib/pennmarc.rb +6 -0
- data/pennmarc.gemspec +22 -0
- data/spec/fixtures/marcxml/test.xml +167 -0
- data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
- data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
- data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
- data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
- data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
- data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
- data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
- data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
- data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
- data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
- data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
- data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
- data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
- data/spec/lib/pennmarc/parser_spec.rb +13 -0
- data/spec/spec_helper.rb +104 -0
- data/spec/support/marc_spec_helpers.rb +84 -0
- metadata +171 -0
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Parser methods for extracting Edition values (MARC 250) and Other Edition values (MARC 775).
|
5
|
+
class Edition < Helper
|
6
|
+
class << self
|
7
|
+
# Edition values for display on a record page, taken from every 250 field plus any linked alternates. Field 250
# holds information relating to the edition of a work as determined by applicable cataloging rules. For mixed
# materials it records statements about collections containing versions of works that exist in two or more
# versions (or states), e.g. different drafts of a film script. For continuing resources, sequential edition
# statements such as "1st- ed." are not recorded here but in field 362 (Dates of Publication and/or Volume
# Designation).
# https://www.loc.gov/marc/bibliographic/bd250.html
# @param [MARC::Record] record
# @return [Array<String>] array of editions and their alternates
def show(record)
  # subfields 6 and 8 are excluded from each 250 value
  without_linkage = subfield_not_in?(%w[6 8])
  editions = record.fields('250').map { |field| join_subfields(field, &without_linkage) }
  editions + linked_alternate_not_6_or_8(record, '250')
end
|
21
|
+
|
22
|
+
# Edition value for display in search results. Only the first 250 field is considered.
# @param [MARC::Record] record
# @return [String, NilClass] joined subfields of the first 250 field (excluding 6 and 8), or nil when the record
#   has no 250 field
def values(record)
  first_edition = record.fields('250').first
  first_edition.present? ? join_subfields(first_edition, &subfield_not_in?(%w[6 8])) : nil
end
|
31
|
+
|
32
|
+
# Entries for other available editions of the target item (horizontal relationship), read from 775 fields and
# from 880 fields linked to a 775. Only fields that define a subfield i are used. When a note is generated from
# this field, the introductory phrase "Other editions available:" may be generated based on the field tag for
# display.
# https://www.loc.gov/marc/bibliographic/bd775.html
# NOTE(review): the 880 check hands the literal value '775' to subfield_value_in?; whether that helper matches
# linkage values such as '775-01' cannot be told from this file - confirm.
# @param [MARC::Record] record
# @param [Hash] relator_mapping passed through to other_edition_value
# @return [Array<String>] array of other edition strings
def other_show(record, relator_mapping)
  direct = record.fields('775').select { |field| subfield_defined?(field, :i) }
  linked = record.fields('880').select do |field|
    field.indicator2.blank? && subfield_value_in?(field, '6', %w[775]) && subfield_defined?(field, 'i')
  end
  (direct + linked).filter_map { |field| other_edition_value(field, relator_mapping) }
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
# Assemble a display string of relevant edition information from a single field. The string starts with the
# subfield i text (as returned by remove_paren_value_from_subfield_i, trailing period trimmed), followed by a
# colon and the remaining values when there are any.
# @param [MARC::DataField] field
# @param [Hash] relator_mapping
# @return [String (frozen)] assembled other version string
def other_edition_value(field, relator_mapping)
  subi = remove_paren_value_from_subfield_i(field) || ''

  # s, x and z are used verbatim; t is run through translate_relator and dropped when that yields nothing
  leading = field.filter_map do |sf|
    case sf.code
    when 's', 'x', 'z'
      " #{sf.value}"
    when 't'
      relator = translate_relator(sf.value, relator_mapping)
      " #{relator}. " unless relator.blank?
    end
  end.join

  # h is wrapped in parentheses; every subfield not listed below is appended as-is
  not_appended = %w[6 8 i s t x z e f o r w y 7]
  trailing = field.filter_map do |sf|
    if sf.code == 'h'
      " (#{sf.value}) "
    elsif !not_appended.include?(sf.code)
      " #{sf.value}"
    end
  end.join

  prepend = trim_trailing(:period, subi).squish
  return prepend unless leading.present? || trailing.present?

  "#{prepend}: #{leading} #{trailing}".squish
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
@@ -0,0 +1,312 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Handle parsing out "Format" and "Other Format" values. Special care goes into controlling the format values for
|
5
|
+
# faceting.
|
6
|
+
class Format < Helper
|
7
|
+
class << self
|
8
|
+
# The set of desired Format values for faceting.
ARCHIVE             = 'Archive'
BOOK                = 'Book'
CONFERENCE_EVENT    = 'Conference/Event'
DATAFILE            = 'Datafile'
GOVDOC              = 'Government document'
IMAGE               = 'Image'
JOURNAL_PERIODICAL  = 'Journal/Periodical'
MANUSCRIPT          = 'Manuscript'
MAP_ATLAS           = 'Map/Atlas'
MICROFORMAT         = 'Microformat'
MUSICAL_SCORE       = 'Musical score'
NEWSPAPER           = 'Newspaper'
OTHER               = 'Other'
PROJECTED_GRAPHIC   = 'Projected graphic'
SOUND_RECORDING     = 'Sound recording'
THESIS_DISSERTATION = 'Thesis/Dissertation'
THREE_D_OBJECT      = '3D object'
VIDEO               = 'Video'
WEBSITE_DATABASE    = 'Website/Database'
|
28
|
+
|
29
|
+
# Get any Format values from {https://www.oclc.org/bibformats/en/3xx/300.html 300},
# 254, 255, 310, 342, 352, 362 or {https://www.oclc.org/bibformats/en/3xx/340.html 340} field, plus any 880
# field linked (via subfield 6) to one of those tags. Based on the source field, different subfields are ignored:
# 3, 6 and 8 for 300; 0, 2, 6 and 8 for 340; 6 and 8 for the rest.
# @note ported from get_format_display
# @param [MARC::Record] record
# @return [Array<String>] format values for display
def show(record)
  results = record.fields('300').map { |f| join_subfields(f, &subfield_not_in?(%w[3 6 8])) }
  results += record.fields(%w[254 255 310 342 352 362]).map do |f|
    join_subfields(f, &subfield_not_in?(%w[6 8]))
  end
  results += record.fields('340').map { |f| join_subfields(f, &subfield_not_in?(%w[0 2 6 8])) }
  results += record.fields('880').filter_map do |f|
    subfields_to_ignore = if subfield_value?(f, 6, /^300/)
                            %w[3 6 8]
                          elsif subfield_value?(f, 6, /^(254|255|310|342|352|362)/)
                            %w[6 8]
                          elsif subfield_value?(f, 6, /^340/)
                            %w[0 2 6 8]
                          end
    # An 880 linked to any other tag is not a format value. Skip it rather than handing a nil ignore list to
    # subfield_not_in?.
    next if subfields_to_ignore.nil?

    join_subfields(f, &subfield_not_in?(subfields_to_ignore))
  end
  results.compact_blank
end
|
53
|
+
|
54
|
+
# Get Format values for faceting. The primary inputs considered are:
#
# 1. "Type of Record" and "Bibliographic level" values extracted from the
#    {https://www.loc.gov/marc/bibliographic/bdleader.html MARC leader}.
# 2. Location name values and "Classification part" from Alma "enhanced" MARC holding/item information
# 3. {https://www.loc.gov/marc/bibliographic/bd007.html 007} values, the first
#    {https://www.loc.gov/marc/bibliographic/bd008.html 008} value, and the first character from all
#    {https://www.loc.gov/marc/bibliographic/bd006.html 006} values (form)
# 4. Medium values from {https://www.oclc.org/bibformats/en/2xx/245.html#subfieldh 245 ǂh}
# 5. Media Type values from {https://www.oclc.org/bibformats/en/3xx/337.html#subfielda 337 ǂa}
#
# Manuscript, Archive and Microformat are each decided first and exclude every other computed format. Otherwise
# Thesis/Dissertation, Conference/Event, Newspaper and Government document may each be added, followed by exactly
# one of the remaining formats (falling back to Other). Curated formats are always appended at the end.
# Additional fields are considered by the private predicate methods this method delegates to.
# @todo is the conditional structure here still best practice? see the "Thesis on Microfilm" case in the specs
#   for this helper method
# @note ported from get_format
# @param [MARC::Record] record
# @param [Hash] location_map
# @return [Array<String>] format values for faceting
def facet(record, location_map)
  format_code = leader_format(record.leader)
  f007 = record.fields('007').map(&:value)
  f008 = record.fields('008').first&.value || ''
  f006_forms = record.fields('006').map { |field| field.value[0] }
  title_medium = subfield_values_for(tag: '245', subfield: :h, record: record)
  media_type = subfield_values_for(tag: '337', subfield: :a, record: record)

  # Call number parts from holdings - ǂh gives us the 'Classification part', which can contain strings like
  # 'Microfilm'
  holding_subfields = [EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART, EnrichedMarc::SUB_HOLDING_ITEM_PART]
  call_nums = record.fields(EnrichedMarc::TAG_HOLDING).map do |field|
    join_subfields(field, &subfield_in?(holding_subfields))
  end

  # all specific_location values from inventory info
  locations = Location.location(record: record, location_map: location_map, display_value: :specific_location)

  formats =
    if include_manuscripts?(locations)
      [MANUSCRIPT]
    elsif archives_but_not_cajs_or_nursing?(locations)
      [ARCHIVE]
    elsif micro_or_microform?(call_nums, locations, media_type, title_medium)
      [MICROFORMAT]
    else
      # any of these
      combinable = []
      combinable << THESIS_DISSERTATION if thesis_or_dissertation?(format_code, record)
      combinable << CONFERENCE_EVENT if conference_event?(record)
      combinable << NEWSPAPER if newspaper?(f008, format_code)
      combinable << GOVDOC if government_document?(f008, record, format_code)

      # but only one of these
      exclusive =
        if website_database?(f006_forms, format_code) then WEBSITE_DATABASE
        elsif book?(format_code, title_medium, record) then BOOK
        elsif musical_score?(format_code) then MUSICAL_SCORE
        elsif map_atlas?(format_code) then MAP_ATLAS
        elsif graphical_media?(format_code) then graphical_media_type(f007)
        elsif sound_recording?(format_code) then SOUND_RECORDING
        elsif image?(format_code) then IMAGE
        elsif datafile?(format_code) then DATAFILE
        elsif journal_periodical?(format_code) then JOURNAL_PERIODICAL
        elsif three_d_object?(format_code) then THREE_D_OBJECT
        else OTHER
        end
      combinable << exclusive
    end

  formats.concat(curated_format(record))
end
|
133
|
+
|
134
|
+
# Show "Other Format" values from {https://www.oclc.org/bibformats/en/7xx/776.html 776} and any 880 linkage.
# Only subfields i, a, s, t and o are used; 776 fields that produce a blank value are left out.
# @todo is 774 an error in the linked field in legacy? i changed to 776 here
# @param [MARC::Record] record
# @return [Array] other format values for display
def other_show(record)
  wanted_codes = %w[i a s t o]
  direct = record.fields('776').map { |field| join_subfields(field, &subfield_in?(wanted_codes)) }
  direct.reject(&:blank?) + linked_alternate(record, '776') { |sf| wanted_codes.include?(sf.code) }
end
|
149
|
+
|
150
|
+
# Check whether any of the given locations includes the term 'manuscripts' (case-insensitive).
# @param [Array<String>] locations
# @return [Boolean]
def include_manuscripts?(locations)
  locations.grep(/manuscripts/i).any?
end
|
156
|
+
|
157
|
+
private
|
158
|
+
|
159
|
+
# Get 'Curated' format values from
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Alma local field
# 944} ǂa. Only the first ǂa of each 944 field is read, and a value is skipped when it is a plain integer
# string (i.e. it round-trips unchanged through to_i.to_s). Duplicates are removed.
# @param [MARC::Record] record
# @return [Array]
def curated_format(record)
  first_a_values = record.fields('944').map do |field|
    field.find { |sf| sf.code == 'a' }&.value
  end
  first_a_values.compact.reject { |value| value == value.to_i.to_s }.uniq
end
|
172
|
+
|
173
|
+
# Is the leader format code one of the image codes (km, kd)?
# @param [String] format_code
# @return [Boolean]
def image?(format_code)
  %w[km kd].include?(format_code)
end
|
178
|
+
|
179
|
+
# Is the leader format code the datafile code (mm)?
# @param [String] format_code
# @return [Boolean]
def datafile?(format_code)
  %w[mm].include?(format_code)
end
|
184
|
+
|
185
|
+
# Is the leader format code one of the journal/periodical codes (as, gs)?
# @param [String] format_code
# @return [Boolean]
def journal_periodical?(format_code)
  %w[as gs].include?(format_code)
end
|
190
|
+
|
191
|
+
# Does the leader format code begin with 'r'?
# @param [String] format_code
# @return [Boolean]
def three_d_object?(format_code)
  format_code[0] == 'r'
end
|
196
|
+
|
197
|
+
# Is the leader format code one of the sound recording codes (im, jm, jc, jd, js)?
# @param [String] format_code
# @return [Boolean]
def sound_recording?(format_code)
  %w[im jm jc jd js].include?(format_code)
end
|
202
|
+
|
203
|
+
# Is the leader format code the graphical media code (gm)?
# @param [String] format_code
# @return [Boolean]
def graphical_media?(format_code)
  %w[gm].include?(format_code)
end
|
208
|
+
|
209
|
+
# Does the leader format code begin with 'e', or equal 'fm'? A nil code is tolerated and yields false.
# @param [String] format_code
# @return [Boolean]
def map_atlas?(format_code)
  return true if format_code&.start_with?('e')

  format_code == 'fm'
end
|
214
|
+
|
215
|
+
# Is the leader format code one of the musical score codes (ca, cb, cd, cm, cs, dm)?
# @param [String] format_code
# @return [Boolean]
def musical_score?(format_code)
  %w[ca cb cd cm cs dm].include?(format_code)
end
|
220
|
+
|
221
|
+
# A record is a Book when its leader format code is aa, ac, am or tm, no 245 ǂk value mentions "kit" and no
# title medium value mentions "micro" (both case-insensitive).
# @param [String] format_code
# @param [Array<String>] title_medium
# @param [MARC::Record] record
# @return [Boolean]
def book?(format_code, title_medium, record)
  title_forms = subfield_values_for(tag: '245', subfield: :k, record: record)
  return false unless %w[aa ac am tm].include?(format_code)
  return false if title_forms.any? { |form| /kit/i.match?(form) }

  title_medium.none? { |medium| /micro/i.match?(medium) }
end
|
231
|
+
|
232
|
+
# A record is a Website/Database when its leader format code ends with 'i', or when the code is 'am' and the
# 006 forms contain both 'm' and 's'.
# @param [Array<String>] f006_forms
# @param [String] format_code
# @return [Boolean]
def website_database?(f006_forms, format_code)
  return true if format_code&.end_with?('i')
  return false unless format_code == 'am'

  %w[m s].all? { |form| f006_forms.include?(form) }
end
|
239
|
+
|
240
|
+
# A record is a Government document when the first character of the leader format code is not c, d, i or j,
# position 28 of the 008 is f, i or o, and no 260 ǂb value mentions "press" (case-insensitive).
# @param [String] f008
# @param [MARC::Record] record
# @param [String] format_code
# @return [Boolean]
def government_document?(f008, record, format_code)
  # is a 260 entry present, and does it have a b that matches 'press'
  published_by_press = record.fields('260').any? do |field|
    field.any? { |sf| sf.code == 'b' && sf.value =~ /press/i }
  end
  return false if %w[c d i j].include?(format_code[0])
  return false unless %w[f i o].include?(f008[28])

  !published_by_press
end
|
251
|
+
|
252
|
+
# A record is a Newspaper when the leader format code is 'as' and the 008 has 'n' at position 21 or 'e' at
# position 22.
# @param [String] f008
# @param [String] format_code
# @return [Boolean]
def newspaper?(f008, format_code)
  return false unless format_code == 'as'

  f008[21] == 'n' || f008[22] == 'e'
end
|
258
|
+
|
259
|
+
# A record is a Conference/Event when it has any 111 or 711 field.
# @param [MARC::Record] record
# @return [Boolean]
def conference_event?(record)
  # TODO: use field_present helper here and below?
  %w[111 711].any? { |tag| record.fields(tag).any? }
end
|
264
|
+
|
265
|
+
# A record is a Thesis/Dissertation when it has any 502 field and the leader format code is 'tm'.
# @param [String] format_code
# @param [MARC::Record] record
# @return [Boolean]
def thesis_or_dissertation?(format_code, record)
  return false unless record.fields('502').any?

  format_code == 'tm'
end
|
271
|
+
|
272
|
+
# True when any location, title medium or call number value mentions "micro", or any media type value mentions
# "microform" (all case-insensitive).
# @param [Array<String>] call_nums
# @param [Array<String>] locations
# @param [Array<String>] media_type
# @param [Array<String>] title_medium
# @return [Boolean]
def micro_or_microform?(call_nums, locations, media_type, title_medium)
  micro = /micro/i
  return true if locations.any? { |loc| micro.match?(loc) }
  return true if title_medium.any? { |val| micro.match?(val) }
  return true if call_nums.any? { |val| micro.match?(val) }

  media_type.any? { |val| /microform/i.match?(val) }
end
|
283
|
+
|
284
|
+
# True when some location mentions "archives" and no location mentions "cajs" or "nursing" (case-insensitive).
# @todo "cajs" has no match in our location map, so it is not doing anything. Does this intend to catch cjsambx
#   "Library at the Katz Center - Archives"?
# @param [Array<String>] locations
# @return [Boolean]
def archives_but_not_cajs_or_nursing?(locations)
  return false unless locations.any? { |loc| /archives/i.match?(loc) }

  locations.none? { |loc| /cajs/i.match?(loc) || /nursing/i.match?(loc) }
end
|
293
|
+
|
294
|
+
# Consider {https://www.loc.gov/marc/bibliographic/bd007g.html 007} to determine graphical media format:
# Projected graphic when any 007 value begins with 'g', otherwise Video.
# @param [Array<String>] f007
# @return [String (frozen)]
def graphical_media_type(f007)
  projected = f007.any? { |value| value.start_with?('g') }
  projected ? PROJECTED_GRAPHIC : VIDEO
end
|
304
|
+
|
305
|
+
# Extract the two characters at leader positions 6-7, which the format predicates in this class use as the
# format code. Falls back to a single space when the leader is too short to have a position 6.
# @param [String] leader
# @return [String]
def leader_format(leader)
  leader[6, 2] || ' '
end
|
310
|
+
end
|
311
|
+
end
|
312
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Genre field values come from the {https://www.oclc.org/bibformats/en/6xx/655.html 655}, but for some
|
5
|
+
# contexts we are only interested in a subset of the declared terms in a record.
|
6
|
+
class Genre < Helper
|
7
|
+
class << self
|
8
|
+
# Genre values for searching. We're less picky about what is included here to enable discovery via any included
# 655 data: every 655 field contributes, with subfields 0, 2, 5 and c left out. Duplicates are removed.
#
# @param [MARC::Record] record
# @return [Array<String>] array of genre values for search
def search(record)
  searchable = subfield_not_in?(%w[0 2 5 c])
  record.fields('655').map { |field| join_subfields(field, &searchable) }.uniq
end
|
18
|
+
|
19
|
+
# Genre values for display. We display Genre/Term values if they fulfill the following criteria:
#  - The field is in {https://www.oclc.org/bibformats/en/6xx/655.html MARC 655}. Or the field is in MARC 880 with
#    a subfield 6 linkage value beginning with '655' (e.g. '655-01').
#    AND
#  - Above fields have an indicator 2 value of: 0 (LSCH) or 4 (No source specified).
#    OR
#  - Above fields have a subfield 2 (ontology code) in the list of allowed values.
# Subfields a and b are joined with a space, other displayed subfields with ' -- '. The values of subfields e
# and w are appended at the end, joined with ' -- '.
# @todo subfields e and w do not appear in the documentation for 655, but we give them special consideration here,
#   what gives?
# @note legacy method returns a link object
# @param [MARC::Record] record
# @return [Array<String>] array of genre values for display
def show(record)
  record.fields(%w[655 880]).filter_map do |field|
    next unless allowed_genre_field?(field)

    # 880 linkage values carry an occurrence suffix ("655-01/..."), so match on the tag prefix rather than
    # requiring a value exactly equal to '655'
    next if field.tag == '880' && subfield_values(field, '6').none? { |value| value.start_with?('655') }

    sub_with_hyphens = field.find_all(&subfield_not_in?(%w[0 2 5 6 8 c e w])).map do |sf|
      separator = %w[a b].include?(sf.code) ? ' ' : ' -- '
      separator + sf.value
    end.join.lstrip
    # join the subfield values, not the subfield objects (whose to_s includes the subfield code)
    e_and_w = field.find_all(&subfield_in?(%w[e w])).map(&:value).join(' -- ')
    "#{sub_with_hyphens} #{e_and_w}".strip
  end.uniq
end
|
44
|
+
|
45
|
+
# Genre values for faceting. Values are only returned when the record's locations include manuscripts (per
# Format.include_manuscripts?) or some 007 value begins with 'v' (video); otherwise the result is empty.
# @todo the Genre facet in Franklin is pretty ugly. It could be cleaned up by limiting the subfields included
#   here and cleaning up punctuation.
# @param [MARC::Record] record
# @param [Hash] location_map
# @return [Array<String>]
def facet(record, location_map)
  locations = Location.location(record: record, location_map: location_map, display_value: :specific_location)
  is_manuscript = Format.include_manuscripts?(locations)
  is_video = record.fields('007').any? { |field| field.value.start_with?('v') }

  if is_manuscript || is_video
    facetable = subfield_not_in?(%w[0 2 5 c])
    record.fields('655').filter_map { |field| join_subfields(field, &facetable) }.uniq
  else
    []
  end
end
|
61
|
+
|
62
|
+
private
|
63
|
+
|
64
|
+
# A genre field is allowed when its second indicator is 0 or 4, or when its subfield 2 holds one of
# PennMARC::HeadingControl::ALLOWED_SOURCE_CODES.
# @param [MARC::DataField] field
# @return [TrueClass, FalseClass]
def allowed_genre_field?(field)
  return true if %w[0 4].include?(field.indicator2)

  subfield_value_in?(field, '2', PennMARC::HeadingControl::ALLOWED_SOURCE_CODES)
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,134 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Parser methods for extracting identifier values.
|
5
|
+
class Identifier < Helper
|
6
|
+
class << self
|
7
|
+
# Get Alma MMS ID value from the first 001 field.
#
# @param [MARC::Record] record
# @return [String, nil] the 001 value, or nil when the record has no 001 field
def mmsid(record)
  # safe navigation: a record without an 001 yields nil instead of raising NoMethodError
  record.fields('001').first&.value
end
|
14
|
+
|
15
|
+
# Get normalized ISXN values for searching of a record. Values are aggregated from subfields 'a' and 'z' of the
# {https://www.oclc.org/bibformats/en/0xx/020.html 020 field} (each passed through normalize_isbn), and
# subfields 'a', 'l', and 'z' of the {https://www.oclc.org/bibformats/en/0xx/022.html 022 field} (used as-is).
# The result is flattened and de-duplicated.
#
# @param [MARC::Record] record
# @return [Array<String>]
def isxn_search(record)
  isbn_subfields = subfield_in?(%w[a z])
  issn_subfields = subfield_in?(%w[a l z])

  values_per_field = record.fields(%w[020 022]).map do |field|
    if field.tag == '020'
      field.select { |sf| isbn_subfields.call(sf) }.filter_map { |sf| normalize_isbn(sf.value) }
    else
      field.select { |sf| issn_subfields.call(sf) }.filter_map(&:value)
    end
  end
  values_per_field.flatten.uniq
end
|
30
|
+
|
31
|
+
# Get ISBN values for display from the {https://www.oclc.org/bibformats/en/0xx/020.html 020 field}
# and related {https://www.oclc.org/bibformats/en/8xx/880.html 880 field}. Subfields 'a' and 'z' are used;
# 020 fields that yield no value are left out.
#
# @param [MARC::Record] record
# @return [Array<String>]
# @todo look into z subfield for 020 field, should we show cancelled isbn?
def isbn_show(record)
  isbn_subfields = subfield_in?(%w[a z])
  from_020 = record.fields('020').map { |field| join_subfields(field, &isbn_subfields) }.select(&:present?)
  from_020 + linked_alternate(record, '020', &isbn_subfields)
end
|
45
|
+
|
46
|
+
# Get ISSN values for display from the {https://www.oclc.org/bibformats/en/0xx/022.html 022 field} and related
# {https://www.oclc.org/bibformats/en/8xx/880.html 880 field}. Subfields 'a' and 'z' are used; 022 fields that
# yield no value are left out.
#
# @param [MARC::Record] record
# @return [Array<String>]
def issn_show(record)
  issn_subfields = subfield_in?(%w[a z])
  from_022 = record.fields('022').map { |field| join_subfields(field, &issn_subfields) }.select(&:present?)
  from_022 + linked_alternate(record, '022', &issn_subfields)
end
|
59
|
+
|
60
|
+
# Get numeric OCLC ID of first {https://www.oclc.org/bibformats/en/0xx/035.html 035 field}
# with an OCLC ID defined in subfield 'a'. Only that first matching field is inspected; later 035 fields are
# ignored.
#
# @todo We should evaluate this to return a single value in the future since subfield a is non-repeatable
# @param [MARC::Record] record
# @return [Array<String>]
def oclc_id(record)
  oclc_field = record.fields('035').find { |field| field.any? { |subfield| subfield_a_is_oclc?(subfield) } }
  return [] if oclc_field.nil?

  oclc_subfields = oclc_field.select { |subfield| subfield_a_is_oclc?(subfield) }
  # capture the numeric part of the oclc id (e.g. '610094484' in '(OCoLC)ocn610094484'); values without a
  # numeric part are dropped
  oclc_subfields.filter_map { |subfield| subfield.value[/^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/, 1] }
end
|
85
|
+
|
86
|
+
# Get publisher issued identifiers from fields {https://www.oclc.org/bibformats/en/0xx/024.html 024},
# {https://www.oclc.org/bibformats/en/0xx/028.html 028}, and related
# {https://www.oclc.org/bibformats/en/8xx/880.html 880 field}. Subfields 5 and 6 are excluded; 024/028 fields
# that yield no value are left out.
#
# @param [MARC::Record] record
# @return [Array<String>]
def publisher_number_show(record)
  tags = %w[024 028]
  displayable = subfield_not_in?(%w[5 6])
  direct = record.fields(tags).map { |field| join_subfields(field, &displayable) }.select(&:present?)
  direct + linked_alternate(record, tags, &displayable)
end
|
100
|
+
|
101
|
+
# Get publisher issued identifiers for searching of a record. Values are the subfield 'a' content of fields
# {https://www.oclc.org/bibformats/en/0xx/024.html 024} and {https://www.oclc.org/bibformats/en/0xx/028.html 028};
# fields that yield no value are left out.
#
# @param [MARC::Record] record
# @return [Array<String>]
def publisher_number_search(record)
  only_a = subfield_in?(%w[a])
  record.fields(%w[024 028]).map { |field| join_subfields(field, &only_a) }.select(&:present?)
end
|
112
|
+
|
113
|
+
private
|
114
|
+
|
115
|
+
# Determine if a subfield is a subfield 'a' whose value begins with the '(OCoLC)' prefix.
#
# @param [MARC::Subfield] subfield
# @return [TrueClass, FalseClass]
def subfield_a_is_oclc?(subfield)
  # Regexp#match? gives a real boolean (=~ returned a match index or nil) and is false for a nil value
  subfield.code == 'a' && /^\(OCoLC\).*/.match?(subfield.value)
end
|
122
|
+
|
123
|
+
# Normalize isbn value using {https://github.com/billdueber/library_stdnums library_stdnums gem}.
# Converts ISBN10 (ten-digit) to validated ISBN13 (thirteen-digit) and returns both values. If passed
# ISBN13 parameter, only returns validated ISBN13 value.
#
# @param [String] isbn
# @return [Array<String, String>, nil]
def normalize_isbn(isbn)
  # pure delegation to StdNum::ISBN; a nil result is dropped by the filter_map in isxn_search
  StdNum::ISBN.allNormalizedValues(isbn)
end
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
|
5
|
+
# the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
|
6
|
+
# is then mapped to a display-friendly value using a provided mapping hash.
|
7
|
+
# @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
|
8
|
+
class Language < Helper
|
9
|
+
# Used when no value is present in the control field - still mapped
|
10
|
+
UNDETERMINED_CODE = :und
|
11
|
+
|
12
|
+
class << self
|
13
|
+
# Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
# related 880. Subfields 6 and 8 are excluded.
# @param [MARC::Record] record
# @return [Array<String>] language values and notes
def show(record)
  displayable = subfield_not_in?(%w[6 8])
  notes = record.fields('546').map { |field| join_subfields(field, &displayable) }
  notes + linked_alternate(record, '546', &displayable)
end
|
23
|
+
|
24
|
+
# Get language values for searching and faceting of a record. The three-letter code is read from positions
# 35-37 of the 008 control field and looked up (as a symbol) in the mapping. When the record has no 008, the
# 008 is too short, or the code is blank, UNDETERMINED_CODE is looked up instead. Language facet and search
# values will typically be the same.
#
# @param [MARC::Record] record
# @param [Hash] mapping hash for language code translation
# @return [String, nil] nice value for language, or nil when the mapping has no entry for the code
def search(record, mapping)
  control_field = record['008']&.value
  # control_field is nil when there is no 008; the slice is nil when the value is shorter than 36 characters
  language_code = control_field && control_field[35..37]
  # String#to_sym never returns nil, so the fallback has to be decided before converting
  no_code = language_code.nil? || language_code.strip.empty?
  mapping[no_code ? UNDETERMINED_CODE : language_code.to_sym]
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|