pennmarc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
@@ -0,0 +1,90 @@
# frozen_string_literal: true

module PennMARC
  # Extracts edition statements (MARC 250) and "other edition" entries (MARC 775), together with their linked
  # 880 alternates, from a record.
  class Edition < Helper
    class << self
      # Edition values for display on a record page. Field 250 is information relating to the edition of a work as
      # determined by applicable cataloging rules. For mixed materials, field 250 is used to record statements relating
      # to collections that contain versions of works existing in two or more versions (or states) in single or multiple
      # copies (e.g., different drafts of a film script). For continuing resources, this field is not used for
      # sequential edition statements such as 1st- ed. This type of information is contained in field 362 (Dates of
      # Publication and/or Volume Designation).
      # https://www.loc.gov/marc/bibliographic/bd250.html
      # @param [MARC::Record] record
      # @return [Array<String>] array of editions and their alternates
      def show(record)
        editions = record.fields('250').map do |field|
          join_subfields(field, &subfield_not_in?(%w[6 8]))
        end
        editions + linked_alternate_not_6_or_8(record, '250')
      end

      # Edition values for display in search results. Just grab the first 250 field.
      # @param [MARC::Record] record
      # @return [String, NilClass] string of all first 250 subfields, excluding 6 and 8; nil when there is no 250
      def values(record)
        edition = record.fields('250').first
        return unless edition.present?

        join_subfields(edition, &subfield_not_in?(%w[6 8]))
      end

      # Entry for another available edition of the target item (horizontal relationship). When a note is generated
      # from this field, the introductory phrase Other editions available: may be generated based on the field tag for
      # display.
      # https://www.loc.gov/marc/bibliographic/bd775.html
      # @param [MARC::Record] record
      # @param [Hash] relator_mapping passed through to translate_relator
      # @return [Array<String>] array of other edition strings: 775 values first, then linked 880 values
      def other_show(record, relator_mapping)
        other_editions = record.fields('775').filter_map do |field|
          next unless subfield_defined?(field, :i)

          other_edition_value(field, relator_mapping)
        end
        linked_editions = record.fields('880').filter_map do |field|
          next unless linked_other_edition?(field)

          other_edition_value(field, relator_mapping)
        end
        other_editions + linked_editions
      end

      private

      # Decide whether an 880 field is the alternate-script form of a 775 that should be shown.
      # The linkage subfield (6) of an 880 carries the linked tag followed by an occurrence number
      # (e.g. "775-01"), so the tag is matched as a prefix rather than by exact equality.
      # @param [MARC::DataField] field
      # @return [Boolean]
      def linked_other_edition?(field)
        field.indicator2.blank? && subfield_value?(field, '6', /^775/) && subfield_defined?(field, 'i')
      end

      # Assemble a string of relevant edition information.
      # @param [MARC::DataField] field
      # @param [Hash] relator_mapping
      # @return [String] assembled other version string
      def other_edition_value(field, relator_mapping)
        subi = remove_paren_value_from_subfield_i(field) || ''

        # s, x and z are used verbatim; t is passed through the relator translation and dropped when that is blank
        other_editions = field.filter_map do |sf|
          case sf.code
          when 's', 'x', 'z'
            " #{sf.value}"
          when 't'
            relator = translate_relator(sf.value, relator_mapping)
            " #{relator}. " if relator.present?
          end
        end.join

        # h is wrapped in parentheses; every subfield not otherwise handled or excluded is appended as-is
        other_editions_append = field.filter_map do |sf|
          case sf.code
          when 'h'
            " (#{sf.value}) "
          when '6', '8', 'i', 's', 't', 'x', 'z', 'e', 'f', 'o', 'r', 'w', 'y', '7'
            nil
          else
            " #{sf.value}"
          end
        end.join

        prefix = trim_trailing(:period, subi).squish
        return prefix if other_editions.blank? && other_editions_append.blank?

        "#{prefix}: #{other_editions} #{other_editions_append}".squish
      end
    end
  end
end
@@ -0,0 +1,312 @@
# frozen_string_literal: true

module PennMARC
  # Handle parsing out "Format" and "Other Format" values. Special care goes into controlling the format values for
  # faceting.
  class Format < Helper
    class << self
      # These constants represent the set of desired Format values for faceting.
      ARCHIVE = 'Archive'
      BOOK = 'Book'
      CONFERENCE_EVENT = 'Conference/Event'
      DATAFILE = 'Datafile'
      GOVDOC = 'Government document'
      IMAGE = 'Image'
      JOURNAL_PERIODICAL = 'Journal/Periodical'
      MANUSCRIPT = 'Manuscript'
      MAP_ATLAS = 'Map/Atlas'
      MICROFORMAT = 'Microformat'
      MUSICAL_SCORE = 'Musical score'
      NEWSPAPER = 'Newspaper'
      OTHER = 'Other'
      PROJECTED_GRAPHIC = 'Projected graphic'
      SOUND_RECORDING = 'Sound recording'
      THESIS_DISSERTATION = 'Thesis/Dissertation'
      THREE_D_OBJECT = '3D object'
      VIDEO = 'Video'
      WEBSITE_DATABASE = 'Website/Database'

      # Get any Format values from {https://www.oclc.org/bibformats/en/3xx/300.html 300},
      # 254, 255, 310, 342, 352, 362 or {https://www.oclc.org/bibformats/en/3xx/340.html 340} field. Based on the
      # source field, different subfields are used. Linked 880 fields are included only when their subfield 6 points
      # at one of those tags; any other 880 is skipped.
      # @note ported from get_format_display
      # @param [MARC::Record] record
      # @return [Array<String>] format values for display
      def show(record)
        results = record.fields('300').map { |f| join_subfields(f, &subfield_not_in?(%w[3 6 8])) }
        results += record.fields(%w[254 255 310 342 352 362]).map do |f|
          join_subfields(f, &subfield_not_in?(%w[6 8]))
        end
        results += record.fields('340').map { |f| join_subfields(f, &subfield_not_in?(%w[0 2 6 8])) }
        results += record.fields('880').filter_map do |f|
          ignored = linked_subfields_to_ignore(f)
          # an 880 linked to some other tag (title, creator, ...) carries no format information
          next if ignored.nil?

          join_subfields(f, &subfield_not_in?(ignored))
        end
        results.compact_blank
      end

      # Get Format values for faceting. Format values are determined using complex logic for each possible format value.
      # The primary fields considered in determining Format facet values are:
      #
      # 1. "Type of Record" and "Bibliographic level" values extracted from the
      #    {https://www.loc.gov/marc/bibliographic/bdleader.html MARC leader}.
      # 2. Location name values and "Classification part" from Alma "enhanced" MARC holding/item information
      # 3. {https://www.loc.gov/marc/bibliographic/bd007.html 007} values, the first
      #    {https://www.loc.gov/marc/bibliographic/bd008.html 008} value, and the first character from all
      #    {https://www.loc.gov/marc/bibliographic/bd006.html 006} values (form)
      # 4. Medium values from {https://www.oclc.org/bibformats/en/2xx/245.html#subfieldh 245 ǂh}
      # 5. Media Type values from {https://www.oclc.org/bibformats/en/3xx/337.html#subfielda 337 ǂa}
      # Additional fields are considered for many of the formats. Much of this logic has been moved to private methods
      # to keep this method from becoming too unwieldy.
      # @todo is the conditional structure here still best practice? see the "Thesis on Microfilm" case in the specs
      #       for this helper method
      # @note ported from get_format
      # @param [MARC::Record] record
      # @param [Hash] location_map
      # @return [Array<String>] format values for faceting
      def facet(record, location_map)
        formats = []
        format_code = leader_format(record.leader)
        f007 = record.fields('007').map(&:value)
        f008 = record.fields('008').first&.value || ''
        f006_forms = record.fields('006').map { |field| field.value[0] }
        title_medium = subfield_values_for tag: '245', subfield: :h, record: record
        media_type = subfield_values_for tag: '337', subfield: :a, record: record

        # Get Call Number for holdings - ǂh gives us the 'Classification part' which can contain strings like
        # 'Microfilm'
        call_nums = record.fields(EnrichedMarc::TAG_HOLDING).map do |field|
          join_subfields(field, &subfield_in?([EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART,
                                               EnrichedMarc::SUB_HOLDING_ITEM_PART]))
        end

        # get all specific_location values from inventory info
        locations = Location.location record: record, location_map: location_map, display_value: :specific_location

        # Manuscript, Archive and Microformat each short-circuit every other leader/field based format
        if include_manuscripts?(locations)
          formats << MANUSCRIPT
        elsif archives_but_not_cajs_or_nursing?(locations)
          formats << ARCHIVE
        elsif micro_or_microform?(call_nums, locations, media_type, title_medium)
          formats << MICROFORMAT
        else
          # any of these
          formats << THESIS_DISSERTATION if thesis_or_dissertation?(format_code, record)
          formats << CONFERENCE_EVENT if conference_event?(record)
          formats << NEWSPAPER if newspaper?(f008, format_code)
          formats << GOVDOC if government_document?(f008, record, format_code)

          # but only one of these
          formats << exclusive_format(format_code, title_medium, f006_forms, f007, record)
        end
        formats.concat(curated_format(record))
      end

      # Show "Other Format" values from {https://www.oclc.org/bibformats/en/7xx/776.html 776} and any 880 linkage.
      # @todo is 774 an error in the linked field in legacy? i changed to 776 here
      # @param [MARC::Record] record
      # @return [Array] other format values for display
      def other_show(record)
        other_formats = record.fields('776').filter_map do |field|
          join_subfields(field, &subfield_in?(%w[i a s t o])).presence
        end
        other_formats + linked_alternate(record, '776', &subfield_in?(%w[i a s t o]))
      end

      # Check if a set of locations has any locations that include the term 'manuscripts'
      # @param [Array<String>] locations
      # @return [Boolean]
      def include_manuscripts?(locations)
        locations.any? { |loc| /manuscripts/i.match?(loc) }
      end

      private

      # Choose the subfields to leave out of a linked 880 value, based on the tag named in its subfield 6.
      # @param [MARC::DataField] field an 880 field
      # @return [Array<String>, nil] subfield codes to ignore, or nil when the field is not linked to a tag
      #   handled by {show}
      def linked_subfields_to_ignore(field)
        if subfield_value?(field, '6', /^300/)
          %w[3 6 8]
        elsif subfield_value?(field, '6', /^(254|255|310|342|352|362)/)
          %w[6 8]
        elsif subfield_value?(field, '6', /^340/)
          %w[0 2 6 8]
        end
      end

      # Pick the single mutually-exclusive format for a record. The first matching check wins, so the order of the
      # branches below is significant.
      # @param [String] format_code
      # @param [Array<String>] title_medium
      # @param [Array<String>] f006_forms
      # @param [Array<String>] f007
      # @param [MARC::Record] record
      # @return [String]
      def exclusive_format(format_code, title_medium, f006_forms, f007, record)
        if website_database?(f006_forms, format_code)
          WEBSITE_DATABASE
        elsif book?(format_code, title_medium, record)
          BOOK
        elsif musical_score?(format_code)
          MUSICAL_SCORE
        elsif map_atlas?(format_code)
          MAP_ATLAS
        elsif graphical_media?(format_code)
          graphical_media_type(f007)
        elsif sound_recording?(format_code)
          SOUND_RECORDING
        elsif image?(format_code)
          IMAGE
        elsif datafile?(format_code)
          DATAFILE
        elsif journal_periodical?(format_code)
          JOURNAL_PERIODICAL
        elsif three_d_object?(format_code)
          THREE_D_OBJECT
        else
          OTHER
        end
      end

      # Get 'Curated' format values from
      # {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Alma local field
      # 944} ǂa, as long as it is not a numerical value.
      # @param [MARC::Record] record
      # @return [Array<String>] unique, non-numeric 944 ǂa values
      def curated_format(record)
        record.fields('944').filter_map do |field|
          subfield_a = field.find { |sf| sf.code == 'a' }
          # a value that survives an Integer round-trip unchanged is purely numeric and is not a format name
          next if subfield_a.nil? || (subfield_a.value == subfield_a.value.to_i.to_s)

          subfield_a.value
        end.uniq
      end

      # @param [String] format_code
      # @return [Boolean]
      def image?(format_code)
        %w[km kd].include?(format_code)
      end

      # @param [String] format_code
      # @return [Boolean]
      def datafile?(format_code)
        format_code == 'mm'
      end

      # @param [String] format_code
      # @return [Boolean]
      def journal_periodical?(format_code)
        %w[as gs].include?(format_code)
      end

      # @param [String] format_code
      # @return [Boolean]
      def three_d_object?(format_code)
        format_code.start_with?('r')
      end

      # @param [String] format_code
      # @return [Boolean]
      def sound_recording?(format_code)
        %w[im jm jc jd js].include?(format_code)
      end

      # @param [String] format_code
      # @return [Boolean]
      def graphical_media?(format_code)
        format_code == 'gm'
      end

      # @param [String] format_code
      # @return [Boolean]
      def map_atlas?(format_code)
        format_code&.start_with?('e') || format_code == 'fm'
      end

      # @param [String] format_code
      # @return [Boolean]
      def musical_score?(format_code)
        %w[ca cb cd cm cs dm].include?(format_code)
      end

      # A book has a book-like leader code, no "kit" in 245 ǂk and no "micro" in the title medium.
      # @param [String] format_code
      # @param [Array<String>] title_medium
      # @param [MARC::Record] record
      # @return [Boolean]
      def book?(format_code, title_medium, record)
        title_forms = subfield_values_for tag: '245', subfield: :k, record: record
        %w[aa ac am tm].include?(format_code) &&
          title_forms.none? { |v| /kit/i.match?(v) } &&
          title_medium.none? { |v| /micro/i.match?(v) }
      end

      # @param [Array<String>] f006_forms
      # @param [String] format_code
      # @return [Boolean]
      def website_database?(f006_forms, format_code)
        format_code&.end_with?('i') ||
          (format_code == 'am' && f006_forms.include?('m') && f006_forms.include?('s'))
      end

      # @param [String] f008
      # @param [MARC::Record] record
      # @param [String] format_code
      # @return [Boolean]
      def government_document?(f008, record, format_code)
        # is a 260 entry present, and does it have a b that matches 'press'
        f260press = record.fields('260').any? do |field|
          field.any? { |sf| sf.code == 'b' && /press/i.match?(sf.value) }
        end
        !%w[c d i j].include?(format_code[0]) && %w[f i o].include?(f008[28]) && !f260press
      end

      # @param [String] f008
      # @param [String] format_code
      # @return [Boolean]
      def newspaper?(f008, format_code)
        format_code == 'as' && (f008[21] == 'n' || f008[22] == 'e')
      end

      # @param [MARC::Record] record
      # @return [Boolean]
      def conference_event?(record)
        record.fields('111').any? || record.fields('711').any? # TODO: use field_present helper here and below?
      end

      # @param [String] format_code
      # @param [MARC::Record] record
      # @return [Boolean]
      def thesis_or_dissertation?(format_code, record)
        record.fields('502').any? && format_code == 'tm'
      end

      # @param [Array<String>] call_nums
      # @param [Array<String>] locations
      # @param [Array<String>] media_type
      # @param [Array<String>] title_medium
      # @return [Boolean]
      def micro_or_microform?(call_nums, locations, media_type, title_medium)
        locations.any? { |loc| /micro/i.match?(loc) } ||
          title_medium.any? { |val| /micro/i.match?(val) } ||
          call_nums.any? { |val| /micro/i.match?(val) } ||
          media_type.any? { |val| /microform/i.match?(val) }
      end

      # @todo "cajs" has no match in our location map, so it is not doing anything. Does this intend to catch cjsambx
      #       "Library at the Katz Center - Archives"?
      # @param [Array<String>] locations
      # @return [Boolean]
      def archives_but_not_cajs_or_nursing?(locations)
        locations.any? { |loc| /archives/i.match?(loc) } &&
          locations.none? { |loc| /cajs/i.match?(loc) } &&
          locations.none? { |loc| /nursing/i.match?(loc) }
      end

      # Consider {https://www.loc.gov/marc/bibliographic/bd007g.html 007} to determine graphical media format
      # @param [Array<String>] f007
      # @return [String]
      def graphical_media_type(f007)
        if f007.any? { |v| v.start_with?('g') }
          PROJECTED_GRAPHIC
        else
          VIDEO
        end
      end

      # Two-character format code taken from leader positions 6 and 7.
      # @param [String] leader
      # @return [String]
      def leader_format(leader)
        leader[6..7] || ' '
      end
    end
  end
end
@@ -0,0 +1,71 @@
# frozen_string_literal: true

module PennMARC
  # Genre field values come from the {https://www.oclc.org/bibformats/en/6xx/655.html 655}, but for some
  # contexts we are only interested in a subset of the declared terms in a record.
  class Genre < Helper
    class << self
      # Genre values for searching. We're less picky about what is included here to enable discovery via any included
      # 655 data.
      #
      # @param [MARC::Record] record
      # @return [Array<String>] array of genre values for search
      def search(record)
        record.fields('655').map do |field|
          join_subfields(field, &subfield_not_in?(%w[0 2 5 c]))
        end.uniq
      end

      # Genre values for display. We display Genre/Term values if they fulfill the following criteria:
      #  - The field is in {https://www.oclc.org/bibformats/en/6xx/655.html MARC 655}. Or the field is in MARC 880 with
      #    subfield 6 starting with '655'.
      #    AND
      #  - Above fields have an indicator 2 value of: 0 (LCSH) or 4 (No source specified).
      #    OR
      #  - Above fields have a subfield 2 (ontology code) in the list of allowed values.
      # @todo subfields e and w do not appear in the documentation for 655, but we give them special consideration here,
      #       what gives?
      # @note legacy method returns a link object
      # @param [MARC::Record] record
      # @return [Array<String>] array of genre values for display
      def show(record)
        record.fields(%w[655 880]).filter_map do |field|
          next unless allowed_genre_field?(field)

          # subfield 6 of an 880 holds the linked tag plus an occurrence number (e.g. "655-01"), so match the tag
          # as a prefix
          next if field.tag == '880' && !subfield_value?(field, '6', /^655/)

          # a and b are space separated; every other retained subfield is introduced with ' -- '
          terms = field.find_all(&subfield_not_in?(%w[0 2 5 6 8 c e w])).map do |sf|
            separator = %w[a b].include?(sf.code) ? ' ' : ' -- '
            separator + sf.value
          end.join.lstrip

          # join the subfield values, not the subfield objects themselves
          relations = field.find_all(&subfield_in?(%w[e w])).map(&:value).join(' -- ')
          "#{terms} #{relations}".strip
        end.uniq
      end

      # Genre values for faceting. We only set Genre facet values for movies (videos) and manuscripts(?)
      # @todo the Genre facet in Franklin is pretty ugly. It could be cleaned up by limiting the subfields included
      #       here and cleaning up punctuation.
      # @param [MARC::Record] record
      # @param [Hash] location_map
      # @return [Array<String>] unique 655 values, or an empty array when the record is neither manuscript nor video
      def facet(record, location_map)
        locations = Location.location record: record, location_map: location_map, display_value: :specific_location
        manuscript = Format.include_manuscripts?(locations)
        video = record.fields('007').any? { |field| field.value.start_with?('v') }
        return [] unless manuscript || video

        record.fields('655').filter_map do |field|
          join_subfields field, &subfield_not_in?(%w[0 2 5 c])
        end.uniq
      end

      private

      # A genre field is allowed when its second indicator is 0 or 4, or when its subfield 2 is one of the
      # allowed source codes.
      # @param [MARC::DataField] field
      # @return [TrueClass, FalseClass]
      def allowed_genre_field?(field)
        %w[0 4].include?(field.indicator2) ||
          subfield_value_in?(field, '2', PennMARC::HeadingControl::ALLOWED_SOURCE_CODES)
      end
    end
  end
end
@@ -0,0 +1,11 @@
# frozen_string_literal: true

require_relative '../enriched_marc'
require_relative '../util'

module PennMARC
  # Common superclass for the field-specific helper classes (Edition, Format, Genre, ...).
  # Util is mixed in with `extend`, so its methods are available as class-level methods on Helper
  # and on every subclass.
  class Helper
    extend Util
  end
end
@@ -0,0 +1,134 @@
# frozen_string_literal: true

module PennMARC
  # Parser methods for extracting identifier values.
  class Identifier < Helper
    class << self
      # Get Alma MMS ID value
      #
      # @param [MARC::Record] record
      # @return [String, nil] value of the first 001 field, or nil when the record has no 001
      def mmsid(record)
        record.fields('001').first&.value
      end

      # Get normalized ISXN values for searching of a record. Values aggregated from subfield 'a' and 'z' of the
      # {https://www.oclc.org/bibformats/en/0xx/020.html 020 field}, and subfield 'a', 'l', and 'z' of the
      # {https://www.oclc.org/bibformats/en/0xx/022.html 022 field}.
      #
      # @param [MARC::Record] record
      # @return [Array<String>]
      def isxn_search(record)
        record.fields(%w[020 022]).flat_map do |field|
          if field.tag == '020'
            # ISBNs are normalized; each one may expand to several values, or to none
            field.select(&subfield_in?(%w[a z])).flat_map { |subfield| Array(normalize_isbn(subfield.value)) }
          else
            # ISSNs are taken as they appear
            field.select(&subfield_in?(%w[a l z])).map(&:value)
          end
        end.uniq
      end

      # Get ISBN values for display from the {https://www.oclc.org/bibformats/en/0xx/020.html 020 field}
      # and related {https://www.oclc.org/bibformats/en/8xx/880.html 880 field}.
      #
      # @param [MARC::Record] record
      # @return [Array<String>]
      # @todo look into z subfield for 020 field, should we show cancelled isbn?
      def isbn_show(record)
        isbn_values = record.fields('020').filter_map do |field|
          join_subfields(field, &subfield_in?(%w[a z])).presence
        end
        isbn_values + linked_alternate(record, '020', &subfield_in?(%w[a z]))
      end

      # Get ISSN values for display from the {https://www.oclc.org/bibformats/en/0xx/022.html 022 field} and related
      # {https://www.oclc.org/bibformats/en/8xx/880.html 880 field}.
      #
      # @param [MARC::Record] record
      # @return [Array<String>]
      def issn_show(record)
        issn_values = record.fields('022').filter_map do |field|
          join_subfields(field, &subfield_in?(%w[a z])).presence
        end
        issn_values + linked_alternate(record, '022', &subfield_in?(%w[a z]))
      end

      # Get numeric OCLC ID of first {https://www.oclc.org/bibformats/en/0xx/035.html 035 field}
      # with an OCLC ID defined in subfield 'a'.
      #
      # @todo We should evaluate this to return a single value in the future since subfield a is non-repeatable
      # @param [MARC::Record] record
      # @return [Array<String>] empty when no 035 field carries an OCLC ID
      def oclc_id(record)
        oclc_field = record.fields('035').find do |field|
          field.any? { |subfield| subfield_a_is_oclc?(subfield) }
        end
        return [] if oclc_field.nil?

        oclc_field.filter_map do |subfield|
          # skip unless subfield 'a' is an oclc id value
          next unless subfield_a_is_oclc?(subfield)

          # numeric part of oclc id (e.g. '610094484' in '(OCoLC)ocn610094484'); nil, and so skipped, when absent
          subfield.value[/^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/, 1]
        end
      end

      # Get publisher issued identifiers from fields {https://www.oclc.org/bibformats/en/0xx/024.html 024},
      # {https://www.oclc.org/bibformats/en/0xx/028.html 028}, and related
      # {https://www.oclc.org/bibformats/en/8xx/880.html 880 field}.
      #
      # @param [MARC::Record] record
      # @return [Array<String>]
      def publisher_number_show(record)
        publisher_numbers = record.fields(%w[024 028]).filter_map do |field|
          join_subfields(field, &subfield_not_in?(%w[5 6])).presence
        end
        publisher_numbers + linked_alternate(record, %w[024 028], &subfield_not_in?(%w[5 6]))
      end

      # Get publisher issued identifiers for searching of a record. Values extracted from fields
      # {https://www.oclc.org/bibformats/en/0xx/024.html 024} and {https://www.oclc.org/bibformats/en/0xx/028.html 028}.
      #
      # @param [MARC::Record] record
      # @return [Array<String>]
      def publisher_number_search(record)
        record.fields(%w[024 028]).filter_map do |field|
          join_subfields(field, &subfield_in?(%w[a])).presence
        end
      end

      private

      # Determine if subfield 'a' is an OCLC id.
      #
      # @param [MARC::Subfield] subfield
      # @return [TrueClass, FalseClass]
      def subfield_a_is_oclc?(subfield)
        subfield.code == 'a' && /^\(OCoLC\)/.match?(subfield.value)
      end

      # Normalize isbn value using {https://github.com/billdueber/library_stdnums library_stdnums gem}.
      # Converts ISBN10 (ten-digit) to validated ISBN13 (thirteen-digit) and returns both values. If passed
      # ISBN13 parameter, only returns validated ISBN13 value.
      #
      # @param [String] isbn
      # @return [Array<String, String>, nil]
      def normalize_isbn(isbn)
        StdNum::ISBN.allNormalizedValues(isbn)
      end
    end
  end
end
@@ -0,0 +1,37 @@
# frozen_string_literal: true

module PennMARC
  # Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
  # the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
  # is then mapped to a display-friendly value using a provided mapping hash.
  # @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
  class Language < Helper
    # Used when no value is present in the control field - still mapped
    UNDETERMINED_CODE = :und

    class << self
      # Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
      # related 880.
      # @param [MARC::Record] record
      # @return [Array<String>] language values and notes
      def show(record)
        values = record.fields('546').map do |field|
          join_subfields field, &subfield_not_in?(%w[6 8])
        end
        values + linked_alternate(record, '546', &subfield_not_in?(%w[6 8]))
      end

      # Get language values for searching and faceting of a record. The value is extracted from a defined position in
      # the 008 control field. Language facet and search values will typically be the same. When the record has no
      # 008 field, or the 008 value is too short or blank at positions 35-37, UNDETERMINED_CODE is looked up instead.
      #
      # @param [MARC::Record] record
      # @param [Hash] mapping hash for language code translation, keyed by symbol
      # @return [String, nil] nice value for language; whatever the mapping returns for the code
      def search(record, mapping)
        control_value = record['008']&.value
        # slicing past the end of a short 008 value yields nil or an empty string
        language_code = control_value && control_value[35..37]
        code_missing = language_code.nil? || language_code.strip.empty?
        mapping[code_missing ? UNDETERMINED_CODE : language_code.to_sym]
      end
    end
  end
end
@@ -0,0 +1,12 @@
# frozen_string_literal: true

module PennMARC
  # Placeholder helper for link values. Neither method has an implementation yet.
  class Link < Helper
    class << self
      # Not yet implemented; the body is empty, so this always returns nil.
      # @param record presumably a MARC::Record, as taken by the other helpers - TODO confirm
      # @return [nil]
      def full_text(record:)
      end

      # Not yet implemented; the body is empty, so this always returns nil.
      # @param record presumably a MARC::Record, as taken by the other helpers - TODO confirm
      # @return [nil]
      def web(record:)
      end
    end
  end
end