pennmarc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Extracts edition information from a MARC record: edition statements from field 250, "other edition" entries
  # from field 775, and the 880 fields linked to them.
  class Edition < Helper
    class << self
      # Edition values for display on a record page. Field 250 is information relating to the edition of a work as
      # determined by applicable cataloging rules. For mixed materials, field 250 is used to record statements relating
      # to collections that contain versions of works existing in two or more versions (or states) in single or multiple
      # copies (e.g., different drafts of a film script). For continuing resources, this field is not used for
      # sequential edition statements such as 1st- ed. This type of information is contained in field 362 (Dates of
      # Publication and/or Volume Designation).
      # https://www.loc.gov/marc/bibliographic/bd250.html
      # @param [MARC::Record] record
      # @return [Array<String>] array of editions and their alternates
      def show(record)
        record.fields('250').map do |field|
          join_subfields(field, &subfield_not_in?(%w[6 8]))
        end + linked_alternate_not_6_or_8(record, '250')
      end

      # Edition values for display in search results. Just grab the first 250 field.
      # @param [MARC::Record] record
      # @return [String, NilClass] string of all first 250 subfields, excluding 6 and 8; nil when there is no 250
      def values(record)
        edition = record.fields('250').first
        return unless edition.present?

        join_subfields(edition, &subfield_not_in?(%w[6 8]))
      end

      # Entry for another available edition of the target item (horizontal relationship). When a note is generated
      # from this field, the introductory phrase Other editions available: may be generated based on the field tag for
      # display. Only fields that define a subfield i are considered; 775 values come first, followed by values from
      # matching 880 fields.
      # https://www.loc.gov/marc/bibliographic/bd775.html
      # @param [MARC::Record] record
      # @param [Hash] relator_mapping passed through to the relator translation of each field
      # @return [Array<String>] array of other edition strings
      def other_show(record, relator_mapping)
        # NOTE(review): subfield_value_in? is given the bare tag '775'; if it compares whole subfield values, a
        # linkage value such as '775-01' would not match - confirm against Util.
        linked = record.fields('880').select do |field|
          field.indicator2.blank? && subfield_value_in?(field, '6', %w[775])
        end

        (record.fields('775') + linked).filter_map do |field|
          next unless subfield_defined?(field, 'i')

          other_edition_value(field, relator_mapping)
        end
      end

      private

      # Assemble a string of relevant edition information. The result is "<subfield i>: <s/t/x/z part> <rest>",
      # or just the trimmed subfield i text when no other subfield contributes a value.
      # @param [MARC::DataField] field
      # @param [Hash] relator_mapping
      # @return [String (frozen)] assembled other version string
      def other_edition_value(field, relator_mapping)
        subi = remove_paren_value_from_subfield_i(field) || ''

        # first part: subfields s, x and z verbatim, subfield t run through the relator translation
        # NOTE(review): subfield t is handed to translate_relator - confirm this is the intended subfield
        other_editions = field.filter_map do |sf|
          next if %w[6 8].member?(sf.code)

          if %w[s x z].member?(sf.code)
            " #{sf.value}"
          elsif sf.code == 't'
            relator = translate_relator(sf.value, relator_mapping)
            next if relator.blank?

            " #{relator}. "
          end
        end.join

        # second part: every subfield not handled above or explicitly excluded, with subfield h parenthesized
        other_editions_append = field.filter_map do |sf|
          next if %w[6 8].member?(sf.code)

          if %w[i h s t x z e f o r w y 7].exclude?(sf.code)
            " #{sf.value}"
          elsif sf.code == 'h'
            " (#{sf.value}) "
          end
        end.join

        prepend = trim_trailing(:period, subi).squish

        if other_editions.present? || other_editions_append.present?
          "#{prepend}: #{other_editions} #{other_editions_append}".squish
        else
          prepend
        end
      end
    end
  end
end
@@ -0,0 +1,312 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Handle parsing out "Format" and "Other Format" values. Special care goes into controlling the format values for
  # faceting.
  class Format < Helper
    class << self
      # These constants represent the set of desired Format values for faceting.
      ARCHIVE = 'Archive'
      BOOK = 'Book'
      CONFERENCE_EVENT = 'Conference/Event'
      DATAFILE = 'Datafile'
      GOVDOC = 'Government document'
      IMAGE = 'Image'
      JOURNAL_PERIODICAL = 'Journal/Periodical'
      MANUSCRIPT = 'Manuscript'
      MAP_ATLAS = 'Map/Atlas'
      MICROFORMAT = 'Microformat'
      MUSICAL_SCORE = 'Musical score'
      NEWSPAPER = 'Newspaper'
      OTHER = 'Other'
      PROJECTED_GRAPHIC = 'Projected graphic'
      SOUND_RECORDING = 'Sound recording'
      THESIS_DISSERTATION = 'Thesis/Dissertation'
      THREE_D_OBJECT = '3D object'
      VIDEO = 'Video'
      WEBSITE_DATABASE = 'Website/Database'

      # Get any Format values from {https://www.oclc.org/bibformats/en/3xx/300.html 300},
      # 254, 255, 310, 342, 352, 362 or {https://www.oclc.org/bibformats/en/3xx/340.html 340} field. Based on the
      # source field, different subfields are used. 880 fields are included only when their subfield 6 links them to
      # one of those source fields; any other 880 is skipped.
      # @note ported from get_format_display
      # @param [MARC::Record] record
      # @return [Array<String>] format values for display
      def show(record)
        results = record.fields('300').map { |f| join_subfields(f, &subfield_not_in?(%w[3 6 8])) }
        results += record.fields(%w[254 255 310 342 352 362]).map do |f|
          join_subfields(f, &subfield_not_in?(%w[6 8]))
        end
        results += record.fields('340').map { |f| join_subfields(f, &subfield_not_in?(%w[0 2 6 8])) }
        results += record.fields('880').filter_map do |f|
          subfields_to_ignore = if subfield_value?(f, '6', /^300/)
                                  %w[3 6 8]
                                elsif subfield_value?(f, '6', /^(254|255|310|342|352|362)/)
                                  %w[6 8]
                                elsif subfield_value?(f, '6', /^340/)
                                  %w[0 2 6 8]
                                end
          # an 880 linked to some other field has no ignore list and must not contribute a format value
          next if subfields_to_ignore.nil?

          join_subfields(f, &subfield_not_in?(subfields_to_ignore))
        end
        results.compact_blank
      end

      # Get Format values for faceting. Format values are determined using complex logic for each possible format value.
      # The primary fields considered in determining Format facet values are:
      #
      # 1. "Type of Record" and "Bibliographic level" values extracted from the
      #    {https://www.loc.gov/marc/bibliographic/bdleader.html MARC leader}.
      # 2. Location name values and "Classification part" from Alma "enhanced" MARC holding/item information
      # 3. {https://www.loc.gov/marc/bibliographic/bd007.html 007} values, the first
      #    {https://www.loc.gov/marc/bibliographic/bd008.html 008} value, and the first character from all
      #    {https://www.loc.gov/marc/bibliographic/bd006.html 006} values (form)
      # 4. Medium values from {https://www.oclc.org/bibformats/en/2xx/245.html#subfieldh 245 ǂh}
      # 5. Media Type values from {https://www.oclc.org/bibformats/en/3xx/337.html#subfielda 337 ǂa}
      # Additional fields are considered for many of the formats. Much of this logic has been moved to private methods
      # to keep this method from becoming too unwieldy.
      # @todo is the conditional structure here still best practice? see the "Thesis on Microfilm" case in the specs
      #   for this helper method
      # @note ported from get_format
      # @param [MARC::Record] record
      # @param [Hash] location_map
      # @return [Array<String>] format values for faceting
      def facet(record, location_map)
        formats = []
        format_code = leader_format(record.leader)
        f007 = record.fields('007').map(&:value)
        f008 = record.fields('008').first&.value || ''
        f006_forms = record.fields('006').map { |field| field.value[0] }
        title_medium = subfield_values_for tag: '245', subfield: :h, record: record
        media_type = subfield_values_for tag: '337', subfield: :a, record: record

        # Get Call Number for holdings - ǂh gives us the 'Classification part' which can contain strings like
        # 'Microfilm'
        call_nums = record.fields(EnrichedMarc::TAG_HOLDING).map do |field|
          join_subfields(field, &subfield_in?([EnrichedMarc::SUB_HOLDING_CLASSIFICATION_PART,
                                               EnrichedMarc::SUB_HOLDING_ITEM_PART]))
        end

        # get all specific_location values from inventory info
        locations = Location.location record: record, location_map: location_map, display_value: :specific_location

        if include_manuscripts?(locations)
          formats << MANUSCRIPT
        elsif archives_but_not_cajs_or_nursing?(locations)
          formats << ARCHIVE
        elsif micro_or_microform?(call_nums, locations, media_type, title_medium)
          formats << MICROFORMAT
        else
          # any of these
          formats << THESIS_DISSERTATION if thesis_or_dissertation?(format_code, record)
          formats << CONFERENCE_EVENT if conference_event?(record)
          formats << NEWSPAPER if newspaper?(f008, format_code)
          formats << GOVDOC if government_document?(f008, record, format_code)

          # but only one of these
          formats << if website_database?(f006_forms, format_code)
                       WEBSITE_DATABASE
                     elsif book?(format_code, title_medium, record)
                       BOOK
                     elsif musical_score?(format_code)
                       MUSICAL_SCORE
                     elsif map_atlas?(format_code)
                       MAP_ATLAS
                     elsif graphical_media?(format_code)
                       graphical_media_type(f007)
                     elsif sound_recording?(format_code)
                       SOUND_RECORDING
                     elsif image?(format_code)
                       IMAGE
                     elsif datafile?(format_code)
                       DATAFILE
                     elsif journal_periodical?(format_code)
                       JOURNAL_PERIODICAL
                     elsif three_d_object?(format_code)
                       THREE_D_OBJECT
                     else
                       OTHER
                     end
        end
        # curated (944 ǂa) values are appended regardless of which branch was taken above
        formats.concat(curated_format(record))
      end

      # Show "Other Format" values from {https://www.oclc.org/bibformats/en/7xx/776.html 776} and any 880 linkage.
      # @todo is 774 an error in the linked field in legacy? i changed to 776 here
      # @param [MARC::Record] record
      # @return [Array] other format values for display
      def other_show(record)
        other_formats = record.fields('776').filter_map do |field|
          value = join_subfields(field, &subfield_in?(%w[i a s t o]))
          next if value.blank?

          value
        end
        other_formats + linked_alternate(record, '776') do |sf|
          sf.code.in? %w[i a s t o]
        end
      end

      # Check if a set of locations has any locations that include the term 'manuscripts'
      # @param [Array<String>] locations
      # @return [Boolean]
      def include_manuscripts?(locations)
        locations.any? { |loc| loc =~ /manuscripts/i }
      end

      private

      # Get 'Curated' format from
      # {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Alma local field
      # 944} ǂa, as long as it is not a numerical value.
      # @param [MARC::Record] record
      # @return [Array]
      def curated_format(record)
        record.fields('944').filter_map do |field|
          subfield_a = field.find { |sf| sf.code == 'a' }
          # a value that survives an Integer round trip unchanged is purely numeric and is skipped
          next if subfield_a.nil? || (subfield_a.value == subfield_a.value.to_i.to_s)

          subfield_a.value
        end.uniq
      end

      # @param [String] format_code
      # @return [Boolean]
      def image?(format_code)
        format_code.in?(%w[km kd])
      end

      # @param [String] format_code
      # @return [Boolean]
      def datafile?(format_code)
        format_code == 'mm'
      end

      # @param [String] format_code
      # @return [Boolean]
      def journal_periodical?(format_code)
        format_code.in?(%w[as gs])
      end

      # @param [String] format_code
      # @return [Boolean]
      def three_d_object?(format_code)
        format_code.start_with?('r')
      end

      # @param [String] format_code
      # @return [Boolean]
      def sound_recording?(format_code)
        format_code.in?(%w[im jm jc jd js])
      end

      # @param [String] format_code
      # @return [Boolean]
      def graphical_media?(format_code)
        format_code == 'gm'
      end

      # @param [String] format_code
      # @return [Boolean]
      def map_atlas?(format_code)
        format_code&.start_with?('e') || format_code == 'fm'
      end

      # @param [String] format_code
      # @return [Boolean]
      def musical_score?(format_code)
        format_code.in?(%w[ca cb cd cm cs dm])
      end

      # A book has one of the book-like leader codes and neither a "kit" 245 ǂk nor a "micro" 245 ǂh.
      # @param [String] format_code
      # @param [Array<String>] title_medium
      # @param [MARC::Record] record
      # @return [Boolean]
      def book?(format_code, title_medium, record)
        title_forms = subfield_values_for tag: '245', subfield: :k, record: record
        format_code.in?(%w[aa ac am tm]) &&
          title_forms.none? { |v| v =~ /kit/i } &&
          title_medium.none? { |v| v =~ /micro/i }
      end

      # @param [Array<String>] f006_forms
      # @param [String] format_code
      # @return [Boolean]
      def website_database?(f006_forms, format_code)
        format_code&.end_with?('i') ||
          (format_code == 'am' && f006_forms.include?('m') && f006_forms.include?('s'))
      end

      # @param [String] f008
      # @param [MARC::Record] record
      # @param [String] format_code
      # @return [Boolean]
      def government_document?(f008, record, format_code)
        # is a 260 entry present, and does it have a b that matches 'press'
        f260press = record.fields('260').any? do |field|
          field.select { |sf| sf.code == 'b' && sf.value =~ /press/i }.any?
        end
        %w[c d i j].exclude?(format_code[0]) && f008[28].in?(%w[f i o]) && !f260press
      end

      # @param [String] f008
      # @param [String] format_code
      # @return [Boolean]
      def newspaper?(f008, format_code)
        format_code == 'as' && (f008[21] == 'n' || f008[22] == 'e')
      end

      # @param [MARC::Record] record
      # @return [Boolean]
      def conference_event?(record)
        record.fields('111').any? || record.fields('711').any? # TODO: use field_present helper here and below?
      end

      # @param [String] format_code
      # @param [MARC::Record] record
      # @return [Boolean]
      def thesis_or_dissertation?(format_code, record)
        record.fields('502').any? && format_code == 'tm'
      end

      # @param [Array<String>] call_nums
      # @param [Array<String>] locations
      # @param [Array<String>] media_type
      # @param [Array<String>] title_medium
      # @return [Boolean]
      def micro_or_microform?(call_nums, locations, media_type, title_medium)
        locations.any? { |loc| loc =~ /micro/i } ||
          title_medium.any? { |val| val =~ /micro/i } ||
          call_nums.any? { |val| val =~ /micro/i } ||
          media_type.any? { |val| val =~ /microform/i }
      end

      # @todo "cajs" has no match in our location map, so it is not doing anything. Does this intend to catch cjsambx
      #   "Library at the Katz Center - Archives"?
      # @param [Array<String>] locations
      # @return [Boolean]
      def archives_but_not_cajs_or_nursing?(locations)
        locations.any? { |loc| loc =~ /archives/i } &&
          locations.none? { |loc| loc =~ /cajs/i } &&
          locations.none? { |loc| loc =~ /nursing/i }
      end

      # Consider {https://www.loc.gov/marc/bibliographic/bd007g.html 007} to determine graphical media format
      # @param [Array<String>] f007
      # @return [String (frozen)]
      def graphical_media_type(f007)
        if f007.any? { |v| v.start_with?('g') }
          PROJECTED_GRAPHIC
        else
          VIDEO
        end
      end

      # Leader positions 6 and 7, used throughout this class as the two-character format code.
      # @param [String] leader
      # @return [String]
      def leader_format(leader)
        leader[6..7] || ' '
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Genre field values come from the {https://www.oclc.org/bibformats/en/6xx/655.html 655}, but for some
  # contexts we are only interested in a subset of the declared terms in a record.
  class Genre < Helper
    class << self
      # Genre values for searching. We're less picky about what is included here to enable discovery via any included
      # 655 data.
      #
      # @param [MARC::Record] record
      # @return [Array<String>] array of genre values for search
      def search(record)
        record.fields('655').map do |field|
          join_subfields(field, &subfield_not_in?(%w[0 2 5 c]))
        end.uniq
      end

      # Genre values for display. We display Genre/Term values if they fulfill the following criteria:
      #  - The field is in {https://www.oclc.org/bibformats/en/6xx/655.html MARC 655}. Or the field is in MARC 880 with
      #    subfield 6 including '655'.
      #    AND
      #  - Above fields have an indicator 2 value of: 0 (LSCH) or 4 (No source specified).
      #    OR
      #  - Above fields have a subfield 2 (ontology code) in the list of allowed values.
      # Subfields a and b are joined with spaces, other displayed subfields with ' -- ', and the values of subfields
      # e and w are appended last, also separated by ' -- '.
      # @todo subfields e and w do not appear in the documentation for 655, but we give them special consideration here,
      #   what gives?
      # @note legacy method returns a link object
      # @param [MARC::Record] record
      # @return [Array<String>] array of genre values for display
      def show(record)
        record.fields(%w[655 880]).filter_map do |field|
          next unless allowed_genre_field?(field)

          # NOTE(review): this compares whole subfield 6 values with '655'; a linkage value such as '655-01' would
          # be excluded - confirm against the expected 880 data.
          next if field.tag == '880' && subfield_values(field, '6').exclude?('655')

          sub_with_hyphens = field.find_all(&subfield_not_in?(%w[0 2 5 6 8 c e w])).map do |sf|
            sep = %w[a b].exclude?(sf.code) ? ' -- ' : ' '
            sep + sf.value
          end.join.lstrip
          # join the subfield values, not the subfield objects, so no '$e'/'$w' code markers leak into the display
          e_and_w = field.find_all(&subfield_in?(%w[e w])).map(&:value).join(' -- ')
          "#{sub_with_hyphens} #{e_and_w}".strip
        end.uniq
      end

      # Genre values for faceting. We only set Genre facet values for movies (videos) and manuscripts(?)
      # @todo the Genre facet in Franklin is pretty ugly. It could be cleaned up by limiting the subfields included
      #   here and cleaning up punctuation.
      # @param [MARC::Record] record
      # @param [Hash] location_map
      # @return [Array<String>]
      def facet(record, location_map)
        locations = Location.location record: record, location_map: location_map, display_value: :specific_location
        manuscript = Format.include_manuscripts?(locations)
        video = record.fields('007').any? { |field| field.value.start_with?('v') }
        return [] unless manuscript || video

        record.fields('655').filter_map do |field|
          join_subfields field, &subfield_not_in?(%w[0 2 5 c])
        end.uniq
      end

      private

      # A genre field is allowed when its second indicator is 0 or 4, or its subfield 2 holds an allowed source code.
      # @param [MARC::DataField] field
      # @return [TrueClass, FalseClass]
      def allowed_genre_field?(field)
        field.indicator2.in?(%w[0 4]) || subfield_value_in?(field, '2', PennMARC::HeadingControl::ALLOWED_SOURCE_CODES)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../enriched_marc'
4
+ require_relative '../util'
5
+
6
module PennMARC
  # Common superclass for the field-specific helper classes. It extends Util, so Util's methods are callable as
  # class-level methods on Helper and on every class that inherits from it.
  class Helper
    extend Util
  end
end
@@ -0,0 +1,134 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Parser methods for extracting identifier values.
  class Identifier < Helper
    class << self
      # Get Alma MMS ID value from the first 001 control field.
      #
      # @param [MARC::Record] record
      # @return [String, nil] the MMS ID, or nil when the record has no 001 field
      def mmsid(record)
        record.fields('001').first&.value
      end

      # Get normalized ISXN values for searching of a record. Values aggregated from subfield 'a' and 'z' of the
      # {https://www.oclc.org/bibformats/en/0xx/020.html 020 field}, and subfield 'a', 'l', and 'z' of
      # the {https://www.oclc.org/bibformats/en/0xx/022.html 022 field}. ISBN values are normalized; values that
      # cannot be normalized are dropped. ISSN values are returned as found.
      #
      # @param [MARC::Record] record
      # @return [Array<String>]
      def isxn_search(record)
        record.fields(%w[020 022]).filter_map do |field|
          if field.tag == '020'
            field.filter_map { |subfield| normalize_isbn(subfield.value) if subfield_in?(%w[a z]).call(subfield) }
          else
            field.filter_map { |subfield| subfield.value if subfield_in?(%w[a l z]).call(subfield) }
          end
        end.flatten.uniq
      end

      # Get ISBN values for display from the {https://www.oclc.org/bibformats/en/0xx/020.html 020 field}
      # and related {https://www.oclc.org/bibformats/en/8xx/880.html 880 field}.
      #
      # @param [MARC::Record] record
      # @return [Array<String>]
      # @todo look into z subfield for 020 field, should we show cancelled isbn?
      def isbn_show(record)
        isbn_values = record.fields('020').filter_map do |field|
          joined_isbn = join_subfields(field, &subfield_in?(%w[a z]))
          joined_isbn if joined_isbn.present?
        end
        isbn_values + linked_alternate(record, '020', &subfield_in?(%w[a z]))
      end

      # Get ISSN values for display from the {https://www.oclc.org/bibformats/en/0xx/022.html 022 field} and related
      # {https://www.oclc.org/bibformats/en/8xx/880.html 880 field}.
      #
      # @param [MARC::Record] record
      # @return [Array<String>]
      def issn_show(record)
        issn_values = record.fields('022').filter_map do |field|
          joined_issn = join_subfields(field, &subfield_in?(%w[a z]))
          joined_issn if joined_issn.present?
        end
        issn_values + linked_alternate(record, '022', &subfield_in?(%w[a z]))
      end

      # Get numeric OCLC ID of first {https://www.oclc.org/bibformats/en/0xx/035.html 035 field}
      # with an OCLC ID defined in subfield 'a'.
      #
      # @todo We should evaluate this to return a single value in the future since subfield a is non-repeatable
      # @param [MARC::Record] record
      # @return [Array<String>]
      def oclc_id(record)
        # Array.wrap turns "no matching field" (nil) into an empty array, so the result is then []
        oclc_id = Array.wrap(record.fields('035')
                                   .find { |field| field.any? { |subfield| subfield_a_is_oclc?(subfield) } })

        oclc_id.flat_map do |field|
          field.filter_map do |subfield|
            # skip unless subfield 'a' is an oclc id value
            next unless subfield_a_is_oclc?(subfield)

            # search for numeric part of oclc id (e.g. '610094484' in '(OCoLC)ocn610094484')
            match = /^\s*\(OCoLC\)[^1-9]*([1-9][0-9]*).*$/.match(subfield.value)

            # skip unless search to find numeric part of oclc id has a match
            next unless match

            match[1]
          end
        end
      end

      # Get publisher issued identifiers from fields {https://www.oclc.org/bibformats/en/0xx/024.html 024},
      # {https://www.oclc.org/bibformats/en/0xx/028.html 028}, and related
      # {https://www.oclc.org/bibformats/en/8xx/880.html 880 field}.
      #
      # @param [MARC::Record] record
      # @return [Array<String>]
      def publisher_number_show(record)
        publisher_numbers = record.fields(%w[024 028]).filter_map do |field|
          joined_identifiers = join_subfields(field, &subfield_not_in?(%w[5 6]))
          joined_identifiers if joined_identifiers.present?
        end
        publisher_numbers + linked_alternate(record, %w[024 028], &subfield_not_in?(%w[5 6]))
      end

      # Get publisher issued identifiers for searching of a record. Values extracted from subfield 'a' of fields
      # {https://www.oclc.org/bibformats/en/0xx/024.html 024} and {https://www.oclc.org/bibformats/en/0xx/028.html 028}.
      #
      # @param [MARC::Record] record
      # @return [Array<String>]
      def publisher_number_search(record)
        record.fields(%w[024 028]).filter_map do |field|
          joined_identifiers = join_subfields(field, &subfield_in?(%w[a]))
          joined_identifiers if joined_identifiers.present?
        end
      end

      private

      # Determine if a subfield is a subfield 'a' whose value starts with the '(OCoLC)' prefix.
      #
      # @param [MARC::Subfield] subfield
      # @return [TrueClass, FalseClass]
      def subfield_a_is_oclc?(subfield)
        # to_s keeps a nil value from raising; match? yields a real boolean rather than a match index
        subfield.code == 'a' && subfield.value.to_s.match?(/^\(OCoLC\)/)
      end

      # Normalize isbn value using {https://github.com/billdueber/library_stdnums library_stdnums gem}.
      # Converts ISBN10 (ten-digit) to validated ISBN13 (thirteen-digit) and returns both values. If passed
      # ISBN13 parameter, only returns validated ISBN13 value.
      #
      # @param [String] isbn
      # @return [Array<String, String>, nil]
      def normalize_isbn(isbn)
        StdNum::ISBN.allNormalizedValues(isbn)
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Logic for extracting and translating Language values for a record. Penn practice is to verify the value present in
  # the {https://www.oclc.org/bibformats/en/fixedfield/lang.html 008 control field} as a three letter code. This code
  # is then mapped to a display-friendly value using a provided mapping hash.
  # @todo should we consider values in the {https://www.oclc.org/bibformats/en/0xx/041.html 041 field}?
  class Language < Helper
    # Used when no value is present in the control field - still mapped
    UNDETERMINED_CODE = :und

    class << self
      # Get language values for display from the {https://www.oclc.org/bibformats/en/5xx/546.html 546 field} and
      # related 880.
      # @param [MARC::Record] record
      # @return [Array<String>] language values and notes
      def show(record)
        values = record.fields('546').map do |field|
          join_subfields field, &subfield_not_in?(%w[6 8])
        end
        values + linked_alternate(record, '546', &subfield_not_in?(%w[6 8]))
      end

      # Get language values for searching and faceting of a record. The value is extracted from positions 35-37 of
      # the 008 control field and looked up in the mapping as a symbol. Language facet and search values will
      # typically be the same. When the record has no 008 field, or the field is too short or blank at those
      # positions, the UNDETERMINED_CODE entry of the mapping is used instead.
      #
      # @param [MARC::Record] record
      # @param [Hash] mapping hash for language code translation, keyed by symbol
      # @return [String, nil] nice value for language; nil when the mapping has no entry for the code
      def search(record, mapping)
        control_field = record['008']&.value
        # to_s on both steps turns a missing field and an out-of-range slice into an empty code
        language_code = control_field.to_s[35..37].to_s.strip
        mapping[language_code.empty? ? UNDETERMINED_CODE : language_code.to_sym]
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Placeholder for link extraction helpers. Both methods currently have empty bodies and therefore return nil.
  class Link < Helper
    class << self
      # Not implemented yet: accepts a record and does nothing.
      # @param [Object] record unused for now
      # @return [nil]
      def full_text(record:)
      end

      # Not implemented yet: accepts a record and does nothing.
      # @param [Object] record unused for now
      # @return [nil]
      def web(record:)
      end
    end
  end
end