pennmarc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
@@ -0,0 +1,263 @@
1
+ abr: Abridger
2
+ act: Actor
3
+ adp: Adapter
4
+ rcp: Addressee
5
+ anl: Analyst
6
+ anm: Animator
7
+ ann: Annotator
8
+ apl: Appellant
9
+ ape: Appellee
10
+ app: Applicant
11
+ arc: Architect
12
+ arr: Arranger
13
+ acp: Art copyist
14
+ adi: Art director
15
+ art: Artist
16
+ ard: Artistic director
17
+ asg: Assignee
18
+ asn: Associated name
19
+ att: Attributed name
20
+ auc: Auctioneer
21
+ aut: Author
22
+ aqt: Author in quotations or text abstracts
23
+ aft: Author of afterword, colophon, etc.
24
+ aud: Author of dialog
25
+ aui: Author of introduction, etc.
26
+ ato: Autographer
27
+ ant: Bibliographic antecedent
28
+ bnd: Binder
29
+ bdd: Binding designer
30
+ blw: Blurb writer
31
+ bkd: Book designer
32
+ bkp: Book producer
33
+ bjd: Bookjacket designer
34
+ bpd: Bookplate designer
35
+ bsl: Bookseller
36
+ brl: Braille embosser
37
+ brd: Broadcaster
38
+ cll: Calligrapher
39
+ ctg: Cartographer
40
+ cas: Caster
41
+ cns: Censor
42
+ chr: Choreographer
43
+ cng: Cinematographer
44
+ cli: Client
45
+ clb: Collaborator
46
+ cor: Collection registrar
47
+ col: Collector
48
+ clt: Collotyper
49
+ clr: Colorist
50
+ cmm: Commentator
51
+ cwt: Commentator for written text
52
+ com: Compiler
53
+ cpl: Complainant
54
+ cpt: Complainant-appellant
55
+ cpe: Complainant-appellee
56
+ cmp: Composer
57
+ cmt: Compositor
58
+ ccp: Conceptor
59
+ cnd: Conductor
60
+ con: Conservator
61
+ csl: Consultant
62
+ csp: Consultant to a project
63
+ cos: Contestant
64
+ cot: Contestant-appellant
65
+ coe: Contestant-appellee
66
+ cts: Contestee
67
+ ctt: Contestee-appellant
68
+ cte: Contestee-appellee
69
+ ctr: Contractor
70
+ ctb: Contributor
71
+ cpc: Copyright claimant
72
+ cph: Copyright holder
73
+ crr: Corrector
74
+ crp: Correspondent
75
+ cst: Costume designer
76
+ cou: Court governed
77
+ crt: Court reporter
78
+ cov: Cover designer
79
+ cre: Creator
80
+ cur: Curator
81
+ dnc: Dancer
82
+ dtc: Data contributor
83
+ dtm: Data manager
84
+ dte: Dedicatee
85
+ dto: Dedicator
86
+ dfd: Defendant
87
+ dft: Defendant-appellant
88
+ dfe: Defendant-appellee
89
+ dgg: Degree granting institution
90
+ dln: Delineator
91
+ dpc: Depicted
92
+ dpt: Depositor
93
+ dsr: Designer
94
+ drt: Director
95
+ dis: Dissertant
96
+ dbp: Distribution place
97
+ dst: Distributor
98
+ dnr: Donor
99
+ drm: Draftsman
100
+ dub: Dubious author
101
+ edt: Editor
102
+ edc: Editor of compilation
103
+ edm: Editor of moving image work
104
+ elg: Electrician
105
+ elt: Electrotyper
106
+ enj: Enacting jurisdiction
107
+ eng: Engineer
108
+ egr: Engraver
109
+ etr: Etcher
110
+ evp: Event place
111
+ exp: Expert
112
+ fac: Facsimilist
113
+ fld: Field director
114
+ fds: Film distributor
115
+ fmd: Film director
116
+ flm: Film editor
117
+ fmp: Film producer
118
+ fmk: Filmmaker
119
+ fpy: First party
120
+ frg: Forger
121
+ fmo: Former owner
122
+ fnd: Funder
123
+ gis: Geographic information specialist
124
+ grt: Graphic technician
125
+ hnr: Honoree
126
+ hst: Host
127
+ his: Host institution
128
+ ilu: Illuminator
129
+ ill: Illustrator
130
+ ins: Inscriber
131
+ itr: Instrumentalist
132
+ ive: Interviewee
133
+ ivr: Interviewer
134
+ inv: Inventor
135
+ isb: Issuing body
136
+ jud: Judge
137
+ jug: Jurisdiction governed
138
+ lbr: Laboratory
139
+ ldr: Laboratory director
140
+ lsa: Landscape architect
141
+ led: Lead
142
+ len: Lender
143
+ lil: Libelant
144
+ lit: Libelant-appellant
145
+ lie: Libelant-appellee
146
+ lel: Libelee
147
+ let: Libelee-appellant
148
+ lee: Libelee-appellee
149
+ lbt: Librettist
150
+ lse: Licensee
151
+ lso: Licensor
152
+ lgd: Lighting designer
153
+ ltg: Lithographer
154
+ lyr: Lyricist
155
+ mfp: Manufacture place
156
+ mfr: Manufacturer
157
+ mrb: Marbler
158
+ mrk: Markup editor
159
+ mdc: Metadata contact
160
+ mte: Metal-engraver
161
+ mod: Moderator
162
+ mon: Monitor
163
+ mcp: Music copyist
164
+ msd: Musical director
165
+ mus: Musician
166
+ nrt: Narrator
167
+ osp: Onscreen presenter
168
+ opn: Opponent
169
+ orm: Organizer of meeting
170
+ org: Originator
171
+ own: Owner
172
+ pan: Panelist
173
+ ppm: Papermaker
174
+ pta: Patent applicant
175
+ pth: Patent holder
176
+ pat: Patron
177
+ prf: Performer
178
+ pma: Permitting agency
179
+ pht: Photographer
180
+ ptf: Plaintiff
181
+ ptt: Plaintiff-appellant
182
+ pte: Plaintiff-appellee
183
+ plt: Platemaker
184
+ pra: Praeses
185
+ pre: Presenter
186
+ prt: Printer
187
+ pop: Printer of plates
188
+ prm: Printmaker
189
+ prc: Process contact
190
+ pro: Producer
191
+ prn: Production company
192
+ prs: Production designer
193
+ pmn: Production manager
194
+ prd: Production personnel
195
+ prp: Production place
196
+ prg: Programmer
197
+ pdr: Project director
198
+ pfr: Proofreader
199
+ prv: Provider
200
+ pup: Publication place
201
+ pbl: Publisher
202
+ pbd: Publishing director
203
+ ppt: Puppeteer
204
+ rdd: Radio director
205
+ rpc: Radio producer
206
+ rce: Recording engineer
207
+ rcd: Recordist
208
+ red: Redaktor
209
+ ren: Renderer
210
+ rpt: Reporter
211
+ rps: Repository
212
+ rth: Research team head
213
+ rtm: Research team member
214
+ res: Researcher
215
+ rsp: Respondent
216
+ rst: Respondent-appellant
217
+ rse: Respondent-appellee
218
+ rpy: Responsible party
219
+ rsg: Restager
220
+ rsr: Restorationist
221
+ rev: Reviewer
222
+ rbr: Rubricator
223
+ sce: Scenarist
224
+ sad: Scientific advisor
225
+ aus: Screenwriter
226
+ scr: Scribe
227
+ scl: Sculptor
228
+ spy: Second party
229
+ sec: Secretary
230
+ sll: Seller
231
+ std: Set designer
232
+ stg: Setting
233
+ sgn: Signer
234
+ sng: Singer
235
+ sds: Sound designer
236
+ spk: Speaker
237
+ spn: Sponsor
238
+ sgd: Stage director
239
+ stm: Stage manager
240
+ stn: Standards body
241
+ str: Stereotyper
242
+ stl: Storyteller
243
+ sht: Supporting host
244
+ srv: Surveyor
245
+ tch: Teacher
246
+ tcd: Technical director
247
+ tld: Television director
248
+ tlp: Television producer
249
+ ths: Thesis advisor
250
+ trc: Transcriber
251
+ trl: Translator
252
+ tyd: Type designer
253
+ tyg: Typographer
254
+ uvp: University place
255
+ vdg: Videographer
256
+ voc: Vocalist
257
+ wit: Witness
258
+ wde: Wood engraver
259
+ wdc: Woodcutter
260
+ wam: Writer of accompanying material
261
+ wac: Writer of added commentary
262
+ wat: Writer of added text
263
+ wal: Writer of added lyrics
@@ -0,0 +1,177 @@
1
+ # frozen_string_literal: true
2
+
3
require 'yaml'

require 'active_support/all'

require_relative 'util'
require_relative 'helpers/helper'
require_relative 'helpers/creator'
require_relative 'helpers/database'
require_relative 'helpers/date'
require_relative 'helpers/format'
require_relative 'helpers/genre'
require_relative 'helpers/identifier'
require_relative 'helpers/language'
require_relative 'helpers/link'
require_relative 'helpers/location'
require_relative 'helpers/subject'
require_relative 'helpers/title'
require_relative 'helpers/citation'
require_relative 'helpers/relation'
require_relative 'helpers/production'
require_relative 'helpers/edition'
require_relative 'helpers/note'
21
+
22
module PennMARC
  # NOTE(review): declared on the module, so it only defines accessors on objects that
  # include PennMARC itself. Kept for backward compatibility; Parser declares its own below.
  attr_accessor :mappings

  DEFINED_HELPERS = %w[Creator Database Date Format Genre Language Link Location Subject Title Relation].freeze

  # Methods here should return values used in the indexer. The parsing logic should
  # NOT return values specific to any particular site/interface, but just general
  # MARC parsing logic for "title", "subject", "author", etc., as much as reasonably
  # possible. We'll see how it goes.
  #
  # Methods should, by default, take in a MARC::Record
  class Parser
    # The *_show methods below rely on the subfield/linkage utilities defined in Util.
    include Util

    # Ordered [tags, subfield codes] pairs consumed by #system_details_show.
    SYSTEM_DETAILS_SOURCES = [
      [%w[538], %w[a i u]],
      [%w[344], %w[a b c d e f g h]],
      [%w[345 346], %w[a b]],
      [%w[347], %w[a b c d e f]]
    ].freeze

    # Value of the :link_type key emitted by #contributor_show.
    CONTRIBUTOR_LINK_TYPE = 'author_creator_xfacet2'

    # @return [Hash] memoized mapping hashes keyed by mapping name (see #language_map etc.)
    attr_accessor :mappings

    # @param [Array<String>, String] helpers names of the helpers this parser advertises
    def initialize(helpers: DEFINED_HELPERS)
      @mappings = {}
      @helpers = Array.wrap(helpers) # TODO: load helpers dynamically?
    end

    # Report helper-prefixed method names (e.g. :title_show) as supported when the prefix
    # names one of the configured helpers. The comparison is case-insensitive because helper
    # names are stored capitalized ("Title") while method prefixes are lowercase ("title").
    # @param [Symbol, String] name
    # @param [Boolean] include_private
    # @return [Boolean]
    def respond_to_missing?(name, include_private = false)
      prefix = name.to_s.split('_').first
      return super if prefix.blank?

      @helpers.any? { |helper| helper.to_s.casecmp?(prefix) } || super
    end

    # Call helper class methods, e.g.,
    #  #title_show -> PennMARC::Title.show
    #  #subject_facet -> PennMARC::Subject.facet
    # Names that do not resolve to a public method on a PennMARC helper constant are handed
    # to super, so they raise an ordinary NoMethodError.
    def method_missing(name, *args, &block)
      segments = name.to_s.split('_')
      prefix = segments.shift
      meth = segments.join('_')
      helper = "PennMARC::#{prefix.to_s.titleize}".safe_constantize
      return super if meth.empty? || !helper.respond_to?(meth)

      helper.public_send(meth, *args, &block)
    end

    # Join every subfield except 6 and 8 for each 255 and 342 field.
    # @todo does this fit in an existing helper?
    # @param [MARC::Record] record
    # @return [Array<String>]
    def cartographic_show(record)
      record.fields(%w[255 342]).map do |field|
        join_subfields(field, &subfield_not_in?(%w[6 8]))
      end
    end

    # Join every subfield except 2, 5, 6 and 8 for each 026 field.
    # @todo move to Identifier helper
    # @param [MARC::Record] record
    # @return [Array<String>]
    def fingerprint_show(record)
      record.fields('026').map do |field|
        join_subfields(field, &subfield_not_in?(%w[2 5 6 8]))
      end
    end

    # Values from the 351 fields followed by values from the 880 fields linked to 351.
    # @todo does this fit in an existing helper?
    # @param [MARC::Record] record
    # @return [Array<String>]
    def arrangement_show(record)
      datafield_and_linked_alternate(record, '351')
    end

    # Collect values from 538, 344, 345/346 and 347 (in that order), followed by values from
    # the 880 fields linked to those same tags (same order, same subfield selections).
    # NOTE(review): get_sub3_and_other_subs is not defined anywhere in the visible code;
    # confirm where it lives (or port it) before relying on this method.
    # @param [MARC::Record] record
    # @return [Array]
    def system_details_show(record)
      direct = SYSTEM_DETAILS_SOURCES.flat_map do |tags, codes|
        record.fields(tags).map { |field| get_sub3_and_other_subs(field, &subfield_in?(codes)) }
      end
      linked = SYSTEM_DETAILS_SOURCES.flat_map do |tags, codes|
        linkage = /^(#{tags.join('|')})/
        record.fields('880')
              .select { |field| subfield_value?(field, '6', linkage) }
              .map { |field| get_sub3_and_other_subs(field, &subfield_in?(codes)) }
      end
      direct + linked
    end

    # @todo the legacy code here is a hot mess for a number of reasons, what do we need this field to do?
    # @note port the needed parts from get_offsite_display, don't return HTML
    # @param [MARC::Record] record
    # @return [Object] currently always nil (not implemented)
    def offsite_show(record); end

    # Build contributor hashes from 700/710 fields whose second indicator is '', ' ' or '0'
    # and which carry no subfield i, followed by hashes from the 880 fields linked to
    # 700/710 that carry no subfield i.
    # @todo move this to Creator helper
    # @param [MARC::Record] record
    # @return [Array<Hash>] hashes with :value, :value_append and :link_type keys
    def contributor_show(record)
      direct = record.fields(%w[700 710]).filter_map do |field|
        next unless ['', ' ', '0'].include?(field.indicator2) && subfield_undefined?(field, 'i')

        contributor_entry(field, contributor_suffix(field))
      end
      linked = record.fields('880').filter_map do |field|
        next unless subfield_value?(field, '6', /^(700|710)/) && subfield_undefined?(field, 'i')

        contributor_entry(field, join_subfields(field, &subfield_in?(%w[e u 3])))
      end
      direct + linked
    end

    # Load language map from YAML and memoize in @mappings hash
    # @return [Hash]
    def language_map
      @mappings[:language] ||= load_map('language.yml')
    end

    # Load location map from YAML and memoize in @mappings hash
    # @return [Hash]
    def location_map
      @mappings[:location] ||= load_map('locations.yml')
    end

    # Load relator map from YAML and memoize in @mappings hash
    # @return [Hash]
    def relator_map
      @mappings[:relator] ||= load_map('relator.yml')
    end

    # @param [String] filename of mapping file in the mappings directory, with file extension
    # @return [Hash] mapping as hash with symbolized keys
    def load_map(filename)
      path = File.join(File.expand_path(__dir__), 'mappings', filename)
      YAML.safe_load(File.read(path), symbolize_names: true)
    end

    private

    # Assemble one contributor hash: name subfields plus the supplied trailing text.
    # @param [MARC::DataField] field
    # @param [String] appended
    # @return [Hash]
    def contributor_entry(field, appended)
      { value: join_subfields(field, &subfield_in?(%w[a b c d j q])),
        value_append: appended,
        link_type: CONTRIBUTOR_LINK_TYPE }
    end

    # Trailing text for a 700/710 field: subfield 4 codes are translated through the relator
    # map and prefixed with ", "; subfields e, u and 3 are appended with a leading space.
    # @param [MARC::DataField] field
    # @return [String]
    def contributor_suffix(field)
      field.select(&subfield_in?(%w[e u 3 4])).map do |sf|
        sf.code == '4' ? ", #{translate_relator(sf.value, relator_map)}" : " #{sf.value}"
      end.join
    end
  end
end
@@ -0,0 +1,240 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'heading_control'
4
+
5
module PennMARC
  # Module holding "utility" methods used in MARC parsing methods
  module Util
    # Patterns matching trailing punctuation, keyed by the name accepted by #trim_trailing.
    TRAILING_PATTERNS = {
      semicolon: /\s*;\s*$/,
      colon: /\s*:\s*$/,
      equal: /=$/,
      slash: %r{\s*/\s*$},
      comma: /\s*,\s*$/,
      period: /\.\s*$/ # TODO: revise to exclude "etc."
    }.freeze

    # Join subfields from a field selected based on a provided proc. Values are stripped,
    # blank values are skipped, and runs of whitespace in the result are collapsed.
    # @param [MARC::DataField] field
    # @param [Proc] selector
    # @return [String]
    def join_subfields(field, &selector)
      kept = field.select { |sf| selector.call(sf) }
      values = kept.filter_map do |sf|
        value = sf.value&.strip
        value if value.present?
      end
      values.join(' ').squish
    end

    # returns true if field has a value that matches
    # passed-in regex and passed in subfield
    # @todo example usage
    # @param [MARC::DataField] field
    # @param [String|Integer|Symbol] subfield
    # @param [Regexp] regex
    # @return [TrueClass, FalseClass]
    def subfield_value?(field, subfield, regex)
      code = subfield.to_s
      field.any? { |sf| sf.code == code && regex.match?(sf.value) }
    end

    # returns true if a given field has a given subfield value in a given array
    # TODO: example usage
    # @param [MARC::DataField] field
    # @param [String|Integer|Symbol] subfield
    # @param [Array] array
    # @return [TrueClass, FalseClass]
    def subfield_value_in?(field, subfield, array)
      code = subfield.to_s
      field.any? { |sf| sf.code == code && array.include?(sf.value) }
    end

    # returns true if a given field does not have a given subfield value in a given array
    # @param [MARC::DataField] field
    # @param [String|Integer|Symbol] subfield
    # @param [Array] array
    # @return [TrueClass, FalseClass]
    def subfield_value_not_in?(field, subfield, array)
      code = subfield.to_s
      field.none? { |sf| sf.code == code && array.include?(sf.value) }
    end

    # returns a lambda checking if passed-in subfield's code is a member of array
    # TODO: include lambda returning methods in their own module?
    # @param [Array] array
    # @return [Proc]
    def subfield_in?(array)
      ->(subfield) { array.member?(subfield.code) }
    end

    # returns a lambda checking if passed-in subfield's code is NOT a member of array
    # TODO: include lambda returning methods in their own module?
    # @param [Array] array
    # @return [Proc]
    def subfield_not_in?(array)
      ->(subfield) { !array.member?(subfield.code) }
    end

    # Check if a field has a given subfield defined
    # @param [MARC::DataField] field
    # @param [String|Symbol|Integer] subfield
    # @return [TrueClass, FalseClass]
    def subfield_defined?(field, subfield)
      code = subfield.to_s
      field.any? { |sf| sf.code == code }
    end

    # Check if a field does not have a given subfield defined
    # @param [MARC::DataField] field
    # @param [String|Symbol|Integer] subfield
    # @return [TrueClass, FalseClass]
    def subfield_undefined?(field, subfield)
      code = subfield.to_s
      field.none? { |sf| sf.code == code }
    end

    # Gets all non-blank subfield values for a subfield in a given field
    # @param [MARC::DataField] field
    # @param [String|Symbol] subfield as a string or symbol
    # @return [Array] subfield values for given subfield code
    def subfield_values(field, subfield)
      code = subfield.to_s
      field.filter_map do |sf|
        sf.value if sf.code == code && sf.value.present?
      end
    end

    # Get all subfield values for a provided subfield from any occurrence of a provided tag/tags
    # @param [String|Array] tag tags to consider
    # @param [String|Symbol] subfield to take the values from
    # @param [MARC::Record] record source
    # @return [Array] array of subfield values
    def subfield_values_for(tag:, subfield:, record:)
      record.fields(tag).flat_map { |field| subfield_values(field, subfield) }
    end

    # Remove one kind of trailing punctuation (and, for most kinds, surrounding whitespace).
    # @param [Symbol|String] trailer to target for removal; one of the TRAILING_PATTERNS keys
    # @param [String] string to modify
    # @raise [ArgumentError] when trailer is not a supported name
    # @return [String] copy of string without the trailing punctuation
    def trim_trailing(trailer, string)
      pattern = TRAILING_PATTERNS.fetch(trailer.to_sym) do
        raise ArgumentError, "unsupported trailer: #{trailer.inspect}"
      end
      string.sub(pattern, '')
    end

    # MARC 880 field "Alternate Graphic Representation" contains text "linked" to another
    # field (e.g., 245 [Title]) used as an alternate representation. Often used to hold
    # translations of title values. A common need is to extract subfields as selected by
    # passed-in block from 880 datafield that has a particular subfield 6 value.
    # See: https://www.loc.gov/marc/bibliographic/bd880.html
    # @param [MARC::Record] record
    # @param [String|Array] subfield6_value either a string to look for in sub6 or an array of them
    # @param selector [Proc] takes a subfield as argument, returns a boolean
    # @return [Array] array of linked alternates
    def linked_alternate(record, subfield6_value, &selector)
      # Group the alternatives so the ^ anchor applies to every one of them, not just the first.
      linkage = /^(?:#{Array(subfield6_value).join('|')})/
      record.fields('880').filter_map do |field|
        next unless subfield_value?(field, '6', linkage)

        field.select { |sf| selector.call(sf) }.map(&:value).join(' ')
      end
    end
    alias get_880 linked_alternate

    # Common case of wanting to extract all the subfields besides 6 or 8,
    # from 880 datafield that has a particular subfield 6 value. We exclude 6 because
    # that value is the linkage ID itself and 8 because... IDK
    # @param [MARC::Record] record
    # @param [String|Array] subfield6_value either a string to look for in sub6 or an array of them
    # @return [Array] array of linked alternates without 8 or 6 values
    def linked_alternate_not_6_or_8(record, subfield6_value)
      linked_alternate(record, subfield6_value, &subfield_not_in?(%w[6 8]))
    end

    # Returns the non-6,8 subfields from a datafield and its 880 link. Fields whose joined
    # value is empty are left out.
    # @param [MARC::Record] record
    # @param [String] tag
    # @return [Array] values from the tagged fields followed by the linked 880 values
    def datafield_and_linked_alternate(record, tag)
      direct = record.fields(tag).filter_map do |field|
        joined = join_subfields(field, &subfield_not_in?(%w[6 8]))
        joined unless joined.empty?
      end
      direct + linked_alternate_not_6_or_8(record, tag)
    end

    # Get the substring of a string up to a given target character
    # @param [Object] string to split
    # @param [Object] target character to split upon
    # @return [String] text before the first target, or '' when target does not occur
    def substring_before(string, target)
      return '' if string.scan(target).empty?

      string.split(target, 2).first
    end

    # Get the substring of a string after the first occurrence of a target character
    # @param [Object] string to split
    # @param [Object] target character to split upon
    # @return [String] text after the first target, or '' when target does not occur
    def substring_after(string, target)
      return '' if string.scan(target).empty?

      string.split(target, 2)[1]
    end

    # Join array and normalizing extraneous spaces
    # @param [Array] array
    # @return [String]
    def join_and_squish(array)
      array.join(' ').squish
    end
    alias join_and_trim_whitespace join_and_squish

    # If there's a subfield i, take its value with the first parenthesized segment removed,
    # then drop a trailing period and a trailing colon.
    # @param [MARC::Field] field
    # @return [String] first subfield i value without its parenthetical, or '' if there is none
    def remove_paren_value_from_subfield_i(field)
      val = field.filter_map do |sf|
        next unless sf.code == 'i'

        sf.value&.sub(/\(.+?\)/, '')
      end.first || ''
      trim_trailing(:colon, trim_trailing(:period, val))
    end

    # Translate a relator code using mapping
    # @todo handle case of receiving a URI? E.g., http://loc.gov/relator/aut
    # @param [String, NilClass] relator_code
    # @param [Hash] mapping with symbol keys
    # @return [String, NilClass] full relator string
    def translate_relator(relator_code, mapping)
      return unless relator_code.present?

      mapping[relator_code.to_sym]
    end

    # Get 650 & 880 for Provenance and Chronology: prefix should be 'PRO' or 'CHR' and may be preceded by a '%'
    # @note 11/2018: do not display $5 in PRO or CHR subjs
    # @param [MARC::Record] record
    # @param [String] prefix to select from subject field
    # @return [Array] array of values
    def prefixed_subject_and_alternate(record, prefix)
      starts_with_prefix = /^(#{prefix}|%#{prefix})/
      leading_prefix = /^%?#{prefix}/
      record.fields(%w[650 880]).filter_map do |field|
        next unless field.indicator2 == '4'

        # Subfield 6 holds the linked tag followed by more linkage data, so match its start.
        next if field.tag == '880' && !subfield_value?(field, '6', /^650/)

        next unless field.any? { |sf| sf.code == 'a' && starts_with_prefix.match?(sf.value) }

        elements = field.select(&subfield_in?(%w[a])).map { |sf| sf.value.gsub(leading_prefix, '') }
        elements << join_subfields(field, &subfield_not_in?(%w[a 6 8 e w 5]))
        join_and_squish elements
      end
    end

    # Does the given field specify an allowed source code?
    #
    # @param [MARC::DataField] field
    # @return [Boolean]
    def valid_subject_genre_source_code?(field)
      subfield_value_in?(field, '2', PennMARC::HeadingControl::ALLOWED_SOURCE_CODES)
    end
  end
end
data/lib/pennmarc.rb ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.unshift(__dir__) unless $LOAD_PATH.include?(__dir__)
4
+
5
+ require_relative 'pennmarc/parser'
6
+ require 'library_stdnums'
data/pennmarc.gemspec ADDED
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
# Gem specification for pennmarc.
Gem::Specification.new do |s|
  s.name = 'pennmarc'
  s.version = '0.0.1'
  s.summary = 'Penn Libraries Catalog MARC parsing wisdom for cross-project usage'
  s.description = 'Penn Libraries Catalog MARC parsing wisdom for cross-project usage'
  s.authors = ['Mike Kanning', 'Amrey Mathurin', 'Patrick Perkins']
  s.email = 'mkanning@upenn.edu'
  # Split on newlines explicitly: $OUTPUT_RECORD_SEPARATOR is only defined after
  # `require 'English'`, and even then defaults to nil, which makes String#split fall back
  # to splitting on any whitespace.
  s.files = `git ls-files`.split("\n")
  s.homepage = 'https://gitlab.library.upenn.edu/dld/catalog/pennmarc'
  s.license = 'MIT'

  s.required_ruby_version = '>= 3.2'

  # Runtime dependencies: lib/pennmarc/parser.rb requires 'active_support/all' and
  # lib/pennmarc.rb requires 'library_stdnums', so both must be installed with the gem.
  s.add_dependency 'activesupport'
  s.add_dependency 'library_stdnums'
  s.add_dependency 'marc'

  s.add_development_dependency 'nokogiri'
  s.add_development_dependency 'rspec'
  s.add_development_dependency 'simplecov'
  s.add_development_dependency 'upennlib-rubocop'
end