pennmarc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
@@ -0,0 +1,263 @@
1
+ abr: Abridger
2
+ act: Actor
3
+ adp: Adapter
4
+ rcp: Addressee
5
+ anl: Analyst
6
+ anm: Animator
7
+ ann: Annotator
8
+ apl: Appellant
9
+ ape: Appellee
10
+ app: Applicant
11
+ arc: Architect
12
+ arr: Arranger
13
+ acp: Art copyist
14
+ adi: Art director
15
+ art: Artist
16
+ ard: Artistic director
17
+ asg: Assignee
18
+ asn: Associated name
19
+ att: Attributed name
20
+ auc: Auctioneer
21
+ aut: Author
22
+ aqt: Author in quotations or text abstracts
23
+ aft: Author of afterword, colophon, etc.
24
+ aud: Author of dialog
25
+ aui: Author of introduction, etc.
26
+ ato: Autographer
27
+ ant: Bibliographic antecedent
28
+ bnd: Binder
29
+ bdd: Binding designer
30
+ blw: Blurb writer
31
+ bkd: Book designer
32
+ bkp: Book producer
33
+ bjd: Bookjacket designer
34
+ bpd: Bookplate designer
35
+ bsl: Bookseller
36
+ brl: Braille embosser
37
+ brd: Broadcaster
38
+ cll: Calligrapher
39
+ ctg: Cartographer
40
+ cas: Caster
41
+ cns: Censor
42
+ chr: Choreographer
43
+ cng: Cinematographer
44
+ cli: Client
45
+ -clb: Collaborator
46
+ cor: Collection registrar
47
+ col: Collector
48
+ clt: Collotyper
49
+ clr: Colorist
50
+ cmm: Commentator
51
+ cwt: Commentator for written text
52
+ com: Compiler
53
+ cpl: Complainant
54
+ cpt: Complainant-appellant
55
+ cpe: Complainant-appellee
56
+ cmp: Composer
57
+ cmt: Compositor
58
+ ccp: Conceptor
59
+ cnd: Conductor
60
+ con: Conservator
61
+ csl: Consultant
62
+ csp: Consultant to a project
63
+ cos: Contestant
64
+ cot: Contestant-appellant
65
+ coe: Contestant-appellee
66
+ cts: Contestee
67
+ ctt: Contestee-appellant
68
+ cte: Contestee-appellee
69
+ ctr: Contractor
70
+ ctb: Contributor
71
+ cpc: Copyright claimant
72
+ cph: Copyright holder
73
+ crr: Corrector
74
+ crp: Correspondent
75
+ cst: Costume designer
76
+ cou: Court governed
77
+ crt: Court reporter
78
+ cov: Cover designer
79
+ cre: Creator
80
+ cur: Curator
81
+ dnc: Dancer
82
+ dtc: Data contributor
83
+ dtm: Data manager
84
+ dte: Dedicatee
85
+ dto: Dedicator
86
+ dfd: Defendant
87
+ dft: Defendant-appellant
88
+ dfe: Defendant-appellee
89
+ dgg: Degree granting institution
90
+ dln: Delineator
91
+ dpc: Depicted
92
+ dpt: Depositor
93
+ dsr: Designer
94
+ drt: Director
95
+ dis: Dissertant
96
+ dbp: Distribution place
97
+ dst: Distributor
98
+ dnr: Donor
99
+ drm: Draftsman
100
+ dub: Dubious author
101
+ edt: Editor
102
+ edc: Editor of compilation
103
+ edm: Editor of moving image work
104
+ elg: Electrician
105
+ elt: Electrotyper
106
+ enj: Enacting jurisdiction
107
+ eng: Engineer
108
+ egr: Engraver
109
+ etr: Etcher
110
+ evp: Event place
111
+ exp: Expert
112
+ fac: Facsimilist
113
+ fld: Field director
114
+ fds: Film distributor
115
+ fmd: Film director
116
+ flm: Film editor
117
+ fmp: Film producer
118
+ fmk: Filmmaker
119
+ fpy: First party
120
+ frg: Forger
121
+ fmo: Former owner
122
+ fnd: Funder
123
+ gis: Geographic information specialist
124
+ -grt: Graphic technician
125
+ hnr: Honoree
126
+ hst: Host
127
+ his: Host institution
128
+ ilu: Illuminator
129
+ ill: Illustrator
130
+ ins: Inscriber
131
+ itr: Instrumentalist
132
+ ive: Interviewee
133
+ ivr: Interviewer
134
+ inv: Inventor
135
+ isb: Issuing body
136
+ jud: Judge
137
+ jug: Jurisdiction governed
138
+ lbr: Laboratory
139
+ ldr: Laboratory director
140
+ lsa: Landscape architect
141
+ led: Lead
142
+ len: Lender
143
+ lil: Libelant
144
+ lit: Libelant-appellant
145
+ lie: Libelant-appellee
146
+ lel: Libelee
147
+ let: Libelee-appellant
148
+ lee: Libelee-appellee
149
+ lbt: Librettist
150
+ lse: Licensee
151
+ lso: Licensor
152
+ lgd: Lighting designer
153
+ ltg: Lithographer
154
+ lyr: Lyricist
155
+ mfp: Manufacture place
156
+ mfr: Manufacturer
157
+ mrb: Marbler
158
+ mrk: Markup editor
159
+ mdc: Metadata contact
160
+ mte: Metal-engraver
161
+ mod: Moderator
162
+ mon: Monitor
163
+ mcp: Music copyist
164
+ msd: Musical director
165
+ mus: Musician
166
+ nrt: Narrator
167
+ osp: Onscreen presenter
168
+ opn: Opponent
169
+ orm: Organizer of meeting
170
+ org: Originator
171
+ own: Owner
172
+ pan: Panelist
173
+ ppm: Papermaker
174
+ pta: Patent applicant
175
+ pth: Patent holder
176
+ pat: Patron
177
+ prf: Performer
178
+ pma: Permitting agency
179
+ pht: Photographer
180
+ ptf: Plaintiff
181
+ ptt: Plaintiff-appellant
182
+ pte: Plaintiff-appellee
183
+ plt: Platemaker
184
+ pra: Praeses
185
+ pre: Presenter
186
+ prt: Printer
187
+ pop: Printer of plates
188
+ prm: Printmaker
189
+ prc: Process contact
190
+ pro: Producer
191
+ prn: Production company
192
+ prs: Production designer
193
+ pmn: Production manager
194
+ prd: Production personnel
195
+ prp: Production place
196
+ prg: Programmer
197
+ pdr: Project director
198
+ pfr: Proofreader
199
+ prv: Provider
200
+ pup: Publication place
201
+ pbl: Publisher
202
+ pbd: Publishing director
203
+ ppt: Puppeteer
204
+ rdd: Radio director
205
+ rpc: Radio producer
206
+ rce: Recording engineer
207
+ rcd: Recordist
208
+ red: Redaktor
209
+ ren: Renderer
210
+ rpt: Reporter
211
+ rps: Repository
212
+ rth: Research team head
213
+ rtm: Research team member
214
+ res: Researcher
215
+ rsp: Respondent
216
+ rst: Respondent-appellant
217
+ rse: Respondent-appellee
218
+ rpy: Responsible party
219
+ rsg: Restager
220
+ rsr: Restorationist
221
+ rev: Reviewer
222
+ rbr: Rubricator
223
+ sce: Scenarist
224
+ sad: Scientific advisor
225
+ aus: Screenwriter
226
+ scr: Scribe
227
+ scl: Sculptor
228
+ spy: Second party
229
+ sec: Secretary
230
+ sll: Seller
231
+ std: Set designer
232
+ stg: Setting
233
+ sgn: Signer
234
+ sng: Singer
235
+ sds: Sound designer
236
+ spk: Speaker
237
+ spn: Sponsor
238
+ sgd: Stage director
239
+ stm: Stage manager
240
+ stn: Standards body
241
+ str: Stereotyper
242
+ stl: Storyteller
243
+ sht: Supporting host
244
+ srv: Surveyor
245
+ tch: Teacher
246
+ tcd: Technical director
247
+ tld: Television director
248
+ tlp: Television producer
249
+ ths: Thesis advisor
250
+ trc: Transcriber
251
+ trl: Translator
252
+ tyd: Type designer
253
+ tyg: Typographer
254
+ uvp: University place
255
+ vdg: Videographer
256
+ -voc: Vocalist
257
+ wit: Witness
258
+ wde: Wood engraver
259
+ wdc: Woodcutter
260
+ wam: Writer of accompanying material
261
+ wac: Writer of added commentary
262
+ wat: Writer of added text
263
+ wal: Writer of added lyrics
@@ -0,0 +1,177 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support/all'
4
+ require_relative 'helpers/helper'
5
+ require_relative 'helpers/creator'
6
+ require_relative 'helpers/database'
7
+ require_relative 'helpers/date'
8
+ require_relative 'helpers/format'
9
+ require_relative 'helpers/genre'
10
+ require_relative 'helpers/identifier'
11
+ require_relative 'helpers/language'
12
+ require_relative 'helpers/link'
13
+ require_relative 'helpers/location'
14
+ require_relative 'helpers/subject'
15
+ require_relative 'helpers/title'
16
+ require_relative 'helpers/citation'
17
+ require_relative 'helpers/relation'
18
+ require_relative 'helpers/production'
19
+ require_relative 'helpers/edition'
20
+ require_relative 'helpers/note'
21
+
22
module PennMARC
  # NOTE(review): this accessor is declared on the PennMARC module, so it is only available to objects
  # that include PennMARC. Parser declares its own accessor below; this one is kept for compatibility.
  attr_accessor :mappings

  # Helper class names that a Parser advertises through #respond_to? by default.
  # NOTE(review): Citation, Edition, Identifier, Note and Production are required by this file but are
  # not listed here - confirm whether that omission is intentional.
  DEFINED_HELPERS = %w[Creator Database Date Format Genre Language Link Location Subject Title Relation].freeze

  # Methods here should return values used in the indexer. The parsing logic should
  # NOT return values specific to any particular site/interface, but just general
  # MARC parsing logic for "title", "subject", "author", etc., as much as reasonably
  # possible. We'll see how it goes.
  #
  # Methods should, by default, take in a MARC::Record
  #
  # NOTE(review): the *_show methods defined directly on this class call helper methods
  # (join_subfields, subfield_in, subfield_not_in, subfield_not_6_or_8, has_subfield6_value,
  # get_sub3_and_other_subs, get_datafield_and_880, relator_codes) that are not defined in this class.
  # Confirm they are mixed in from elsewhere before relying on these methods.
  class Parser
    # One entry per group of "system details" fields: the tag(s) to read directly, the pattern that
    # identifies a linked 880 field through its subfield 6, and the subfield codes to display.
    SYSTEM_DETAILS_SOURCES = [
      ['538', /^538/, %w[a i u]],
      ['344', /^344/, %w[a b c d e f g h]],
      [%w[345 346], /^(345|346)/, %w[a b]],
      ['347', /^347/, %w[a b c d e f]]
    ].freeze

    # Memoized mapping hashes, keyed by :language, :location and :relator
    attr_accessor :mappings

    # @param [Array<String>, String] helpers names of the helper classes this parser advertises
    def initialize(helpers: DEFINED_HELPERS)
      @mappings = {}
      @helpers = Array.wrap(helpers) # TODO: load helpers dynamically?
    end

    # Report helper-backed methods (e.g. #title_show) to #respond_to?. A name is reported when its
    # prefix names one of the configured helpers (compared case-insensitively) and that helper class
    # publicly responds to the remainder of the name.
    # @param [Symbol, String] name
    # @param [Boolean] include_private
    # @return [Boolean]
    def respond_to_missing?(name, include_private = false)
      prefix, meth = split_call(name)
      return super unless @helpers.any? { |helper| helper.to_s.casecmp?(prefix.to_s) }

      resolve_helper(prefix)&.respond_to?(meth.to_s) || super
    end

    # Call helper class methods, e.g.,
    # #title_show -> PennMARC::Title.show
    # #subject_facet -> PennMARC::Subject.facet
    #
    # Anything that does not resolve to a public method of a class defined directly in the PennMARC
    # namespace is passed to super, which raises NoMethodError.
    # @param [Symbol] name
    # @return [Object] whatever the helper method returns
    def method_missing(name, *args, **kwargs, &block)
      prefix, meth = split_call(name)
      helper = resolve_helper(prefix)
      return super unless helper&.respond_to?(meth.to_s)

      helper.public_send(meth, *args, **kwargs, &block)
    end

    # @todo does this fit in an existing helper?
    # @param [MARC::Record] record
    # @return [Object]
    def cartographic_show(record)
      record.fields(%w[255 342]).map do |field|
        join_subfields(field, &subfield_not_6_or_8)
      end
    end

    # @todo move to Identifier helper
    # @param [MARC::Record] record
    # @return [Object]
    def fingerprint_show(record)
      record.fields('026').map do |field|
        join_subfields(field, &subfield_not_in(%w[2 5 6 8]))
      end
    end

    # @todo does this fit in an existing helper?
    # @param [MARC::Record] record
    # @return [Object]
    def arrangement_show(record)
      get_datafield_and_880(record, '351')
    end

    # Collect display values from the 538, 344, 345/346 and 347 fields, followed by the values from the
    # 880 fields whose subfield 6 points at those same tags (in the same tag order).
    # @param [MARC::Record] record
    # @return [Object]
    def system_details_show(record)
      direct = SYSTEM_DETAILS_SOURCES.flat_map do |tags, _linkage, codes|
        record.fields(tags).map do |field|
          get_sub3_and_other_subs(field, &subfield_in(codes))
        end
      end
      linked = SYSTEM_DETAILS_SOURCES.flat_map do |_tags, linkage, codes|
        record.fields('880')
              .select { |f| has_subfield6_value(f, linkage) }
              .map do |field|
          get_sub3_and_other_subs(field, &subfield_in(codes))
        end
      end
      direct + linked
    end

    # @todo the legacy code here is a hot mess for a number of reasons, what do we need this field to do?
    # @note port the needed parts from get_offsite_display, don't return HTML
    # @param [MARC::Record] record
    # @return [Object]
    def offsite_show(record); end

    # @todo move this to Creator helper
    # @param [MARC::Record] record
    # @return [Object]
    def contributor_show(record)
      acc = []
      acc += record.fields(%w[700 710])
                   .select { |f| ['', ' ', '0'].member?(f.indicator2) }
                   .select { |f| f.none? { |sf| sf.code == 'i' } }
                   .map do |field|
        contributor = join_subfields(field, &subfield_in(%w[a b c d j q]))
        contributor_append = field.select(&subfield_in(%w[e u 3 4])).map do |sf|
          # subfield 4 holds a relator code, shown as a comma-separated translated term
          if sf.code == '4'
            ", #{relator_codes[sf.value]}"
          else
            " #{sf.value}"
          end
        end.join
        { value: contributor, value_append: contributor_append, link_type: 'author_creator_xfacet2' }
      end
      acc += record.fields('880')
                   .select { |f| has_subfield6_value(f, /^(700|710)/) && (f.none? { |sf| sf.code == 'i' }) }
                   .map do |field|
        contributor = join_subfields(field, &subfield_in(%w[a b c d j q]))
        contributor_append = join_subfields(field, &subfield_in(%w[e u 3]))
        { value: contributor, value_append: contributor_append, link_type: 'author_creator_xfacet2' }
      end
      acc
    end

    # Load language map from YAML and memoize in @mappings hash
    # @return [Hash]
    def language_map
      @mappings[:language] ||= load_map('language.yml')
    end

    # Load location map from YAML and memoize in @mappings hash
    # @return [Hash]
    def location_map
      @mappings[:location] ||= load_map('locations.yml')
    end

    # Load relator map from YAML and memoize in @mappings hash
    # @return [Hash]
    def relator_map
      @mappings[:relator] ||= load_map('relator.yml')
    end

    # Read a mapping file from the "mappings" directory that sits next to this file.
    # @param [String] filename of mapping file in mappings directory, with file extension
    # @return [Hash] mapping as hash, with symbolized keys
    def load_map(filename)
      path = File.join(File.expand_path(__dir__), 'mappings', filename)
      YAML.safe_load(File.read(path), symbolize_names: true)
    end

    private

    # Split a dynamic method name into its helper prefix and the helper method name.
    # @param [Symbol, String] name e.g. :title_show
    # @return [Array<String>] e.g. ['title', 'show']; shorter when the name has no underscore
    def split_call(name)
      name.to_s.split('_', 2)
    end

    # Find the helper class for a method-name prefix, looking only at constants defined directly on
    # PennMARC (no lookup through Object), so a prefix can never resolve to an unrelated top-level class.
    # @param [String, nil] prefix e.g. 'title'
    # @return [Module, nil] e.g. PennMARC::Title, or nil if there is no such constant
    def resolve_helper(prefix)
      const_name = prefix.to_s.capitalize
      return unless const_name.match?(/\A[A-Z][A-Za-z0-9_]*\z/)
      return unless PennMARC.const_defined?(const_name, false)

      PennMARC.const_get(const_name, false)
    end
  end
end
@@ -0,0 +1,240 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'heading_control'
4
+
5
module PennMARC
  # Module holding "utility" methods used in MARC parsing methods. The methods only rely on fields being
  # enumerable collections of subfields that respond to #code and #value.
  module Util
    # Patterns used by #trim_trailing, keyed by the name of the trailing character they remove
    TRAILING_PATTERNS = {
      semicolon: /\s*;\s*$/,
      colon: /\s*:\s*$/,
      equal: /=$/,
      slash: %r{\s*/\s*$},
      comma: /\s*,\s*$/,
      period: /\.\s*$/ # TODO: revise to exclude "etc."
    }.freeze

    # Join subfields from a field selected based on a provided proc. Values are stripped, blank values
    # are dropped and runs of whitespace are collapsed.
    # @param [MARC::DataField] field
    # @param [Proc] selector takes a subfield as argument, returns a boolean
    # @return [String]
    def join_subfields(field, &selector)
      chosen = field.select { |sf| selector.call(sf) }
      values = chosen.filter_map do |sf|
        stripped = sf.value&.strip
        stripped if stripped.present?
      end
      values.join(' ').squish
    end

    # returns true if field has a value that matches
    # passed-in regex and passed in subfield
    # @todo example usage
    # @param [MARC::DataField] field
    # @param [String|Integer|Symbol] subfield
    # @param [Regexp] regex
    # @return [TrueClass, FalseClass]
    def subfield_value?(field, subfield, regex)
      code = subfield.to_s
      # Regexp#match? is nil-safe, so a subfield without a value simply does not match
      field.any? { |sf| sf.code == code && regex.match?(sf.value) }
    end

    # returns true if a given field has a given subfield value in a given array
    # TODO: example usage
    # @param [MARC::DataField] field
    # @param [String|Integer|Symbol] subfield
    # @param [Array] array
    # @return [TrueClass, FalseClass]
    def subfield_value_in?(field, subfield, array)
      code = subfield.to_s
      field.any? { |sf| sf.code == code && sf.value.in?(array) }
    end

    # returns true if a given field does not have a given subfield value in a given array
    # @param [MARC::DataField] field
    # @param [String|Integer|Symbol] subfield
    # @param [Array] array
    # @return [TrueClass, FalseClass]
    def subfield_value_not_in?(field, subfield, array)
      code = subfield.to_s
      field.none? { |sf| sf.code == code && sf.value.in?(array) }
    end

    # returns a lambda checking if passed-in subfield's code is a member of array
    # TODO: include lambda returning methods in their own module?
    # @param [Array] array
    # @return [Proc]
    def subfield_in?(array)
      ->(subfield) { array.member?(subfield.code) }
    end

    # returns a lambda checking if passed-in subfield's code is NOT a member of array
    # TODO: include lambda returning methods in their own module?
    # @param [Array] array
    # @return [Proc]
    def subfield_not_in?(array)
      ->(subfield) { !array.member?(subfield.code) }
    end

    # Check if a field has a given subfield defined
    # @param [MARC::DataField] field
    # @param [String|Symbol|Integer] subfield
    # @return [TrueClass, FalseClass]
    def subfield_defined?(field, subfield)
      code = subfield.to_s
      field.any? { |sf| sf.code == code }
    end

    # Check if a field does not have a given subfield defined
    # @param [MARC::DataField] field
    # @param [String|Symbol|Integer] subfield
    # @return [TrueClass, FalseClass]
    def subfield_undefined?(field, subfield)
      code = subfield.to_s
      field.none? { |sf| sf.code == code }
    end

    # Gets all non-blank subfield values for a subfield in a given field
    # @param [MARC::DataField] field
    # @param [String|Symbol] subfield as a string or symbol
    # @return [Array] subfield values for given subfield code
    def subfield_values(field, subfield)
      code = subfield.to_s
      field.filter_map do |sf|
        sf.value if sf.code == code && sf.value.present?
      end
    end

    # Get all subfield values for a provided subfield from any occurrence of a provided tag/tags
    # @param [String|Array] tag tags to consider
    # @param [String|Symbol] subfield to take the values from
    # @param [MARC::Record] record source
    # @return [Array] array of subfield values
    def subfield_values_for(tag:, subfield:, record:)
      record.fields(tag).flat_map { |field| subfield_values(field, subfield) }
    end

    # Remove one kind of trailing punctuation from a string.
    # @param [Symbol|String] trailer to target for removal; one of the TRAILING_PATTERNS keys
    # @param [String] string to modify
    # @return [String] copy of string without the trailer
    # @raise [TypeError] raised by String#sub when trailer is not one of the known keys
    def trim_trailing(trailer, string)
      string.sub TRAILING_PATTERNS[trailer.to_sym], ''
    end

    # MARC 880 field "Alternate Graphic Representation" contains text "linked" to another
    # field (e.g., 245 [Title]) used as an alternate representation. Often used to hold
    # translations of title values. A common need is to extract subfields as selected by
    # passed-in block from 880 datafield that has a particular subfield 6 value.
    # See: https://www.loc.gov/marc/bibliographic/bd880.html
    # @param [MARC::Record] record
    # @param [String|Array] subfield6_value either a string to look for in sub6 or an array of them
    # @param selector [Proc] takes a subfield as argument, returns a boolean
    # @return [Array] array of linked alternates
    def linked_alternate(record, subfield6_value, &selector)
      # Group the alternatives so the start-of-value anchor applies to every one of them,
      # not just the first (/^700|710/ would also match "710" anywhere in the value).
      linkage = /^(?:#{Array.wrap(subfield6_value).join('|')})/
      record.fields('880').filter_map do |field|
        next unless subfield_value?(field, '6', linkage)

        field.select { |sf| selector.call(sf) }.map(&:value).join(' ')
      end
    end
    alias get_880 linked_alternate

    # Common case of wanting to extract all the subfields besides 6 or 8,
    # from 880 datafield that has a particular subfield 6 value. We exclude 6 because
    # that value is the linkage ID itself and 8 because... IDK
    # @param [MARC::Record] record
    # @param [String|Array] subfield6_value either a string to look for in sub6 or an array of them
    # @return [Array] array of linked alternates without 8 or 6 values
    def linked_alternate_not_6_or_8(record, subfield6_value)
      linked_alternate(record, subfield6_value) do |sf|
        %w[6 8].exclude?(sf.code)
      end
    end

    # Returns the non-6,8 subfields from a datafield and its 880 link.
    # @param [MARC::Record] record
    # @param [String] tag
    # @return [Array] values from the tagged fields followed by values from the linked 880 fields
    def datafield_and_linked_alternate(record, tag)
      direct = record.fields(tag).filter_map do |field|
        join_subfields(field, &subfield_not_in?(%w[6 8]))
      end
      direct + linked_alternate_not_6_or_8(record, tag)
    end

    # Get the substring of a string up to a given target character
    # @param [Object] string to split
    # @param [Object] target character to split upon
    # @return [String] text before the first target, or '' when the target does not occur
    def substring_before(string, target)
      string.index(target) ? string.split(target, 2).first : ''
    end

    # Get the substring of a string after the first occurrence of a target character
    # @param [Object] string to split
    # @param [Object] target character to split upon
    # @return [String] text after the first target, or '' when the target does not occur
    def substring_after(string, target)
      string.index(target) ? string.split(target, 2)[1] : ''
    end

    # Join array and normalizing extraneous spaces
    # @param [Array] array
    # @return [String]
    def join_and_squish(array)
      array.join(' ').squish
    end
    alias join_and_trim_whitespace join_and_squish

    # If there's a subfield i, take the value of the first one and remove the first parenthesized
    # portion from it, then trim a trailing period and a trailing colon.
    # @param [MARC::Field] field
    # @return [String] subfield i without parentheses value, or '' if there is no subfield i
    def remove_paren_value_from_subfield_i(field)
      first_i = field.find { |sf| sf.code == 'i' }
      val = first_i ? first_i.value.sub(/\(.+?\)/, '') : ''
      trim_trailing(:colon, trim_trailing(:period, val))
    end

    # Translate a relator code using mapping
    # @todo handle case of receiving a URI? E.g., http://loc.gov/relator/aut
    # @param [String, NilClass] relator_code
    # @param [Hash] mapping with symbol keys
    # @return [String, NilClass] full relator string
    def translate_relator(relator_code, mapping)
      return unless relator_code.present?

      mapping[relator_code.to_sym]
    end

    # Get 650 & 880 for Provenance and Chronology: prefix should be 'PRO' or 'CHR' and may be preceded by a '%'
    # @note 11/2018: do not display $5 in PRO or CHR subjs
    # @param [MARC::Record] record
    # @param [String] prefix to select from subject field
    # @return [Array] array of values
    def prefixed_subject_and_alternate(record, prefix)
      prefixed = /^(#{prefix}|%#{prefix})/
      leading_prefix = /^%?#{prefix}/
      record.fields(%w[650 880]).filter_map do |field|
        next unless field.indicator2 == '4'

        # Only 880s linked to a 650 qualify. The subfield 6 value carries more than the tag
        # (e.g. "650-01"), so it has to be matched by prefix rather than compared for equality.
        next if field.tag == '880' && !subfield_value?(field, '6', /^650/)

        next unless field.any? { |sf| sf.code == 'a' && prefixed.match?(sf.value) }

        elements = field.select(&subfield_in?(%w[a])).map { |sf| sf.value.gsub(leading_prefix, '') }
        elements << join_subfields(field, &subfield_not_in?(%w[a 6 8 e w 5]))
        join_and_squish elements
      end
    end

    # Does the given field specify an allowed source code?
    #
    # @param [MARC::DataField] field
    # @return [Boolean]
    def valid_subject_genre_source_code?(field)
      subfield_value_in?(field, '2', PennMARC::HeadingControl::ALLOWED_SOURCE_CODES)
    end
  end
end
data/lib/pennmarc.rb ADDED
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.unshift(__dir__) unless $LOAD_PATH.include?(__dir__)
4
+
5
+ require_relative 'pennmarc/parser'
6
+ require 'library_stdnums'
data/pennmarc.gemspec ADDED
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
# Gem specification for pennmarc
Gem::Specification.new do |s|
  s.name = 'pennmarc'
  s.version = '0.0.1'
  s.summary = 'Penn Libraries Catalog MARC parsing wisdom for cross-project usage'
  s.description = 'Penn Libraries Catalog MARC parsing wisdom for cross-project usage'
  s.authors = ['Mike Kanning', 'Amrey Mathurin', 'Patrick Perkins']
  s.email = 'mkanning@upenn.edu'
  # Use a NUL-separated listing: $OUTPUT_RECORD_SEPARATOR is only defined after `require 'English'`,
  # and without it the listing would be split on any whitespace, breaking file names with spaces.
  s.files = `git ls-files -z`.split("\x0")
  s.homepage = 'https://gitlab.library.upenn.edu/dld/catalog/pennmarc'
  s.license = 'MIT'

  s.required_ruby_version = '>= 3.2'

  # Runtime dependencies: lib/pennmarc/parser.rb requires 'active_support/all' and
  # lib/pennmarc.rb requires 'library_stdnums', so both must be installed with the gem.
  s.add_dependency 'activesupport'
  s.add_dependency 'library_stdnums'
  s.add_dependency 'marc'

  s.add_development_dependency 'nokogiri'
  s.add_development_dependency 'rspec'
  s.add_development_dependency 'simplecov'
  s.add_development_dependency 'upennlib-rubocop'
end