pennmarc 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +23 -0
- data/Gemfile.lock +119 -0
- data/README.md +82 -0
- data/legacy/indexer.rb +568 -0
- data/legacy/marc.rb +2964 -0
- data/legacy/test_file_output.json +49 -0
- data/lib/pennmarc/encoding_level.rb +43 -0
- data/lib/pennmarc/enriched_marc.rb +36 -0
- data/lib/pennmarc/heading_control.rb +11 -0
- data/lib/pennmarc/helpers/citation.rb +31 -0
- data/lib/pennmarc/helpers/creator.rb +237 -0
- data/lib/pennmarc/helpers/database.rb +89 -0
- data/lib/pennmarc/helpers/date.rb +85 -0
- data/lib/pennmarc/helpers/edition.rb +90 -0
- data/lib/pennmarc/helpers/format.rb +312 -0
- data/lib/pennmarc/helpers/genre.rb +71 -0
- data/lib/pennmarc/helpers/helper.rb +11 -0
- data/lib/pennmarc/helpers/identifier.rb +134 -0
- data/lib/pennmarc/helpers/language.rb +37 -0
- data/lib/pennmarc/helpers/link.rb +12 -0
- data/lib/pennmarc/helpers/location.rb +97 -0
- data/lib/pennmarc/helpers/note.rb +132 -0
- data/lib/pennmarc/helpers/production.rb +131 -0
- data/lib/pennmarc/helpers/relation.rb +135 -0
- data/lib/pennmarc/helpers/series.rb +118 -0
- data/lib/pennmarc/helpers/subject.rb +304 -0
- data/lib/pennmarc/helpers/title.rb +197 -0
- data/lib/pennmarc/mappings/language.yml +516 -0
- data/lib/pennmarc/mappings/locations.yml +1801 -0
- data/lib/pennmarc/mappings/relator.yml +263 -0
- data/lib/pennmarc/parser.rb +177 -0
- data/lib/pennmarc/util.rb +240 -0
- data/lib/pennmarc.rb +6 -0
- data/pennmarc.gemspec +22 -0
- data/spec/fixtures/marcxml/test.xml +167 -0
- data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
- data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
- data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
- data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
- data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
- data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
- data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
- data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
- data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
- data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
- data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
- data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
- data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
- data/spec/lib/pennmarc/parser_spec.rb +13 -0
- data/spec/spec_helper.rb +104 -0
- data/spec/support/marc_spec_helpers.rb +84 -0
- metadata +171 -0
@@ -0,0 +1,263 @@
|
|
1
|
+
abr: Abridger
|
2
|
+
act: Actor
|
3
|
+
adp: Adapter
|
4
|
+
rcp: Addressee
|
5
|
+
anl: Analyst
|
6
|
+
anm: Animator
|
7
|
+
ann: Annotator
|
8
|
+
apl: Appellant
|
9
|
+
ape: Appellee
|
10
|
+
app: Applicant
|
11
|
+
arc: Architect
|
12
|
+
arr: Arranger
|
13
|
+
acp: Art copyist
|
14
|
+
adi: Art director
|
15
|
+
art: Artist
|
16
|
+
ard: Artistic director
|
17
|
+
asg: Assignee
|
18
|
+
asn: Associated name
|
19
|
+
att: Attributed name
|
20
|
+
auc: Auctioneer
|
21
|
+
aut: Author
|
22
|
+
aqt: Author in quotations or text abstracts
|
23
|
+
aft: Author of afterword, colophon, etc.
|
24
|
+
aud: Author of dialog
|
25
|
+
aui: Author of introduction, etc.
|
26
|
+
ato: Autographer
|
27
|
+
ant: Bibliographic antecedent
|
28
|
+
bnd: Binder
|
29
|
+
bdd: Binding designer
|
30
|
+
blw: Blurb writer
|
31
|
+
bkd: Book designer
|
32
|
+
bkp: Book producer
|
33
|
+
bjd: Bookjacket designer
|
34
|
+
bpd: Bookplate designer
|
35
|
+
bsl: Bookseller
|
36
|
+
brl: Braille embosser
|
37
|
+
brd: Broadcaster
|
38
|
+
cll: Calligrapher
|
39
|
+
ctg: Cartographer
|
40
|
+
cas: Caster
|
41
|
+
cns: Censor
|
42
|
+
chr: Choreographer
|
43
|
+
cng: Cinematographer
|
44
|
+
cli: Client
|
45
|
+
clb: Collaborator # code discontinued by LoC; kept so older records still translate
|
46
|
+
cor: Collection registrar
|
47
|
+
col: Collector
|
48
|
+
clt: Collotyper
|
49
|
+
clr: Colorist
|
50
|
+
cmm: Commentator
|
51
|
+
cwt: Commentator for written text
|
52
|
+
com: Compiler
|
53
|
+
cpl: Complainant
|
54
|
+
cpt: Complainant-appellant
|
55
|
+
cpe: Complainant-appellee
|
56
|
+
cmp: Composer
|
57
|
+
cmt: Compositor
|
58
|
+
ccp: Conceptor
|
59
|
+
cnd: Conductor
|
60
|
+
con: Conservator
|
61
|
+
csl: Consultant
|
62
|
+
csp: Consultant to a project
|
63
|
+
cos: Contestant
|
64
|
+
cot: Contestant-appellant
|
65
|
+
coe: Contestant-appellee
|
66
|
+
cts: Contestee
|
67
|
+
ctt: Contestee-appellant
|
68
|
+
cte: Contestee-appellee
|
69
|
+
ctr: Contractor
|
70
|
+
ctb: Contributor
|
71
|
+
cpc: Copyright claimant
|
72
|
+
cph: Copyright holder
|
73
|
+
crr: Corrector
|
74
|
+
crp: Correspondent
|
75
|
+
cst: Costume designer
|
76
|
+
cou: Court governed
|
77
|
+
crt: Court reporter
|
78
|
+
cov: Cover designer
|
79
|
+
cre: Creator
|
80
|
+
cur: Curator
|
81
|
+
dnc: Dancer
|
82
|
+
dtc: Data contributor
|
83
|
+
dtm: Data manager
|
84
|
+
dte: Dedicatee
|
85
|
+
dto: Dedicator
|
86
|
+
dfd: Defendant
|
87
|
+
dft: Defendant-appellant
|
88
|
+
dfe: Defendant-appellee
|
89
|
+
dgg: Degree granting institution
|
90
|
+
dln: Delineator
|
91
|
+
dpc: Depicted
|
92
|
+
dpt: Depositor
|
93
|
+
dsr: Designer
|
94
|
+
drt: Director
|
95
|
+
dis: Dissertant
|
96
|
+
dbp: Distribution place
|
97
|
+
dst: Distributor
|
98
|
+
dnr: Donor
|
99
|
+
drm: Draftsman
|
100
|
+
dub: Dubious author
|
101
|
+
edt: Editor
|
102
|
+
edc: Editor of compilation
|
103
|
+
edm: Editor of moving image work
|
104
|
+
elg: Electrician
|
105
|
+
elt: Electrotyper
|
106
|
+
enj: Enacting jurisdiction
|
107
|
+
eng: Engineer
|
108
|
+
egr: Engraver
|
109
|
+
etr: Etcher
|
110
|
+
evp: Event place
|
111
|
+
exp: Expert
|
112
|
+
fac: Facsimilist
|
113
|
+
fld: Field director
|
114
|
+
fds: Film distributor
|
115
|
+
fmd: Film director
|
116
|
+
flm: Film editor
|
117
|
+
fmp: Film producer
|
118
|
+
fmk: Filmmaker
|
119
|
+
fpy: First party
|
120
|
+
frg: Forger
|
121
|
+
fmo: Former owner
|
122
|
+
fnd: Funder
|
123
|
+
gis: Geographic information specialist
|
124
|
+
grt: Graphic technician # code discontinued by LoC; kept so older records still translate
|
125
|
+
hnr: Honoree
|
126
|
+
hst: Host
|
127
|
+
his: Host institution
|
128
|
+
ilu: Illuminator
|
129
|
+
ill: Illustrator
|
130
|
+
ins: Inscriber
|
131
|
+
itr: Instrumentalist
|
132
|
+
ive: Interviewee
|
133
|
+
ivr: Interviewer
|
134
|
+
inv: Inventor
|
135
|
+
isb: Issuing body
|
136
|
+
jud: Judge
|
137
|
+
jug: Jurisdiction governed
|
138
|
+
lbr: Laboratory
|
139
|
+
ldr: Laboratory director
|
140
|
+
lsa: Landscape architect
|
141
|
+
led: Lead
|
142
|
+
len: Lender
|
143
|
+
lil: Libelant
|
144
|
+
lit: Libelant-appellant
|
145
|
+
lie: Libelant-appellee
|
146
|
+
lel: Libelee
|
147
|
+
let: Libelee-appellant
|
148
|
+
lee: Libelee-appellee
|
149
|
+
lbt: Librettist
|
150
|
+
lse: Licensee
|
151
|
+
lso: Licensor
|
152
|
+
lgd: Lighting designer
|
153
|
+
ltg: Lithographer
|
154
|
+
lyr: Lyricist
|
155
|
+
mfp: Manufacture place
|
156
|
+
mfr: Manufacturer
|
157
|
+
mrb: Marbler
|
158
|
+
mrk: Markup editor
|
159
|
+
mdc: Metadata contact
|
160
|
+
mte: Metal-engraver
|
161
|
+
mod: Moderator
|
162
|
+
mon: Monitor
|
163
|
+
mcp: Music copyist
|
164
|
+
msd: Musical director
|
165
|
+
mus: Musician
|
166
|
+
nrt: Narrator
|
167
|
+
osp: Onscreen presenter
|
168
|
+
opn: Opponent
|
169
|
+
orm: Organizer of meeting
|
170
|
+
org: Originator
|
171
|
+
own: Owner
|
172
|
+
pan: Panelist
|
173
|
+
ppm: Papermaker
|
174
|
+
pta: Patent applicant
|
175
|
+
pth: Patent holder
|
176
|
+
pat: Patron
|
177
|
+
prf: Performer
|
178
|
+
pma: Permitting agency
|
179
|
+
pht: Photographer
|
180
|
+
ptf: Plaintiff
|
181
|
+
ptt: Plaintiff-appellant
|
182
|
+
pte: Plaintiff-appellee
|
183
|
+
plt: Platemaker
|
184
|
+
pra: Praeses
|
185
|
+
pre: Presenter
|
186
|
+
prt: Printer
|
187
|
+
pop: Printer of plates
|
188
|
+
prm: Printmaker
|
189
|
+
prc: Process contact
|
190
|
+
pro: Producer
|
191
|
+
prn: Production company
|
192
|
+
prs: Production designer
|
193
|
+
pmn: Production manager
|
194
|
+
prd: Production personnel
|
195
|
+
prp: Production place
|
196
|
+
prg: Programmer
|
197
|
+
pdr: Project director
|
198
|
+
pfr: Proofreader
|
199
|
+
prv: Provider
|
200
|
+
pup: Publication place
|
201
|
+
pbl: Publisher
|
202
|
+
pbd: Publishing director
|
203
|
+
ppt: Puppeteer
|
204
|
+
rdd: Radio director
|
205
|
+
rpc: Radio producer
|
206
|
+
rce: Recording engineer
|
207
|
+
rcd: Recordist
|
208
|
+
red: Redaktor
|
209
|
+
ren: Renderer
|
210
|
+
rpt: Reporter
|
211
|
+
rps: Repository
|
212
|
+
rth: Research team head
|
213
|
+
rtm: Research team member
|
214
|
+
res: Researcher
|
215
|
+
rsp: Respondent
|
216
|
+
rst: Respondent-appellant
|
217
|
+
rse: Respondent-appellee
|
218
|
+
rpy: Responsible party
|
219
|
+
rsg: Restager
|
220
|
+
rsr: Restorationist
|
221
|
+
rev: Reviewer
|
222
|
+
rbr: Rubricator
|
223
|
+
sce: Scenarist
|
224
|
+
sad: Scientific advisor
|
225
|
+
aus: Screenwriter
|
226
|
+
scr: Scribe
|
227
|
+
scl: Sculptor
|
228
|
+
spy: Second party
|
229
|
+
sec: Secretary
|
230
|
+
sll: Seller
|
231
|
+
std: Set designer
|
232
|
+
stg: Setting
|
233
|
+
sgn: Signer
|
234
|
+
sng: Singer
|
235
|
+
sds: Sound designer
|
236
|
+
spk: Speaker
|
237
|
+
spn: Sponsor
|
238
|
+
sgd: Stage director
|
239
|
+
stm: Stage manager
|
240
|
+
stn: Standards body
|
241
|
+
str: Stereotyper
|
242
|
+
stl: Storyteller
|
243
|
+
sht: Supporting host
|
244
|
+
srv: Surveyor
|
245
|
+
tch: Teacher
|
246
|
+
tcd: Technical director
|
247
|
+
tld: Television director
|
248
|
+
tlp: Television producer
|
249
|
+
ths: Thesis advisor
|
250
|
+
trc: Transcriber
|
251
|
+
trl: Translator
|
252
|
+
tyd: Type designer
|
253
|
+
tyg: Typographer
|
254
|
+
uvp: University place
|
255
|
+
vdg: Videographer
|
256
|
+
voc: Vocalist # code discontinued by LoC; kept so older records still translate
|
257
|
+
wit: Witness
|
258
|
+
wde: Wood engraver
|
259
|
+
wdc: Woodcutter
|
260
|
+
wam: Writer of accompanying material
|
261
|
+
wac: Writer of added commentary
|
262
|
+
wat: Writer of added text
|
263
|
+
wal: Writer of added lyrics
|
@@ -0,0 +1,177 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'active_support/all'
|
4
|
+
require_relative 'helpers/helper'
|
5
|
+
require_relative 'helpers/creator'
|
6
|
+
require_relative 'helpers/database'
|
7
|
+
require_relative 'helpers/date'
|
8
|
+
require_relative 'helpers/format'
|
9
|
+
require_relative 'helpers/genre'
|
10
|
+
require_relative 'helpers/identifier'
|
11
|
+
require_relative 'helpers/language'
|
12
|
+
require_relative 'helpers/link'
|
13
|
+
require_relative 'helpers/location'
|
14
|
+
require_relative 'helpers/subject'
|
15
|
+
require_relative 'helpers/title'
|
16
|
+
require_relative 'helpers/citation'
|
17
|
+
require_relative 'helpers/relation'
|
18
|
+
require_relative 'helpers/production'
|
19
|
+
require_relative 'helpers/edition'
|
20
|
+
require_relative 'helpers/note'
|
21
|
+
|
22
|
+
module PennMARC
|
23
|
+
attr_accessor :mappings
|
24
|
+
|
25
|
+
# Default value for the `helpers:` argument of Parser#initialize.
# NOTE(review): the identifier, citation, production, edition and note helpers are
# required at the top of this file but are not listed here — confirm whether intentional.
DEFINED_HELPERS = %w[Creator Database Date Format Genre Language Link Location Subject Title Relation].freeze
|
26
|
+
|
27
|
+
# Methods here should return values used in the indexer. The parsing logic should
|
28
|
+
# NOT return values specific to any particular site/interface, but just general
|
29
|
+
# MARC parsing logic for "title", "subject", "author", etc., as much as reasonably
|
30
|
+
# possible. We'll see how it goes.
|
31
|
+
#
|
32
|
+
# Methods should, by default, take in a MARC::Record
|
33
|
+
class Parser
|
34
|
+
# Mapping hashes loaded so far, keyed by mapping name (:language, :location, :relator).
# Declared on the class because @mappings is set on Parser instances; an attr_accessor
# at module level does not give Parser instances an accessor.
attr_accessor :mappings

# @param [Array<String>, String] helpers helper names, kept in @helpers and consulted by
#   respond_to_missing?
def initialize(helpers: DEFINED_HELPERS)
  @mappings = {}
  @helpers = Array.wrap(helpers) # TODO: load helpers dynamically?
end
|
38
|
+
|
39
|
+
# Report "<helper>_<method>" names as supported when the helper prefix is one of the
# names in @helpers (compared case-insensitively, since helper names are capitalised
# while method prefixes are lower case).
# @param [Symbol, String] name method name being queried
# @param [Boolean] include_private passed by Ruby; only forwarded to super
# @return [Boolean]
def respond_to_missing?(name, include_private = false)
  prefix = name.to_s.split('_').first.to_s
  Array(@helpers).any? { |helper| helper.to_s.casecmp?(prefix) } || super
end
|
42
|
+
|
43
|
+
# Dispatch "<helper>_<method>" calls to the matching helper class, e.g.,
#   #title_show    -> PennMARC::Title.show
#   #subject_facet -> PennMARC::Subject.facet
# All positional arguments, keyword arguments and any block are forwarded. As before, the
# helper does not have to be listed in @helpers. Names that do not resolve to a public
# method on a PennMARC helper constant fall through to super, so callers (and Ruby's
# implicit conversions such as to_ary) get an ordinary NoMethodError.
# @param [Symbol] name missing method name
# @return [Object] whatever the helper method returns
def method_missing(name, *args, **kwargs, &block)
  helper, meth = name.to_s.split('_', 2)
  return super if helper.to_s.empty? || meth.to_s.empty?

  const_name = helper.capitalize
  # const_defined? raises on strings that are not valid constant names
  return super unless const_name.match?(/\A[A-Z][A-Za-z0-9]*\z/)
  # inherit = false so that e.g. "date_x" cannot resolve to ::Date
  return super unless PennMARC.const_defined?(const_name, false)

  target = PennMARC.const_get(const_name, false)
  return super unless target.respond_to?(meth)

  target.public_send(meth, *args, **kwargs, &block)
end
|
52
|
+
|
53
|
+
# One string per 255/342 field: subfields 6 and 8 are skipped, remaining values are
# stripped, blank values dropped, and the rest joined with single spaces.
# The selection is done inline because Parser does not mix in PennMARC::Util, so the
# previously called join_subfields / subfield_not_6_or_8 were undefined here.
# @todo does this fit in an existing helper?
# @param [MARC::Record] record
# @return [Array<String>]
def cartographic_show(record)
  record.fields(%w[255 342]).map do |field|
    field.reject { |sf| %w[6 8].include?(sf.code) }
         .filter_map { |sf| sf.value&.strip.presence }
         .join(' ').squish
  end
end
|
61
|
+
|
62
|
+
# One string per 026 field: subfields 2, 5, 6 and 8 are skipped, remaining values are
# stripped, blank values dropped, and the rest joined with single spaces.
# The selection is done inline because Parser does not mix in PennMARC::Util, so the
# previously called join_subfields / subfield_not_in were undefined here.
# @todo move to Identifier helper
# @param [MARC::Record] record
# @return [Array<String>]
def fingerprint_show(record)
  record.fields('026').map do |field|
    field.reject { |sf| %w[2 5 6 8].include?(sf.code) }
         .filter_map { |sf| sf.value&.strip.presence }
         .join(' ').squish
  end
end
|
70
|
+
|
71
|
+
# Values of 351 fields followed by values of 880 fields whose subfield 6 starts with 351.
# Subfields 6 and 8 are excluded in both cases; 351 fields that yield no text are dropped.
# Written inline because Parser does not mix in PennMARC::Util, so the previously called
# get_datafield_and_880 was undefined here.
# @todo does this fit in an existing helper?
# @param [MARC::Record] record
# @return [Array<String>]
def arrangement_show(record)
  wanted = ->(sf) { !%w[6 8].include?(sf.code) }

  direct = record.fields('351').filter_map do |field|
    field.select(&wanted).filter_map { |sf| sf.value&.strip.presence }.join(' ').squish.presence
  end

  linked = record.fields('880').filter_map do |field|
    next unless field.any? { |sf| sf.code == '6' && sf.value.to_s.start_with?('351') }

    field.select(&wanted).map(&:value).join(' ')
  end

  direct + linked
end
|
77
|
+
|
78
|
+
# @param [MARC::Record] record
|
79
|
+
# @return [Object]
|
80
|
+
# Accumulates the results of get_sub3_and_other_subs for fields 538, 344, 345/346 and 347,
# then for 880 fields selected by has_subfield6_value with patterns for those same tags.
# NOTE(review): get_sub3_and_other_subs, subfield_in and has_subfield6_value are not defined
# in this class, and the visible PennMARC::Util only has similarly named subfield_in? and
# subfield_value? — confirm where these are meant to come from before relying on this method.
def system_details_show(record)
|
81
|
+
acc = []
|
82
|
+
acc += record.fields('538').map do |field|
|
83
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a i u}))
|
84
|
+
end
|
85
|
+
acc += record.fields('344').map do |field|
|
86
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a b c d e f g h}))
|
87
|
+
end
|
88
|
+
acc += record.fields(%w{345 346}).map do |field|
|
89
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a b}))
|
90
|
+
end
|
91
|
+
acc += record.fields('347').map do |field|
|
92
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a b c d e f}))
|
93
|
+
end
|
94
|
+
acc += record.fields('880')
|
95
|
+
.select { |f| has_subfield6_value(f, /^538/) }
|
96
|
+
.map do |field|
|
97
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a i u}))
|
98
|
+
end
|
99
|
+
acc += record.fields('880')
|
100
|
+
.select { |f| has_subfield6_value(f, /^344/) }
|
101
|
+
.map do |field|
|
102
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a b c d e f g h}))
|
103
|
+
end
|
104
|
+
acc += record.fields('880')
|
105
|
+
.select { |f| has_subfield6_value(f, /^(345|346)/) }
|
106
|
+
.map do |field|
|
107
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a b}))
|
108
|
+
end
|
109
|
+
acc += record.fields('880')
|
110
|
+
.select { |f| has_subfield6_value(f, /^347/) }
|
111
|
+
.map do |field|
|
112
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a b c d e f}))
|
113
|
+
end
|
114
|
+
acc
|
115
|
+
end
|
116
|
+
|
117
|
+
# @todo the legacy code here is a hot mess for a number of reasons, what do we need this field to do?
|
118
|
+
# @note port the needed parts from get_offsite_display, don't return HTML
|
119
|
+
# @param [MARC::Record] record
|
120
|
+
# @return [Object]
|
121
|
+
# Placeholder with an empty body: currently returns nil for every record.
def offsite_show(record); end
|
122
|
+
|
123
|
+
# Contributors from 700/710 fields (second indicator blank or 0) and from 880 fields linked
# to 700/710, skipping any field that has a subfield i.
# For 700/710, :value_append is built from subfields e, u, 3 and 4; subfield 4 is translated
# through relator_map (symbol keys) and omitted when the code has no translation.
# Subfield handling is inline because Parser does not mix in PennMARC::Util, and the
# previously referenced relator_codes was not defined anywhere in this class.
# @todo move this to Creator helper
# @param [MARC::Record] record
# @return [Array<Hash>] hashes with :value, :value_append and :link_type keys
def contributor_show(record)
  name_codes = %w[a b c d j q]
  link_type = 'author_creator_xfacet2'
  joined = lambda do |field, codes|
    field.select { |sf| codes.include?(sf.code) }
         .filter_map { |sf| sf.value&.strip.presence }
         .join(' ').squish
  end
  no_subfield_i = ->(field) { field.none? { |sf| sf.code == 'i' } }

  direct = record.fields(%w[700 710])
                 .select { |f| ['', ' ', '0'].include?(f.indicator2) && no_subfield_i.call(f) }
                 .map do |field|
    appended = field.select { |sf| %w[e u 3 4].include?(sf.code) }.map do |sf|
      next " #{sf.value}" unless sf.code == '4'

      relator = relator_map[sf.value.to_s.to_sym]
      relator ? ", #{relator}" : ''
    end.join
    { value: joined.call(field, name_codes), value_append: appended, link_type: link_type }
  end

  linked = record.fields('880')
                 .select { |f| f.any? { |sf| sf.code == '6' && sf.value =~ /^(700|710)/ } && no_subfield_i.call(f) }
                 .map do |field|
    { value: joined.call(field, name_codes), value_append: joined.call(field, %w[e u 3]), link_type: link_type }
  end

  direct + linked
end
|
151
|
+
|
152
|
+
# Language mapping, read from language.yml on first use and cached under @mappings[:language]
# @return [Hash]
def language_map
  cached = @mappings[:language]
  return cached if cached

  @mappings[:language] = load_map('language.yml')
end
|
157
|
+
|
158
|
+
# Location mapping, read from locations.yml on first use and cached under @mappings[:location]
# @return [Hash]
def location_map
  cached = @mappings[:location]
  return cached if cached

  @mappings[:location] = load_map('locations.yml')
end
|
163
|
+
|
164
|
+
# Relator mapping, read from relator.yml on first use and cached under @mappings[:relator]
# @return [Hash]
def relator_map
  cached = @mappings[:relator]
  return cached if cached

  @mappings[:relator] = load_map('relator.yml')
end
|
169
|
+
|
170
|
+
# Parse a YAML mapping file from the "mappings" directory that sits next to this file.
# @param [String] filename name of the mapping file, with file extension
# @return [Hash] mapping with symbolized keys
def load_map(filename)
  mappings_dir = File.join(File.expand_path(__dir__), 'mappings')
  yaml_source = File.read(File.join(mappings_dir, filename))
  YAML.safe_load(yaml_source, symbolize_names: true)
end
|
176
|
+
end
|
177
|
+
end
|
@@ -0,0 +1,240 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'heading_control'
|
4
|
+
|
5
|
+
module PennMARC
|
6
|
+
# Module holding "utility" methods used in MARC parsing methods
|
7
|
+
module Util
|
8
|
+
# Build one string from the subfields of a field that the given block accepts.
# Each accepted value is stripped, blank values are dropped, and the survivors are
# joined with single spaces.
# @param [MARC::DataField] field
# @param [Proc] selector called with each subfield; a truthy result keeps it
# @return [String]
def join_subfields(field, &selector)
  chosen = field.select { |subfield| selector.call(subfield) }
  cleaned = chosen.filter_map do |subfield|
    stripped = subfield.value&.strip
    stripped if stripped.present?
  end
  cleaned.join(' ').squish
end
|
20
|
+
|
21
|
+
# Does the field have a subfield with the given code whose value matches the regex?
# @todo example usage
# @param [MARC::DataField] field
# @param [String|Integer|Symbol] subfield code, compared after to_s
# @param [Regexp] regex
# @return [TrueClass, FalseClass]
def subfield_value?(field, subfield, regex)
  wanted_code = subfield.to_s
  field.any? do |sf|
    next false unless sf.code == wanted_code

    sf.value =~ regex
  end
end
|
31
|
+
|
32
|
+
# Does the field have a subfield with the given code whose value is one of the given values?
# TODO: example usage
# @param [MARC::DataField] field
# @param [String|Integer|Symbol] subfield code, compared after to_s
# @param [Array] array accepted values
# @return [TrueClass, FalseClass]
def subfield_value_in?(field, subfield, array)
  wanted_code = subfield.to_s
  field.any? do |sf|
    sf.code == wanted_code && sf.value.in?(array)
  end
end
|
41
|
+
|
42
|
+
# Is the field free of any subfield with the given code whose value is one of the given values?
# @param [MARC::DataField] field
# @param [String|Integer|Symbol] subfield code, compared after to_s
# @param [Array] array values to look for
# @return [TrueClass, FalseClass]
def subfield_value_not_in?(field, subfield, array)
  wanted_code = subfield.to_s
  field.none? do |sf|
    sf.code == wanted_code && sf.value.in?(array)
  end
end
|
50
|
+
|
51
|
+
# Build a lambda that answers whether a subfield's code is one of the given codes.
# TODO: include lambda returning methods in their own module?
# @param [Array] array subfield codes
# @return [Proc]
def subfield_in?(array)
  lambda do |subfield|
    array.include?(subfield.code)
  end
end
|
58
|
+
|
59
|
+
# Build a lambda that answers whether a subfield's code is NOT one of the given codes.
# TODO: include lambda returning methods in their own module?
# @param [Array] array subfield codes
# @return [Proc]
def subfield_not_in?(array)
  lambda do |subfield|
    !array.include?(subfield.code)
  end
end
|
66
|
+
|
67
|
+
# Does the field contain at least one subfield with the given code?
# @param [MARC::DataField] field
# @param [String|Symbol|Integer] subfield code, compared after to_s
# @return [TrueClass, FalseClass]
def subfield_defined?(field, subfield)
  wanted_code = subfield.to_s
  field.any? do |sf|
    sf.code == wanted_code
  end
end
|
74
|
+
|
75
|
+
# Is the field without any subfield that has the given code?
# @param [MARC::DataField] field
# @param [String|Symbol|Integer] subfield code, compared after to_s
# @return [TrueClass, FalseClass]
def subfield_undefined?(field, subfield)
  wanted_code = subfield.to_s
  field.none? do |sf|
    sf.code == wanted_code
  end
end
|
82
|
+
|
83
|
+
# Collect the non-blank values of every subfield in the field that has the given code.
# @param [MARC::DataField] field
# @param [String|Symbol] subfield code, compared after to_s
# @return [Array] values for the given subfield code, in field order
def subfield_values(field, subfield)
  wanted_code = subfield.to_s
  matching = field.select { |sf| sf.code == wanted_code && sf.value.present? }
  matching.map(&:value)
end
|
96
|
+
|
97
|
+
# Collect the values of one subfield code across every occurrence of the given tag(s).
# @param [String|Array] tag tags to consider
# @param [String|Symbol] subfield to take the values from
# @param [MARC::Record] record source
# @return [Array] subfield values, in record order
def subfield_values_for(tag:, subfield:, record:)
  record.fields(tag).each_with_object([]) do |field, values|
    values.concat(subfield_values(field, subfield))
  end
end
|
107
|
+
|
108
|
+
# Patterns matching each supported trailing character (with surrounding whitespace where the
# pattern allows it) at the end of a line. Built once rather than on every call.
TRAILING_PUNCTUATION_PATTERNS = {
  semicolon: /\s*;\s*$/,
  colon: /\s*:\s*$/,
  equal: /=$/,
  slash: %r{\s*/\s*$},
  comma: /\s*,\s*$/,
  period: /\.\s*$/ # TODO: revise to exclude "etc."
}.freeze

# Remove one kind of trailing punctuation from a string.
# @param [Symbol|String] trailer to target for removal: semicolon, colon, equal, slash,
#   comma or period
# @param [String] string to modify
# @return [String] copy of string with the first match of the trailer pattern removed
# @raise [ArgumentError] if trailer is not one of the supported names
def trim_trailing(trailer, string)
  pattern = TRAILING_PUNCTUATION_PATTERNS.fetch(trailer.to_sym) do
    raise ArgumentError, "unsupported trailer: #{trailer.inspect}"
  end
  string.sub(pattern, '')
end
|
119
|
+
|
120
|
+
# MARC 880 field "Alternate Graphic Representation" contains text "linked" to another
# field (e.g., 245 [Title]) used as an alternate representation. Often used to hold
# translations of title values. A common need is to extract subfields as selected by
# passed-in block from 880 datafield that has a particular subfield 6 value.
# See: https://www.loc.gov/marc/bibliographic/bd880.html
# @param [MARC::Record] record
# @param [String|Array] subfield6_value either a string to look for at the start of sub6 or
#   an array of them
# @param selector [Proc] takes a subfield as argument, returns a boolean
# @return [Array] one space-joined string of selected subfield values per matching 880 field
def linked_alternate(record, subfield6_value, &selector)
  # Group the alternatives so the ^ anchor applies to every tag, not just the first one;
  # built once instead of once per 880 field.
  linkage = /^(?:#{Array(subfield6_value).join('|')})/

  record.fields('880').filter_map do |field|
    next unless subfield_value?(field, '6', linkage)

    field.select { |sf| selector.call(sf) }.map(&:value).join(' ')
  end
end
alias get_880 linked_alternate
|
137
|
+
|
138
|
+
# Linked 880 values for the given subfield 6 value(s), leaving out subfields 6 and 8.
# Subfield 6 is left out because it holds the linkage itself; subfield 8 is left out too.
# @param [MARC::Record] record
# @param [String|Array] subfield6_value either a string to look for in sub6 or an array of them
# @return [Array] array of linked alternates without 8 or 6 values
def linked_alternate_not_6_or_8(record, subfield6_value)
  skipped_codes = %w[6 8]
  linked_alternate(record, subfield6_value) { |sf| !skipped_codes.include?(sf.code) }
end
|
149
|
+
|
150
|
+
# Returns the non-6,8 subfields from a datafield and its 880 link.
# Fields of the given tag whose joined value is blank are left out: join_subfields returns ''
# (which is truthy) for them, so filter_map alone never dropped anything.
# @param [MARC::Record] record
# @param [String] tag
# @return [Array] joined values for the tag followed by the linked 880 values
def datafield_and_linked_alternate(record, tag)
  direct = record.fields(tag).filter_map do |field|
    join_subfields(field, &subfield_not_in?(%w[6 8])).presence
  end
  direct + linked_alternate_not_6_or_8(record, tag)
end
|
159
|
+
|
160
|
+
# Get the part of a string that precedes the first occurrence of a target.
# Uses String#partition, so a single-space target is treated literally (split(' ', 2) would
# skip leading whitespace) and the string is scanned only once.
# @param [String] string to search
# @param [String, Regexp] target to split upon
# @return [String] text before the target, or '' when the target does not occur
def substring_before(string, target)
  before, separator, = string.partition(target)
  separator.empty? ? '' : before
end
|
167
|
+
|
168
|
+
# Get the part of a string that follows the first occurrence of a target.
# Uses String#partition, so a regexp target containing a capture group returns the text after
# the match (split would put the captured text in second position) and a single-space target
# is treated literally.
# @param [String] string to search
# @param [String, Regexp] target to split upon
# @return [String] text after the target, or '' when the target does not occur
def substring_after(string, target)
  _before, separator, after = string.partition(target)
  separator.empty? ? '' : after
end
|
175
|
+
|
176
|
+
# Join the elements of an array with spaces, then normalize whitespace with squish.
# @param [Array] array
# @return [String]
def join_and_squish(array)
  joined = array.join(' ')
  joined.squish
end
alias join_and_trim_whitespace join_and_squish
|
183
|
+
|
184
|
+
# Take the value of the first subfield i that has one, remove the first parenthesized
# segment from it (if any), then trim a trailing period and a trailing colon.
# @param [MARC::Field] field
# @return [String] subfield i without parentheses value; '' when there is no usable subfield i
def remove_paren_value_from_subfield_i(field)
  candidates = field.select { |sf| sf.code == 'i' }.filter_map do |sf|
    found = /\((.+?)\)/.match(sf.value)
    # found[0] is the whole "(...)" segment, removed as a literal string
    found ? sf.value.sub(found[0], '') : sf.value
  end
  without_period = trim_trailing(:period, candidates.first || '')
  trim_trailing(:colon, without_period)
end
|
201
|
+
|
202
|
+
# Look up the full relator term for a relator code in the given mapping.
# @todo handle case of receiving a URI? E.g., http://loc.gov/relator/aut
# @param [String, NilClass] relator_code
# @param [Hash] mapping relator terms keyed by symbolized code
# @return [String, NilClass] full relator string; nil for a blank or unmapped code
def translate_relator(relator_code, mapping)
  mapping[relator_code.to_sym] if relator_code.present?
end
|
212
|
+
|
213
|
+
# Get 650 & 880 for Provenance and Chronology: prefix should be 'PRO' or 'CHR' and may be preceded by a '%'
# Only fields with second indicator 4 are considered. An 880 is used only when its subfield 6
# begins with 650; linkage values carry more than the tag (e.g. "650-01"), so comparing the
# whole value with '650' skipped every real linked field.
# @note 11/2018: do not display $5 in PRO or CHR subjs
# @param [MARC::Record] record
# @param [String] prefix to select from subject field
# @return [Array] array of values
def prefixed_subject_and_alternate(record, prefix)
  record.fields(%w[650 880]).filter_map do |field|
    next unless field.indicator2 == '4'

    next if field.tag == '880' && !subfield_value?(field, '6', /^650/)

    next unless subfield_value?(field, 'a', /^(#{prefix}|%#{prefix})/)

    # subfield a values with the prefix removed, then every other displayable subfield
    elements = field.select(&subfield_in?(%w[a])).map { |sf| sf.value.gsub(/^%?#{prefix}/, '') }
    elements << join_subfields(field, &subfield_not_in?(%w[a 6 8 e w 5]))
    join_and_squish elements
  end
end
|
231
|
+
|
232
|
+
# Is any subfield 2 value of the field one of HeadingControl::ALLOWED_SOURCE_CODES?
#
# @param [MARC::DataField] field
# @return [Boolean]
def valid_subject_genre_source_code?(field)
  allowed_codes = PennMARC::HeadingControl::ALLOWED_SOURCE_CODES
  subfield_value_in?(field, '2', allowed_codes)
end
|
239
|
+
end
|
240
|
+
end
|
data/lib/pennmarc.rb
ADDED
data/pennmarc.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
  s.name = 'pennmarc'
  s.version = '0.0.1'
  s.summary = 'Penn Libraries Catalog MARC parsing wisdom for cross-project usage'
  s.description = 'Penn Libraries Catalog MARC parsing wisdom for cross-project usage'
  s.authors = ['Mike Kanning', 'Amrey Mathurin', 'Patrick Perkins']
  s.email = 'mkanning@upenn.edu'
  # NUL-delimited listing: $OUTPUT_RECORD_SEPARATOR is nil unless explicitly set, and
  # split(nil) splits on any whitespace, which breaks paths that contain spaces.
  s.files = `git ls-files -z`.split("\x0")
  s.homepage = 'https://gitlab.library.upenn.edu/dld/catalog/pennmarc'
  s.license = 'MIT'

  s.required_ruby_version = '>= 3.2'

  # lib/pennmarc/parser.rb requires 'active_support/all' at load time
  s.add_dependency 'activesupport'
  s.add_dependency 'marc'

  s.add_development_dependency 'nokogiri'
  s.add_development_dependency 'rspec'
  s.add_development_dependency 'simplecov'
  s.add_development_dependency 'upennlib-rubocop'
end
|