pennmarc 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +23 -0
- data/Gemfile.lock +119 -0
- data/README.md +82 -0
- data/legacy/indexer.rb +568 -0
- data/legacy/marc.rb +2964 -0
- data/legacy/test_file_output.json +49 -0
- data/lib/pennmarc/encoding_level.rb +43 -0
- data/lib/pennmarc/enriched_marc.rb +36 -0
- data/lib/pennmarc/heading_control.rb +11 -0
- data/lib/pennmarc/helpers/citation.rb +31 -0
- data/lib/pennmarc/helpers/creator.rb +237 -0
- data/lib/pennmarc/helpers/database.rb +89 -0
- data/lib/pennmarc/helpers/date.rb +85 -0
- data/lib/pennmarc/helpers/edition.rb +90 -0
- data/lib/pennmarc/helpers/format.rb +312 -0
- data/lib/pennmarc/helpers/genre.rb +71 -0
- data/lib/pennmarc/helpers/helper.rb +11 -0
- data/lib/pennmarc/helpers/identifier.rb +134 -0
- data/lib/pennmarc/helpers/language.rb +37 -0
- data/lib/pennmarc/helpers/link.rb +12 -0
- data/lib/pennmarc/helpers/location.rb +97 -0
- data/lib/pennmarc/helpers/note.rb +132 -0
- data/lib/pennmarc/helpers/production.rb +131 -0
- data/lib/pennmarc/helpers/relation.rb +135 -0
- data/lib/pennmarc/helpers/series.rb +118 -0
- data/lib/pennmarc/helpers/subject.rb +304 -0
- data/lib/pennmarc/helpers/title.rb +197 -0
- data/lib/pennmarc/mappings/language.yml +516 -0
- data/lib/pennmarc/mappings/locations.yml +1801 -0
- data/lib/pennmarc/mappings/relator.yml +263 -0
- data/lib/pennmarc/parser.rb +177 -0
- data/lib/pennmarc/util.rb +240 -0
- data/lib/pennmarc.rb +6 -0
- data/pennmarc.gemspec +22 -0
- data/spec/fixtures/marcxml/test.xml +167 -0
- data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
- data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
- data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
- data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
- data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
- data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
- data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
- data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
- data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
- data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
- data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
- data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
- data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
- data/spec/lib/pennmarc/parser_spec.rb +13 -0
- data/spec/spec_helper.rb +104 -0
- data/spec/support/marc_spec_helpers.rb +84 -0
- metadata +171 -0
@@ -0,0 +1,263 @@
|
|
1
|
+
abr: Abridger
|
2
|
+
act: Actor
|
3
|
+
adp: Adapter
|
4
|
+
rcp: Addressee
|
5
|
+
anl: Analyst
|
6
|
+
anm: Animator
|
7
|
+
ann: Annotator
|
8
|
+
apl: Appellant
|
9
|
+
ape: Appellee
|
10
|
+
app: Applicant
|
11
|
+
arc: Architect
|
12
|
+
arr: Arranger
|
13
|
+
acp: Art copyist
|
14
|
+
adi: Art director
|
15
|
+
art: Artist
|
16
|
+
ard: Artistic director
|
17
|
+
asg: Assignee
|
18
|
+
asn: Associated name
|
19
|
+
att: Attributed name
|
20
|
+
auc: Auctioneer
|
21
|
+
aut: Author
|
22
|
+
aqt: Author in quotations or text abstracts
|
23
|
+
aft: Author of afterword, colophon, etc.
|
24
|
+
aud: Author of dialog
|
25
|
+
aui: Author of introduction, etc.
|
26
|
+
ato: Autographer
|
27
|
+
ant: Bibliographic antecedent
|
28
|
+
bnd: Binder
|
29
|
+
bdd: Binding designer
|
30
|
+
blw: Blurb writer
|
31
|
+
bkd: Book designer
|
32
|
+
bkp: Book producer
|
33
|
+
bjd: Bookjacket designer
|
34
|
+
bpd: Bookplate designer
|
35
|
+
bsl: Bookseller
|
36
|
+
brl: Braille embosser
|
37
|
+
brd: Broadcaster
|
38
|
+
cll: Calligrapher
|
39
|
+
ctg: Cartographer
|
40
|
+
cas: Caster
|
41
|
+
cns: Censor
|
42
|
+
chr: Choreographer
|
43
|
+
cng: Cinematographer
|
44
|
+
cli: Client
|
45
|
+
-clb: Collaborator
|
46
|
+
cor: Collection registrar
|
47
|
+
col: Collector
|
48
|
+
clt: Collotyper
|
49
|
+
clr: Colorist
|
50
|
+
cmm: Commentator
|
51
|
+
cwt: Commentator for written text
|
52
|
+
com: Compiler
|
53
|
+
cpl: Complainant
|
54
|
+
cpt: Complainant-appellant
|
55
|
+
cpe: Complainant-appellee
|
56
|
+
cmp: Composer
|
57
|
+
cmt: Compositor
|
58
|
+
ccp: Conceptor
|
59
|
+
cnd: Conductor
|
60
|
+
con: Conservator
|
61
|
+
csl: Consultant
|
62
|
+
csp: Consultant to a project
|
63
|
+
cos: Contestant
|
64
|
+
cot: Contestant-appellant
|
65
|
+
coe: Contestant-appellee
|
66
|
+
cts: Contestee
|
67
|
+
ctt: Contestee-appellant
|
68
|
+
cte: Contestee-appellee
|
69
|
+
ctr: Contractor
|
70
|
+
ctb: Contributor
|
71
|
+
cpc: Copyright claimant
|
72
|
+
cph: Copyright holder
|
73
|
+
crr: Corrector
|
74
|
+
crp: Correspondent
|
75
|
+
cst: Costume designer
|
76
|
+
cou: Court governed
|
77
|
+
crt: Court reporter
|
78
|
+
cov: Cover designer
|
79
|
+
cre: Creator
|
80
|
+
cur: Curator
|
81
|
+
dnc: Dancer
|
82
|
+
dtc: Data contributor
|
83
|
+
dtm: Data manager
|
84
|
+
dte: Dedicatee
|
85
|
+
dto: Dedicator
|
86
|
+
dfd: Defendant
|
87
|
+
dft: Defendant-appellant
|
88
|
+
dfe: Defendant-appellee
|
89
|
+
dgg: Degree granting institution
|
90
|
+
dln: Delineator
|
91
|
+
dpc: Depicted
|
92
|
+
dpt: Depositor
|
93
|
+
dsr: Designer
|
94
|
+
drt: Director
|
95
|
+
dis: Dissertant
|
96
|
+
dbp: Distribution place
|
97
|
+
dst: Distributor
|
98
|
+
dnr: Donor
|
99
|
+
drm: Draftsman
|
100
|
+
dub: Dubious author
|
101
|
+
edt: Editor
|
102
|
+
edc: Editor of compilation
|
103
|
+
edm: Editor of moving image work
|
104
|
+
elg: Electrician
|
105
|
+
elt: Electrotyper
|
106
|
+
enj: Enacting jurisdiction
|
107
|
+
eng: Engineer
|
108
|
+
egr: Engraver
|
109
|
+
etr: Etcher
|
110
|
+
evp: Event place
|
111
|
+
exp: Expert
|
112
|
+
fac: Facsimilist
|
113
|
+
fld: Field director
|
114
|
+
fds: Film distributor
|
115
|
+
fmd: Film director
|
116
|
+
flm: Film editor
|
117
|
+
fmp: Film producer
|
118
|
+
fmk: Filmmaker
|
119
|
+
fpy: First party
|
120
|
+
frg: Forger
|
121
|
+
fmo: Former owner
|
122
|
+
fnd: Funder
|
123
|
+
gis: Geographic information specialist
|
124
|
+
-grt: Graphic technician
|
125
|
+
hnr: Honoree
|
126
|
+
hst: Host
|
127
|
+
his: Host institution
|
128
|
+
ilu: Illuminator
|
129
|
+
ill: Illustrator
|
130
|
+
ins: Inscriber
|
131
|
+
itr: Instrumentalist
|
132
|
+
ive: Interviewee
|
133
|
+
ivr: Interviewer
|
134
|
+
inv: Inventor
|
135
|
+
isb: Issuing body
|
136
|
+
jud: Judge
|
137
|
+
jug: Jurisdiction governed
|
138
|
+
lbr: Laboratory
|
139
|
+
ldr: Laboratory director
|
140
|
+
lsa: Landscape architect
|
141
|
+
led: Lead
|
142
|
+
len: Lender
|
143
|
+
lil: Libelant
|
144
|
+
lit: Libelant-appellant
|
145
|
+
lie: Libelant-appellee
|
146
|
+
lel: Libelee
|
147
|
+
let: Libelee-appellant
|
148
|
+
lee: Libelee-appellee
|
149
|
+
lbt: Librettist
|
150
|
+
lse: Licensee
|
151
|
+
lso: Licensor
|
152
|
+
lgd: Lighting designer
|
153
|
+
ltg: Lithographer
|
154
|
+
lyr: Lyricist
|
155
|
+
mfp: Manufacture place
|
156
|
+
mfr: Manufacturer
|
157
|
+
mrb: Marbler
|
158
|
+
mrk: Markup editor
|
159
|
+
mdc: Metadata contact
|
160
|
+
mte: Metal-engraver
|
161
|
+
mod: Moderator
|
162
|
+
mon: Monitor
|
163
|
+
mcp: Music copyist
|
164
|
+
msd: Musical director
|
165
|
+
mus: Musician
|
166
|
+
nrt: Narrator
|
167
|
+
osp: Onscreen presenter
|
168
|
+
opn: Opponent
|
169
|
+
orm: Organizer of meeting
|
170
|
+
org: Originator
|
171
|
+
own: Owner
|
172
|
+
pan: Panelist
|
173
|
+
ppm: Papermaker
|
174
|
+
pta: Patent applicant
|
175
|
+
pth: Patent holder
|
176
|
+
pat: Patron
|
177
|
+
prf: Performer
|
178
|
+
pma: Permitting agency
|
179
|
+
pht: Photographer
|
180
|
+
ptf: Plaintiff
|
181
|
+
ptt: Plaintiff-appellant
|
182
|
+
pte: Plaintiff-appellee
|
183
|
+
plt: Platemaker
|
184
|
+
pra: Praeses
|
185
|
+
pre: Presenter
|
186
|
+
prt: Printer
|
187
|
+
pop: Printer of plates
|
188
|
+
prm: Printmaker
|
189
|
+
prc: Process contact
|
190
|
+
pro: Producer
|
191
|
+
prn: Production company
|
192
|
+
prs: Production designer
|
193
|
+
pmn: Production manager
|
194
|
+
prd: Production personnel
|
195
|
+
prp: Production place
|
196
|
+
prg: Programmer
|
197
|
+
pdr: Project director
|
198
|
+
pfr: Proofreader
|
199
|
+
prv: Provider
|
200
|
+
pup: Publication place
|
201
|
+
pbl: Publisher
|
202
|
+
pbd: Publishing director
|
203
|
+
ppt: Puppeteer
|
204
|
+
rdd: Radio director
|
205
|
+
rpc: Radio producer
|
206
|
+
rce: Recording engineer
|
207
|
+
rcd: Recordist
|
208
|
+
red: Redaktor
|
209
|
+
ren: Renderer
|
210
|
+
rpt: Reporter
|
211
|
+
rps: Repository
|
212
|
+
rth: Research team head
|
213
|
+
rtm: Research team member
|
214
|
+
res: Researcher
|
215
|
+
rsp: Respondent
|
216
|
+
rst: Respondent-appellant
|
217
|
+
rse: Respondent-appellee
|
218
|
+
rpy: Responsible party
|
219
|
+
rsg: Restager
|
220
|
+
rsr: Restorationist
|
221
|
+
rev: Reviewer
|
222
|
+
rbr: Rubricator
|
223
|
+
sce: Scenarist
|
224
|
+
sad: Scientific advisor
|
225
|
+
aus: Screenwriter
|
226
|
+
scr: Scribe
|
227
|
+
scl: Sculptor
|
228
|
+
spy: Second party
|
229
|
+
sec: Secretary
|
230
|
+
sll: Seller
|
231
|
+
std: Set designer
|
232
|
+
stg: Setting
|
233
|
+
sgn: Signer
|
234
|
+
sng: Singer
|
235
|
+
sds: Sound designer
|
236
|
+
spk: Speaker
|
237
|
+
spn: Sponsor
|
238
|
+
sgd: Stage director
|
239
|
+
stm: Stage manager
|
240
|
+
stn: Standards body
|
241
|
+
str: Stereotyper
|
242
|
+
stl: Storyteller
|
243
|
+
sht: Supporting host
|
244
|
+
srv: Surveyor
|
245
|
+
tch: Teacher
|
246
|
+
tcd: Technical director
|
247
|
+
tld: Television director
|
248
|
+
tlp: Television producer
|
249
|
+
ths: Thesis advisor
|
250
|
+
trc: Transcriber
|
251
|
+
trl: Translator
|
252
|
+
tyd: Type designer
|
253
|
+
tyg: Typographer
|
254
|
+
uvp: University place
|
255
|
+
vdg: Videographer
|
256
|
+
-voc: Vocalist
|
257
|
+
wit: Witness
|
258
|
+
wde: Wood engraver
|
259
|
+
wdc: Woodcutter
|
260
|
+
wam: Writer of accompanying material
|
261
|
+
wac: Writer of added commentary
|
262
|
+
wat: Writer of added text
|
263
|
+
wal: Writer of added lyrics
|
@@ -0,0 +1,177 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'active_support/all'
|
4
|
+
require_relative 'helpers/helper'
|
5
|
+
require_relative 'helpers/creator'
|
6
|
+
require_relative 'helpers/database'
|
7
|
+
require_relative 'helpers/date'
|
8
|
+
require_relative 'helpers/format'
|
9
|
+
require_relative 'helpers/genre'
|
10
|
+
require_relative 'helpers/identifier'
|
11
|
+
require_relative 'helpers/language'
|
12
|
+
require_relative 'helpers/link'
|
13
|
+
require_relative 'helpers/location'
|
14
|
+
require_relative 'helpers/subject'
|
15
|
+
require_relative 'helpers/title'
|
16
|
+
require_relative 'helpers/citation'
|
17
|
+
require_relative 'helpers/relation'
|
18
|
+
require_relative 'helpers/production'
|
19
|
+
require_relative 'helpers/edition'
|
20
|
+
require_relative 'helpers/note'
|
21
|
+
|
22
|
+
module PennMARC
|
23
|
+
attr_accessor :mappings
|
24
|
+
|
25
|
+
DEFINED_HELPERS = %w[Creator Database Date Format Genre Language Link Location Subject Title Relation].freeze
|
26
|
+
|
27
|
+
# Methods here should return values used in the indexer. The parsing logic should
|
28
|
+
# NOT return values specific to any particular site/interface, but just general
|
29
|
+
# MARC parsing logic for "title", "subject", "author", etc., as much as reasonably
|
30
|
+
# possible. We'll see how it goes.
|
31
|
+
#
|
32
|
+
# Methods should, by default, take in a MARC::Record
|
33
|
+
class Parser
|
34
|
+
def initialize(helpers: DEFINED_HELPERS)
|
35
|
+
@mappings = {}
|
36
|
+
@helpers = Array.wrap(helpers) # TODO: load helpers dynamically?
|
37
|
+
end
|
38
|
+
|
39
|
+
# Report whether a method name that is not explicitly defined would be routed to one of the
# helpers this parser was configured with (e.g. :title_show when 'Title' is in @helpers).
# @param [Symbol, String] name method name being queried
# @param [Boolean] include_private passed through by Ruby's respond_to? machinery
# @return [Boolean]
def respond_to_missing?(name, include_private = false)
  # Ruby hands us a Symbol, so convert before splitting; the text before the first underscore
  # names the helper.
  helper = name.to_s.split('_').first.to_s
  # Helper names are stored capitalized ("Title") while method prefixes are lowercase ("title"),
  # so compare case-insensitively.
  Array(@helpers).any? { |known| known.to_s.casecmp?(helper) } || super
end
|
42
|
+
|
43
|
+
# Call helper class methods, e.g.,
|
44
|
+
# #title_show -> PennMARC::Title.show
|
45
|
+
# #subject_facet -> PennMARC::Subject.facet
|
46
|
+
# @param [Symbol] name missing method name in "<helper>_<method>" form
# @param [Array] opts arguments forwarded untouched to the helper method
# @return [Object] whatever the helper method returns
def method_missing(name, *opts, &block)
  # Split only on the first underscore: the prefix names the helper, the remainder is the
  # helper's method name (which may itself contain underscores).
  helper, meth = name.to_s.split('_', 2)
  # Names that do not have both parts cannot be dispatched; defer to Ruby's default
  # handling so the caller gets an ordinary NoMethodError.
  return super if helper.to_s.empty? || meth.to_s.empty?

  "PennMARC::#{helper.titleize}".constantize.public_send(meth, *opts, &block)
end
|
52
|
+
|
53
|
+
# @todo does this fit in an existing helper?
|
54
|
+
# @param [MARC::Record] record
|
55
|
+
# @return [Object]
|
56
|
+
def cartographic_show(record)
|
57
|
+
record.fields(%w{255 342}).map do |field|
|
58
|
+
join_subfields(field, &subfield_not_6_or_8)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# @todo move to Identifier helper
|
63
|
+
# @param [MARC::Record] record
|
64
|
+
# @return [Object]
|
65
|
+
def fingerprint_show(record)
|
66
|
+
record.fields('026').map do |field|
|
67
|
+
join_subfields(field, &subfield_not_in(%w{2 5 6 8}))
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# @todo does this fit in an existing helper?
|
72
|
+
# @param [MARC::Record] record
|
73
|
+
# @return [Object]
|
74
|
+
def arrangement_show(record)
|
75
|
+
get_datafield_and_880(record, '351')
|
76
|
+
end
|
77
|
+
|
78
|
+
# @param [MARC::Record] record
|
79
|
+
# @return [Object]
|
80
|
+
def system_details_show(record)
|
81
|
+
acc = []
|
82
|
+
acc += record.fields('538').map do |field|
|
83
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a i u}))
|
84
|
+
end
|
85
|
+
acc += record.fields('344').map do |field|
|
86
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a b c d e f g h}))
|
87
|
+
end
|
88
|
+
acc += record.fields(%w{345 346}).map do |field|
|
89
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a b}))
|
90
|
+
end
|
91
|
+
acc += record.fields('347').map do |field|
|
92
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a b c d e f}))
|
93
|
+
end
|
94
|
+
acc += record.fields('880')
|
95
|
+
.select { |f| has_subfield6_value(f, /^538/) }
|
96
|
+
.map do |field|
|
97
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a i u}))
|
98
|
+
end
|
99
|
+
acc += record.fields('880')
|
100
|
+
.select { |f| has_subfield6_value(f, /^344/) }
|
101
|
+
.map do |field|
|
102
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a b c d e f g h}))
|
103
|
+
end
|
104
|
+
acc += record.fields('880')
|
105
|
+
.select { |f| has_subfield6_value(f, /^(345|346)/) }
|
106
|
+
.map do |field|
|
107
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a b}))
|
108
|
+
end
|
109
|
+
acc += record.fields('880')
|
110
|
+
.select { |f| has_subfield6_value(f, /^347/) }
|
111
|
+
.map do |field|
|
112
|
+
get_sub3_and_other_subs(field, &subfield_in(%w{a b c d e f}))
|
113
|
+
end
|
114
|
+
acc
|
115
|
+
end
|
116
|
+
|
117
|
+
# @todo the legacy code here is a hot mess for a number of reasons, what do we need this field to do?
|
118
|
+
# @note port the needed parts from get_offsite_display, don't return HTML
|
119
|
+
# @param [MARC::Record] record
|
120
|
+
# @return [Object]
|
121
|
+
def offsite_show(record); end
|
122
|
+
|
123
|
+
# @todo move this to Creator helper
|
124
|
+
# @param [MARC::Record] record
|
125
|
+
# @return [Object]
|
126
|
+
def contributor_show(record)
|
127
|
+
acc = []
|
128
|
+
acc += record.fields(%w{700 710})
|
129
|
+
.select { |f| ['', ' ', '0'].member?(f.indicator2) }
|
130
|
+
.select { |f| f.none? { |sf| sf.code == 'i' } }
|
131
|
+
.map do |field|
|
132
|
+
contributor = join_subfields(field, &subfield_in(%w{a b c d j q}))
|
133
|
+
contributor_append = field.select(&subfield_in(%w{e u 3 4})).map do |sf|
|
134
|
+
if sf.code == '4'
|
135
|
+
", #{relator_codes[sf.value]}"
|
136
|
+
else
|
137
|
+
" #{sf.value}"
|
138
|
+
end
|
139
|
+
end.join
|
140
|
+
{ value: contributor, value_append: contributor_append, link_type: 'author_creator_xfacet2' }
|
141
|
+
end
|
142
|
+
acc += record.fields('880')
|
143
|
+
.select { |f| has_subfield6_value(f, /^(700|710)/) && (f.none? { |sf| sf.code == 'i' }) }
|
144
|
+
.map do |field|
|
145
|
+
contributor = join_subfields(field, &subfield_in(%w{a b c d j q}))
|
146
|
+
contributor_append = join_subfields(field, &subfield_in(%w{e u 3}))
|
147
|
+
{ value: contributor, value_append: contributor_append, link_type: 'author_creator_xfacet2' }
|
148
|
+
end
|
149
|
+
acc
|
150
|
+
end
|
151
|
+
|
152
|
+
# Load language map from YAML and memoize in @mappings hash
|
153
|
+
# @return [Hash]
|
154
|
+
def language_map
|
155
|
+
@mappings[:language] ||= load_map('language.yml')
|
156
|
+
end
|
157
|
+
|
158
|
+
# Load location map from YAML and memoize in @mappings hash
|
159
|
+
# @return [Hash]
|
160
|
+
def location_map
|
161
|
+
@mappings[:location] ||= load_map('locations.yml')
|
162
|
+
end
|
163
|
+
|
164
|
+
# Load relator map from YAML and memoize in @mappings hash
|
165
|
+
# @return [Hash]
|
166
|
+
def relator_map
|
167
|
+
@mappings[:relator] ||= load_map('relator.yml')
|
168
|
+
end
|
169
|
+
|
170
|
+
# @param [String] filename name of a mapping file in the mappings directory, including its file extension
|
171
|
+
# @return [Hash] mapping as hash
|
172
|
+
def load_map(filename)
|
173
|
+
YAML.safe_load(File.read(File.join(File.expand_path(__dir__), 'mappings', filename)),
|
174
|
+
symbolize_names: true)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
@@ -0,0 +1,240 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'heading_control'
|
4
|
+
|
5
|
+
module PennMARC
|
6
|
+
# Module holding "utility" methods used in MARC parsing methods
|
7
|
+
module Util
|
8
|
+
# Join subfields from a field selected based on a provided proc
|
9
|
+
# @param [MARC::DataField] field
|
10
|
+
# @param [Proc] selector
|
11
|
+
# @return [String]
|
12
|
+
def join_subfields(field, &selector)
|
13
|
+
field.select { |v| selector.call(v) }.filter_map { |sf|
|
14
|
+
value = sf.value&.strip
|
15
|
+
next unless value.present?
|
16
|
+
|
17
|
+
value
|
18
|
+
}.join(' ').squish
|
19
|
+
end
|
20
|
+
|
21
|
+
# returns true if field has a value that matches
|
22
|
+
# passed-in regex and passed in subfield
|
23
|
+
# @todo example usage
|
24
|
+
# @param [MARC::DataField] field
|
25
|
+
# @param [String|Integer|Symbol] subfield
|
26
|
+
# @param [Regexp] regex
|
27
|
+
# @return [TrueClass, FalseClass]
|
28
|
+
def subfield_value?(field, subfield, regex)
|
29
|
+
field.any? { |sf| sf.code == subfield.to_s && sf.value =~ regex }
|
30
|
+
end
|
31
|
+
|
32
|
+
# returns true if a given field has a given subfield value in a given array
|
33
|
+
# TODO: example usage
|
34
|
+
# @param [MARC::DataField] field
|
35
|
+
# @param [String|Integer|Symbol] subfield
|
36
|
+
# @param [Array] array
|
37
|
+
# @return [TrueClass, FalseClass]
|
38
|
+
def subfield_value_in?(field, subfield, array)
|
39
|
+
field.any? { |sf| sf.code == subfield.to_s && sf.value.in?(array) }
|
40
|
+
end
|
41
|
+
|
42
|
+
# returns true if a given field does not have a given subfield value in a given array
|
43
|
+
# @param [MARC::DataField] field
|
44
|
+
# @param [String|Integer|Symbol] subfield
|
45
|
+
# @param [Array] array
|
46
|
+
# @return [TrueClass, FalseClass]
|
47
|
+
def subfield_value_not_in?(field, subfield, array)
|
48
|
+
field.none? { |sf| sf.code == subfield.to_s && sf.value.in?(array) }
|
49
|
+
end
|
50
|
+
|
51
|
+
# returns a lambda checking if passed-in subfield's code is a member of array
|
52
|
+
# TODO: include lambda returning methods in their own module?
|
53
|
+
# @param [Array] array
|
54
|
+
# @return [Proc]
|
55
|
+
def subfield_in?(array)
|
56
|
+
->(subfield) { array.member?(subfield.code) }
|
57
|
+
end
|
58
|
+
|
59
|
+
# returns a lambda checking if passed-in subfield's code is NOT a member of array
|
60
|
+
# TODO: include lambda returning methods in their own module?
|
61
|
+
# @param [Array] array
|
62
|
+
# @return [Proc]
|
63
|
+
def subfield_not_in?(array)
|
64
|
+
->(subfield) { !array.member?(subfield.code) }
|
65
|
+
end
|
66
|
+
|
67
|
+
# Check if a field has a given subfield defined
|
68
|
+
# @param [MARC::DataField] field
|
69
|
+
# @param [String|Symbol|Integer] subfield
|
70
|
+
# @return [TrueClass, FalseClass]
|
71
|
+
def subfield_defined?(field, subfield)
|
72
|
+
field.any? { |sf| sf.code == subfield.to_s }
|
73
|
+
end
|
74
|
+
|
75
|
+
# Check if a field does not have a given subfield defined
|
76
|
+
# @param [MARC::DataField] field
|
77
|
+
# @param [String|Symbol|Integer] subfield
|
78
|
+
# @return [TrueClass, FalseClass]
|
79
|
+
def subfield_undefined?(field, subfield)
|
80
|
+
field.none? { |sf| sf.code == subfield.to_s }
|
81
|
+
end
|
82
|
+
|
83
|
+
# Gets all subfield values for a subfield in a given field
|
84
|
+
# @param [MARC::DataField] field
|
85
|
+
# @param [String|Symbol] subfield as a string or symbol
|
86
|
+
# @return [Array] subfield values for given subfield code
|
87
|
+
def subfield_values(field, subfield)
|
88
|
+
field.filter_map do |sf|
|
89
|
+
next unless sf.code == subfield.to_s
|
90
|
+
|
91
|
+
next unless sf.value.present?
|
92
|
+
|
93
|
+
sf.value
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
# Get all subfield values for a provided subfield from any occurrence of a provided tag/tags
|
98
|
+
# @param [String|Array] tag tags to consider
|
99
|
+
# @param [String|Symbol] subfield to take the values from
|
100
|
+
# @param [MARC::Record] record source
|
101
|
+
# @return [Array] array of subfield values
|
102
|
+
def subfield_values_for(tag:, subfield:, record:)
|
103
|
+
record.fields(tag).flat_map do |field|
|
104
|
+
subfield_values field, subfield
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# @param [Symbol|String] trailer to target for removal
|
109
|
+
# @param [String] string to modify
|
110
|
+
def trim_trailing(trailer, string)
|
111
|
+
map = { semicolon: /\s*;\s*$/,
|
112
|
+
colon: /\s*:\s*$/,
|
113
|
+
equal: /=$/,
|
114
|
+
slash: %r{\s*/\s*$},
|
115
|
+
comma: /\s*,\s*$/,
|
116
|
+
period: /\.\s*$/ } # TODO: revise to exclude "etc."
|
117
|
+
string.sub map[trailer.to_sym], ''
|
118
|
+
end
|
119
|
+
|
120
|
+
# MARC 880 field "Alternate Graphic Representation" contains text "linked" to another
|
121
|
+
# field (e.g., 245 [Title]) used as an alternate representation. Often used to hold
|
122
|
+
# translations of title values. A common need is to extract subfields as selected by
|
123
|
+
# passed-in block from 880 datafield that has a particular subfield 6 value.
|
124
|
+
# See: https://www.loc.gov/marc/bibliographic/bd880.html
|
125
|
+
# @param [MARC::Record] record
|
126
|
+
# @param [String|Array] subfield6_value either a string to look for in sub6 or an array of them
|
127
|
+
# @param selector [Proc] takes a subfield as argument, returns a boolean
|
128
|
+
# @return [Array] array of linked alternates
|
129
|
+
def linked_alternate(record, subfield6_value, &selector)
  # Wrap the alternatives in a group so the start-of-value anchor applies to every tag.
  # Without the group, /^600|650/ anchors only "600" and lets "650" match anywhere in $6.
  # Built once, outside the loop, since it does not depend on the field.
  linkage = /^(?:#{Array(subfield6_value).join('|')})/
  record.fields('880').filter_map do |field|
    # Skip 880s whose subfield 6 does not begin with one of the requested tags.
    next unless subfield_value?(field, '6', linkage)

    field.select { |sf| selector.call(sf) }.map(&:value).join(' ')
  end
end
|
136
|
+
alias get_880 linked_alternate
|
137
|
+
|
138
|
+
# Common case of wanting to extract all the subfields besides 6 or 8,
|
139
|
+
# from 880 datafield that has a particular subfield 6 value. We exclude 6 because
|
140
|
+
# that value is the linkage ID itself and 8 because... IDK
|
141
|
+
# @param [MARC::Record] record
|
142
|
+
# @param [String|Array] subfield6_value either a string to look for in sub6 or an array of them
|
143
|
+
# @return [Array] array of linked alternates without 8 or 6 values
|
144
|
+
def linked_alternate_not_6_or_8(record, subfield6_value)
|
145
|
+
linked_alternate(record, subfield6_value) do |sf|
|
146
|
+
%w[6 8].exclude?(sf.code)
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
# Returns the non-6,8 subfields from a datafield and its 880 link.
|
151
|
+
# @param [MARC::Record] record
|
152
|
+
# @param [String] tag
|
153
|
+
# @return [Array] acc
|
154
|
+
def datafield_and_linked_alternate(record, tag)
|
155
|
+
record.fields(tag).filter_map do |field|
|
156
|
+
join_subfields(field, &subfield_not_in?(%w[6 8]))
|
157
|
+
end + linked_alternate_not_6_or_8(record, tag)
|
158
|
+
end
|
159
|
+
|
160
|
+
# Get the substring of a string up to a given target character
|
161
|
+
# @param [Object] string to split
|
162
|
+
# @param [Object] target character to split upon
|
163
|
+
# @return [String (frozen)]
|
164
|
+
def substring_before(string, target)
|
165
|
+
string.scan(target).present? ? string.split(target, 2).first : ''
|
166
|
+
end
|
167
|
+
|
168
|
+
# Get the substring of a string after the first occurrence of a target character
|
169
|
+
# @param [Object] string to split
|
170
|
+
# @param [Object] target character to split upon
|
171
|
+
# @return [String (frozen)]
|
172
|
+
def substring_after(string, target)
|
173
|
+
string.scan(target).present? ? string.split(target, 2).second : ''
|
174
|
+
end
|
175
|
+
|
176
|
+
# Join array elements and normalize extraneous whitespace
|
177
|
+
# @param [Array] array
|
178
|
+
# @return [String]
|
179
|
+
def join_and_squish(array)
|
180
|
+
array.join(' ').squish
|
181
|
+
end
|
182
|
+
alias join_and_trim_whitespace join_and_squish
|
183
|
+
|
184
|
+
# If there's a subfield i, extract its value, and if there's something
|
185
|
+
# in parentheses in that value, remove it.
|
186
|
+
# @param [MARC::Field] field
|
187
|
+
# @return [String] subfield i without parentheses value
|
188
|
+
def remove_paren_value_from_subfield_i(field)
|
189
|
+
val = field.filter_map do |sf|
|
190
|
+
next unless sf.code == 'i'
|
191
|
+
|
192
|
+
match = /\((.+?)\)/.match(sf.value)
|
193
|
+
if match
|
194
|
+
sf.value.sub("(#{match[1]})", '')
|
195
|
+
else
|
196
|
+
sf.value
|
197
|
+
end
|
198
|
+
end.first || ''
|
199
|
+
trim_trailing(:colon, trim_trailing(:period, val))
|
200
|
+
end
|
201
|
+
|
202
|
+
# Translate a relator code using mapping
|
203
|
+
# @todo handle case of receiving a URI? E.g., http://loc.gov/relator/aut
|
204
|
+
# @param [String, NilClass] relator_code
|
205
|
+
# @param [Hash] mapping
|
206
|
+
# @return [String, NilClass] full relator string
|
207
|
+
def translate_relator(relator_code, mapping)
|
208
|
+
return unless relator_code.present?
|
209
|
+
|
210
|
+
mapping[relator_code.to_sym]
|
211
|
+
end
|
212
|
+
|
213
|
+
# Get 650 & 880 for Provenance and Chronology: prefix should be 'PRO' or 'CHR' and may be preceded by a '%'
|
214
|
+
# @note 11/2018: do not display $5 in PRO or CHR subjs
|
215
|
+
# @param [MARC::Record] record
|
216
|
+
# @param [String] prefix to select from subject field
|
217
|
+
# @return [Array] array of values
|
218
|
+
def prefixed_subject_and_alternate(record, prefix)
  record.fields(%w[650 880]).filter_map do |field|
    # Only locally-assigned subject headings (second indicator 4) carry the PRO/CHR prefixes.
    next unless field.indicator2 == '4'

    # An 880's subfield 6 holds a linkage value such as "650-01", not the bare tag, so match
    # on the leading tag. Requiring a value exactly equal to '650' skipped every linked 880.
    next if field.tag == '880' && !subfield_value?(field, '6', /^650/)

    # Keep the field only when a subfield a starts with the prefix, optionally preceded by '%'.
    next unless field.any? { |sf| sf.code == 'a' && sf.value =~ /^(#{prefix}|%#{prefix})/ }

    # Subfield a values with the prefix stripped come first, then the remaining display
    # subfields (linkage, relator and institution subfields are excluded).
    elements = field.select(&subfield_in?(%w[a])).map { |sf| sf.value.gsub(/^%?#{prefix}/, '') }
    elements << join_subfields(field, &subfield_not_in?(%w[a 6 8 e w 5]))
    join_and_squish elements
  end
end
|
231
|
+
|
232
|
+
# Does the given field specify an allowed source code?
|
233
|
+
#
|
234
|
+
# @param [MARC::DataField] field
|
235
|
+
# @return [Boolean]
|
236
|
+
def valid_subject_genre_source_code?(field)
|
237
|
+
subfield_value_in?(field, '2', PennMARC::HeadingControl::ALLOWED_SOURCE_CODES)
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
data/lib/pennmarc.rb
ADDED
data/pennmarc.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Gem packaging metadata for pennmarc.
Gem::Specification.new do |s|
  s.name = 'pennmarc'
  s.version = '0.0.1'
  s.summary = 'Penn Libraries Catalog MARC parsing wisdom for cross-project usage'
  s.description = 'Penn Libraries Catalog MARC parsing wisdom for cross-project usage'
  s.authors = ['Mike Kanning', 'Amrey Mathurin', 'Patrick Perkins']
  s.email = 'mkanning@upenn.edu'
  # Split on newlines explicitly. $OUTPUT_RECORD_SEPARATOR is nil here (the English library is
  # not required and $\ defaults to nil), so split fell back to whitespace splitting and would
  # break any path containing a space.
  s.files = `git ls-files`.split("\n")
  s.homepage = 'https://gitlab.library.upenn.edu/dld/catalog/pennmarc'
  s.license = 'MIT'

  s.required_ruby_version = '>= 3.2'

  # lib/pennmarc/parser.rb requires 'active_support/all', so ActiveSupport must be installed
  # wherever the gem is loaded, not just in this repository's bundle.
  s.add_dependency 'activesupport'
  s.add_dependency 'marc'

  s.add_development_dependency 'nokogiri'
  s.add_development_dependency 'rspec'
  s.add_development_dependency 'simplecov'
  s.add_development_dependency 'upennlib-rubocop'
end
|