pennmarc 1.0.27 → 1.0.31
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +0 -1
- data/Gemfile.lock +0 -2
- data/lib/pennmarc/enriched.rb +1 -1
- data/lib/pennmarc/heading_control.rb +9 -5
- data/lib/pennmarc/helpers/creator.rb +17 -15
- data/lib/pennmarc/helpers/identifier.rb +1 -1
- data/lib/pennmarc/mappings/headings_override.yml +12 -8
- data/lib/pennmarc/version.rb +1 -1
- data/pennmarc.gemspec +0 -1
- data/spec/lib/pennmarc/heading_control_spec.rb +17 -10
- data/spec/lib/pennmarc/helpers/creator_spec.rb +15 -12
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +30 -10
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a10e18cc63414bbcc1f3ab3d0dbb47517552830f835a0ca10c50added035e9e
|
4
|
+
data.tar.gz: 0153e3bb11fe9646f0b3e6614ef2229654d70d60a0bace46e3d57a9c49800ccd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d9fa4ea2d2e36736f240e8a9431d487d9195e92a1eecf2e847c8cbda94b9d7e5da1029654754b4582d1036ffb453115bf6e88f2c09e8668704830c2ff46c78a3
|
7
|
+
data.tar.gz: fb4a80ff8479b248e90c33c2eb43038d957ad93de3b3978ec964bdffba11621106a8fb61ff7feb4aeb03c8dfec251f0af7da4a7a00751e34d122312059bdf6a2
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -19,7 +19,6 @@ GEM
|
|
19
19
|
scrub_rb (>= 1.0.1, < 2)
|
20
20
|
unf
|
21
21
|
minitest (5.18.0)
|
22
|
-
multi_string_replace (2.0.2)
|
23
22
|
nokogiri (1.15.2-arm64-darwin)
|
24
23
|
racc (~> 1.4)
|
25
24
|
nokogiri (1.15.2-x64-mingw-ucrt)
|
@@ -114,7 +113,6 @@ DEPENDENCIES
|
|
114
113
|
activesupport (~> 7)
|
115
114
|
library_stdnums (~> 1.6)
|
116
115
|
marc (~> 1.2)
|
117
|
-
multi_string_replace (~> 2.0)
|
118
116
|
nokogiri (~> 1.15)
|
119
117
|
rake (~> 13.0)
|
120
118
|
rspec (~> 3.12)
|
data/lib/pennmarc/enriched.rb
CHANGED
@@ -12,7 +12,7 @@ module PennMARC
|
|
12
12
|
PHYS_INVENTORY_TAG = 'hld'
|
13
13
|
ELEC_INVENTORY_TAG = 'prt'
|
14
14
|
ITEM_TAG = 'itm'
|
15
|
-
|
15
|
+
RELATED_RECORD_TAGS = %w[REL rel].freeze
|
16
16
|
|
17
17
|
# Subfields for HLD tags
|
18
18
|
# Follow MARC 852 spec: https://www.loc.gov/marc/holdings/hd852.html, but names are translated into Alma parlance
|
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'multi_string_replace'
|
4
|
-
|
5
3
|
module PennMARC
|
6
4
|
# Shared tools and values for controlling handling of subject or genre headings
|
7
5
|
class HeadingControl
|
@@ -10,6 +8,9 @@ module PennMARC
|
|
10
8
|
ALLOWED_SOURCE_CODES = %w[aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
|
11
9
|
local/osu mesh ndlsh nli nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp].freeze
|
12
10
|
|
11
|
+
REMOVE_TERM_REGEX = /#{Mappers.headings_to_remove&.join('|')}/i
|
12
|
+
REPLACE_TERM_REGEX = /(#{Mappers.heading_overrides.keys.join('|')})/i
|
13
|
+
|
13
14
|
class << self
|
14
15
|
# Replace or remove any terms in provided values pursuant to the configuration in remove and override mappers.
|
15
16
|
# Used to remove or replace offensive or otherwise undesirable subject headings.
|
@@ -18,10 +19,13 @@ module PennMARC
|
|
18
19
|
def term_override(values)
|
19
20
|
values.filter_map do |value|
|
20
21
|
# Remove values if they contain a remove term
|
21
|
-
next nil if value.match?(
|
22
|
+
next nil if value.match?(REMOVE_TERM_REGEX)
|
23
|
+
|
24
|
+
# return early if there are no terms to replace
|
25
|
+
next value if value.match(REPLACE_TERM_REGEX).nil?
|
22
26
|
|
23
|
-
#
|
24
|
-
|
27
|
+
# lookup and perform replacement
|
28
|
+
value.sub(::Regexp.last_match.to_s, Mappers.heading_overrides[::Regexp.last_match.to_s.downcase])
|
25
29
|
end
|
26
30
|
end
|
27
31
|
end
|
@@ -99,10 +99,9 @@ module PennMARC
|
|
99
99
|
|
100
100
|
fields = record.fields(tags)
|
101
101
|
fields.filter_map { |field|
|
102
|
-
if
|
103
|
-
|
104
|
-
|
105
|
-
field['a']
|
102
|
+
if field['a'].present?
|
103
|
+
name = trim_trailing(:comma, field['a'])
|
104
|
+
first_initial_only ? abbreviate_name(name) : name
|
106
105
|
end
|
107
106
|
}.uniq
|
108
107
|
end
|
@@ -131,10 +130,11 @@ module PennMARC
|
|
131
130
|
relator = 'Contributor' if relator.blank?
|
132
131
|
relator = trim_punctuation(relator).capitalize
|
133
132
|
|
133
|
+
name = trim_trailing(:comma, field['a'])
|
134
134
|
name = if name_only
|
135
|
-
|
135
|
+
name
|
136
136
|
else
|
137
|
-
join_subfields(field, &subfield_in?(%w[
|
137
|
+
"#{name} #{join_subfields(field, &subfield_in?(%w[b c d j q u 3]))}, #{relator}"
|
138
138
|
end
|
139
139
|
|
140
140
|
if contributors.key?(relator)
|
@@ -328,7 +328,7 @@ module PennMARC
|
|
328
328
|
relator_term_sf = relator_term_subfield(field)
|
329
329
|
name = field.filter_map { |sf|
|
330
330
|
if sf.code == 'a'
|
331
|
-
should_convert_name_order ? convert_name_order(sf.value) : sf.value
|
331
|
+
should_convert_name_order ? convert_name_order(sf.value) : trim_trailing(:comma, sf.value)
|
332
332
|
elsif sf.code == relator_term_sf
|
333
333
|
next
|
334
334
|
elsif NAME_EXCLUDED_SUBFIELDS.exclude?(sf.code)
|
@@ -348,6 +348,7 @@ module PennMARC
|
|
348
348
|
# @param name [String] value for processing
|
349
349
|
# @return [String]
|
350
350
|
def convert_name_order(name)
|
351
|
+
name = trim_trailing(:comma, name)
|
351
352
|
return name unless name.include? ','
|
352
353
|
|
353
354
|
after_comma = join_and_squish([trim_trailing(:comma, substring_after(name, ', '))])
|
@@ -357,15 +358,16 @@ module PennMARC
|
|
357
358
|
|
358
359
|
# Convert "Lastname, First" to "Lastname, F"
|
359
360
|
# @param [String] name
|
361
|
+
# @return [String]
|
360
362
|
def abbreviate_name(name)
|
361
|
-
|
362
|
-
return
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
363
|
+
name = trim_trailing(:comma, name)
|
364
|
+
return name unless name.include? ','
|
365
|
+
|
366
|
+
after_comma = join_and_squish([trim_trailing(:comma, substring_after(name, ','))])
|
367
|
+
before_comma = substring_before(name, ',')
|
368
|
+
abbrv = "#{before_comma},"
|
369
|
+
abbrv += " #{after_comma.first.upcase}." if after_comma.present?
|
370
|
+
abbrv
|
369
371
|
end
|
370
372
|
|
371
373
|
# Parse creator facet value from given creator field and desired subfields
|
@@ -164,7 +164,7 @@ module PennMARC
|
|
164
164
|
# @param [MARC::Record] record
|
165
165
|
# @return [Array<String>]
|
166
166
|
def host_record_id(record)
|
167
|
-
record.fields(Enriched::Pub::
|
167
|
+
record.fields(Enriched::Pub::RELATED_RECORD_TAGS).filter_map { |field|
|
168
168
|
next unless subfield_value?(field, 'c', /contains/i)
|
169
169
|
|
170
170
|
subfield_values field, :w
|
@@ -1,8 +1,12 @@
|
|
1
|
-
|
2
|
-
"
|
3
|
-
|
4
|
-
|
5
|
-
"
|
6
|
-
|
7
|
-
"
|
8
|
-
"
|
1
|
+
# Important!: These terms should be listed in an order of decreasing complexity so that terms that may be contained in
|
2
|
+
# other terms are not replaced first, e.g., if "Aliens" is listed first, "Illegal Aliens" will get overridden to
|
3
|
+
# read "Illegal Noncitizens".
|
4
|
+
# Also, use lower case only in the key to support efficient case-insensitive matching.
|
5
|
+
"illegal alien children": Undocumented immigrant children
|
6
|
+
"alien detention centers": Immigrant detention centers
|
7
|
+
"alien criminals": Noncitizen criminals
|
8
|
+
"alien property": Foreign-owned property
|
9
|
+
"illegal aliens": Undocumented immigrants
|
10
|
+
"alien labor": Foreign workers
|
11
|
+
gypsies: Romanies
|
12
|
+
aliens: Noncitizens
|
data/lib/pennmarc/version.rb
CHANGED
data/pennmarc.gemspec
CHANGED
@@ -21,7 +21,6 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.add_dependency 'activesupport', '~> 7'
|
22
22
|
s.add_dependency 'library_stdnums', '~> 1.6'
|
23
23
|
s.add_dependency 'marc', '~> 1.2'
|
24
|
-
s.add_dependency 'multi_string_replace', '~> 2.0'
|
25
24
|
s.add_dependency 'nokogiri', '~> 1.15'
|
26
25
|
|
27
26
|
s.metadata['rubygems_mfa_required'] = 'false'
|
@@ -1,8 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
describe 'PennMARC::HeadingControl' do
|
4
|
-
let(:replace_term) { PennMARC::Mappers.heading_overrides.
|
5
|
-
let(:replaced_term) { PennMARC::Mappers.heading_overrides.
|
4
|
+
let(:replace_term) { PennMARC::Mappers.heading_overrides.keys[2] }
|
5
|
+
let(:replaced_term) { PennMARC::Mappers.heading_overrides.values[2] }
|
6
6
|
let(:remove_term) { PennMARC::Mappers.headings_to_remove.first }
|
7
7
|
|
8
8
|
describe '.process' do
|
@@ -23,15 +23,22 @@ describe 'PennMARC::HeadingControl' do
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
26
|
+
PennMARC::Mappers.heading_overrides.each do |target, replacement|
|
27
|
+
context "with the \"#{target}\" term" do
|
28
|
+
it 'replaces the term in isolation' do
|
29
|
+
values = [target]
|
30
|
+
expect(PennMARC::HeadingControl.term_override(values)).to eq [replacement]
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'replaces the term when used with other headings' do
|
34
|
+
values = ["#{target}--History"]
|
35
|
+
expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replacement}--History"]
|
36
|
+
end
|
31
37
|
|
32
|
-
|
33
|
-
|
34
|
-
|
38
|
+
it 'replaces the term regardless of case' do
|
39
|
+
values = ["#{target.titleize}--History"]
|
40
|
+
expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replacement}--History"]
|
41
|
+
end
|
35
42
|
end
|
36
43
|
end
|
37
44
|
|
@@ -9,9 +9,9 @@ describe 'PennMARC::Creator' do
|
|
9
9
|
|
10
10
|
context 'with a single author record' do
|
11
11
|
let(:fields) do
|
12
|
-
[marc_field(tag: '100', subfields: { a: 'Surname, Name', '0': 'http://cool.uri/12345',
|
12
|
+
[marc_field(tag: '100', subfields: { a: 'Surname, Name,', '0': 'http://cool.uri/12345',
|
13
13
|
e: 'author', d: '1900-2000' }),
|
14
|
-
marc_field(tag: '880', subfields: { a: 'Surname, Alternative', '6': '100' })]
|
14
|
+
marc_field(tag: '880', subfields: { a: 'Surname, Alternative,', '6': '100' })]
|
15
15
|
end
|
16
16
|
|
17
17
|
it 'contains the expected search field values for a single author work' do
|
@@ -109,18 +109,21 @@ describe 'PennMARC::Creator' do
|
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
112
|
-
context 'with
|
112
|
+
context 'with five author records - abbreviated names' do
|
113
113
|
let(:fields) do
|
114
|
-
[marc_field(tag: '100', subfields: { a: 'Surname, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
|
114
|
+
[marc_field(tag: '100', subfields: { a: 'Surname, Alex, ', '0': 'http://cool.uri/12345', d: '1900-2000',
|
115
115
|
e: 'author.', '4': 'http://cool.uri/vocabulary/relators/aut' }),
|
116
|
-
marc_field(tag: '110', subfields: { a: 'Second, NameX', '0': 'http://cool.uri/12345', d: '1901-2010',
|
116
|
+
marc_field(tag: '110', subfields: { a: 'Second, NameX, ', '0': 'http://cool.uri/12345', d: '1901-2010',
|
117
117
|
e: 'author.', '4': 'http://cool.uri/vocabulary/relators/aut' }),
|
118
|
-
marc_field(tag: '700', subfields: { a: 'Alt, Alternative', '6': '100', d: '1970-' })
|
118
|
+
marc_field(tag: '700', subfields: { a: 'Alt, Alternative', '6': '100', d: '1970-' }),
|
119
|
+
marc_field(tag: '100', subfields: { a: 'Name with no comma', e: 'author' }),
|
120
|
+
marc_field(tag: '100', subfields: { a: 'Name ends with comma,', e: 'author' })]
|
119
121
|
end
|
120
122
|
|
121
123
|
it 'returns single author values with no URIs anywhere' do
|
122
124
|
values = helper.authors_list(record, first_initial_only: true)
|
123
|
-
expect(values).to contain_exactly 'Surname, A.', 'Second, N.', 'Alt, A.'
|
125
|
+
expect(values).to contain_exactly 'Surname, A.', 'Second, N.', 'Alt, A.',
|
126
|
+
'Name ends with comma', 'Name with no comma'
|
124
127
|
end
|
125
128
|
end
|
126
129
|
end
|
@@ -132,14 +135,14 @@ describe 'PennMARC::Creator' do
|
|
132
135
|
let(:fields) do
|
133
136
|
[marc_field(tag: '100', subfields: { a: 'Hamilton, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
|
134
137
|
e: 'author.' }),
|
135
|
-
marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham', b: 'I', c: 'laureate', d: '1968', e: 'author',
|
136
|
-
j: 'pseud', q: 'Fuller Name', u: 'affiliation', '3': 'materials',
|
138
|
+
marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham, ', b: 'I', c: 'laureate', d: '1968', e: 'author',
|
139
|
+
j: 'pseud', q: 'Fuller Name, ', u: 'affiliation', '3': 'materials',
|
137
140
|
'4': 'aut' }),
|
138
141
|
marc_field(tag: '700', subfields: { a: 'Einstein, Albert', '6': '100', d: '1970-', '4': 'trl',
|
139
142
|
e: 'translator' }),
|
140
143
|
marc_field(tag: '700', subfields: { a: 'Franklin, Ben', '6': '100', d: '1970-', '4': 'edt' }),
|
141
144
|
marc_field(tag: '710', subfields: { a: 'Jefferson, Thomas', '6': '100', d: '1870-', '4': 'edt' }),
|
142
|
-
marc_field(tag: '700', subfields: { a: 'Dickens, Charles', '6': '100', d: '1970-', '4': 'com' })]
|
145
|
+
marc_field(tag: '700', subfields: { a: 'Dickens, Charles, ', '6': '100', d: '1970-', '4': 'com' })]
|
143
146
|
end
|
144
147
|
|
145
148
|
it 'returns two authors and four contributors' do
|
@@ -153,7 +156,7 @@ describe 'PennMARC::Creator' do
|
|
153
156
|
|
154
157
|
context 'with two authors and four contributors records, with full information and relator' do
|
155
158
|
let(:fields) do
|
156
|
-
[marc_field(tag: '100', subfields: { a: 'Hamilton, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
|
159
|
+
[marc_field(tag: '100', subfields: { a: 'Hamilton, Alex, ', '0': 'http://cool.uri/12345', d: '1900-2000',
|
157
160
|
e: 'author.', '4': 'aut' }),
|
158
161
|
marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham', b: 'I', c: 'laureate', d: '1968', e: 'author',
|
159
162
|
j: 'pseud', q: 'Fuller Name', u: 'affiliation', '3': 'materials',
|
@@ -164,7 +167,7 @@ describe 'PennMARC::Creator' do
|
|
164
167
|
marc_field(tag: '710', subfields: { a: 'Jefferson, Thomas', '6': '100', d: '1870-', '4': 'edt' }),
|
165
168
|
marc_field(tag: '700', subfields: { a: 'Dickens, Charles', '6': '100', d: '1970-', '4': 'com' }),
|
166
169
|
marc_field(tag: '880', subfields: { a: '狄更斯', '6': '700', d: '1970-', '4': 'com' }),
|
167
|
-
marc_field(tag: '700', subfields: { a: 'Twain, Mark', '6': '100', d: '1870-' })]
|
170
|
+
marc_field(tag: '700', subfields: { a: 'Twain, Mark,', '6': '100', d: '1870-' })]
|
168
171
|
end
|
169
172
|
|
170
173
|
it 'returns four contributors' do
|
@@ -161,16 +161,36 @@ describe 'PennMARC::Identifier' do
|
|
161
161
|
end
|
162
162
|
|
163
163
|
describe '.host_record_id' do
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
164
|
+
context 'with a lower case tag' do
|
165
|
+
let(:record) do
|
166
|
+
marc_record fields: [
|
167
|
+
marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '123456789',
|
168
|
+
c: 'Contains',
|
169
|
+
a: 'Title' }),
|
170
|
+
marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '666666666',
|
171
|
+
c: 'Contained In' })
|
172
|
+
]
|
173
|
+
end
|
174
|
+
|
175
|
+
it 'returns only the desired host record MMS ID values' do
|
176
|
+
expect(helper.host_record_id(record)).to contain_exactly '123456789'
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
context 'with an upper case tag' do
|
181
|
+
let(:record) do
|
182
|
+
marc_record fields: [
|
183
|
+
marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '123456789',
|
184
|
+
c: 'Contains',
|
185
|
+
a: 'Title' }),
|
186
|
+
marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '666666666',
|
187
|
+
c: 'Contained In' })
|
188
|
+
]
|
189
|
+
end
|
190
|
+
|
191
|
+
it 'returns only the desired host record MMS ID values' do
|
192
|
+
expect(helper.host_record_id(record)).to contain_exactly '123456789'
|
193
|
+
end
|
174
194
|
end
|
175
195
|
end
|
176
196
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pennmarc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.31
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Kanning
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2024-07-
|
15
|
+
date: 2024-07-29 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: activesupport
|
@@ -56,20 +56,6 @@ dependencies:
|
|
56
56
|
- - "~>"
|
57
57
|
- !ruby/object:Gem::Version
|
58
58
|
version: '1.2'
|
59
|
-
- !ruby/object:Gem::Dependency
|
60
|
-
name: multi_string_replace
|
61
|
-
requirement: !ruby/object:Gem::Requirement
|
62
|
-
requirements:
|
63
|
-
- - "~>"
|
64
|
-
- !ruby/object:Gem::Version
|
65
|
-
version: '2.0'
|
66
|
-
type: :runtime
|
67
|
-
prerelease: false
|
68
|
-
version_requirements: !ruby/object:Gem::Requirement
|
69
|
-
requirements:
|
70
|
-
- - "~>"
|
71
|
-
- !ruby/object:Gem::Version
|
72
|
-
version: '2.0'
|
73
59
|
- !ruby/object:Gem::Dependency
|
74
60
|
name: nokogiri
|
75
61
|
requirement: !ruby/object:Gem::Requirement
|