pennmarc 1.0.27 → 1.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +0 -1
- data/Gemfile.lock +0 -2
- data/lib/pennmarc/enriched.rb +1 -1
- data/lib/pennmarc/heading_control.rb +9 -5
- data/lib/pennmarc/helpers/creator.rb +17 -15
- data/lib/pennmarc/helpers/identifier.rb +1 -1
- data/lib/pennmarc/mappings/headings_override.yml +12 -8
- data/lib/pennmarc/version.rb +1 -1
- data/pennmarc.gemspec +0 -1
- data/spec/lib/pennmarc/heading_control_spec.rb +17 -10
- data/spec/lib/pennmarc/helpers/creator_spec.rb +15 -12
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +30 -10
- metadata +2 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a10e18cc63414bbcc1f3ab3d0dbb47517552830f835a0ca10c50added035e9e
|
4
|
+
data.tar.gz: 0153e3bb11fe9646f0b3e6614ef2229654d70d60a0bace46e3d57a9c49800ccd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d9fa4ea2d2e36736f240e8a9431d487d9195e92a1eecf2e847c8cbda94b9d7e5da1029654754b4582d1036ffb453115bf6e88f2c09e8668704830c2ff46c78a3
|
7
|
+
data.tar.gz: fb4a80ff8479b248e90c33c2eb43038d957ad93de3b3978ec964bdffba11621106a8fb61ff7feb4aeb03c8dfec251f0af7da4a7a00751e34d122312059bdf6a2
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -19,7 +19,6 @@ GEM
|
|
19
19
|
scrub_rb (>= 1.0.1, < 2)
|
20
20
|
unf
|
21
21
|
minitest (5.18.0)
|
22
|
-
multi_string_replace (2.0.2)
|
23
22
|
nokogiri (1.15.2-arm64-darwin)
|
24
23
|
racc (~> 1.4)
|
25
24
|
nokogiri (1.15.2-x64-mingw-ucrt)
|
@@ -114,7 +113,6 @@ DEPENDENCIES
|
|
114
113
|
activesupport (~> 7)
|
115
114
|
library_stdnums (~> 1.6)
|
116
115
|
marc (~> 1.2)
|
117
|
-
multi_string_replace (~> 2.0)
|
118
116
|
nokogiri (~> 1.15)
|
119
117
|
rake (~> 13.0)
|
120
118
|
rspec (~> 3.12)
|
data/lib/pennmarc/enriched.rb
CHANGED
@@ -12,7 +12,7 @@ module PennMARC
|
|
12
12
|
PHYS_INVENTORY_TAG = 'hld'
|
13
13
|
ELEC_INVENTORY_TAG = 'prt'
|
14
14
|
ITEM_TAG = 'itm'
|
15
|
-
|
15
|
+
RELATED_RECORD_TAGS = %w[REL rel].freeze
|
16
16
|
|
17
17
|
# Subfields for HLD tags
|
18
18
|
# Follow MARC 852 spec: https://www.loc.gov/marc/holdings/hd852.html, but names are translated into Alma parlance
|
@@ -1,7 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'multi_string_replace'
|
4
|
-
|
5
3
|
module PennMARC
|
6
4
|
# Shared tools and values for controlling handling of subject or genre headings
|
7
5
|
class HeadingControl
|
@@ -10,6 +8,9 @@ module PennMARC
|
|
10
8
|
ALLOWED_SOURCE_CODES = %w[aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
|
11
9
|
local/osu mesh ndlsh nli nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp].freeze
|
12
10
|
|
11
|
+
REMOVE_TERM_REGEX = /#{Mappers.headings_to_remove&.join('|')}/i
|
12
|
+
REPLACE_TERM_REGEX = /(#{Mappers.heading_overrides.keys.join('|')})/i
|
13
|
+
|
13
14
|
class << self
|
14
15
|
# Replace or remove any terms in provided values pursuant to the configuration in remove and override mappers.
|
15
16
|
# Used to remove or replace offensive or otherwise undesirable subject headings.
|
@@ -18,10 +19,13 @@ module PennMARC
|
|
18
19
|
def term_override(values)
|
19
20
|
values.filter_map do |value|
|
20
21
|
# Remove values if they contain a remove term
|
21
|
-
next nil if value.match?(
|
22
|
+
next nil if value.match?(REMOVE_TERM_REGEX)
|
23
|
+
|
24
|
+
# return early if there are no terms to replace
|
25
|
+
next value if value.match(REPLACE_TERM_REGEX).nil?
|
22
26
|
|
23
|
-
#
|
24
|
-
|
27
|
+
# lookup and perform replacement
|
28
|
+
value.sub(::Regexp.last_match.to_s, Mappers.heading_overrides[::Regexp.last_match.to_s.downcase])
|
25
29
|
end
|
26
30
|
end
|
27
31
|
end
|
@@ -99,10 +99,9 @@ module PennMARC
|
|
99
99
|
|
100
100
|
fields = record.fields(tags)
|
101
101
|
fields.filter_map { |field|
|
102
|
-
if
|
103
|
-
|
104
|
-
|
105
|
-
field['a']
|
102
|
+
if field['a'].present?
|
103
|
+
name = trim_trailing(:comma, field['a'])
|
104
|
+
first_initial_only ? abbreviate_name(name) : name
|
106
105
|
end
|
107
106
|
}.uniq
|
108
107
|
end
|
@@ -131,10 +130,11 @@ module PennMARC
|
|
131
130
|
relator = 'Contributor' if relator.blank?
|
132
131
|
relator = trim_punctuation(relator).capitalize
|
133
132
|
|
133
|
+
name = trim_trailing(:comma, field['a'])
|
134
134
|
name = if name_only
|
135
|
-
|
135
|
+
name
|
136
136
|
else
|
137
|
-
join_subfields(field, &subfield_in?(%w[
|
137
|
+
"#{name} #{join_subfields(field, &subfield_in?(%w[b c d j q u 3]))}, #{relator}"
|
138
138
|
end
|
139
139
|
|
140
140
|
if contributors.key?(relator)
|
@@ -328,7 +328,7 @@ module PennMARC
|
|
328
328
|
relator_term_sf = relator_term_subfield(field)
|
329
329
|
name = field.filter_map { |sf|
|
330
330
|
if sf.code == 'a'
|
331
|
-
should_convert_name_order ? convert_name_order(sf.value) : sf.value
|
331
|
+
should_convert_name_order ? convert_name_order(sf.value) : trim_trailing(:comma, sf.value)
|
332
332
|
elsif sf.code == relator_term_sf
|
333
333
|
next
|
334
334
|
elsif NAME_EXCLUDED_SUBFIELDS.exclude?(sf.code)
|
@@ -348,6 +348,7 @@ module PennMARC
|
|
348
348
|
# @param name [String] value for processing
|
349
349
|
# @return [String]
|
350
350
|
def convert_name_order(name)
|
351
|
+
name = trim_trailing(:comma, name)
|
351
352
|
return name unless name.include? ','
|
352
353
|
|
353
354
|
after_comma = join_and_squish([trim_trailing(:comma, substring_after(name, ', '))])
|
@@ -357,15 +358,16 @@ module PennMARC
|
|
357
358
|
|
358
359
|
# Convert "Lastname, First" to "Lastname, F"
|
359
360
|
# @param [String] name
|
361
|
+
# @return [String]
|
360
362
|
def abbreviate_name(name)
|
361
|
-
|
362
|
-
return
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
363
|
+
name = trim_trailing(:comma, name)
|
364
|
+
return name unless name.include? ','
|
365
|
+
|
366
|
+
after_comma = join_and_squish([trim_trailing(:comma, substring_after(name, ','))])
|
367
|
+
before_comma = substring_before(name, ',')
|
368
|
+
abbrv = "#{before_comma},"
|
369
|
+
abbrv += " #{after_comma.first.upcase}." if after_comma.present?
|
370
|
+
abbrv
|
369
371
|
end
|
370
372
|
|
371
373
|
# Parse creator facet value from given creator field and desired subfields
|
@@ -164,7 +164,7 @@ module PennMARC
|
|
164
164
|
# @param [MARC::Record] record
|
165
165
|
# @return [Array<String>]
|
166
166
|
def host_record_id(record)
|
167
|
-
record.fields(Enriched::Pub::
|
167
|
+
record.fields(Enriched::Pub::RELATED_RECORD_TAGS).filter_map { |field|
|
168
168
|
next unless subfield_value?(field, 'c', /contains/i)
|
169
169
|
|
170
170
|
subfield_values field, :w
|
@@ -1,8 +1,12 @@
|
|
1
|
-
|
2
|
-
"
|
3
|
-
|
4
|
-
|
5
|
-
"
|
6
|
-
|
7
|
-
"
|
8
|
-
"
|
1
|
+
# Important!: These terms should be listed in an order of decreasing complexity so that terms that may be contained in
|
2
|
+
# other terms are not replaced first, e.g., if "Aliens" is listed first, "Illegal Aliens" will get overridden to
|
3
|
+
# read "Illegal Noncitizens".
|
4
|
+
# Also, use lower case only in the key to support efficient case-insensitive matching.
|
5
|
+
"illegal alien children": Undocumented immigrant children
|
6
|
+
"alien detention centers": Immigrant detention centers
|
7
|
+
"alien criminals": Noncitizen criminals
|
8
|
+
"alien property": Foreign-owned property
|
9
|
+
"illegal aliens": Undocumented immigrants
|
10
|
+
"alien labor": Foreign workers
|
11
|
+
gypsies: Romanies
|
12
|
+
aliens: Noncitizens
|
data/lib/pennmarc/version.rb
CHANGED
data/pennmarc.gemspec
CHANGED
@@ -21,7 +21,6 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.add_dependency 'activesupport', '~> 7'
|
22
22
|
s.add_dependency 'library_stdnums', '~> 1.6'
|
23
23
|
s.add_dependency 'marc', '~> 1.2'
|
24
|
-
s.add_dependency 'multi_string_replace', '~> 2.0'
|
25
24
|
s.add_dependency 'nokogiri', '~> 1.15'
|
26
25
|
|
27
26
|
s.metadata['rubygems_mfa_required'] = 'false'
|
@@ -1,8 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
describe 'PennMARC::HeadingControl' do
|
4
|
-
let(:replace_term) { PennMARC::Mappers.heading_overrides.
|
5
|
-
let(:replaced_term) { PennMARC::Mappers.heading_overrides.
|
4
|
+
let(:replace_term) { PennMARC::Mappers.heading_overrides.keys[2] }
|
5
|
+
let(:replaced_term) { PennMARC::Mappers.heading_overrides.values[2] }
|
6
6
|
let(:remove_term) { PennMARC::Mappers.headings_to_remove.first }
|
7
7
|
|
8
8
|
describe '.process' do
|
@@ -23,15 +23,22 @@ describe 'PennMARC::HeadingControl' do
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
26
|
+
PennMARC::Mappers.heading_overrides.each do |target, replacement|
|
27
|
+
context "with the \"#{target}\" term" do
|
28
|
+
it 'replaces the term in isolation' do
|
29
|
+
values = [target]
|
30
|
+
expect(PennMARC::HeadingControl.term_override(values)).to eq [replacement]
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'replaces the term when used with other headings' do
|
34
|
+
values = ["#{target}--History"]
|
35
|
+
expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replacement}--History"]
|
36
|
+
end
|
31
37
|
|
32
|
-
|
33
|
-
|
34
|
-
|
38
|
+
it 'replaces the term regardless of case' do
|
39
|
+
values = ["#{target.titleize}--History"]
|
40
|
+
expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replacement}--History"]
|
41
|
+
end
|
35
42
|
end
|
36
43
|
end
|
37
44
|
|
@@ -9,9 +9,9 @@ describe 'PennMARC::Creator' do
|
|
9
9
|
|
10
10
|
context 'with a single author record' do
|
11
11
|
let(:fields) do
|
12
|
-
[marc_field(tag: '100', subfields: { a: 'Surname, Name', '0': 'http://cool.uri/12345',
|
12
|
+
[marc_field(tag: '100', subfields: { a: 'Surname, Name,', '0': 'http://cool.uri/12345',
|
13
13
|
e: 'author', d: '1900-2000' }),
|
14
|
-
marc_field(tag: '880', subfields: { a: 'Surname, Alternative', '6': '100' })]
|
14
|
+
marc_field(tag: '880', subfields: { a: 'Surname, Alternative,', '6': '100' })]
|
15
15
|
end
|
16
16
|
|
17
17
|
it 'contains the expected search field values for a single author work' do
|
@@ -109,18 +109,21 @@ describe 'PennMARC::Creator' do
|
|
109
109
|
end
|
110
110
|
end
|
111
111
|
|
112
|
-
context 'with
|
112
|
+
context 'with five author records - abbreviated names' do
|
113
113
|
let(:fields) do
|
114
|
-
[marc_field(tag: '100', subfields: { a: 'Surname, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
|
114
|
+
[marc_field(tag: '100', subfields: { a: 'Surname, Alex, ', '0': 'http://cool.uri/12345', d: '1900-2000',
|
115
115
|
e: 'author.', '4': 'http://cool.uri/vocabulary/relators/aut' }),
|
116
|
-
marc_field(tag: '110', subfields: { a: 'Second, NameX', '0': 'http://cool.uri/12345', d: '1901-2010',
|
116
|
+
marc_field(tag: '110', subfields: { a: 'Second, NameX, ', '0': 'http://cool.uri/12345', d: '1901-2010',
|
117
117
|
e: 'author.', '4': 'http://cool.uri/vocabulary/relators/aut' }),
|
118
|
-
marc_field(tag: '700', subfields: { a: 'Alt, Alternative', '6': '100', d: '1970-' })
|
118
|
+
marc_field(tag: '700', subfields: { a: 'Alt, Alternative', '6': '100', d: '1970-' }),
|
119
|
+
marc_field(tag: '100', subfields: { a: 'Name with no comma', e: 'author' }),
|
120
|
+
marc_field(tag: '100', subfields: { a: 'Name ends with comma,', e: 'author' })]
|
119
121
|
end
|
120
122
|
|
121
123
|
it 'returns single author values with no URIs anywhere' do
|
122
124
|
values = helper.authors_list(record, first_initial_only: true)
|
123
|
-
expect(values).to contain_exactly 'Surname, A.', 'Second, N.', 'Alt, A.'
|
125
|
+
expect(values).to contain_exactly 'Surname, A.', 'Second, N.', 'Alt, A.',
|
126
|
+
'Name ends with comma', 'Name with no comma'
|
124
127
|
end
|
125
128
|
end
|
126
129
|
end
|
@@ -132,14 +135,14 @@ describe 'PennMARC::Creator' do
|
|
132
135
|
let(:fields) do
|
133
136
|
[marc_field(tag: '100', subfields: { a: 'Hamilton, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
|
134
137
|
e: 'author.' }),
|
135
|
-
marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham', b: 'I', c: 'laureate', d: '1968', e: 'author',
|
136
|
-
j: 'pseud', q: 'Fuller Name', u: 'affiliation', '3': 'materials',
|
138
|
+
marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham, ', b: 'I', c: 'laureate', d: '1968', e: 'author',
|
139
|
+
j: 'pseud', q: 'Fuller Name, ', u: 'affiliation', '3': 'materials',
|
137
140
|
'4': 'aut' }),
|
138
141
|
marc_field(tag: '700', subfields: { a: 'Einstein, Albert', '6': '100', d: '1970-', '4': 'trl',
|
139
142
|
e: 'translator' }),
|
140
143
|
marc_field(tag: '700', subfields: { a: 'Franklin, Ben', '6': '100', d: '1970-', '4': 'edt' }),
|
141
144
|
marc_field(tag: '710', subfields: { a: 'Jefferson, Thomas', '6': '100', d: '1870-', '4': 'edt' }),
|
142
|
-
marc_field(tag: '700', subfields: { a: 'Dickens, Charles', '6': '100', d: '1970-', '4': 'com' })]
|
145
|
+
marc_field(tag: '700', subfields: { a: 'Dickens, Charles, ', '6': '100', d: '1970-', '4': 'com' })]
|
143
146
|
end
|
144
147
|
|
145
148
|
it 'returns two authors and four contributors' do
|
@@ -153,7 +156,7 @@ describe 'PennMARC::Creator' do
|
|
153
156
|
|
154
157
|
context 'with two authors and four contributors records, with full information and relator' do
|
155
158
|
let(:fields) do
|
156
|
-
[marc_field(tag: '100', subfields: { a: 'Hamilton, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
|
159
|
+
[marc_field(tag: '100', subfields: { a: 'Hamilton, Alex, ', '0': 'http://cool.uri/12345', d: '1900-2000',
|
157
160
|
e: 'author.', '4': 'aut' }),
|
158
161
|
marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham', b: 'I', c: 'laureate', d: '1968', e: 'author',
|
159
162
|
j: 'pseud', q: 'Fuller Name', u: 'affiliation', '3': 'materials',
|
@@ -164,7 +167,7 @@ describe 'PennMARC::Creator' do
|
|
164
167
|
marc_field(tag: '710', subfields: { a: 'Jefferson, Thomas', '6': '100', d: '1870-', '4': 'edt' }),
|
165
168
|
marc_field(tag: '700', subfields: { a: 'Dickens, Charles', '6': '100', d: '1970-', '4': 'com' }),
|
166
169
|
marc_field(tag: '880', subfields: { a: '狄更斯', '6': '700', d: '1970-', '4': 'com' }),
|
167
|
-
marc_field(tag: '700', subfields: { a: 'Twain, Mark', '6': '100', d: '1870-' })]
|
170
|
+
marc_field(tag: '700', subfields: { a: 'Twain, Mark,', '6': '100', d: '1870-' })]
|
168
171
|
end
|
169
172
|
|
170
173
|
it 'returns four contributors' do
|
@@ -161,16 +161,36 @@ describe 'PennMARC::Identifier' do
|
|
161
161
|
end
|
162
162
|
|
163
163
|
describe '.host_record_id' do
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
164
|
+
context 'with a lower case tag' do
|
165
|
+
let(:record) do
|
166
|
+
marc_record fields: [
|
167
|
+
marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '123456789',
|
168
|
+
c: 'Contains',
|
169
|
+
a: 'Title' }),
|
170
|
+
marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '666666666',
|
171
|
+
c: 'Contained In' })
|
172
|
+
]
|
173
|
+
end
|
174
|
+
|
175
|
+
it 'returns only the desired host record MMS ID values' do
|
176
|
+
expect(helper.host_record_id(record)).to contain_exactly '123456789'
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
context 'with an upper case tag' do
|
181
|
+
let(:record) do
|
182
|
+
marc_record fields: [
|
183
|
+
marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '123456789',
|
184
|
+
c: 'Contains',
|
185
|
+
a: 'Title' }),
|
186
|
+
marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '666666666',
|
187
|
+
c: 'Contained In' })
|
188
|
+
]
|
189
|
+
end
|
190
|
+
|
191
|
+
it 'returns only the desired host record MMS ID values' do
|
192
|
+
expect(helper.host_record_id(record)).to contain_exactly '123456789'
|
193
|
+
end
|
174
194
|
end
|
175
195
|
end
|
176
196
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pennmarc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.31
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Kanning
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date: 2024-07-
|
15
|
+
date: 2024-07-29 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: activesupport
|
@@ -56,20 +56,6 @@ dependencies:
|
|
56
56
|
- - "~>"
|
57
57
|
- !ruby/object:Gem::Version
|
58
58
|
version: '1.2'
|
59
|
-
- !ruby/object:Gem::Dependency
|
60
|
-
name: multi_string_replace
|
61
|
-
requirement: !ruby/object:Gem::Requirement
|
62
|
-
requirements:
|
63
|
-
- - "~>"
|
64
|
-
- !ruby/object:Gem::Version
|
65
|
-
version: '2.0'
|
66
|
-
type: :runtime
|
67
|
-
prerelease: false
|
68
|
-
version_requirements: !ruby/object:Gem::Requirement
|
69
|
-
requirements:
|
70
|
-
- - "~>"
|
71
|
-
- !ruby/object:Gem::Version
|
72
|
-
version: '2.0'
|
73
59
|
- !ruby/object:Gem::Dependency
|
74
60
|
name: nokogiri
|
75
61
|
requirement: !ruby/object:Gem::Requirement
|