pennmarc 1.0.27 → 1.0.31

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a6dec6a3d235117c15b3a37f368533a93e30ce4dfb11ac4832a612f878e653d9
4
- data.tar.gz: 4217da9f4a2126e46284c48f4aeaca09016f4abf6ce08c493e039dae20b4bc20
3
+ metadata.gz: 7a10e18cc63414bbcc1f3ab3d0dbb47517552830f835a0ca10c50added035e9e
4
+ data.tar.gz: 0153e3bb11fe9646f0b3e6614ef2229654d70d60a0bace46e3d57a9c49800ccd
5
5
  SHA512:
6
- metadata.gz: 8f3f0163eb6c3b2afeb8bd8845762c1c208bbbb9efb586e3978f5f5584f73159adb2649a801d78c090e6349c72f69197b1e721acd3bdbe3a79e9f99ac11f23ee
7
- data.tar.gz: d4595b310f8a7b765a16738799b642fc05f680b681d4e090bca11a903ca7b748a25ca0981a260290c8544faea91b9437f7b98358f5358551e787dc0ae9ab8531
6
+ metadata.gz: d9fa4ea2d2e36736f240e8a9431d487d9195e92a1eecf2e847c8cbda94b9d7e5da1029654754b4582d1036ffb453115bf6e88f2c09e8668704830c2ff46c78a3
7
+ data.tar.gz: fb4a80ff8479b248e90c33c2eb43038d957ad93de3b3978ec964bdffba11621106a8fb61ff7feb4aeb03c8dfec251f0af7da4a7a00751e34d122312059bdf6a2
data/Gemfile CHANGED
@@ -5,7 +5,6 @@ source 'https://rubygems.org'
5
5
  gem 'activesupport', '~> 7'
6
6
  gem 'library_stdnums', '~> 1.6'
7
7
  gem 'marc', '~> 1.2'
8
- gem 'multi_string_replace', '~> 2.0'
9
8
  gem 'nokogiri', '~> 1.15'
10
9
  gem 'rake', '~> 13.0'
11
10
  gem 'upennlib-rubocop', require: false
data/Gemfile.lock CHANGED
@@ -19,7 +19,6 @@ GEM
19
19
  scrub_rb (>= 1.0.1, < 2)
20
20
  unf
21
21
  minitest (5.18.0)
22
- multi_string_replace (2.0.2)
23
22
  nokogiri (1.15.2-arm64-darwin)
24
23
  racc (~> 1.4)
25
24
  nokogiri (1.15.2-x64-mingw-ucrt)
@@ -114,7 +113,6 @@ DEPENDENCIES
114
113
  activesupport (~> 7)
115
114
  library_stdnums (~> 1.6)
116
115
  marc (~> 1.2)
117
- multi_string_replace (~> 2.0)
118
116
  nokogiri (~> 1.15)
119
117
  rake (~> 13.0)
120
118
  rspec (~> 3.12)
@@ -12,7 +12,7 @@ module PennMARC
12
12
  PHYS_INVENTORY_TAG = 'hld'
13
13
  ELEC_INVENTORY_TAG = 'prt'
14
14
  ITEM_TAG = 'itm'
15
- RELATED_RECORD_TAG = 'rel'
15
+ RELATED_RECORD_TAGS = %w[REL rel].freeze
16
16
 
17
17
  # Subfields for HLD tags
18
18
  # Follow MARC 852 spec: https://www.loc.gov/marc/holdings/hd852.html, but names are translated into Alma parlance
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'multi_string_replace'
4
-
5
3
  module PennMARC
6
4
  # Shared tools and values for controlling handling of subject or genre headings
7
5
  class HeadingControl
@@ -10,6 +8,9 @@ module PennMARC
10
8
  ALLOWED_SOURCE_CODES = %w[aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
11
9
  local/osu mesh ndlsh nli nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp].freeze
12
10
 
11
+ REMOVE_TERM_REGEX = /#{Mappers.headings_to_remove&.join('|')}/i
12
+ REPLACE_TERM_REGEX = /(#{Mappers.heading_overrides.keys.join('|')})/i
13
+
13
14
  class << self
14
15
  # Replace or remove any terms in provided values pursuant to the configuration in remove and override mappers.
15
16
  # Used to remove or replace offensive or otherwise undesirable subject headings.
@@ -18,10 +19,13 @@ module PennMARC
18
19
  def term_override(values)
19
20
  values.filter_map do |value|
20
21
  # Remove values if they contain a remove term
21
- next nil if value.match?(/#{Mappers.headings_to_remove&.join('|')}/i)
22
+ next nil if value.match?(REMOVE_TERM_REGEX)
23
+
24
+ # return early if there are no terms to replace
25
+ next value if value.match(REPLACE_TERM_REGEX).nil?
22
26
 
23
- # Replace values using multi_string_replace gem
24
- MultiStringReplace.replace value, Mappers.heading_overrides
27
+ # lookup and perform replacement
28
+ value.sub(::Regexp.last_match.to_s, Mappers.heading_overrides[::Regexp.last_match.to_s.downcase])
25
29
  end
26
30
  end
27
31
  end
@@ -99,10 +99,9 @@ module PennMARC
99
99
 
100
100
  fields = record.fields(tags)
101
101
  fields.filter_map { |field|
102
- if first_initial_only
103
- abbreviate_name(field['a']) if field['a']
104
- else
105
- field['a']
102
+ if field['a'].present?
103
+ name = trim_trailing(:comma, field['a'])
104
+ first_initial_only ? abbreviate_name(name) : name
106
105
  end
107
106
  }.uniq
108
107
  end
@@ -131,10 +130,11 @@ module PennMARC
131
130
  relator = 'Contributor' if relator.blank?
132
131
  relator = trim_punctuation(relator).capitalize
133
132
 
133
+ name = trim_trailing(:comma, field['a'])
134
134
  name = if name_only
135
- field['a']
135
+ name
136
136
  else
137
- join_subfields(field, &subfield_in?(%w[a b c d j q u 3])) + ", #{relator}"
137
+ "#{name} #{join_subfields(field, &subfield_in?(%w[b c d j q u 3]))}, #{relator}"
138
138
  end
139
139
 
140
140
  if contributors.key?(relator)
@@ -328,7 +328,7 @@ module PennMARC
328
328
  relator_term_sf = relator_term_subfield(field)
329
329
  name = field.filter_map { |sf|
330
330
  if sf.code == 'a'
331
- should_convert_name_order ? convert_name_order(sf.value) : sf.value
331
+ should_convert_name_order ? convert_name_order(sf.value) : trim_trailing(:comma, sf.value)
332
332
  elsif sf.code == relator_term_sf
333
333
  next
334
334
  elsif NAME_EXCLUDED_SUBFIELDS.exclude?(sf.code)
@@ -348,6 +348,7 @@ module PennMARC
348
348
  # @param name [String] value for processing
349
349
  # @return [String]
350
350
  def convert_name_order(name)
351
+ name = trim_trailing(:comma, name)
351
352
  return name unless name.include? ','
352
353
 
353
354
  after_comma = join_and_squish([trim_trailing(:comma, substring_after(name, ', '))])
@@ -357,15 +358,16 @@ module PennMARC
357
358
 
358
359
  # Convert "Lastname, First" to "Lastname, F"
359
360
  # @param [String] name
361
+ # @return [String]
360
362
  def abbreviate_name(name)
361
- name_parts = name.split(', ')
362
- return '' if name_parts.empty?
363
-
364
- first_name_parts = name_parts.last.split
365
- temp_name = "#{name_parts.first}, #{first_name_parts.first[0, 1]}."
366
- first_name_parts.shift
367
- temp_name += " #{first_name_parts.join(' ')}" unless first_name_parts.empty?
368
- temp_name
363
+ name = trim_trailing(:comma, name)
364
+ return name unless name.include? ','
365
+
366
+ after_comma = join_and_squish([trim_trailing(:comma, substring_after(name, ','))])
367
+ before_comma = substring_before(name, ',')
368
+ abbrv = "#{before_comma},"
369
+ abbrv += " #{after_comma.first.upcase}." if after_comma.present?
370
+ abbrv
369
371
  end
370
372
 
371
373
  # Parse creator facet value from given creator field and desired subfields
@@ -164,7 +164,7 @@ module PennMARC
164
164
  # @param [MARC::Record] record
165
165
  # @return [Array<String>]
166
166
  def host_record_id(record)
167
- record.fields(Enriched::Pub::RELATED_RECORD_TAG).filter_map { |field|
167
+ record.fields(Enriched::Pub::RELATED_RECORD_TAGS).filter_map { |field|
168
168
  next unless subfield_value?(field, 'c', /contains/i)
169
169
 
170
170
  subfield_values field, :w
@@ -1,8 +1,12 @@
1
- Aliens: Noncitizens
2
- "Alien criminals": Noncitizen criminals
3
- "Alien detention centers": Immigrant detention centers
4
- "Alien labor": Foreign workers
5
- "Alien property": Foreign-owned property
6
- Gypsies: Romanies
7
- "Illegal Alien Children": Undocumented immigrant children
8
- "Illegal Aliens": Undocumented immigrants
1
+ # Important!: These terms should be listed in an order of decreasing complexity so that terms that may be contained in
2
+ # other terms are not replaced first, e.g., if "Aliens" is listed first, "Illegal Aliens" will be overridden to
3
+ # read "Illegal Noncitizens".
4
+ # Also, use lower case only in the key to support efficient case-insensitive matching.
5
+ "illegal alien children": Undocumented immigrant children
6
+ "alien detention centers": Immigrant detention centers
7
+ "alien criminals": Noncitizen criminals
8
+ "alien property": Foreign-owned property
9
+ "illegal aliens": Undocumented immigrants
10
+ "alien labor": Foreign workers
11
+ gypsies: Romanies
12
+ aliens: Noncitizens
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PennMARC
4
- VERSION = '1.0.27'
4
+ VERSION = '1.0.31'
5
5
  end
data/pennmarc.gemspec CHANGED
@@ -21,7 +21,6 @@ Gem::Specification.new do |s|
21
21
  s.add_dependency 'activesupport', '~> 7'
22
22
  s.add_dependency 'library_stdnums', '~> 1.6'
23
23
  s.add_dependency 'marc', '~> 1.2'
24
- s.add_dependency 'multi_string_replace', '~> 2.0'
25
24
  s.add_dependency 'nokogiri', '~> 1.15'
26
25
 
27
26
  s.metadata['rubygems_mfa_required'] = 'false'
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  describe 'PennMARC::HeadingControl' do
4
- let(:replace_term) { PennMARC::Mappers.heading_overrides.first[0] }
5
- let(:replaced_term) { PennMARC::Mappers.heading_overrides.first[1] }
4
+ let(:replace_term) { PennMARC::Mappers.heading_overrides.keys[2] }
5
+ let(:replaced_term) { PennMARC::Mappers.heading_overrides.values[2] }
6
6
  let(:remove_term) { PennMARC::Mappers.headings_to_remove.first }
7
7
 
8
8
  describe '.process' do
@@ -23,15 +23,22 @@ describe 'PennMARC::HeadingControl' do
23
23
  end
24
24
  end
25
25
 
26
- context 'with a term for replacement' do
27
- it 'replaces the term in isolation' do
28
- values = [replace_term]
29
- expect(PennMARC::HeadingControl.term_override(values)).to eq [replaced_term]
30
- end
26
+ PennMARC::Mappers.heading_overrides.each do |target, replacement|
27
+ context "with the \"#{target}\" term" do
28
+ it 'replaces the term in isolation' do
29
+ values = [target]
30
+ expect(PennMARC::HeadingControl.term_override(values)).to eq [replacement]
31
+ end
32
+
33
+ it 'replaces the term when used with other headings' do
34
+ values = ["#{target}--History"]
35
+ expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replacement}--History"]
36
+ end
31
37
 
32
- it 'replaces the term when used with other headings' do
33
- values = ["#{replace_term}--History"]
34
- expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replaced_term}--History"]
38
+ it 'replaces the term regardless of case' do
39
+ values = ["#{target.titleize}--History"]
40
+ expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replacement}--History"]
41
+ end
35
42
  end
36
43
  end
37
44
 
@@ -9,9 +9,9 @@ describe 'PennMARC::Creator' do
9
9
 
10
10
  context 'with a single author record' do
11
11
  let(:fields) do
12
- [marc_field(tag: '100', subfields: { a: 'Surname, Name', '0': 'http://cool.uri/12345',
12
+ [marc_field(tag: '100', subfields: { a: 'Surname, Name,', '0': 'http://cool.uri/12345',
13
13
  e: 'author', d: '1900-2000' }),
14
- marc_field(tag: '880', subfields: { a: 'Surname, Alternative', '6': '100' })]
14
+ marc_field(tag: '880', subfields: { a: 'Surname, Alternative,', '6': '100' })]
15
15
  end
16
16
 
17
17
  it 'contains the expected search field values for a single author work' do
@@ -109,18 +109,21 @@ describe 'PennMARC::Creator' do
109
109
  end
110
110
  end
111
111
 
112
- context 'with three author records - abbreviated names' do
112
+ context 'with five author records - abbreviated names' do
113
113
  let(:fields) do
114
- [marc_field(tag: '100', subfields: { a: 'Surname, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
114
+ [marc_field(tag: '100', subfields: { a: 'Surname, Alex, ', '0': 'http://cool.uri/12345', d: '1900-2000',
115
115
  e: 'author.', '4': 'http://cool.uri/vocabulary/relators/aut' }),
116
- marc_field(tag: '110', subfields: { a: 'Second, NameX', '0': 'http://cool.uri/12345', d: '1901-2010',
116
+ marc_field(tag: '110', subfields: { a: 'Second, NameX, ', '0': 'http://cool.uri/12345', d: '1901-2010',
117
117
  e: 'author.', '4': 'http://cool.uri/vocabulary/relators/aut' }),
118
- marc_field(tag: '700', subfields: { a: 'Alt, Alternative', '6': '100', d: '1970-' })]
118
+ marc_field(tag: '700', subfields: { a: 'Alt, Alternative', '6': '100', d: '1970-' }),
119
+ marc_field(tag: '100', subfields: { a: 'Name with no comma', e: 'author' }),
120
+ marc_field(tag: '100', subfields: { a: 'Name ends with comma,', e: 'author' })]
119
121
  end
120
122
 
121
123
  it 'returns single author values with no URIs anywhere' do
122
124
  values = helper.authors_list(record, first_initial_only: true)
123
- expect(values).to contain_exactly 'Surname, A.', 'Second, N.', 'Alt, A.'
125
+ expect(values).to contain_exactly 'Surname, A.', 'Second, N.', 'Alt, A.',
126
+ 'Name ends with comma', 'Name with no comma'
124
127
  end
125
128
  end
126
129
  end
@@ -132,14 +135,14 @@ describe 'PennMARC::Creator' do
132
135
  let(:fields) do
133
136
  [marc_field(tag: '100', subfields: { a: 'Hamilton, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
134
137
  e: 'author.' }),
135
- marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham', b: 'I', c: 'laureate', d: '1968', e: 'author',
136
- j: 'pseud', q: 'Fuller Name', u: 'affiliation', '3': 'materials',
138
+ marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham, ', b: 'I', c: 'laureate', d: '1968', e: 'author',
139
+ j: 'pseud', q: 'Fuller Name, ', u: 'affiliation', '3': 'materials',
137
140
  '4': 'aut' }),
138
141
  marc_field(tag: '700', subfields: { a: 'Einstein, Albert', '6': '100', d: '1970-', '4': 'trl',
139
142
  e: 'translator' }),
140
143
  marc_field(tag: '700', subfields: { a: 'Franklin, Ben', '6': '100', d: '1970-', '4': 'edt' }),
141
144
  marc_field(tag: '710', subfields: { a: 'Jefferson, Thomas', '6': '100', d: '1870-', '4': 'edt' }),
142
- marc_field(tag: '700', subfields: { a: 'Dickens, Charles', '6': '100', d: '1970-', '4': 'com' })]
145
+ marc_field(tag: '700', subfields: { a: 'Dickens, Charles, ', '6': '100', d: '1970-', '4': 'com' })]
143
146
  end
144
147
 
145
148
  it 'returns two authors and four contributors' do
@@ -153,7 +156,7 @@ describe 'PennMARC::Creator' do
153
156
 
154
157
  context 'with two authors and four contributors records, with full information and relator' do
155
158
  let(:fields) do
156
- [marc_field(tag: '100', subfields: { a: 'Hamilton, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
159
+ [marc_field(tag: '100', subfields: { a: 'Hamilton, Alex, ', '0': 'http://cool.uri/12345', d: '1900-2000',
157
160
  e: 'author.', '4': 'aut' }),
158
161
  marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham', b: 'I', c: 'laureate', d: '1968', e: 'author',
159
162
  j: 'pseud', q: 'Fuller Name', u: 'affiliation', '3': 'materials',
@@ -164,7 +167,7 @@ describe 'PennMARC::Creator' do
164
167
  marc_field(tag: '710', subfields: { a: 'Jefferson, Thomas', '6': '100', d: '1870-', '4': 'edt' }),
165
168
  marc_field(tag: '700', subfields: { a: 'Dickens, Charles', '6': '100', d: '1970-', '4': 'com' }),
166
169
  marc_field(tag: '880', subfields: { a: '狄更斯', '6': '700', d: '1970-', '4': 'com' }),
167
- marc_field(tag: '700', subfields: { a: 'Twain, Mark', '6': '100', d: '1870-' })]
170
+ marc_field(tag: '700', subfields: { a: 'Twain, Mark,', '6': '100', d: '1870-' })]
168
171
  end
169
172
 
170
173
  it 'returns four contributors' do
@@ -161,16 +161,36 @@ describe 'PennMARC::Identifier' do
161
161
  end
162
162
 
163
163
  describe '.host_record_id' do
164
- let(:record) do
165
- marc_record fields: [
166
- marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAG, subfields: { w: '123456789', c: 'Contains',
167
- a: 'Title' }),
168
- marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAG, subfields: { w: '666666666', c: 'Contained In' })
169
- ]
170
- end
171
-
172
- it 'returns only the desired host record MMS ID values' do
173
- expect(helper.host_record_id(record)).to contain_exactly '123456789'
164
+ context 'with a lower case tag' do
165
+ let(:record) do
166
+ marc_record fields: [
167
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '123456789',
168
+ c: 'Contains',
169
+ a: 'Title' }),
170
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '666666666',
171
+ c: 'Contained In' })
172
+ ]
173
+ end
174
+
175
+ it 'returns only the desired host record MMS ID values' do
176
+ expect(helper.host_record_id(record)).to contain_exactly '123456789'
177
+ end
178
+ end
179
+
180
+ context 'with an upper case tag' do
181
+ let(:record) do
182
+ marc_record fields: [
183
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '123456789',
184
+ c: 'Contains',
185
+ a: 'Title' }),
186
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '666666666',
187
+ c: 'Contained In' })
188
+ ]
189
+ end
190
+
191
+ it 'returns only the desired host record MMS ID values' do
192
+ expect(helper.host_record_id(record)).to contain_exactly '123456789'
193
+ end
174
194
  end
175
195
  end
176
196
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pennmarc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.27
4
+ version: 1.0.31
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Kanning
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2024-07-14 00:00:00.000000000 Z
15
+ date: 2024-07-29 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: activesupport
@@ -56,20 +56,6 @@ dependencies:
56
56
  - - "~>"
57
57
  - !ruby/object:Gem::Version
58
58
  version: '1.2'
59
- - !ruby/object:Gem::Dependency
60
- name: multi_string_replace
61
- requirement: !ruby/object:Gem::Requirement
62
- requirements:
63
- - - "~>"
64
- - !ruby/object:Gem::Version
65
- version: '2.0'
66
- type: :runtime
67
- prerelease: false
68
- version_requirements: !ruby/object:Gem::Requirement
69
- requirements:
70
- - - "~>"
71
- - !ruby/object:Gem::Version
72
- version: '2.0'
73
59
  - !ruby/object:Gem::Dependency
74
60
  name: nokogiri
75
61
  requirement: !ruby/object:Gem::Requirement