pennmarc 1.0.27 → 1.0.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a6dec6a3d235117c15b3a37f368533a93e30ce4dfb11ac4832a612f878e653d9
4
- data.tar.gz: 4217da9f4a2126e46284c48f4aeaca09016f4abf6ce08c493e039dae20b4bc20
3
+ metadata.gz: 7a10e18cc63414bbcc1f3ab3d0dbb47517552830f835a0ca10c50added035e9e
4
+ data.tar.gz: 0153e3bb11fe9646f0b3e6614ef2229654d70d60a0bace46e3d57a9c49800ccd
5
5
  SHA512:
6
- metadata.gz: 8f3f0163eb6c3b2afeb8bd8845762c1c208bbbb9efb586e3978f5f5584f73159adb2649a801d78c090e6349c72f69197b1e721acd3bdbe3a79e9f99ac11f23ee
7
- data.tar.gz: d4595b310f8a7b765a16738799b642fc05f680b681d4e090bca11a903ca7b748a25ca0981a260290c8544faea91b9437f7b98358f5358551e787dc0ae9ab8531
6
+ metadata.gz: d9fa4ea2d2e36736f240e8a9431d487d9195e92a1eecf2e847c8cbda94b9d7e5da1029654754b4582d1036ffb453115bf6e88f2c09e8668704830c2ff46c78a3
7
+ data.tar.gz: fb4a80ff8479b248e90c33c2eb43038d957ad93de3b3978ec964bdffba11621106a8fb61ff7feb4aeb03c8dfec251f0af7da4a7a00751e34d122312059bdf6a2
data/Gemfile CHANGED
@@ -5,7 +5,6 @@ source 'https://rubygems.org'
5
5
  gem 'activesupport', '~> 7'
6
6
  gem 'library_stdnums', '~> 1.6'
7
7
  gem 'marc', '~> 1.2'
8
- gem 'multi_string_replace', '~> 2.0'
9
8
  gem 'nokogiri', '~> 1.15'
10
9
  gem 'rake', '~> 13.0'
11
10
  gem 'upennlib-rubocop', require: false
data/Gemfile.lock CHANGED
@@ -19,7 +19,6 @@ GEM
19
19
  scrub_rb (>= 1.0.1, < 2)
20
20
  unf
21
21
  minitest (5.18.0)
22
- multi_string_replace (2.0.2)
23
22
  nokogiri (1.15.2-arm64-darwin)
24
23
  racc (~> 1.4)
25
24
  nokogiri (1.15.2-x64-mingw-ucrt)
@@ -114,7 +113,6 @@ DEPENDENCIES
114
113
  activesupport (~> 7)
115
114
  library_stdnums (~> 1.6)
116
115
  marc (~> 1.2)
117
- multi_string_replace (~> 2.0)
118
116
  nokogiri (~> 1.15)
119
117
  rake (~> 13.0)
120
118
  rspec (~> 3.12)
@@ -12,7 +12,7 @@ module PennMARC
12
12
  PHYS_INVENTORY_TAG = 'hld'
13
13
  ELEC_INVENTORY_TAG = 'prt'
14
14
  ITEM_TAG = 'itm'
15
- RELATED_RECORD_TAG = 'rel'
15
+ RELATED_RECORD_TAGS = %w[REL rel].freeze
16
16
 
17
17
  # Subfields for HLD tags
18
18
  # Follow MARC 852 spec: https://www.loc.gov/marc/holdings/hd852.html, but names are translated into Alma parlance
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'multi_string_replace'
4
-
5
3
  module PennMARC
6
4
  # Shared tools and values for controlling handling of subject or genre headings
7
5
  class HeadingControl
@@ -10,6 +8,9 @@ module PennMARC
10
8
  ALLOWED_SOURCE_CODES = %w[aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
11
9
  local/osu mesh ndlsh nli nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp].freeze
12
10
 
11
+ REMOVE_TERM_REGEX = /#{Mappers.headings_to_remove&.join('|')}/i
12
+ REPLACE_TERM_REGEX = /(#{Mappers.heading_overrides.keys.join('|')})/i
13
+
13
14
  class << self
14
15
  # Replace or remove any terms in provided values pursuant to the configuration in remove and override mappers.
15
16
  # Used to remove or replace offensive or otherwise undesirable subject headings.
@@ -18,10 +19,13 @@ module PennMARC
18
19
  def term_override(values)
19
20
  values.filter_map do |value|
20
21
  # Remove values if they contain a remove term
21
- next nil if value.match?(/#{Mappers.headings_to_remove&.join('|')}/i)
22
+ next nil if value.match?(REMOVE_TERM_REGEX)
23
+
24
+ # return early if there are no terms to replace
25
+ next value if value.match(REPLACE_TERM_REGEX).nil?
22
26
 
23
- # Replace values using multi_string_replace gem
24
- MultiStringReplace.replace value, Mappers.heading_overrides
27
+ # lookup and perform replacement
28
+ value.sub(::Regexp.last_match.to_s, Mappers.heading_overrides[::Regexp.last_match.to_s.downcase])
25
29
  end
26
30
  end
27
31
  end
@@ -99,10 +99,9 @@ module PennMARC
99
99
 
100
100
  fields = record.fields(tags)
101
101
  fields.filter_map { |field|
102
- if first_initial_only
103
- abbreviate_name(field['a']) if field['a']
104
- else
105
- field['a']
102
+ if field['a'].present?
103
+ name = trim_trailing(:comma, field['a'])
104
+ first_initial_only ? abbreviate_name(name) : name
106
105
  end
107
106
  }.uniq
108
107
  end
@@ -131,10 +130,11 @@ module PennMARC
131
130
  relator = 'Contributor' if relator.blank?
132
131
  relator = trim_punctuation(relator).capitalize
133
132
 
133
+ name = trim_trailing(:comma, field['a'])
134
134
  name = if name_only
135
- field['a']
135
+ name
136
136
  else
137
- join_subfields(field, &subfield_in?(%w[a b c d j q u 3])) + ", #{relator}"
137
+ "#{name} #{join_subfields(field, &subfield_in?(%w[b c d j q u 3]))}, #{relator}"
138
138
  end
139
139
 
140
140
  if contributors.key?(relator)
@@ -328,7 +328,7 @@ module PennMARC
328
328
  relator_term_sf = relator_term_subfield(field)
329
329
  name = field.filter_map { |sf|
330
330
  if sf.code == 'a'
331
- should_convert_name_order ? convert_name_order(sf.value) : sf.value
331
+ should_convert_name_order ? convert_name_order(sf.value) : trim_trailing(:comma, sf.value)
332
332
  elsif sf.code == relator_term_sf
333
333
  next
334
334
  elsif NAME_EXCLUDED_SUBFIELDS.exclude?(sf.code)
@@ -348,6 +348,7 @@ module PennMARC
348
348
  # @param name [String] value for processing
349
349
  # @return [String]
350
350
  def convert_name_order(name)
351
+ name = trim_trailing(:comma, name)
351
352
  return name unless name.include? ','
352
353
 
353
354
  after_comma = join_and_squish([trim_trailing(:comma, substring_after(name, ', '))])
@@ -357,15 +358,16 @@ module PennMARC
357
358
 
358
359
  # Convert "Lastname, First" to "Lastname, F"
359
360
  # @param [String] name
361
+ # @return [String]
360
362
  def abbreviate_name(name)
361
- name_parts = name.split(', ')
362
- return '' if name_parts.empty?
363
-
364
- first_name_parts = name_parts.last.split
365
- temp_name = "#{name_parts.first}, #{first_name_parts.first[0, 1]}."
366
- first_name_parts.shift
367
- temp_name += " #{first_name_parts.join(' ')}" unless first_name_parts.empty?
368
- temp_name
363
+ name = trim_trailing(:comma, name)
364
+ return name unless name.include? ','
365
+
366
+ after_comma = join_and_squish([trim_trailing(:comma, substring_after(name, ','))])
367
+ before_comma = substring_before(name, ',')
368
+ abbrv = "#{before_comma},"
369
+ abbrv += " #{after_comma.first.upcase}." if after_comma.present?
370
+ abbrv
369
371
  end
370
372
 
371
373
  # Parse creator facet value from given creator field and desired subfields
@@ -164,7 +164,7 @@ module PennMARC
164
164
  # @param [MARC::Record] record
165
165
  # @return [Array<String>]
166
166
  def host_record_id(record)
167
- record.fields(Enriched::Pub::RELATED_RECORD_TAG).filter_map { |field|
167
+ record.fields(Enriched::Pub::RELATED_RECORD_TAGS).filter_map { |field|
168
168
  next unless subfield_value?(field, 'c', /contains/i)
169
169
 
170
170
  subfield_values field, :w
@@ -1,8 +1,12 @@
1
- Aliens: Noncitizens
2
- "Alien criminals": Noncitizen criminals
3
- "Alien detention centers": Immigrant detention centers
4
- "Alien labor": Foreign workers
5
- "Alien property": Foreign-owned property
6
- Gypsies: Romanies
7
- "Illegal Alien Children": Undocumented immigrant children
8
- "Illegal Aliens": Undocumented immigrants
1
+ # Important!: These terms should be listed in an order of decreasing complexity so that terms that may be contained in
2
+ # other terms are not replaced first, e.g., if "Aliens" is listed first, "Illegal Aliens" will get overridden to
3
+ # read "Illegal Noncitizens".
4
+ # Also, use lower case only in the key to support efficient case-insensitive matching.
5
+ "illegal alien children": Undocumented immigrant children
6
+ "alien detention centers": Immigrant detention centers
7
+ "alien criminals": Noncitizen criminals
8
+ "alien property": Foreign-owned property
9
+ "illegal aliens": Undocumented immigrants
10
+ "alien labor": Foreign workers
11
+ gypsies: Romanies
12
+ aliens: Noncitizens
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PennMARC
4
- VERSION = '1.0.27'
4
+ VERSION = '1.0.31'
5
5
  end
data/pennmarc.gemspec CHANGED
@@ -21,7 +21,6 @@ Gem::Specification.new do |s|
21
21
  s.add_dependency 'activesupport', '~> 7'
22
22
  s.add_dependency 'library_stdnums', '~> 1.6'
23
23
  s.add_dependency 'marc', '~> 1.2'
24
- s.add_dependency 'multi_string_replace', '~> 2.0'
25
24
  s.add_dependency 'nokogiri', '~> 1.15'
26
25
 
27
26
  s.metadata['rubygems_mfa_required'] = 'false'
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  describe 'PennMARC::HeadingControl' do
4
- let(:replace_term) { PennMARC::Mappers.heading_overrides.first[0] }
5
- let(:replaced_term) { PennMARC::Mappers.heading_overrides.first[1] }
4
+ let(:replace_term) { PennMARC::Mappers.heading_overrides.keys[2] }
5
+ let(:replaced_term) { PennMARC::Mappers.heading_overrides.values[2] }
6
6
  let(:remove_term) { PennMARC::Mappers.headings_to_remove.first }
7
7
 
8
8
  describe '.process' do
@@ -23,15 +23,22 @@ describe 'PennMARC::HeadingControl' do
23
23
  end
24
24
  end
25
25
 
26
- context 'with a term for replacement' do
27
- it 'replaces the term in isolation' do
28
- values = [replace_term]
29
- expect(PennMARC::HeadingControl.term_override(values)).to eq [replaced_term]
30
- end
26
+ PennMARC::Mappers.heading_overrides.each do |target, replacement|
27
+ context "with the \"#{target}\" term" do
28
+ it 'replaces the term in isolation' do
29
+ values = [target]
30
+ expect(PennMARC::HeadingControl.term_override(values)).to eq [replacement]
31
+ end
32
+
33
+ it 'replaces the term when used with other headings' do
34
+ values = ["#{target}--History"]
35
+ expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replacement}--History"]
36
+ end
31
37
 
32
- it 'replaces the term when used with other headings' do
33
- values = ["#{replace_term}--History"]
34
- expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replaced_term}--History"]
38
+ it 'replaces the term regardless of case' do
39
+ values = ["#{target.titleize}--History"]
40
+ expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replacement}--History"]
41
+ end
35
42
  end
36
43
  end
37
44
 
@@ -9,9 +9,9 @@ describe 'PennMARC::Creator' do
9
9
 
10
10
  context 'with a single author record' do
11
11
  let(:fields) do
12
- [marc_field(tag: '100', subfields: { a: 'Surname, Name', '0': 'http://cool.uri/12345',
12
+ [marc_field(tag: '100', subfields: { a: 'Surname, Name,', '0': 'http://cool.uri/12345',
13
13
  e: 'author', d: '1900-2000' }),
14
- marc_field(tag: '880', subfields: { a: 'Surname, Alternative', '6': '100' })]
14
+ marc_field(tag: '880', subfields: { a: 'Surname, Alternative,', '6': '100' })]
15
15
  end
16
16
 
17
17
  it 'contains the expected search field values for a single author work' do
@@ -109,18 +109,21 @@ describe 'PennMARC::Creator' do
109
109
  end
110
110
  end
111
111
 
112
- context 'with three author records - abbreviated names' do
112
+ context 'with five author records - abbreviated names' do
113
113
  let(:fields) do
114
- [marc_field(tag: '100', subfields: { a: 'Surname, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
114
+ [marc_field(tag: '100', subfields: { a: 'Surname, Alex, ', '0': 'http://cool.uri/12345', d: '1900-2000',
115
115
  e: 'author.', '4': 'http://cool.uri/vocabulary/relators/aut' }),
116
- marc_field(tag: '110', subfields: { a: 'Second, NameX', '0': 'http://cool.uri/12345', d: '1901-2010',
116
+ marc_field(tag: '110', subfields: { a: 'Second, NameX, ', '0': 'http://cool.uri/12345', d: '1901-2010',
117
117
  e: 'author.', '4': 'http://cool.uri/vocabulary/relators/aut' }),
118
- marc_field(tag: '700', subfields: { a: 'Alt, Alternative', '6': '100', d: '1970-' })]
118
+ marc_field(tag: '700', subfields: { a: 'Alt, Alternative', '6': '100', d: '1970-' }),
119
+ marc_field(tag: '100', subfields: { a: 'Name with no comma', e: 'author' }),
120
+ marc_field(tag: '100', subfields: { a: 'Name ends with comma,', e: 'author' })]
119
121
  end
120
122
 
121
123
  it 'returns single author values with no URIs anywhere' do
122
124
  values = helper.authors_list(record, first_initial_only: true)
123
- expect(values).to contain_exactly 'Surname, A.', 'Second, N.', 'Alt, A.'
125
+ expect(values).to contain_exactly 'Surname, A.', 'Second, N.', 'Alt, A.',
126
+ 'Name ends with comma', 'Name with no comma'
124
127
  end
125
128
  end
126
129
  end
@@ -132,14 +135,14 @@ describe 'PennMARC::Creator' do
132
135
  let(:fields) do
133
136
  [marc_field(tag: '100', subfields: { a: 'Hamilton, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
134
137
  e: 'author.' }),
135
- marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham', b: 'I', c: 'laureate', d: '1968', e: 'author',
136
- j: 'pseud', q: 'Fuller Name', u: 'affiliation', '3': 'materials',
138
+ marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham, ', b: 'I', c: 'laureate', d: '1968', e: 'author',
139
+ j: 'pseud', q: 'Fuller Name, ', u: 'affiliation', '3': 'materials',
137
140
  '4': 'aut' }),
138
141
  marc_field(tag: '700', subfields: { a: 'Einstein, Albert', '6': '100', d: '1970-', '4': 'trl',
139
142
  e: 'translator' }),
140
143
  marc_field(tag: '700', subfields: { a: 'Franklin, Ben', '6': '100', d: '1970-', '4': 'edt' }),
141
144
  marc_field(tag: '710', subfields: { a: 'Jefferson, Thomas', '6': '100', d: '1870-', '4': 'edt' }),
142
- marc_field(tag: '700', subfields: { a: 'Dickens, Charles', '6': '100', d: '1970-', '4': 'com' })]
145
+ marc_field(tag: '700', subfields: { a: 'Dickens, Charles, ', '6': '100', d: '1970-', '4': 'com' })]
143
146
  end
144
147
 
145
148
  it 'returns two authors and four contributors' do
@@ -153,7 +156,7 @@ describe 'PennMARC::Creator' do
153
156
 
154
157
  context 'with two authors and four contributors records, with full information and relator' do
155
158
  let(:fields) do
156
- [marc_field(tag: '100', subfields: { a: 'Hamilton, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
159
+ [marc_field(tag: '100', subfields: { a: 'Hamilton, Alex, ', '0': 'http://cool.uri/12345', d: '1900-2000',
157
160
  e: 'author.', '4': 'aut' }),
158
161
  marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham', b: 'I', c: 'laureate', d: '1968', e: 'author',
159
162
  j: 'pseud', q: 'Fuller Name', u: 'affiliation', '3': 'materials',
@@ -164,7 +167,7 @@ describe 'PennMARC::Creator' do
164
167
  marc_field(tag: '710', subfields: { a: 'Jefferson, Thomas', '6': '100', d: '1870-', '4': 'edt' }),
165
168
  marc_field(tag: '700', subfields: { a: 'Dickens, Charles', '6': '100', d: '1970-', '4': 'com' }),
166
169
  marc_field(tag: '880', subfields: { a: '狄更斯', '6': '700', d: '1970-', '4': 'com' }),
167
- marc_field(tag: '700', subfields: { a: 'Twain, Mark', '6': '100', d: '1870-' })]
170
+ marc_field(tag: '700', subfields: { a: 'Twain, Mark,', '6': '100', d: '1870-' })]
168
171
  end
169
172
 
170
173
  it 'returns four contributors' do
@@ -161,16 +161,36 @@ describe 'PennMARC::Identifier' do
161
161
  end
162
162
 
163
163
  describe '.host_record_id' do
164
- let(:record) do
165
- marc_record fields: [
166
- marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAG, subfields: { w: '123456789', c: 'Contains',
167
- a: 'Title' }),
168
- marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAG, subfields: { w: '666666666', c: 'Contained In' })
169
- ]
170
- end
171
-
172
- it 'returns only the desired host record MMS ID values' do
173
- expect(helper.host_record_id(record)).to contain_exactly '123456789'
164
+ context 'with a lower case tag' do
165
+ let(:record) do
166
+ marc_record fields: [
167
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '123456789',
168
+ c: 'Contains',
169
+ a: 'Title' }),
170
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '666666666',
171
+ c: 'Contained In' })
172
+ ]
173
+ end
174
+
175
+ it 'returns only the desired host record MMS ID values' do
176
+ expect(helper.host_record_id(record)).to contain_exactly '123456789'
177
+ end
178
+ end
179
+
180
+ context 'with an upper case tag' do
181
+ let(:record) do
182
+ marc_record fields: [
183
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '123456789',
184
+ c: 'Contains',
185
+ a: 'Title' }),
186
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '666666666',
187
+ c: 'Contained In' })
188
+ ]
189
+ end
190
+
191
+ it 'returns only the desired host record MMS ID values' do
192
+ expect(helper.host_record_id(record)).to contain_exactly '123456789'
193
+ end
174
194
  end
175
195
  end
176
196
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pennmarc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.27
4
+ version: 1.0.31
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Kanning
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2024-07-14 00:00:00.000000000 Z
15
+ date: 2024-07-29 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: activesupport
@@ -56,20 +56,6 @@ dependencies:
56
56
  - - "~>"
57
57
  - !ruby/object:Gem::Version
58
58
  version: '1.2'
59
- - !ruby/object:Gem::Dependency
60
- name: multi_string_replace
61
- requirement: !ruby/object:Gem::Requirement
62
- requirements:
63
- - - "~>"
64
- - !ruby/object:Gem::Version
65
- version: '2.0'
66
- type: :runtime
67
- prerelease: false
68
- version_requirements: !ruby/object:Gem::Requirement
69
- requirements:
70
- - - "~>"
71
- - !ruby/object:Gem::Version
72
- version: '2.0'
73
59
  - !ruby/object:Gem::Dependency
74
60
  name: nokogiri
75
61
  requirement: !ruby/object:Gem::Requirement