RubyGems - pennmarc - Versions diffs - 1.0.27 → 1.0.31 - Mend

pennmarc 1.0.27 → 1.0.31

Files changed (14) hide show

checksums.yaml +4 -4
data/Gemfile +0 -1
data/Gemfile.lock +0 -2
data/lib/pennmarc/enriched.rb +1 -1
data/lib/pennmarc/heading_control.rb +9 -5
data/lib/pennmarc/helpers/creator.rb +17 -15
data/lib/pennmarc/helpers/identifier.rb +1 -1
data/lib/pennmarc/mappings/headings_override.yml +12 -8
data/lib/pennmarc/version.rb +1 -1
data/pennmarc.gemspec +0 -1
data/spec/lib/pennmarc/heading_control_spec.rb +17 -10
data/spec/lib/pennmarc/helpers/creator_spec.rb +15 -12
data/spec/lib/pennmarc/helpers/identifer_spec.rb +30 -10
metadata +2 -16

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a6dec6a3d235117c15b3a37f368533a93e30ce4dfb11ac4832a612f878e653d9
-  data.tar.gz: 4217da9f4a2126e46284c48f4aeaca09016f4abf6ce08c493e039dae20b4bc20
+  metadata.gz: 7a10e18cc63414bbcc1f3ab3d0dbb47517552830f835a0ca10c50added035e9e
+  data.tar.gz: 0153e3bb11fe9646f0b3e6614ef2229654d70d60a0bace46e3d57a9c49800ccd
 SHA512:
-  metadata.gz: 8f3f0163eb6c3b2afeb8bd8845762c1c208bbbb9efb586e3978f5f5584f73159adb2649a801d78c090e6349c72f69197b1e721acd3bdbe3a79e9f99ac11f23ee
-  data.tar.gz: d4595b310f8a7b765a16738799b642fc05f680b681d4e090bca11a903ca7b748a25ca0981a260290c8544faea91b9437f7b98358f5358551e787dc0ae9ab8531
+  metadata.gz: d9fa4ea2d2e36736f240e8a9431d487d9195e92a1eecf2e847c8cbda94b9d7e5da1029654754b4582d1036ffb453115bf6e88f2c09e8668704830c2ff46c78a3
+  data.tar.gz: fb4a80ff8479b248e90c33c2eb43038d957ad93de3b3978ec964bdffba11621106a8fb61ff7feb4aeb03c8dfec251f0af7da4a7a00751e34d122312059bdf6a2

data/Gemfile CHANGED Viewed

@@ -5,7 +5,6 @@ source 'https://rubygems.org'
 gem 'activesupport', '~> 7'
 gem 'library_stdnums', '~> 1.6'
 gem 'marc', '~> 1.2'
-gem 'multi_string_replace', '~> 2.0'
 gem 'nokogiri', '~> 1.15'
 gem 'rake', '~> 13.0'
 gem 'upennlib-rubocop', require: false

data/Gemfile.lock CHANGED Viewed

@@ -19,7 +19,6 @@ GEM
       scrub_rb (>= 1.0.1, < 2)
       unf
     minitest (5.18.0)
-    multi_string_replace (2.0.2)
     nokogiri (1.15.2-arm64-darwin)
       racc (~> 1.4)
     nokogiri (1.15.2-x64-mingw-ucrt)
@@ -114,7 +113,6 @@ DEPENDENCIES
   activesupport (~> 7)
   library_stdnums (~> 1.6)
   marc (~> 1.2)
-  multi_string_replace (~> 2.0)
   nokogiri (~> 1.15)
   rake (~> 13.0)
   rspec (~> 3.12)

data/lib/pennmarc/enriched.rb CHANGED Viewed

@@ -12,7 +12,7 @@ module PennMARC
       PHYS_INVENTORY_TAG = 'hld'
       ELEC_INVENTORY_TAG = 'prt'
       ITEM_TAG = 'itm'
-      RELATED_RECORD_TAG = 'rel'
+      RELATED_RECORD_TAGS = %w[REL rel].freeze
       # Subfields for HLD tags
       # Follow MARC 852 spec: https://www.loc.gov/marc/holdings/hd852.html, but names are translated into Alma parlance

data/lib/pennmarc/heading_control.rb CHANGED Viewed

@@ -1,7 +1,5 @@
 # frozen_string_literal: true
-require 'multi_string_replace'
 module PennMARC
   # Shared tools and values for controlling handling of subject or genre headings
   class HeadingControl
@@ -10,6 +8,9 @@ module PennMARC
     ALLOWED_SOURCE_CODES = %w[aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
                               local/osu mesh ndlsh nli nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp].freeze
+    REMOVE_TERM_REGEX = /#{Mappers.headings_to_remove&.join('|')}/i
+    REPLACE_TERM_REGEX = /(#{Mappers.heading_overrides.keys.join('|')})/i
     class << self
       # Replace or remove any terms in provided values pursuant to the configuration in remove and override mappers.
       # Used to remove or replace offensive or otherwise undesirable subject headings.
@@ -18,10 +19,13 @@ module PennMARC
       def term_override(values)
         values.filter_map do |value|
           # Remove values if they contain a remove term
-          next nil if value.match?(/#{Mappers.headings_to_remove&.join('|')}/i)
+          next nil if value.match?(REMOVE_TERM_REGEX)
+          # return early if there are no terms to replace
+          next value if value.match(REPLACE_TERM_REGEX).nil?
-          # Replace values using multi_string_replace gem
-          MultiStringReplace.replace value, Mappers.heading_overrides
+          # lookup and perform replacement
+          value.sub(::Regexp.last_match.to_s, Mappers.heading_overrides[::Regexp.last_match.to_s.downcase])
         end
       end
     end

data/lib/pennmarc/helpers/creator.rb CHANGED Viewed

@@ -99,10 +99,9 @@ module PennMARC
         fields = record.fields(tags)
         fields.filter_map { |field|
-          if first_initial_only
-            abbreviate_name(field['a']) if field['a']
-          else
-            field['a']
+          if field['a'].present?
+            name = trim_trailing(:comma, field['a'])
+            first_initial_only ? abbreviate_name(name) : name
           end
         }.uniq
       end
@@ -131,10 +130,11 @@ module PennMARC
           relator = 'Contributor' if relator.blank?
           relator = trim_punctuation(relator).capitalize
+          name = trim_trailing(:comma, field['a'])
           name = if name_only
-                   field['a']
+                   name
                  else
-                   join_subfields(field, &subfield_in?(%w[a b c d j q u 3])) + ", #{relator}"
+                   "#{name} #{join_subfields(field, &subfield_in?(%w[b c d j q u 3]))}, #{relator}"
                  end
           if contributors.key?(relator)
@@ -328,7 +328,7 @@ module PennMARC
         relator_term_sf = relator_term_subfield(field)
         name = field.filter_map { |sf|
           if sf.code == 'a'
-            should_convert_name_order ? convert_name_order(sf.value) : sf.value
+            should_convert_name_order ? convert_name_order(sf.value) : trim_trailing(:comma, sf.value)
           elsif sf.code == relator_term_sf
             next
           elsif NAME_EXCLUDED_SUBFIELDS.exclude?(sf.code)
@@ -348,6 +348,7 @@ module PennMARC
       # @param name [String] value for processing
       # @return [String]
       def convert_name_order(name)
+        name = trim_trailing(:comma, name)
         return name unless name.include? ','
         after_comma = join_and_squish([trim_trailing(:comma, substring_after(name, ', '))])
@@ -357,15 +358,16 @@ module PennMARC
       # Convert "Lastname, First" to "Lastname, F"
       # @param [String] name
+      # @return [String]
       def abbreviate_name(name)
-        name_parts = name.split(', ')
-        return '' if name_parts.empty?
-        first_name_parts = name_parts.last.split
-        temp_name = "#{name_parts.first}, #{first_name_parts.first[0, 1]}."
-        first_name_parts.shift
-        temp_name += " #{first_name_parts.join(' ')}" unless first_name_parts.empty?
-        temp_name
+        name = trim_trailing(:comma, name)
+        return name unless name.include? ','
+        after_comma = join_and_squish([trim_trailing(:comma, substring_after(name, ','))])
+        before_comma = substring_before(name, ',')
+        abbrv = "#{before_comma},"
+        abbrv += " #{after_comma.first.upcase}." if after_comma.present?
+        abbrv
       end
       # Parse creator facet value from given creator field and desired subfields

data/lib/pennmarc/helpers/identifier.rb CHANGED Viewed

@@ -164,7 +164,7 @@ module PennMARC
       # @param [MARC::Record] record
       # @return [Array<String>]
       def host_record_id(record)
-        record.fields(Enriched::Pub::RELATED_RECORD_TAG).filter_map { |field|
+        record.fields(Enriched::Pub::RELATED_RECORD_TAGS).filter_map { |field|
           next unless subfield_value?(field, 'c', /contains/i)
           subfield_values field, :w

data/lib/pennmarc/mappings/headings_override.yml CHANGED Viewed

@@ -1,8 +1,12 @@
-Aliens: Noncitizens
-"Alien criminals": Noncitizen criminals
-"Alien detention centers": Immigrant detention centers
-"Alien labor": Foreign workers
-"Alien property": Foreign-owned property
-Gypsies: Romanies
-"Illegal Alien Children": Undocumented immigrant children
-"Illegal Aliens": Undocumented immigrants
+# Important!: These terms should be listed in an order of decreasing complexity so that terms that may be contained in
+# other terms are not replaced first, e.g., if "Aliens" is listed first, "Illegal Aliens" will be overridden to
+# read "Illegal Noncitizens".
+# Also, use lower case only in the key to support efficient case-insensitive matching.
+"illegal alien children": Undocumented immigrant children
+"alien detention centers": Immigrant detention centers
+"alien criminals": Noncitizen criminals
+"alien property": Foreign-owned property
+"illegal aliens": Undocumented immigrants
+"alien labor": Foreign workers
+gypsies: Romanies
+aliens: Noncitizens

data/lib/pennmarc/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module PennMARC
-  VERSION = '1.0.27'
+  VERSION = '1.0.31'
 end

data/pennmarc.gemspec CHANGED Viewed

@@ -21,7 +21,6 @@ Gem::Specification.new do |s|
   s.add_dependency 'activesupport', '~> 7'
   s.add_dependency 'library_stdnums', '~> 1.6'
   s.add_dependency 'marc', '~> 1.2'
-  s.add_dependency 'multi_string_replace', '~> 2.0'
   s.add_dependency 'nokogiri', '~> 1.15'
   s.metadata['rubygems_mfa_required'] = 'false'

data/spec/lib/pennmarc/heading_control_spec.rb CHANGED Viewed

@@ -1,8 +1,8 @@
 # frozen_string_literal: true
 describe 'PennMARC::HeadingControl' do
-  let(:replace_term) { PennMARC::Mappers.heading_overrides.first[0] }
-  let(:replaced_term) { PennMARC::Mappers.heading_overrides.first[1] }
+  let(:replace_term) { PennMARC::Mappers.heading_overrides.keys[2] }
+  let(:replaced_term) { PennMARC::Mappers.heading_overrides.values[2] }
   let(:remove_term) { PennMARC::Mappers.headings_to_remove.first }
   describe '.process' do
@@ -23,15 +23,22 @@ describe 'PennMARC::HeadingControl' do
       end
     end
-    context 'with a term for replacement' do
-      it 'replaces the term in isolation' do
-        values = [replace_term]
-        expect(PennMARC::HeadingControl.term_override(values)).to eq [replaced_term]
-      end
+    PennMARC::Mappers.heading_overrides.each do |target, replacement|
+      context "with the \"#{target}\" term" do
+        it 'replaces the term in isolation' do
+          values = [target]
+          expect(PennMARC::HeadingControl.term_override(values)).to eq [replacement]
+        end
+        it 'replaces the term when used with other headings' do
+          values = ["#{target}--History"]
+          expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replacement}--History"]
+        end
-      it 'replaces the term when used with other headings' do
-        values = ["#{replace_term}--History"]
-        expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replaced_term}--History"]
+        it 'replaces the term regardless of case' do
+          values = ["#{target.titleize}--History"]
+          expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replacement}--History"]
+        end
       end
     end

data/spec/lib/pennmarc/helpers/creator_spec.rb CHANGED Viewed

@@ -9,9 +9,9 @@ describe 'PennMARC::Creator' do
     context 'with a single author record' do
       let(:fields) do
-        [marc_field(tag: '100', subfields: { a: 'Surname, Name', '0': 'http://cool.uri/12345',
+        [marc_field(tag: '100', subfields: { a: 'Surname, Name,', '0': 'http://cool.uri/12345',
                                              e: 'author', d: '1900-2000' }),
-         marc_field(tag: '880', subfields: { a: 'Surname, Alternative', '6': '100' })]
+         marc_field(tag: '880', subfields: { a: 'Surname, Alternative,', '6': '100' })]
       end
       it 'contains the expected search field values for a single author work' do
@@ -109,18 +109,21 @@ describe 'PennMARC::Creator' do
       end
     end
-    context 'with three author records - abbreviated names' do
+    context 'with five author records - abbreviated names' do
       let(:fields) do
-        [marc_field(tag: '100', subfields: { a: 'Surname, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
+        [marc_field(tag: '100', subfields: { a: 'Surname, Alex, ', '0': 'http://cool.uri/12345', d: '1900-2000',
                                              e: 'author.', '4': 'http://cool.uri/vocabulary/relators/aut' }),
-         marc_field(tag: '110', subfields: { a: 'Second, NameX', '0': 'http://cool.uri/12345', d: '1901-2010',
+         marc_field(tag: '110', subfields: { a: 'Second, NameX,  ', '0': 'http://cool.uri/12345', d: '1901-2010',
                                              e: 'author.', '4': 'http://cool.uri/vocabulary/relators/aut' }),
-         marc_field(tag: '700', subfields: { a: 'Alt, Alternative', '6': '100', d: '1970-' })]
+         marc_field(tag: '700', subfields: { a: 'Alt, Alternative', '6': '100', d: '1970-' }),
+         marc_field(tag: '100', subfields: { a: 'Name with no comma', e: 'author' }),
+         marc_field(tag: '100', subfields: { a: 'Name ends with comma,', e: 'author' })]
       end
       it 'returns single author values with no URIs anywhere' do
         values = helper.authors_list(record, first_initial_only: true)
-        expect(values).to contain_exactly 'Surname, A.', 'Second, N.', 'Alt, A.'
+        expect(values).to contain_exactly 'Surname, A.', 'Second, N.', 'Alt, A.',
+                                          'Name ends with comma', 'Name with no comma'
       end
     end
   end
@@ -132,14 +135,14 @@ describe 'PennMARC::Creator' do
       let(:fields) do
         [marc_field(tag: '100', subfields: { a: 'Hamilton, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
                                              e: 'author.' }),
-         marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham', b: 'I', c: 'laureate', d: '1968', e: 'author',
-                                             j: 'pseud', q: 'Fuller Name', u: 'affiliation', '3': 'materials',
+         marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham,   ', b: 'I', c: 'laureate', d: '1968', e: 'author',
+                                             j: 'pseud', q: 'Fuller Name,  ', u: 'affiliation', '3': 'materials',
                                              '4': 'aut' }),
          marc_field(tag: '700', subfields: { a: 'Einstein, Albert', '6': '100', d: '1970-', '4': 'trl',
                                              e: 'translator' }),
          marc_field(tag: '700', subfields: { a: 'Franklin, Ben', '6': '100', d: '1970-', '4': 'edt' }),
          marc_field(tag: '710', subfields: { a: 'Jefferson, Thomas', '6': '100', d: '1870-', '4': 'edt' }),
-         marc_field(tag: '700', subfields: { a: 'Dickens, Charles', '6': '100', d: '1970-', '4': 'com' })]
+         marc_field(tag: '700', subfields: { a: 'Dickens, Charles, ', '6': '100', d: '1970-', '4': 'com' })]
       end
       it 'returns two authors and four contributors' do
@@ -153,7 +156,7 @@ describe 'PennMARC::Creator' do
     context 'with two authors and four contributors records, with full information and relator' do
       let(:fields) do
-        [marc_field(tag: '100', subfields: { a: 'Hamilton, Alex', '0': 'http://cool.uri/12345', d: '1900-2000',
+        [marc_field(tag: '100', subfields: { a: 'Hamilton, Alex,  ', '0': 'http://cool.uri/12345', d: '1900-2000',
                                              e: 'author.', '4': 'aut' }),
          marc_field(tag: '100', subfields: { a: 'Lincoln, Abraham', b: 'I', c: 'laureate', d: '1968', e: 'author',
                                              j: 'pseud', q: 'Fuller Name', u: 'affiliation', '3': 'materials',
@@ -164,7 +167,7 @@ describe 'PennMARC::Creator' do
          marc_field(tag: '710', subfields: { a: 'Jefferson, Thomas', '6': '100', d: '1870-', '4': 'edt' }),
          marc_field(tag: '700', subfields: { a: 'Dickens, Charles', '6': '100', d: '1970-', '4': 'com' }),
          marc_field(tag: '880', subfields: { a: '狄更斯', '6': '700', d: '1970-', '4': 'com' }),
-         marc_field(tag: '700', subfields: { a: 'Twain, Mark', '6': '100', d: '1870-' })]
+         marc_field(tag: '700', subfields: { a: 'Twain, Mark,', '6': '100', d: '1870-' })]
       end
       it 'returns four contributors' do

data/spec/lib/pennmarc/helpers/identifer_spec.rb CHANGED Viewed

@@ -161,16 +161,36 @@ describe 'PennMARC::Identifier' do
   end
   describe '.host_record_id' do
-    let(:record) do
-      marc_record fields: [
-        marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAG, subfields: { w: '123456789', c: 'Contains',
-                                                                                  a: 'Title' }),
-        marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAG, subfields: { w: '666666666', c: 'Contained In' })
-      ]
-    end
-    it 'returns only the desired host record MMS ID values' do
-      expect(helper.host_record_id(record)).to contain_exactly '123456789'
+    context 'with a lower case tag' do
+      let(:record) do
+        marc_record fields: [
+          marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '123456789',
+                                                                                            c: 'Contains',
+                                                                                            a: 'Title' }),
+          marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '666666666',
+                                                                                            c: 'Contained In' })
+        ]
+      end
+      it 'returns only the desired host record MMS ID values' do
+        expect(helper.host_record_id(record)).to contain_exactly '123456789'
+      end
+    end
+    context 'with an upper case tag' do
+      let(:record) do
+        marc_record fields: [
+          marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '123456789',
+                                                                                           c: 'Contains',
+                                                                                           a: 'Title' }),
+          marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '666666666',
+                                                                                           c: 'Contained In' })
+        ]
+      end
+      it 'returns only the desired host record MMS ID values' do
+        expect(helper.host_record_id(record)).to contain_exactly '123456789'
+      end
     end
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: pennmarc
 version: !ruby/object:Gem::Version
-  version: 1.0.27
+  version: 1.0.31
 platform: ruby
 authors:
 - Mike Kanning
@@ -12,7 +12,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-07-14 00:00:00.000000000 Z
+date: 2024-07-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
@@ -56,20 +56,6 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '1.2'
-- !ruby/object:Gem::Dependency
-  name: multi_string_replace
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '2.0'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '2.0'
 - !ruby/object:Gem::Dependency
   name: nokogiri
   requirement: !ruby/object:Gem::Requirement