pennmarc 1.0.26 → 1.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8b4d6a53347f4532faa566debcc59bde1fb7043cfbd4530d0a06489a36db8599
4
- data.tar.gz: 8a92c36c80406541ac454025e6c2adc53af80dab20af5af16b8995dc5bacfe9d
3
+ metadata.gz: '07810ba2a6de07713ce40a1cfbe7fc33d5497eec834011a0098ce2cbd74bc5ff'
4
+ data.tar.gz: 45c3f0002e46be338a63be2109153f7603d9ebf8b1814d825b4c889c7f7237b4
5
5
  SHA512:
6
- metadata.gz: 8b1f0be0caba6a00adc8ccc5516556b2de250d93861157c49a60e17f52810b6eed7942efe8b51843d7428bb329c6dcad58a0887c17e9132004fe3d7d29729682
7
- data.tar.gz: b06dad73899b13254391b74832d3b99745d82bc9f871801d7a617e2416f9d70039d45a9122a292377628b638c4044e692432688dbce48497cb6394fa48ba0d6e
6
+ metadata.gz: 9a214837d1f061611a13ea7d09b13ae41d7f6f7df8f08d03da3d98d8969849aacb0edfd45a3fea231acb1e53f8c06c4e4793db49cf5274063d6c59d7b7043212
7
+ data.tar.gz: 1a6dd99e562ef38b301e49ac17b6ec211eb0af1e7bdccc6a0d98c15683163f9c4168e51b8560296282a42d6dacdbfbb5d8856628af7000614e48961cbcb6b19c
data/Gemfile CHANGED
@@ -5,6 +5,7 @@ source 'https://rubygems.org'
5
5
  gem 'activesupport', '~> 7'
6
6
  gem 'library_stdnums', '~> 1.6'
7
7
  gem 'marc', '~> 1.2'
8
+ gem 'multi_string_replace', '~> 2.0'
8
9
  gem 'nokogiri', '~> 1.15'
9
10
  gem 'rake', '~> 13.0'
10
11
  gem 'upennlib-rubocop', require: false
data/Gemfile.lock CHANGED
@@ -19,6 +19,7 @@ GEM
19
19
  scrub_rb (>= 1.0.1, < 2)
20
20
  unf
21
21
  minitest (5.18.0)
22
+ multi_string_replace (2.0.2)
22
23
  nokogiri (1.15.2-arm64-darwin)
23
24
  racc (~> 1.4)
24
25
  nokogiri (1.15.2-x64-mingw-ucrt)
@@ -113,6 +114,7 @@ DEPENDENCIES
113
114
  activesupport (~> 7)
114
115
  library_stdnums (~> 1.6)
115
116
  marc (~> 1.2)
117
+ multi_string_replace (~> 2.0)
116
118
  nokogiri (~> 1.15)
117
119
  rake (~> 13.0)
118
120
  rspec (~> 3.12)
data/README.md CHANGED
@@ -7,13 +7,28 @@ the "Nouveau Franklin" project aka [discovery_app](https://gitlab.library.upenn.
7
7
  When included in a project, it should be utilized like this:
8
8
 
9
9
  ```ruby
10
- parser = PennMARC::Parser.new # eventually we will pass in some mappings...
10
+ parser = PennMARC::Parser.new
11
11
  puts parser.title_show(marc_record) # Title intended for display
12
12
  ```
13
13
 
14
14
  All methods will require a `MARC::Record` object. For more about these, see the
15
15
  [ruby-marc](https://github.com/ruby-marc/ruby-marc) gem documentation
16
16
 
17
+ ## Term Overriding
18
+
19
+ This gem provides configuration as well as a method for overriding and removing terms that are undesirable. In your app,
20
+ you can remove or replace the configured terms like so:
21
+
22
+ ```ruby
23
+ improved_values = PennMARC::HeadingControl.term_override(values)
24
+ ```
25
+
26
+ This will remove any elements of the `values` array that include any terms defined in `mappings/headings_remove.yml` and
27
+ replace any terms defined in the `headings_override.yml` file.
28
+
29
+ By default, terms are replaced for `Subject#*show` and `Subject#facet` methods. You can bypass the default overriding on
30
+ these methods by passing `override: false`.
31
+
17
32
  ## Development
18
33
 
19
34
  ### Requirements
@@ -12,7 +12,7 @@ module PennMARC
12
12
  PHYS_INVENTORY_TAG = 'hld'
13
13
  ELEC_INVENTORY_TAG = 'prt'
14
14
  ITEM_TAG = 'itm'
15
- RELATED_RECORD_TAG = 'rel'
15
+ RELATED_RECORD_TAGS = %w[REL rel].freeze
16
16
 
17
17
  # Subfields for HLD tags
18
18
  # Follow MARC 852 spec: https://www.loc.gov/marc/holdings/hd852.html, but names are translated into Alma parlance
@@ -1,11 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'multi_string_replace'
4
+
3
5
  module PennMARC
4
- # Shared values for controlling inclusion of subject or genre headings
5
- module HeadingControl
6
- # These codes are expected to be found in sf2 when the indicator2 value is 7, indicating "source specified". There
7
- # are some sources whose headings we don't want to display.
6
+ # Shared tools and values for controlling handling of subject or genre headings
7
+ class HeadingControl
8
+ # These codes are expected to be found in sf2 of a subject/genre field when the indicator2 value is 7, indicating
9
+ # "source specified". There are some sources whose headings we don't want to display.
8
10
  ALLOWED_SOURCE_CODES = %w[aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
9
11
  local/osu mesh ndlsh nli nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp].freeze
12
+
13
+ class << self
14
+ # Replace or remove any terms in provided values pursuant to the configuration in remove and override mappers.
15
+ # Used to remove or replace offensive or otherwise undesirable subject headings.
16
+ # @param values [Array]
17
+ # @return [Array] values with terms removed/replaced
18
+ def term_override(values)
19
+ values.filter_map do |value|
20
+ # Remove values if they contain a remove term
21
+ next nil if value.match?(/#{Mappers.headings_to_remove&.join('|')}/i)
22
+
23
+ # Replace values using multi_string_replace gem
24
+ MultiStringReplace.replace value, Mappers.heading_overrides
25
+ end
26
+ end
27
+ end
10
28
  end
11
29
  end
@@ -164,7 +164,7 @@ module PennMARC
164
164
  # @param [MARC::Record] record
165
165
  # @return [Array<String>]
166
166
  def host_record_id(record)
167
- record.fields(Enriched::Pub::RELATED_RECORD_TAG).filter_map { |field|
167
+ record.fields(Enriched::Pub::RELATED_RECORD_TAGS).filter_map { |field|
168
168
  next unless subfield_value?(field, 'c', /contains/i)
169
169
 
170
170
  subfield_values field, :w
@@ -62,67 +62,76 @@ module PennMARC
62
62
  #
63
63
  # @note this is ported mostly from MG's new-style Subject parsing
64
64
  # @param [MARC::Record] record
65
+ # @param [Boolean] override to remove undesirable terms or not
65
66
  # @return [Array<String>] array of all subject values for faceting
66
- def facet(record)
67
- subject_fields(record, type: :facet).filter_map { |field|
67
+ def facet(record, override: true)
68
+ values = subject_fields(record, type: :facet).filter_map { |field|
68
69
  term_hash = build_subject_hash(field)
69
70
  next if term_hash.blank? || term_hash[:count]&.zero?
70
71
 
71
72
  format_term type: :facet, term: term_hash
72
73
  }.uniq
74
+ override ? HeadingControl.term_override(values) : values
73
75
  end
74
76
 
75
77
  # All Subjects for display. This includes all {DISPLAY_TAGS} and {LOCAL_TAGS}. For tags that specify a source,
76
78
  # only those with an allowed source code (see ALLOWED_SOURCE_CODES) are included.
77
79
  #
78
80
  # @param [MARC::Record] record
81
+ # @param [Boolean] override to remove undesirable terms or not
79
82
  # @return [Array] array of all subject values for display
80
- def show(record)
81
- subject_fields(record, type: :all).filter_map { |field|
83
+ def show(record, override: true)
84
+ values = subject_fields(record, type: :all).filter_map { |field|
82
85
  term_hash = build_subject_hash(field)
83
86
  next if term_hash.blank? || term_hash[:count]&.zero?
84
87
 
85
88
  format_term type: :display, term: term_hash
86
89
  }.uniq
90
+ override ? HeadingControl.term_override(values) : values
87
91
  end
88
92
 
89
93
  # Get Subjects from "Children" ontology
90
94
  #
91
95
  # @param [MARC::Record] record
96
+ # @param [Boolean] override to remove undesirable terms or not
92
97
  # @return [Array] array of children's subject values for display
93
- def childrens_show(record)
94
- subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '1' })
95
- .filter_map { |field|
96
- term_hash = build_subject_hash(field)
97
- next if term_hash.blank? || term_hash[:count]&.zero?
98
-
99
- format_term type: :display, term: term_hash
100
- }.uniq
98
+ def childrens_show(record, override: true)
99
+ values = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '1' })
100
+ .filter_map { |field|
101
+ term_hash = build_subject_hash(field)
102
+ next if term_hash.blank? || term_hash[:count]&.zero?
103
+
104
+ format_term type: :display, term: term_hash
105
+ }.uniq
106
+ override ? HeadingControl.term_override(values) : values
101
107
  end
102
108
 
103
109
  # Get Subjects from "MeSH" ontology
104
110
  #
105
111
  # @param [MARC::Record] record
112
+ # @param [Boolean] override to remove undesirable terms or not
106
113
  # @return [Array] array of MeSH subject values for display
107
- def medical_show(record)
108
- subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '2' })
109
- .filter_map { |field|
110
- term_hash = build_subject_hash(field)
111
- next if term_hash.blank? || term_hash[:count]&.zero?
112
-
113
- format_term type: :display, term: term_hash
114
- }.uniq
114
+ def medical_show(record, override: true)
115
+ values = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '2' })
116
+ .filter_map { |field|
117
+ term_hash = build_subject_hash(field)
118
+ next if term_hash.blank? || term_hash[:count]&.zero?
119
+
120
+ format_term type: :display, term: term_hash
121
+ }.uniq
122
+ override ? HeadingControl.term_override(values) : values
115
123
  end
116
124
 
117
125
  # Get Subject values from {DISPLAY_TAGS} where indicator2 is 4 and {LOCAL_TAGS}. Do not include any values where
118
126
  # sf2 includes "penncoi" (Community of Interest).
119
127
  #
120
128
  # @param [MARC::Record] record
129
+ # @param [Boolean] override to remove undesirable terms
121
130
  # @return [Array] array of local subject values for display
122
- def local_show(record)
131
+ def local_show(record, override: true)
123
132
  local_fields = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '4' }) +
124
133
  subject_fields(record, type: :local)
125
- local_fields.filter_map { |field|
134
+ values = local_fields.filter_map { |field|
126
135
  next if subfield_value?(field, '2', /penncoi/)
127
136
 
128
137
  term_hash = build_subject_hash(field)
@@ -130,6 +139,7 @@ module PennMARC
130
139
 
131
140
  format_term type: :display, term: term_hash
132
141
  }.uniq
142
+ override ? HeadingControl.term_override(values) : values
133
143
  end
134
144
 
135
145
  private
@@ -4,46 +4,58 @@ module PennMARC
4
4
  # reusable static mappers
5
5
  class Mappers
6
6
  class << self
7
- # @return [Hash]
7
+ # @return [Hash, nil]
8
+ def heading_overrides
9
+ @heading_overrides ||= load_map('headings_override.yml', symbolize_names: false)
10
+ end
11
+
12
+ # @return [Array<String>, nil]
13
+ def headings_to_remove
14
+ @headings_to_remove ||= load_map('headings_remove.yml', symbolize_names: false)
15
+ end
16
+
17
+ # @return [Hash, nil]
8
18
  def iso_639_2_language
9
19
  @iso_639_2_language ||= load_map('iso639-2-languages.yml')
10
20
  end
11
21
 
22
+ # @return [Hash, nil]
12
23
  def iso_639_3_language
13
24
  @iso_639_3_language ||= load_map('iso639-3-languages.yml')
14
25
  end
15
26
 
16
- # @return [Hash]
27
+ # @return [Hash, nil]
17
28
  def location
18
29
  @location ||= load_map('locations.yml')
19
30
  end
20
31
 
21
- # @return [Hash]
32
+ # @return [Hash, nil]
22
33
  def location_overrides
23
34
  @location_overrides ||= load_map('location_overrides.yml')
24
35
  end
25
36
 
26
- # @return [Hash]
37
+ # @return [Hash, nil]
27
38
  def relator
28
39
  @relator ||= load_map('relator.yml')
29
40
  end
30
41
 
31
- # @return [Hash]
42
+ # @return [Hash, nil]
32
43
  def loc_classification
33
44
  @loc_classification ||= load_map('loc_classification.yml')
34
45
  end
35
46
 
36
- # @return [Hash]
47
+ # @return [Hash, nil]
37
48
  def dewey_classification
38
49
  @dewey_classification ||= load_map('dewey_classification.yml')
39
50
  end
40
51
 
41
- # @param [String] filename of mapping file in config directory, with file extension
42
- # @return [Hash] mapping as hash
43
- def load_map(filename)
52
+ # @param filename [String] name of mapping file in config directory, with file extension
53
+ # @param symbolize_names [Boolean] whether or not to symbolize keys in returned hash
54
+ # @return [Hash, nil] mapping as hash
55
+ def load_map(filename, symbolize_names: true)
44
56
  puts { "Loading #{filename}" }
45
57
  YAML.safe_load(File.read(File.join(File.expand_path(__dir__), 'mappings', filename)),
46
- symbolize_names: true)
58
+ symbolize_names: symbolize_names)
47
59
  end
48
60
  end
49
61
  end
@@ -0,0 +1,8 @@
1
+ Aliens: Noncitizens
2
+ "Alien criminals": Noncitizen criminals
3
+ "Alien detention centers": Immigrant detention centers
4
+ "Alien labor": Foreign workers
5
+ "Alien property": Foreign-owned property
6
+ Gypsies: Romanies
7
+ "Illegal Alien Children": Undocumented immigrant children
8
+ "Illegal Aliens": Undocumented immigrants
@@ -0,0 +1,2 @@
1
+ - Jewish Question
2
+ - Yellow Peril
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PennMARC
4
- VERSION = '1.0.26'
4
+ VERSION = '1.0.30'
5
5
  end
data/pennmarc.gemspec CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
10
10
  s.summary = 'Penn Libraries Catalog MARC parsing wisdom for cross-project usage'
11
11
  s.description = 'This gem provides methods for parsing a Penn Libraries MARCXML record into string, array and date
12
12
  objects for use in discovery or preservation applications.'
13
- s.authors = ['Mike Kanning', 'Amrey Mathurin', 'Patrick Perkins']
13
+ s.authors = ['Mike Kanning', 'Amrey Mathurin', 'Patrick Perkins', 'Katherine Schultz', 'Baowei Wei']
14
14
  s.email = 'mkanning@upenn.edu'
15
15
  s.files = `git ls-files`.split($OUTPUT_RECORD_SEPARATOR)
16
16
  s.homepage = 'https://gitlab.library.upenn.edu/dld/catalog/pennmarc'
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
  s.add_dependency 'activesupport', '~> 7'
22
22
  s.add_dependency 'library_stdnums', '~> 1.6'
23
23
  s.add_dependency 'marc', '~> 1.2'
24
+ s.add_dependency 'multi_string_replace', '~> 2.0'
24
25
  s.add_dependency 'nokogiri', '~> 1.15'
25
26
 
26
27
  s.metadata['rubygems_mfa_required'] = 'false'
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ describe 'PennMARC::HeadingControl' do
4
+ let(:replace_term) { PennMARC::Mappers.heading_overrides.first[0] }
5
+ let(:replaced_term) { PennMARC::Mappers.heading_overrides.first[1] }
6
+ let(:remove_term) { PennMARC::Mappers.headings_to_remove.first }
7
+
8
+ describe '.process' do
9
+ context 'with a term for removal' do
10
+ it 'removes the term if found in isolation' do
11
+ values = [remove_term]
12
+ expect(PennMARC::HeadingControl.term_override(values)).to eq []
13
+ end
14
+
15
+ it 'removes the term regardless of case' do
16
+ values = [remove_term.downcase]
17
+ expect(PennMARC::HeadingControl.term_override(values)).to eq []
18
+ end
19
+
20
+ it 'removes the term if it is included as a substring' do
21
+ values = ["#{remove_term}--History"]
22
+ expect(PennMARC::HeadingControl.term_override(values)).to eq []
23
+ end
24
+ end
25
+
26
+ context 'with a term for replacement' do
27
+ it 'replaces the term in isolation' do
28
+ values = [replace_term]
29
+ expect(PennMARC::HeadingControl.term_override(values)).to eq [replaced_term]
30
+ end
31
+
32
+ it 'replaces the term when used with other headings' do
33
+ values = ["#{replace_term}--History"]
34
+ expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replaced_term}--History"]
35
+ end
36
+ end
37
+
38
+ context 'with a variety of terms' do
39
+ it 'removes and replaces terms as needed' do
40
+ values = [remove_term, replace_term, 'History']
41
+ expect(PennMARC::HeadingControl.term_override(values)).to contain_exactly 'History', replaced_term
42
+ end
43
+ end
44
+ end
45
+ end
@@ -161,16 +161,36 @@ describe 'PennMARC::Identifier' do
161
161
  end
162
162
 
163
163
  describe '.host_record_id' do
164
- let(:record) do
165
- marc_record fields: [
166
- marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAG, subfields: { w: '123456789', c: 'Contains',
167
- a: 'Title' }),
168
- marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAG, subfields: { w: '666666666', c: 'Contained In' })
169
- ]
170
- end
171
-
172
- it 'returns only the desired host record MMS ID values' do
173
- expect(helper.host_record_id(record)).to contain_exactly '123456789'
164
+ context 'with a lower case tag' do
165
+ let(:record) do
166
+ marc_record fields: [
167
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '123456789',
168
+ c: 'Contains',
169
+ a: 'Title' }),
170
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '666666666',
171
+ c: 'Contained In' })
172
+ ]
173
+ end
174
+
175
+ it 'returns only the desired host record MMS ID values' do
176
+ expect(helper.host_record_id(record)).to contain_exactly '123456789'
177
+ end
178
+ end
179
+
180
+ context 'with an upper case tag' do
181
+ let(:record) do
182
+ marc_record fields: [
183
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '123456789',
184
+ c: 'Contains',
185
+ a: 'Title' }),
186
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '666666666',
187
+ c: 'Contained In' })
188
+ ]
189
+ end
190
+
191
+ it 'returns only the desired host record MMS ID values' do
192
+ expect(helper.host_record_id(record)).to contain_exactly '123456789'
193
+ end
174
194
  end
175
195
  end
176
196
  end
@@ -302,6 +302,25 @@ describe 'PennMARC::Subject' do
302
302
  expect(values).to contain_exactly 'Philosophy in motion pictures.'
303
303
  end
304
304
  end
305
+
306
+ context 'with headings that contain terms for removal and replacement' do
307
+ let(:fields) do
308
+ [marc_field(tag: '650', subfields: { a: 'History.' }),
309
+ marc_field(tag: '650', subfields: { a: PennMARC::Mappers.headings_to_remove.first }),
310
+ marc_field(tag: '650', subfields: { a: PennMARC::Mappers.heading_overrides.first[0] })]
311
+ end
312
+
313
+ it 'removes and replaces terms as expected' do
314
+ expect(values).to contain_exactly 'History.', "#{PennMARC::Mappers.heading_overrides.first[1]}."
315
+ end
316
+
317
+ it 'does not remove or replace terms if override param is false' do
318
+ expect(helper.show(record, override: false)).to contain_exactly(
319
+ 'History.', "#{PennMARC::Mappers.headings_to_remove.first}.",
320
+ "#{PennMARC::Mappers.heading_overrides.first[0]}."
321
+ )
322
+ end
323
+ end
305
324
  end
306
325
 
307
326
  describe '.childrens_show' do
metadata CHANGED
@@ -1,16 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pennmarc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.26
4
+ version: 1.0.30
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Kanning
8
8
  - Amrey Mathurin
9
9
  - Patrick Perkins
10
+ - Katherine Schultz
11
+ - Baowei Wei
10
12
  autorequire:
11
13
  bindir: bin
12
14
  cert_chain: []
13
- date: 2024-07-02 00:00:00.000000000 Z
15
+ date: 2024-07-15 00:00:00.000000000 Z
14
16
  dependencies:
15
17
  - !ruby/object:Gem::Dependency
16
18
  name: activesupport
@@ -54,6 +56,20 @@ dependencies:
54
56
  - - "~>"
55
57
  - !ruby/object:Gem::Version
56
58
  version: '1.2'
59
+ - !ruby/object:Gem::Dependency
60
+ name: multi_string_replace
61
+ requirement: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - "~>"
64
+ - !ruby/object:Gem::Version
65
+ version: '2.0'
66
+ type: :runtime
67
+ prerelease: false
68
+ version_requirements: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - "~>"
71
+ - !ruby/object:Gem::Version
72
+ version: '2.0'
57
73
  - !ruby/object:Gem::Dependency
58
74
  name: nokogiri
59
75
  requirement: !ruby/object:Gem::Requirement
@@ -116,6 +132,8 @@ files:
116
132
  - lib/pennmarc/helpers/title.rb
117
133
  - lib/pennmarc/mappers.rb
118
134
  - lib/pennmarc/mappings/dewey_classification.yml
135
+ - lib/pennmarc/mappings/headings_override.yml
136
+ - lib/pennmarc/mappings/headings_remove.yml
119
137
  - lib/pennmarc/mappings/iso639-2-languages.yml
120
138
  - lib/pennmarc/mappings/iso639-3-languages.yml
121
139
  - lib/pennmarc/mappings/loc_classification.yml
@@ -128,6 +146,7 @@ files:
128
146
  - lib/pennmarc/version.rb
129
147
  - pennmarc.gemspec
130
148
  - spec/fixtures/marcxml/test.xml
149
+ - spec/lib/pennmarc/heading_control_spec.rb
131
150
  - spec/lib/pennmarc/helpers/access_spec.rb
132
151
  - spec/lib/pennmarc/helpers/citation_spec.rb
133
152
  - spec/lib/pennmarc/helpers/classification_spec.rb