pennmarc 1.0.26 → 1.0.30

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8b4d6a53347f4532faa566debcc59bde1fb7043cfbd4530d0a06489a36db8599
4
- data.tar.gz: 8a92c36c80406541ac454025e6c2adc53af80dab20af5af16b8995dc5bacfe9d
3
+ metadata.gz: '07810ba2a6de07713ce40a1cfbe7fc33d5497eec834011a0098ce2cbd74bc5ff'
4
+ data.tar.gz: 45c3f0002e46be338a63be2109153f7603d9ebf8b1814d825b4c889c7f7237b4
5
5
  SHA512:
6
- metadata.gz: 8b1f0be0caba6a00adc8ccc5516556b2de250d93861157c49a60e17f52810b6eed7942efe8b51843d7428bb329c6dcad58a0887c17e9132004fe3d7d29729682
7
- data.tar.gz: b06dad73899b13254391b74832d3b99745d82bc9f871801d7a617e2416f9d70039d45a9122a292377628b638c4044e692432688dbce48497cb6394fa48ba0d6e
6
+ metadata.gz: 9a214837d1f061611a13ea7d09b13ae41d7f6f7df8f08d03da3d98d8969849aacb0edfd45a3fea231acb1e53f8c06c4e4793db49cf5274063d6c59d7b7043212
7
+ data.tar.gz: 1a6dd99e562ef38b301e49ac17b6ec211eb0af1e7bdccc6a0d98c15683163f9c4168e51b8560296282a42d6dacdbfbb5d8856628af7000614e48961cbcb6b19c
data/Gemfile CHANGED
@@ -5,6 +5,7 @@ source 'https://rubygems.org'
5
5
  gem 'activesupport', '~> 7'
6
6
  gem 'library_stdnums', '~> 1.6'
7
7
  gem 'marc', '~> 1.2'
8
+ gem 'multi_string_replace', '~> 2.0'
8
9
  gem 'nokogiri', '~> 1.15'
9
10
  gem 'rake', '~> 13.0'
10
11
  gem 'upennlib-rubocop', require: false
data/Gemfile.lock CHANGED
@@ -19,6 +19,7 @@ GEM
19
19
  scrub_rb (>= 1.0.1, < 2)
20
20
  unf
21
21
  minitest (5.18.0)
22
+ multi_string_replace (2.0.2)
22
23
  nokogiri (1.15.2-arm64-darwin)
23
24
  racc (~> 1.4)
24
25
  nokogiri (1.15.2-x64-mingw-ucrt)
@@ -113,6 +114,7 @@ DEPENDENCIES
113
114
  activesupport (~> 7)
114
115
  library_stdnums (~> 1.6)
115
116
  marc (~> 1.2)
117
+ multi_string_replace (~> 2.0)
116
118
  nokogiri (~> 1.15)
117
119
  rake (~> 13.0)
118
120
  rspec (~> 3.12)
data/README.md CHANGED
@@ -7,13 +7,28 @@ the "Nouveau Franklin" project aka [discovery_app](https://gitlab.library.upenn.
7
7
  When included in a project, it should be utilized like this:
8
8
 
9
9
  ```ruby
10
- parser = PennMARC::Parser.new # eventually we will pass in some mappings...
10
+ parser = PennMARC::Parser.new
11
11
  puts parser.title_show(marc_record) # Title intended for display
12
12
  ```
13
13
 
14
14
  All methods will require a `MARC::Record` object. For more about these, see the
15
15
  [ruby-marc](https://github.com/ruby-marc/ruby-marc) gem documentation
16
16
 
17
+ ## Term Overriding
18
+
19
+ This gem provides configuration as well as a method for overriding and removing terms that are undesirable. In your app,
20
+ you can remove or replace the configured terms like so:
21
+
22
+ ```ruby
23
+ improved_values = PennMARC::HeadingControl.term_override(values)
24
+ ```
25
+
26
+ This will remove any elements of the `values` array that include any terms defined in `mappings/headings_remove.yml` and
27
+ replace any terms defined in the `headings_override.yml` file.
28
+
29
+ By default, terms are replaced for `Subject#*show` and `Subject#facet` methods. You can bypass the default overriding on
30
+ these methods by passing `override: false`.
31
+
17
32
  ## Development
18
33
 
19
34
  ### Requirements
@@ -12,7 +12,7 @@ module PennMARC
12
12
  PHYS_INVENTORY_TAG = 'hld'
13
13
  ELEC_INVENTORY_TAG = 'prt'
14
14
  ITEM_TAG = 'itm'
15
- RELATED_RECORD_TAG = 'rel'
15
+ RELATED_RECORD_TAGS = %w[REL rel].freeze
16
16
 
17
17
  # Subfields for HLD tags
18
18
  # Follow MARC 852 spec: https://www.loc.gov/marc/holdings/hd852.html, but names are translated into Alma parlance
@@ -1,11 +1,29 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'multi_string_replace'
4
+
3
5
  module PennMARC
4
- # Shared values for controlling inclusion of subject or genre headings
5
- module HeadingControl
6
- # These codes are expected to be found in sf2 when the indicator2 value is 7, indicating "source specified". There
7
- # are some sources whose headings we don't want to display.
6
+ # Shared tools and values for controlling handling of subject or genre headings
7
+ class HeadingControl
8
+ # These codes are expected to be found in sf2 of a subject/genre field when the indicator2 value is 7, indicating
9
+ # "source specified". There are some sources whose headings we don't want to display.
8
10
  ALLOWED_SOURCE_CODES = %w[aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
9
11
  local/osu mesh ndlsh nli nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp].freeze
12
+
13
+ class << self
14
+ # Replace or remove any terms in provided values pursuant to the configuration in remove and override mappers.
15
+ # Used to remove or replace offensive or otherwise undesirable subject headings.
16
+ # @param values [Array]
17
+ # @return [Array] values with terms removed/replaced
18
+ def term_override(values)
19
+ values.filter_map do |value|
20
+ # Remove values if they contain a remove term
21
+ next nil if value.match?(/#{Mappers.headings_to_remove&.join('|')}/i)
22
+
23
+ # Replace values using multi_string_replace gem
24
+ MultiStringReplace.replace value, Mappers.heading_overrides
25
+ end
26
+ end
27
+ end
10
28
  end
11
29
  end
@@ -164,7 +164,7 @@ module PennMARC
164
164
  # @param [MARC::Record] record
165
165
  # @return [Array<String>]
166
166
  def host_record_id(record)
167
- record.fields(Enriched::Pub::RELATED_RECORD_TAG).filter_map { |field|
167
+ record.fields(Enriched::Pub::RELATED_RECORD_TAGS).filter_map { |field|
168
168
  next unless subfield_value?(field, 'c', /contains/i)
169
169
 
170
170
  subfield_values field, :w
@@ -62,67 +62,76 @@ module PennMARC
62
62
  #
63
63
  # @note this is ported mostly from MG's new-style Subject parsing
64
64
  # @param [MARC::Record] record
65
+ # @param [Boolean] override to remove undesirable terms or not
65
66
  # @return [Array<String>] array of all subject values for faceting
66
- def facet(record)
67
- subject_fields(record, type: :facet).filter_map { |field|
67
+ def facet(record, override: true)
68
+ values = subject_fields(record, type: :facet).filter_map { |field|
68
69
  term_hash = build_subject_hash(field)
69
70
  next if term_hash.blank? || term_hash[:count]&.zero?
70
71
 
71
72
  format_term type: :facet, term: term_hash
72
73
  }.uniq
74
+ override ? HeadingControl.term_override(values) : values
73
75
  end
74
76
 
75
77
  # All Subjects for display. This includes all {DISPLAY_TAGS} and {LOCAL_TAGS}. For tags that specify a source,
76
78
  # only those with an allowed source code (see ALLOWED_SOURCE_CODES) are included.
77
79
  #
78
80
  # @param [MARC::Record] record
81
+ # @param [Boolean] override to remove undesirable terms or not
79
82
  # @return [Array] array of all subject values for display
80
- def show(record)
81
- subject_fields(record, type: :all).filter_map { |field|
83
+ def show(record, override: true)
84
+ values = subject_fields(record, type: :all).filter_map { |field|
82
85
  term_hash = build_subject_hash(field)
83
86
  next if term_hash.blank? || term_hash[:count]&.zero?
84
87
 
85
88
  format_term type: :display, term: term_hash
86
89
  }.uniq
90
+ override ? HeadingControl.term_override(values) : values
87
91
  end
88
92
 
89
93
  # Get Subjects from "Children" ontology
90
94
  #
91
95
  # @param [MARC::Record] record
96
+ # @param [Boolean] override to remove undesirable terms or not
92
97
  # @return [Array] array of children's subject values for display
93
- def childrens_show(record)
94
- subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '1' })
95
- .filter_map { |field|
96
- term_hash = build_subject_hash(field)
97
- next if term_hash.blank? || term_hash[:count]&.zero?
98
-
99
- format_term type: :display, term: term_hash
100
- }.uniq
98
+ def childrens_show(record, override: true)
99
+ values = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '1' })
100
+ .filter_map { |field|
101
+ term_hash = build_subject_hash(field)
102
+ next if term_hash.blank? || term_hash[:count]&.zero?
103
+
104
+ format_term type: :display, term: term_hash
105
+ }.uniq
106
+ override ? HeadingControl.term_override(values) : values
101
107
  end
102
108
 
103
109
  # Get Subjects from "MeSH" ontology
104
110
  #
105
111
  # @param [MARC::Record] record
112
+ # @param [Boolean] override to remove undesirable terms or not
106
113
  # @return [Array] array of MeSH subject values for display
107
- def medical_show(record)
108
- subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '2' })
109
- .filter_map { |field|
110
- term_hash = build_subject_hash(field)
111
- next if term_hash.blank? || term_hash[:count]&.zero?
112
-
113
- format_term type: :display, term: term_hash
114
- }.uniq
114
+ def medical_show(record, override: true)
115
+ values = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '2' })
116
+ .filter_map { |field|
117
+ term_hash = build_subject_hash(field)
118
+ next if term_hash.blank? || term_hash[:count]&.zero?
119
+
120
+ format_term type: :display, term: term_hash
121
+ }.uniq
122
+ override ? HeadingControl.term_override(values) : values
115
123
  end
116
124
 
117
125
  # Get Subject values from {DISPLAY_TAGS} where indicator2 is 4 and {LOCAL_TAGS}. Do not include any values where
118
126
  # sf2 includes "penncoi" (Community of Interest).
119
127
  #
120
128
  # @param [MARC::Record] record
129
+ # @param [Boolean] override to remove undesirable terms
121
130
  # @return [Array] array of local subject values for display
122
- def local_show(record)
131
+ def local_show(record, override: true)
123
132
  local_fields = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '4' }) +
124
133
  subject_fields(record, type: :local)
125
- local_fields.filter_map { |field|
134
+ values = local_fields.filter_map { |field|
126
135
  next if subfield_value?(field, '2', /penncoi/)
127
136
 
128
137
  term_hash = build_subject_hash(field)
@@ -130,6 +139,7 @@ module PennMARC
130
139
 
131
140
  format_term type: :display, term: term_hash
132
141
  }.uniq
142
+ override ? HeadingControl.term_override(values) : values
133
143
  end
134
144
 
135
145
  private
@@ -4,46 +4,58 @@ module PennMARC
4
4
  # reusable static mappers
5
5
  class Mappers
6
6
  class << self
7
- # @return [Hash]
7
+ # @return [Hash, nil]
8
+ def heading_overrides
9
+ @heading_overrides ||= load_map('headings_override.yml', symbolize_names: false)
10
+ end
11
+
12
+ # @return [Array<String>, nil]
13
+ def headings_to_remove
14
+ @headings_to_remove ||= load_map('headings_remove.yml', symbolize_names: false)
15
+ end
16
+
17
+ # @return [Hash, nil]
8
18
  def iso_639_2_language
9
19
  @iso_639_2_language ||= load_map('iso639-2-languages.yml')
10
20
  end
11
21
 
22
+ # @return [Hash, nil]
12
23
  def iso_639_3_language
13
24
  @iso_639_3_language ||= load_map('iso639-3-languages.yml')
14
25
  end
15
26
 
16
- # @return [Hash]
27
+ # @return [Hash, nil]
17
28
  def location
18
29
  @location ||= load_map('locations.yml')
19
30
  end
20
31
 
21
- # @return [Hash]
32
+ # @return [Hash, nil]
22
33
  def location_overrides
23
34
  @location_overrides ||= load_map('location_overrides.yml')
24
35
  end
25
36
 
26
- # @return [Hash]
37
+ # @return [Hash, nil]
27
38
  def relator
28
39
  @relator ||= load_map('relator.yml')
29
40
  end
30
41
 
31
- # @return [Hash]
42
+ # @return [Hash, nil]
32
43
  def loc_classification
33
44
  @loc_classification ||= load_map('loc_classification.yml')
34
45
  end
35
46
 
36
- # @return [Hash]
47
+ # @return [Hash, nil]
37
48
  def dewey_classification
38
49
  @dewey_classification ||= load_map('dewey_classification.yml')
39
50
  end
40
51
 
41
- # @param [String] filename of mapping file in config directory, with file extension
42
- # @return [Hash] mapping as hash
43
- def load_map(filename)
52
+ # @param filename [String] name of mapping file in config directory, with file extension
53
+ # @param symbolize_names [Boolean] whether or not to symbolize keys in returned hash
54
+ # @return [Hash, nil] mapping as hash
55
+ def load_map(filename, symbolize_names: true)
44
56
  puts { "Loading #{filename}" }
45
57
  YAML.safe_load(File.read(File.join(File.expand_path(__dir__), 'mappings', filename)),
46
- symbolize_names: true)
58
+ symbolize_names: symbolize_names)
47
59
  end
48
60
  end
49
61
  end
@@ -0,0 +1,8 @@
1
+ Aliens: Noncitizens
2
+ "Alien criminals": Noncitizen criminals
3
+ "Alien detention centers": Immigrant detention centers
4
+ "Alien labor": Foreign workers
5
+ "Alien property": Foreign-owned property
6
+ Gypsies: Romanies
7
+ "Illegal Alien Children": Undocumented immigrant children
8
+ "Illegal Aliens": Undocumented immigrants
@@ -0,0 +1,2 @@
1
+ - Jewish Question
2
+ - Yellow Peril
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PennMARC
4
- VERSION = '1.0.26'
4
+ VERSION = '1.0.30'
5
5
  end
data/pennmarc.gemspec CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
10
10
  s.summary = 'Penn Libraries Catalog MARC parsing wisdom for cross-project usage'
11
11
  s.description = 'This gem provides methods for parsing a Penn Libraries MARCXML record into string, array and date
12
12
  objects for use in discovery or preservation applications.'
13
- s.authors = ['Mike Kanning', 'Amrey Mathurin', 'Patrick Perkins']
13
+ s.authors = ['Mike Kanning', 'Amrey Mathurin', 'Patrick Perkins', 'Katherine Schultz', 'Baowei Wei']
14
14
  s.email = 'mkanning@upenn.edu'
15
15
  s.files = `git ls-files`.split($OUTPUT_RECORD_SEPARATOR)
16
16
  s.homepage = 'https://gitlab.library.upenn.edu/dld/catalog/pennmarc'
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
21
21
  s.add_dependency 'activesupport', '~> 7'
22
22
  s.add_dependency 'library_stdnums', '~> 1.6'
23
23
  s.add_dependency 'marc', '~> 1.2'
24
+ s.add_dependency 'multi_string_replace', '~> 2.0'
24
25
  s.add_dependency 'nokogiri', '~> 1.15'
25
26
 
26
27
  s.metadata['rubygems_mfa_required'] = 'false'
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ describe 'PennMARC::HeadingControl' do
4
+ let(:replace_term) { PennMARC::Mappers.heading_overrides.first[0] }
5
+ let(:replaced_term) { PennMARC::Mappers.heading_overrides.first[1] }
6
+ let(:remove_term) { PennMARC::Mappers.headings_to_remove.first }
7
+
8
+ describe '.process' do
9
+ context 'with a term for removal' do
10
+ it 'removes the term if found in isolation' do
11
+ values = [remove_term]
12
+ expect(PennMARC::HeadingControl.term_override(values)).to eq []
13
+ end
14
+
15
+ it 'removes the term regardless of case' do
16
+ values = [remove_term.downcase]
17
+ expect(PennMARC::HeadingControl.term_override(values)).to eq []
18
+ end
19
+
20
+ it 'removes the term if it is included as a substring' do
21
+ values = ["#{remove_term}--History"]
22
+ expect(PennMARC::HeadingControl.term_override(values)).to eq []
23
+ end
24
+ end
25
+
26
+ context 'with a term for replacement' do
27
+ it 'replaces the term in isolation' do
28
+ values = [replace_term]
29
+ expect(PennMARC::HeadingControl.term_override(values)).to eq [replaced_term]
30
+ end
31
+
32
+ it 'replaces the term when used with other headings' do
33
+ values = ["#{replace_term}--History"]
34
+ expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replaced_term}--History"]
35
+ end
36
+ end
37
+
38
+ context 'with a variety of terms' do
39
+ it 'removes and replaces terms as needed' do
40
+ values = [remove_term, replace_term, 'History']
41
+ expect(PennMARC::HeadingControl.term_override(values)).to contain_exactly 'History', replaced_term
42
+ end
43
+ end
44
+ end
45
+ end
@@ -161,16 +161,36 @@ describe 'PennMARC::Identifier' do
161
161
  end
162
162
 
163
163
  describe '.host_record_id' do
164
- let(:record) do
165
- marc_record fields: [
166
- marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAG, subfields: { w: '123456789', c: 'Contains',
167
- a: 'Title' }),
168
- marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAG, subfields: { w: '666666666', c: 'Contained In' })
169
- ]
170
- end
171
-
172
- it 'returns only the desired host record MMS ID values' do
173
- expect(helper.host_record_id(record)).to contain_exactly '123456789'
164
+ context 'with a lower case tag' do
165
+ let(:record) do
166
+ marc_record fields: [
167
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '123456789',
168
+ c: 'Contains',
169
+ a: 'Title' }),
170
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.second, subfields: { w: '666666666',
171
+ c: 'Contained In' })
172
+ ]
173
+ end
174
+
175
+ it 'returns only the desired host record MMS ID values' do
176
+ expect(helper.host_record_id(record)).to contain_exactly '123456789'
177
+ end
178
+ end
179
+
180
+ context 'with an upper case tag' do
181
+ let(:record) do
182
+ marc_record fields: [
183
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '123456789',
184
+ c: 'Contains',
185
+ a: 'Title' }),
186
+ marc_field(tag: PennMARC::Enriched::Pub::RELATED_RECORD_TAGS.first, subfields: { w: '666666666',
187
+ c: 'Contained In' })
188
+ ]
189
+ end
190
+
191
+ it 'returns only the desired host record MMS ID values' do
192
+ expect(helper.host_record_id(record)).to contain_exactly '123456789'
193
+ end
174
194
  end
175
195
  end
176
196
  end
@@ -302,6 +302,25 @@ describe 'PennMARC::Subject' do
302
302
  expect(values).to contain_exactly 'Philosophy in motion pictures.'
303
303
  end
304
304
  end
305
+
306
+ context 'with headings that contain terms for removal and replacement' do
307
+ let(:fields) do
308
+ [marc_field(tag: '650', subfields: { a: 'History.' }),
309
+ marc_field(tag: '650', subfields: { a: PennMARC::Mappers.headings_to_remove.first }),
310
+ marc_field(tag: '650', subfields: { a: PennMARC::Mappers.heading_overrides.first[0] })]
311
+ end
312
+
313
+ it 'removes and replaces terms as expected' do
314
+ expect(values).to contain_exactly 'History.', "#{PennMARC::Mappers.heading_overrides.first[1]}."
315
+ end
316
+
317
+ it 'does not remove or replace terms if override param is false' do
318
+ expect(helper.show(record, override: false)).to contain_exactly(
319
+ 'History.', "#{PennMARC::Mappers.headings_to_remove.first}.",
320
+ "#{PennMARC::Mappers.heading_overrides.first[0]}."
321
+ )
322
+ end
323
+ end
305
324
  end
306
325
 
307
326
  describe '.childrens_show' do
metadata CHANGED
@@ -1,16 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pennmarc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.26
4
+ version: 1.0.30
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Kanning
8
8
  - Amrey Mathurin
9
9
  - Patrick Perkins
10
+ - Katherine Schultz
11
+ - Baowei Wei
10
12
  autorequire:
11
13
  bindir: bin
12
14
  cert_chain: []
13
- date: 2024-07-02 00:00:00.000000000 Z
15
+ date: 2024-07-15 00:00:00.000000000 Z
14
16
  dependencies:
15
17
  - !ruby/object:Gem::Dependency
16
18
  name: activesupport
@@ -54,6 +56,20 @@ dependencies:
54
56
  - - "~>"
55
57
  - !ruby/object:Gem::Version
56
58
  version: '1.2'
59
+ - !ruby/object:Gem::Dependency
60
+ name: multi_string_replace
61
+ requirement: !ruby/object:Gem::Requirement
62
+ requirements:
63
+ - - "~>"
64
+ - !ruby/object:Gem::Version
65
+ version: '2.0'
66
+ type: :runtime
67
+ prerelease: false
68
+ version_requirements: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - "~>"
71
+ - !ruby/object:Gem::Version
72
+ version: '2.0'
57
73
  - !ruby/object:Gem::Dependency
58
74
  name: nokogiri
59
75
  requirement: !ruby/object:Gem::Requirement
@@ -116,6 +132,8 @@ files:
116
132
  - lib/pennmarc/helpers/title.rb
117
133
  - lib/pennmarc/mappers.rb
118
134
  - lib/pennmarc/mappings/dewey_classification.yml
135
+ - lib/pennmarc/mappings/headings_override.yml
136
+ - lib/pennmarc/mappings/headings_remove.yml
119
137
  - lib/pennmarc/mappings/iso639-2-languages.yml
120
138
  - lib/pennmarc/mappings/iso639-3-languages.yml
121
139
  - lib/pennmarc/mappings/loc_classification.yml
@@ -128,6 +146,7 @@ files:
128
146
  - lib/pennmarc/version.rb
129
147
  - pennmarc.gemspec
130
148
  - spec/fixtures/marcxml/test.xml
149
+ - spec/lib/pennmarc/heading_control_spec.rb
131
150
  - spec/lib/pennmarc/helpers/access_spec.rb
132
151
  - spec/lib/pennmarc/helpers/citation_spec.rb
133
152
  - spec/lib/pennmarc/helpers/classification_spec.rb