pennmarc 1.0.26 → 1.0.27
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/Gemfile.lock +2 -0
- data/README.md +16 -1
- data/lib/pennmarc/heading_control.rb +22 -4
- data/lib/pennmarc/helpers/subject.rb +32 -22
- data/lib/pennmarc/mappers.rb +22 -10
- data/lib/pennmarc/mappings/headings_override.yml +8 -0
- data/lib/pennmarc/mappings/headings_remove.yml +2 -0
- data/lib/pennmarc/version.rb +1 -1
- data/pennmarc.gemspec +2 -1
- data/spec/lib/pennmarc/heading_control_spec.rb +45 -0
- data/spec/lib/pennmarc/helpers/subject_spec.rb +19 -0
- metadata +21 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a6dec6a3d235117c15b3a37f368533a93e30ce4dfb11ac4832a612f878e653d9
|
4
|
+
data.tar.gz: 4217da9f4a2126e46284c48f4aeaca09016f4abf6ce08c493e039dae20b4bc20
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f3f0163eb6c3b2afeb8bd8845762c1c208bbbb9efb586e3978f5f5584f73159adb2649a801d78c090e6349c72f69197b1e721acd3bdbe3a79e9f99ac11f23ee
|
7
|
+
data.tar.gz: d4595b310f8a7b765a16738799b642fc05f680b681d4e090bca11a903ca7b748a25ca0981a260290c8544faea91b9437f7b98358f5358551e787dc0ae9ab8531
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -19,6 +19,7 @@ GEM
|
|
19
19
|
scrub_rb (>= 1.0.1, < 2)
|
20
20
|
unf
|
21
21
|
minitest (5.18.0)
|
22
|
+
multi_string_replace (2.0.2)
|
22
23
|
nokogiri (1.15.2-arm64-darwin)
|
23
24
|
racc (~> 1.4)
|
24
25
|
nokogiri (1.15.2-x64-mingw-ucrt)
|
@@ -113,6 +114,7 @@ DEPENDENCIES
|
|
113
114
|
activesupport (~> 7)
|
114
115
|
library_stdnums (~> 1.6)
|
115
116
|
marc (~> 1.2)
|
117
|
+
multi_string_replace (~> 2.0)
|
116
118
|
nokogiri (~> 1.15)
|
117
119
|
rake (~> 13.0)
|
118
120
|
rspec (~> 3.12)
|
data/README.md
CHANGED
@@ -7,13 +7,28 @@ the "Nouveau Franklin" project aka [discovery_app](https://gitlab.library.upenn.
|
|
7
7
|
When included in a project, it should be utilized like this:
|
8
8
|
|
9
9
|
```ruby
|
10
|
-
parser = PennMARC::Parser.new
|
10
|
+
parser = PennMARC::Parser.new
|
11
11
|
puts parser.title_show(marc_record) # Title intended for display
|
12
12
|
```
|
13
13
|
|
14
14
|
All methods will require a `MARC::Record` object. For more about these, see the
|
15
15
|
[ruby-marc](https://github.com/ruby-marc/ruby-marc) gem documentation
|
16
16
|
|
17
|
+
## Term Overriding
|
18
|
+
|
19
|
+
This gem provides configuration as well as a method for overriding and removing terms that are undesirable. In your app,
|
20
|
+
you can remove or replace the configured terms like so:
|
21
|
+
|
22
|
+
```ruby
|
23
|
+
improved_values = PennMARC::HeadingControl.term_override(values)
|
24
|
+
```
|
25
|
+
|
26
|
+
This will remove any elements of the `values` array that include any terms defined in `mappings/headings_remove.yml` and
|
27
|
+
replace any terms defined in the `headings_override.yml` file.
|
28
|
+
|
29
|
+
By default, terms are replaced for `Subject#*show` and `Subject#facet` methods. You can bypass the default overriding on
|
30
|
+
these methods by passing `override: false`.
|
31
|
+
|
17
32
|
## Development
|
18
33
|
|
19
34
|
### Requirements
|
@@ -1,11 +1,29 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'multi_string_replace'
|
4
|
+
|
3
5
|
module PennMARC
|
4
|
-
# Shared values for controlling
|
5
|
-
|
6
|
-
# These codes are expected to be found in sf2 when the indicator2 value is 7, indicating
|
7
|
-
# are some sources whose headings we don't want to display.
|
6
|
+
# Shared tools and values for controlling handling of subject or genre headings
|
7
|
+
class HeadingControl
  # These codes are expected to be found in sf2 of a subject/genre field when the indicator2 value is 7, indicating
  # "source specified". There are some sources whose headings we don't want to display.
  ALLOWED_SOURCE_CODES = %w[aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
                            local/osu mesh ndlsh nli nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp].freeze

  class << self
    # Replace or remove any terms in provided values pursuant to the configuration in remove and override mappers.
    # Used to remove or replace offensive or otherwise undesirable subject headings.
    #
    # A value is dropped entirely when it contains any configured removal term (case-insensitive, matched as
    # literal text). Surviving values have the configured override terms substituted using the
    # multi_string_replace gem. If either mapping is missing or empty, that step is skipped and values pass
    # through untouched.
    #
    # @param values [Array<String>]
    # @return [Array<String>] values with terms removed/replaced
    def term_override(values)
      # Build the pattern once per call rather than once per value
      removal = removal_pattern
      overrides = Mappers.heading_overrides || {}

      values.filter_map do |value|
        # Remove values if they contain a remove term
        next if removal && value.match?(removal)

        # Replace values using multi_string_replace gem
        overrides.empty? ? value : MultiStringReplace.replace(value, overrides)
      end
    end

    private

    # Build one case-insensitive pattern that matches any configured removal term literally.
    #
    # Returning nil for an empty configuration matters: interpolating an empty term list into a regex yields
    # an empty pattern, which matches every string and would cause every value to be removed.
    #
    # @return [Regexp, nil] nil when no removal terms are configured
    def removal_pattern
      terms = Array(Mappers.headings_to_remove).map(&:to_s).reject(&:empty?)
      return if terms.empty?

      # Regexp.union escapes each term so regex metacharacters in a heading are matched as plain text
      Regexp.new(Regexp.union(terms).source, Regexp::IGNORECASE)
    end
  end
end
|
11
29
|
end
|
@@ -62,67 +62,76 @@ module PennMARC
|
|
62
62
|
#
|
63
63
|
# @note this is ported mostly from MG's new-style Subject parsing
|
64
64
|
# @param [MARC::Record] record
|
65
|
+
# @param [Boolean] override to remove undesirable terms or not
|
65
66
|
# @return [Array<String>] array of all subject values for faceting
|
66
|
-
def facet(record)
|
67
|
-
subject_fields(record, type: :facet).filter_map { |field|
|
67
|
+
def facet(record, override: true)
  values = subject_fields(record, type: :facet).filter_map { |field|
    term_hash = build_subject_hash(field)
    # Skip fields whose term hash is blank or reports a zero count
    next if term_hash.blank? || term_hash[:count]&.zero?

    format_term type: :facet, term: term_hash
  }.uniq
  return values unless override

  # Overriding can rewrite distinct headings to the same string, so de-duplicate again afterwards
  HeadingControl.term_override(values).uniq
end
|
74
76
|
|
75
77
|
# All Subjects for display. This includes all {DISPLAY_TAGS} and {LOCAL_TAGS}. For tags that specify a source,
|
76
78
|
# only those with an allowed source code (see ALLOWED_SOURCE_CODES) are included.
|
77
79
|
#
|
78
80
|
# @param [MARC::Record] record
|
81
|
+
# @param [Boolean] override to remove undesirable terms or not
|
79
82
|
# @return [Array] array of all subject values for display
|
80
|
-
def show(record)
|
81
|
-
subject_fields(record, type: :all).filter_map { |field|
|
83
|
+
def show(record, override: true)
  values = subject_fields(record, type: :all).filter_map { |field|
    term_hash = build_subject_hash(field)
    # Skip fields whose term hash is blank or reports a zero count
    next if term_hash.blank? || term_hash[:count]&.zero?

    format_term type: :display, term: term_hash
  }.uniq
  return values unless override

  # Overriding can rewrite distinct headings to the same string, so de-duplicate again afterwards
  HeadingControl.term_override(values).uniq
end
|
88
92
|
|
89
93
|
# Get Subjects from "Children" ontology
|
90
94
|
#
|
91
95
|
# @param [MARC::Record] record
|
96
|
+
# @param [Boolean] override to remove undesirable terms or not
|
92
97
|
# @return [Array] array of children's subject values for display
|
93
|
-
def childrens_show(record)
|
94
|
-
subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '1' })
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
98
|
+
def childrens_show(record, override: true)
  # Restrict to display tags whose second indicator is '1'
  children_fields = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '1' })
  values = children_fields.filter_map { |field|
    term_hash = build_subject_hash(field)
    # Skip fields whose term hash is blank or reports a zero count
    next if term_hash.blank? || term_hash[:count]&.zero?

    format_term type: :display, term: term_hash
  }.uniq
  return values unless override

  # Overriding can rewrite distinct headings to the same string, so de-duplicate again afterwards
  HeadingControl.term_override(values).uniq
end
|
102
108
|
|
103
109
|
# Get Subjects from "MeSH" ontology
|
104
110
|
#
|
105
111
|
# @param [MARC::Record] record
|
112
|
+
# @param [Boolean] override to remove undesirable terms or not
|
106
113
|
# @return [Array] array of MeSH subject values for display
|
107
|
-
def medical_show(record)
|
108
|
-
subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '2' })
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
114
|
+
def medical_show(record, override: true)
  # Restrict to display tags whose second indicator is '2'
  mesh_fields = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '2' })
  values = mesh_fields.filter_map { |field|
    term_hash = build_subject_hash(field)
    # Skip fields whose term hash is blank or reports a zero count
    next if term_hash.blank? || term_hash[:count]&.zero?

    format_term type: :display, term: term_hash
  }.uniq
  return values unless override

  # Overriding can rewrite distinct headings to the same string, so de-duplicate again afterwards
  HeadingControl.term_override(values).uniq
end
|
116
124
|
|
117
125
|
# Get Subject values from {DISPLAY_TAGS} where indicator2 is 4 and {LOCAL_TAGS}. Do not include any values where
|
118
126
|
# sf2 includes "penncoi" (Community of Interest).
|
119
127
|
#
|
120
128
|
# @param [MARC::Record] record
|
129
|
+
# @param [Boolean] override to remove undesirable terms
|
121
130
|
# @return [Array] array of local subject values for display
|
122
|
-
def local_show(record)
|
131
|
+
def local_show(record, override: true)
|
123
132
|
local_fields = subject_fields(record, type: :display, options: { tags: DISPLAY_TAGS, indicator2: '4' }) +
|
124
133
|
subject_fields(record, type: :local)
|
125
|
-
local_fields.filter_map { |field|
|
134
|
+
values = local_fields.filter_map { |field|
|
126
135
|
next if subfield_value?(field, '2', /penncoi/)
|
127
136
|
|
128
137
|
term_hash = build_subject_hash(field)
|
@@ -130,6 +139,7 @@ module PennMARC
|
|
130
139
|
|
131
140
|
format_term type: :display, term: term_hash
|
132
141
|
}.uniq
|
142
|
+
override ? HeadingControl.term_override(values) : values
|
133
143
|
end
|
134
144
|
|
135
145
|
private
|
data/lib/pennmarc/mappers.rb
CHANGED
@@ -4,46 +4,58 @@ module PennMARC
|
|
4
4
|
# reusable static mappers
|
5
5
|
class Mappers
  class << self
    # Terms to replace, keyed by the undesirable term, with string (not symbol) keys.
    # @return [Hash, nil]
    def heading_overrides
      @heading_overrides ||= load_map('headings_override.yml', symbolize_names: false)
    end

    # Terms whose presence causes a heading to be removed. Callers treat this as a list of strings.
    # @return [Array<String>, nil]
    def headings_to_remove
      @headings_to_remove ||= load_map('headings_remove.yml', symbolize_names: false)
    end

    # @return [Hash, nil]
    def iso_639_2_language
      @iso_639_2_language ||= load_map('iso639-2-languages.yml')
    end

    # @return [Hash, nil]
    def iso_639_3_language
      @iso_639_3_language ||= load_map('iso639-3-languages.yml')
    end

    # @return [Hash, nil]
    def location
      @location ||= load_map('locations.yml')
    end

    # @return [Hash, nil]
    def location_overrides
      @location_overrides ||= load_map('location_overrides.yml')
    end

    # @return [Hash, nil]
    def relator
      @relator ||= load_map('relator.yml')
    end

    # @return [Hash, nil]
    def loc_classification
      @loc_classification ||= load_map('loc_classification.yml')
    end

    # @return [Hash, nil]
    def dewey_classification
      @dewey_classification ||= load_map('dewey_classification.yml')
    end

    # Read and parse a YAML mapping file from the `mappings` directory that sits beside this file.
    #
    # @param filename [String] name of mapping file in the mappings directory, with file extension
    # @param symbolize_names [Boolean] whether or not to symbolize keys in returned hash
    # @return [Hash, Array, nil] parsed mapping; the type follows the top-level structure of the YAML file
    def load_map(filename, symbolize_names: true)
      # `puts` ignores a block argument, so the message has to be passed as a regular argument to be printed
      puts "Loading #{filename}"
      YAML.safe_load(File.read(File.join(File.expand_path(__dir__), 'mappings', filename)),
                     symbolize_names: symbolize_names)
    end
  end
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
Aliens: Noncitizens
|
2
|
+
"Alien criminals": Noncitizen criminals
|
3
|
+
"Alien detention centers": Immigrant detention centers
|
4
|
+
"Alien labor": Foreign workers
|
5
|
+
"Alien property": Foreign-owned property
|
6
|
+
Gypsies: Romanies
|
7
|
+
"Illegal Alien Children": Undocumented immigrant children
|
8
|
+
"Illegal Aliens": Undocumented immigrants
|
data/lib/pennmarc/version.rb
CHANGED
data/pennmarc.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.summary = 'Penn Libraries Catalog MARC parsing wisdom for cross-project usage'
|
11
11
|
s.description = 'This gem provides methods for parsing a Penn Libraries MARCXML record into string, array and date
|
12
12
|
objects for use in discovery or preservation applications.'
|
13
|
-
s.authors = ['Mike Kanning', 'Amrey Mathurin', 'Patrick Perkins']
|
13
|
+
s.authors = ['Mike Kanning', 'Amrey Mathurin', 'Patrick Perkins', 'Katherine Schultz', 'Baowei Wei']
|
14
14
|
s.email = 'mkanning@upenn.edu'
|
15
15
|
s.files = `git ls-files`.split($OUTPUT_RECORD_SEPARATOR)
|
16
16
|
s.homepage = 'https://gitlab.library.upenn.edu/dld/catalog/pennmarc'
|
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.add_dependency 'activesupport', '~> 7'
|
22
22
|
s.add_dependency 'library_stdnums', '~> 1.6'
|
23
23
|
s.add_dependency 'marc', '~> 1.2'
|
24
|
+
s.add_dependency 'multi_string_replace', '~> 2.0'
|
24
25
|
s.add_dependency 'nokogiri', '~> 1.15'
|
25
26
|
|
26
27
|
s.metadata['rubygems_mfa_required'] = 'false'
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
describe 'PennMARC::HeadingControl' do
  # Pull sample terms from the shipped mappings so the examples track the configuration
  let(:replace_term) { PennMARC::Mappers.heading_overrides.first[0] }
  let(:replaced_term) { PennMARC::Mappers.heading_overrides.first[1] }
  let(:remove_term) { PennMARC::Mappers.headings_to_remove.first }

  # Named after the method actually exercised below
  describe '.term_override' do
    context 'with a term for removal' do
      it 'removes the term if found in isolation' do
        values = [remove_term]
        expect(PennMARC::HeadingControl.term_override(values)).to eq []
      end

      it 'removes the term regardless of case' do
        values = [remove_term.downcase]
        expect(PennMARC::HeadingControl.term_override(values)).to eq []
      end

      it 'removes the term if it is included as a substring' do
        values = ["#{remove_term}--History"]
        expect(PennMARC::HeadingControl.term_override(values)).to eq []
      end
    end

    context 'with a term for replacement' do
      it 'replaces the term in isolation' do
        values = [replace_term]
        expect(PennMARC::HeadingControl.term_override(values)).to eq [replaced_term]
      end

      it 'replaces the term when used with other headings' do
        values = ["#{replace_term}--History"]
        expect(PennMARC::HeadingControl.term_override(values)).to eq ["#{replaced_term}--History"]
      end
    end

    context 'with a variety of terms' do
      it 'removes and replaces terms as needed' do
        values = [remove_term, replace_term, 'History']
        expect(PennMARC::HeadingControl.term_override(values)).to contain_exactly 'History', replaced_term
      end
    end
  end
end
|
@@ -302,6 +302,25 @@ describe 'PennMARC::Subject' do
|
|
302
302
|
expect(values).to contain_exactly 'Philosophy in motion pictures.'
|
303
303
|
end
|
304
304
|
end
|
305
|
+
|
306
|
+
context 'with headings that contain terms for removal and replacement' do
  # Sample terms drawn from the shipped mappings
  let(:removal_heading) { PennMARC::Mappers.headings_to_remove.first }
  let(:override_from) { PennMARC::Mappers.heading_overrides.first[0] }
  let(:override_to) { PennMARC::Mappers.heading_overrides.first[1] }
  let(:fields) do
    ['History.', removal_heading, override_from].map do |heading|
      marc_field(tag: '650', subfields: { a: heading })
    end
  end

  it 'removes and replaces terms as expected' do
    expect(values).to contain_exactly 'History.', "#{override_to}."
  end

  it 'does not remove or replace terms if override param is false' do
    untouched = helper.show(record, override: false)
    expect(untouched).to contain_exactly('History.', "#{removal_heading}.", "#{override_from}.")
  end
end
|
305
324
|
end
|
306
325
|
|
307
326
|
describe '.childrens_show' do
|
metadata
CHANGED
@@ -1,16 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pennmarc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.27
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mike Kanning
|
8
8
|
- Amrey Mathurin
|
9
9
|
- Patrick Perkins
|
10
|
+
- Katherine Schultz
|
11
|
+
- Baowei Wei
|
10
12
|
autorequire:
|
11
13
|
bindir: bin
|
12
14
|
cert_chain: []
|
13
|
-
date: 2024-07-
|
15
|
+
date: 2024-07-14 00:00:00.000000000 Z
|
14
16
|
dependencies:
|
15
17
|
- !ruby/object:Gem::Dependency
|
16
18
|
name: activesupport
|
@@ -54,6 +56,20 @@ dependencies:
|
|
54
56
|
- - "~>"
|
55
57
|
- !ruby/object:Gem::Version
|
56
58
|
version: '1.2'
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: multi_string_replace
|
61
|
+
requirement: !ruby/object:Gem::Requirement
|
62
|
+
requirements:
|
63
|
+
- - "~>"
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '2.0'
|
66
|
+
type: :runtime
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - "~>"
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '2.0'
|
57
73
|
- !ruby/object:Gem::Dependency
|
58
74
|
name: nokogiri
|
59
75
|
requirement: !ruby/object:Gem::Requirement
|
@@ -116,6 +132,8 @@ files:
|
|
116
132
|
- lib/pennmarc/helpers/title.rb
|
117
133
|
- lib/pennmarc/mappers.rb
|
118
134
|
- lib/pennmarc/mappings/dewey_classification.yml
|
135
|
+
- lib/pennmarc/mappings/headings_override.yml
|
136
|
+
- lib/pennmarc/mappings/headings_remove.yml
|
119
137
|
- lib/pennmarc/mappings/iso639-2-languages.yml
|
120
138
|
- lib/pennmarc/mappings/iso639-3-languages.yml
|
121
139
|
- lib/pennmarc/mappings/loc_classification.yml
|
@@ -128,6 +146,7 @@ files:
|
|
128
146
|
- lib/pennmarc/version.rb
|
129
147
|
- pennmarc.gemspec
|
130
148
|
- spec/fixtures/marcxml/test.xml
|
149
|
+
- spec/lib/pennmarc/heading_control_spec.rb
|
131
150
|
- spec/lib/pennmarc/helpers/access_spec.rb
|
132
151
|
- spec/lib/pennmarc/helpers/citation_spec.rb
|
133
152
|
- spec/lib/pennmarc/helpers/classification_spec.rb
|