pennmarc 1.3.5 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1d1e26fab095596cdef4b9e0cf3b8f0c571ce7526b5bf8a8b17b6b1240d1c2b9
4
- data.tar.gz: 133829f994db6bc1ee7f341e6750c4202126dcc0dbe5bdaec3610170317164ab
3
+ metadata.gz: 81ebb88f882d1b92922aa5dc7338056b6831d4857f91ba1ed1710deb2b678f09
4
+ data.tar.gz: 9db8caa363ce8ab4515d62612e033c4e73c185fef68c2a5c115a3520928f95b8
5
5
  SHA512:
6
- metadata.gz: 2616ca6b451b3dc99caef45e63d444b8e1d4b3d87e8cac920f68ec54bcddfe6193392e34d6efe23f9144173baa0fc271ff0b94b2f1f7eb7676d5ee0beb8d800a
7
- data.tar.gz: f0a69e601d112df2b53b1107ff33e421d0d9da92845601bc24ed96980a6876bf56c79c25d3301f412d1dc20815ed591875aed0248957bff4c9179b14aa263cfd
6
+ metadata.gz: d3a18e0ff157bdecd8a10cad8c72c4aeffece93dc06117740fb122e27ff972f5940f45b332c8bb3d6d57b05cff2c7b610fc57487b9ce7ae38700df96ca077764
7
+ data.tar.gz: 0cb04ac15f6f5aa1f7c86005b953bd86e5c952ab2de99479d04688d131bff991ec3a2bbe17a3cf2707bcfa613c71ed8beb2711951c5868c0d5cd4e240d0fca5a
data/.pipeline.yml CHANGED
@@ -1,3 +1,7 @@
1
+ variables:
2
+ FF_SCRIPT_SECTIONS: "true"
3
+ GIT_CLONE_PATH: "${CI_BUILDS_DIR}/${CI_PROJECT_NAME}/${CI_JOB_ID}"
4
+
1
5
  include:
2
6
  - component: gitlab.library.upenn.edu/devops/gitlab/components/general/install_hashicorp_vault@~latest
3
7
  - component: gitlab.library.upenn.edu/devops/gitlab/components/general/vault_jwt_auth@~latest
data/.rubocop_todo.yml CHANGED
@@ -1,12 +1,12 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config --auto-gen-only-exclude --exclude-limit 10000`
3
- # on 2025-08-15 20:13:24 UTC using RuboCop version 1.79.2.
3
+ # on 2025-10-22 21:06:47 UTC using RuboCop version 1.79.2.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
7
7
  # versions of RuboCop, may require this file to be generated again.
8
8
 
9
- # Offense count: 27
9
+ # Offense count: 28
10
10
  # Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes, Max.
11
11
  Metrics/AbcSize:
12
12
  Exclude:
@@ -33,7 +33,7 @@ Metrics/ClassLength:
33
33
  - 'lib/pennmarc/helpers/subject.rb'
34
34
  - 'lib/pennmarc/helpers/title.rb'
35
35
 
36
- # Offense count: 21
36
+ # Offense count: 22
37
37
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
38
38
  Metrics/CyclomaticComplexity:
39
39
  Exclude:
@@ -50,7 +50,7 @@ Metrics/CyclomaticComplexity:
50
50
  - 'lib/pennmarc/helpers/title.rb'
51
51
  - 'lib/pennmarc/util.rb'
52
52
 
53
- # Offense count: 17
53
+ # Offense count: 18
54
54
  # Configuration parameters: CountComments, Max, CountAsOne, AllowedMethods, AllowedPatterns.
55
55
  Metrics/MethodLength:
56
56
  Exclude:
@@ -70,7 +70,7 @@ Metrics/ModuleLength:
70
70
  Exclude:
71
71
  - 'lib/pennmarc/util.rb'
72
72
 
73
- # Offense count: 16
73
+ # Offense count: 17
74
74
  # Configuration parameters: AllowedMethods, AllowedPatterns, Max.
75
75
  Metrics/PerceivedComplexity:
76
76
  Exclude:
@@ -109,9 +109,18 @@ RSpec/ExampleLength:
109
109
  Exclude:
110
110
  - 'spec/lib/pennmarc/marc_util_spec.rb'
111
111
 
112
- # Offense count: 1
112
+ # Offense count: 2
113
113
  # Configuration parameters: Include, CustomTransform, IgnoreMethods, IgnoreMetadata.
114
114
  # Include: **/*_spec.rb
115
115
  RSpec/SpecFilePathFormat:
116
116
  Exclude:
117
117
  - 'spec/lib/pennmarc/parser_spec.rb'
118
+ - 'spec/lib/pennmarc/services/title_suggestion_weight_service_spec.rb'
119
+
120
+ # Offense count: 1
121
+ # This cop supports unsafe autocorrection (--autocorrect-all).
122
+ # Configuration parameters: EnforcedStyle, AllowedMethods, AllowedPatterns.
123
+ # SupportedStyles: predicate, comparison
124
+ Style/NumericPredicate:
125
+ Exclude:
126
+ - 'lib/pennmarc/services/title_suggestion_weight_service.rb'
data/Gemfile CHANGED
@@ -19,6 +19,7 @@ group :test do
19
19
  end
20
20
 
21
21
  group :development do
22
- gem 'webrick', '~> 1.8'
22
+ gem 'puma'
23
+ gem 'rackup'
23
24
  gem 'yard', '~> 0.9'
24
25
  end
data/Gemfile.lock CHANGED
@@ -35,6 +35,7 @@ GEM
35
35
  nokogiri (~> 1.0)
36
36
  rexml
37
37
  minitest (5.26.0)
38
+ nio4r (2.7.4)
38
39
  nokogiri (1.18.9-arm64-darwin)
39
40
  racc (~> 1.4)
40
41
  nokogiri (1.18.9-x64-mingw-ucrt)
@@ -48,8 +49,12 @@ GEM
48
49
  ast (~> 2.4.1)
49
50
  racc
50
51
  prism (1.4.0)
52
+ puma (7.1.0)
53
+ nio4r (~> 2.0)
51
54
  racc (1.8.1)
52
- rack (3.2.0)
55
+ rack (3.2.3)
56
+ rackup (2.2.1)
57
+ rack (>= 3)
53
58
  rainbow (3.1.1)
54
59
  rake (13.3.0)
55
60
  regexp_parser (2.11.2)
@@ -121,7 +126,6 @@ GEM
121
126
  rubocop-rake
122
127
  rubocop-rspec
123
128
  uri (1.0.4)
124
- webrick (1.8.1)
125
129
  yard (0.9.37)
126
130
 
127
131
  PLATFORMS
@@ -137,11 +141,12 @@ DEPENDENCIES
137
141
  library_stdnums (~> 1.6)
138
142
  marc (~> 1.2)
139
143
  nokogiri (~> 1.15)
144
+ puma
145
+ rackup
140
146
  rake (~> 13.0)
141
147
  rspec (~> 3.12)
142
148
  simplecov (~> 0.22)
143
149
  upennlib-rubocop
144
- webrick (~> 1.8)
145
150
  yard (~> 0.9)
146
151
 
147
152
  BUNDLED WITH
@@ -36,7 +36,7 @@ module PennMARC
36
36
 
37
37
  Time.strptime(date_added, format)
38
38
  rescue StandardError => e
39
- puts 'Error parsing date in date added subfield. ' \
39
+ warn 'Error parsing date in date added subfield. ' \
40
40
  "mmsid: #{Identifier.mmsid(record)}, value: #{date_added}, error: #{e}"
41
41
  nil
42
42
  end
@@ -59,7 +59,7 @@ module PennMARC
59
59
 
60
60
  Time.strptime(date_time_string, '%Y%m%d%H%M%S.%N')
61
61
  rescue StandardError => e
62
- puts 'Error parsing last updated date. ' \
62
+ warn 'Error parsing last updated date. ' \
63
63
  "mmsid: #{Identifier.mmsid(record)}, value: #{date_time_string}, error: #{e}"
64
64
  nil
65
65
  end
@@ -156,7 +156,7 @@ module PennMARC
156
156
  system_details_notes += record.fields(%w[344 880]).filter_map do |field|
157
157
  next if field.tag == '880' && no_subfield_value_matches?(field, '6', /^344/)
158
158
 
159
- sub3_and_other_subs(field, &subfield_in?(%w[a b c d e f g h]))
159
+ sub3_and_other_subs(field, &subfield_in?(%w[a b c d e f g h i j]))
160
160
  end
161
161
  system_details_notes += record.fields(%w[345 346 880]).filter_map do |field|
162
162
  next if field.tag == '880' && no_subfield_value_matches?(field, '6', /^(345|346)/)
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative '../services/title_suggestion_weight_service'
4
+
3
5
  module PennMARC
4
6
  # This helper contains logic for parsing out Title and Title-related fields.
5
7
  class Title < Helper
@@ -36,6 +38,27 @@ module PennMARC
36
38
  NO_TITLE_PROVIDED = '[No title provided]'
37
39
 
38
40
  class << self
41
+ # Values for title suggester, including only ǂa and ǂb from
42
+ # {https://www.loc.gov/marc/bibliographic/bd245.html 245} field. Limits the output to 20 words and strips any
43
+ # trailing slashes.
44
+ # @param record [MARC::Record]
45
+ # @return [Array<String>] array of all title values for suggestion
46
+ def suggest(record)
47
+ record.fields(%w[245]).filter_map do |field|
48
+ join_subfields(field, &subfield_in?(%w[a b]))
49
+ .squish
50
+ .truncate_words(20)
51
+ .sub(%r{ /$}, '')
52
+ end
53
+ end
54
+
55
+ # An integer value used for weighting title suggestions. See {PennMARC::TitleSuggestionWeightService} for logic.
56
+ # @param record [MARC::Record]
57
+ # @return [Integer]
58
+ def suggest_weight(record)
59
+ PennMARC::TitleSuggestionWeightService.weight record
60
+ end
61
+
39
62
  # Main Title Search field. Takes from {https://www.loc.gov/marc/bibliographic/bd245.html 245} and linked 880.
40
63
  # @note Ported from get_title_1_search_values.
41
64
  # @param record [MARC::Record]
@@ -54,7 +77,7 @@ module PennMARC
54
77
  # @return [Array<String>] array of auxiliary title values for search
55
78
  def search_aux(record)
56
79
  values = search_aux_values(record: record, title_type: :main, &subfield_not_in?(%w[c 6 8])) +
57
- search_aux_values(record: record, title_type: :related, &subfield_not_in?(%w[s t])) +
80
+ search_aux_values(record: record, title_type: :related, &subfield_in?(%w[s t])) +
58
81
  search_aux_values(record: record, title_type: :entity, &subfield_in?(%w[t])) +
59
82
  search_aux_values(record: record, title_type: :note, &subfield_in?(%w[t]))
60
83
  values.uniq
@@ -82,7 +105,7 @@ module PennMARC
82
105
  # @return [Array<String>] auxiliary journal title information for search
83
106
  def journal_search_aux(record)
84
107
  values = search_aux_values(record: record, title_type: :main, journal: true, &subfield_not_in?(%w[c 6 8])) +
85
- search_aux_values(record: record, title_type: :related, journal: true, &subfield_not_in?(%w[s t])) +
108
+ search_aux_values(record: record, title_type: :related, journal: true, &subfield_in?(%w[s t])) +
86
109
  search_aux_values(record: record, title_type: :entity, journal: true, &subfield_in?(%w[t])) +
87
110
  search_aux_values(record: record, title_type: :note, journal: true, &subfield_in?(%w[t]))
88
111
  values.uniq
@@ -50,10 +50,9 @@ module PennMARC
50
50
  end
51
51
 
52
52
  # @param filename [String] name of mapping file in config directory, with file extension
53
- # @param symbolize_names [Boolean] whether or not to symbolize keys in returned hash
53
+ # @param symbolize_names [Boolean] whether to symbolize keys in returned hash
54
54
  # @return [Hash, nil] mapping as hash
55
55
  def load_map(filename, symbolize_names: true)
56
- puts { "Loading #{filename}" }
57
56
  YAML.safe_load(File.read(File.join(File.expand_path(__dir__), 'mappings', filename)),
58
57
  symbolize_names: symbolize_names)
59
58
  end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PennMARC
4
+ # A service to calculate suggestion weights based on a variety of criteria
5
+ class TitleSuggestionWeightService
6
+ # Starting score
7
+ BASE_WEIGHT = 10
8
+
9
+ # Array of [method name, score] pairs: each symbol names a boolean method on this object, and the score is
10
+ # added to the weight when that method returns true.
11
+ FACTORS = [
12
+ [:targeted_format?, 8],
13
+ [:published_in_last_ten_years?, 5],
14
+ [:electronic_holdings?, 3],
15
+ [:high_encoding_level?, 2],
16
+ [:physical_holdings?, 1],
17
+ [:low_encoding_level?, -2],
18
+ [:weird_format?, -5],
19
+ [:no_holdings?, -10]
20
+ ].freeze
21
+
22
+ # Score higher records with these formats
23
+ TARGETED_FORMATS = [Format::BOOK, Format::WEBSITE_DATABASE, Format::JOURNAL_PERIODICAL, Format::NEWSPAPER,
24
+ Format::SOUND_RECORDING, Format::MUSICAL_SCORE].freeze
25
+ # Score lower records with these formats
26
+ WEIRD_FORMATS = [Format::OTHER, Format::THREE_D_OBJECT].freeze
27
+
28
+ # See {PennMARC::EncodingLevel} for more of the logic that determines sort values
29
+ # An encoding sort level of this value is considered good
30
+ HIGH_ENCODING_SORT_LEVEL = 0
31
+ # An encoding sort level higher than this is considered poor
32
+ LOW_ENCODING_SORT_LEVEL = 4
33
+
34
+ class << self
35
+ # Calculate a weight for use in sorting good title suggestions from bad
36
+ # @param record [MARC::Record]
37
+ # @return [Integer]
38
+ def weight(record)
39
+ factors.reduce(BASE_WEIGHT) do |weight, (call, score)|
40
+ weight + (public_send(call, record) ? score : 0)
41
+ end
42
+ end
43
+
44
+ # @return [Array<Array>]
45
+ def factors
46
+ FACTORS
47
+ end
48
+
49
+ # @param record [MARC::Record]
50
+ # @return [Boolean]
51
+ def published_in_last_ten_years?(record)
52
+ return false unless Date.publication(record).present?
53
+
54
+ Date.publication(record) > 10.years.ago
55
+ end
56
+
57
+ # @param record [MARC::Record]
58
+ # @return [Boolean]
59
+ def electronic_holdings?(record)
60
+ Inventory.electronic(record)&.any? || false
61
+ end
62
+
63
+ # @param record [MARC::Record]
64
+ # @return [Boolean]
65
+ def physical_holdings?(record)
66
+ Inventory.physical(record)&.any? || false
67
+ end
68
+
69
+ # @param record [MARC::Record]
70
+ # @return [Boolean]
71
+ def targeted_format?(record)
72
+ (Format.facet(record) & TARGETED_FORMATS).any?
73
+ end
74
+
75
+ # @param record [MARC::Record]
76
+ # @return [Boolean]
77
+ def high_encoding_level?(record)
78
+ Encoding.level_sort(record) == HIGH_ENCODING_SORT_LEVEL
79
+ end
80
+
81
+ # @param record [MARC::Record]
82
+ # @return [Boolean]
83
+ def weird_format?(record)
84
+ (Format.facet(record) & WEIRD_FORMATS).any?
85
+ end
86
+
87
+ # @param record [MARC::Record]
88
+ # @return [Boolean]
89
+ def no_holdings?(record)
90
+ !electronic_holdings?(record) && !physical_holdings?(record)
91
+ end
92
+
93
+ # @param record [MARC::Record]
94
+ # @return [Boolean]
95
+ def low_encoding_level?(record)
96
+ return false unless Encoding.level_sort(record).present?
97
+
98
+ Encoding.level_sort(record) > LOW_ENCODING_SORT_LEVEL
99
+ end
100
+ end
101
+ end
102
+ end
data/lib/pennmarc/util.rb CHANGED
@@ -147,7 +147,7 @@ module PennMARC
147
147
  string.strip
148
148
  end
149
149
 
150
- # @param trailer [Symbol|String] to target for removal
150
+ # @param trailer [Symbol, String] to target for removal
151
151
  # @param string [String] to modify
152
152
  # @return [String]
153
153
  def trim_trailing(trailer, string)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module PennMARC
4
- VERSION = '1.3.5'
4
+ VERSION = '1.4.1'
5
5
  end
@@ -90,7 +90,7 @@ describe 'PennMARC::Date' do
90
90
  expect {
91
91
  helper.added(record)
92
92
  }.to output('Error parsing date in date added subfield. mmsid: mmsid, value: invalid date, ' \
93
- "error: invalid date or strptime format - `invalid date' `%Y-%m-%d %H:%M:%S'\n").to_stdout
93
+ "error: invalid date or strptime format - `invalid date' `%Y-%m-%d %H:%M:%S'\n").to_stderr
94
94
  end
95
95
  end
96
96
  end
@@ -120,7 +120,7 @@ describe 'PennMARC::Date' do
120
120
  expect {
121
121
  helper.last_updated(record)
122
122
  }.to output('Error parsing last updated date. mmsid: mmsid, value: invalid date, ' \
123
- "error: invalid date or strptime format - `invalid date' `%Y%m%d%H%M%S.%N'\n").to_stdout
123
+ "error: invalid date or strptime format - `invalid date' `%Y%m%d%H%M%S.%N'\n").to_stderr
124
124
  end
125
125
  end
126
126
  end
@@ -224,89 +224,90 @@ describe 'PennMARC::Note' do
224
224
  expect(values).to contain_exactly('An Abstract Additional Summary ProQuest',
225
225
  'Alt Abstract Alt Additional Summary Alt ProQuest')
226
226
  end
227
+ end
227
228
 
228
- describe '.arrangement_show' do
229
- let(:record) { marc_record(fields: fields) }
229
+ describe '.arrangement_show' do
230
+ let(:record) { marc_record(fields: fields) }
230
231
 
231
- let(:fields) do
232
- [
233
- marc_field(tag: '351', subfields: { a: 'Organized into five subseries', b: 'Arrangement pattern', c: 'Series',
234
- '3': 'materials' }),
235
- marc_field(tag: '880', subfields: { a: 'Alt organization', b: 'Alt arrangement', c: 'Alt hierarchical level',
236
- '3': 'Alt materials', '6': '351' })
237
- ]
238
- end
232
+ let(:fields) do
233
+ [
234
+ marc_field(tag: '351', subfields: { a: 'Organized into five subseries', b: 'Arrangement pattern', c: 'Series',
235
+ '3': 'materials' }),
236
+ marc_field(tag: '880', subfields: { a: 'Alt organization', b: 'Alt arrangement', c: 'Alt hierarchical level',
237
+ '3': 'Alt materials', '6': '351' })
238
+ ]
239
+ end
239
240
 
240
- let(:values) { helper.arrangement_show(record) }
241
+ let(:values) { helper.arrangement_show(record) }
241
242
 
242
- it 'returns expected values from 351 and its linked alternate' do
243
- expect(values).to contain_exactly('Organized into five subseries Arrangement pattern Series materials',
244
- 'Alt organization Alt arrangement Alt hierarchical level Alt materials')
245
- end
243
+ it 'returns expected values from 351 and its linked alternate' do
244
+ expect(values).to contain_exactly('Organized into five subseries Arrangement pattern Series materials',
245
+ 'Alt organization Alt arrangement Alt hierarchical level Alt materials')
246
246
  end
247
+ end
247
248
 
248
- describe '.system_details_show' do
249
- let(:record) { marc_record(fields: fields) }
250
-
251
- let(:fields) do
252
- [
253
- marc_field(tag: '538', subfields: { a: 'Blu-ray, region A, 1080p High Definition, full screen (1.33:1)',
254
- i: 'display text for URI', u: 'http://www.universal.resource/locator ',
255
- '3': ['Blu-ray disc.', '2015'] }),
256
- marc_field(tag: '344', subfields: { a: 'digital', b: 'optical', c: '1.4 m/s', g: 'stereo',
257
- h: 'digital recording', '3': 'audio disc' }),
258
- marc_field(tag: '345', subfields: { a: '1 film reel (25 min.)', b: '24 fps', '3': 'Polyester print' }),
259
- marc_field(tag: '346', subfields: { a: 'VHS', b: 'NTSC', '3': 'original videocassette' }),
260
- marc_field(tag: '347', subfields: { a: 'video file', b: 'DVD video', e: 'region', '3': 'DVD' }),
261
- marc_field(tag: '880', subfields: { a: 'Alt system details', i: 'Alternative display text', u: 'Alt URI',
262
- '3': 'Alt materials.', '6': '538' }),
263
- marc_field(tag: '880', subfields: { a: 'Alt recording', b: 'Alt medium', c: 'Alt playing speed',
264
- g: 'Alt channel',
265
- h: 'Alt characteristic', '3': 'Alt materials.',
266
- '6': '344' }),
267
- marc_field(tag: '880', subfields: { a: 'Alt presentation format', b: 'Alt projection speed',
268
- '3': 'Alt materials.', '6': '345' }),
269
- marc_field(tag: '880', subfields: { a: 'Alt video format', b: 'Alt broadcast', '3': 'Alt materials.',
270
- '6': '346' }),
271
- marc_field(tag: '880', subfields: { a: 'Alt file type', b: 'Alt encoding', '3': 'Alt materials.',
272
- '6': '347-02' })
273
-
274
- ]
275
- end
249
+ describe '.system_details_show' do
250
+ let(:record) { marc_record(fields: fields) }
276
251
 
277
- let(:values) { helper.system_details_show(record) }
252
+ let(:fields) do
253
+ [
254
+ marc_field(tag: '538', subfields: { a: 'Blu-ray, region A, 1080p High Definition, full screen (1.33:1)',
255
+ i: 'display text for URI', u: 'http://www.universal.resource/locator ',
256
+ '3': ['Blu-ray disc.', '2015'] }),
257
+ marc_field(tag: '344', subfields: { a: 'digital', b: 'optical', c: '1.4 m/s', g: 'stereo',
258
+ h: 'digital recording', i: 'sound', j: 'Electrical capture',
259
+ '3': 'audio disc' }),
260
+ marc_field(tag: '345', subfields: { a: '1 film reel (25 min.)', b: '24 fps', '3': 'Polyester print' }),
261
+ marc_field(tag: '346', subfields: { a: 'VHS', b: 'NTSC', '3': 'original videocassette' }),
262
+ marc_field(tag: '347', subfields: { a: 'video file', b: 'DVD video', e: 'region', '3': 'DVD' }),
263
+ marc_field(tag: '880', subfields: { a: 'Alt system details', i: 'Alternative display text', u: 'Alt URI',
264
+ '3': 'Alt materials.', '6': '538' }),
265
+ marc_field(tag: '880', subfields: { a: 'Alt recording', b: 'Alt medium', c: 'Alt playing speed',
266
+ g: 'Alt channel',
267
+ h: 'Alt characteristic', '3': 'Alt materials.',
268
+ '6': '344' }),
269
+ marc_field(tag: '880', subfields: { a: 'Alt presentation format', b: 'Alt projection speed',
270
+ '3': 'Alt materials.', '6': '345' }),
271
+ marc_field(tag: '880', subfields: { a: 'Alt video format', b: 'Alt broadcast', '3': 'Alt materials.',
272
+ '6': '346' }),
273
+ marc_field(tag: '880', subfields: { a: 'Alt file type', b: 'Alt encoding', '3': 'Alt materials.',
274
+ '6': '347-02' })
278
275
 
279
- it 'returns expected from 5xx and 3xx fields and their linked alternates' do
280
- expect(values).to contain_exactly(
281
- 'Blu-ray disc: 2015 Blu-ray, region A, 1080p High Definition, full screen (1.33:1) display
276
+ ]
277
+ end
278
+ let(:values) { helper.system_details_show(record) }
279
+
280
+ it 'returns expected from 5xx and 3xx fields and their linked alternates' do
281
+ expect(values).to contain_exactly(
282
+ 'Blu-ray disc: 2015 Blu-ray, region A, 1080p High Definition, full screen (1.33:1) display
282
283
  text for URI http://www.universal.resource/locator'.squish,
283
- 'audio disc digital optical 1.4 m/s stereo digital recording', 'Polyester print 1 film reel (25 min.) 24 fps',
284
- 'original videocassette VHS NTSC', 'DVD video file DVD video region',
285
- 'Alt materials Alt system details Alternative display text Alt URI',
286
- 'Alt materials Alt recording Alt medium Alt playing speed Alt channel Alt characteristic',
287
- 'Alt materials Alt presentation format Alt projection speed',
288
- 'Alt materials Alt video format Alt broadcast',
289
- 'Alt materials Alt file type Alt encoding'
290
- )
291
- end
284
+ 'audio disc digital optical 1.4 m/s stereo digital recording sound Electrical capture',
285
+ 'Polyester print 1 film reel (25 min.) 24 fps',
286
+ 'original videocassette VHS NTSC', 'DVD video file DVD video region',
287
+ 'Alt materials Alt system details Alternative display text Alt URI',
288
+ 'Alt materials Alt recording Alt medium Alt playing speed Alt channel Alt characteristic',
289
+ 'Alt materials Alt presentation format Alt projection speed',
290
+ 'Alt materials Alt video format Alt broadcast',
291
+ 'Alt materials Alt file type Alt encoding'
292
+ )
292
293
  end
294
+ end
293
295
 
294
- describe '.bound_with_show' do
295
- let(:record) { marc_record(fields: fields) }
296
+ describe '.bound_with_show' do
297
+ let(:record) { marc_record(fields: fields) }
296
298
 
297
- let(:fields) do
298
- [
299
- marc_field(tag: '501', subfields: { a: 'With: Peer Gynt (Suite) no. 1-2 / Edvard Grieg' }),
300
- marc_field(tag: '501', subfields: { a: 'With: Schumann, C. Romances, piano, op. 11. No. 2' })
301
- ]
302
- end
299
+ let(:fields) do
300
+ [
301
+ marc_field(tag: '501', subfields: { a: 'With: Peer Gynt (Suite) no. 1-2 / Edvard Grieg' }),
302
+ marc_field(tag: '501', subfields: { a: 'With: Schumann, C. Romances, piano, op. 11. No. 2' })
303
+ ]
304
+ end
303
305
 
304
- let(:values) { helper.bound_with_show(record) }
306
+ let(:values) { helper.bound_with_show(record) }
305
307
 
306
- it 'returns expected values' do
307
- expect(values).to contain_exactly('With: Peer Gynt (Suite) no. 1-2 / Edvard Grieg',
308
- 'With: Schumann, C. Romances, piano, op. 11. No. 2')
309
- end
308
+ it 'returns expected values' do
309
+ expect(values).to contain_exactly('With: Peer Gynt (Suite) no. 1-2 / Edvard Grieg',
310
+ 'With: Schumann, C. Romances, piano, op. 11. No. 2')
310
311
  end
311
312
  end
312
313
  end
@@ -6,6 +6,39 @@ describe 'PennMARC::Title' do
6
6
  let(:fields) { [marc_field(tag: '245', subfields: subfields)] }
7
7
  let(:record) { marc_record fields: fields, leader: leader }
8
8
 
9
+ describe '.suggest' do
10
+ context 'with slashes in the title as well as at the end of ǂb' do
11
+ let(:subfields) { { a: 'Title /', b: 'Subtitle /' } }
12
+
13
+ it 'removes only the trailing slash' do
14
+ expect(helper.suggest(record).first).to eq 'Title / Subtitle'
15
+ end
16
+ end
17
+
18
+ context 'with a long title more than twenty words' do
19
+ let(:subfields) do
20
+ { a: 'The Book of Very Short Words With a Lot of Words',
21
+ b: 'Containing many words with that may or may not have many characters /' }
22
+ end
23
+
24
+ it 'truncates the title to twenty words and adds a trailing ellipsis' do
25
+ truncated_suggestion = helper.suggest(record).first
26
+ expect(truncated_suggestion).to eq(
27
+ 'The Book of Very Short Words With a Lot of Words Containing many words with that may or may not...'
28
+ )
29
+ expect(truncated_suggestion.split(' ').count).to eq 20
30
+ end
31
+ end
32
+
33
+ context 'with other subfields present' do
34
+ let(:subfields) { { a: 'Title', b: 'Subtitle', c: 'Author' } }
35
+
36
+ it 'returns only title fields' do
37
+ expect(helper.suggest(record).first).not_to include 'Author'
38
+ end
39
+ end
40
+ end
41
+
9
42
  describe '.search' do
10
43
  let(:fields) do
11
44
  [marc_field(tag: '245', subfields: { a: 'Title', b: 'Subtitle', c: 'Responsibility', h: 'Medium' }),
@@ -24,7 +57,8 @@ describe 'PennMARC::Title' do
24
57
  let(:fields) do
25
58
  [marc_field(tag: '130', subfields: { a: 'Uniform Title', c: '130 not included' }),
26
59
  marc_field(tag: '880', subfields: { '6': '130', a: 'Alternative Uniform Title' }),
27
- marc_field(tag: '773', subfields: { a: 'Host Uniform Title', s: '773 not included' }),
60
+ marc_field(tag: '773', subfields: { a: 'Host Item - Main entry heading', s: 'Host Item - Uniform title',
61
+ t: 'Host Item - Title' }),
28
62
  marc_field(tag: '700', subfields: { t: 'Personal Entry Title', s: '700 not included' }),
29
63
  marc_field(tag: '505', subfields: { t: 'Invalid Formatted Contents Note Title' }, indicator1: 'invalid'),
30
64
  marc_field(tag: '505', subfields: { t: 'Formatted Contents Note Title', s: '505 not included' },
@@ -32,18 +66,18 @@ describe 'PennMARC::Title' do
32
66
  end
33
67
 
34
68
  it 'returns auxiliary titles' do
35
- expect(helper.search_aux(record)).to contain_exactly('Uniform Title', 'Host Uniform Title',
36
- 'Alternative Uniform Title', 'Personal Entry Title',
37
- 'Formatted Contents Note Title')
69
+ expect(helper.search_aux(record)).to contain_exactly('Uniform Title', 'Alternative Uniform Title',
70
+ 'Host Item - Uniform title Host Item - Title',
71
+ 'Personal Entry Title', 'Formatted Contents Note Title')
38
72
  end
39
73
 
40
74
  context 'when the leader indicates the record is a serial' do
41
75
  let(:leader) { 'ZZZZZnasZa22ZZZZZzZZ4500' }
42
76
 
43
77
  it 'returns auxiliary titles' do
44
- expect(helper.search_aux(record)).to contain_exactly('Uniform Title', 'Host Uniform Title',
45
- 'Alternative Uniform Title', 'Personal Entry Title',
46
- 'Formatted Contents Note Title')
78
+ expect(helper.search_aux(record)).to contain_exactly('Uniform Title', 'Alternative Uniform Title',
79
+ 'Host Item - Uniform title Host Item - Title',
80
+ 'Personal Entry Title', 'Formatted Contents Note Title')
47
81
  end
48
82
  end
49
83
  end
@@ -74,7 +108,8 @@ describe 'PennMARC::Title' do
74
108
  let(:fields) do
75
109
  [marc_field(tag: '130', subfields: { a: 'Uniform Title', c: '130 not included' }),
76
110
  marc_field(tag: '880', subfields: { '6': '130', a: 'Alternative Uniform Title' }),
77
- marc_field(tag: '773', subfields: { a: 'Host Uniform Title', s: '773 not included' }),
111
+ marc_field(tag: '773', subfields: { a: 'Host Item - Main entry heading', s: 'Host Item - Uniform title',
112
+ t: 'Host Item - Title' }),
78
113
  marc_field(tag: '700', subfields: { t: 'Personal Entry Title', s: '700 not included' }),
79
114
  marc_field(tag: '505', subfields: { t: 'Invalid Formatted Contents Note Title' }, indicator1: 'invalid'),
80
115
  marc_field(tag: '505', subfields: { t: 'Formatted Contents Note Title', s: '505 not included' },
@@ -83,7 +118,8 @@ describe 'PennMARC::Title' do
83
118
 
84
119
  it 'returns auxiliary journal search titles' do
85
120
  expect(helper.journal_search_aux(record)).to contain_exactly('Uniform Title', 'Alternative Uniform Title',
86
- 'Host Uniform Title', 'Personal Entry Title',
121
+ 'Host Item - Uniform title Host Item - Title',
122
+ 'Personal Entry Title',
87
123
  'Formatted Contents Note Title')
88
124
  end
89
125
 
@@ -0,0 +1,231 @@
1
+ # frozen_string_literal: true
2
+
3
+ describe PennMARC::TitleSuggestionWeightService do
4
+ let(:record) { instance_double MARC::Record }
5
+
6
+ describe '.weight' do
7
+ context 'with defined factors' do
8
+ before do
9
+ allow(described_class).to receive_messages(
10
+ targeted_format?: true,
11
+ published_in_last_ten_years?: false,
12
+ electronic_holdings?: false,
13
+ high_encoding_level?: false,
14
+ physical_holdings?: false,
15
+ low_encoding_level?: false,
16
+ weird_format?: false,
17
+ no_holdings?: false
18
+ )
19
+ end
20
+
21
+ it 'scores properly based on factor responce valence' do
22
+ expected_score = described_class::BASE_WEIGHT + described_class::FACTORS[0].second
23
+ expect(described_class.weight(record)).to eq expected_score
24
+ end
25
+ end
26
+ end
27
+
28
+ describe '.published_in_the_last_ten_years' do
29
+ before { allow(PennMARC::Date).to receive(:publication).with(record).and_return(record_date) }
30
+
31
+ let(:value) { described_class.published_in_last_ten_years?(record) }
32
+
33
+ context 'with no date' do
34
+ let(:record_date) { nil }
35
+
36
+ it 'returns true' do
37
+ expect(value).to be false
38
+ end
39
+ end
40
+
41
+ context 'with a recent date' do
42
+ let(:record_date) { Time.now }
43
+
44
+ it 'returns true' do
45
+ expect(value).to be true
46
+ end
47
+ end
48
+
49
+ context 'with an ancient date' do
50
+ let(:record_date) { Time.now - 400.years }
51
+
52
+ it 'returns false' do
53
+ expect(value).to be false
54
+ end
55
+ end
56
+ end
57
+
58
+ describe '.targeted_format?' do
59
+ before { allow(PennMARC::Format).to receive(:facet).with(record).and_return([record_format]) }
60
+
61
+ let(:value) { described_class.targeted_format?(record) }
62
+
63
+ context 'with no format' do
64
+ let(:record_format) { nil }
65
+
66
+ it 'returns false' do
67
+ expect(value).to be false
68
+ end
69
+ end
70
+
71
+ context 'with a targeted format' do
72
+ let(:record_format) { PennMARC::TitleSuggestionWeightService::TARGETED_FORMATS.sample }
73
+
74
+ it 'returns true' do
75
+ expect(value).to be true
76
+ end
77
+ end
78
+
79
+ context 'with a non-targeted format' do
80
+ let(:record_format) { PennMARC::TitleSuggestionWeightService::WEIRD_FORMATS.sample }
81
+
82
+ it 'returns false' do
83
+ expect(value).to be false
84
+ end
85
+ end
86
+ end
87
+
88
+ describe '.weird_format?' do
89
+ before { allow(PennMARC::Format).to receive(:facet).with(record).and_return([record_format]) }
90
+
91
+ let(:value) { described_class.weird_format?(record) }
92
+
93
+ context 'with no format' do
94
+ let(:record_format) { nil }
95
+
96
+ it 'returns false' do
97
+ expect(value).to be false
98
+ end
99
+ end
100
+
101
+ context 'with a weird format' do
102
+ let(:record_format) { PennMARC::TitleSuggestionWeightService::WEIRD_FORMATS.sample }
103
+
104
+ it 'returns true' do
105
+ expect(value).to be true
106
+ end
107
+ end
108
+
109
+ context 'with a non-weird format' do
110
+ let(:record_format) { PennMARC::TitleSuggestionWeightService::TARGETED_FORMATS.sample }
111
+
112
+ it 'returns false' do
113
+ expect(value).to be false
114
+ end
115
+ end
116
+ end
117
+
118
+ describe '.low_encoding_level?' do
119
+ before { allow(PennMARC::Encoding).to receive(:level_sort).with(record).and_return(encoding_sort_score) }
120
+
121
+ let(:value) { described_class.low_encoding_level?(record) }
122
+
123
+ context 'with no encoding level' do
124
+ let(:encoding_sort_score) { nil }
125
+
126
+ it 'returns false' do
127
+ expect(value).to be false
128
+ end
129
+ end
130
+
131
+ context 'with a low encoding level' do
132
+ let(:encoding_sort_score) { 11 }
133
+
134
+ it 'returns true' do
135
+ expect(value).to be true
136
+ end
137
+ end
138
+
139
+ context 'with a high encoding level' do
140
+ let(:encoding_sort_score) { PennMARC::TitleSuggestionWeightService::HIGH_ENCODING_SORT_LEVEL }
141
+
142
+ it 'returns false' do
143
+ expect(value).to be false
144
+ end
145
+ end
146
+ end
147
+
148
+ describe '.electronic_holdings?' do
149
+ before do
150
+ allow(PennMARC::Inventory).to receive(:electronic).with(record).and_return(holdings)
151
+ end
152
+
153
+ let(:value) { described_class.electronic_holdings?(record) }
154
+
155
+ context 'with electronic holdings' do
156
+ let(:holdings) { [PennMARC::InventoryEntry::Electronic] }
157
+
158
+ it 'returns true' do
159
+ expect(value).to be true
160
+ end
161
+ end
162
+
163
+ context 'without any holdings' do
164
+ let(:holdings) { [] }
165
+
166
+ it 'returns false' do
167
+ expect(value).to be false
168
+ end
169
+ end
170
+ end
171
+
172
+ describe '.physical_holdings?' do
173
+ before do
174
+ allow(PennMARC::Inventory).to receive(:physical).with(record).and_return(holdings)
175
+ end
176
+
177
+ let(:value) { described_class.physical_holdings?(record) }
178
+
179
+ context 'with physical holdings' do
180
+ let(:holdings) { [PennMARC::InventoryEntry::Physical] }
181
+
182
+ it 'returns true' do
183
+ expect(value).to be true
184
+ end
185
+ end
186
+
187
+ context 'without any holdings' do
188
+ let(:holdings) { [] }
189
+
190
+ it 'returns false' do
191
+ expect(value).to be false
192
+ end
193
+ end
194
+ end
195
+
196
+ describe '.no_holdings?' do
197
+ before do
198
+ allow(PennMARC::Inventory).to receive(:physical).with(record).and_return(physical_holdings)
199
+ allow(PennMARC::Inventory).to receive(:electronic).with(record).and_return(electronic_holdings)
200
+ end
201
+
202
+ let(:value) { described_class.no_holdings?(record) }
203
+
204
+ context 'with neither physical nor electronic holdings' do
205
+ let(:physical_holdings) { [] }
206
+ let(:electronic_holdings) { [] }
207
+
208
+ it 'returns true' do
209
+ expect(value).to be true
210
+ end
211
+ end
212
+
213
+ context 'with only electronic holdings' do
214
+ let(:physical_holdings) { [] }
215
+ let(:electronic_holdings) { [instance_double(PennMARC::InventoryEntry::Electronic)] }
216
+
217
+ it 'returns false' do
218
+ expect(value).to be false
219
+ end
220
+ end
221
+
222
+ context 'with only physical holdings' do
223
+ let(:physical_holdings) { [instance_double(PennMARC::InventoryEntry::Physical)] }
224
+ let(:electronic_holdings) { [] }
225
+
226
+ it 'returns false' do
227
+ expect(value).to be false
228
+ end
229
+ end
230
+ end
231
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pennmarc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.5
4
+ version: 1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Kanning
@@ -12,7 +12,7 @@ authors:
12
12
  autorequire:
13
13
  bindir: bin
14
14
  cert_chain: []
15
- date: 2025-10-16 00:00:00.000000000 Z
15
+ date: 2025-11-13 00:00:00.000000000 Z
16
16
  dependencies:
17
17
  - !ruby/object:Gem::Dependency
18
18
  name: activesupport
@@ -143,6 +143,7 @@ files:
143
143
  - lib/pennmarc/mappings/locations.yml
144
144
  - lib/pennmarc/mappings/relator.yml
145
145
  - lib/pennmarc/parser.rb
146
+ - lib/pennmarc/services/title_suggestion_weight_service.rb
146
147
  - lib/pennmarc/test/marc_helpers.rb
147
148
  - lib/pennmarc/util.rb
148
149
  - lib/pennmarc/version.rb
@@ -172,6 +173,7 @@ files:
172
173
  - spec/lib/pennmarc/helpers/title_spec.rb
173
174
  - spec/lib/pennmarc/marc_util_spec.rb
174
175
  - spec/lib/pennmarc/parser_spec.rb
176
+ - spec/lib/pennmarc/services/title_suggestion_weight_service_spec.rb
175
177
  - spec/spec_helper.rb
176
178
  - spec/support/fixture_helpers.rb
177
179
  homepage: https://gitlab.library.upenn.edu/dld/catalog/pennmarc