cocina-models 0.94.0 → 0.94.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 325cc1afd87f6c47380be3a4858d8858e31661efdfd39c0360c4a62f4541b6f2
4
- data.tar.gz: 4539fb9f4a715cc488e50a67df0c1e345ff227be9afe1dbd94c6466d4902a41b
3
+ metadata.gz: 38c6748d5c7659224f4139fc5d96b6b58732183bf133bb8ea32657a81fadc9ff
4
+ data.tar.gz: 29615bf905de450af973bd8a7ee00d0cd45810a8d2bd2b55166b2e686e0e048f
5
5
  SHA512:
6
- metadata.gz: d37e0ca7edb90caf68d706741e1f04ad8eafa767527af393f72ef3fe44ab55fa056b027848c46d8c43ed4ccdc97369a86a93a7d1d82b60b8d2302307fc1e634b
7
- data.tar.gz: 2bca5d3dcf6f5d141a0402545a558b17953af6f64311b29e11a57fc0f0d842af983a901b86e1a1cc8265c61f454db88d5f2a84460bb90e5bd0c0d8031073e913
6
+ metadata.gz: 3d6192a5bc54d0133483649d0bf421a4ce32f85b328d9cda131a6ce8aff7df180b045de1b58d5c78836a7ec53e326b04a141e3a616046febfc31650ec8ef4e80
7
+ data.tar.gz: 93069e72c38d812ddd493c0de9abcb80a0733fad353a36721821250cbf8c36482b1711748760a8f9a1cbc8fa4915654cf4236496f4a99eaa4ab8e687fee74cbe
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cocina-models (0.94.0)
4
+ cocina-models (0.94.1)
5
5
  activesupport
6
6
  deprecation
7
7
  dry-struct (~> 1.0)
@@ -21,7 +21,7 @@ PATH
21
21
  GEM
22
22
  remote: https://rubygems.org/
23
23
  specs:
24
- activesupport (7.1.2)
24
+ activesupport (7.1.3)
25
25
  base64
26
26
  bigdecimal
27
27
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -34,14 +34,14 @@ GEM
34
34
  ast (2.4.2)
35
35
  attr_extras (7.1.0)
36
36
  base64 (0.2.0)
37
- bigdecimal (3.1.5)
37
+ bigdecimal (3.1.6)
38
38
  byebug (11.1.3)
39
39
  committee (5.0.0)
40
40
  json_schema (~> 0.14, >= 0.14.3)
41
41
  openapi_parser (~> 1.0)
42
42
  rack (>= 1.5)
43
43
  commonmarker (0.23.10)
44
- concurrent-ruby (1.2.2)
44
+ concurrent-ruby (1.2.3)
45
45
  connection_pool (2.4.1)
46
46
  deprecation (1.1.0)
47
47
  activesupport
@@ -82,7 +82,7 @@ GEM
82
82
  multi_json
83
83
  language_server-protocol (3.17.0.3)
84
84
  mini_portile2 (2.8.5)
85
- minitest (5.21.1)
85
+ minitest (5.21.2)
86
86
  multi_json (1.15.0)
87
87
  mutex_m (0.2.0)
88
88
  nokogiri (1.16.0)
@@ -93,7 +93,7 @@ GEM
93
93
  openapi_parser (1.0.0)
94
94
  optimist (3.1.0)
95
95
  parallel (1.24.0)
96
- parser (3.3.0.3)
96
+ parser (3.3.0.5)
97
97
  ast (~> 2.4.1)
98
98
  racc
99
99
  patience_diff (1.2.0)
@@ -121,7 +121,7 @@ GEM
121
121
  rspec-core (>= 2, < 4, != 2.12.0)
122
122
  rss (0.3.0)
123
123
  rexml
124
- rubocop (1.60.0)
124
+ rubocop (1.60.1)
125
125
  json (~> 2.3)
126
126
  language_server-protocol (>= 3.17.0)
127
127
  parallel (~> 1.10)
data/README.md CHANGED
@@ -143,7 +143,7 @@ which pushes the gem to rubygems.org.
143
143
 
144
144
  ### Step 2: Update client gems coupled to the models
145
145
 
146
- Release new versions of [sdr-client](https://github.com/sul-dlss/sdr-client) and [dor-services-client](https://github.com/sul-dlss/dor-services-client/) pinned to use the new cocina-models version because applications such as [Argo](https://github.com/sul-dlss/argo) depend on both of these gems using the same models.
146
+ Release new versions of [sdr-client](https://github.com/sul-dlss/sdr-client), [dor-services-client](https://github.com/sul-dlss/dor-services-client/), and [dor_indexing](https://github.com/sul-dlss/dor_indexing/) pinned to use the new cocina-models version because applications such as [Argo](https://github.com/sul-dlss/argo) depend on all of these gems using the same models.
147
147
 
148
148
  ### Step 3: Update services directly coupled to the models
149
149
 
@@ -12,7 +12,8 @@ module Cocina
12
12
  # @param [Symbol] strategy ":first" is the strategy for selection when primary or display
13
13
  # title are missing
14
14
  # @param [Boolean] add_punctuation determines if the title should be formatted with punctuation
15
- # @return [String] the title value for Solr
15
+ # @return [String, Array] the title value for Solr - for :first strategy, a string; for :all strategy, an array
16
+ # (e.g. title displayed in blacklight search results vs boosting values for search result rankings)
16
17
  def self.build(titles, strategy: :first, add_punctuation: true)
17
18
  if titles.respond_to?(:description)
18
19
  Deprecation.warn(self,
@@ -27,7 +28,7 @@ module Cocina
27
28
  # we can boost matches on it in search results (boost matching this string higher than matching full title string)
28
29
  # e.g. "The Hobbit" (main_title) vs "The Hobbit, or, There and Back Again" (full_title)
29
30
  # @param [Array<Cocina::Models::Title,Cocina::Models::DescriptiveValue>] titles the titles to consider
30
- # @return [String] the main title value for Solr
31
+ # @return [Array<String>] the main title value(s) for Solr - array due to possible parallelValue
31
32
  def self.main_title(titles)
32
33
  new(strategy: :first, add_punctuation: false).main_title(titles)
33
34
  end
@@ -35,9 +36,9 @@ module Cocina
35
36
  # the "full title" is the title WITH subtitle, part name, etc. We want to be able to index it separately so
36
37
  # we can boost matches on it in search results (boost matching this string higher than other titles present)
37
38
  # @param [Array<Cocina::Models::Title,Cocina::Models::DescriptiveValue>] titles the titles to consider
38
- # @return [String] the title value for Solr
39
+ # @return [Array<String>] the full title value(s) for Solr - array due to possible parallelValue
39
40
  def self.full_title(titles)
40
- new(strategy: :first, add_punctuation: false).build(titles)
41
+ [new(strategy: :first, add_punctuation: false, only_one_parallel_value: false).build(titles)].flatten.compact
41
42
  end
42
43
 
43
44
  # "additional titles" are all title data except for full_title. We want to be able to index it separately so
@@ -45,16 +46,28 @@ module Cocina
45
46
  # @param [Array<Cocina::Models::Title,Cocina::Models::DescriptiveValue>] titles the titles to consider
46
47
  # @return [Array<String>] the values for Solr
47
48
  def self.additional_titles(titles)
48
- new(strategy: :all, add_punctuation: false).build(titles) - [full_title(titles)]
49
+ [new(strategy: :all, add_punctuation: false).build(titles)].flatten - full_title(titles)
49
50
  end
50
51
 
51
- def initialize(strategy:, add_punctuation:)
52
+ # @param strategy [Symbol] ":first" selects a single title value based on precedence of
53
+ # primary, untyped, first occurrence. ":all" returns an array containing all the values.
54
+ # @param add_punctuation [boolean] whether the title should be formatted with punctuation (think of a structured
55
+ # value coming from a MARC record, which is designed for catalog cards.)
56
+ # @param only_one_parallel_value [boolean] when true, choose one of the parallel values according to precedence
57
+ # of primary, untyped, first occurrence. When false, return an array containing all the parallel values.
58
+ # Why? Think of e.g. title displayed in blacklight search results vs boosting values for ranking of search
59
+ # results
60
+ def initialize(strategy:, add_punctuation:, only_one_parallel_value: true)
52
61
  @strategy = strategy
53
62
  @add_punctuation = add_punctuation
63
+ @only_one_parallel_value = only_one_parallel_value
54
64
  end
55
65
 
56
66
  # @param [Array<Cocina::Models::Title>] cocina_titles the titles to consider
57
- # @return [String] the title value for Solr
67
+ # @return [String, Array] the title value for Solr - for :first strategy, a string; for :all strategy, an array
68
+ # (e.g. title displayed in blacklight search results vs boosting values for search result rankings)
69
+ #
70
+ # rubocop:disable Metrics/PerceivedComplexity
58
71
  def build(cocina_titles)
59
72
  cocina_title = primary_title(cocina_titles) || untyped_title(cocina_titles)
60
73
  cocina_title = other_title(cocina_titles) if cocina_title.blank?
@@ -62,15 +75,23 @@ module Cocina
62
75
  if strategy == :first
63
76
  extract_title(cocina_title)
64
77
  else
65
- cocina_titles.map { |ctitle| extract_title(ctitle) }.flatten
78
+ result = cocina_titles.map { |ctitle| extract_title(ctitle) }.flatten
79
+ if only_one_parallel_value? && result.length == 1
80
+ result.first
81
+ else
82
+ result
83
+ end
66
84
  end
67
85
  end
86
+ # rubocop:enable Metrics/PerceivedComplexity
68
87
 
88
+ # this is the single "short title" - the title without subtitle, part name, etc.
89
+ # this may be useful for boosting and exact matching for search results
90
+ # @return [Array<String>] the main title value(s) for Solr - can be array due to parallel titles
69
91
  def main_title(titles)
70
92
  cocina_title = primary_title(titles) || untyped_title(titles)
71
93
  cocina_title = other_title(titles) if cocina_title.blank?
72
94
 
73
- cocina_title = cocina_title.first if cocina_title.is_a?(Array)
74
95
  extract_main_title(cocina_title)
75
96
  end
76
97
 
@@ -79,40 +100,71 @@ module Cocina
79
100
  attr_reader :strategy
80
101
 
81
102
  def extract_title(cocina_title)
103
+ title_values = if cocina_title.value
104
+ cocina_title.value
105
+ elsif cocina_title.structuredValue.present?
106
+ rebuild_structured_value(cocina_title)
107
+ elsif cocina_title.parallelValue.present?
108
+ extract_title_parallel_values(cocina_title)
109
+ end
110
+ result = [title_values].flatten.compact.map { |val| remove_trailing_punctuation(val.strip) }
111
+ result.length == 1 ? result.first : result
112
+ end
113
+
114
+ # strategy :first says to return a single value (:first is the default strategy of self.build)
115
+ # only_one_parallel_value? says to return a single value, even if that value is a parallelValue (default: true)
116
+ #
117
+ # rubocop:disable Metrics/PerceivedComplexity
118
+ def extract_title_parallel_values(cocina_title)
119
+ primary = cocina_title.parallelValue.find { |pvalue| pvalue.status == 'primary' }
120
+ if primary && only_one_parallel_value? && strategy == :first
121
+ # we have a primary title and we know we want a single value
122
+ extract_title(primary)
123
+ elsif only_one_parallel_value? && strategy == :first
124
+ # no primary value; algorithm says prefer an untyped value over a typed value for single value
125
+ untyped = cocina_title.parallelValue.find { |pvalue| pvalue.type.blank? }
126
+ extract_title(untyped || cocina_title.parallelValue.first)
127
+ else
128
+ cocina_title.parallelValue.map { |pvalue| extract_title(pvalue) }
129
+ end
130
+ end
131
+ # rubocop:enable Metrics/PerceivedComplexity
132
+
133
+ # @return [Array<String>] the main title value(s) for Solr - can be array due to parallel titles
134
+ def extract_main_title(cocina_title) # rubocop:disable Metrics/PerceivedComplexity
82
135
  result = if cocina_title.value
83
- cocina_title.value
136
+ cocina_title.value # covers both title and main title types
84
137
  elsif cocina_title.structuredValue.present?
85
- title_from_structured_values(cocina_title)
138
+ main_title_from_structured_values(cocina_title)
86
139
  elsif cocina_title.parallelValue.present?
87
- return build(cocina_title.parallelValue)
140
+ primary = cocina_title.parallelValue.find { |pvalue| pvalue.status == 'primary' }
141
+ if primary
142
+ extract_main_title(primary)
143
+ else
144
+ cocina_title.parallelValue.map { |pvalue| extract_main_title(pvalue) }
145
+ end
88
146
  end
89
- remove_trailing_punctuation(result.strip) if result.present?
90
- end
147
+ return [] if result.blank?
91
148
 
92
- def extract_main_title(cocina_title)
93
- if cocina_title.value
94
- cocina_title.value # covers both title and main title types
95
- elsif cocina_title.structuredValue.present?
96
- main_title_from_structured_values(cocina_title)
97
- elsif cocina_title.parallelValue.present?
98
- main_title(cocina_title.parallelValue)
99
- end
149
+ [result].flatten.compact.map { |val| remove_trailing_punctuation(val) }
100
150
  end
101
151
 
102
152
  def add_punctuation?
103
153
  @add_punctuation
104
154
  end
105
155
 
156
+ def only_one_parallel_value?
157
+ @only_one_parallel_value
158
+ end
159
+
106
160
  # @return [Cocina::Models::Title, nil] title that has status=primary
107
- def primary_title(titles)
108
- primary_title = titles.find do |title|
109
- title.status == 'primary'
110
- end
161
+ def primary_title(cocina_titles)
162
+ primary_title = cocina_titles.find { |title| title.status == 'primary' }
111
163
  return primary_title if primary_title.present?
112
164
 
113
165
  # NOTE: structuredValues would only have status primary assigned as a sibling, not as an attribute
114
166
 
115
- titles.find do |title|
167
+ cocina_titles.find do |title|
116
168
  title.parallelValue&.find do |parallel_title|
117
169
  parallel_title.status == 'primary'
118
170
  end
@@ -149,56 +201,63 @@ module Cocina
149
201
  # @return [String] the title value from combining the pieces of the structured_values by type and order
150
202
  # with desired punctuation per specs
151
203
  #
204
+ # for punctuation funky town, thank MARC and catalog cards
205
+ #
206
+ # rubocop:disable Metrics/AbcSize
152
207
  # rubocop:disable Metrics/CyclomaticComplexity
153
208
  # rubocop:disable Metrics/MethodLength
154
209
  # rubocop:disable Metrics/PerceivedComplexity
155
- def title_from_structured_values(title)
156
- # parse out the parts
157
- main_title = ''
158
- subtitle = ''
159
- non_sort_value = ''
210
+ def rebuild_structured_value(cocina_title)
211
+ result = ''
160
212
  part_name_number = ''
161
- title.structuredValue.each do |structured_value|
162
- # There can be a structuredValue inside a structuredValue. For example,
213
+ cocina_title.structuredValue.each do |structured_value| # rubocop:disable Metrics/BlockLength
214
+ # There can be a structuredValue inside a structuredValue, for example,
163
215
  # a uniform title where both the name and the title have internal StructuredValue
164
- return title_from_structured_values(structured_value) if structured_value.structuredValue.present?
216
+ return rebuild_structured_value(structured_value) if structured_value.structuredValue.present?
165
217
 
166
218
  value = structured_value.value&.strip
167
219
  next unless value
168
220
 
169
- # additional types: name, uniform ...
221
+ # additional types ignored here, e.g. name, uniform ...
170
222
  case structured_value.type&.downcase
171
223
  when 'nonsorting characters'
172
- non_sort_value = "#{value}#{non_sorting_padding(title, value)}"
224
+ padding = non_sorting_padding(cocina_title, value)
225
+ result = add_non_sorting_value(result, value, padding)
173
226
  when 'part name', 'part number'
174
- part_name_number = part_name_number(title.structuredValue) if part_name_number.blank?
175
- when 'main title', 'title'
176
- main_title = value
177
- when 'subtitle'
178
- # combine multiple subtitles into a single string
179
- subtitle = if !add_punctuation?
180
- if subtitle.present?
181
- [subtitle, value].join(' ')
182
- else
183
- value
184
- end
185
- elsif subtitle.present?
186
- # subtitle is preceded by space colon space, unless it is at the beginning of the title string
187
- "#{subtitle.sub(/[. :]+$/, '')} : #{value.sub(/^:/, '').strip}"
227
+ if part_name_number.blank?
228
+ part_name_number = part_name_number(cocina_title.structuredValue)
229
+ result = if !add_punctuation?
230
+ [result, part_name_number].join(' ')
231
+ elsif result.present?
232
+ # part name/number is preceded by period space, unless it is at the beginning of the title string
233
+ "#{result.sub(/[ .,]*$/, '')}. #{part_name_number}. "
188
234
  else
189
- value.sub(/^:/, '').strip
235
+ "#{part_name_number}. "
190
236
  end
237
+ end
238
+ when 'main title', 'title'
239
+ # nonsorting characters ending with hyphen, apostrophe or space should be slammed against the main title,
240
+ # even if we are not adding punctuation
241
+ result = if add_punctuation? || result.ends_with?(' ') || result.ends_with?('-') || result.ends_with?('\'')
242
+ [result, value].join
243
+ else
244
+ [remove_trailing_punctuation(result), remove_trailing_punctuation(value)].select(&:presence).join(' ')
245
+ end
246
+ when 'subtitle'
247
+ result = if !add_punctuation?
248
+ [result, value].select(&:presence).join(' ')
249
+ elsif result.present?
250
+ # subtitle is preceded by space colon space, unless it is at the beginning of the title string
251
+ "#{result.sub(/[. :]+$/, '')} : #{value.sub(/^:/, '').strip}"
252
+ else
253
+ result = value.sub(/^:/, '').strip
254
+ end
191
255
  end
192
256
  end
193
257
 
194
- # combine the parts into a single title string
195
- if add_punctuation?
196
- combine_with_punctuation(non_sort_value: non_sort_value, main_title: main_title, subtitle: subtitle,
197
- part_name_number: part_name_number)
198
- else
199
- ["#{non_sort_value}#{main_title}", subtitle, part_name_number].select(&:presence).join(' ')
200
- end
258
+ result
201
259
  end
260
+ # rubocop:enable Metrics/AbcSize
202
261
  # rubocop:enable Metrics/CyclomaticComplexity
203
262
  # rubocop:enable Metrics/MethodLength
204
263
  # rubocop:enable Metrics/PerceivedComplexity
@@ -206,11 +265,14 @@ module Cocina
206
265
  # main_title is title.structuredValue.value with type 'main title' (or just title.value)
207
266
  # @param [Cocina::Models::Title] title with structured values
208
267
  # @return [String] the main title value
209
- def main_title_from_structured_values(cocina_title) # rubocop:disable Metrics/MethodLength
268
+ #
269
+ # rubocop:disable Metrics/MethodLength
270
+ # rubocop:disable Metrics/PerceivedComplexity
271
+ def main_title_from_structured_values(cocina_title)
210
272
  result = ''
211
273
  # combine pieces of the cocina structuredValue into a single title
212
274
  cocina_title.structuredValue.each do |structured_value|
213
- # There can be a structuredValue inside a structuredValue. For example,
275
+ # There can be a structuredValue inside a structuredValue, for example,
214
276
  # a uniform title where both the name and the title have internal StructuredValue
215
277
  return main_title_from_structured_values(structured_value) if structured_value.structuredValue.present?
216
278
 
@@ -219,43 +281,36 @@ module Cocina
219
281
 
220
282
  case structured_value.type&.downcase
221
283
  when 'nonsorting characters'
222
- non_sort_value = "#{value}#{non_sorting_padding(cocina_title, value)}"
223
- result = "#{non_sort_value}#{result}" # non-sorting characters are at the beginning of the title
224
- when 'main title'
225
- result = "#{result}#{value}"
226
- when 'title'
227
- result = value
284
+ padding = non_sorting_padding(cocina_title, value)
285
+ result = add_non_sorting_value(result, value, padding)
286
+ when 'main title', 'title'
287
+ result = if ['\'', '-'].include?(result.last)
288
+ [result, value].join
289
+ else
290
+ [remove_trailing_punctuation(result).strip, remove_trailing_punctuation(value).strip].select(&:presence).join(' ')
291
+ end
228
292
  end
229
293
  end
230
- result
231
- end
232
294
 
233
- # Thank MARC and catalog cards for this mess. We need to add punctuation.
234
- # rubocop:disable Metrics/MethodLength
235
- def combine_with_punctuation(non_sort_value:, main_title:, subtitle:, part_name_number:)
236
- result = "#{non_sort_value}#{main_title}"
237
- if subtitle.present?
238
- result = if result.present?
239
- "#{result.sub(/[. :]+$/, '')} : #{subtitle.sub(/^:/, '').strip}"
240
- else
241
- result = subtitle
242
- end
243
- end
244
- if part_name_number.present?
245
- result = if result.present?
246
- "#{result.sub(/[ .,]*$/, '')}. #{part_name_number}."
247
- else
248
- "#{part_name_number}."
249
- end
250
- end
251
295
  result
252
296
  end
253
297
  # rubocop:enable Metrics/MethodLength
298
+ # rubocop:enable Metrics/PerceivedComplexity
254
299
 
300
+ # Thank MARC and catalog cards for this mess.
255
301
  def remove_trailing_punctuation(title)
256
302
  title.sub(%r{[ .,;:/\\]+$}, '')
257
303
  end
258
304
 
305
+ def add_non_sorting_value(title_so_far, non_sorting_value, padding)
306
+ non_sort_value = "#{non_sorting_value}#{padding}"
307
+ if title_so_far.present?
308
+ [title_so_far.strip, padding, non_sort_value].join
309
+ else
310
+ non_sort_value
311
+ end
312
+ end
313
+
259
314
  def non_sorting_padding(title, non_sorting_value)
260
315
  non_sort_note = title.note&.find { |note| note.type&.downcase == 'nonsorting character count' }
261
316
  if non_sort_note
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Cocina
4
4
  module Models
5
- VERSION = '0.94.0'
5
+ VERSION = '0.94.1'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cocina-models
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.94.0
4
+ version: 0.94.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Coyne
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-19 00:00:00.000000000 Z
11
+ date: 2024-01-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -562,7 +562,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
562
562
  - !ruby/object:Gem::Version
563
563
  version: '0'
564
564
  requirements: []
565
- rubygems_version: 3.3.7
565
+ rubygems_version: 3.4.13
566
566
  signing_key:
567
567
  specification_version: 4
568
568
  summary: Data models for the SDR