cocina-models 0.94.0 → 0.94.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 325cc1afd87f6c47380be3a4858d8858e31661efdfd39c0360c4a62f4541b6f2
4
- data.tar.gz: 4539fb9f4a715cc488e50a67df0c1e345ff227be9afe1dbd94c6466d4902a41b
3
+ metadata.gz: 38c6748d5c7659224f4139fc5d96b6b58732183bf133bb8ea32657a81fadc9ff
4
+ data.tar.gz: 29615bf905de450af973bd8a7ee00d0cd45810a8d2bd2b55166b2e686e0e048f
5
5
  SHA512:
6
- metadata.gz: d37e0ca7edb90caf68d706741e1f04ad8eafa767527af393f72ef3fe44ab55fa056b027848c46d8c43ed4ccdc97369a86a93a7d1d82b60b8d2302307fc1e634b
7
- data.tar.gz: 2bca5d3dcf6f5d141a0402545a558b17953af6f64311b29e11a57fc0f0d842af983a901b86e1a1cc8265c61f454db88d5f2a84460bb90e5bd0c0d8031073e913
6
+ metadata.gz: 3d6192a5bc54d0133483649d0bf421a4ce32f85b328d9cda131a6ce8aff7df180b045de1b58d5c78836a7ec53e326b04a141e3a616046febfc31650ec8ef4e80
7
+ data.tar.gz: 93069e72c38d812ddd493c0de9abcb80a0733fad353a36721821250cbf8c36482b1711748760a8f9a1cbc8fa4915654cf4236496f4a99eaa4ab8e687fee74cbe
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cocina-models (0.94.0)
4
+ cocina-models (0.94.1)
5
5
  activesupport
6
6
  deprecation
7
7
  dry-struct (~> 1.0)
@@ -21,7 +21,7 @@ PATH
21
21
  GEM
22
22
  remote: https://rubygems.org/
23
23
  specs:
24
- activesupport (7.1.2)
24
+ activesupport (7.1.3)
25
25
  base64
26
26
  bigdecimal
27
27
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -34,14 +34,14 @@ GEM
34
34
  ast (2.4.2)
35
35
  attr_extras (7.1.0)
36
36
  base64 (0.2.0)
37
- bigdecimal (3.1.5)
37
+ bigdecimal (3.1.6)
38
38
  byebug (11.1.3)
39
39
  committee (5.0.0)
40
40
  json_schema (~> 0.14, >= 0.14.3)
41
41
  openapi_parser (~> 1.0)
42
42
  rack (>= 1.5)
43
43
  commonmarker (0.23.10)
44
- concurrent-ruby (1.2.2)
44
+ concurrent-ruby (1.2.3)
45
45
  connection_pool (2.4.1)
46
46
  deprecation (1.1.0)
47
47
  activesupport
@@ -82,7 +82,7 @@ GEM
82
82
  multi_json
83
83
  language_server-protocol (3.17.0.3)
84
84
  mini_portile2 (2.8.5)
85
- minitest (5.21.1)
85
+ minitest (5.21.2)
86
86
  multi_json (1.15.0)
87
87
  mutex_m (0.2.0)
88
88
  nokogiri (1.16.0)
@@ -93,7 +93,7 @@ GEM
93
93
  openapi_parser (1.0.0)
94
94
  optimist (3.1.0)
95
95
  parallel (1.24.0)
96
- parser (3.3.0.3)
96
+ parser (3.3.0.5)
97
97
  ast (~> 2.4.1)
98
98
  racc
99
99
  patience_diff (1.2.0)
@@ -121,7 +121,7 @@ GEM
121
121
  rspec-core (>= 2, < 4, != 2.12.0)
122
122
  rss (0.3.0)
123
123
  rexml
124
- rubocop (1.60.0)
124
+ rubocop (1.60.1)
125
125
  json (~> 2.3)
126
126
  language_server-protocol (>= 3.17.0)
127
127
  parallel (~> 1.10)
data/README.md CHANGED
@@ -143,7 +143,7 @@ which pushes the gem to rubygems.org.
143
143
 
144
144
  ### Step 2: Update client gems coupled to the models
145
145
 
146
- Release new versions of [sdr-client](https://github.com/sul-dlss/sdr-client) and [dor-services-client](https://github.com/sul-dlss/dor-services-client/) pinned to use the new cocina-models version because applications such as [Argo](https://github.com/sul-dlss/argo) depend on both of these gems using the same models.
146
+ Release new versions of [sdr-client](https://github.com/sul-dlss/sdr-client), [dor-services-client](https://github.com/sul-dlss/dor-services-client/), and [dor_indexing](https://github.com/sul-dlss/dor_indexing/) pinned to use the new cocina-models version because applications such as [Argo](https://github.com/sul-dlss/argo) depend on these gems using the same models.
147
147
 
148
148
  ### Step 3: Update services directly coupled to the models
149
149
 
@@ -12,7 +12,8 @@ module Cocina
12
12
  # @param [Symbol] strategy ":first" is the strategy for selection when primary or display
13
13
  # title are missing
14
14
  # @param [Boolean] add_punctuation determines if the title should be formatted with punctuation
15
- # @return [String] the title value for Solr
15
+ # @return [String, Array] the title value for Solr - for :first strategy, a string; for :all strategy, an array
16
+ # (e.g. title displayed in blacklight search results vs boosting values for search result rankings)
16
17
  def self.build(titles, strategy: :first, add_punctuation: true)
17
18
  if titles.respond_to?(:description)
18
19
  Deprecation.warn(self,
@@ -27,7 +28,7 @@ module Cocina
27
28
  # we can boost matches on it in search results (boost matching this string higher than matching full title string)
28
29
  # e.g. "The Hobbit" (main_title) vs "The Hobbit, or, There and Back Again" (full_title)
29
30
  # @param [Array<Cocina::Models::Title,Cocina::Models::DescriptiveValue>] titles the titles to consider
30
- # @return [String] the main title value for Solr
31
+ # @return [Array<String>] the main title value(s) for Solr - array due to possible parallelValue
31
32
  def self.main_title(titles)
32
33
  new(strategy: :first, add_punctuation: false).main_title(titles)
33
34
  end
@@ -35,9 +36,9 @@ module Cocina
35
36
  # the "full title" is the title WITH subtitle, part name, etc. We want to be able to index it separately so
36
37
  # we can boost matches on it in search results (boost matching this string higher than other titles present)
37
38
  # @param [Array<Cocina::Models::Title,Cocina::Models::DescriptiveValue>] titles the titles to consider
38
- # @return [String] the title value for Solr
39
+ # @return [Array<String>] the full title value(s) for Solr - array due to possible parallelValue
39
40
  def self.full_title(titles)
40
- new(strategy: :first, add_punctuation: false).build(titles)
41
+ [new(strategy: :first, add_punctuation: false, only_one_parallel_value: false).build(titles)].flatten.compact
41
42
  end
42
43
 
43
44
  # "additional titles" are all title data except for full_title. We want to be able to index it separately so
@@ -45,16 +46,28 @@ module Cocina
45
46
  # @param [Array<Cocina::Models::Title,Cocina::Models::DescriptiveValue>] titles the titles to consider
46
47
  # @return [Array<String>] the values for Solr
47
48
  def self.additional_titles(titles)
48
- new(strategy: :all, add_punctuation: false).build(titles) - [full_title(titles)]
49
+ [new(strategy: :all, add_punctuation: false).build(titles)].flatten - full_title(titles)
49
50
  end
50
51
 
51
- def initialize(strategy:, add_punctuation:)
52
+ # @param strategy [Symbol] ":first" selects a single title value based on precedence of
53
+ # primary, untyped, first occurrence. ":all" returns an array containing all the values.
54
+ # @param add_punctuation [boolean] whether the title should be formatted with punctuation (think of a structured
55
+ # value coming from a MARC record, which is designed for catalog cards.)
56
+ # @param only_one_parallel_value [boolean] when true, choose one of the parallel values according to precedence
57
+ # of primary, untyped, first occurrence. When false, return an array containing all the parallel values.
58
+ # Why? Think of e.g. title displayed in blacklight search results vs boosting values for ranking of search
59
+ # results
60
+ def initialize(strategy:, add_punctuation:, only_one_parallel_value: true)
52
61
  @strategy = strategy
53
62
  @add_punctuation = add_punctuation
63
+ @only_one_parallel_value = only_one_parallel_value
54
64
  end
55
65
 
56
66
  # @param [Array<Cocina::Models::Title>] cocina_titles the titles to consider
57
- # @return [String] the title value for Solr
67
+ # @return [String, Array] the title value for Solr - for :first strategy, a string; for :all strategy, an array
68
+ # (e.g. title displayed in blacklight search results vs boosting values for search result rankings)
69
+ #
70
+ # rubocop:disable Metrics/PerceivedComplexity
58
71
  def build(cocina_titles)
59
72
  cocina_title = primary_title(cocina_titles) || untyped_title(cocina_titles)
60
73
  cocina_title = other_title(cocina_titles) if cocina_title.blank?
@@ -62,15 +75,23 @@ module Cocina
62
75
  if strategy == :first
63
76
  extract_title(cocina_title)
64
77
  else
65
- cocina_titles.map { |ctitle| extract_title(ctitle) }.flatten
78
+ result = cocina_titles.map { |ctitle| extract_title(ctitle) }.flatten
79
+ if only_one_parallel_value? && result.length == 1
80
+ result.first
81
+ else
82
+ result
83
+ end
66
84
  end
67
85
  end
86
+ # rubocop:enable Metrics/PerceivedComplexity
68
87
 
88
+ # this is the single "short title" - the title without subtitle, part name, etc.
89
+ # this may be useful for boosting and exact matching for search results
90
+ # @return [Array<String>] the main title value(s) for Solr - can be array due to parallel titles
69
91
  def main_title(titles)
70
92
  cocina_title = primary_title(titles) || untyped_title(titles)
71
93
  cocina_title = other_title(titles) if cocina_title.blank?
72
94
 
73
- cocina_title = cocina_title.first if cocina_title.is_a?(Array)
74
95
  extract_main_title(cocina_title)
75
96
  end
76
97
 
@@ -79,40 +100,71 @@ module Cocina
79
100
  attr_reader :strategy
80
101
 
81
102
  def extract_title(cocina_title)
103
+ title_values = if cocina_title.value
104
+ cocina_title.value
105
+ elsif cocina_title.structuredValue.present?
106
+ rebuild_structured_value(cocina_title)
107
+ elsif cocina_title.parallelValue.present?
108
+ extract_title_parallel_values(cocina_title)
109
+ end
110
+ result = [title_values].flatten.compact.map { |val| remove_trailing_punctuation(val.strip) }
111
+ result.length == 1 ? result.first : result
112
+ end
113
+
114
+ # strategy :first says to return a single value (default: true)
115
+ # only_one_parallel_value? says to return a single value, even if that value is a parallelValue (default: true)
116
+ #
117
+ # rubocop:disable Metrics/PerceivedComplexity
118
+ def extract_title_parallel_values(cocina_title)
119
+ primary = cocina_title.parallelValue.find { |pvalue| pvalue.status == 'primary' }
120
+ if primary && only_one_parallel_value? && strategy == :first
121
+ # we have a primary title and we know we want a single value
122
+ extract_title(primary)
123
+ elsif only_one_parallel_value? && strategy == :first
124
+ # no primary value; algorithm says prefer an untyped value over a typed value for single value
125
+ untyped = cocina_title.parallelValue.find { |pvalue| pvalue.type.blank? }
126
+ extract_title(untyped || cocina_title.parallelValue.first)
127
+ else
128
+ cocina_title.parallelValue.map { |pvalue| extract_title(pvalue) }
129
+ end
130
+ end
131
+ # rubocop:enable Metrics/PerceivedComplexity
132
+
133
+ # @return [Array<String>] the main title value(s) for Solr - can be array due to parallel titles
134
+ def extract_main_title(cocina_title) # rubocop:disable Metrics/PerceivedComplexity
82
135
  result = if cocina_title.value
83
- cocina_title.value
136
+ cocina_title.value # covers both title and main title types
84
137
  elsif cocina_title.structuredValue.present?
85
- title_from_structured_values(cocina_title)
138
+ main_title_from_structured_values(cocina_title)
86
139
  elsif cocina_title.parallelValue.present?
87
- return build(cocina_title.parallelValue)
140
+ primary = cocina_title.parallelValue.find { |pvalue| pvalue.status == 'primary' }
141
+ if primary
142
+ extract_main_title(primary)
143
+ else
144
+ cocina_title.parallelValue.map { |pvalue| extract_main_title(pvalue) }
145
+ end
88
146
  end
89
- remove_trailing_punctuation(result.strip) if result.present?
90
- end
147
+ return [] if result.blank?
91
148
 
92
- def extract_main_title(cocina_title)
93
- if cocina_title.value
94
- cocina_title.value # covers both title and main title types
95
- elsif cocina_title.structuredValue.present?
96
- main_title_from_structured_values(cocina_title)
97
- elsif cocina_title.parallelValue.present?
98
- main_title(cocina_title.parallelValue)
99
- end
149
+ [result].flatten.compact.map { |val| remove_trailing_punctuation(val) }
100
150
  end
101
151
 
102
152
  def add_punctuation?
103
153
  @add_punctuation
104
154
  end
105
155
 
156
+ def only_one_parallel_value?
157
+ @only_one_parallel_value
158
+ end
159
+
106
160
  # @return [Cocina::Models::Title, nil] title that has status=primary
107
- def primary_title(titles)
108
- primary_title = titles.find do |title|
109
- title.status == 'primary'
110
- end
161
+ def primary_title(cocina_titles)
162
+ primary_title = cocina_titles.find { |title| title.status == 'primary' }
111
163
  return primary_title if primary_title.present?
112
164
 
113
165
  # NOTE: structuredValues would only have status primary assigned as a sibling, not as an attribute
114
166
 
115
- titles.find do |title|
167
+ cocina_titles.find do |title|
116
168
  title.parallelValue&.find do |parallel_title|
117
169
  parallel_title.status == 'primary'
118
170
  end
@@ -149,56 +201,63 @@ module Cocina
149
201
  # @return [String] the title value from combining the pieces of the structured_values by type and order
150
202
  # with desired punctuation per specs
151
203
  #
204
+ # for punctuation funky town, thank MARC and catalog cards
205
+ #
206
+ # rubocop:disable Metrics/AbcSize
152
207
  # rubocop:disable Metrics/CyclomaticComplexity
153
208
  # rubocop:disable Metrics/MethodLength
154
209
  # rubocop:disable Metrics/PerceivedComplexity
155
- def title_from_structured_values(title)
156
- # parse out the parts
157
- main_title = ''
158
- subtitle = ''
159
- non_sort_value = ''
210
+ def rebuild_structured_value(cocina_title)
211
+ result = ''
160
212
  part_name_number = ''
161
- title.structuredValue.each do |structured_value|
162
- # There can be a structuredValue inside a structuredValue. For example,
213
+ cocina_title.structuredValue.each do |structured_value| # rubocop:disable Metrics/BlockLength
214
+ # There can be a structuredValue inside a structuredValue, for example,
163
215
  # a uniform title where both the name and the title have internal StructuredValue
164
- return title_from_structured_values(structured_value) if structured_value.structuredValue.present?
216
+ return rebuild_structured_value(structured_value) if structured_value.structuredValue.present?
165
217
 
166
218
  value = structured_value.value&.strip
167
219
  next unless value
168
220
 
169
- # additional types: name, uniform ...
221
+ # additional types ignored here, e.g. name, uniform ...
170
222
  case structured_value.type&.downcase
171
223
  when 'nonsorting characters'
172
- non_sort_value = "#{value}#{non_sorting_padding(title, value)}"
224
+ padding = non_sorting_padding(cocina_title, value)
225
+ result = add_non_sorting_value(result, value, padding)
173
226
  when 'part name', 'part number'
174
- part_name_number = part_name_number(title.structuredValue) if part_name_number.blank?
175
- when 'main title', 'title'
176
- main_title = value
177
- when 'subtitle'
178
- # combine multiple subtitles into a single string
179
- subtitle = if !add_punctuation?
180
- if subtitle.present?
181
- [subtitle, value].join(' ')
182
- else
183
- value
184
- end
185
- elsif subtitle.present?
186
- # subtitle is preceded by space colon space, unless it is at the beginning of the title string
187
- "#{subtitle.sub(/[. :]+$/, '')} : #{value.sub(/^:/, '').strip}"
227
+ if part_name_number.blank?
228
+ part_name_number = part_name_number(cocina_title.structuredValue)
229
+ result = if !add_punctuation?
230
+ [result, part_name_number].join(' ')
231
+ elsif result.present?
232
+ # part name/number is preceded by period space, unless it is at the beginning of the title string
233
+ "#{result.sub(/[ .,]*$/, '')}. #{part_name_number}. "
188
234
  else
189
- value.sub(/^:/, '').strip
235
+ "#{part_name_number}. "
190
236
  end
237
+ end
238
+ when 'main title', 'title'
239
+ # nonsorting characters ending with hyphen, apostrophe or space should be slammed against the main title,
240
+ # even if we are not adding punctuation
241
+ result = if add_punctuation? || result.ends_with?(' ') || result.ends_with?('-') || result.ends_with?('\'')
242
+ [result, value].join
243
+ else
244
+ [remove_trailing_punctuation(result), remove_trailing_punctuation(value)].select(&:presence).join(' ')
245
+ end
246
+ when 'subtitle'
247
+ result = if !add_punctuation?
248
+ [result, value].select(&:presence).join(' ')
249
+ elsif result.present?
250
+ # subtitle is preceded by space colon space, unless it is at the beginning of the title string
251
+ "#{result.sub(/[. :]+$/, '')} : #{value.sub(/^:/, '').strip}"
252
+ else
253
+ result = value.sub(/^:/, '').strip
254
+ end
191
255
  end
192
256
  end
193
257
 
194
- # combine the parts into a single title string
195
- if add_punctuation?
196
- combine_with_punctuation(non_sort_value: non_sort_value, main_title: main_title, subtitle: subtitle,
197
- part_name_number: part_name_number)
198
- else
199
- ["#{non_sort_value}#{main_title}", subtitle, part_name_number].select(&:presence).join(' ')
200
- end
258
+ result
201
259
  end
260
+ # rubocop:enable Metrics/AbcSize
202
261
  # rubocop:enable Metrics/CyclomaticComplexity
203
262
  # rubocop:enable Metrics/MethodLength
204
263
  # rubocop:enable Metrics/PerceivedComplexity
@@ -206,11 +265,14 @@ module Cocina
206
265
  # main_title is title.structuredValue.value with type 'main title' (or just title.value)
207
266
  # @param [Cocina::Models::Title] title with structured values
208
267
  # @return [String] the main title value
209
- def main_title_from_structured_values(cocina_title) # rubocop:disable Metrics/MethodLength
268
+ #
269
+ # rubocop:disable Metrics/MethodLength
270
+ # rubocop:disable Metrics/PerceivedComplexity
271
+ def main_title_from_structured_values(cocina_title)
210
272
  result = ''
211
273
  # combine pieces of the cocina structuredValue into a single title
212
274
  cocina_title.structuredValue.each do |structured_value|
213
- # There can be a structuredValue inside a structuredValue. For example,
275
+ # There can be a structuredValue inside a structuredValue, for example,
214
276
  # a uniform title where both the name and the title have internal StructuredValue
215
277
  return main_title_from_structured_values(structured_value) if structured_value.structuredValue.present?
216
278
 
@@ -219,43 +281,36 @@ module Cocina
219
281
 
220
282
  case structured_value.type&.downcase
221
283
  when 'nonsorting characters'
222
- non_sort_value = "#{value}#{non_sorting_padding(cocina_title, value)}"
223
- result = "#{non_sort_value}#{result}" # non-sorting characters are at the beginning of the title
224
- when 'main title'
225
- result = "#{result}#{value}"
226
- when 'title'
227
- result = value
284
+ padding = non_sorting_padding(cocina_title, value)
285
+ result = add_non_sorting_value(result, value, padding)
286
+ when 'main title', 'title'
287
+ result = if ['\'', '-'].include?(result.last)
288
+ [result, value].join
289
+ else
290
+ [remove_trailing_punctuation(result).strip, remove_trailing_punctuation(value).strip].select(&:presence).join(' ')
291
+ end
228
292
  end
229
293
  end
230
- result
231
- end
232
294
 
233
- # Thank MARC and catalog cards for this mess. We need to add punctuation.
234
- # rubocop:disable Metrics/MethodLength
235
- def combine_with_punctuation(non_sort_value:, main_title:, subtitle:, part_name_number:)
236
- result = "#{non_sort_value}#{main_title}"
237
- if subtitle.present?
238
- result = if result.present?
239
- "#{result.sub(/[. :]+$/, '')} : #{subtitle.sub(/^:/, '').strip}"
240
- else
241
- result = subtitle
242
- end
243
- end
244
- if part_name_number.present?
245
- result = if result.present?
246
- "#{result.sub(/[ .,]*$/, '')}. #{part_name_number}."
247
- else
248
- "#{part_name_number}."
249
- end
250
- end
251
295
  result
252
296
  end
253
297
  # rubocop:enable Metrics/MethodLength
298
+ # rubocop:enable Metrics/PerceivedComplexity
254
299
 
300
+ # Thank MARC and catalog cards for this mess.
255
301
  def remove_trailing_punctuation(title)
256
302
  title.sub(%r{[ .,;:/\\]+$}, '')
257
303
  end
258
304
 
305
+ def add_non_sorting_value(title_so_far, non_sorting_value, padding)
306
+ non_sort_value = "#{non_sorting_value}#{padding}"
307
+ if title_so_far.present?
308
+ [title_so_far.strip, padding, non_sort_value].join
309
+ else
310
+ non_sort_value
311
+ end
312
+ end
313
+
259
314
  def non_sorting_padding(title, non_sorting_value)
260
315
  non_sort_note = title.note&.find { |note| note.type&.downcase == 'nonsorting character count' }
261
316
  if non_sort_note
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Cocina
4
4
  module Models
5
- VERSION = '0.94.0'
5
+ VERSION = '0.94.1'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cocina-models
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.94.0
4
+ version: 0.94.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Coyne
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-19 00:00:00.000000000 Z
11
+ date: 2024-01-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -562,7 +562,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
562
562
  - !ruby/object:Gem::Version
563
563
  version: '0'
564
564
  requirements: []
565
- rubygems_version: 3.3.7
565
+ rubygems_version: 3.4.13
566
566
  signing_key:
567
567
  specification_version: 4
568
568
  summary: Data models for the SDR