cocina-models 0.94.0 → 0.94.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 325cc1afd87f6c47380be3a4858d8858e31661efdfd39c0360c4a62f4541b6f2
4
- data.tar.gz: 4539fb9f4a715cc488e50a67df0c1e345ff227be9afe1dbd94c6466d4902a41b
3
+ metadata.gz: e147c55355cc4c2f11ed358c5091c0942e48168948da875a1fd1ff97f7dfb95a
4
+ data.tar.gz: fd668019a9193c1620d419b9a3815115a1bc0d8a93e635721bbf1251db80df10
5
5
  SHA512:
6
- metadata.gz: d37e0ca7edb90caf68d706741e1f04ad8eafa767527af393f72ef3fe44ab55fa056b027848c46d8c43ed4ccdc97369a86a93a7d1d82b60b8d2302307fc1e634b
7
- data.tar.gz: 2bca5d3dcf6f5d141a0402545a558b17953af6f64311b29e11a57fc0f0d842af983a901b86e1a1cc8265c61f454db88d5f2a84460bb90e5bd0c0d8031073e913
6
+ metadata.gz: 3f9485a94c43606870b8d47f7b0d29597660fe82a884fab5128922501e65eccee5787c43a27f796dd35af36992bb04a417eb89e045e2bd74be4c8b30212e2d04
7
+ data.tar.gz: 0707161a4e5399762878ed9d9215ae59a807d046fa7f3b199e09227f8fb5da69484b32dc0a942df5fb1281aafd8ec29a054b0eae04f40a68ceddc84fda15e406
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cocina-models (0.94.0)
4
+ cocina-models (0.94.2)
5
5
  activesupport
6
6
  deprecation
7
7
  dry-struct (~> 1.0)
@@ -21,7 +21,7 @@ PATH
21
21
  GEM
22
22
  remote: https://rubygems.org/
23
23
  specs:
24
- activesupport (7.1.2)
24
+ activesupport (7.1.3)
25
25
  base64
26
26
  bigdecimal
27
27
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -34,14 +34,14 @@ GEM
34
34
  ast (2.4.2)
35
35
  attr_extras (7.1.0)
36
36
  base64 (0.2.0)
37
- bigdecimal (3.1.5)
37
+ bigdecimal (3.1.6)
38
38
  byebug (11.1.3)
39
39
  committee (5.0.0)
40
40
  json_schema (~> 0.14, >= 0.14.3)
41
41
  openapi_parser (~> 1.0)
42
42
  rack (>= 1.5)
43
43
  commonmarker (0.23.10)
44
- concurrent-ruby (1.2.2)
44
+ concurrent-ruby (1.2.3)
45
45
  connection_pool (2.4.1)
46
46
  deprecation (1.1.0)
47
47
  activesupport
@@ -82,7 +82,7 @@ GEM
82
82
  multi_json
83
83
  language_server-protocol (3.17.0.3)
84
84
  mini_portile2 (2.8.5)
85
- minitest (5.21.1)
85
+ minitest (5.21.2)
86
86
  multi_json (1.15.0)
87
87
  mutex_m (0.2.0)
88
88
  nokogiri (1.16.0)
@@ -93,7 +93,7 @@ GEM
93
93
  openapi_parser (1.0.0)
94
94
  optimist (3.1.0)
95
95
  parallel (1.24.0)
96
- parser (3.3.0.3)
96
+ parser (3.3.0.5)
97
97
  ast (~> 2.4.1)
98
98
  racc
99
99
  patience_diff (1.2.0)
@@ -121,7 +121,7 @@ GEM
121
121
  rspec-core (>= 2, < 4, != 2.12.0)
122
122
  rss (0.3.0)
123
123
  rexml
124
- rubocop (1.60.0)
124
+ rubocop (1.60.1)
125
125
  json (~> 2.3)
126
126
  language_server-protocol (>= 3.17.0)
127
127
  parallel (~> 1.10)
data/README.md CHANGED
@@ -143,7 +143,7 @@ which pushes the gem to rubygems.org.
143
143
 
144
144
  ### Step 2: Update client gems coupled to the models
145
145
 
146
- Release new versions of [sdr-client](https://github.com/sul-dlss/sdr-client) and [dor-services-client](https://github.com/sul-dlss/dor-services-client/) pinned to use the new cocina-models version because applications such as [Argo](https://github.com/sul-dlss/argo) depend on both of these gems using the same models.
146
+ Release new versions of [sdr-client](https://github.com/sul-dlss/sdr-client), [dor-services-client](https://github.com/sul-dlss/dor-services-client/), and [dor_indexing](https://github.com/sul-dlss/dor_indexing/) pinned to use the new cocina-models version because applications such as [Argo](https://github.com/sul-dlss/argo) depend on all of these gems using the same models.
147
147
 
148
148
  ### Step 3: Update services directly coupled to the models
149
149
 
@@ -12,7 +12,8 @@ module Cocina
12
12
  # @param [Symbol] strategy ":first" is the strategy for selection when primary or display
13
13
  # title are missing
14
14
  # @param [Boolean] add_punctuation determines if the title should be formatted with punctuation
15
- # @return [String] the title value for Solr
15
+ # @return [String, Array] the title value for Solr - for :first strategy, a string; for :all strategy, an array
16
+ # (e.g. title displayed in blacklight search results vs boosting values for search result rankings)
16
17
  def self.build(titles, strategy: :first, add_punctuation: true)
17
18
  if titles.respond_to?(:description)
18
19
  Deprecation.warn(self,
@@ -27,7 +28,7 @@ module Cocina
27
28
  # we can boost matches on it in search results (boost matching this string higher than matching full title string)
28
29
  # e.g. "The Hobbit" (main_title) vs "The Hobbit, or, There and Back Again" (full_title)
29
30
  # @param [Array<Cocina::Models::Title,Cocina::Models::DescriptiveValue>] titles the titles to consider
30
- # @return [String] the main title value for Solr
31
+ # @return [Array<String>] the main title value(s) for Solr - array due to possible parallelValue
31
32
  def self.main_title(titles)
32
33
  new(strategy: :first, add_punctuation: false).main_title(titles)
33
34
  end
@@ -35,9 +36,9 @@ module Cocina
35
36
  # the "full title" is the title WITH subtitle, part name, etc. We want to be able to index it separately so
36
37
  # we can boost matches on it in search results (boost matching this string higher than other titles present)
37
38
  # @param [Array<Cocina::Models::Title,Cocina::Models::DescriptiveValue>] titles the titles to consider
38
- # @return [String] the title value for Solr
39
+ # @return [Array<String>] the full title value(s) for Solr - array due to possible parallelValue
39
40
  def self.full_title(titles)
40
- new(strategy: :first, add_punctuation: false).build(titles)
41
+ [new(strategy: :first, add_punctuation: false, only_one_parallel_value: false).build(titles)].flatten.compact
41
42
  end
42
43
 
43
44
  # "additional titles" are all title data except for full_title. We want to be able to index it separately so
@@ -45,16 +46,28 @@ module Cocina
45
46
  # @param [Array<Cocina::Models::Title,Cocina::Models::DescriptiveValue>] titles the titles to consider
46
47
  # @return [Array<String>] the values for Solr
47
48
  def self.additional_titles(titles)
48
- new(strategy: :all, add_punctuation: false).build(titles) - [full_title(titles)]
49
+ [new(strategy: :all, add_punctuation: false).build(titles)].flatten - full_title(titles)
49
50
  end
50
51
 
51
- def initialize(strategy:, add_punctuation:)
52
+ # @param strategy [Symbol] ":first" selects a single title value based on precedence of
53
+ # primary, untyped, first occurrence. ":all" returns an array containing all the values.
54
+ # @param add_punctuation [boolean] whether the title should be formatted with punctuation (think of a structured
55
+ # value coming from a MARC record, which is designed for catalog cards.)
56
+ # @param only_one_parallel_value [boolean] when true, choose one of the parallel values according to precedence
57
+ # of primary, untyped, first occurrence. When false, return an array containing all the parallel values.
58
+ # Why? Think of e.g. title displayed in blacklight search results vs boosting values for ranking of search
59
+ # results
60
+ def initialize(strategy:, add_punctuation:, only_one_parallel_value: true)
52
61
  @strategy = strategy
53
62
  @add_punctuation = add_punctuation
63
+ @only_one_parallel_value = only_one_parallel_value
54
64
  end
55
65
 
56
66
  # @param [Array<Cocina::Models::Title>] cocina_titles the titles to consider
57
- # @return [String] the title value for Solr
67
+ # @return [String, Array] the title value for Solr - for :first strategy, a string; for :all strategy, an array
68
+ # (e.g. title displayed in blacklight search results vs boosting values for search result rankings)
69
+ #
70
+ # rubocop:disable Metrics/PerceivedComplexity
58
71
  def build(cocina_titles)
59
72
  cocina_title = primary_title(cocina_titles) || untyped_title(cocina_titles)
60
73
  cocina_title = other_title(cocina_titles) if cocina_title.blank?
@@ -62,15 +75,23 @@ module Cocina
62
75
  if strategy == :first
63
76
  extract_title(cocina_title)
64
77
  else
65
- cocina_titles.map { |ctitle| extract_title(ctitle) }.flatten
78
+ result = cocina_titles.map { |ctitle| extract_title(ctitle) }.flatten
79
+ if only_one_parallel_value? && result.length == 1
80
+ result.first
81
+ else
82
+ result
83
+ end
66
84
  end
67
85
  end
86
+ # rubocop:enable Metrics/PerceivedComplexity
68
87
 
88
+ # this is the single "short title" - the title without subtitle, part name, etc.
89
+ # this may be useful for boosting and exact matching for search results
90
+ # @return [Array<String>] the main title value(s) for Solr - can be array due to parallel titles
69
91
  def main_title(titles)
70
92
  cocina_title = primary_title(titles) || untyped_title(titles)
71
93
  cocina_title = other_title(titles) if cocina_title.blank?
72
94
 
73
- cocina_title = cocina_title.first if cocina_title.is_a?(Array)
74
95
  extract_main_title(cocina_title)
75
96
  end
76
97
 
@@ -79,40 +100,71 @@ module Cocina
79
100
  attr_reader :strategy
80
101
 
81
102
  def extract_title(cocina_title)
103
+ title_values = if cocina_title.value
104
+ cocina_title.value
105
+ elsif cocina_title.structuredValue.present?
106
+ rebuild_structured_value(cocina_title)
107
+ elsif cocina_title.parallelValue.present?
108
+ extract_title_parallel_values(cocina_title)
109
+ end
110
+ result = [title_values].flatten.compact.map { |val| remove_trailing_punctuation(val.strip) }
111
+ result.length == 1 ? result.first : result
112
+ end
113
+
114
+ # strategy :first says to return a single value (default: true)
115
+ # only_one_parallel_value? says to return a single value, even if that value is a parallelValue (default: true)
116
+ #
117
+ # rubocop:disable Metrics/PerceivedComplexity
118
+ def extract_title_parallel_values(cocina_title)
119
+ primary = cocina_title.parallelValue.find { |pvalue| pvalue.status == 'primary' }
120
+ if primary && only_one_parallel_value? && strategy == :first
121
+ # we have a primary title and we know we want a single value
122
+ extract_title(primary)
123
+ elsif only_one_parallel_value? && strategy == :first
124
+ # no primary value; algorithm says prefer an untyped value over a typed value for single value
125
+ untyped = cocina_title.parallelValue.find { |pvalue| pvalue.type.blank? }
126
+ extract_title(untyped || cocina_title.parallelValue.first)
127
+ else
128
+ cocina_title.parallelValue.map { |pvalue| extract_title(pvalue) }
129
+ end
130
+ end
131
+ # rubocop:enable Metrics/PerceivedComplexity
132
+
133
+ # @return [Array<String>] the main title value(s) for Solr - can be array due to parallel titles
134
+ def extract_main_title(cocina_title) # rubocop:disable Metrics/PerceivedComplexity
82
135
  result = if cocina_title.value
83
- cocina_title.value
136
+ cocina_title.value # covers both title and main title types
84
137
  elsif cocina_title.structuredValue.present?
85
- title_from_structured_values(cocina_title)
138
+ main_title_from_structured_values(cocina_title)
86
139
  elsif cocina_title.parallelValue.present?
87
- return build(cocina_title.parallelValue)
140
+ primary = cocina_title.parallelValue.find { |pvalue| pvalue.status == 'primary' }
141
+ if primary
142
+ extract_main_title(primary)
143
+ else
144
+ cocina_title.parallelValue.map { |pvalue| extract_main_title(pvalue) }
145
+ end
88
146
  end
89
- remove_trailing_punctuation(result.strip) if result.present?
90
- end
147
+ return [] if result.blank?
91
148
 
92
- def extract_main_title(cocina_title)
93
- if cocina_title.value
94
- cocina_title.value # covers both title and main title types
95
- elsif cocina_title.structuredValue.present?
96
- main_title_from_structured_values(cocina_title)
97
- elsif cocina_title.parallelValue.present?
98
- main_title(cocina_title.parallelValue)
99
- end
149
+ [result].flatten.compact.map { |val| remove_trailing_punctuation(val) }
100
150
  end
101
151
 
102
152
  def add_punctuation?
103
153
  @add_punctuation
104
154
  end
105
155
 
156
+ def only_one_parallel_value?
157
+ @only_one_parallel_value
158
+ end
159
+
106
160
  # @return [Cocina::Models::Title, nil] title that has status=primary
107
- def primary_title(titles)
108
- primary_title = titles.find do |title|
109
- title.status == 'primary'
110
- end
161
+ def primary_title(cocina_titles)
162
+ primary_title = cocina_titles.find { |title| title.status == 'primary' }
111
163
  return primary_title if primary_title.present?
112
164
 
113
165
  # NOTE: structuredValues would only have status primary assigned as a sibling, not as an attribute
114
166
 
115
- titles.find do |title|
167
+ cocina_titles.find do |title|
116
168
  title.parallelValue&.find do |parallel_title|
117
169
  parallel_title.status == 'primary'
118
170
  end
@@ -149,56 +201,73 @@ module Cocina
149
201
  # @return [String] the title value from combining the pieces of the structured_values by type and order
150
202
  # with desired punctuation per specs
151
203
  #
204
+ # - nonsorting characters value is followed by a space, unless the nonsorting
205
+ # character count note has a numeric value equal to the length of the
206
+ # nonsorting characters value, in which case no space is inserted
207
+ # - subtitle is preceded by space colon space, unless it is at the beginning
208
+ # of the title string
209
+ # - partName and partNumber are always separated from each other by comma space
210
+ # - first partName or partNumber in the string is preceded by period space
211
+ # - partName or partNumber before nonsorting characters or main title is followed
212
+ # by period space
213
+ #
214
+ # for punctuation funky town, thank MARC and catalog cards
215
+ #
216
+ # rubocop:disable Metrics/AbcSize
152
217
  # rubocop:disable Metrics/CyclomaticComplexity
153
218
  # rubocop:disable Metrics/MethodLength
154
219
  # rubocop:disable Metrics/PerceivedComplexity
155
- def title_from_structured_values(title)
156
- # parse out the parts
157
- main_title = ''
158
- subtitle = ''
159
- non_sort_value = ''
220
+ def rebuild_structured_value(cocina_title)
221
+ result = ''
160
222
  part_name_number = ''
161
- title.structuredValue.each do |structured_value|
162
- # There can be a structuredValue inside a structuredValue. For example,
223
+ cocina_title.structuredValue.each do |structured_value| # rubocop:disable Metrics/BlockLength
224
+ # There can be a structuredValue inside a structuredValue, for example,
163
225
  # a uniform title where both the name and the title have internal StructuredValue
164
- return title_from_structured_values(structured_value) if structured_value.structuredValue.present?
226
+ return rebuild_structured_value(structured_value) if structured_value.structuredValue.present?
165
227
 
166
228
  value = structured_value.value&.strip
167
229
  next unless value
168
230
 
169
- # additional types: name, uniform ...
231
+ # additional types ignored here, e.g. name, uniform ...
170
232
  case structured_value.type&.downcase
171
233
  when 'nonsorting characters'
172
- non_sort_value = "#{value}#{non_sorting_padding(title, value)}"
234
+ padding = non_sorting_padding(cocina_title, value)
235
+ result = add_non_sorting_value(result, value, padding)
173
236
  when 'part name', 'part number'
174
- part_name_number = part_name_number(title.structuredValue) if part_name_number.blank?
175
- when 'main title', 'title'
176
- main_title = value
177
- when 'subtitle'
178
- # combine multiple subtitles into a single string
179
- subtitle = if !add_punctuation?
180
- if subtitle.present?
181
- [subtitle, value].join(' ')
182
- else
183
- value
184
- end
185
- elsif subtitle.present?
186
- # subtitle is preceded by space colon space, unless it is at the beginning of the title string
187
- "#{subtitle.sub(/[. :]+$/, '')} : #{value.sub(/^:/, '').strip}"
237
+ if part_name_number.blank?
238
+ part_name_number = part_name_number(cocina_title.structuredValue)
239
+ result = if !add_punctuation?
240
+ [result, part_name_number].join(' ')
241
+ elsif result.present?
242
+ # part name/number is preceded by period space, unless it is at the beginning of the title string
243
+ "#{result.sub(/[ .,]*$/, '')}. #{part_name_number}. "
188
244
  else
189
- value.sub(/^:/, '').strip
245
+ "#{part_name_number}. "
190
246
  end
247
+ end
248
+ when 'main title', 'title'
249
+ # nonsorting characters ending with hyphen, apostrophe or space should be slammed against the main title,
250
+ # even if we are not adding punctuation
251
+ result = if add_punctuation? || result.ends_with?(' ') || result.ends_with?('-') || result.ends_with?('\'')
252
+ [result, value].join
253
+ else
254
+ [remove_trailing_punctuation(result), remove_trailing_punctuation(value)].select(&:presence).join(' ')
255
+ end
256
+ when 'subtitle'
257
+ result = if !add_punctuation?
258
+ [result, value.sub(/^:/, '').strip].select(&:presence).join(' ')
259
+ elsif result.present?
260
+ # subtitle is preceded by space colon space, unless it is at the beginning of the title string
261
+ "#{result.sub(/[. :]+$/, '')} : #{value.sub(/^:/, '').strip}"
262
+ else
263
+ result = value.sub(/^:/, '').strip
264
+ end
191
265
  end
192
266
  end
193
267
 
194
- # combine the parts into a single title string
195
- if add_punctuation?
196
- combine_with_punctuation(non_sort_value: non_sort_value, main_title: main_title, subtitle: subtitle,
197
- part_name_number: part_name_number)
198
- else
199
- ["#{non_sort_value}#{main_title}", subtitle, part_name_number].select(&:presence).join(' ')
200
- end
268
+ result
201
269
  end
270
+ # rubocop:enable Metrics/AbcSize
202
271
  # rubocop:enable Metrics/CyclomaticComplexity
203
272
  # rubocop:enable Metrics/MethodLength
204
273
  # rubocop:enable Metrics/PerceivedComplexity
@@ -206,11 +275,14 @@ module Cocina
206
275
  # main_title is title.structuredValue.value with type 'main title' (or just title.value)
207
276
  # @param [Cocina::Models::Title] title with structured values
208
277
  # @return [String] the main title value
209
- def main_title_from_structured_values(cocina_title) # rubocop:disable Metrics/MethodLength
278
+ #
279
+ # rubocop:disable Metrics/MethodLength
280
+ # rubocop:disable Metrics/PerceivedComplexity
281
+ def main_title_from_structured_values(cocina_title)
210
282
  result = ''
211
283
  # combine pieces of the cocina structuredValue into a single title
212
284
  cocina_title.structuredValue.each do |structured_value|
213
- # There can be a structuredValue inside a structuredValue. For example,
285
+ # There can be a structuredValue inside a structuredValue, for example,
214
286
  # a uniform title where both the name and the title have internal StructuredValue
215
287
  return main_title_from_structured_values(structured_value) if structured_value.structuredValue.present?
216
288
 
@@ -219,43 +291,36 @@ module Cocina
219
291
 
220
292
  case structured_value.type&.downcase
221
293
  when 'nonsorting characters'
222
- non_sort_value = "#{value}#{non_sorting_padding(cocina_title, value)}"
223
- result = "#{non_sort_value}#{result}" # non-sorting characters are at the beginning of the title
224
- when 'main title'
225
- result = "#{result}#{value}"
226
- when 'title'
227
- result = value
294
+ padding = non_sorting_padding(cocina_title, value)
295
+ result = add_non_sorting_value(result, value, padding)
296
+ when 'main title', 'title'
297
+ result = if ['\'', '-'].include?(result.last)
298
+ [result, value].join
299
+ else
300
+ [remove_trailing_punctuation(result).strip, remove_trailing_punctuation(value).strip].select(&:presence).join(' ')
301
+ end
228
302
  end
229
303
  end
230
- result
231
- end
232
304
 
233
- # Thank MARC and catalog cards for this mess. We need to add punctuation.
234
- # rubocop:disable Metrics/MethodLength
235
- def combine_with_punctuation(non_sort_value:, main_title:, subtitle:, part_name_number:)
236
- result = "#{non_sort_value}#{main_title}"
237
- if subtitle.present?
238
- result = if result.present?
239
- "#{result.sub(/[. :]+$/, '')} : #{subtitle.sub(/^:/, '').strip}"
240
- else
241
- result = subtitle
242
- end
243
- end
244
- if part_name_number.present?
245
- result = if result.present?
246
- "#{result.sub(/[ .,]*$/, '')}. #{part_name_number}."
247
- else
248
- "#{part_name_number}."
249
- end
250
- end
251
305
  result
252
306
  end
253
307
  # rubocop:enable Metrics/MethodLength
308
+ # rubocop:enable Metrics/PerceivedComplexity
254
309
 
310
+ # Thank MARC and catalog cards for this mess.
255
311
  def remove_trailing_punctuation(title)
256
312
  title.sub(%r{[ .,;:/\\]+$}, '')
257
313
  end
258
314
 
315
+ def add_non_sorting_value(title_so_far, non_sorting_value, padding)
316
+ non_sort_value = "#{non_sorting_value}#{padding}"
317
+ if title_so_far.present?
318
+ [title_so_far.strip, padding, non_sort_value].join
319
+ else
320
+ non_sort_value
321
+ end
322
+ end
323
+
259
324
  def non_sorting_padding(title, non_sorting_value)
260
325
  non_sort_note = title.note&.find { |note| note.type&.downcase == 'nonsorting character count' }
261
326
  if non_sort_note
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Cocina
4
4
  module Models
5
- VERSION = '0.94.0'
5
+ VERSION = '0.94.2'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cocina-models
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.94.0
4
+ version: 0.94.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Coyne
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-01-19 00:00:00.000000000 Z
11
+ date: 2024-01-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -562,7 +562,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
562
562
  - !ruby/object:Gem::Version
563
563
  version: '0'
564
564
  requirements: []
565
- rubygems_version: 3.3.7
565
+ rubygems_version: 3.4.13
566
566
  signing_key:
567
567
  specification_version: 4
568
568
  summary: Data models for the SDR