cocina_display 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,374 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_support"
4
+ require "active_support/core_ext/object/blank"
5
+ require "active_support/core_ext/string/access"
6
+ require "active_support/core_ext/string/starts_ends_with"
7
+
8
module CocinaDisplay
  # Select and format title data as a string for display or indexing.
  #
  # Titles are plain hashes (string keys) that carry one of "value",
  # "structuredValue" or "parallelValue", plus optional "status", "type" and
  # "note" entries.
  class TitleBuilder
    # @param titles [Array<Hash>] The titles to consider.
    # @param catalog_links [Array<Hash>] The folio catalog links to check for digital serials part labels.
    # @param strategy [Symbol] ":first" is the strategy for selection when primary or display title are missing.
    # @param add_punctuation [Boolean] Determines if the title should be formatted with punctuation.
    # @return [String, Array, nil] The title value for Solr - for :first strategy, a string (nil when there
    #   are no titles at all); for :all strategy, an array.
    #   (e.g. title displayed in blacklight search results vs boosting values for search result rankings)
    def self.build(titles, catalog_links: [], strategy: :first, add_punctuation: true)
      new(
        strategy: strategy,
        add_punctuation: add_punctuation,
        part_label: folio_part_label(catalog_links)
      ).build(titles)
    end

    # the "main title" is the title withOUT subtitle, part name, etc. We want to index it separately so
    # we can boost matches on it in search results (boost matching this string higher than matching full title string)
    # e.g. "The Hobbit" (main_title) vs "The Hobbit, or, There and Back Again" (full_title)
    # @param titles [Array<Hash>] The titles to consider.
    # @return [Array<String>] The main title value(s) for Solr - array due to possible parallelValue
    def self.main_title(titles)
      new(strategy: :first, add_punctuation: false).main_title(titles)
    end

    # the "full title" is the title WITH subtitle, part name, etc. We want to be able to index it separately so
    # we can boost matches on it in search results (boost matching this string higher than other titles present)
    # @param titles [Array<Hash>] The titles to consider.
    # @param catalog_links [Array<Hash>] The folio catalog links to check for digital serials part labels.
    # @return [Array<String>] The full title value(s) for Solr - array due to possible parallelValue
    def self.full_title(titles, catalog_links: [])
      builder = new(
        strategy: :first,
        add_punctuation: false,
        only_one_parallel_value: false,
        part_label: folio_part_label(catalog_links)
      )
      [builder.build(titles)].flatten.compact
    end

    # "additional titles" are all title data except for full_title. We want to be able to index it separately so
    # we can boost matches on it in search results (boost matching these strings lower than other titles present)
    # @param titles [Array<Hash>] The titles to consider.
    # @return [Array<String>] The values for Solr.
    def self.additional_titles(titles)
      [new(strategy: :all, add_punctuation: false).build(titles)].flatten - full_title(titles)
    end

    # @param catalog_links [Array<Hash>] catalog links to search
    # @return [String, nil] the "partLabel" of the first link whose "catalog" is "folio", if any
    def self.folio_part_label(catalog_links)
      catalog_links.find { |link| link["catalog"] == "folio" }&.fetch("partLabel", nil)
    end
    private_class_method :folio_part_label

    # @param strategy [Symbol] ":first" selects a single title value based on precedence of
    #   primary, untyped, first occurrence. ":all" returns an array containing all the values.
    # @param add_punctuation [boolean] whether the title should be formatted with punctuation (think of a structured
    #   value coming from a MARC record, which is designed for catalog cards.)
    # @param only_one_parallel_value [boolean] when true, choose one of the parallel values according to precedence
    #   of primary, untyped, first occurrence. When false, return an array containing all the parallel values.
    #   Why? Think of e.g. title displayed in blacklight search results vs boosting values for ranking of search results
    # @param part_label [String] the partLabel to add for digital serials display
    def initialize(strategy:, add_punctuation:, only_one_parallel_value: true, part_label: nil)
      @strategy = strategy
      @add_punctuation = add_punctuation
      @only_one_parallel_value = only_one_parallel_value
      @part_label = part_label
    end

    # @param [Array<Hash>] cocina_titles the titles to consider
    # @return [String, Array, nil] the title value for Solr - for :first strategy, a string (or an array when
    #   all parallel values are requested; nil when no title could be selected); for :all strategy, an array
    #   (e.g. title displayed in blacklight search results vs boosting values for search result rankings)
    def build(cocina_titles)
      return build_all(cocina_titles) unless strategy == :first

      cocina_title = select_title(cocina_titles)
      # An empty titles list yields no candidate; return nil instead of raising on nil["value"].
      return if cocina_title.nil?

      result = extract_title(cocina_title)
      part_label.present? ? add_part_label(result) : result
    end

    # this is the single "short title" - the title without subtitle, part name, etc.
    # this may be useful for boosting and exact matching for search results
    # @param titles [Array<Hash>] the titles to consider
    # @return [Array<String>] the main title value(s) for Solr - can be array due to parallel titles;
    #   empty when no title could be selected
    def main_title(titles)
      cocina_title = select_title(titles)
      return [] if cocina_title.nil?

      extract_main_title(cocina_title)
    end

    private

    attr_reader :strategy, :part_label

    # Pick the title to use: primary first, then untyped, then whatever other_title offers.
    # @param cocina_titles [Array<Hash>]
    # @return [Hash, Array<Hash>, nil]
    def select_title(cocina_titles)
      candidate = primary_title(cocina_titles) || untyped_title(cocina_titles)
      candidate.blank? ? other_title(cocina_titles) : candidate
    end

    # Extract every title; collapse to a bare string only when a single value was found
    # and a single parallel value was requested.
    # @param cocina_titles [Array<Hash>]
    # @return [String, Array<String>]
    def build_all(cocina_titles)
      values = cocina_titles.flat_map { |ctitle| extract_title(ctitle) }
      (only_one_parallel_value? && values.length == 1) ? values.first : values
    end

    # Append the digital serials part label to a title.
    # @param title [String, Array<String>] a single title, or several (parallel values)
    # @return [String, Array<String>] same shape as the input, each title carrying the label
    def add_part_label(title)
      # extract_title returns an Array when all parallel values are kept; label each one.
      return title.map { |single| add_part_label(single) } if title.is_a?(Array)

      trimmed = title.sub(/[ .,]*$/, "")
      add_punctuation? ? "#{trimmed}, #{part_label}" : "#{trimmed} #{part_label}"
    end

    # @param cocina_title [Hash] a title with "value", "structuredValue" or "parallelValue"
    # @return [String, Array<String>] a string when exactly one value results, otherwise an array
    #   (empty when the title carries none of the three keys)
    def extract_title(cocina_title)
      title_values = if cocina_title["value"]
        cocina_title["value"]
      elsif cocina_title["structuredValue"].present?
        rebuild_structured_value(cocina_title)
      elsif cocina_title["parallelValue"].present?
        extract_title_parallel_values(cocina_title)
      end
      result = [title_values].flatten.compact.map { |val| remove_trailing_punctuation(val.strip) }
      (result.length == 1) ? result.first : result
    end

    # A single parallel value is chosen only when BOTH strategy is :first AND
    # only_one_parallel_value? is true (its default); precedence is then
    # status "primary", then blank type, then first occurrence.
    # Otherwise every parallel value is extracted.
    # @param cocina_title [Hash] a title with a "parallelValue" array
    # @return [String, Array]
    def extract_title_parallel_values(cocina_title)
      parallel_values = cocina_title["parallelValue"]
      unless only_one_parallel_value? && strategy == :first
        return parallel_values.map { |pvalue| extract_title(pvalue) }
      end

      chosen = parallel_values.find { |pvalue| pvalue["status"] == "primary" } ||
        parallel_values.find { |pvalue| pvalue["type"].blank? } ||
        parallel_values.first
      extract_title(chosen)
    end

    # @param cocina_title [Hash] a title with "value", "structuredValue" or "parallelValue"
    # @return [Array<String>] the main title value(s) for Solr - can be array due to parallel titles
    def extract_main_title(cocina_title) # rubocop:disable Metrics/PerceivedComplexity
      result = if cocina_title["value"]
        cocina_title["value"] # covers both title and main title types
      elsif cocina_title["structuredValue"].present?
        main_title_from_structured_values(cocina_title)
      elsif cocina_title["parallelValue"].present?
        primary = cocina_title["parallelValue"].find { |pvalue| pvalue["status"] == "primary" }
        if primary
          extract_main_title(primary)
        else
          cocina_title["parallelValue"].map { |pvalue| extract_main_title(pvalue) }
        end
      end
      return [] if result.blank?

      [result].flatten.compact.map { |val| remove_trailing_punctuation(val) }
    end

    def add_punctuation?
      @add_punctuation
    end

    def only_one_parallel_value?
      @only_one_parallel_value
    end

    # @return [Hash, nil] title that has status=primary, either directly or on one of its parallel values
    def primary_title(cocina_titles)
      direct = cocina_titles.find { |title| title["status"] == "primary" }
      return direct if direct.present?

      # NOTE: structuredValues would only have status primary assigned as a sibling, not as an attribute

      cocina_titles.find do |title|
        title["parallelValue"]&.any? { |parallel_title| parallel_title["status"] == "primary" }
      end
    end

    # @return [Hash, Array<Hash>, nil] for :first, the first untyped title; otherwise all untyped titles
    def untyped_title(titles)
      method = (strategy == :first) ? :find : :select
      # Called without a block, find/select return an Enumerator; untyped_title_for supplies the block via #each.
      untyped_title_for(titles.public_send(method))
    end

    # Runs the "is this title untyped?" predicate over +titles+.
    # @param titles [Enumerator, Array<Hash>] an Enumerator from untyped_title, or a parallelValue Array
    # @return [Hash, Array<Hash>, nil] the result of the enumerator's find/select; for an Array, the Array itself
    def untyped_title_for(titles)
      titles.each do |title|
        if title["parallelValue"].present?
          # NOTE(review): Array#each returns its receiver, so a title with a non-empty
          # parallelValue always counts as untyped here, whatever types its members have.
          untyped_title_for(title["parallelValue"])
        else
          title["type"].nil? || title["type"] == "title"
        end
      end
    end

    # This is called when there is no primary title and no untyped title
    # @return [Hash, Array<Hash>] first title or all titles
    def other_title(titles)
      (strategy == :first) ? titles.first : titles
    end

    # @param cocina_title [Hash] title with structured values
    # @return [String] the title value from combining the pieces of the structured_values by type and order
    #   with desired punctuation per specs
    #
    #   - nonsorting characters value is followed by a space, unless the nonsorting
    #     character count note has a numeric value equal to the length of the
    #     nonsorting characters value, in which case no space is inserted
    #   - subtitle is preceded by space colon space, unless it is at the beginning
    #     of the title string
    #   - partName and partNumber are always separated from each other by comma space
    #   - first partName or partNumber in the string is preceded by period space
    #   - partName or partNumber before nonsorting characters or main title is followed
    #     by period space
    #
    # for punctuation funky town, thank MARC and catalog cards
    #
    # rubocop:disable Metrics/AbcSize
    # rubocop:disable Metrics/CyclomaticComplexity
    # rubocop:disable Metrics/MethodLength
    # rubocop:disable Metrics/PerceivedComplexity
    def rebuild_structured_value(cocina_title)
      result = ""
      parts = ""
      cocina_title["structuredValue"].each do |structured_value| # rubocop:disable Metrics/BlockLength
        # There can be a structuredValue inside a structuredValue, for example,
        # a uniform title where both the name and the title have internal StructuredValue.
        # The first nested one wins: its rebuilt value is returned for the whole title.
        return rebuild_structured_value(structured_value) if structured_value["structuredValue"].present?

        value = structured_value["value"]&.strip
        next unless value

        # additional types ignored here, e.g. name, uniform ...
        case structured_value["type"]&.downcase
        when "nonsorting characters"
          padding = non_sorting_padding(cocina_title, value)
          result = add_non_sorting_value(result, value, padding)
        when "part name", "part number"
          # even if there is a partLabel, use any existing structuredValue
          # part name/number that remains for non-digital serials purposes.
          # All parts are gathered on the first encounter; later part entries are skipped.
          if parts.blank?
            parts = part_name_number(cocina_title["structuredValue"])
            result = if !add_punctuation?
              [result, parts].join(" ")
            elsif result.present?
              # part name/number is preceded by period space, unless it is at the beginning of the title string
              "#{result.sub(/[ .,]*$/, "")}. #{parts}. "
            else
              "#{parts}. "
            end
          end
        when "main title", "title"
          # nonsorting characters ending with hyphen, apostrophe or space should be slammed against the main title,
          # even if we are not adding punctuation
          result = if add_punctuation? || result.end_with?(" ", "-", "'")
            [result, value].join
          else
            [remove_trailing_punctuation(result), remove_trailing_punctuation(value)].select(&:present?).join(" ")
          end
        when "subtitle"
          subtitle = value.sub(/^:/, "").strip
          result = if !add_punctuation?
            [result, subtitle].select(&:present?).join(" ")
          elsif result.present?
            # subtitle is preceded by space colon space, unless it is at the beginning of the title string
            "#{result.sub(/[. :]+$/, "")} : #{subtitle}"
          else
            subtitle
          end
        end
      end

      result
    end
    # rubocop:enable Metrics/AbcSize
    # rubocop:enable Metrics/CyclomaticComplexity
    # rubocop:enable Metrics/MethodLength
    # rubocop:enable Metrics/PerceivedComplexity

    # main_title is title.structuredValue.value with type 'main title' (or just title.value),
    # prefixed by any nonsorting characters
    # @param cocina_title [Hash] Title with structured values
    # @return [String] the main title value
    def main_title_from_structured_values(cocina_title)
      result = ""
      # combine pieces of the cocina structuredValue into a single title
      cocina_title["structuredValue"].each do |structured_value|
        # There can be a structuredValue inside a structuredValue, for example,
        # a uniform title where both the name and the title have internal StructuredValue
        return main_title_from_structured_values(structured_value) if structured_value["structuredValue"].present?

        value = structured_value["value"]&.strip
        next unless value

        case structured_value["type"]&.downcase
        when "nonsorting characters"
          padding = non_sorting_padding(cocina_title, value)
          result = add_non_sorting_value(result, value, padding)
        when "main title", "title"
          result = if result.end_with?("'", "-")
            # nonsorting characters ending in apostrophe or hyphen attach directly to the title
            [result, value].join
          else
            [remove_trailing_punctuation(result).strip, remove_trailing_punctuation(value).strip]
              .select(&:present?).join(" ")
          end
        end
      end

      result
    end

    # Thank MARC and catalog cards for this mess.
    # @param title [String]
    # @return [String] title without trailing spaces, periods, commas, semicolons, colons or slashes
    def remove_trailing_punctuation(title)
      title.sub(%r{[ .,;:/\\]+$}, "")
    end

    # @param title_so_far [String] what has been assembled up to now
    # @param non_sorting_value [String] the nonsorting characters
    # @param padding [String] whitespace to follow (and, mid-title, precede) the nonsorting characters
    # @return [String]
    def add_non_sorting_value(title_so_far, non_sorting_value, padding)
      non_sort_value = "#{non_sorting_value}#{padding}"
      return non_sort_value unless title_so_far.present?

      [title_so_far.strip, padding, non_sort_value].join
    end

    # @param title [Hash] the title, consulted for a "nonsorting character count" note
    # @param non_sorting_value [String] the nonsorting characters
    # @return [String] the whitespace that should follow the nonsorting characters
    def non_sorting_padding(title, non_sorting_value)
      non_sort_note = title["note"]&.find { |note| note["type"]&.downcase == "nonsorting character count" }
      if non_sort_note
        # pad up to the declared count; never negative
        padding_count = [non_sort_note["value"].to_i - non_sorting_value.length, 0].max
        " " * padding_count
      elsif non_sorting_value.end_with?("'", "-")
        ""
      else
        " "
      end
    end

    # combine part name and part number:
    #   respect order of occurrence
    #   separated from each other by comma space
    # @param structured_values [Array<Hash>]
    # @return [String]
    def part_name_number(structured_values)
      structured_values.reduce("") do |title_from_part, structured_value|
        next title_from_part unless ["part name", "part number"].include?(structured_value["type"]&.downcase)

        value = structured_value["value"]&.strip
        value ? append_part_to_title(title_from_part, value) : title_from_part
      end
    end

    # @param title_from_part [String] parts combined so far
    # @param value [String] the next part name or number
    # @return [String]
    def append_part_to_title(title_from_part, value)
      if !add_punctuation?
        [title_from_part, value].select(&:present?).join(" ")
      elsif title_from_part.strip.present?
        "#{title_from_part.sub(/[ .,]*$/, "")}, #{value}"
      else
        value
      end
    end
  end
end
@@ -2,5 +2,5 @@
2
2
 
3
3
  # :nodoc:
4
4
  module CocinaDisplay
5
- VERSION = "0.1.0" # :nodoc:
5
+ VERSION = "0.3.0" # :nodoc:
6
6
  end
metadata CHANGED
@@ -1,43 +1,63 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cocina_display
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nick Budak
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-06-18 00:00:00.000000000 Z
11
+ date: 2025-07-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: cocina-models
14
+ name: janeway-jsonpath
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0.101'
19
+ version: '0.6'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0.101'
26
+ version: '0.6'
27
27
  - !ruby/object:Gem::Dependency
28
- name: janeway-jsonpath
28
+ name: activesupport
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0.6'
33
+ version: '8.0'
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 8.0.2
34
37
  type: :runtime
35
38
  prerelease: false
36
39
  version_requirements: !ruby/object:Gem::Requirement
37
40
  requirements:
38
41
  - - "~>"
39
42
  - !ruby/object:Gem::Version
40
- version: '0.6'
43
+ version: '8.0'
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 8.0.2
47
+ - !ruby/object:Gem::Dependency
48
+ name: edtf
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '3.2'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: '3.2'
41
61
  - !ruby/object:Gem::Dependency
42
62
  name: rake
43
63
  requirement: !ruby/object:Gem::Requirement
@@ -94,6 +114,20 @@ dependencies:
94
114
  - - "~>"
95
115
  - !ruby/object:Gem::Version
96
116
  version: 0.22.0
117
+ - !ruby/object:Gem::Dependency
118
+ name: simplecov-rspec
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '0.4'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '0.4'
97
131
  - !ruby/object:Gem::Dependency
98
132
  name: yard
99
133
  requirement: !ruby/object:Gem::Requirement
@@ -108,6 +142,26 @@ dependencies:
108
142
  - - "~>"
109
143
  - !ruby/object:Gem::Version
110
144
  version: 0.9.37
145
+ - !ruby/object:Gem::Dependency
146
+ name: webrick
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '1.9'
152
+ - - ">="
153
+ - !ruby/object:Gem::Version
154
+ version: 1.9.1
155
+ type: :development
156
+ prerelease: false
157
+ version_requirements: !ruby/object:Gem::Requirement
158
+ requirements:
159
+ - - "~>"
160
+ - !ruby/object:Gem::Version
161
+ version: '1.9'
162
+ - - ">="
163
+ - !ruby/object:Gem::Version
164
+ version: 1.9.1
111
165
  description:
112
166
  email:
113
167
  - budak@stanford.edu
@@ -122,6 +176,12 @@ files:
122
176
  - Rakefile
123
177
  - lib/cocina_display.rb
124
178
  - lib/cocina_display/cocina_record.rb
179
+ - lib/cocina_display/concerns/events.rb
180
+ - lib/cocina_display/dates/date.rb
181
+ - lib/cocina_display/dates/date_range.rb
182
+ - lib/cocina_display/imprint.rb
183
+ - lib/cocina_display/marc_country_codes.rb
184
+ - lib/cocina_display/title_builder.rb
125
185
  - lib/cocina_display/version.rb
126
186
  - sig/cocina_display.rbs
127
187
  homepage: https://sul-dlss.github.io/cocina_display/