cocina_display 1.1.3 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/.rspec +0 -1
  3. data/.standard.yml +1 -1
  4. data/README.md +21 -2
  5. data/config/i18n-tasks.yml +0 -0
  6. data/config/licenses.yml +59 -0
  7. data/config/locales/en.yml +109 -0
  8. data/lib/cocina_display/cocina_record.rb +27 -63
  9. data/lib/cocina_display/concerns/accesses.rb +78 -0
  10. data/lib/cocina_display/concerns/contributors.rb +32 -11
  11. data/lib/cocina_display/concerns/events.rb +19 -6
  12. data/lib/cocina_display/concerns/forms.rb +98 -11
  13. data/lib/cocina_display/concerns/geospatial.rb +9 -5
  14. data/lib/cocina_display/concerns/identifiers.rb +15 -4
  15. data/lib/cocina_display/concerns/languages.rb +6 -2
  16. data/lib/cocina_display/concerns/notes.rb +36 -0
  17. data/lib/cocina_display/concerns/related_resources.rb +20 -0
  18. data/lib/cocina_display/concerns/subjects.rb +25 -8
  19. data/lib/cocina_display/concerns/titles.rb +67 -25
  20. data/lib/cocina_display/concerns/{access.rb → url_helpers.rb} +3 -3
  21. data/lib/cocina_display/concerns.rb +6 -0
  22. data/lib/cocina_display/contributors/contributor.rb +47 -26
  23. data/lib/cocina_display/contributors/name.rb +18 -14
  24. data/lib/cocina_display/contributors/role.rb +20 -13
  25. data/lib/cocina_display/dates/date.rb +55 -14
  26. data/lib/cocina_display/dates/date_range.rb +0 -2
  27. data/lib/cocina_display/description/access.rb +41 -0
  28. data/lib/cocina_display/description/access_contact.rb +11 -0
  29. data/lib/cocina_display/description/url.rb +17 -0
  30. data/lib/cocina_display/display_data.rb +104 -0
  31. data/lib/cocina_display/events/event.rb +8 -4
  32. data/lib/cocina_display/events/imprint.rb +0 -10
  33. data/lib/cocina_display/events/location.rb +0 -2
  34. data/lib/cocina_display/events/note.rb +33 -0
  35. data/lib/cocina_display/forms/form.rb +71 -0
  36. data/lib/cocina_display/forms/genre.rb +12 -0
  37. data/lib/cocina_display/forms/resource_type.rb +38 -0
  38. data/lib/cocina_display/geospatial.rb +1 -1
  39. data/lib/cocina_display/identifier.rb +101 -0
  40. data/lib/cocina_display/json_backed_record.rb +27 -0
  41. data/lib/cocina_display/language.rb +9 -11
  42. data/lib/cocina_display/license.rb +32 -0
  43. data/lib/cocina_display/note.rb +103 -0
  44. data/lib/cocina_display/related_resource.rb +74 -0
  45. data/lib/cocina_display/subjects/subject.rb +32 -9
  46. data/lib/cocina_display/subjects/subject_value.rb +34 -16
  47. data/lib/cocina_display/title.rb +194 -0
  48. data/lib/cocina_display/utils.rb +4 -4
  49. data/lib/cocina_display/version.rb +1 -1
  50. data/lib/cocina_display.rb +30 -2
  51. metadata +45 -11
  52. data/lib/cocina_display/title_builder.rb +0 -397
  53. /data/lib/cocina_display/vocabularies/{marc_country_codes.rb → marc_country.rb} +0 -0
  54. /data/lib/cocina_display/vocabularies/{marc_relator_codes.rb → marc_relator.rb} +0 -0
@@ -1,397 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "active_support"
4
- require "active_support/core_ext/object/blank"
5
- require "active_support/core_ext/string/access"
6
- require "active_support/core_ext/string/starts_ends_with"
7
-
8
- module CocinaDisplay
9
- # Select and format title data as a string for display or indexing.
10
- class TitleBuilder
11
- # @param titles [Array<Hash>] The titles to consider.
12
- # @param catalog_links [Array<Hash>] The folio catalog links to check for digital serials part labels.
13
- # @param strategy [Symbol] ":first" is the strategy for selection when primary or display title are missing.
14
- # @param add_punctuation [Boolean] Determines if the title should be formatted with punctuation.
15
- # @return [String, Array] The title value for Solr - for :first strategy, a string; for :all strategy, an array.
16
- # (e.g. title displayed in blacklight search results vs boosting values for search result rankings)
17
- def self.build(titles, catalog_links: [], strategy: :first, add_punctuation: true)
18
- part_label = catalog_links.find { |link| link["catalog"] == "folio" }&.fetch("partLabel", nil)
19
- new(strategy: strategy, add_punctuation: add_punctuation, part_label: part_label).build(titles)
20
- end
21
-
22
- # the "main title" is the title withOUT subtitle, part name, etc. We want to index it separately so
23
- # we can boost matches on it in search results (boost matching this string higher than matching full title string)
24
- # e.g. "The Hobbit" (main_title) vs "The Hobbit, or, There and Back Again" (full_title)
25
- # @param titles [Array<Hash>] The titles to consider.
26
- # @return [Array<String>] The main title value(s) for Solr - array due to possible parallelValue
27
- def self.main_title(titles)
28
- new(strategy: :first, add_punctuation: false).main_title(titles)
29
- end
30
-
31
- # the "full title" is the title WITH subtitle, part name, etc. We want to be able to index it separately so
32
- # we can boost matches on it in search results (boost matching this string higher than other titles present)
33
- # @param titles [Array<Hash>] The titles to consider.
34
- # @param catalog_links [Array<Hash>] The folio catalog links to check for digital serials part labels.
35
- # @return [Array<String>] The full title value(s) for Solr - array due to possible parallelValue
36
- def self.full_title(titles, catalog_links: [])
37
- part_label = catalog_links.find { |link| link["catalog"] == "folio" }&.fetch("partLabel", nil)
38
- [new(strategy: :first, add_punctuation: false, only_one_parallel_value: false, part_label: part_label).build(titles)].flatten.compact
39
- end
40
-
41
- # "additional titles" are all title data except for full_title. We want to be able to index it separately so
42
- # we can boost matches on it in search results (boost matching these strings lower than other titles present)
43
- # @param titles [Array<Hash>] The titles to consider.
44
- # @return [Array<String>] The values for Solr.
45
- def self.additional_titles(titles)
46
- [new(strategy: :all, add_punctuation: false).build(titles)].flatten - full_title(titles)
47
- end
48
-
49
- # Like the full title, but with any non-sorting characters and punctuation removed.
50
- # @param titles [Array<Hash>] The titles to consider.
51
- # @param catalog_links [Array<Hash>] The folio catalog links to check for digital serials part labels.
52
- # @return [Array<String>] The sort title value(s) for Solr - array due to possible parallelValue
53
- def self.sort_title(titles, catalog_links: [])
54
- part_label = catalog_links.find { |link| link["catalog"] == "folio" }&.fetch("partLabel", nil)
55
- [new(strategy: :first, add_punctuation: false, only_one_parallel_value: false, part_label: part_label, sortable: true).build(titles)]
56
- .flatten.compact.map { |title| title.gsub(/[[:punct:]]*/, "").squeeze(" ").strip }
57
- end
58
-
59
- # @param strategy [Symbol] ":first" selects a single title value based on precedence of
60
- # primary, untyped, first occurrence. ":all" returns an array containing all the values.
61
- # @param add_punctuation [boolean] whether the title should be formatted with punctuation (think of a structured
62
- # value coming from a MARC record, which is designed for catalog cards.)
63
- # @param only_one_parallel_value [boolean] when true, choose one of the parallel values according to precedence
64
- # of primary, untyped, first occurrence. When false, return an array containing all the parallel values.
65
- # Why? Think of e.g. title displayed in blacklight search results vs boosting values for ranking of search results
66
- # @param part_label [String] the partLabel to add for digital serials display
67
- # @param sortable [boolean] whether the title is intended for sorting, and should have non-sorting parts removed
68
- def initialize(strategy:, add_punctuation:, only_one_parallel_value: true, part_label: nil, sortable: false)
69
- @strategy = strategy
70
- @add_punctuation = add_punctuation
71
- @only_one_parallel_value = only_one_parallel_value
72
- @part_label = part_label
73
- @sortable = sortable
74
- end
75
-
76
- # @param [Array<Hash>] cocina_titles the titles to consider
77
- # @return [String, Array] the title value for Solr - for :first strategy, a string; for :all strategy, an array
78
- # (e.g. title displayed in blacklight search results vs boosting values for search result rankings)
79
- # rubocop:disable Metrics/PerceivedComplexity
80
- def build(cocina_titles)
81
- cocina_title = primary_title(cocina_titles) || untyped_title(cocina_titles)
82
- cocina_title = other_title(cocina_titles) if cocina_title.blank?
83
- if strategy == :first
84
- result = extract_title(cocina_title)
85
- result = add_part_label(result) if part_label.present?
86
- result
87
- else
88
- result = cocina_titles.map { |ctitle| extract_title(ctitle) }.flatten
89
- if only_one_parallel_value? && result.length == 1
90
- result.first
91
- else
92
- result
93
- end
94
- end
95
- end
96
- # rubocop:enable Metrics/PerceivedComplexity
97
-
98
- # this is the single "short title" - the title without subtitle, part name, etc.
99
- # this may be useful for boosting and exact matching for search results
100
- # @return [Array<String>] the main title value(s) for Solr - can be array due to parallel titles
101
- def main_title(titles)
102
- return [] if titles.empty?
103
-
104
- cocina_title = primary_title(titles) || untyped_title(titles)
105
- cocina_title = other_title(titles) if cocina_title.blank?
106
-
107
- extract_main_title(cocina_title)
108
- end
109
-
110
- private
111
-
112
- attr_reader :strategy, :part_label
113
-
114
- def add_part_label(title)
115
- # when a digital serial
116
- title = title.sub(/[ .,]*$/, "").to_s
117
- add_punctuation? ? "#{title}, #{part_label}" : "#{title} #{part_label}"
118
- end
119
-
120
- def extract_title(cocina_title)
121
- return if cocina_title.blank?
122
- title_values = if cocina_title["value"]
123
- cocina_title["value"]
124
- elsif cocina_title["structuredValue"].present?
125
- rebuild_structured_value(cocina_title)
126
- elsif cocina_title["parallelValue"].present?
127
- extract_title_parallel_values(cocina_title)
128
- end
129
- result = [title_values].flatten.compact.map { |val| remove_trailing_punctuation(val.strip) }
130
- (result.length == 1) ? result.first : result
131
- end
132
-
133
- # strategy :first says to return a single value (:first is the default strategy for .build)
134
- # only_one_parallel_value? says to return a single value, even if that value is a parallelValue (default: true)
135
- #
136
- # rubocop:disable Metrics/PerceivedComplexity
137
- def extract_title_parallel_values(cocina_title)
138
- primary = cocina_title["parallelValue"].find { |pvalue| pvalue["status"] == "primary" }
139
- if primary && only_one_parallel_value? && strategy == :first
140
- # we have a primary title and we know we want a single value
141
- extract_title(primary)
142
- elsif only_one_parallel_value? && strategy == :first
143
- # no primary value; algorithm says prefer an untyped value over a typed value for single value
144
- untyped = cocina_title["parallelValue"].find { |pvalue| pvalue["type"].blank? }
145
- extract_title(untyped || cocina_title["parallelValue"].first)
146
- else
147
- cocina_title["parallelValue"].map { |pvalue| extract_title(pvalue) }
148
- end
149
- end
150
- # rubocop:enable Metrics/PerceivedComplexity
151
-
152
- # @return [Array<String>] the main title value(s) for Solr - can be array due to parallel titles
153
- def extract_main_title(cocina_title) # rubocop:disable Metrics/PerceivedComplexity
154
- result = if cocina_title["value"]
155
- cocina_title["value"] # covers both title and main title types
156
- elsif cocina_title["structuredValue"].present?
157
- main_title_from_structured_values(cocina_title)
158
- elsif cocina_title["parallelValue"].present?
159
- primary = cocina_title["parallelValue"].find { |pvalue| pvalue["status"] == "primary" }
160
- if primary
161
- extract_main_title(primary)
162
- else
163
- cocina_title["parallelValue"].map { |pvalue| extract_main_title(pvalue) }
164
- end
165
- end
166
- return [] if result.blank?
167
-
168
- [result].flatten.compact.map { |val| remove_trailing_punctuation(val) }
169
- end
170
-
171
- def add_punctuation?
172
- @add_punctuation
173
- end
174
-
175
- def only_one_parallel_value?
176
- @only_one_parallel_value
177
- end
178
-
179
- def sortable?
180
- @sortable
181
- end
182
-
183
- # @return [Hash, nil] title that has status=primary
184
- def primary_title(cocina_titles)
185
- primary_title = cocina_titles.find { |title| title["status"] == "primary" }
186
- return primary_title if primary_title.present?
187
-
188
- # NOTE: structuredValues would only have status primary assigned as a sibling, not as an attribute
189
-
190
- cocina_titles.find do |title|
191
- title["parallelValue"]&.find do |parallel_title|
192
- parallel_title["status"] == "primary"
193
- end
194
- end
195
- end
196
-
197
- def untyped_title(titles)
198
- method = (strategy == :first) ? :find : :select
199
- untyped_title_for(titles.public_send(method))
200
- end
201
-
202
- # @return [Array[Hash]] first title that has no type attribute
203
- def untyped_title_for(titles)
204
- titles.each do |title|
205
- if title["parallelValue"].present?
206
- untyped_title_for(title["parallelValue"])
207
- else
208
- title["type"].nil? || title["type"] == "title"
209
- end
210
- end
211
- end
212
-
213
- # This is called when there is no primary title and no untyped title
214
- # @return [Hash, Array<Hash>] first title or all titles
215
- def other_title(titles)
216
- if strategy == :first
217
- titles.first
218
- else
219
- titles
220
- end
221
- end
222
-
223
- # @param cocina_title [Hash] title with structured values
224
- # @return [String] the title value from combining the pieces of the structured_values by type and order
225
- # with desired punctuation per specs
226
- #
227
- # - nonsorting characters value is followed by a space, unless the nonsorting
228
- # character count note has a numeric value equal to the length of the
229
- # nonsorting characters value, in which case no space is inserted
230
- # - subtitle is preceded by space colon space, unless it is at the beginning
231
- # of the title string
232
- # - partName and partNumber are always separated from each other by comma space
233
- # - first partName or partNumber in the string is preceded by period space
234
- # - partName or partNumber before nonsorting characters or main title is followed
235
- # by period space
236
- #
237
- # for punctuation funky town, thank MARC and catalog cards
238
- #
239
- # rubocop:disable Metrics/AbcSize
240
- # rubocop:disable Metrics/CyclomaticComplexity
241
- # rubocop:disable Metrics/MethodLength
242
- # rubocop:disable Metrics/PerceivedComplexity
243
- def rebuild_structured_value(cocina_title, sortable: false)
244
- result = ""
245
- part_name_number = ""
246
- cocina_title["structuredValue"].each do |structured_value| # rubocop:disable Metrics/BlockLength
247
- # There can be a structuredValue inside a structuredValue, for example,
248
- # a uniform title where both the name and the title have internal StructuredValue
249
- return rebuild_structured_value(structured_value) if structured_value["structuredValue"].present?
250
-
251
- value = structured_value["value"]&.strip
252
- next unless value
253
-
254
- # additional types ignored here, e.g. name, uniform ...
255
- case structured_value["type"]&.downcase
256
- when "nonsorting characters"
257
- unless sortable?
258
- padding = non_sorting_padding(cocina_title, value)
259
- result = add_non_sorting_value(result, value, padding)
260
- end
261
- when "part name", "part number"
262
- # even if there is a partLabel, use any existing structuredValue
263
- # part name/number that remains for non-digital serials purposes
264
- if part_name_number.blank?
265
- part_name_number = part_name_number(cocina_title["structuredValue"])
266
- result = if !add_punctuation?
267
- [result, part_name_number].join(" ")
268
- elsif result.present?
269
- # part name/number is preceded by period space, unless it is at the beginning of the title string
270
- "#{result.sub(/[ .,]*$/, "")}. #{part_name_number}. "
271
- else
272
- "#{part_name_number}. "
273
- end
274
- end
275
- when "main title", "title"
276
- # nonsorting characters ending with hyphen, apostrophe or space should be slammed against the main title,
277
- # even if we are not adding punctuation
278
- result = if add_punctuation? || result.ends_with?(" ") || result.ends_with?("-") || result.ends_with?("'")
279
- [result, value].join
280
- else
281
- [remove_trailing_punctuation(result), remove_trailing_punctuation(value)].select(&:presence).join(" ")
282
- end
283
- when "subtitle"
284
- result = if !add_punctuation?
285
- [result, value.sub(/^:/, "").strip].select(&:presence).join(" ")
286
- elsif result.present?
287
- # subtitle is preceded by space colon space, unless it is at the beginning of the title string
288
- "#{result.sub(/[. :]+$/, "")} : #{value.sub(/^:/, "").strip}"
289
- else
290
- result = value.sub(/^:/, "").strip
291
- end
292
- end
293
- end
294
-
295
- result
296
- end
297
- # rubocop:enable Metrics/AbcSize
298
- # rubocop:enable Metrics/CyclomaticComplexity
299
- # rubocop:enable Metrics/MethodLength
300
- # rubocop:enable Metrics/PerceivedComplexity
301
-
302
- # main_title is title.structuredValue.value with type 'main title' (or just title.value)
303
- # @param cocina_title [Hash] Title with structured values
304
- # @return [String] the main title value
305
- #
306
- # rubocop:disable Metrics/MethodLength
307
- # rubocop:disable Metrics/PerceivedComplexity
308
- # rubocop:disable Metrics/AbcSize
309
- # rubocop:disable Metrics/CyclomaticComplexity
310
- def main_title_from_structured_values(cocina_title, sortable: false)
311
- result = ""
312
- # combine pieces of the cocina structuredValue into a single title
313
- cocina_title["structuredValue"].each do |structured_value|
314
- # There can be a structuredValue inside a structuredValue, for example,
315
- # a uniform title where both the name and the title have internal StructuredValue
316
- return main_title_from_structured_values(structured_value) if structured_value["structuredValue"].present?
317
-
318
- value = structured_value["value"]&.strip
319
- next unless value
320
-
321
- case structured_value["type"]&.downcase
322
- when "nonsorting characters"
323
- unless sortable?
324
- padding = non_sorting_padding(cocina_title, value)
325
- result = add_non_sorting_value(result, value, padding)
326
- end
327
- when "main title", "title"
328
- result = if ["'", "-"].include?(result.last)
329
- [result, value].join
330
- else
331
- [remove_trailing_punctuation(result).strip, remove_trailing_punctuation(value).strip].select(&:presence).join(" ")
332
- end
333
- end
334
- end
335
-
336
- result
337
- end
338
- # rubocop:enable Metrics/MethodLength
339
- # rubocop:enable Metrics/PerceivedComplexity
340
- # rubocop:enable Metrics/AbcSize
341
- # rubocop:enable Metrics/CyclomaticComplexity
342
-
343
- # Thank MARC and catalog cards for this mess.
344
- def remove_trailing_punctuation(title)
345
- title.sub(%r{[ .,;:/\\]+$}, "")
346
- end
347
-
348
- def add_non_sorting_value(title_so_far, non_sorting_value, padding)
349
- non_sort_value = "#{non_sorting_value}#{padding}"
350
- if title_so_far.present?
351
- [title_so_far.strip, padding, non_sort_value].join
352
- else
353
- non_sort_value
354
- end
355
- end
356
-
357
- def non_sorting_padding(title, non_sorting_value)
358
- non_sort_note = title["note"]&.find { |note| note["type"]&.downcase == "nonsorting character count" }
359
- if non_sort_note
360
- padding_count = [non_sort_note["value"].to_i - non_sorting_value.length, 0].max
361
- " " * padding_count
362
- elsif ["'", "-"].include?(non_sorting_value.last)
363
- ""
364
- else
365
- " "
366
- end
367
- end
368
-
369
- # combine part name and part number:
370
- # respect order of occurrence
371
- # separated from each other by comma space
372
- def part_name_number(structured_values)
373
- title_from_part = ""
374
- structured_values.each do |structured_value|
375
- case structured_value["type"]&.downcase
376
- when "part name", "part number"
377
- value = structured_value["value"]&.strip
378
- next unless value
379
-
380
- title_from_part = append_part_to_title(title_from_part, value)
381
-
382
- end
383
- end
384
- title_from_part
385
- end
386
-
387
- def append_part_to_title(title_from_part, value)
388
- if !add_punctuation?
389
- [title_from_part, value].select(&:presence).join(" ")
390
- elsif title_from_part.strip.present?
391
- "#{title_from_part.sub(/[ .,]*$/, "")}, #{value}"
392
- else
393
- value
394
- end
395
- end
396
- end
397
- end