cocina-models 0.119.0 → 0.121.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +1 -1
  3. data/.claude/skills/cocina-jq-query/SKILL.md +8 -0
  4. data/.gitignore +5 -0
  5. data/.rubocop.yml +6 -0
  6. data/AGENTS.md +208 -0
  7. data/Gemfile +2 -0
  8. data/Gemfile.lock +40 -28
  9. data/README.md +4 -1
  10. data/bin/enhance-report-csv +90 -0
  11. data/bin/validate-data +7 -0
  12. data/bin/validate-schema +6 -1
  13. data/cocina-models.gemspec +1 -2
  14. data/lib/cocina/models/contributor.rb +0 -3
  15. data/lib/cocina/models/mapping/from_mods/event.rb +12 -3
  16. data/lib/cocina/models/related_resource.rb +1 -1
  17. data/lib/cocina/models/validators/base_description_visitor_validator.rb +33 -0
  18. data/lib/cocina/models/validators/base_structural_visitor_validator.rb +23 -0
  19. data/lib/cocina/models/validators/composite_description_validator.rb +62 -0
  20. data/lib/cocina/models/validators/composite_structural_validator.rb +48 -0
  21. data/lib/cocina/models/validators/dark_visitor_validator.rb +46 -0
  22. data/lib/cocina/models/validators/description_date_time_visitor_validator.rb +132 -0
  23. data/lib/cocina/models/validators/{description_types_validator.rb → description_types_visitor_validator.rb} +9 -55
  24. data/lib/cocina/models/validators/{description_values_validator.rb → description_values_visitor_validator.rb} +14 -51
  25. data/lib/cocina/models/validators/json_schema_validator.rb +54 -102
  26. data/lib/cocina/models/validators/language_tag_visitor_validator.rb +32 -0
  27. data/lib/cocina/models/validators/reserved_filename_visitor_validator.rb +40 -0
  28. data/lib/cocina/models/validators/validator.rb +5 -9
  29. data/lib/cocina/models/version.rb +1 -1
  30. data/lib/cocina/models.rb +1 -1
  31. data/schema.json +114 -59
  32. metadata +16 -24
  33. data/lib/cocina/models/descriptive_parallel_contributor.rb +0 -29
  34. data/lib/cocina/models/validators/dark_validator.rb +0 -76
  35. data/lib/cocina/models/validators/date_time_validator.rb +0 -100
  36. data/lib/cocina/models/validators/language_tag_validator.rb +0 -76
  37. data/lib/cocina/models/validators/reserved_filename_validator.rb +0 -60
@@ -435,7 +435,7 @@ module Cocina
435
435
  new_node = node.deep_dup
436
436
  new_node.remove_attribute('encoding') if common_attribs[:encoding].present? || node[:encoding]&.empty?
437
437
  new_node.remove_attribute('qualifier') if common_attribs[:qualifier].present? || node[:qualifier]&.empty?
438
- build_date(new_node)
438
+ build_date(new_node, encoding: common_attribs.dig(:encoding, :code))
439
439
  end
440
440
  { structuredValue: dates }.merge(common_attribs).compact
441
441
  end
@@ -462,9 +462,11 @@ module Cocina
462
462
  attribs.compact
463
463
  end
464
464
 
465
- def build_date(date_node)
465
+ def build_date(date_node, encoding: nil)
466
+ effective_encoding = date_node['encoding'] || encoding
466
467
  {}.tap do |date|
467
- date[:value] = clean_date(date_node.text) if date_node.text.present?
468
+ raw_value = clean_date(date_node.text)
469
+ date[:value] = effective_encoding == 'edtf' ? pad_edtf_year(raw_value) : raw_value if date_node.text.present?
468
470
  date[:encoding] = { code: date_node['encoding'] } if date_node['encoding']
469
471
  date[:status] = 'primary' if date_node['keyDate']
470
472
  date[:note] = build_date_note(date_node)
@@ -489,6 +491,13 @@ module Cocina
489
491
  date.delete_suffix('.')
490
492
  end
491
493
 
494
+ # Pads a leading 1-3 digit year to 4 digits, keeping an optional leading minus sign; the year must be followed by ~, ?, %, / or -, or end the string.
495
+ def pad_edtf_year(value)
496
+ value.sub(%r{\A(-?)(\d{1,3})(?=[~?%/-]|\z)}) do
497
+ "#{::Regexp.last_match(1)}#{::Regexp.last_match(2).rjust(4, '0')}"
498
+ end
499
+ end
500
+
492
501
  # NOTE: Do any eventType/displayLabel transformations before determining role (i.e. with LEGACY_EVENT_TYPES_2_TYPE)
493
502
  def role_for(event)
494
503
  case event[:type]
@@ -14,7 +14,7 @@ module Cocina
14
14
  # The preferred display label to use for the related resource in access systems.
15
15
  attribute? :displayLabel, Types::Strict::String.optional
16
16
  # Titles of the related resource.
17
- attribute :title, Types::Strict::Array.of(DescriptiveValue).default([].freeze)
17
+ attribute :title, Types::Strict::Array.of(Title).default([].freeze)
18
18
  # Agents contributing in some way to the creation and history of the related resource.
19
19
  attribute :contributor, Types::Strict::Array.of(Contributor).default([].freeze)
20
20
  # Events in the history of the related resource.
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cocina
4
+ module Models
5
+ module Validators
6
+ # Super class for description validators that use a visitor pattern.
7
+ class BaseDescriptionVisitorValidator
8
+ def visit_hash(hash:, path:); end
9
+
10
+ def visit_array(array:, path:); end
11
+
12
+ def visit_obj(obj:, path:); end
13
+
14
+ # @raise [ValidationError] if validation fails
15
+ def validate!; end
16
+
17
+ def path_to_s(path)
18
+ # This matches the format used by descriptive spreadsheets
19
+ path_str = ''
20
+ path.each_with_index do |part, index|
21
+ if part.is_a?(Integer)
22
+ path_str += (part + 1).to_s
23
+ else
24
+ path_str += '.' if index.positive?
25
+ path_str += part.to_s
26
+ end
27
+ end
28
+ path_str
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cocina
4
+ module Models
5
+ module Validators
6
+ # Super class for structural validators that use a visitor pattern.
7
+ class BaseStructuralVisitorValidator
8
+ def initialize(attributes)
9
+ @attributes = attributes
10
+ end
11
+
12
+ def visit_file(file_hash:); end
13
+
14
+ # @raise [ValidationError] if validation fails
15
+ def validate!; end
16
+
17
+ private
18
+
19
+ attr_reader :attributes
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cocina
4
+ module Models
5
+ module Validators
6
+ # Composite validator for description that uses a visitor pattern to validate in a single pass.
7
+ class CompositeDescriptionValidator
8
+ VALIDATORS = [
9
+ DescriptionTypesVisitorValidator,
10
+ DescriptionValuesVisitorValidator,
11
+ DescriptionDateTimeVisitorValidator
12
+ ].freeze
13
+
14
+ def self.validate(clazz, attributes)
15
+ new(clazz, attributes).validate
16
+ end
17
+
18
+ def initialize(clazz, attributes, validators: VALIDATORS)
19
+ @clazz = clazz
20
+ @attributes = attributes
21
+ @validators = validators.map(&:new)
22
+ end
23
+
24
+ def validate
25
+ return unless meets_preconditions?
26
+
27
+ validate_obj(obj: attributes, path: [])
28
+
29
+ validators.each(&:validate!)
30
+ end
31
+
32
+ private
33
+
34
+ attr_reader :clazz, :attributes, :validators
35
+
36
+ def meets_preconditions?
37
+ [Cocina::Models::Description, Cocina::Models::RequestDescription].include?(clazz)
38
+ end
39
+
40
+ def validate_hash(hash:, path:)
41
+ validators.each { |validator| validator.visit_hash(hash:, path:) }
42
+ hash.each do |key, obj|
43
+ validate_obj(obj:, path: path + [key])
44
+ end
45
+ end
46
+
47
+ def validate_array(array:, path:)
48
+ validators.each { |validator| validator.visit_array(array:, path:) }
49
+ array.each_with_index do |obj, index|
50
+ validate_obj(obj:, path: path + [index])
51
+ end
52
+ end
53
+
54
+ def validate_obj(obj:, path:)
55
+ validators.each { |validator| validator.visit_obj(obj:, path:) }
56
+ validate_hash(hash: obj, path: path) if obj.is_a?(Hash)
57
+ validate_array(array: obj, path: path) if obj.is_a?(Array)
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cocina
4
+ module Models
5
+ module Validators
6
+ # Composite validator for structural metadata that uses a visitor pattern to validate files in a single pass.
7
+ class CompositeStructuralValidator
8
+ VALIDATORS = [
9
+ DarkVisitorValidator,
10
+ LanguageTagVisitorValidator,
11
+ ReservedFilenameVisitorValidator
12
+ ].freeze
13
+
14
+ def self.validate(clazz, attributes)
15
+ new(clazz, attributes).validate
16
+ end
17
+
18
+ def initialize(clazz, attributes, validators: VALIDATORS)
19
+ @clazz = clazz
20
+ @attributes = attributes
21
+ @validators = validators.map { |v| v.new(attributes) }
22
+ end
23
+
24
+ def validate
25
+ return unless meets_preconditions?
26
+
27
+ Array(attributes.dig(:structural, :contains)).each do |fileset_hash|
28
+ Array(fileset_hash.dig(:structural, :contains)).each do |file_hash|
29
+ validators.each { |validator| validator.visit_file(file_hash:) }
30
+ end
31
+ end
32
+
33
+ validators.each(&:validate!)
34
+ end
35
+
36
+ private
37
+
38
+ attr_reader :clazz, :attributes, :validators
39
+
40
+ def meets_preconditions?
41
+ clazz::TYPES.intersect?(DRO::TYPES)
42
+ rescue NameError
43
+ false
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cocina
4
+ module Models
5
+ module Validators
6
+ # Validates that, when the object's access is dark, no file is shelved and every file has dark access (WARC files are exempt).
7
+ class DarkVisitorValidator < BaseStructuralVisitorValidator
8
+ def visit_file(file_hash:)
9
+ return unless dark_object?
10
+
11
+ invalid_files << file_hash if invalid?(file_hash)
12
+ end
13
+
14
+ def validate!
15
+ return if invalid_files.empty?
16
+
17
+ filenames = invalid_files.map { |file| file[:filename] || file[:label] }
18
+ raise ValidationError, 'Not all files have dark access and/or are unshelved ' \
19
+ "when object access is dark: #{filenames}"
20
+ end
21
+
22
+ private
23
+
24
+ def invalid_files
25
+ @invalid_files ||= []
26
+ end
27
+
28
+ def dark_object?
29
+ # Checking for nil to account for default being dark.
30
+ @dark_object ||= ['dark', nil].include?(attributes.dig(:access, :view))
31
+ end
32
+
33
+ def invalid?(file)
34
+ # Ignore if a WARC
35
+ return false if file[:hasMimeType] == 'application/warc'
36
+
37
+ return true if file.dig(:administrative, :shelve)
38
+ # Checking for nil to account for default being dark.
39
+ return true if ['dark', nil].exclude?(file.dig(:access, :view))
40
+
41
+ false
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,132 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'edtf'
4
+
5
+ module Cocina
6
+ module Models
7
+ module Validators
8
+ # Validates that dates of known types are type-valid using the visitor pattern.
9
+ class DescriptionDateTimeVisitorValidator < BaseDescriptionVisitorValidator
10
+ VALIDATABLE_TYPES = %w[edtf iso8601 w3cdtf].freeze
11
+
12
+ def visit_hash(hash:, path:) # rubocop:disable Metrics/CyclomaticComplexity
13
+ # Only dates nested under a `date` key are subject to validation.
14
+ # For example, event.date is in scope but event.note is not.
15
+ return unless in_date_path?(path)
16
+
17
+ # A hash with a validatable encoding.code "owns" the encoding for its
18
+ # entire subtree. For example, the outer hash below owns iso8601 for
19
+ # both structuredValue children even though those children carry no
20
+ # encoding themselves:
21
+ #
22
+ # date: [{
23
+ # structuredValue: [
24
+ # { value: '1996', type: 'start' },
25
+ # { value: '1998', type: 'end' }
26
+ # ],
27
+ # encoding: { code: 'iso8601' } # ← registered at path [:date, 0]
28
+ # }]
29
+ #
30
+ # We record the path before visiting children because
31
+ # CompositeDescriptionValidator calls visit_hash on a parent before
32
+ # recursing into its children, so the encoding is always registered
33
+ # before any child value hashes are visited.
34
+ code = hash.dig(:encoding, :code)
35
+ encoding_paths[path.dup] = code if code && VALIDATABLE_TYPES.include?(code)
36
+
37
+ value = hash[:value]
38
+ return unless value.is_a?(String)
39
+
40
+ # Resolve which encoding governs this value by finding the longest
41
+ # registered encoding path that is a prefix of the current path.
42
+ # Longest-prefix wins so that a more-specific inner encoding overrides
43
+ # a less-specific outer one. For example, given:
44
+ #
45
+ # date: [{
46
+ # parallelValue: [
47
+ # { value: '1996', encoding: { code: 'edtf' } }, # path [:date,0,:parallelValue,0]
48
+ # { value: '一九九六' } # path [:date,0,:parallelValue,1]
49
+ # ],
50
+ # encoding: { code: 'iso8601' } # path [:date,0]
51
+ # }]
52
+ #
53
+ # The value '1996' at [:date,0,:parallelValue,0] matches both [:date,0]
54
+ # (iso8601) and [:date,0,:parallelValue,0] (edtf); the longer prefix wins
55
+ # and it is validated as edtf. The value '一九九六' at
56
+ # [:date,0,:parallelValue,1] only matches [:date,0] (iso8601).
57
+ encoding_path, code = find_encoding_for(path)
58
+ return unless code
59
+
60
+ invalid_groups[encoding_path] ||= []
61
+ invalid_groups[encoding_path] << value unless valid_value?(value, code)
62
+ end
63
+
64
+ def validate!
65
+ return if invalid_groups.empty?
66
+
67
+ invalid_dates = invalid_groups.filter_map do |path, values|
68
+ next if values.empty?
69
+
70
+ [*values, encoding_paths[path]]
71
+ end
72
+
73
+ return if invalid_dates.empty?
74
+
75
+ raise ValidationError, "Invalid date(s) in description: #{invalid_dates}"
76
+ end
77
+
78
+ private
79
+
80
+ def encoding_paths
81
+ @encoding_paths ||= {}
82
+ end
83
+
84
+ def invalid_groups
85
+ @invalid_groups ||= {}
86
+ end
87
+
88
+ def in_date_path?(path)
89
+ path.any? { |part| part.to_s == 'date' }
90
+ end
91
+
92
+ def find_encoding_for(path)
93
+ encoding_paths
94
+ .select { |prefix, _| path.first(prefix.length) == prefix }
95
+ .max_by { |prefix, _| prefix.length }
96
+ end
97
+
98
+ def valid_value?(value, code)
99
+ send(:"valid_#{code}?", value)
100
+ end
101
+
102
+ def valid_edtf?(value)
103
+ return false if value == 'XXXX'
104
+
105
+ Date.edtf!(value)
106
+ true
107
+ rescue StandardError
108
+ # NOTE: the upstream EDTF implementation in the `edtf` gem does not
109
+ # allow a valid pattern that we use (possibly because only level
110
+ # 0 of the spec was implemented?):
111
+ #
112
+ # * Y-20555
113
+ #
114
+ # So we catch the false positives from the upstream gem and allow
115
+ # this pattern to validate
116
+ /\AY-?\d{5,}\Z/.match?(value)
117
+ end
118
+
119
+ def valid_iso8601?(value)
120
+ DateTime.iso8601(value)
121
+ true
122
+ rescue StandardError
123
+ false
124
+ end
125
+
126
+ def valid_w3cdtf?(value)
127
+ W3cdtfValidator.validate(value)
128
+ end
129
+ end
130
+ end
131
+ end
132
+ end
@@ -4,54 +4,22 @@ module Cocina
4
4
  module Models
5
5
  module Validators
6
6
  # Validates types for description against description_types.yml.
7
- class DescriptionTypesValidator
8
- def self.validate(clazz, attributes)
9
- new(clazz, attributes).validate
10
- end
11
-
12
- def initialize(clazz, attributes)
13
- @clazz = clazz
14
- @attributes = attributes
15
- @error_paths = []
16
- end
17
-
18
- def validate
19
- return unless meets_preconditions?
20
-
21
- validate_obj(attributes, [])
22
-
7
+ class DescriptionTypesVisitorValidator < BaseDescriptionVisitorValidator
8
+ def validate!
23
9
  return if error_paths.empty?
24
10
 
25
11
  raise ValidationError, "Unrecognized types in description: #{error_paths.join(', ')}"
26
12
  end
27
13
 
28
- private
29
-
30
- attr_reader :clazz, :attributes, :error_paths
31
-
32
- def meets_preconditions?
33
- [Cocina::Models::Description, Cocina::Models::RequestDescription].include?(clazz)
34
- end
35
-
36
- def validate_hash(hash, path)
37
- hash.each do |key, obj|
38
- if key.to_sym == :type
39
- validate_type(obj, path)
40
- else
41
- validate_obj(obj, path + [key])
42
- end
43
- end
14
+ def visit_hash(hash:, path:)
15
+ type = hash[:type]
16
+ validate_type(type, path) if type
44
17
  end
45
18
 
46
- def validate_array(array, path)
47
- array.each_with_index do |obj, index|
48
- validate_obj(obj, path + [index])
49
- end
50
- end
19
+ private
51
20
 
52
- def validate_obj(obj, path)
53
- validate_hash(obj, path) if obj.is_a?(Hash)
54
- validate_array(obj, path) if obj.is_a?(Array)
21
+ def error_paths
22
+ @error_paths ||= []
55
23
  end
56
24
 
57
25
  def validate_type(type, path)
@@ -73,7 +41,7 @@ module Cocina
73
41
  # Some parts of the path are ignored for the purpose of matching.
74
42
  def clean_path(path)
75
43
  new_path = path.reject do |part|
76
- part.is_a?(Integer) || %i[parallelValue parallelContributor parallelEvent].include?(part.to_sym)
44
+ part.is_a?(Integer) || %i[parallelValue parallelEvent].include?(part.to_sym)
77
45
  end.map(&:to_sym)
78
46
  # This needs to happen after parallelValue is removed
79
47
  # to handle structuredValue > parallelValue > structuredValue
@@ -100,20 +68,6 @@ module Cocina
100
68
  def types_yaml
101
69
  YAML.load_file(::File.expand_path('../../../../description_types.yml', __dir__))
102
70
  end
103
-
104
- def path_to_s(path)
105
- # This matches the format used by descriptive spreadsheets
106
- path_str = ''
107
- path.each_with_index do |part, index|
108
- if part.is_a?(Integer)
109
- path_str += (part + 1).to_s
110
- else
111
- path_str += '.' if index.positive?
112
- path_str += part.to_s
113
- end
114
- end
115
- path_str
116
- end
117
71
  end
118
72
  end
119
73
  end
@@ -3,25 +3,16 @@
3
3
  module Cocina
4
4
  module Models
5
5
  module Validators
6
- # Validates that there is only one of value, groupedValue, structuredValue, or parallelValue.
7
- class DescriptionValuesValidator
8
- def self.validate(clazz, attributes)
9
- new(clazz, attributes).validate
10
- end
11
-
12
- def initialize(clazz, attributes)
13
- @clazz = clazz
14
- @attributes = attributes
15
- @error_paths_multiple = []
16
- @error_paths_blank = []
17
- @error_paths_missing_title_type = []
6
+ # Validates that there is only one of value, groupedValue, structuredValue, or parallelValue,
7
+ # that values are not blank, and that title structuredValue entries have a type.
8
+ class DescriptionValuesVisitorValidator < BaseDescriptionVisitorValidator
9
+ def visit_hash(hash:, path:)
10
+ validate_values_for_blanks(hash, path)
11
+ validate_values_for_multiples(hash, path)
12
+ validate_title_type(hash, path)
18
13
  end
19
14
 
20
- def validate
21
- return unless meets_preconditions?
22
-
23
- validate_obj(attributes, [])
24
-
15
+ def validate!
25
16
  unless error_paths_multiple.empty?
26
17
  raise ValidationError,
27
18
  "Multiple value, groupedValue, structuredValue, and parallelValue in description: #{error_paths_multiple.join(', ')}"
@@ -38,30 +29,16 @@ module Cocina
38
29
 
39
30
  private
40
31
 
41
- attr_reader :clazz, :attributes, :error_paths_blank, :error_paths_multiple, :error_paths_missing_title_type
42
-
43
- def meets_preconditions?
44
- [Cocina::Models::Description, Cocina::Models::RequestDescription].include?(clazz)
45
- end
46
-
47
- def validate_hash(hash, path)
48
- validate_values_for_blanks(hash, path)
49
- validate_values_for_multiples(hash, path)
50
- validate_title_type(hash, path)
51
- hash.each do |key, obj|
52
- validate_obj(obj, path + [key])
53
- end
32
+ def error_paths_multiple
33
+ @error_paths_multiple ||= []
54
34
  end
55
35
 
56
- def validate_array(array, path)
57
- array.each_with_index do |obj, index|
58
- validate_obj(obj, path + [index])
59
- end
36
+ def error_paths_blank
37
+ @error_paths_blank ||= []
60
38
  end
61
39
 
62
- def validate_obj(obj, path)
63
- validate_hash(obj, path) if obj.is_a?(Hash)
64
- validate_array(obj, path) if obj.is_a?(Array)
40
+ def error_paths_missing_title_type
41
+ @error_paths_missing_title_type ||= []
65
42
  end
66
43
 
67
44
  def validate_values_for_blanks(hash, path)
@@ -98,20 +75,6 @@ module Cocina
98
75
  structured_value_path = path[2] == 'structuredValue' || (path[2] == 'parallelValue' && path[4] == 'structuredValue')
99
76
  path.first == 'title' && structured_value_path
100
77
  end
101
-
102
- def path_to_s(path)
103
- # This matches the format used by descriptive spreadsheets
104
- path_str = ''
105
- path.each_with_index do |part, index|
106
- if part.is_a?(Integer)
107
- path_str += (part + 1).to_s
108
- else
109
- path_str += '.' if index.positive?
110
- path_str += part.to_s
111
- end
112
- end
113
- path_str
114
- end
115
78
  end
116
79
  end
117
80
  end