cocina-models 0.121.0 → 0.122.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cfcb2c2a393845b1c9129ab48f5fc7340beb8067936694798e800a5935bf1ac9
4
- data.tar.gz: f9dc80c520ddecb670a5df20273c635146f75d75025132402d4d1329090e29fd
3
+ metadata.gz: a7ba361f4c17d3165dce9e5042739493896f798fe9a215ce20054b6f2c739c11
4
+ data.tar.gz: 5ca9b3f5efdd6ab255dfe1f48b19be012638986f7126af70547d701bd5059b01
5
5
  SHA512:
6
- metadata.gz: 2f7ccadc053ba81117275251401432ec92c65fefd86da94c25e7c862f8680c868d3f392a84b3966fb48609a5c383abd8dc90b8a51e457fd2fe8d3792371bd4a6
7
- data.tar.gz: c1355245a0571f8933155c9de04b38226b55b96a3ffdb899af3d2af553e30f53ae7986a41e6293339a90cc2a017fcd6eec273fcd1ce64df57b26cbbb5f161193
6
+ metadata.gz: cfd5524aa8e1ce0dc3510855b1c1cb9cadd875878192c71ecfb9fdd02fe74457e00a9a3766eefd3f399b2b3a539a013eb63e929b7b1a46b58fedb789ef170d6c
7
+ data.tar.gz: 27d345e4bd13c181165a943a696ecbfe44c36fce1fcd1db4324f13c8d97fce029b953a36030fc9a71586d95a6458622300937990069b57fcae0baeed20398be3
data/AGENTS.md CHANGED
@@ -22,10 +22,10 @@ Run `pv --version`. If pv is not installed, tell the user:
22
22
 
23
23
  ### Output format (always apply)
24
24
 
25
- Every jq query produced by this skill **must output a CSV line** using `@csv`. The **first field must always be the external identifier** (`externalIdentifier`). Additional fields follow based on the user's query. Example:
25
+ Every jq query produced by this skill **must output a CSV line** using `@csv`. The **first field must always be the external identifier** (`externalIdentifier`). Additional fields follow based on the user's query. By default, multiple values should be joined by " | ". Example:
26
26
 
27
27
  ```
28
- "druid:bc123df4567","some value","another value"
28
+ "druid:bc123df4567","some value | another value"
29
29
  ```
30
30
 
31
31
  Use `[.externalIdentifier, ...] | @csv` as the output expression. Apply this constraint automatically — do not ask the user whether to include the external identifier.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cocina-models (0.121.0)
4
+ cocina-models (0.122.0)
5
5
  activesupport
6
6
  deprecation
7
7
  dry-struct (~> 1.0)
@@ -220,7 +220,7 @@ CHECKSUMS
220
220
  bigdecimal (4.1.2) sha256=53d217666027eab4280346fba98e7d5b66baaae1b9c3c1c0ffe89d48188a3fbd
221
221
  builder (3.3.0) sha256=497918d2f9dca528fdca4b88d84e4ef4387256d984b8154e9d5d3fe5a9c8835f
222
222
  bundler (4.0.13) sha256=19f08be7f27022cf0b89f27da0b044ae075e8270a9ef44ad248a932614e1ca3b
223
- cocina-models (0.121.0)
223
+ cocina-models (0.122.0)
224
224
  concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab
225
225
  connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a
226
226
  csv (3.3.5) sha256=6e5134ac3383ef728b7f02725d9872934f523cb40b961479f69cf3afa6c8e73f
@@ -67,18 +67,16 @@ def extract_fields(doc)
67
67
  end
68
68
 
69
69
  def build_output(solr, rows, batch_size)
70
- extra_col_count = (rows.first&.size || 1) - 1
71
- extra_headers = extra_col_count.times.map { |i| "col#{i + 2}" }
72
-
73
70
  CSV.generate do |out|
74
- out << (['druid'] + FIELD_HEADERS.values + extra_headers)
71
+ out << (%w[druid value] + FIELD_HEADERS.values)
75
72
 
76
73
  rows.each_slice(batch_size) do |batch|
77
74
  docs = fetch_solr_docs(solr, batch.map { |row| row[0] })
78
75
 
79
76
  batch.each do |row|
80
77
  druid = row[0]
81
- out << ([druid] + extract_fields(docs[druid]) + row[1..])
78
+ value = row.size > 1 ? row[1..].join('|') : ''
79
+ out << ([druid, value] + extract_fields(docs[druid]))
82
80
  end
83
81
  end
84
82
  end
data/bin/validate-data CHANGED
@@ -19,6 +19,7 @@ require 'cocina/models'
19
19
  require 'json'
20
20
  require 'ruby-progressbar'
21
21
  require 'optparse'
22
+ require 'csv'
22
23
 
23
24
  # Parse command line options
24
25
  def parse_options # rubocop:disable Metrics/MethodLength
@@ -104,15 +105,16 @@ def worker_process(reader) # rubocop:disable Metrics/MethodLength
104
105
  # Process each line in the batch
105
106
  batch.each do |line_num, line_content|
106
107
  json = JSON.parse(line_content)
108
+ druid = json['externalIdentifier']
107
109
  Cocina::Models.build(json)
108
110
  rescue JSON::ParserError => e
109
111
  errors << { line: line_num, error: "JSON Parse Error: #{e.message}" }
110
112
  rescue Cocina::Models::ValidationError => e
111
- errors << { line: line_num, error: "Validation Error: #{json['externalIdentifier']} - #{e.message}" }
113
+ errors << { line: line_num, druid:, error: "Validation Error: #{e.message}" }
112
114
  rescue Cocina::Models::UnknownTypeError => e
113
- errors << { line: line_num, error: "Unknown Type Error: #{e.message}" }
115
+ errors << { line: line_num, druid:, error: "Unknown Type Error: #{e.message}" }
114
116
  rescue StandardError => e
115
- errors << { line: line_num, error: "Error: #{e.class} - #{e.message}" }
117
+ errors << { line: line_num, druid:, error: "Error: #{e.class} - #{e.message}" }
116
118
  end
117
119
  end
118
120
 
@@ -245,15 +247,25 @@ def print_summary(total_lines, errors, elapsed_time) # rubocop:disable Metrics/M
245
247
 
246
248
  return unless errors.any?
247
249
 
250
+ sorted_errors = errors.sort_by { |e| e[:line] }
251
+
248
252
  puts "\n"
249
253
  puts 'Error details:'
250
254
  puts '-' * 80
251
255
  # Sort errors by line number for better readability
252
- errors.sort_by { |e| e[:line] }.each do |error|
253
- puts "Line #{error[:line]}: #{error[:error]}"
256
+ sorted_errors.each do |error|
257
+ puts "Line #{error[:line]}: #{error[:druid]} - #{error[:error]}"
254
258
  end
255
259
  puts "\n"
256
- puts "Line numbers with errors: #{errors.sort_by { |e| e[:line] }.map { |e| e[:line] }.join(', ')}"
260
+ puts "Line numbers with errors: #{sorted_errors.map { |e| e[:line] }.join(', ')}"
261
+
262
+ CSV.open('validate-data-errors.csv', 'w') do |csv|
263
+ csv << %w[line druid error]
264
+ sorted_errors.each do |error|
265
+ csv << [error[:line], error[:druid], error[:error]]
266
+ end
267
+ end
268
+ puts 'Errors written to validate-data-errors.csv'
257
269
  end
258
270
 
259
271
  # Main execution
@@ -60,7 +60,7 @@ module Cocina
60
60
 
61
61
  # build non-parallel, single name
62
62
  def build_name(name_node)
63
- return { type: 'unspecified others' } if name_node.xpath('mods:etal', mods: Description::DESC_METADATA_NS).present?
63
+ return nil if name_node.xpath('mods:etal', mods: Description::DESC_METADATA_NS).present?
64
64
 
65
65
  name_parts = build_name_parts(name_node)
66
66
  # If there are no name parts, do not map the name
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Cocina
4
4
  module Models
5
- VERSION = '0.121.0'
5
+ VERSION = '0.122.0'
6
6
  end
7
7
  end
data/schema.json CHANGED
@@ -507,7 +507,28 @@
507
507
  "type": "string"
508
508
  }
509
509
  },
510
- "unevaluatedProperties": false
510
+ "unevaluatedProperties": false,
511
+ "anyOf": [
512
+ {
513
+ "required": ["valueAt"]
514
+ },
515
+ {
516
+ "required": ["name"],
517
+ "properties": {
518
+ "name": {
519
+ "minItems": 1
520
+ }
521
+ }
522
+ },
523
+ {
524
+ "required": ["identifier"],
525
+ "properties": {
526
+ "identifier": {
527
+ "minItems": 1
528
+ }
529
+ }
530
+ }
531
+ ]
511
532
  },
512
533
  "ControlledDigitalLendingAccess": {
513
534
  "type": "object",
@@ -900,7 +921,57 @@
900
921
  }
901
922
  }
902
923
  },
903
- "unevaluatedProperties": false
924
+ "unevaluatedProperties": false,
925
+ "anyOf": [
926
+ {
927
+ "required": ["url"],
928
+ "properties": {
929
+ "url": {
930
+ "minItems": 1
931
+ }
932
+ }
933
+ },
934
+ {
935
+ "required": ["physicalLocation"],
936
+ "properties": {
937
+ "physicalLocation": {
938
+ "minItems": 1
939
+ }
940
+ }
941
+ },
942
+ {
943
+ "required": ["digitalLocation"],
944
+ "properties": {
945
+ "digitalLocation": {
946
+ "minItems": 1
947
+ }
948
+ }
949
+ },
950
+ {
951
+ "required": ["accessContact"],
952
+ "properties": {
953
+ "accessContact": {
954
+ "minItems": 1
955
+ }
956
+ }
957
+ },
958
+ {
959
+ "required": ["digitalRepository"],
960
+ "properties": {
961
+ "digitalRepository": {
962
+ "minItems": 1
963
+ }
964
+ }
965
+ },
966
+ {
967
+ "required": ["note"],
968
+ "properties": {
969
+ "note": {
970
+ "minItems": 1
971
+ }
972
+ }
973
+ }
974
+ ]
904
975
  },
905
976
  "DescriptiveAdminMetadata": {
906
977
  "description": "Information about this resource description.",
@@ -1108,7 +1179,25 @@
1108
1179
  }
1109
1180
  }
1110
1181
  },
1111
- "unevaluatedProperties": false
1182
+ "unevaluatedProperties": false,
1183
+ "anyOf": [
1184
+ {
1185
+ "required": ["form"],
1186
+ "properties": {
1187
+ "form": {
1188
+ "minItems": 1
1189
+ }
1190
+ }
1191
+ },
1192
+ {
1193
+ "required": ["subject"],
1194
+ "properties": {
1195
+ "subject": {
1196
+ "minItems": 1
1197
+ }
1198
+ }
1199
+ }
1200
+ ]
1112
1201
  },
1113
1202
  "DescriptiveGroupedValue": {
1114
1203
  "description": "Value model for a set of descriptive elements grouped together in an unstructured way.",
@@ -1437,7 +1526,65 @@
1437
1526
  }
1438
1527
  }
1439
1528
  ],
1440
- "unevaluatedProperties": false
1529
+ "unevaluatedProperties": false,
1530
+ "anyOf": [
1531
+ {
1532
+ "required": ["date"],
1533
+ "properties": {
1534
+ "date": {
1535
+ "minItems": 1
1536
+ }
1537
+ }
1538
+ },
1539
+ {
1540
+ "required": ["contributor"],
1541
+ "properties": {
1542
+ "contributor": {
1543
+ "minItems": 1
1544
+ }
1545
+ }
1546
+ },
1547
+ {
1548
+ "required": ["location"],
1549
+ "properties": {
1550
+ "location": {
1551
+ "minItems": 1
1552
+ }
1553
+ }
1554
+ },
1555
+ {
1556
+ "required": ["identifier"],
1557
+ "properties": {
1558
+ "identifier": {
1559
+ "minItems": 1
1560
+ }
1561
+ }
1562
+ },
1563
+ {
1564
+ "required": ["note"],
1565
+ "properties": {
1566
+ "note": {
1567
+ "minItems": 1
1568
+ }
1569
+ }
1570
+ },
1571
+ {
1572
+ "required": ["structuredValue"],
1573
+ "properties": {
1574
+ "structuredValue": {
1575
+ "minItems": 1
1576
+ }
1577
+ }
1578
+ },
1579
+ {
1580
+ "required": ["parallelEvent"],
1581
+ "properties": {
1582
+ "parallelEvent": {
1583
+ "minItems": 1
1584
+ }
1585
+ }
1586
+ }
1587
+ ]
1441
1588
  },
1442
1589
  "File": {
1443
1590
  "description": "Binaries that are the basis of what our domain manages. Binaries here do not include metadata files generated for the domain's own management purposes.",
@@ -1550,6 +1697,17 @@
1550
1697
  "sdrPreserve",
1551
1698
  "shelve"
1552
1699
  ],
1700
+ "if": {
1701
+ "properties": {
1702
+ "shelve": { "const": true }
1703
+ }
1704
+ },
1705
+ "then": {
1706
+ "anyOf": [
1707
+ { "properties": { "sdrPreserve": { "const": true } } },
1708
+ { "properties": { "publish": { "const": true } } }
1709
+ ]
1710
+ },
1553
1711
  "unevaluatedProperties": false
1554
1712
  },
1555
1713
  "FileSet": {
@@ -1798,7 +1956,61 @@
1798
1956
  "$ref": "#/$defs/DescriptiveValueLanguage"
1799
1957
  }
1800
1958
  },
1801
- "unevaluatedProperties": false
1959
+ "unevaluatedProperties": false,
1960
+ "anyOf": [
1961
+ {
1962
+ "required": ["value"]
1963
+ },
1964
+ {
1965
+ "required": ["code"]
1966
+ },
1967
+ {
1968
+ "required": ["uri"]
1969
+ },
1970
+ {
1971
+ "required": ["note"],
1972
+ "properties": {
1973
+ "note": {
1974
+ "minItems": 1
1975
+ }
1976
+ }
1977
+ },
1978
+ {
1979
+ "required": ["script"],
1980
+ "properties": {
1981
+ "script": {
1982
+ "minItems": 1
1983
+ }
1984
+ }
1985
+ },
1986
+ {
1987
+ "required": ["valueAt"]
1988
+ },
1989
+ {
1990
+ "required": ["structuredValue"],
1991
+ "properties": {
1992
+ "structuredValue": {
1993
+ "minItems": 1
1994
+ }
1995
+ }
1996
+ },
1997
+ {
1998
+ "required": ["parallelValue"],
1999
+ "properties": {
2000
+ "parallelValue": {
2001
+ "minItems": 1
2002
+ }
2003
+ }
2004
+ },
2005
+ {
2006
+ "required": ["groupedValue"],
2007
+ "properties": {
2008
+ "groupedValue": {
2009
+ "minItems": 1
2010
+ }
2011
+ }
2012
+ }
2013
+ ]
1802
2014
  },
1803
2015
  "LanguageTag": {
1804
2016
  "description": "BCP 47 language tag: https://www.rfc-editor.org/rfc/rfc4646.txt -- other applications (like media players) expect language codes of this format, see e.g. https://videojs.com/guides/text-tracks/#srclang",
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cocina-models
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.121.0
4
+ version: 0.122.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Coyne