cocina-models 0.121.0 → 0.122.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/AGENTS.md +2 -2
- data/Gemfile.lock +2 -2
- data/bin/enhance-report-csv +3 -5
- data/bin/validate-data +18 -6
- data/lib/cocina/models/mapping/from_mods/name_builder.rb +1 -1
- data/lib/cocina/models/version.rb +1 -1
- data/schema.json +217 -5
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a7ba361f4c17d3165dce9e5042739493896f798fe9a215ce20054b6f2c739c11
|
|
4
|
+
data.tar.gz: 5ca9b3f5efdd6ab255dfe1f48b19be012638986f7126af70547d701bd5059b01
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: cfd5524aa8e1ce0dc3510855b1c1cb9cadd875878192c71ecfb9fdd02fe74457e00a9a3766eefd3f399b2b3a539a013eb63e929b7b1a46b58fedb789ef170d6c
|
|
7
|
+
data.tar.gz: 27d345e4bd13c181165a943a696ecbfe44c36fce1fcd1db4324f13c8d97fce029b953a36030fc9a71586d95a6458622300937990069b57fcae0baeed20398be3
|
data/AGENTS.md
CHANGED
|
@@ -22,10 +22,10 @@ Run `pv --version`. If pv is not installed, tell the user:
|
|
|
22
22
|
|
|
23
23
|
### Output format (always apply)
|
|
24
24
|
|
|
25
|
-
Every jq query produced by this skill **must output a CSV line** using `@csv`. The **first field must always be the external identifier** (`externalIdentifier`). Additional fields follow based on the user's query. Example:
|
|
25
|
+
Every jq query produced by this skill **must output a CSV line** using `@csv`. The **first field must always be the external identifier** (`externalIdentifier`). Additional fields follow based on the user's query. By default, multiple values should be joined by " | ". Example:
|
|
26
26
|
|
|
27
27
|
```
|
|
28
|
-
"druid:bc123df4567","some value
|
|
28
|
+
"druid:bc123df4567","some value | another value"
|
|
29
29
|
```
|
|
30
30
|
|
|
31
31
|
Use `[.externalIdentifier, ...] | @csv` as the output expression. Apply this constraint automatically — do not ask the user whether to include the external identifier.
|
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
cocina-models (0.121.0)
|
|
4
|
+
cocina-models (0.122.0)
|
|
5
5
|
activesupport
|
|
6
6
|
deprecation
|
|
7
7
|
dry-struct (~> 1.0)
|
|
@@ -220,7 +220,7 @@ CHECKSUMS
|
|
|
220
220
|
bigdecimal (4.1.2) sha256=53d217666027eab4280346fba98e7d5b66baaae1b9c3c1c0ffe89d48188a3fbd
|
|
221
221
|
builder (3.3.0) sha256=497918d2f9dca528fdca4b88d84e4ef4387256d984b8154e9d5d3fe5a9c8835f
|
|
222
222
|
bundler (4.0.13) sha256=19f08be7f27022cf0b89f27da0b044ae075e8270a9ef44ad248a932614e1ca3b
|
|
223
|
-
cocina-models (0.121.0)
|
|
223
|
+
cocina-models (0.122.0)
|
|
224
224
|
concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab
|
|
225
225
|
connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a
|
|
226
226
|
csv (3.3.5) sha256=6e5134ac3383ef728b7f02725d9872934f523cb40b961479f69cf3afa6c8e73f
|
data/bin/enhance-report-csv
CHANGED
|
@@ -67,18 +67,16 @@ def extract_fields(doc)
|
|
|
67
67
|
end
|
|
68
68
|
|
|
69
69
|
def build_output(solr, rows, batch_size)
|
|
70
|
-
extra_col_count = (rows.first&.size || 1) - 1
|
|
71
|
-
extra_headers = extra_col_count.times.map { |i| "col#{i + 2}" }
|
|
72
|
-
|
|
73
70
|
CSV.generate do |out|
|
|
74
|
-
out << ([
|
|
71
|
+
out << (%w[druid value] + FIELD_HEADERS.values)
|
|
75
72
|
|
|
76
73
|
rows.each_slice(batch_size) do |batch|
|
|
77
74
|
docs = fetch_solr_docs(solr, batch.map { |row| row[0] })
|
|
78
75
|
|
|
79
76
|
batch.each do |row|
|
|
80
77
|
druid = row[0]
|
|
81
|
-
|
|
78
|
+
value = row.size > 1 ? row[1..].join('|') : ''
|
|
79
|
+
out << ([druid, value] + extract_fields(docs[druid]))
|
|
82
80
|
end
|
|
83
81
|
end
|
|
84
82
|
end
|
data/bin/validate-data
CHANGED
|
@@ -19,6 +19,7 @@ require 'cocina/models'
|
|
|
19
19
|
require 'json'
|
|
20
20
|
require 'ruby-progressbar'
|
|
21
21
|
require 'optparse'
|
|
22
|
+
require 'csv'
|
|
22
23
|
|
|
23
24
|
# Parse command line options
|
|
24
25
|
def parse_options # rubocop:disable Metrics/MethodLength
|
|
@@ -104,15 +105,16 @@ def worker_process(reader) # rubocop:disable Metrics/MethodLength
|
|
|
104
105
|
# Process each line in the batch
|
|
105
106
|
batch.each do |line_num, line_content|
|
|
106
107
|
json = JSON.parse(line_content)
|
|
108
|
+
druid = json['externalIdentifier']
|
|
107
109
|
Cocina::Models.build(json)
|
|
108
110
|
rescue JSON::ParserError => e
|
|
109
111
|
errors << { line: line_num, error: "JSON Parse Error: #{e.message}" }
|
|
110
112
|
rescue Cocina::Models::ValidationError => e
|
|
111
|
-
errors << { line: line_num, error: "Validation Error: #{
|
|
113
|
+
errors << { line: line_num, druid:, error: "Validation Error: #{e.message}" }
|
|
112
114
|
rescue Cocina::Models::UnknownTypeError => e
|
|
113
|
-
errors << { line: line_num, error: "Unknown Type Error: #{e.message}" }
|
|
115
|
+
errors << { line: line_num, druid:, error: "Unknown Type Error: #{e.message}" }
|
|
114
116
|
rescue StandardError => e
|
|
115
|
-
errors << { line: line_num, error: "Error: #{e.class} - #{e.message}" }
|
|
117
|
+
errors << { line: line_num, druid:, error: "Error: #{e.class} - #{e.message}" }
|
|
116
118
|
end
|
|
117
119
|
end
|
|
118
120
|
|
|
@@ -245,15 +247,25 @@ def print_summary(total_lines, errors, elapsed_time) # rubocop:disable Metrics/M
|
|
|
245
247
|
|
|
246
248
|
return unless errors.any?
|
|
247
249
|
|
|
250
|
+
sorted_errors = errors.sort_by { |e| e[:line] }
|
|
251
|
+
|
|
248
252
|
puts "\n"
|
|
249
253
|
puts 'Error details:'
|
|
250
254
|
puts '-' * 80
|
|
251
255
|
# Sort errors by line number for better readability
|
|
252
|
-
|
|
253
|
-
puts "Line #{error[:line]}: #{error[:error]}"
|
|
256
|
+
sorted_errors.each do |error|
|
|
257
|
+
puts "Line #{error[:line]}: #{error[:druid]} - #{error[:error]}"
|
|
254
258
|
end
|
|
255
259
|
puts "\n"
|
|
256
|
-
puts "Line numbers with errors: #{
|
|
260
|
+
puts "Line numbers with errors: #{sorted_errors.map { |e| e[:line] }.join(', ')}"
|
|
261
|
+
|
|
262
|
+
CSV.open('validate-data-errors.csv', 'w') do |csv|
|
|
263
|
+
csv << %w[line druid error]
|
|
264
|
+
sorted_errors.each do |error|
|
|
265
|
+
csv << [error[:line], error[:druid], error[:error]]
|
|
266
|
+
end
|
|
267
|
+
end
|
|
268
|
+
puts 'Errors written to validate-data-errors.csv'
|
|
257
269
|
end
|
|
258
270
|
|
|
259
271
|
# Main execution
|
|
@@ -60,7 +60,7 @@ module Cocina
|
|
|
60
60
|
|
|
61
61
|
# build non-parallel, single name
|
|
62
62
|
def build_name(name_node)
|
|
63
|
-
return
|
|
63
|
+
return nil if name_node.xpath('mods:etal', mods: Description::DESC_METADATA_NS).present?
|
|
64
64
|
|
|
65
65
|
name_parts = build_name_parts(name_node)
|
|
66
66
|
# If there are no name parts, do not map the name
|
data/schema.json
CHANGED
|
@@ -507,7 +507,28 @@
|
|
|
507
507
|
"type": "string"
|
|
508
508
|
}
|
|
509
509
|
},
|
|
510
|
-
"unevaluatedProperties": false
|
|
510
|
+
"unevaluatedProperties": false,
|
|
511
|
+
"anyOf": [
|
|
512
|
+
{
|
|
513
|
+
"required": ["valueAt"]
|
|
514
|
+
},
|
|
515
|
+
{
|
|
516
|
+
"required": ["name"],
|
|
517
|
+
"properties": {
|
|
518
|
+
"name": {
|
|
519
|
+
"minItems": 1
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
},
|
|
523
|
+
{
|
|
524
|
+
"required": ["identifier"],
|
|
525
|
+
"properties": {
|
|
526
|
+
"identifier": {
|
|
527
|
+
"minItems": 1
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
}
|
|
531
|
+
]
|
|
511
532
|
},
|
|
512
533
|
"ControlledDigitalLendingAccess": {
|
|
513
534
|
"type": "object",
|
|
@@ -900,7 +921,57 @@
|
|
|
900
921
|
}
|
|
901
922
|
}
|
|
902
923
|
},
|
|
903
|
-
"unevaluatedProperties": false
|
|
924
|
+
"unevaluatedProperties": false,
|
|
925
|
+
"anyOf": [
|
|
926
|
+
{
|
|
927
|
+
"required": ["url"],
|
|
928
|
+
"properties": {
|
|
929
|
+
"url": {
|
|
930
|
+
"minItems": 1
|
|
931
|
+
}
|
|
932
|
+
}
|
|
933
|
+
},
|
|
934
|
+
{
|
|
935
|
+
"required": ["physicalLocation"],
|
|
936
|
+
"properties": {
|
|
937
|
+
"physicalLocation": {
|
|
938
|
+
"minItems": 1
|
|
939
|
+
}
|
|
940
|
+
}
|
|
941
|
+
},
|
|
942
|
+
{
|
|
943
|
+
"required": ["digitalLocation"],
|
|
944
|
+
"properties": {
|
|
945
|
+
"digitalLocation": {
|
|
946
|
+
"minItems": 1
|
|
947
|
+
}
|
|
948
|
+
}
|
|
949
|
+
},
|
|
950
|
+
{
|
|
951
|
+
"required": ["accessContact"],
|
|
952
|
+
"properties": {
|
|
953
|
+
"accessContact": {
|
|
954
|
+
"minItems": 1
|
|
955
|
+
}
|
|
956
|
+
}
|
|
957
|
+
},
|
|
958
|
+
{
|
|
959
|
+
"required": ["digitalRepository"],
|
|
960
|
+
"properties": {
|
|
961
|
+
"digitalRepository": {
|
|
962
|
+
"minItems": 1
|
|
963
|
+
}
|
|
964
|
+
}
|
|
965
|
+
},
|
|
966
|
+
{
|
|
967
|
+
"required": ["note"],
|
|
968
|
+
"properties": {
|
|
969
|
+
"note": {
|
|
970
|
+
"minItems": 1
|
|
971
|
+
}
|
|
972
|
+
}
|
|
973
|
+
}
|
|
974
|
+
]
|
|
904
975
|
},
|
|
905
976
|
"DescriptiveAdminMetadata": {
|
|
906
977
|
"description": "Information about this resource description.",
|
|
@@ -1108,7 +1179,25 @@
|
|
|
1108
1179
|
}
|
|
1109
1180
|
}
|
|
1110
1181
|
},
|
|
1111
|
-
"unevaluatedProperties": false
|
|
1182
|
+
"unevaluatedProperties": false,
|
|
1183
|
+
"anyOf": [
|
|
1184
|
+
{
|
|
1185
|
+
"required": ["form"],
|
|
1186
|
+
"properties": {
|
|
1187
|
+
"form": {
|
|
1188
|
+
"minItems": 1
|
|
1189
|
+
}
|
|
1190
|
+
}
|
|
1191
|
+
},
|
|
1192
|
+
{
|
|
1193
|
+
"required": ["subject"],
|
|
1194
|
+
"properties": {
|
|
1195
|
+
"subject": {
|
|
1196
|
+
"minItems": 1
|
|
1197
|
+
}
|
|
1198
|
+
}
|
|
1199
|
+
}
|
|
1200
|
+
]
|
|
1112
1201
|
},
|
|
1113
1202
|
"DescriptiveGroupedValue": {
|
|
1114
1203
|
"description": "Value model for a set of descriptive elements grouped together in an unstructured way.",
|
|
@@ -1437,7 +1526,65 @@
|
|
|
1437
1526
|
}
|
|
1438
1527
|
}
|
|
1439
1528
|
],
|
|
1440
|
-
"unevaluatedProperties": false
|
|
1529
|
+
"unevaluatedProperties": false,
|
|
1530
|
+
"anyOf": [
|
|
1531
|
+
{
|
|
1532
|
+
"required": ["date"],
|
|
1533
|
+
"properties": {
|
|
1534
|
+
"date": {
|
|
1535
|
+
"minItems": 1
|
|
1536
|
+
}
|
|
1537
|
+
}
|
|
1538
|
+
},
|
|
1539
|
+
{
|
|
1540
|
+
"required": ["contributor"],
|
|
1541
|
+
"properties": {
|
|
1542
|
+
"contributor": {
|
|
1543
|
+
"minItems": 1
|
|
1544
|
+
}
|
|
1545
|
+
}
|
|
1546
|
+
},
|
|
1547
|
+
{
|
|
1548
|
+
"required": ["location"],
|
|
1549
|
+
"properties": {
|
|
1550
|
+
"location": {
|
|
1551
|
+
"minItems": 1
|
|
1552
|
+
}
|
|
1553
|
+
}
|
|
1554
|
+
},
|
|
1555
|
+
{
|
|
1556
|
+
"required": ["identifier"],
|
|
1557
|
+
"properties": {
|
|
1558
|
+
"identifier": {
|
|
1559
|
+
"minItems": 1
|
|
1560
|
+
}
|
|
1561
|
+
}
|
|
1562
|
+
},
|
|
1563
|
+
{
|
|
1564
|
+
"required": ["note"],
|
|
1565
|
+
"properties": {
|
|
1566
|
+
"note": {
|
|
1567
|
+
"minItems": 1
|
|
1568
|
+
}
|
|
1569
|
+
}
|
|
1570
|
+
},
|
|
1571
|
+
{
|
|
1572
|
+
"required": ["structuredValue"],
|
|
1573
|
+
"properties": {
|
|
1574
|
+
"structuredValue": {
|
|
1575
|
+
"minItems": 1
|
|
1576
|
+
}
|
|
1577
|
+
}
|
|
1578
|
+
},
|
|
1579
|
+
{
|
|
1580
|
+
"required": ["parallelEvent"],
|
|
1581
|
+
"properties": {
|
|
1582
|
+
"parallelEvent": {
|
|
1583
|
+
"minItems": 1
|
|
1584
|
+
}
|
|
1585
|
+
}
|
|
1586
|
+
}
|
|
1587
|
+
]
|
|
1441
1588
|
},
|
|
1442
1589
|
"File": {
|
|
1443
1590
|
"description": "Binaries that are the basis of what our domain manages. Binaries here do not include metadata files generated for the domain's own management purposes.",
|
|
@@ -1550,6 +1697,17 @@
|
|
|
1550
1697
|
"sdrPreserve",
|
|
1551
1698
|
"shelve"
|
|
1552
1699
|
],
|
|
1700
|
+
"if": {
|
|
1701
|
+
"properties": {
|
|
1702
|
+
"shelve": { "const": true }
|
|
1703
|
+
}
|
|
1704
|
+
},
|
|
1705
|
+
"then": {
|
|
1706
|
+
"anyOf": [
|
|
1707
|
+
{ "properties": { "sdrPreserve": { "const": true } } },
|
|
1708
|
+
{ "properties": { "publish": { "const": true } } }
|
|
1709
|
+
]
|
|
1710
|
+
},
|
|
1553
1711
|
"unevaluatedProperties": false
|
|
1554
1712
|
},
|
|
1555
1713
|
"FileSet": {
|
|
@@ -1798,7 +1956,61 @@
|
|
|
1798
1956
|
"$ref": "#/$defs/DescriptiveValueLanguage"
|
|
1799
1957
|
}
|
|
1800
1958
|
},
|
|
1801
|
-
"unevaluatedProperties": false
|
|
1959
|
+
"unevaluatedProperties": false,
|
|
1960
|
+
"anyOf": [
|
|
1961
|
+
{
|
|
1962
|
+
"required": ["value"]
|
|
1963
|
+
},
|
|
1964
|
+
{
|
|
1965
|
+
"required": ["code"]
|
|
1966
|
+
},
|
|
1967
|
+
{
|
|
1968
|
+
"required": ["uri"]
|
|
1969
|
+
},
|
|
1970
|
+
{
|
|
1971
|
+
"required": ["note"],
|
|
1972
|
+
"properties": {
|
|
1973
|
+
"note": {
|
|
1974
|
+
"minItems": 1
|
|
1975
|
+
}
|
|
1976
|
+
}
|
|
1977
|
+
},
|
|
1978
|
+
{
|
|
1979
|
+
"required": ["script"],
|
|
1980
|
+
"properties": {
|
|
1981
|
+
"script": {
|
|
1982
|
+
"minItems": 1
|
|
1983
|
+
}
|
|
1984
|
+
}
|
|
1985
|
+
},
|
|
1986
|
+
{
|
|
1987
|
+
"required": ["valueAt"]
|
|
1988
|
+
},
|
|
1989
|
+
{
|
|
1990
|
+
"required": ["structuredValue"],
|
|
1991
|
+
"properties": {
|
|
1992
|
+
"structuredValue": {
|
|
1993
|
+
"minItems": 1
|
|
1994
|
+
}
|
|
1995
|
+
}
|
|
1996
|
+
},
|
|
1997
|
+
{
|
|
1998
|
+
"required": ["parallelValue"],
|
|
1999
|
+
"properties": {
|
|
2000
|
+
"parallelValue": {
|
|
2001
|
+
"minItems": 1
|
|
2002
|
+
}
|
|
2003
|
+
}
|
|
2004
|
+
},
|
|
2005
|
+
{
|
|
2006
|
+
"required": ["groupedValue"],
|
|
2007
|
+
"properties": {
|
|
2008
|
+
"groupedValue": {
|
|
2009
|
+
"minItems": 1
|
|
2010
|
+
}
|
|
2011
|
+
}
|
|
2012
|
+
}
|
|
2013
|
+
]
|
|
1802
2014
|
},
|
|
1803
2015
|
"LanguageTag": {
|
|
1804
2016
|
"description": "BCP 47 language tag: https://www.rfc-editor.org/rfc/rfc4646.txt -- other applications (like media players) expect language codes of this format, see e.g. https://videojs.com/guides/text-tracks/#srclang",
|