cocina-models 0.113.0 → 0.114.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +1 -1
- data/Gemfile +1 -0
- data/Gemfile.lock +15 -14
- data/README.md +6 -17
- data/bin/validate-data +282 -0
- data/lib/cocina/generator/datatype.rb +1 -1
- data/lib/cocina/generator/generator.rb +1 -1
- data/lib/cocina/generator/schema.rb +2 -2
- data/lib/cocina/generator/schema_array.rb +1 -1
- data/lib/cocina/generator/schema_base.rb +2 -2
- data/lib/cocina/generator/schema_ref.rb +1 -1
- data/lib/cocina/generator/schema_value.rb +1 -1
- data/lib/cocina/generator.rb +1 -1
- data/lib/cocina/json_schema_wrapper.rb +1 -2
- data/lib/cocina/models/access.rb +4 -4
- data/lib/cocina/models/admin_policy_access_template.rb +4 -4
- data/lib/cocina/models/admin_policy_lite.rb +2 -2
- data/lib/cocina/models/collection_lite.rb +4 -4
- data/lib/cocina/models/dro_access.rb +4 -4
- data/lib/cocina/models/dro_lite.rb +6 -6
- data/lib/cocina/models/embargo.rb +4 -4
- data/lib/cocina/models/file_access.rb +4 -4
- data/lib/cocina/models/validatable.rb +6 -1
- data/lib/cocina/models/validators/catalog_links_validator.rb +1 -1
- data/lib/cocina/models/version.rb +1 -1
- data/lib/cocina/models.rb +3 -5
- metadata +4 -4
- data/openapi.yml +0 -1930
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2b8514cca9a06bbf3e7c36d84a1f27d093e16a263e8ac1cb92289b6640d28bec
|
|
4
|
+
data.tar.gz: '09f25bf203314c534eba5e21a16b163f56b21ac2e858cf9a5a25f309086efa12'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2dce9b0cc4eb70cc6a2ddbc2dda88e5249b9ac58a0ec96523ec9c3e23ebe82108d87eb1e36b70e8e44a0d8c0abf75682c6f284dbda26e4cefe469b4acf5bf715
|
|
7
|
+
data.tar.gz: 795ba280b81c888fd005cc5af39d671b942f3d5b718b61b44e902581d476958654f28485aec4f8d18de4d725a6b9338c84c8a0acc4a7c315b04e5cd2ec22c9a8
|
data/.circleci/config.yml
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
cocina-models (0.
|
|
4
|
+
cocina-models (0.114.0)
|
|
5
5
|
activesupport
|
|
6
6
|
deprecation
|
|
7
7
|
dry-struct (~> 1.0)
|
|
@@ -59,7 +59,7 @@ GEM
|
|
|
59
59
|
concurrent-ruby (~> 1.0)
|
|
60
60
|
dry-core (~> 1.1)
|
|
61
61
|
zeitwerk (~> 2.6)
|
|
62
|
-
dry-struct (1.8.
|
|
62
|
+
dry-struct (1.8.1)
|
|
63
63
|
dry-core (~> 1.1)
|
|
64
64
|
dry-types (~> 1.8, >= 1.8.2)
|
|
65
65
|
ice_nine (~> 0.11)
|
|
@@ -86,8 +86,8 @@ GEM
|
|
|
86
86
|
prism (>= 1.3.0)
|
|
87
87
|
rdoc (>= 4.0.0)
|
|
88
88
|
reline (>= 0.4.2)
|
|
89
|
-
json (2.
|
|
90
|
-
json-schema (6.
|
|
89
|
+
json (2.19.1)
|
|
90
|
+
json-schema (6.2.0)
|
|
91
91
|
addressable (~> 2.8)
|
|
92
92
|
bigdecimal (>= 3.1, < 5)
|
|
93
93
|
json_schemer (2.5.0)
|
|
@@ -100,7 +100,7 @@ GEM
|
|
|
100
100
|
language_server-protocol (3.17.0.5)
|
|
101
101
|
lint_roller (1.1.0)
|
|
102
102
|
logger (1.7.0)
|
|
103
|
-
mcp (0.
|
|
103
|
+
mcp (0.8.0)
|
|
104
104
|
json-schema (>= 4.1)
|
|
105
105
|
minitest (6.0.2)
|
|
106
106
|
drb (~> 2.0)
|
|
@@ -124,7 +124,7 @@ GEM
|
|
|
124
124
|
psych (5.3.1)
|
|
125
125
|
date
|
|
126
126
|
stringio
|
|
127
|
-
public_suffix (7.0.
|
|
127
|
+
public_suffix (7.0.5)
|
|
128
128
|
racc (1.8.1)
|
|
129
129
|
rainbow (3.1.1)
|
|
130
130
|
rake (13.3.1)
|
|
@@ -150,7 +150,7 @@ GEM
|
|
|
150
150
|
rspec-support (3.13.7)
|
|
151
151
|
rspec_junit_formatter (0.6.0)
|
|
152
152
|
rspec-core (>= 2, < 4, != 2.12.0)
|
|
153
|
-
rubocop (1.85.
|
|
153
|
+
rubocop (1.85.1)
|
|
154
154
|
json (~> 2.3)
|
|
155
155
|
language_server-protocol (~> 3.17.0.2)
|
|
156
156
|
lint_roller (~> 1.1.0)
|
|
@@ -209,6 +209,7 @@ DEPENDENCIES
|
|
|
209
209
|
rubocop (~> 1.24)
|
|
210
210
|
rubocop-rake
|
|
211
211
|
rubocop-rspec
|
|
212
|
+
ruby-progressbar
|
|
212
213
|
simplecov
|
|
213
214
|
|
|
214
215
|
CHECKSUMS
|
|
@@ -218,7 +219,7 @@ CHECKSUMS
|
|
|
218
219
|
attr_extras (7.1.0) sha256=d96fc9a9dd5d85ba2d37762440a816f840093959ae26bb90da994c2d9f1fc827
|
|
219
220
|
base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
|
|
220
221
|
bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
|
|
221
|
-
cocina-models (0.
|
|
222
|
+
cocina-models (0.114.0)
|
|
222
223
|
concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab
|
|
223
224
|
connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a
|
|
224
225
|
date (3.5.1) sha256=750d06384d7b9c15d562c76291407d89e368dda4d4fff957eb94962d325a0dc0
|
|
@@ -230,7 +231,7 @@ CHECKSUMS
|
|
|
230
231
|
dry-core (1.2.0) sha256=0cc5a7da88df397f153947eeeae42e876e999c1e30900f3c536fb173854e96a1
|
|
231
232
|
dry-inflector (1.3.1) sha256=7fb0c2bb04f67638f25c52e7ba39ab435d922a3a5c3cd196120f63accb682dcc
|
|
232
233
|
dry-logic (1.6.0) sha256=da6fedbc0f90fc41f9b0cc7e6f05f5d529d1efaef6c8dcc8e0733f685745cea2
|
|
233
|
-
dry-struct (1.8.
|
|
234
|
+
dry-struct (1.8.1) sha256=033868594c45241540172bf1ebbc8bb76b72b4f0717072325deba38ac13e80f1
|
|
234
235
|
dry-types (1.9.1) sha256=baebeecdb9f8395d6c9d227b62011279440943e3ef2468fe8ccc1ba11467f178
|
|
235
236
|
edtf (3.2.0) sha256=a15a0ee274e49c8047a3ebb5d61d793ba44f7f8ffbf0595392c467e3ea8d2447
|
|
236
237
|
equivalent-xml (0.6.0) sha256=8919761efa848ad0846369ff8be1f646b17e5061698c4867b09829000cc3f487
|
|
@@ -240,14 +241,14 @@ CHECKSUMS
|
|
|
240
241
|
ice_nine (0.11.2) sha256=5d506a7d2723d5592dc121b9928e4931742730131f22a1a37649df1c1e2e63db
|
|
241
242
|
io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
|
|
242
243
|
irb (1.17.0) sha256=168c4ddb93d8a361a045c41d92b2952c7a118fa73f23fe14e55609eb7a863aae
|
|
243
|
-
json (2.
|
|
244
|
-
json-schema (6.
|
|
244
|
+
json (2.19.1) sha256=dd94fdc59e48bff85913829a32350b3148156bc4fd2a95a2568a78b11344082d
|
|
245
|
+
json-schema (6.2.0) sha256=e8bff46ed845a22c1ab2bd0d7eccf831c01fe23bb3920caa4c74db4306813666
|
|
245
246
|
json_schemer (2.5.0) sha256=2f01fb4cce721a4e08dd068fc2030cffd0702a7f333f1ea2be6e8991f00ae396
|
|
246
247
|
jsonpath (1.1.5) sha256=29f70467193a2dc93ab864ec3d3326d54267961acc623f487340eb9c34931dbe
|
|
247
248
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
248
249
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
249
250
|
logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
|
|
250
|
-
mcp (0.
|
|
251
|
+
mcp (0.8.0) sha256=ae8bd146bb8e168852866fd26f805f52744f6326afb3211e073f78a95e0c34fb
|
|
251
252
|
minitest (6.0.2) sha256=db6e57956f6ecc6134683b4c87467d6dd792323c7f0eea7b93f66bd284adbc3d
|
|
252
253
|
multi_json (1.19.1) sha256=7aefeff8f2c854bf739931a238e4aea64592845e0c0395c8a7d2eea7fdd631b7
|
|
253
254
|
nokogiri (1.19.1-arm64-darwin) sha256=dfe2d337e6700eac47290407c289d56bcf85805d128c1b5a6434ddb79731cb9e
|
|
@@ -260,7 +261,7 @@ CHECKSUMS
|
|
|
260
261
|
prettyprint (0.2.0) sha256=2bc9e15581a94742064a3cc8b0fb9d45aae3d03a1baa6ef80922627a0766f193
|
|
261
262
|
prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
|
|
262
263
|
psych (5.3.1) sha256=eb7a57cef10c9d70173ff74e739d843ac3b2c019a003de48447b2963d81b1974
|
|
263
|
-
public_suffix (7.0.
|
|
264
|
+
public_suffix (7.0.5) sha256=1a8bb08f1bbea19228d3bed6e5ed908d1cb4f7c2726d18bd9cadf60bc676f623
|
|
264
265
|
racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
|
|
265
266
|
rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
|
|
266
267
|
rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
|
|
@@ -273,7 +274,7 @@ CHECKSUMS
|
|
|
273
274
|
rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
|
|
274
275
|
rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
|
|
275
276
|
rspec_junit_formatter (0.6.0) sha256=40dde674e6ae4e6cc0ff560da25497677e34fefd2338cc467a8972f602b62b15
|
|
276
|
-
rubocop (1.85.
|
|
277
|
+
rubocop (1.85.1) sha256=3dbcf9e961baa4c376eeeb2a03913dca5e3987033b04d38fa538aa1e7406cc77
|
|
277
278
|
rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
|
|
278
279
|
rubocop-rake (0.7.1) sha256=3797f2b6810c3e9df7376c26d5f44f3475eda59eb1adc38e6f62ecf027cbae4d
|
|
279
280
|
rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
|
data/README.md
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
The cocina-models gem is a Ruby implementation of the Stanford Digital Repository (SDR) data model, which we named "Cocina." The data being modeled is oriented around digital repository objects.
|
|
8
8
|
|
|
9
|
-
The data model is expressed in an
|
|
9
|
+
The data model is expressed in a JSON Schema specification that lives in this codebase. Expressing the model in such a spec allows for rich validation (using gems such as `json_schemer`). The gem provides a set of generators (see below) to generate Ruby classes from the specification, with modeling provided by dry-struct / dry-types. Together, these provide a way for consumers to validate objects against models and to manipulate those objects.
|
|
10
10
|
|
|
11
11
|
Note that the data model encodes properties as camelCase, which the team believes to be consistent with other HTTP APIs and the original design of the Cocina data model. While using camelCase in Ruby code may look and feel wrong, we did explore automagic conversion between camelCase in the model and snake_case in the Ruby context. We ultimately concluded that we have enough representations of the data model in enough codebases to reasonably worry about data inconsistency problems, none of which we need in our work on SDR.
|
|
12
12
|
|
|
@@ -55,7 +55,10 @@ Beyond what is necessary to test the generator, the Cocina model classes are not
|
|
|
55
55
|
|
|
56
56
|
## Testing validation changes
|
|
57
57
|
|
|
58
|
-
If there is a possibility that a model, mapping, or validation change will conflict with some existing objects then
|
|
58
|
+
If there is a possibility that a model, mapping, or validation change will conflict with some existing objects then `bin/validate-data` should be used for testing. This operates on a sample of objects from the repository and reports any validation errors. You may get the sample by running the script [bin/export-cocina-head-versions](https://github.com/sul-dlss/dor-services-app/pull/5854) and downloading the data file to your computer.
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
Alternatively, you can use [validate-cocina](https://github.com/sul-dlss/dor-services-app/blob/main/bin/validate-cocina) for testing. This must be run on the `sdr-infra` VM since it requires deploying a branch of cocina-models. It is slower than using `bin/validate-data`, but all of the data is completely up to date.
|
|
59
62
|
|
|
60
63
|
For background on object validation, as it relates to migrating versions, see: https://github.com/sul-dlss/dor-services-app/wiki/Migrating-Cocina
|
|
61
64
|
|
|
@@ -153,21 +156,7 @@ This list of services is known to include:
|
|
|
153
156
|
* [sul-dlss/sdr-api](https://github.com/sul-dlss/sdr-api)
|
|
154
157
|
* [sul-dlss/dor-services-app](https://github.com/sul-dlss/dor-services-app/)
|
|
155
158
|
|
|
156
|
-
|
|
157
|
-
#### Step 3A: Update API specifications
|
|
158
|
-
|
|
159
|
-
**NOTE**: You can skip step 3A if there have not been any changes to the `cocina-models` OpenAPI spec since the prior release.
|
|
160
|
-
|
|
161
|
-
The cocina-models gem is used in applications that have an API specification that accepts Cocina models.
|
|
162
|
-
|
|
163
|
-
#### Step 3B: Bump gems and create the PRs
|
|
164
|
-
|
|
165
|
-
If you updated the `schema.json` in step 3A, use the same PR for step 3B. Why? When [dor-services-app](https://github.com/sul-dlss/dor-services-app), for example, is updated to use the new models (via the auto-update script), these clients should be updated at the same time or there is risk of models produced by dor-services-app not being acceptable to the clients.
|
|
166
|
-
|
|
167
|
-
1. Perform `bundle update --conservative cocina-models dor-services-client` in the services above and make PRs for those repos if they don't already exist. You may first need to update how these gems are pinned in the `Gemfile` in order to bump them.
|
|
168
|
-
2. Note that sdr-client is not currently used in these applications, but if it were, would also need to be bumped to the latest release.
|
|
169
|
-
|
|
170
|
-
#### Step 3C: Merge 'em
|
|
159
|
+
Perform `bundle update --conservative cocina-models dor-services-client` in the services above and make PRs for those repos. You may first need to update how these gems are pinned in the `Gemfile` in order to bump them.
|
|
171
160
|
|
|
172
161
|
Get the directly coupled services PRs merged before the deploy in step 5.
|
|
173
162
|
|
data/bin/validate-data
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Validate JSONL (XZ compressed) files against the schema
|
|
5
|
+
|
|
6
|
+
# Leave quietly with status 1 when the run is interrupted (Ctrl+C) or
# terminated, rather than surfacing an exception backtrace.
%w[INT TERM].each do |signal_name|
  Signal.trap(signal_name) do
    puts "\nTerminated"
    exit(1)
  end
end
|
|
16
|
+
|
|
17
|
+
require 'bundler/setup'
|
|
18
|
+
require 'cocina/models'
|
|
19
|
+
require 'json'
|
|
20
|
+
require 'ruby-progressbar'
|
|
21
|
+
require 'optparse'
|
|
22
|
+
|
|
23
|
+
# Parse command line options.
#
# Reads the flags and the positional FILENAME from ARGV (recognised flags are
# removed from ARGV while parsing). Exits with status 1 after printing a usage
# hint when the filename is missing, a flag cannot be parsed, or a numeric
# option is out of range; exits 0 after printing help.
#
# @return [Hash] :filename (String), :processes (Integer), :batch_size (Integer)
#   and :count (Integer, or nil when not supplied)
def parse_options
  options = { processes: 12, count: nil, batch_size: 100 }
  begin
    build_option_parser(options).parse!
  rescue OptionParser::ParseError => e
    # Unknown flags / non-integer values: report them instead of a backtrace.
    usage_error(e.message)
  end
  # Filename is required as a positional argument
  usage_error('FILENAME is required') if ARGV.empty?
  validate_numeric_options(options)
  options.merge(filename: ARGV[0])
end

# Build the OptionParser whose handlers store parsed values into +options+.
def build_option_parser(options) # rubocop:disable Metrics/MethodLength
  OptionParser.new do |opts|
    opts.banner = 'Usage: validate-data FILENAME [options]'

    opts.on('-p', '--processes NUM', Integer, 'Number of processes to use (default: 12)') do |p|
      options[:processes] = p
    end

    opts.on('-c', '--count NUM', Integer, 'Total line count (skips counting pass if provided)') do |c|
      options[:count] = c
    end

    opts.on('-b', '--batch-size NUM', Integer, 'Batch size for worker processing (default: 100)') do |b|
      options[:batch_size] = b
    end

    opts.on('-h', '--help', 'Display this help message') do
      puts opts
      exit
    end
  end
end

# Reject values that cannot work downstream: with zero workers there is nobody
# to hand a batch to, and a non-positive batch size or negative count is
# meaningless.
def validate_numeric_options(options)
  usage_error('--processes must be at least 1') if options[:processes] < 1
  usage_error('--batch-size must be at least 1') if options[:batch_size] < 1
  usage_error('--count must not be negative') if options[:count]&.negative?
end

# Print an error followed by the usage hint, then exit with status 1.
def usage_error(message)
  puts "Error: #{message}"
  puts 'Usage: validate-data FILENAME [options]'
  puts 'Run with --help for more information'
  exit 1
end
|
|
63
|
+
|
|
64
|
+
# Count the lines of an XZ-compressed file by streaming it through `xzcat`.
#
# @param filename [String] path handed to `xzcat`
# @return [Integer] number of lines in the decompressed stream
# @raise [RuntimeError] when `xzcat` does not exit successfully (for example a
#   missing or corrupt file); previously such a failure was reported as a count
def count_lines(filename)
  count = IO.popen(['xzcat', filename]) { |io| io.each_line.count }

  # The block form of IO.popen waits for the child, so its status is available.
  status = Process.last_status
  raise "xzcat failed for #{filename} (exit status #{status.exitstatus.inspect})" unless status.success?

  count
end
|
|
72
|
+
|
|
73
|
+
# Resolve how many lines will be validated.
#
# A caller-supplied count is trusted and returned as-is; without one the file
# is counted via count_lines. Either way the figure is announced on stdout.
#
# @param filename [String] file to count when no count was supplied
# @param provided_count [Integer, nil] count given on the command line
# @return [Integer] the line count to use
def get_total_lines(filename, provided_count)
  unless provided_count
    puts 'Counting lines...'
    counted = count_lines(filename)
    puts "Total lines to validate: #{counted}"
    return counted
  end

  puts "Using provided line count: #{provided_count}"
  provided_count
end
|
|
85
|
+
|
|
86
|
+
# Worker process that reads batches from a pipe and validates
|
|
87
|
+
def worker_process(reader) # rubocop:disable Metrics/MethodLength
|
|
88
|
+
errors = []
|
|
89
|
+
|
|
90
|
+
loop do
|
|
91
|
+
# Read length prefix (4 bytes)
|
|
92
|
+
length_data = reader.read(4)
|
|
93
|
+
break if length_data.nil? || length_data.empty?
|
|
94
|
+
|
|
95
|
+
length = length_data.unpack1('N')
|
|
96
|
+
data = reader.read(length)
|
|
97
|
+
batch = Marshal.load(data) # rubocop:disable Security/MarshalLoad
|
|
98
|
+
|
|
99
|
+
# Process each line in the batch
|
|
100
|
+
batch.each do |line_num, line_content|
|
|
101
|
+
json = JSON.parse(line_content)
|
|
102
|
+
Cocina::Models.build(json)
|
|
103
|
+
rescue JSON::ParserError => e
|
|
104
|
+
errors << { line: line_num, error: "JSON Parse Error: #{e.message}" }
|
|
105
|
+
rescue Cocina::Models::ValidationError => e
|
|
106
|
+
errors << { line: line_num, error: "Validation Error: #{e.message}" }
|
|
107
|
+
rescue Cocina::Models::UnknownTypeError => e
|
|
108
|
+
errors << { line: line_num, error: "Unknown Type Error: #{e.message}" }
|
|
109
|
+
rescue StandardError => e
|
|
110
|
+
errors << { line: line_num, error: "Error: #{e.class} - #{e.message}" }
|
|
111
|
+
end
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
errors
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# Spawn worker processes.
#
# For each worker two pipes are created: a work pipe (parent writes batches,
# child reads them) and a result pipe (child writes its marshalled error list,
# parent reads it). The child runs worker_process until its work pipe reaches
# end of stream, reports, and exits with status 0.
#
# @param num_processes [Integer] number of workers to fork
# @return [Array(Array<Hash>, Array<IO>)] the workers as { pid:, writer: }
#   hashes and, in the same order, the read ends of their result pipes
def spawn_workers(num_processes) # rubocop:disable Metrics/MethodLength
  workers = []
  result_readers = []

  num_processes.times do
    work_reader, work_writer = IO.pipe
    result_reader, result_writer = IO.pipe

    pid = fork do
      # Child process: close every pipe end that belongs to the parent.
      work_writer.close
      result_reader.close
      # fork also copies the parent's ends of the pipes of workers spawned
      # earlier. A reader only sees end of stream once *every* copy of the
      # write end is closed, so keeping these open would stop earlier workers
      # from finishing until this one exits.
      workers.each { |earlier| earlier[:writer].close }
      result_readers.each(&:close)

      errors = worker_process(work_reader)

      # Send results back
      result_writer.write(Marshal.dump(errors))
      result_writer.close
      work_reader.close
      exit(0)
    end

    # Parent process: keep only the ends it uses.
    work_reader.close
    result_writer.close

    workers << { pid: pid, writer: work_writer }
    result_readers << result_reader
  end

  [workers, result_readers]
end
|
|
150
|
+
|
|
151
|
+
# Write one batch to a worker as a length-prefixed frame: a 4-byte big-endian
# byte count ('N') followed by the Marshal dump of the batch. Empty batches are
# not sent.
#
# @param worker [Hash] worker entry whose :writer is the IO to write to
# @param batch [Array] the [line_number, line_content] pairs to send
def send_batch(worker, batch)
  return if batch.empty?

  payload = Marshal.dump(batch)
  pipe = worker[:writer]
  pipe.write([payload.bytesize].pack('N'))
  pipe.write(payload)
end
|
|
159
|
+
|
|
160
|
+
# Stream the file through `xzcat` and distribute its lines to the workers.
#
# Lines are numbered from 1, grouped into batches of +batch_size+ and handed to
# the workers in round-robin order via send_batch. A progress bar sized by
# +total_lines+ is advanced once per line.
#
# @param filename [String] path handed to `xzcat`
# @param workers [Array<Hash>] worker entries as accepted by send_batch
# @param batch_size [Integer] number of lines per batch
# @param total_lines [Integer] total used for the progress bar
# @raise [RuntimeError] when `xzcat` does not exit successfully, so that a
#   truncated or unreadable archive is not mistaken for a complete run
def distribute_work(filename, workers, batch_size, total_lines) # rubocop:disable Metrics/MethodLength
  line_number = 0
  current_worker = 0
  batch = []

  # Create progress bar
  progressbar = ProgressBar.create(
    title: 'Validating',
    total: total_lines,
    format: '%t: |%B| %p%% %c/%C %a %e',
    throttle_rate: 0.1
  )

  IO.popen(['xzcat', filename]) do |io|
    io.each_line do |line|
      line_number += 1
      batch << [line_number, line]

      # When batch is full, send to worker
      if batch.size >= batch_size
        send_batch(workers[current_worker], batch)
        batch = []
        current_worker = (current_worker + 1) % workers.length
      end

      # Update progress bar
      progressbar.increment
    end
  end

  # The block form of IO.popen waits for the child, so its status is available.
  status = Process.last_status
  raise "xzcat failed for #{filename} (exit status #{status.exitstatus.inspect})" unless status.success?

  # Send any remaining lines in the last batch
  send_batch(workers[current_worker], batch) if batch.any?

  # Final progress update
  progressbar.finish
end
|
|
201
|
+
|
|
202
|
+
# Collect results from all workers.
#
# Closes every worker's work pipe (which is how workers learn there is no more
# input), reads each worker's marshalled error list from its result pipe, and
# waits for the worker processes to exit.
#
# @param workers [Array<Hash>] worker entries with :pid and :writer
# @param result_readers [Array<IO>] result pipes, in the same order as +workers+
# @return [Array<Hash>] the error hashes reported by all workers
# @raise [RuntimeError] when a worker closed its result pipe without reporting
#   anything, since its lines would otherwise silently count as valid
def collect_results(workers, result_readers) # rubocop:disable Metrics/MethodLength
  # Close all worker input pipes to signal completion
  workers.each { |w| w[:writer].close }

  puts 'Collecting results from workers...'

  payloads = result_readers.map do |reader|
    reader.read
  ensure
    reader.close
  end

  # Wait for all workers to complete
  workers.each { |w| Process.wait(w[:pid]) }

  payloads.each_with_index.flat_map do |payload, index|
    if payload.empty?
      pid = workers[index] && workers[index][:pid]
      raise "Worker #{index} (pid #{pid.inspect}) exited without reporting results"
    end

    # The payload is produced by this script's own worker processes.
    Marshal.load(payload) # rubocop:disable Security/MarshalLoad
  end
end
|
|
222
|
+
|
|
223
|
+
# Print validation summary.
#
# Writes the totals, success rate, elapsed time and throughput to stdout,
# followed (when there are errors) by every error ordered by line number and a
# comma-separated list of the failing line numbers.
#
# @param total_lines [Integer] number of lines that were processed
# @param errors [Array<Hash>] error hashes with :line and :error
# @param elapsed_time [Numeric] duration of the run in seconds
def print_summary(total_lines, errors, elapsed_time) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  puts '=' * 80
  puts 'VALIDATION SUMMARY'
  puts '=' * 80
  puts "Total lines processed: #{total_lines}"
  puts "Lines with errors: #{errors.length}"
  puts "Success rate: #{summary_success_rate(total_lines, errors.length)}"
  puts "Time elapsed: #{elapsed_time.round(2)} seconds"
  puts "Throughput: #{summary_throughput(total_lines, elapsed_time)}"

  return unless errors.any?

  # Sort errors by line number once; both listings below use this order.
  ordered = errors.sort_by { |e| e[:line] }

  puts "\n"
  puts 'Error details:'
  puts '-' * 80
  ordered.each { |error| puts "Line #{error[:line]}: #{error[:error]}" }
  puts "\n"
  puts "Line numbers with errors: #{ordered.map { |e| e[:line] }.join(', ')}"
end

# Percentage of lines without errors; 'n/a' when nothing was processed, since
# 0 / 0 would otherwise be printed as NaN.
def summary_success_rate(total_lines, error_count)
  return 'n/a' if total_lines.zero?

  "#{((total_lines - error_count).to_f / total_lines * 100).round(2)}%"
end

# Lines per second; 'n/a' when no time elapsed, since dividing by zero gives a
# non-finite value that cannot be rounded to an integer.
def summary_throughput(total_lines, elapsed_time)
  return 'n/a' unless elapsed_time.positive?

  "#{(total_lines / elapsed_time).round(0)} lines/second"
end
|
|
246
|
+
|
|
247
|
+
# Main execution.
#
# Parses the command line, determines the line total, forks the workers,
# streams the file to them, gathers their errors and prints the summary.
# Exits with status 0 when no errors were reported and 1 otherwise.
def main
  options = parse_options

  puts "Validating file: #{options[:filename]}"
  puts "Using #{options[:processes]} processes with batch size #{options[:batch_size]}"
  puts '=' * 80

  # Get total line count
  total_lines = get_total_lines(options[:filename], options[:count])
  puts '=' * 80

  # Spawn worker processes
  workers, result_readers = spawn_workers(options[:processes])

  # Time the run with the monotonic clock so that wall-clock adjustments
  # during a long validation cannot distort the reported duration.
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  # Distribute work to workers
  distribute_work(options[:filename], workers, options[:batch_size], total_lines)

  # Collect results
  all_errors = collect_results(workers, result_readers)

  # Calculate elapsed time
  elapsed_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

  # Print summary
  print_summary(total_lines, all_errors, elapsed_time)

  # Exit with appropriate code
  exit(all_errors.empty? ? 0 : 1)
end
|
|
280
|
+
|
|
281
|
+
# Run the script
|
|
282
|
+
main
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
module Cocina
|
|
4
4
|
module Generator
|
|
5
|
-
# Class for generating from
|
|
5
|
+
# Class for generating from a JSON schema
|
|
6
6
|
class Schema < SchemaBase
|
|
7
7
|
def schema_properties
|
|
8
8
|
@schema_properties ||= (properties + all_of_properties + one_of_properties).uniq(&:key)
|
|
@@ -112,7 +112,7 @@ module Cocina
|
|
|
112
112
|
key: key,
|
|
113
113
|
# The property does less validation because may vary between
|
|
114
114
|
# different oneOf schemas. Validation is still performed
|
|
115
|
-
# by
|
|
115
|
+
# by JSON Schema.
|
|
116
116
|
relaxed: true,
|
|
117
117
|
parent: self,
|
|
118
118
|
schemas: schemas)
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
module Cocina
|
|
4
4
|
module Generator
|
|
5
|
-
# Base class for generating from
|
|
5
|
+
# Base class for generating from a JSON Schema
|
|
6
6
|
class SchemaBase
|
|
7
7
|
attr_reader :schema_doc, :key, :required, :nullable, :parent, :relaxed, :schemas, :lite
|
|
8
8
|
|
|
@@ -74,7 +74,7 @@ module Cocina
|
|
|
74
74
|
def relaxed_comment
|
|
75
75
|
return '' unless relaxed
|
|
76
76
|
|
|
77
|
-
"# Validation of this property is relaxed. See the
|
|
77
|
+
"# Validation of this property is relaxed. See the schema.json for full validation.\n"
|
|
78
78
|
end
|
|
79
79
|
|
|
80
80
|
# dry-types-based types contain the word `Types` (e.g., `Types::String`), and custom types (e.g., `SourceId`) do not
|
data/lib/cocina/generator.rb
CHANGED
|
@@ -3,8 +3,7 @@
|
|
|
3
3
|
module Cocina
|
|
4
4
|
# Wrapper for JSON Schema support using json_schemer
|
|
5
5
|
class JsonSchemaWrapper
|
|
6
|
-
class
|
|
7
|
-
class MissingReferenceError < OpenApiError; end
|
|
6
|
+
class MissingReferenceError < StandardError; end
|
|
8
7
|
|
|
9
8
|
def initialize(spec_hash, strict_reference_validation: true)
|
|
10
9
|
@spec = spec_hash
|
data/lib/cocina/models/access.rb
CHANGED
|
@@ -4,15 +4,15 @@ module Cocina
|
|
|
4
4
|
module Models
|
|
5
5
|
class Access < Struct
|
|
6
6
|
# Access level.
|
|
7
|
-
# Validation of this property is relaxed. See the
|
|
7
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
8
8
|
attribute? :view, Types::Strict::String.optional.default('dark')
|
|
9
9
|
# Download access level.
|
|
10
|
-
# Validation of this property is relaxed. See the
|
|
10
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
11
11
|
attribute? :download, Types::Strict::String.optional.default('none')
|
|
12
12
|
# Not used for this access type, must be null.
|
|
13
|
-
# Validation of this property is relaxed. See the
|
|
13
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
14
14
|
attribute? :location, Types::Strict::String.optional
|
|
15
|
-
# Validation of this property is relaxed. See the
|
|
15
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
16
16
|
attribute? :controlledDigitalLending, Types::Strict::Bool.optional.default(false)
|
|
17
17
|
end
|
|
18
18
|
end
|
|
@@ -19,15 +19,15 @@ module Cocina
|
|
|
19
19
|
# CC, RightsStatement.org URI, etc.).
|
|
20
20
|
attribute? :license, License.optional.enum(nil, 'https://www.gnu.org/licenses/agpl.txt', 'https://www.apache.org/licenses/LICENSE-2.0', 'https://opensource.org/licenses/BSD-2-Clause', 'https://opensource.org/licenses/BSD-3-Clause', 'https://creativecommons.org/licenses/by/4.0/legalcode', 'https://creativecommons.org/licenses/by-nc/4.0/legalcode', 'https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode', 'https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode', 'https://creativecommons.org/licenses/by-nd/4.0/legalcode', 'https://creativecommons.org/licenses/by-sa/4.0/legalcode', 'https://creativecommons.org/publicdomain/zero/1.0/legalcode', 'https://opensource.org/licenses/cddl1', 'https://www.eclipse.org/legal/epl-2.0', 'https://www.gnu.org/licenses/gpl-3.0-standalone.html', 'https://www.isc.org/downloads/software-support-policy/isc-license/', 'https://www.gnu.org/licenses/lgpl-3.0-standalone.html', 'https://opensource.org/licenses/MIT', 'https://www.mozilla.org/MPL/2.0/', 'https://opendatacommons.org/licenses/by/1-0/', 'http://opendatacommons.org/licenses/odbl/1.0/', 'https://opendatacommons.org/licenses/odbl/1-0/', 'https://creativecommons.org/publicdomain/mark/1.0/', 'https://opendatacommons.org/licenses/pddl/1-0/', 'https://creativecommons.org/licenses/by/3.0/legalcode', 'https://creativecommons.org/licenses/by-sa/3.0/legalcode', 'https://creativecommons.org/licenses/by-nd/3.0/legalcode', 'https://creativecommons.org/licenses/by-nc/3.0/legalcode', 'https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode', 'https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode', 'https://cocina.sul.stanford.edu/licenses/none')
|
|
21
21
|
# Access level.
|
|
22
|
-
# Validation of this property is relaxed. See the
|
|
22
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
23
23
|
attribute? :view, Types::Strict::String.optional.default('dark')
|
|
24
24
|
# Download access level.
|
|
25
|
-
# Validation of this property is relaxed. See the
|
|
25
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
26
26
|
attribute? :download, Types::Strict::String.optional.default('none')
|
|
27
27
|
# Not used for this access type, must be null.
|
|
28
|
-
# Validation of this property is relaxed. See the
|
|
28
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
29
29
|
attribute? :location, Types::Strict::String.optional
|
|
30
|
-
# Validation of this property is relaxed. See the
|
|
30
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
31
31
|
attribute? :controlledDigitalLending, Types::Strict::Bool.optional.default(false)
|
|
32
32
|
end
|
|
33
33
|
end
|
|
@@ -16,9 +16,9 @@ module Cocina
|
|
|
16
16
|
attribute :label, Types::Strict::String
|
|
17
17
|
attribute :version, Types::Strict::Integer
|
|
18
18
|
# Administrative properties for an AdminPolicy
|
|
19
|
-
# Validation of this property is relaxed. See the
|
|
19
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
20
20
|
attribute? :administrative, AdminPolicyAdministrative.optional
|
|
21
|
-
# Validation of this property is relaxed. See the
|
|
21
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
22
22
|
attribute? :description, Description.optional
|
|
23
23
|
end
|
|
24
24
|
end
|
|
@@ -25,13 +25,13 @@ module Cocina
|
|
|
25
25
|
# Version for the Collection within SDR.
|
|
26
26
|
attribute :version, Types::Strict::Integer
|
|
27
27
|
# Access metadata for collections
|
|
28
|
-
# Validation of this property is relaxed. See the
|
|
28
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
29
29
|
attribute? :access, CollectionAccess.optional
|
|
30
|
-
# Validation of this property is relaxed. See the
|
|
30
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
31
31
|
attribute? :administrative, Administrative.optional
|
|
32
|
-
# Validation of this property is relaxed. See the
|
|
32
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
33
33
|
attribute? :description, Description.optional
|
|
34
|
-
# Validation of this property is relaxed. See the
|
|
34
|
+
# Validation of this property is relaxed. See the schema.json for full validation.
|
|
35
35
|
attribute? :identification, CollectionIdentification.optional
|
|
36
36
|
end
|
|
37
37
|
end
|