cocina-models 0.113.0 → 0.114.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '09669a342a43a6be309144d1d71748372251fdb37ad96c480e7ce88127749ec5'
4
- data.tar.gz: 99e35e0634f65f03e545dfabc40d95cb6a638c1f0f0e05ffbf14315738ac895c
3
+ metadata.gz: 2b8514cca9a06bbf3e7c36d84a1f27d093e16a263e8ac1cb92289b6640d28bec
4
+ data.tar.gz: '09f25bf203314c534eba5e21a16b163f56b21ac2e858cf9a5a25f309086efa12'
5
5
  SHA512:
6
- metadata.gz: 29c840e670181b57df7d29573da1cc019cd6a34300f43548a762f0d394736af09822bb61e85acd7eefa6426e37996fbe5869c4b58c8aa7e9176a201af14f91ec
7
- data.tar.gz: 00f39759126179cd79f5cda1a1cabf93b3da2ba19ef0ecd90fa1f1c10735b31ff966e9496e93cac69fb35255b0dc1c1e45176d70f743630005eb883384e38132
6
+ metadata.gz: 2dce9b0cc4eb70cc6a2ddbc2dda88e5249b9ac58a0ec96523ec9c3e23ebe82108d87eb1e36b70e8e44a0d8c0abf75682c6f284dbda26e4cefe469b4acf5bf715
7
+ data.tar.gz: 795ba280b81c888fd005cc5af39d671b942f3d5b718b61b44e902581d476958654f28485aec4f8d18de4d725a6b9338c84c8a0acc4a7c315b04e5cd2ec22c9a8
data/.circleci/config.yml CHANGED
@@ -11,5 +11,5 @@ workflows:
11
11
  context: dlss
12
12
  before-test:
13
13
  - run:
14
- name: validate openapi
14
+ name: validate schema
15
15
  command: bin/validate-schema schema.json
data/Gemfile CHANGED
@@ -7,3 +7,4 @@ gemspec
7
7
 
8
8
  gem 'debug'
9
9
  gem 'rspec_junit_formatter' # For CircleCI
10
+ gem 'ruby-progressbar'
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cocina-models (0.113.0)
4
+ cocina-models (0.114.0)
5
5
  activesupport
6
6
  deprecation
7
7
  dry-struct (~> 1.0)
@@ -59,7 +59,7 @@ GEM
59
59
  concurrent-ruby (~> 1.0)
60
60
  dry-core (~> 1.1)
61
61
  zeitwerk (~> 2.6)
62
- dry-struct (1.8.0)
62
+ dry-struct (1.8.1)
63
63
  dry-core (~> 1.1)
64
64
  dry-types (~> 1.8, >= 1.8.2)
65
65
  ice_nine (~> 0.11)
@@ -86,8 +86,8 @@ GEM
86
86
  prism (>= 1.3.0)
87
87
  rdoc (>= 4.0.0)
88
88
  reline (>= 0.4.2)
89
- json (2.18.1)
90
- json-schema (6.1.0)
89
+ json (2.19.1)
90
+ json-schema (6.2.0)
91
91
  addressable (~> 2.8)
92
92
  bigdecimal (>= 3.1, < 5)
93
93
  json_schemer (2.5.0)
@@ -100,7 +100,7 @@ GEM
100
100
  language_server-protocol (3.17.0.5)
101
101
  lint_roller (1.1.0)
102
102
  logger (1.7.0)
103
- mcp (0.7.1)
103
+ mcp (0.8.0)
104
104
  json-schema (>= 4.1)
105
105
  minitest (6.0.2)
106
106
  drb (~> 2.0)
@@ -124,7 +124,7 @@ GEM
124
124
  psych (5.3.1)
125
125
  date
126
126
  stringio
127
- public_suffix (7.0.2)
127
+ public_suffix (7.0.5)
128
128
  racc (1.8.1)
129
129
  rainbow (3.1.1)
130
130
  rake (13.3.1)
@@ -150,7 +150,7 @@ GEM
150
150
  rspec-support (3.13.7)
151
151
  rspec_junit_formatter (0.6.0)
152
152
  rspec-core (>= 2, < 4, != 2.12.0)
153
- rubocop (1.85.0)
153
+ rubocop (1.85.1)
154
154
  json (~> 2.3)
155
155
  language_server-protocol (~> 3.17.0.2)
156
156
  lint_roller (~> 1.1.0)
@@ -209,6 +209,7 @@ DEPENDENCIES
209
209
  rubocop (~> 1.24)
210
210
  rubocop-rake
211
211
  rubocop-rspec
212
+ ruby-progressbar
212
213
  simplecov
213
214
 
214
215
  CHECKSUMS
@@ -218,7 +219,7 @@ CHECKSUMS
218
219
  attr_extras (7.1.0) sha256=d96fc9a9dd5d85ba2d37762440a816f840093959ae26bb90da994c2d9f1fc827
219
220
  base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
220
221
  bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
221
- cocina-models (0.113.0)
222
+ cocina-models (0.114.0)
222
223
  concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab
223
224
  connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a
224
225
  date (3.5.1) sha256=750d06384d7b9c15d562c76291407d89e368dda4d4fff957eb94962d325a0dc0
@@ -230,7 +231,7 @@ CHECKSUMS
230
231
  dry-core (1.2.0) sha256=0cc5a7da88df397f153947eeeae42e876e999c1e30900f3c536fb173854e96a1
231
232
  dry-inflector (1.3.1) sha256=7fb0c2bb04f67638f25c52e7ba39ab435d922a3a5c3cd196120f63accb682dcc
232
233
  dry-logic (1.6.0) sha256=da6fedbc0f90fc41f9b0cc7e6f05f5d529d1efaef6c8dcc8e0733f685745cea2
233
- dry-struct (1.8.0) sha256=74c38b559924fb6462ac43ec780c4533a082d7b1d238a8d7857b773b3b8e2966
234
+ dry-struct (1.8.1) sha256=033868594c45241540172bf1ebbc8bb76b72b4f0717072325deba38ac13e80f1
234
235
  dry-types (1.9.1) sha256=baebeecdb9f8395d6c9d227b62011279440943e3ef2468fe8ccc1ba11467f178
235
236
  edtf (3.2.0) sha256=a15a0ee274e49c8047a3ebb5d61d793ba44f7f8ffbf0595392c467e3ea8d2447
236
237
  equivalent-xml (0.6.0) sha256=8919761efa848ad0846369ff8be1f646b17e5061698c4867b09829000cc3f487
@@ -240,14 +241,14 @@ CHECKSUMS
240
241
  ice_nine (0.11.2) sha256=5d506a7d2723d5592dc121b9928e4931742730131f22a1a37649df1c1e2e63db
241
242
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
242
243
  irb (1.17.0) sha256=168c4ddb93d8a361a045c41d92b2952c7a118fa73f23fe14e55609eb7a863aae
243
- json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
244
- json-schema (6.1.0) sha256=6bf70a2cfb6dfd5a06da28093fa8190f324c88eabd36a7f47097f227321dc702
244
+ json (2.19.1) sha256=dd94fdc59e48bff85913829a32350b3148156bc4fd2a95a2568a78b11344082d
245
+ json-schema (6.2.0) sha256=e8bff46ed845a22c1ab2bd0d7eccf831c01fe23bb3920caa4c74db4306813666
245
246
  json_schemer (2.5.0) sha256=2f01fb4cce721a4e08dd068fc2030cffd0702a7f333f1ea2be6e8991f00ae396
246
247
  jsonpath (1.1.5) sha256=29f70467193a2dc93ab864ec3d3326d54267961acc623f487340eb9c34931dbe
247
248
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
248
249
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
249
250
  logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
250
- mcp (0.7.1) sha256=fa967895d6952bad0d981ea907731d8528d2c246d2079d56a9c8bae83d14f1c7
251
+ mcp (0.8.0) sha256=ae8bd146bb8e168852866fd26f805f52744f6326afb3211e073f78a95e0c34fb
251
252
  minitest (6.0.2) sha256=db6e57956f6ecc6134683b4c87467d6dd792323c7f0eea7b93f66bd284adbc3d
252
253
  multi_json (1.19.1) sha256=7aefeff8f2c854bf739931a238e4aea64592845e0c0395c8a7d2eea7fdd631b7
253
254
  nokogiri (1.19.1-arm64-darwin) sha256=dfe2d337e6700eac47290407c289d56bcf85805d128c1b5a6434ddb79731cb9e
@@ -260,7 +261,7 @@ CHECKSUMS
260
261
  prettyprint (0.2.0) sha256=2bc9e15581a94742064a3cc8b0fb9d45aae3d03a1baa6ef80922627a0766f193
261
262
  prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
262
263
  psych (5.3.1) sha256=eb7a57cef10c9d70173ff74e739d843ac3b2c019a003de48447b2963d81b1974
263
- public_suffix (7.0.2) sha256=9114090c8e4e7135c1fd0e7acfea33afaab38101884320c65aaa0ffb8e26a857
264
+ public_suffix (7.0.5) sha256=1a8bb08f1bbea19228d3bed6e5ed908d1cb4f7c2726d18bd9cadf60bc676f623
264
265
  racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
265
266
  rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
266
267
  rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
@@ -273,7 +274,7 @@ CHECKSUMS
273
274
  rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
274
275
  rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
275
276
  rspec_junit_formatter (0.6.0) sha256=40dde674e6ae4e6cc0ff560da25497677e34fefd2338cc467a8972f602b62b15
276
- rubocop (1.85.0) sha256=317407feb681a07d54f64d2f9e1d6b6af1ce7678e51cd658e3ad8bd66da48c01
277
+ rubocop (1.85.1) sha256=3dbcf9e961baa4c376eeeb2a03913dca5e3987033b04d38fa538aa1e7406cc77
277
278
  rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
278
279
  rubocop-rake (0.7.1) sha256=3797f2b6810c3e9df7376c26d5f44f3475eda59eb1adc38e6f62ecf027cbae4d
279
280
  rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
 
7
7
  The cocina-models gem is a Ruby implementation of the Stanford Digital Repository (SDR) data model, which we named "Cocina." The data being modeled is oriented around digital repository objects.
8
8
 
9
- The data model is expressed in an OpenAPI specification that lives in this codebase. Expressing the model in such a spec allows for rich validation (using gems such as `json_schemer`). The gem provides a set of generators (see below) to generate Ruby classes from the specification, with modeling provided by dry-struct / dry-types. Together, these provide a way for consumers to validate objects against models and to manipulate those objects.
9
+ The data model is expressed in a JSON Schema specification that lives in this codebase. Expressing the model in such a spec allows for rich validation (using gems such as `json_schemer`). The gem provides a set of generators (see below) to generate Ruby classes from the specification, with modeling provided by dry-struct / dry-types. Together, these provide a way for consumers to validate objects against models and to manipulate those objects.
10
10
 
11
11
  Note that the data model encodes properties as camelCase, which the team believes to be consistent with other HTTP APIs and the original design of the Cocina data model. While using camelCase in Ruby code may look and feel wrong, we did explore automagic conversion between camelCase in the model and snake_case in the Ruby context. We ultimately concluded that we have enough representations of the data model in enough codebases to reasonably worry about data inconsistency problems, none of which we need in our work on SDR.
12
12
 
@@ -55,7 +55,10 @@ Beyond what is necessary to test the generator, the Cocina model classes are not
55
55
 
56
56
  ## Testing validation changes
57
57
 
58
- If there is a possibility that a model, mapping, or validation change will conflict with some existing objects then [validate-cocina](https://github.com/sul-dlss/dor-services-app/blob/main/bin/validate-cocina) should be used for testing. This must be run on the `sdr-infra` VM since it requires deploying a branch of cocina-models.
58
+ If there is a possibility that a model, mapping, or validation change will conflict with some existing objects then `bin/validate-data` should be used for testing. This operates on a sample of objects from the repository and reports any validation errors. You may get the sample by running the script [bin/export-cocina-head-versions](https://github.com/sul-dlss/dor-services-app/pull/5854) and downloading the data file to your computer.
59
+
60
+
61
+ Alternatively, you can use [validate-cocina](https://github.com/sul-dlss/dor-services-app/blob/main/bin/validate-cocina) for testing. This must be run on the `sdr-infra` VM since it requires deploying a branch of cocina-models. It is slower than using `bin/validate-data`, but all of the data is completely up to date.
59
62
 
60
63
  For background on object validation, as it relates to migrating versions, see: https://github.com/sul-dlss/dor-services-app/wiki/Migrating-Cocina
61
64
 
@@ -153,21 +156,7 @@ This list of services is known to include:
153
156
  * [sul-dlss/sdr-api](https://github.com/sul-dlss/sdr-api)
154
157
  * [sul-dlss/dor-services-app](https://github.com/sul-dlss/dor-services-app/)
155
158
 
156
-
157
- #### Step 3A: Update API specifications
158
-
159
- **NOTE**: You can skip step 3A if there have not been any changes to the `cocina-models` OpenAPI spec since the prior release.
160
-
161
- The cocina-models gem is used in applications that have an API specification that accepts Cocina models.
162
-
163
- #### Step 3B: Bump gems and create the PRs
164
-
165
- If you updated the `schema.json` in step 3A, use the same PR for step 3B. Why? When [dor-services-app](https://github.com/sul-dlss/dor-services-app), for example, is updated to use the new models (via the auto-update script), these clients should be updated at the same time or there is risk of models produced by dor-services-app not being acceptable to the clients.
166
-
167
- 1. Perform `bundle update --conservative cocina-models dor-services-client` in the services above and make PRs for those repos if they don't already exist. You may first need to update how these gems are pinned in the `Gemfile` in order to bump them.
168
- 2. Note that sdr-client is not currently used in these applications, but if it were, would also need to be bumped to the latest release.
169
-
170
- #### Step 3C: Merge 'em
159
+ Perform `bundle update --conservative cocina-models dor-services-client` in the services above and make PRs for those repos. You may first need to update how these gems are pinned in the `Gemfile` in order to bump them.
171
160
 
172
161
  Get the directly coupled services PRs merged before the deploy in step 5.
173
162
 
data/bin/validate-data ADDED
@@ -0,0 +1,282 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Validate JSONL (XZ compressed) files against the schema
5
+
6
+ # Trap Ctrl+C to exit gracefully
7
+ Signal.trap('INT') do
8
+ puts "\nTerminated"
9
+ exit(1)
10
+ end
11
+
12
+ Signal.trap('TERM') do
13
+ puts "\nTerminated"
14
+ exit(1)
15
+ end
16
+
17
+ require 'bundler/setup'
18
+ require 'cocina/models'
19
+ require 'json'
20
+ require 'ruby-progressbar'
21
+ require 'optparse'
22
+
23
+ # Parse command line options
24
+ def parse_options # rubocop:disable Metrics/MethodLength
25
+ options = {
26
+ processes: 12,
27
+ count: nil,
28
+ batch_size: 100
29
+ }
30
+
31
+ OptionParser.new do |opts|
32
+ opts.banner = 'Usage: validate-data FILENAME [options]'
33
+
34
+ opts.on('-p', '--processes NUM', Integer, 'Number of processes to use (default: 12)') do |p|
35
+ options[:processes] = p
36
+ end
37
+
38
+ opts.on('-c', '--count NUM', Integer, 'Total line count (skips counting pass if provided)') do |c|
39
+ options[:count] = c
40
+ end
41
+
42
+ opts.on('-b', '--batch-size NUM', Integer, 'Batch size for worker processing (default: 100)') do |b|
43
+ options[:batch_size] = b
44
+ end
45
+
46
+ opts.on('-h', '--help', 'Display this help message') do
47
+ puts opts
48
+ exit
49
+ end
50
+ end.parse!
51
+
52
+ # Filename is required as a positional argument
53
+ if ARGV.empty?
54
+ puts 'Error: FILENAME is required'
55
+ puts 'Usage: validate-data FILENAME [options]'
56
+ puts 'Run with --help for more information'
57
+ exit 1
58
+ end
59
+
60
+ options[:filename] = ARGV[0]
61
+ options
62
+ end
63
+
64
+ # Count lines in the file
65
+ def count_lines(filename)
66
+ count = 0
67
+ IO.popen(['xzcat', filename]) do |io|
68
+ io.each_line { count += 1 }
69
+ end
70
+ count
71
+ end
72
+
73
+ # Get total line count (either from option or by counting)
74
+ def get_total_lines(filename, provided_count)
75
+ if provided_count
76
+ puts "Using provided line count: #{provided_count}"
77
+ provided_count
78
+ else
79
+ puts 'Counting lines...'
80
+ total = count_lines(filename)
81
+ puts "Total lines to validate: #{total}"
82
+ total
83
+ end
84
+ end
85
+
86
+ # Worker process that reads batches from a pipe and validates
87
+ def worker_process(reader) # rubocop:disable Metrics/MethodLength
88
+ errors = []
89
+
90
+ loop do
91
+ # Read length prefix (4 bytes)
92
+ length_data = reader.read(4)
93
+ break if length_data.nil? || length_data.empty?
94
+
95
+ length = length_data.unpack1('N')
96
+ data = reader.read(length)
97
+ batch = Marshal.load(data) # rubocop:disable Security/MarshalLoad
98
+
99
+ # Process each line in the batch
100
+ batch.each do |line_num, line_content|
101
+ json = JSON.parse(line_content)
102
+ Cocina::Models.build(json)
103
+ rescue JSON::ParserError => e
104
+ errors << { line: line_num, error: "JSON Parse Error: #{e.message}" }
105
+ rescue Cocina::Models::ValidationError => e
106
+ errors << { line: line_num, error: "Validation Error: #{e.message}" }
107
+ rescue Cocina::Models::UnknownTypeError => e
108
+ errors << { line: line_num, error: "Unknown Type Error: #{e.message}" }
109
+ rescue StandardError => e
110
+ errors << { line: line_num, error: "Error: #{e.class} - #{e.message}" }
111
+ end
112
+ end
113
+
114
+ errors
115
+ end
116
+
117
+ # Spawn worker processes
118
+ def spawn_workers(num_processes) # rubocop:disable Metrics/MethodLength
119
+ workers = []
120
+ result_readers = []
121
+
122
+ num_processes.times do
123
+ work_reader, work_writer = IO.pipe
124
+ result_reader, result_writer = IO.pipe
125
+
126
+ pid = fork do
127
+ # Child process
128
+ work_writer.close
129
+ result_reader.close
130
+
131
+ errors = worker_process(work_reader)
132
+
133
+ # Send results back
134
+ result_writer.write(Marshal.dump(errors))
135
+ result_writer.close
136
+ work_reader.close
137
+ exit(0)
138
+ end
139
+
140
+ # Parent process
141
+ work_reader.close
142
+ result_writer.close
143
+
144
+ workers << { pid: pid, writer: work_writer }
145
+ result_readers << result_reader
146
+ end
147
+
148
+ [workers, result_readers]
149
+ end
150
+
151
+ # Send a batch to a worker
152
+ def send_batch(worker, batch)
153
+ return if batch.empty?
154
+
155
+ data = Marshal.dump(batch)
156
+ worker[:writer].write([data.bytesize].pack('N'))
157
+ worker[:writer].write(data)
158
+ end
159
+
160
+ # Stream file and distribute work to workers
161
+ def distribute_work(filename, workers, batch_size, total_lines) # rubocop:disable Metrics/MethodLength
162
+ line_number = 0
163
+ current_worker = 0
164
+ batch = []
165
+
166
+ # Create progress bar
167
+ progressbar = ProgressBar.create(
168
+ title: 'Validating',
169
+ total: total_lines,
170
+ format: '%t: |%B| %p%% %c/%C %a %e',
171
+ throttle_rate: 0.1
172
+ )
173
+
174
+ IO.popen(['xzcat', filename]) do |io|
175
+ io.each_line do |line|
176
+ line_number += 1
177
+ batch << [line_number, line]
178
+
179
+ # When batch is full, send to worker
180
+ if batch.size >= batch_size
181
+ worker = workers[current_worker]
182
+ send_batch(worker, batch)
183
+ batch = []
184
+ current_worker = (current_worker + 1) % workers.length
185
+ end
186
+
187
+ # Update progress bar
188
+ progressbar.increment
189
+ end
190
+ end
191
+
192
+ # Send any remaining lines in the last batch
193
+ if batch.any?
194
+ worker = workers[current_worker]
195
+ send_batch(worker, batch)
196
+ end
197
+
198
+ # Final progress update
199
+ progressbar.finish
200
+ end
201
+
202
+ # Collect results from all workers
203
+ def collect_results(workers, result_readers)
204
+ # Close all worker input pipes to signal completion
205
+ workers.each { |w| w[:writer].close }
206
+
207
+ puts 'Collecting results from workers...'
208
+ all_errors = []
209
+
210
+ result_readers.each do |reader|
211
+ data = reader.read
212
+ worker_errors = Marshal.load(data) # rubocop:disable Security/MarshalLoad
213
+ all_errors.concat(worker_errors)
214
+ reader.close
215
+ end
216
+
217
+ # Wait for all workers to complete
218
+ workers.each { |w| Process.wait(w[:pid]) }
219
+
220
+ all_errors
221
+ end
222
+
223
+ # Print validation summary
224
+ def print_summary(total_lines, errors, elapsed_time) # rubocop:disable Metrics/MethodLength
225
+ puts '=' * 80
226
+ puts 'VALIDATION SUMMARY'
227
+ puts '=' * 80
228
+ puts "Total lines processed: #{total_lines}"
229
+ puts "Lines with errors: #{errors.length}"
230
+ puts "Success rate: #{((total_lines - errors.length).to_f / total_lines * 100).round(2)}%"
231
+ puts "Time elapsed: #{elapsed_time.round(2)} seconds"
232
+ puts "Throughput: #{(total_lines / elapsed_time).round(0)} lines/second"
233
+
234
+ return unless errors.any?
235
+
236
+ puts "\n"
237
+ puts 'Error details:'
238
+ puts '-' * 80
239
+ # Sort errors by line number for better readability
240
+ errors.sort_by { |e| e[:line] }.each do |error|
241
+ puts "Line #{error[:line]}: #{error[:error]}"
242
+ end
243
+ puts "\n"
244
+ puts "Line numbers with errors: #{errors.sort_by { |e| e[:line] }.map { |e| e[:line] }.join(', ')}"
245
+ end
246
+
247
+ # Main execution
248
+ def main
249
+ options = parse_options
250
+
251
+ puts "Validating file: #{options[:filename]}"
252
+ puts "Using #{options[:processes]} processes with batch size #{options[:batch_size]}"
253
+ puts '=' * 80
254
+
255
+ # Get total line count
256
+ total_lines = get_total_lines(options[:filename], options[:count])
257
+ puts '=' * 80
258
+
259
+ # Spawn worker processes
260
+ workers, result_readers = spawn_workers(options[:processes])
261
+
262
+ # Start timing
263
+ start_time = Time.now
264
+
265
+ # Distribute work to workers
266
+ distribute_work(options[:filename], workers, options[:batch_size], total_lines)
267
+
268
+ # Collect results
269
+ all_errors = collect_results(workers, result_readers)
270
+
271
+ # Calculate elapsed time
272
+ elapsed_time = Time.now - start_time
273
+
274
+ # Print summary
275
+ print_summary(total_lines, all_errors, elapsed_time)
276
+
277
+ # Exit with appropriate code
278
+ exit(all_errors.empty? ? 0 : 1)
279
+ end
280
+
281
+ # Run the script
282
+ main
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi schema
5
+ # Class for generating from a JSON schema
6
6
  class Datatype < SchemaBase
7
7
  def generate
8
8
  <<~RUBY
@@ -4,7 +4,7 @@ require 'fileutils'
4
4
 
5
5
  module Cocina
6
6
  module Generator
7
- # Class for generating Cocina models from openapi.
7
+ # Class for generating Cocina models from JSON Schema.
8
8
  class Generator < Thor # rubocop:disable Metrics/ClassLength
9
9
  include Thor::Actions
10
10
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi schema
5
+ # Class for generating from a JSON schema
6
6
  class Schema < SchemaBase
7
7
  def schema_properties
8
8
  @schema_properties ||= (properties + all_of_properties + one_of_properties).uniq(&:key)
@@ -112,7 +112,7 @@ module Cocina
112
112
  key: key,
113
113
  # The property does less validation because it may vary between
114
114
  # different oneOf schemas. Validation is still performed
115
- # by openAPI.
115
+ # by JSON Schema.
116
116
  relaxed: true,
117
117
  parent: self,
118
118
  schemas: schemas)
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi array
5
+ # Class for generating from a JSON Schema array
6
6
  class SchemaArray < SchemaBase
7
7
  GENERIC_ITEMS_NAME = 'items'
8
8
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Base class for generating from openapi
5
+ # Base class for generating from a JSON Schema
6
6
  class SchemaBase
7
7
  attr_reader :schema_doc, :key, :required, :nullable, :parent, :relaxed, :schemas, :lite
8
8
 
@@ -74,7 +74,7 @@ module Cocina
74
74
  def relaxed_comment
75
75
  return '' unless relaxed
76
76
 
77
- "# Validation of this property is relaxed. See the openapi for full validation.\n"
77
+ "# Validation of this property is relaxed. See the schema.json for full validation.\n"
78
78
  end
79
79
 
80
80
  # dry-types-based types contain the word `Types` (e.g., `Types::String`), and custom types (e.g., `SourceId`) do not
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi reference
5
+ # Class for generating from a JSON Schema reference
6
6
  class SchemaRef < SchemaBase
7
7
  def generate
8
8
  if required && !relaxed
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi value
5
+ # Class for generating from a JSON Schema value
6
6
  class SchemaValue < SchemaBase
7
7
  def generate
8
8
  if required && !relaxed
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Cocina
4
- # Module for generating Cocina models from openapi.
4
+ # Module for generating Cocina models from a JSON Schema.
5
5
  module Generator
6
6
  end
7
7
  end
@@ -3,8 +3,7 @@
3
3
  module Cocina
4
4
  # Wrapper for JSON Schema support using json_schemer
5
5
  class JsonSchemaWrapper
6
- class OpenApiError < StandardError; end
7
- class MissingReferenceError < OpenApiError; end
6
+ class MissingReferenceError < StandardError; end
8
7
 
9
8
  def initialize(spec_hash, strict_reference_validation: true)
10
9
  @spec = spec_hash
@@ -4,15 +4,15 @@ module Cocina
4
4
  module Models
5
5
  class Access < Struct
6
6
  # Access level.
7
- # Validation of this property is relaxed. See the openapi for full validation.
7
+ # Validation of this property is relaxed. See the schema.json for full validation.
8
8
  attribute? :view, Types::Strict::String.optional.default('dark')
9
9
  # Download access level.
10
- # Validation of this property is relaxed. See the openapi for full validation.
10
+ # Validation of this property is relaxed. See the schema.json for full validation.
11
11
  attribute? :download, Types::Strict::String.optional.default('none')
12
12
  # Not used for this access type, must be null.
13
- # Validation of this property is relaxed. See the openapi for full validation.
13
+ # Validation of this property is relaxed. See the schema.json for full validation.
14
14
  attribute? :location, Types::Strict::String.optional
15
- # Validation of this property is relaxed. See the openapi for full validation.
15
+ # Validation of this property is relaxed. See the schema.json for full validation.
16
16
  attribute? :controlledDigitalLending, Types::Strict::Bool.optional.default(false)
17
17
  end
18
18
  end
@@ -19,15 +19,15 @@ module Cocina
19
19
  # CC, RightsStatement.org URI, etc.).
20
20
  attribute? :license, License.optional.enum(nil, 'https://www.gnu.org/licenses/agpl.txt', 'https://www.apache.org/licenses/LICENSE-2.0', 'https://opensource.org/licenses/BSD-2-Clause', 'https://opensource.org/licenses/BSD-3-Clause', 'https://creativecommons.org/licenses/by/4.0/legalcode', 'https://creativecommons.org/licenses/by-nc/4.0/legalcode', 'https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode', 'https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode', 'https://creativecommons.org/licenses/by-nd/4.0/legalcode', 'https://creativecommons.org/licenses/by-sa/4.0/legalcode', 'https://creativecommons.org/publicdomain/zero/1.0/legalcode', 'https://opensource.org/licenses/cddl1', 'https://www.eclipse.org/legal/epl-2.0', 'https://www.gnu.org/licenses/gpl-3.0-standalone.html', 'https://www.isc.org/downloads/software-support-policy/isc-license/', 'https://www.gnu.org/licenses/lgpl-3.0-standalone.html', 'https://opensource.org/licenses/MIT', 'https://www.mozilla.org/MPL/2.0/', 'https://opendatacommons.org/licenses/by/1-0/', 'http://opendatacommons.org/licenses/odbl/1.0/', 'https://opendatacommons.org/licenses/odbl/1-0/', 'https://creativecommons.org/publicdomain/mark/1.0/', 'https://opendatacommons.org/licenses/pddl/1-0/', 'https://creativecommons.org/licenses/by/3.0/legalcode', 'https://creativecommons.org/licenses/by-sa/3.0/legalcode', 'https://creativecommons.org/licenses/by-nd/3.0/legalcode', 'https://creativecommons.org/licenses/by-nc/3.0/legalcode', 'https://creativecommons.org/licenses/by-nc-sa/3.0/legalcode', 'https://creativecommons.org/licenses/by-nc-nd/3.0/legalcode', 'https://cocina.sul.stanford.edu/licenses/none')
21
21
  # Access level.
22
- # Validation of this property is relaxed. See the openapi for full validation.
22
+ # Validation of this property is relaxed. See the schema.json for full validation.
23
23
  attribute? :view, Types::Strict::String.optional.default('dark')
24
24
  # Download access level.
25
- # Validation of this property is relaxed. See the openapi for full validation.
25
+ # Validation of this property is relaxed. See the schema.json for full validation.
26
26
  attribute? :download, Types::Strict::String.optional.default('none')
27
27
  # Not used for this access type, must be null.
28
- # Validation of this property is relaxed. See the openapi for full validation.
28
+ # Validation of this property is relaxed. See the schema.json for full validation.
29
29
  attribute? :location, Types::Strict::String.optional
30
- # Validation of this property is relaxed. See the openapi for full validation.
30
+ # Validation of this property is relaxed. See the schema.json for full validation.
31
31
  attribute? :controlledDigitalLending, Types::Strict::Bool.optional.default(false)
32
32
  end
33
33
  end
@@ -16,9 +16,9 @@ module Cocina
16
16
  attribute :label, Types::Strict::String
17
17
  attribute :version, Types::Strict::Integer
18
18
  # Administrative properties for an AdminPolicy
19
- # Validation of this property is relaxed. See the openapi for full validation.
19
+ # Validation of this property is relaxed. See the schema.json for full validation.
20
20
  attribute? :administrative, AdminPolicyAdministrative.optional
21
- # Validation of this property is relaxed. See the openapi for full validation.
21
+ # Validation of this property is relaxed. See the schema.json for full validation.
22
22
  attribute? :description, Description.optional
23
23
  end
24
24
  end
@@ -25,13 +25,13 @@ module Cocina
25
25
  # Version for the Collection within SDR.
26
26
  attribute :version, Types::Strict::Integer
27
27
  # Access metadata for collections
28
- # Validation of this property is relaxed. See the openapi for full validation.
28
+ # Validation of this property is relaxed. See the schema.json for full validation.
29
29
  attribute? :access, CollectionAccess.optional
30
- # Validation of this property is relaxed. See the openapi for full validation.
30
+ # Validation of this property is relaxed. See the schema.json for full validation.
31
31
  attribute? :administrative, Administrative.optional
32
- # Validation of this property is relaxed. See the openapi for full validation.
32
+ # Validation of this property is relaxed. See the schema.json for full validation.
33
33
  attribute? :description, Description.optional
34
- # Validation of this property is relaxed. See the openapi for full validation.
34
+ # Validation of this property is relaxed. See the schema.json for full validation.
35
35
  attribute? :identification, CollectionIdentification.optional
36
36
  end
37
37
  end