cocina-models 0.113.0 → 0.115.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '09669a342a43a6be309144d1d71748372251fdb37ad96c480e7ce88127749ec5'
4
- data.tar.gz: 99e35e0634f65f03e545dfabc40d95cb6a638c1f0f0e05ffbf14315738ac895c
3
+ metadata.gz: 4d9e710c098333b0b95f15f1f624a365b7067e3ef638d48e7f04bcbc72f2b038
4
+ data.tar.gz: 2bcb210b1f563ddc1916fe8a718f326184c75eb077f3c931372083ad1bf8c179
5
5
  SHA512:
6
- metadata.gz: 29c840e670181b57df7d29573da1cc019cd6a34300f43548a762f0d394736af09822bb61e85acd7eefa6426e37996fbe5869c4b58c8aa7e9176a201af14f91ec
7
- data.tar.gz: 00f39759126179cd79f5cda1a1cabf93b3da2ba19ef0ecd90fa1f1c10735b31ff966e9496e93cac69fb35255b0dc1c1e45176d70f743630005eb883384e38132
6
+ metadata.gz: ec2b6a2e5a92c4cc712e09e2edc23a6975c37eb71f13133e1400d21d999880f1a79a8962624bdd339cffaa08e88a0fffd4395c716b1ed49360c5045a43edf666
7
+ data.tar.gz: 69f0f8869d2536477765b033c39387ef4693f648e50747fa49e15d1cb1cd27be7bc830dec49bf5beb06a1a04472ddf78750201c60c31d002f47006ed72c99751
data/.circleci/config.yml CHANGED
@@ -11,5 +11,5 @@ workflows:
11
11
  context: dlss
12
12
  before-test:
13
13
  - run:
14
- name: validate openapi
14
+ name: validate schema
15
15
  command: bin/validate-schema schema.json
data/.rubocop.yml CHANGED
@@ -540,7 +540,6 @@ Style/RedundantArrayFlatten: # new in 1.76
540
540
  Enabled: true
541
541
  Style/ArrayIntersectWithSingleElement: # new in 1.81
542
542
  Enabled: true
543
-
544
543
  Style/EmptyClassDefinition: # new in 1.83
545
544
  Enabled: true
546
545
  Style/ModuleMemberExistenceCheck: # new in 1.82
@@ -552,4 +551,30 @@ Style/ReverseFind: # new in 1.83
552
551
  RSpec/LeakyLocalVariable: # new in 3.8
553
552
  Enabled: true
554
553
  RSpec/Output: # new in 3.9
555
- Enabled: true
554
+ Enabled: true
555
+ Lint/DataDefineOverride: # new in 1.85
556
+ Enabled: true
557
+ Lint/UnreachablePatternBranch: # new in 1.85
558
+ Enabled: true
559
+ Style/FileOpen: # new in 1.85
560
+ Enabled: true
561
+ Style/MapJoin: # new in 1.85
562
+ Enabled: true
563
+ Style/OneClassPerFile: # new in 1.85
564
+ Enabled: true
565
+ Style/PartitionInsteadOfDoubleSelect: # new in 1.85
566
+ Enabled: true
567
+ Style/PredicateWithKind: # new in 1.85
568
+ Enabled: true
569
+ Style/ReduceToHash: # new in 1.85
570
+ Enabled: true
571
+ Style/RedundantMinMaxBy: # new in 1.85
572
+ Enabled: true
573
+ Style/RedundantStructKeywordInit: # new in 1.85
574
+ Enabled: true
575
+ Style/SelectByKind: # new in 1.85
576
+ Enabled: true
577
+ Style/SelectByRange: # new in 1.85
578
+ Enabled: true
579
+ Style/TallyMethod: # new in 1.85
580
+ Enabled: true
data/Gemfile CHANGED
@@ -7,3 +7,4 @@ gemspec
7
7
 
8
8
  gem 'debug'
9
9
  gem 'rspec_junit_formatter' # For CircleCI
10
+ gem 'ruby-progressbar'
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cocina-models (0.113.0)
4
+ cocina-models (0.115.0)
5
5
  activesupport
6
6
  deprecation
7
7
  dry-struct (~> 1.0)
@@ -59,7 +59,7 @@ GEM
59
59
  concurrent-ruby (~> 1.0)
60
60
  dry-core (~> 1.1)
61
61
  zeitwerk (~> 2.6)
62
- dry-struct (1.8.0)
62
+ dry-struct (1.8.1)
63
63
  dry-core (~> 1.1)
64
64
  dry-types (~> 1.8, >= 1.8.2)
65
65
  ice_nine (~> 0.11)
@@ -86,8 +86,8 @@ GEM
86
86
  prism (>= 1.3.0)
87
87
  rdoc (>= 4.0.0)
88
88
  reline (>= 0.4.2)
89
- json (2.18.1)
90
- json-schema (6.1.0)
89
+ json (2.19.1)
90
+ json-schema (6.2.0)
91
91
  addressable (~> 2.8)
92
92
  bigdecimal (>= 3.1, < 5)
93
93
  json_schemer (2.5.0)
@@ -100,7 +100,7 @@ GEM
100
100
  language_server-protocol (3.17.0.5)
101
101
  lint_roller (1.1.0)
102
102
  logger (1.7.0)
103
- mcp (0.7.1)
103
+ mcp (0.8.0)
104
104
  json-schema (>= 4.1)
105
105
  minitest (6.0.2)
106
106
  drb (~> 2.0)
@@ -124,7 +124,7 @@ GEM
124
124
  psych (5.3.1)
125
125
  date
126
126
  stringio
127
- public_suffix (7.0.2)
127
+ public_suffix (7.0.5)
128
128
  racc (1.8.1)
129
129
  rainbow (3.1.1)
130
130
  rake (13.3.1)
@@ -150,7 +150,7 @@ GEM
150
150
  rspec-support (3.13.7)
151
151
  rspec_junit_formatter (0.6.0)
152
152
  rspec-core (>= 2, < 4, != 2.12.0)
153
- rubocop (1.85.0)
153
+ rubocop (1.85.1)
154
154
  json (~> 2.3)
155
155
  language_server-protocol (~> 3.17.0.2)
156
156
  lint_roller (~> 1.1.0)
@@ -162,7 +162,7 @@ GEM
162
162
  rubocop-ast (>= 1.49.0, < 2.0)
163
163
  ruby-progressbar (~> 1.7)
164
164
  unicode-display_width (>= 2.4.0, < 4.0)
165
- rubocop-ast (1.49.0)
165
+ rubocop-ast (1.49.1)
166
166
  parser (>= 3.3.7.2)
167
167
  prism (~> 1.7)
168
168
  rubocop-rake (0.7.1)
@@ -209,6 +209,7 @@ DEPENDENCIES
209
209
  rubocop (~> 1.24)
210
210
  rubocop-rake
211
211
  rubocop-rspec
212
+ ruby-progressbar
212
213
  simplecov
213
214
 
214
215
  CHECKSUMS
@@ -218,7 +219,7 @@ CHECKSUMS
218
219
  attr_extras (7.1.0) sha256=d96fc9a9dd5d85ba2d37762440a816f840093959ae26bb90da994c2d9f1fc827
219
220
  base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
220
221
  bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
221
- cocina-models (0.113.0)
222
+ cocina-models (0.115.0)
222
223
  concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab
223
224
  connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a
224
225
  date (3.5.1) sha256=750d06384d7b9c15d562c76291407d89e368dda4d4fff957eb94962d325a0dc0
@@ -230,7 +231,7 @@ CHECKSUMS
230
231
  dry-core (1.2.0) sha256=0cc5a7da88df397f153947eeeae42e876e999c1e30900f3c536fb173854e96a1
231
232
  dry-inflector (1.3.1) sha256=7fb0c2bb04f67638f25c52e7ba39ab435d922a3a5c3cd196120f63accb682dcc
232
233
  dry-logic (1.6.0) sha256=da6fedbc0f90fc41f9b0cc7e6f05f5d529d1efaef6c8dcc8e0733f685745cea2
233
- dry-struct (1.8.0) sha256=74c38b559924fb6462ac43ec780c4533a082d7b1d238a8d7857b773b3b8e2966
234
+ dry-struct (1.8.1) sha256=033868594c45241540172bf1ebbc8bb76b72b4f0717072325deba38ac13e80f1
234
235
  dry-types (1.9.1) sha256=baebeecdb9f8395d6c9d227b62011279440943e3ef2468fe8ccc1ba11467f178
235
236
  edtf (3.2.0) sha256=a15a0ee274e49c8047a3ebb5d61d793ba44f7f8ffbf0595392c467e3ea8d2447
236
237
  equivalent-xml (0.6.0) sha256=8919761efa848ad0846369ff8be1f646b17e5061698c4867b09829000cc3f487
@@ -240,14 +241,14 @@ CHECKSUMS
240
241
  ice_nine (0.11.2) sha256=5d506a7d2723d5592dc121b9928e4931742730131f22a1a37649df1c1e2e63db
241
242
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
242
243
  irb (1.17.0) sha256=168c4ddb93d8a361a045c41d92b2952c7a118fa73f23fe14e55609eb7a863aae
243
- json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
244
- json-schema (6.1.0) sha256=6bf70a2cfb6dfd5a06da28093fa8190f324c88eabd36a7f47097f227321dc702
244
+ json (2.19.1) sha256=dd94fdc59e48bff85913829a32350b3148156bc4fd2a95a2568a78b11344082d
245
+ json-schema (6.2.0) sha256=e8bff46ed845a22c1ab2bd0d7eccf831c01fe23bb3920caa4c74db4306813666
245
246
  json_schemer (2.5.0) sha256=2f01fb4cce721a4e08dd068fc2030cffd0702a7f333f1ea2be6e8991f00ae396
246
247
  jsonpath (1.1.5) sha256=29f70467193a2dc93ab864ec3d3326d54267961acc623f487340eb9c34931dbe
247
248
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
248
249
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
249
250
  logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
250
- mcp (0.7.1) sha256=fa967895d6952bad0d981ea907731d8528d2c246d2079d56a9c8bae83d14f1c7
251
+ mcp (0.8.0) sha256=ae8bd146bb8e168852866fd26f805f52744f6326afb3211e073f78a95e0c34fb
251
252
  minitest (6.0.2) sha256=db6e57956f6ecc6134683b4c87467d6dd792323c7f0eea7b93f66bd284adbc3d
252
253
  multi_json (1.19.1) sha256=7aefeff8f2c854bf739931a238e4aea64592845e0c0395c8a7d2eea7fdd631b7
253
254
  nokogiri (1.19.1-arm64-darwin) sha256=dfe2d337e6700eac47290407c289d56bcf85805d128c1b5a6434ddb79731cb9e
@@ -260,7 +261,7 @@ CHECKSUMS
260
261
  prettyprint (0.2.0) sha256=2bc9e15581a94742064a3cc8b0fb9d45aae3d03a1baa6ef80922627a0766f193
261
262
  prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
262
263
  psych (5.3.1) sha256=eb7a57cef10c9d70173ff74e739d843ac3b2c019a003de48447b2963d81b1974
263
- public_suffix (7.0.2) sha256=9114090c8e4e7135c1fd0e7acfea33afaab38101884320c65aaa0ffb8e26a857
264
+ public_suffix (7.0.5) sha256=1a8bb08f1bbea19228d3bed6e5ed908d1cb4f7c2726d18bd9cadf60bc676f623
264
265
  racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
265
266
  rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
266
267
  rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
@@ -273,8 +274,8 @@ CHECKSUMS
273
274
  rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
274
275
  rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
275
276
  rspec_junit_formatter (0.6.0) sha256=40dde674e6ae4e6cc0ff560da25497677e34fefd2338cc467a8972f602b62b15
276
- rubocop (1.85.0) sha256=317407feb681a07d54f64d2f9e1d6b6af1ce7678e51cd658e3ad8bd66da48c01
277
- rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
277
+ rubocop (1.85.1) sha256=3dbcf9e961baa4c376eeeb2a03913dca5e3987033b04d38fa538aa1e7406cc77
278
+ rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035
278
279
  rubocop-rake (0.7.1) sha256=3797f2b6810c3e9df7376c26d5f44f3475eda59eb1adc38e6f62ecf027cbae4d
279
280
  rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
280
281
  ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
@@ -294,4 +295,4 @@ CHECKSUMS
294
295
  zeitwerk (2.7.5) sha256=d8da92128c09ea6ec62c949011b00ed4a20242b255293dd66bf41545398f73dd
295
296
 
296
297
  BUNDLED WITH
297
- 4.0.7
298
+ 4.0.8
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
 
7
7
  The cocina-models gem is a Ruby implementation of the Stanford Digital Repository (SDR) data model, which we named "Cocina." The data being modeled is oriented around digital repository objects.
8
8
 
9
- The data model is expressed in an OpenAPI specification that lives in this codebase. Expressing the model in such a spec allows for rich validation (using gems such as `json_schemer`). The gem provides a set of generators (see below) to generate Ruby classes from the specification, with modeling provided by dry-struct / dry-types. Together, these provide a way for consumers to validate objects against models and to manipulate those objects.
9
+ The data model is expressed in a JSON Schema specification that lives in this codebase. Expressing the model in such a spec allows for rich validation (using gems such as `json_schemer`). The gem provides a set of generators (see below) to generate Ruby classes from the specification, with modeling provided by dry-struct / dry-types. Together, these provide a way for consumers to validate objects against models and to manipulate those objects.
10
10
 
11
11
  Note that the data model encodes properties as camelCase, which the team believes to be consistent with other HTTP APIs and the original design of the Cocina data model. While using camelCase in Ruby code may look and feel wrong, we did explore automagic conversion between camelCase in the model and snake_case in the Ruby context. We ultimately concluded that we have enough representations of the data model in enough codebases to reasonably worry about data inconsistency problems, none of which we need in our work on SDR.
12
12
 
@@ -55,7 +55,10 @@ Beyond what is necessary to test the generator, the Cocina model classes are not
55
55
 
56
56
  ## Testing validation changes
57
57
 
58
- If there is a possibility that a model, mapping, or validation change will conflict with some existing objects then [validate-cocina](https://github.com/sul-dlss/dor-services-app/blob/main/bin/validate-cocina) should be used for testing. This must be run on the `sdr-infra` VM since it requires deploying a branch of cocina-models.
58
+ If there is a possibility that a model, mapping, or validation change will conflict with some existing objects then `bin/validate-data` should be used for testing. This operates on a sample of objects from the repository and reports any validation errors. You may get the sample by running the script [bin/export-cocina-head-versions](https://github.com/sul-dlss/dor-services-app?tab=readme-ov-file#export-data) and downloading the data file to your computer. Running a full validation takes about 2 hours.
59
+
60
+
61
+ Alternatively, you can use [validate-cocina](https://github.com/sul-dlss/dor-services-app/blob/main/bin/validate-cocina) for testing. This must be run on the `sdr-infra` VM since it requires deploying a branch of cocina-models. It is slower than using `bin/validate-data`, but all of the data is completely up to date.
59
62
 
60
63
  For background on object validation, as it relates to migrating versions, see: https://github.com/sul-dlss/dor-services-app/wiki/Migrating-Cocina
61
64
 
@@ -153,21 +156,7 @@ This list of services is known to include:
153
156
  * [sul-dlss/sdr-api](https://github.com/sul-dlss/sdr-api)
154
157
  * [sul-dlss/dor-services-app](https://github.com/sul-dlss/dor-services-app/)
155
158
 
156
-
157
- #### Step 3A: Update API specifications
158
-
159
- **NOTE**: You can skip step 3A if there have not been any changes to the `cocina-models` OpenAPI spec since the prior release.
160
-
161
- The cocina-models gem is used in applications that have an API specification that accepts Cocina models.
162
-
163
- #### Step 3B: Bump gems and create the PRs
164
-
165
- If you updated the `schema.json` in step 3A, use the same PR for step 3B. Why? When [dor-services-app](https://github.com/sul-dlss/dor-services-app), for example, is updated to use the new models (via the auto-update script), these clients should be updated at the same time or there is risk of models produced by dor-services-app not being acceptable to the clients.
166
-
167
- 1. Perform `bundle update --conservative cocina-models dor-services-client` in the services above and make PRs for those repos if they don't already exist. You may first need to update how these gems are pinned in the `Gemfile` in order to bump them.
168
- 2. Note that sdr-client is not currently used in these applications, but if it were, would also need to be bumped to the latest release.
169
-
170
- #### Step 3C: Merge 'em
159
+ Perform `bundle update --conservative cocina-models dor-services-client` in the services above and make PRs for those repos. You may first need to update how these gems are pinned in the `Gemfile` in order to bump them.
171
160
 
172
161
  Get the directly coupled services PRs merged before the deploy in step 5.
173
162
 
data/bin/validate-data ADDED
@@ -0,0 +1,287 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Validate JSONL (XZ compressed) files against the schema
5
+
6
# On interrupt (Ctrl+C) or a termination request, print a short notice and
# exit with status 1.
%w[INT TERM].each do |signal_name|
  Signal.trap(signal_name) do
    puts "\nTerminated"
    exit(1)
  end
end
16
+
17
+ require 'bundler/setup'
18
+ require 'cocina/models'
19
+ require 'json'
20
+ require 'ruby-progressbar'
21
+ require 'optparse'
22
+
23
# Parse command line options.
#
# Switches are read (and removed) from ARGV by OptionParser#parse!; the first
# remaining argument is taken as the required FILENAME. On a missing filename
# or a non-positive process count an error is printed and the script exits
# with status 1.
#
# @return [Hash] with keys :processes, :count, :batch_size and :filename
def parse_options # rubocop:disable Metrics/MethodLength
  options = {
    processes: 12,
    count: nil,
    batch_size: 100
  }

  OptionParser.new do |opts|
    opts.banner = 'Usage: validate-data FILENAME [options]'

    opts.on('-p', '--processes NUM', Integer, 'Number of processes to use (default: 12)') do |p|
      options[:processes] = p
    end

    opts.on('-c', '--count NUM', Integer, 'Total line count (skips counting pass if provided)') do |c|
      options[:count] = c
    end

    opts.on('-b', '--batch-size NUM', Integer, 'Batch size for worker processing (default: 100)') do |b|
      options[:batch_size] = b
    end

    opts.on('-h', '--help', 'Display this help message') do
      puts opts
      exit
    end
  end.parse!

  # Filename is required as a positional argument
  if ARGV.empty?
    puts 'Error: FILENAME is required'
    puts 'Usage: validate-data FILENAME [options]'
    puts 'Run with --help for more information'
    exit 1
  end

  # With fewer than one worker there is no process to hand batches to, so
  # reject the value here instead of failing later during distribution.
  unless options[:processes].positive?
    puts 'Error: --processes must be at least 1'
    exit 1
  end

  options[:filename] = ARGV[0]
  options
end
63
+
64
# Count the lines of an XZ-compressed file by streaming it through `xzcat`.
#
# @param filename [String] path handed to xzcat
# @return [Integer] number of lines in the decompressed output
# @raise [RuntimeError] if xzcat does not exit successfully, so that a missing
#   or unreadable file is not silently reported as containing zero lines
def count_lines(filename)
  count = IO.popen(['xzcat', filename]) { |io| io.each_line.count }

  # The block form of IO.popen closes the pipe and sets the child's status.
  status = Process.last_status
  raise "xzcat failed for #{filename} (#{status})" unless status.success?

  count
end
72
+
73
# Determine how many lines will be validated.
#
# @param filename [String] file to count when no count was supplied
# @param provided_count [Integer, nil] caller-supplied line count, if any
# @return [Integer] the supplied count, or the result of count_lines
def get_total_lines(filename, provided_count)
  unless provided_count
    puts 'Counting lines...'
    counted = count_lines(filename)
    puts "Total lines to validate: #{counted}"
    return counted
  end

  puts "Using provided line count: #{provided_count}"
  provided_count
end
85
+
86
# Worker loop: read length-prefixed batches from +reader+ until end of stream
# and validate every line they contain.
#
# @param reader [IO] stream of frames; each frame is a 4-byte length (unpacked
#   with 'N') followed by that many bytes of marshalled batch data
# @return [Array<Hash>] one { line:, error: } hash per line that failed
def worker_process(reader)
  failures = []

  loop do
    header = reader.read(4)
    break if header.nil? || header.empty?

    frame = reader.read(header.unpack1('N'))
    entries = Marshal.load(frame) # rubocop:disable Security/MarshalLoad

    entries.each do |line_num, line_content|
      failure = line_failure(line_num, line_content)
      failures << failure if failure
    end
  end

  failures
end

# Parse and build a single line; returns nil when it is valid, otherwise a
# { line:, error: } hash describing what went wrong.
def line_failure(line_num, line_content)
  json = JSON.parse(line_content)
  Cocina::Models.build(json)
  nil
rescue JSON::ParserError => e
  { line: line_num, error: "JSON Parse Error: #{e.message}" }
rescue Cocina::Models::ValidationError => e
  { line: line_num, error: "Validation Error: #{json['externalIdentifier']} - #{e.message}" }
rescue Cocina::Models::UnknownTypeError => e
  { line: line_num, error: "Unknown Type Error: #{e.message}" }
rescue StandardError => e
  { line: line_num, error: "Error: #{e.class} - #{e.message}" }
end
116
+
117
# Fork the worker processes and wire each one up with two pipes: one carrying
# batches to the worker and one carrying its marshalled error list back.
#
# @param num_processes [Integer] number of workers to fork
# @return [Array(Array<Hash>, Array<IO>)] the workers as { pid:, writer: }
#   hashes, and the matching result read ends in the same order
def spawn_workers(num_processes) # rubocop:disable Metrics/MethodLength
  workers = []
  result_readers = []

  num_processes.times do
    job_in, job_out = IO.pipe
    result_in, result_out = IO.pipe

    child_pid = fork do
      # In the child: drop the inherited write ends that belong to workers
      # forked earlier, then the two ends of our own pipes we do not use.
      workers.each { |earlier| earlier[:writer].close }
      job_out.close
      result_in.close

      found = worker_process(job_in)

      # Report the collected errors to the parent and shut down.
      result_out.write(Marshal.dump(found))
      result_out.close
      job_in.close
      exit(0)
    end

    # In the parent: keep only the write end for jobs and the read end for results.
    job_in.close
    result_out.close

    workers.push({ pid: child_pid, writer: job_out })
    result_readers.push(result_in)
  end

  [workers, result_readers]
end
155
+
156
# Write one batch to a worker's pipe as a frame: the marshalled batch preceded
# by its byte length packed with 'N' (4 bytes). Empty batches are not sent.
#
# @param worker [Hash] worker entry whose :writer is the pipe to write to
# @param batch [Array] the batch to serialise
def send_batch(worker, batch)
  return if batch.empty?

  payload = Marshal.dump(batch)
  header = [payload.bytesize].pack('N')
  pipe = worker[:writer]
  pipe.write(header)
  pipe.write(payload)
end
164
+
165
# Stream the decompressed file line by line and hand the lines to the workers
# in batches, rotating through the workers one batch at a time.
#
# @param filename [String] XZ-compressed file, read through `xzcat`
# @param workers [Array<Hash>] worker entries as accepted by send_batch
# @param batch_size [Integer] number of lines collected before a batch is sent
# @param total_lines [Integer] total used to size the progress bar
# @raise [RuntimeError] if xzcat does not exit successfully, so a missing or
#   corrupt input is not reported as a clean run over zero lines
def distribute_work(filename, workers, batch_size, total_lines) # rubocop:disable Metrics/MethodLength
  line_number = 0
  current_worker = 0
  batch = []

  progressbar = ProgressBar.create(
    title: 'Validating',
    total: total_lines,
    format: '%t: |%B| %p%% %c/%C %a %e',
    throttle_rate: 0.1
  )

  IO.popen(['xzcat', filename]) do |io|
    io.each_line do |line|
      line_number += 1
      batch << [line_number, line]

      # When the batch is full, send it and move on to the next worker
      if batch.size >= batch_size
        send_batch(workers[current_worker], batch)
        batch = []
        current_worker = (current_worker + 1) % workers.length
      end

      progressbar.increment
    end
  end

  # The block form of IO.popen closes the pipe and sets the child's status.
  status = Process.last_status
  raise "xzcat failed for #{filename} (#{status})" unless status.success?

  # Send any remaining lines in the last, partially filled batch
  send_batch(workers[current_worker], batch) if batch.any?

  progressbar.finish
end
206
+
207
# Close the workers' input pipes, read each worker's marshalled error list and
# wait for the worker processes to exit.
#
# A worker that closed its result pipe without writing anything (for example
# because it crashed) is recorded as an error entry with line 0 rather than
# aborting collection, so the remaining results are still gathered and every
# child is still waited for.
#
# @param workers [Array<Hash>] entries with :pid and :writer
# @param result_readers [Array<IO>] read ends of the workers' result pipes
# @return [Array<Hash>] all { line:, error: } entries from all workers
def collect_results(workers, result_readers) # rubocop:disable Metrics/MethodLength
  # Close all worker input pipes to signal completion
  workers.each { |w| w[:writer].close }

  puts 'Collecting results from workers...'
  all_errors = []

  result_readers.each do |reader|
    data = reader.read
    reader.close

    if data.empty?
      all_errors << { line: 0, error: 'Worker exited without reporting results; its batches were not validated' }
      next
    end

    all_errors.concat(Marshal.load(data)) # rubocop:disable Security/MarshalLoad
  end

  # Wait for all workers to complete
  workers.each { |w| Process.wait(w[:pid]) }

  all_errors
end
227
+
228
# Print the validation summary and, if there were any, the individual errors
# ordered by line number.
#
# @param total_lines [Integer] number of lines reported as processed
# @param errors [Array<Hash>] { line:, error: } entries
# @param elapsed_time [Numeric] duration in seconds
def print_summary(total_lines, errors, elapsed_time) # rubocop:disable Metrics/MethodLength
  # Guard the two divisions: zero lines would print "NaN%", and a zero
  # duration would make the throughput infinite, which cannot be rounded.
  success_rate = if total_lines.zero?
                   'n/a'
                 else
                   "#{((total_lines - errors.length).to_f / total_lines * 100).round(2)}%"
                 end
  throughput = elapsed_time.positive? ? (total_lines / elapsed_time).round : 0

  puts '=' * 80
  puts 'VALIDATION SUMMARY'
  puts '=' * 80
  puts "Total lines processed: #{total_lines}"
  puts "Lines with errors: #{errors.length}"
  puts "Success rate: #{success_rate}"
  puts "Time elapsed: #{elapsed_time.round(2)} seconds"
  puts "Throughput: #{throughput} lines/second"

  return unless errors.any?

  # Sort once by line number for readability and reuse for both listings
  sorted = errors.sort_by { |e| e[:line] }

  puts "\n"
  puts 'Error details:'
  puts '-' * 80
  sorted.each { |error| puts "Line #{error[:line]}: #{error[:error]}" }
  puts "\n"
  puts "Line numbers with errors: #{sorted.map { |e| e[:line] }.join(', ')}"
end
251
+
252
# Main execution: parse options, determine the line count, fan the file out
# to worker processes, gather their errors, print the summary and exit with
# status 0 when no errors were found, 1 otherwise.
def main # rubocop:disable Metrics/MethodLength
  options = parse_options

  puts "Validating file: #{options[:filename]}"
  puts "Using #{options[:processes]} processes with batch size #{options[:batch_size]}"
  puts '=' * 80

  # Get total line count
  total_lines = get_total_lines(options[:filename], options[:count])
  puts '=' * 80

  # Spawn worker processes
  workers, result_readers = spawn_workers(options[:processes])

  # Time the run with the monotonic clock so that wall-clock adjustments
  # during a long validation cannot distort the reported duration.
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  # Distribute work to workers
  distribute_work(options[:filename], workers, options[:batch_size], total_lines)

  # Collect results
  all_errors = collect_results(workers, result_readers)

  elapsed_time = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started

  # Print summary
  print_summary(total_lines, all_errors, elapsed_time)

  # Exit with appropriate code
  exit(all_errors.empty? ? 0 : 1)
end
285
+
286
+ # Run the script
287
+ main
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi schema
5
+ # Class for generating from a JSON schema
6
6
  class Datatype < SchemaBase
7
7
  def generate
8
8
  <<~RUBY
@@ -4,7 +4,7 @@ require 'fileutils'
4
4
 
5
5
  module Cocina
6
6
  module Generator
7
- # Class for generating Cocina models from openapi.
7
+ # Class for generating Cocina models from JSON Schema.
8
8
  class Generator < Thor # rubocop:disable Metrics/ClassLength
9
9
  include Thor::Actions
10
10
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi schema
5
+ # Class for generating from a JSON schema
6
6
  class Schema < SchemaBase
7
7
  def schema_properties
8
8
  @schema_properties ||= (properties + all_of_properties + one_of_properties).uniq(&:key)
@@ -112,7 +112,7 @@ module Cocina
112
112
  key: key,
113
113
  # The property does less validation because it may vary between
114
114
  # different oneOf schemas. Validation is still performed
115
- # by openAPI.
115
+ # by JSON Schema.
116
116
  relaxed: true,
117
117
  parent: self,
118
118
  schemas: schemas)
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi array
5
+ # Class for generating from a JSON Schema array
6
6
  class SchemaArray < SchemaBase
7
7
  GENERIC_ITEMS_NAME = 'items'
8
8
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Base class for generating from openapi
5
+ # Base class for generating from a JSON Schema
6
6
  class SchemaBase
7
7
  attr_reader :schema_doc, :key, :required, :nullable, :parent, :relaxed, :schemas, :lite
8
8
 
@@ -74,7 +74,7 @@ module Cocina
74
74
  def relaxed_comment
75
75
  return '' unless relaxed
76
76
 
77
- "# Validation of this property is relaxed. See the openapi for full validation.\n"
77
+ "# Validation of this property is relaxed. See the schema.json for full validation.\n"
78
78
  end
79
79
 
80
80
  # dry-types-based types contain the word `Types` (e.g., `Types::String`), and custom types (e.g., `SourceId`) do not
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi reference
5
+ # Class for generating from a JSON Schema reference
6
6
  class SchemaRef < SchemaBase
7
7
  def generate
8
8
  if required && !relaxed
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi value
5
+ # Class for generating from a JSON Schema value
6
6
  class SchemaValue < SchemaBase
7
7
  def generate
8
8
  if required && !relaxed
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Cocina
4
- # Module for generating Cocina models from openapi.
4
+ # Module for generating Cocina models from a JSON Schema.
5
5
  module Generator
6
6
  end
7
7
  end
@@ -3,8 +3,7 @@
3
3
  module Cocina
4
4
  # Wrapper for JSON Schema support using json_schemer
5
5
  class JsonSchemaWrapper
6
- class OpenApiError < StandardError; end
7
- class MissingReferenceError < OpenApiError; end
6
+ class MissingReferenceError < StandardError; end
8
7
 
9
8
  def initialize(spec_hash, strict_reference_validation: true)
10
9
  @spec = spec_hash
@@ -4,15 +4,15 @@ module Cocina
4
4
  module Models
5
5
  class Access < Struct
6
6
  # Access level.
7
- # Validation of this property is relaxed. See the openapi for full validation.
7
+ # Validation of this property is relaxed. See the schema.json for full validation.
8
8
  attribute? :view, Types::Strict::String.optional.default('dark')
9
9
  # Download access level.
10
- # Validation of this property is relaxed. See the openapi for full validation.
10
+ # Validation of this property is relaxed. See the schema.json for full validation.
11
11
  attribute? :download, Types::Strict::String.optional.default('none')
12
12
  # Not used for this access type, must be null.
13
- # Validation of this property is relaxed. See the openapi for full validation.
13
+ # Validation of this property is relaxed. See the schema.json for full validation.
14
14
  attribute? :location, Types::Strict::String.optional
15
- # Validation of this property is relaxed. See the openapi for full validation.
15
+ # Validation of this property is relaxed. See the schema.json for full validation.
16
16
  attribute? :controlledDigitalLending, Types::Strict::Bool.optional.default(false)
17
17
  end
18
18
  end