cocina-models 0.112.1 → 0.114.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +1 -1
  3. data/Gemfile +1 -0
  4. data/Gemfile.lock +30 -15
  5. data/README.md +10 -18
  6. data/bin/validate-data +282 -0
  7. data/description_types.yml +2 -0
  8. data/docs/description_types.md +2 -0
  9. data/lib/cocina/generator/datatype.rb +1 -1
  10. data/lib/cocina/generator/generator.rb +1 -1
  11. data/lib/cocina/generator/schema.rb +2 -2
  12. data/lib/cocina/generator/schema_array.rb +1 -1
  13. data/lib/cocina/generator/schema_base.rb +2 -2
  14. data/lib/cocina/generator/schema_ref.rb +1 -1
  15. data/lib/cocina/generator/schema_value.rb +1 -1
  16. data/lib/cocina/generator.rb +1 -1
  17. data/lib/cocina/json_schema_wrapper.rb +1 -2
  18. data/lib/cocina/models/access.rb +4 -4
  19. data/lib/cocina/models/admin_policy_access_template.rb +4 -4
  20. data/lib/cocina/models/admin_policy_lite.rb +2 -2
  21. data/lib/cocina/models/collection_lite.rb +4 -4
  22. data/lib/cocina/models/descriptive_basic_value.rb +1 -1
  23. data/lib/cocina/models/descriptive_parallel_event.rb +1 -1
  24. data/lib/cocina/models/descriptive_value.rb +1 -1
  25. data/lib/cocina/models/dro_access.rb +4 -4
  26. data/lib/cocina/models/dro_lite.rb +6 -6
  27. data/lib/cocina/models/embargo.rb +4 -4
  28. data/lib/cocina/models/event.rb +1 -1
  29. data/lib/cocina/models/file_access.rb +4 -4
  30. data/lib/cocina/models/language.rb +1 -1
  31. data/lib/cocina/models/mapping/from_mods/description.rb +1 -1
  32. data/lib/cocina/models/mapping/from_mods/form.rb +0 -6
  33. data/lib/cocina/models/mapping/to_mods/contributor.rb +4 -4
  34. data/lib/cocina/models/related_resource.rb +1 -1
  35. data/lib/cocina/models/title.rb +1 -1
  36. data/lib/cocina/models/validatable.rb +6 -1
  37. data/lib/cocina/models/validators/catalog_links_validator.rb +1 -1
  38. data/lib/cocina/models/version.rb +1 -1
  39. data/lib/cocina/models.rb +3 -5
  40. data/lib/cocina/rspec/factories.rb +3 -3
  41. data/schema.json +5 -5
  42. metadata +4 -5
  43. data/docs/index.html +0 -20
  44. data/openapi.yml +0 -1930
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6f0f9a69413b8b16af1c991c94b54a42b6093ac2dccb78dcfd1fc161df2e8f42
4
- data.tar.gz: 7a5674e97c75fe7eea96fcd227be1deb1ab42c35317f838a0e48bc24faf724ae
3
+ metadata.gz: 2b8514cca9a06bbf3e7c36d84a1f27d093e16a263e8ac1cb92289b6640d28bec
4
+ data.tar.gz: '09f25bf203314c534eba5e21a16b163f56b21ac2e858cf9a5a25f309086efa12'
5
5
  SHA512:
6
- metadata.gz: 8736faf24b0d074167b7c874edb81f2ed6ad53f51b224b2609a992ffbdafcd237bc8d807f0c8a89c3233564e531755caca3e1bbddf42e44c39858fd1fd1d85eb
7
- data.tar.gz: e183d4290660c8f0de527beb8eccbf271925148957637a77d11bd345e22804fd6cb7e60da3e2100ff205a86874f0a672ad4ef60b6a0c63ad3d5854338455b955
6
+ metadata.gz: 2dce9b0cc4eb70cc6a2ddbc2dda88e5249b9ac58a0ec96523ec9c3e23ebe82108d87eb1e36b70e8e44a0d8c0abf75682c6f284dbda26e4cefe469b4acf5bf715
7
+ data.tar.gz: 795ba280b81c888fd005cc5af39d671b942f3d5b718b61b44e902581d476958654f28485aec4f8d18de4d725a6b9338c84c8a0acc4a7c315b04e5cd2ec22c9a8
data/.circleci/config.yml CHANGED
@@ -11,5 +11,5 @@ workflows:
11
11
  context: dlss
12
12
  before-test:
13
13
  - run:
14
- name: validate openapi
14
+ name: validate schema
15
15
  command: bin/validate-schema schema.json
data/Gemfile CHANGED
@@ -7,3 +7,4 @@ gemspec
7
7
 
8
8
  gem 'debug'
9
9
  gem 'rspec_junit_formatter' # For CircleCI
10
+ gem 'ruby-progressbar'
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cocina-models (0.112.1)
4
+ cocina-models (0.114.0)
5
5
  activesupport
6
6
  deprecation
7
7
  dry-struct (~> 1.0)
@@ -32,6 +32,8 @@ GEM
32
32
  securerandom (>= 0.3)
33
33
  tzinfo (~> 2.0, >= 2.0.5)
34
34
  uri (>= 0.13.1)
35
+ addressable (2.8.9)
36
+ public_suffix (>= 2.0.2, < 8.0)
35
37
  ast (2.4.3)
36
38
  attr_extras (7.1.0)
37
39
  base64 (0.3.0)
@@ -57,7 +59,7 @@ GEM
57
59
  concurrent-ruby (~> 1.0)
58
60
  dry-core (~> 1.1)
59
61
  zeitwerk (~> 2.6)
60
- dry-struct (1.8.0)
62
+ dry-struct (1.8.1)
61
63
  dry-core (~> 1.1)
62
64
  dry-types (~> 1.8, >= 1.8.2)
63
65
  ice_nine (~> 0.11)
@@ -73,7 +75,7 @@ GEM
73
75
  activesupport (>= 3.0, < 9.0)
74
76
  equivalent-xml (0.6.0)
75
77
  nokogiri (>= 1.4.3)
76
- erb (6.0.1)
78
+ erb (6.0.2)
77
79
  hana (1.3.7)
78
80
  i18n (1.14.8)
79
81
  concurrent-ruby (~> 1.0)
@@ -84,7 +86,10 @@ GEM
84
86
  prism (>= 1.3.0)
85
87
  rdoc (>= 4.0.0)
86
88
  reline (>= 0.4.2)
87
- json (2.18.1)
89
+ json (2.19.1)
90
+ json-schema (6.2.0)
91
+ addressable (~> 2.8)
92
+ bigdecimal (>= 3.1, < 5)
88
93
  json_schemer (2.5.0)
89
94
  bigdecimal
90
95
  hana (~> 1.3)
@@ -95,7 +100,10 @@ GEM
95
100
  language_server-protocol (3.17.0.5)
96
101
  lint_roller (1.1.0)
97
102
  logger (1.7.0)
98
- minitest (6.0.1)
103
+ mcp (0.8.0)
104
+ json-schema (>= 4.1)
105
+ minitest (6.0.2)
106
+ drb (~> 2.0)
99
107
  prism (~> 1.5)
100
108
  multi_json (1.19.1)
101
109
  nokogiri (1.19.1-arm64-darwin)
@@ -116,6 +124,7 @@ GEM
116
124
  psych (5.3.1)
117
125
  date
118
126
  stringio
127
+ public_suffix (7.0.5)
119
128
  racc (1.8.1)
120
129
  rainbow (3.1.1)
121
130
  rake (13.3.1)
@@ -135,16 +144,17 @@ GEM
135
144
  rspec-expectations (3.13.5)
136
145
  diff-lcs (>= 1.2.0, < 2.0)
137
146
  rspec-support (~> 3.13.0)
138
- rspec-mocks (3.13.7)
147
+ rspec-mocks (3.13.8)
139
148
  diff-lcs (>= 1.2.0, < 2.0)
140
149
  rspec-support (~> 3.13.0)
141
150
  rspec-support (3.13.7)
142
151
  rspec_junit_formatter (0.6.0)
143
152
  rspec-core (>= 2, < 4, != 2.12.0)
144
- rubocop (1.84.2)
153
+ rubocop (1.85.1)
145
154
  json (~> 2.3)
146
155
  language_server-protocol (~> 3.17.0.2)
147
156
  lint_roller (~> 1.1.0)
157
+ mcp (~> 0.6)
148
158
  parallel (~> 1.10)
149
159
  parser (>= 3.3.0.2)
150
160
  rainbow (>= 2.2.2, < 4.0)
@@ -199,15 +209,17 @@ DEPENDENCIES
199
209
  rubocop (~> 1.24)
200
210
  rubocop-rake
201
211
  rubocop-rspec
212
+ ruby-progressbar
202
213
  simplecov
203
214
 
204
215
  CHECKSUMS
205
216
  activesupport (8.1.2) sha256=88842578ccd0d40f658289b0e8c842acfe9af751afee2e0744a7873f50b6fdae
217
+ addressable (2.8.9) sha256=cc154fcbe689711808a43601dee7b980238ce54368d23e127421753e46895485
206
218
  ast (2.4.3) sha256=954615157c1d6a382bc27d690d973195e79db7f55e9765ac7c481c60bdb4d383
207
219
  attr_extras (7.1.0) sha256=d96fc9a9dd5d85ba2d37762440a816f840093959ae26bb90da994c2d9f1fc827
208
220
  base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
209
221
  bigdecimal (4.0.1) sha256=8b07d3d065a9f921c80ceaea7c9d4ae596697295b584c296fe599dd0ad01c4a7
210
- cocina-models (0.112.1)
222
+ cocina-models (0.114.0)
211
223
  concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab
212
224
  connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a
213
225
  date (3.5.1) sha256=750d06384d7b9c15d562c76291407d89e368dda4d4fff957eb94962d325a0dc0
@@ -219,23 +231,25 @@ CHECKSUMS
219
231
  dry-core (1.2.0) sha256=0cc5a7da88df397f153947eeeae42e876e999c1e30900f3c536fb173854e96a1
220
232
  dry-inflector (1.3.1) sha256=7fb0c2bb04f67638f25c52e7ba39ab435d922a3a5c3cd196120f63accb682dcc
221
233
  dry-logic (1.6.0) sha256=da6fedbc0f90fc41f9b0cc7e6f05f5d529d1efaef6c8dcc8e0733f685745cea2
222
- dry-struct (1.8.0) sha256=74c38b559924fb6462ac43ec780c4533a082d7b1d238a8d7857b773b3b8e2966
234
+ dry-struct (1.8.1) sha256=033868594c45241540172bf1ebbc8bb76b72b4f0717072325deba38ac13e80f1
223
235
  dry-types (1.9.1) sha256=baebeecdb9f8395d6c9d227b62011279440943e3ef2468fe8ccc1ba11467f178
224
236
  edtf (3.2.0) sha256=a15a0ee274e49c8047a3ebb5d61d793ba44f7f8ffbf0595392c467e3ea8d2447
225
237
  equivalent-xml (0.6.0) sha256=8919761efa848ad0846369ff8be1f646b17e5061698c4867b09829000cc3f487
226
- erb (6.0.1) sha256=28ecdd99c5472aebd5674d6061e3c6b0a45c049578b071e5a52c2a7f13c197e5
238
+ erb (6.0.2) sha256=9fe6264d44f79422c87490a1558479bd0e7dad4dd0e317656e67ea3077b5242b
227
239
  hana (1.3.7) sha256=5425db42d651fea08859811c29d20446f16af196308162894db208cac5ce9b0d
228
240
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
229
241
  ice_nine (0.11.2) sha256=5d506a7d2723d5592dc121b9928e4931742730131f22a1a37649df1c1e2e63db
230
242
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
231
243
  irb (1.17.0) sha256=168c4ddb93d8a361a045c41d92b2952c7a118fa73f23fe14e55609eb7a863aae
232
- json (2.18.1) sha256=fe112755501b8d0466b5ada6cf50c8c3f41e897fa128ac5d263ec09eedc9f986
244
+ json (2.19.1) sha256=dd94fdc59e48bff85913829a32350b3148156bc4fd2a95a2568a78b11344082d
245
+ json-schema (6.2.0) sha256=e8bff46ed845a22c1ab2bd0d7eccf831c01fe23bb3920caa4c74db4306813666
233
246
  json_schemer (2.5.0) sha256=2f01fb4cce721a4e08dd068fc2030cffd0702a7f333f1ea2be6e8991f00ae396
234
247
  jsonpath (1.1.5) sha256=29f70467193a2dc93ab864ec3d3326d54267961acc623f487340eb9c34931dbe
235
248
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
236
249
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
237
250
  logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
238
- minitest (6.0.1) sha256=7854c74f48e2e975969062833adc4013f249a4b212f5e7b9d5c040bf838d54bb
251
+ mcp (0.8.0) sha256=ae8bd146bb8e168852866fd26f805f52744f6326afb3211e073f78a95e0c34fb
252
+ minitest (6.0.2) sha256=db6e57956f6ecc6134683b4c87467d6dd792323c7f0eea7b93f66bd284adbc3d
239
253
  multi_json (1.19.1) sha256=7aefeff8f2c854bf739931a238e4aea64592845e0c0395c8a7d2eea7fdd631b7
240
254
  nokogiri (1.19.1-arm64-darwin) sha256=dfe2d337e6700eac47290407c289d56bcf85805d128c1b5a6434ddb79731cb9e
241
255
  nokogiri (1.19.1-x86_64-linux-gnu) sha256=1a4902842a186b4f901078e692d12257678e6133858d0566152fe29cdb98456a
@@ -247,6 +261,7 @@ CHECKSUMS
247
261
  prettyprint (0.2.0) sha256=2bc9e15581a94742064a3cc8b0fb9d45aae3d03a1baa6ef80922627a0766f193
248
262
  prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
249
263
  psych (5.3.1) sha256=eb7a57cef10c9d70173ff74e739d843ac3b2c019a003de48447b2963d81b1974
264
+ public_suffix (7.0.5) sha256=1a8bb08f1bbea19228d3bed6e5ed908d1cb4f7c2726d18bd9cadf60bc676f623
250
265
  racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
251
266
  rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
252
267
  rake (13.3.1) sha256=8c9e89d09f66a26a01264e7e3480ec0607f0c497a861ef16063604b1b08eb19c
@@ -256,10 +271,10 @@ CHECKSUMS
256
271
  rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
257
272
  rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
258
273
  rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
259
- rspec-mocks (3.13.7) sha256=0979034e64b1d7a838aaaddf12bf065ea4dc40ef3d4c39f01f93ae2c66c62b1c
274
+ rspec-mocks (3.13.8) sha256=086ad3d3d17533f4237643de0b5c42f04b66348c28bf6b9c2d3f4a3b01af1d47
260
275
  rspec-support (3.13.7) sha256=0640e5570872aafefd79867901deeeeb40b0c9875a36b983d85f54fb7381c47c
261
276
  rspec_junit_formatter (0.6.0) sha256=40dde674e6ae4e6cc0ff560da25497677e34fefd2338cc467a8972f602b62b15
262
- rubocop (1.84.2) sha256=5692cea54168f3dc8cb79a6fe95c5424b7ea893c707ad7a4307b0585e88dbf5f
277
+ rubocop (1.85.1) sha256=3dbcf9e961baa4c376eeeb2a03913dca5e3987033b04d38fa538aa1e7406cc77
263
278
  rubocop-ast (1.49.0) sha256=49c3676d3123a0923d333e20c6c2dbaaae2d2287b475273fddee0c61da9f71fd
264
279
  rubocop-rake (0.7.1) sha256=3797f2b6810c3e9df7376c26d5f44f3475eda59eb1adc38e6f62ecf027cbae4d
265
280
  rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
@@ -280,4 +295,4 @@ CHECKSUMS
280
295
  zeitwerk (2.7.5) sha256=d8da92128c09ea6ec62c949011b00ed4a20242b255293dd66bf41545398f73dd
281
296
 
282
297
  BUNDLED WITH
283
- 4.0.6
298
+ 4.0.7
data/README.md CHANGED
@@ -1,18 +1,21 @@
1
1
  [![CircleCI](https://circleci.com/gh/sul-dlss/cocina-models.svg?style=svg)](https://circleci.com/gh/sul-dlss/cocina-models)
2
2
  [![Test Coverage](https://codecov.io/github/sul-dlss/cocina-models/graph/badge.svg?token=FG1SRYCME2)](https://codecov.io/github/sul-dlss/cocina-models)
3
3
  [![Gem Version](https://badge.fury.io/rb/cocina-models.svg)](https://badge.fury.io/rb/cocina-models)
4
- [![OpenAPI Validator](http://validator.swagger.io/validator?url=https://raw.githubusercontent.com/sul-dlss/cocina-models/main/openapi.yml)](http://validator.swagger.io/validator/debug?url=https://raw.githubusercontent.com/sul-dlss/cocina-models/main/openapi.yml)
5
4
 
6
5
  # Cocina::Models
7
6
 
8
7
  The cocina-models gem is a Ruby implementation of the Stanford Digital Repository (SDR) data model, which we named "Cocina." The data being modeled is oriented around digital repository objects.
9
8
 
10
- The data model is expressed in an OpenAPI specification that lives in this codebase. Expressing the model in such a spec allows for rich validation (using gems such as `json_schemer`). The gem provides a set of generators (see below) to generate Ruby classes from the specification, with modeling provided by dry-struct / dry-types. Together, these provide a way for consumers to validate objects against models and to manipulate those objects.
9
+ The data model is expressed in a JSON Schema specification that lives in this codebase. Expressing the model in such a spec allows for rich validation (using gems such as `json_schemer`). The gem provides a set of generators (see below) to generate Ruby classes from the specification, with modeling provided by dry-struct / dry-types. Together, these provide a way for consumers to validate objects against models and to manipulate those objects.
11
10
 
12
11
  Note that the data model encodes properties as camelCase, which the team believes to be consistent with other HTTP APIs and the original design of the Cocina data model. While using camelCase in Ruby code may look and feel wrong, we did explore automagic conversion between camelCase in the model and snake_case in the Ruby context. We ultimately concluded that we have enough representations of the data model in enough codebases to reasonably worry about data inconsistency problems, none of which we need in our work on SDR.
13
12
 
14
13
  For more about the model for description see https://consul.stanford.edu/display/DIGMETADATA/Digital+Object+Metadata+Documentation#DigitalObjectMetadataDocumentation-Cocinamodel
15
14
 
15
+ ## Schema
16
+
17
+ The [schema.json](schema.json) can also be viewed via JSON HERO: https://jsonhero.io/j/rynNH3NLBhcf and DOR Services App's openapi.yml: https://sul-dlss.github.io/dor-services-app/
18
+
16
19
  ## Configuration
17
20
 
18
21
  Set the PURL url base:
@@ -52,7 +55,10 @@ Beyond what is necessary to test the generator, the Cocina model classes are not
52
55
 
53
56
  ## Testing validation changes
54
57
 
55
- If there is a possibility that a model, mapping, or validation change will conflict with some existing objects then [validate-cocina](https://github.com/sul-dlss/dor-services-app/blob/main/bin/validate-cocina) should be used for testing. This must be run on the `sdr-infra` VM since it requires deploying a branch of cocina-models.
58
+ If there is a possibility that a model, mapping, or validation change will conflict with some existing objects then `bin/validate-data` should be used for testing. This operates on a sample of objects from the repository and reports any validation errors. You may get the sample by running the script [bin/export-cocina-head-versions](https://github.com/sul-dlss/dor-services-app/pull/5854) and downloading the data file to your computer.
59
+
60
+
61
+ Alternatively, you can use [validate-cocina](https://github.com/sul-dlss/dor-services-app/blob/main/bin/validate-cocina) for testing. This must be run on the `sdr-infra` VM since it requires deploying a branch of cocina-models. It is slower than using `bin/validate-data`, but all of the data is completely up to date.
56
62
 
57
63
  For background on object validation, as it relates to migrating versions, see: https://github.com/sul-dlss/dor-services-app/wiki/Migrating-Cocina
58
64
 
@@ -150,21 +156,7 @@ This list of services is known to include:
150
156
  * [sul-dlss/sdr-api](https://github.com/sul-dlss/sdr-api)
151
157
  * [sul-dlss/dor-services-app](https://github.com/sul-dlss/dor-services-app/)
152
158
 
153
-
154
- #### Step 3A: Update API specifications
155
-
156
- **NOTE**: You can skip step 3A if there have not been any changes to the `cocina-models` OpenAPI spec since the prior release.
157
-
158
- The cocina-models gem is used in applications that have an API specification that accepts Cocina models.
159
-
160
- #### Step 3B: Bump gems and create the PRs
161
-
162
- If you updated the `schema.json` in step 3A, use the same PR for step 3B. Why? When [dor-services-app](https://github.com/sul-dlss/dor-services-app), for example, is updated to use the new models (via the auto-update script), these clients should be updated at the same time or there is risk of models produced by dor-services-app not being acceptable to the clients.
163
-
164
- 1. Perform `bundle update --conservative cocina-models dor-services-client` in the services above and make PRs for those repos if they don't already exist. You may first need to update how these gems are pinned in the `Gemfile` in order to bump them.
165
- 2. Note that sdr-client is not currently used in these applications, but if it were, would also need to be bumped to the latest release.
166
-
167
- #### Step 3C: Merge 'em
159
+ Perform `bundle update --conservative cocina-models dor-services-client` in the services above and make PRs for those repos. You may first need to update how these gems are pinned in the `Gemfile` in order to bump them.
168
160
 
169
161
  Get the directly coupled services PRs merged before the deploy in step 5.
170
162
 
data/bin/validate-data ADDED
@@ -0,0 +1,282 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Validate JSONL (XZ compressed) files against the schema
5
+
6
+ # Trap Ctrl+C to exit gracefully
7
+ Signal.trap('INT') do
8
+ puts "\nTerminated"
9
+ exit(1)
10
+ end
11
+
12
+ Signal.trap('TERM') do
13
+ puts "\nTerminated"
14
+ exit(1)
15
+ end
16
+
17
+ require 'bundler/setup'
18
+ require 'cocina/models'
19
+ require 'json'
20
+ require 'ruby-progressbar'
21
+ require 'optparse'
22
+
23
+ # Parse command line options
24
+ def parse_options # rubocop:disable Metrics/MethodLength
25
+ options = {
26
+ processes: 12,
27
+ count: nil,
28
+ batch_size: 100
29
+ }
30
+
31
+ OptionParser.new do |opts|
32
+ opts.banner = 'Usage: validate-data FILENAME [options]'
33
+
34
+ opts.on('-p', '--processes NUM', Integer, 'Number of processes to use (default: 12)') do |p|
35
+ options[:processes] = p
36
+ end
37
+
38
+ opts.on('-c', '--count NUM', Integer, 'Total line count (skips counting pass if provided)') do |c|
39
+ options[:count] = c
40
+ end
41
+
42
+ opts.on('-b', '--batch-size NUM', Integer, 'Batch size for worker processing (default: 100)') do |b|
43
+ options[:batch_size] = b
44
+ end
45
+
46
+ opts.on('-h', '--help', 'Display this help message') do
47
+ puts opts
48
+ exit
49
+ end
50
+ end.parse!
51
+
52
+ # Filename is required as a positional argument
53
+ if ARGV.empty?
54
+ puts 'Error: FILENAME is required'
55
+ puts 'Usage: validate-data FILENAME [options]'
56
+ puts 'Run with --help for more information'
57
+ exit 1
58
+ end
59
+
60
+ options[:filename] = ARGV[0]
61
+ options
62
+ end
63
+
64
+ # Count lines in the file
65
+ def count_lines(filename)
66
+ count = 0
67
+ IO.popen(['xzcat', filename]) do |io|
68
+ io.each_line { count += 1 }
69
+ end
70
+ count
71
+ end
72
+
73
+ # Get total line count (either from option or by counting)
74
+ def get_total_lines(filename, provided_count)
75
+ if provided_count
76
+ puts "Using provided line count: #{provided_count}"
77
+ provided_count
78
+ else
79
+ puts 'Counting lines...'
80
+ total = count_lines(filename)
81
+ puts "Total lines to validate: #{total}"
82
+ total
83
+ end
84
+ end
85
+
86
+ # Worker process that reads batches from a pipe and validates
87
+ def worker_process(reader) # rubocop:disable Metrics/MethodLength
88
+ errors = []
89
+
90
+ loop do
91
+ # Read length prefix (4 bytes)
92
+ length_data = reader.read(4)
93
+ break if length_data.nil? || length_data.empty?
94
+
95
+ length = length_data.unpack1('N')
96
+ data = reader.read(length)
97
+ batch = Marshal.load(data) # rubocop:disable Security/MarshalLoad
98
+
99
+ # Process each line in the batch
100
+ batch.each do |line_num, line_content|
101
+ json = JSON.parse(line_content)
102
+ Cocina::Models.build(json)
103
+ rescue JSON::ParserError => e
104
+ errors << { line: line_num, error: "JSON Parse Error: #{e.message}" }
105
+ rescue Cocina::Models::ValidationError => e
106
+ errors << { line: line_num, error: "Validation Error: #{e.message}" }
107
+ rescue Cocina::Models::UnknownTypeError => e
108
+ errors << { line: line_num, error: "Unknown Type Error: #{e.message}" }
109
+ rescue StandardError => e
110
+ errors << { line: line_num, error: "Error: #{e.class} - #{e.message}" }
111
+ end
112
+ end
113
+
114
+ errors
115
+ end
116
+
117
+ # Spawn worker processes
118
+ def spawn_workers(num_processes) # rubocop:disable Metrics/MethodLength
119
+ workers = []
120
+ result_readers = []
121
+
122
+ num_processes.times do
123
+ work_reader, work_writer = IO.pipe
124
+ result_reader, result_writer = IO.pipe
125
+
126
+ pid = fork do
127
+ # Child process
128
+ work_writer.close
129
+ result_reader.close
130
+
131
+ errors = worker_process(work_reader)
132
+
133
+ # Send results back
134
+ result_writer.write(Marshal.dump(errors))
135
+ result_writer.close
136
+ work_reader.close
137
+ exit(0)
138
+ end
139
+
140
+ # Parent process
141
+ work_reader.close
142
+ result_writer.close
143
+
144
+ workers << { pid: pid, writer: work_writer }
145
+ result_readers << result_reader
146
+ end
147
+
148
+ [workers, result_readers]
149
+ end
150
+
151
+ # Send a batch to a worker
152
+ def send_batch(worker, batch)
153
+ return if batch.empty?
154
+
155
+ data = Marshal.dump(batch)
156
+ worker[:writer].write([data.bytesize].pack('N'))
157
+ worker[:writer].write(data)
158
+ end
159
+
160
+ # Stream file and distribute work to workers
161
+ def distribute_work(filename, workers, batch_size, total_lines) # rubocop:disable Metrics/MethodLength
162
+ line_number = 0
163
+ current_worker = 0
164
+ batch = []
165
+
166
+ # Create progress bar
167
+ progressbar = ProgressBar.create(
168
+ title: 'Validating',
169
+ total: total_lines,
170
+ format: '%t: |%B| %p%% %c/%C %a %e',
171
+ throttle_rate: 0.1
172
+ )
173
+
174
+ IO.popen(['xzcat', filename]) do |io|
175
+ io.each_line do |line|
176
+ line_number += 1
177
+ batch << [line_number, line]
178
+
179
+ # When batch is full, send to worker
180
+ if batch.size >= batch_size
181
+ worker = workers[current_worker]
182
+ send_batch(worker, batch)
183
+ batch = []
184
+ current_worker = (current_worker + 1) % workers.length
185
+ end
186
+
187
+ # Update progress bar
188
+ progressbar.increment
189
+ end
190
+ end
191
+
192
+ # Send any remaining lines in the last batch
193
+ if batch.any?
194
+ worker = workers[current_worker]
195
+ send_batch(worker, batch)
196
+ end
197
+
198
+ # Final progress update
199
+ progressbar.finish
200
+ end
201
+
202
+ # Collect results from all workers
203
+ def collect_results(workers, result_readers)
204
+ # Close all worker input pipes to signal completion
205
+ workers.each { |w| w[:writer].close }
206
+
207
+ puts 'Collecting results from workers...'
208
+ all_errors = []
209
+
210
+ result_readers.each do |reader|
211
+ data = reader.read
212
+ worker_errors = Marshal.load(data) # rubocop:disable Security/MarshalLoad
213
+ all_errors.concat(worker_errors)
214
+ reader.close
215
+ end
216
+
217
+ # Wait for all workers to complete
218
+ workers.each { |w| Process.wait(w[:pid]) }
219
+
220
+ all_errors
221
+ end
222
+
223
+ # Print validation summary
224
+ def print_summary(total_lines, errors, elapsed_time) # rubocop:disable Metrics/MethodLength
225
+ puts '=' * 80
226
+ puts 'VALIDATION SUMMARY'
227
+ puts '=' * 80
228
+ puts "Total lines processed: #{total_lines}"
229
+ puts "Lines with errors: #{errors.length}"
230
+ puts "Success rate: #{((total_lines - errors.length).to_f / total_lines * 100).round(2)}%"
231
+ puts "Time elapsed: #{elapsed_time.round(2)} seconds"
232
+ puts "Throughput: #{(total_lines / elapsed_time).round(0)} lines/second"
233
+
234
+ return unless errors.any?
235
+
236
+ puts "\n"
237
+ puts 'Error details:'
238
+ puts '-' * 80
239
+ # Sort errors by line number for better readability
240
+ errors.sort_by { |e| e[:line] }.each do |error|
241
+ puts "Line #{error[:line]}: #{error[:error]}"
242
+ end
243
+ puts "\n"
244
+ puts "Line numbers with errors: #{errors.sort_by { |e| e[:line] }.map { |e| e[:line] }.join(', ')}"
245
+ end
246
+
247
+ # Main execution
248
+ def main
249
+ options = parse_options
250
+
251
+ puts "Validating file: #{options[:filename]}"
252
+ puts "Using #{options[:processes]} processes with batch size #{options[:batch_size]}"
253
+ puts '=' * 80
254
+
255
+ # Get total line count
256
+ total_lines = get_total_lines(options[:filename], options[:count])
257
+ puts '=' * 80
258
+
259
+ # Spawn worker processes
260
+ workers, result_readers = spawn_workers(options[:processes])
261
+
262
+ # Start timing
263
+ start_time = Time.now
264
+
265
+ # Distribute work to workers
266
+ distribute_work(options[:filename], workers, options[:batch_size], total_lines)
267
+
268
+ # Collect results
269
+ all_errors = collect_results(workers, result_readers)
270
+
271
+ # Calculate elapsed time
272
+ elapsed_time = Time.now - start_time
273
+
274
+ # Print summary
275
+ print_summary(total_lines, all_errors, elapsed_time)
276
+
277
+ # Exit with appropriate code
278
+ exit(all_errors.empty? ? 0 : 1)
279
+ end
280
+
281
+ # Run the script
282
+ main
@@ -432,6 +432,8 @@ identifier:
432
432
  - value: "West Mat #"
433
433
  - value: Wikidata
434
434
  code: wikidata
435
+ - value: International Article Number
436
+ - value: ProQuest Module ID
435
437
  note:
436
438
  - value: abstract
437
439
  description: A short overview of a research article or other work.
@@ -415,6 +415,8 @@ _Path: identifier.type_
415
415
  * videorecording identifier
416
416
  * West Mat #
417
417
  * Wikidata
418
+ * International Article Number
419
+ * ProQuest Module ID
418
420
  # Note types
419
421
  _Path: note.type_
420
422
  * abstract
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi schema
5
+ # Class for generating from a JSON schema
6
6
  class Datatype < SchemaBase
7
7
  def generate
8
8
  <<~RUBY
@@ -4,7 +4,7 @@ require 'fileutils'
4
4
 
5
5
  module Cocina
6
6
  module Generator
7
- # Class for generating Cocina models from openapi.
7
+ # Class for generating Cocina models from JSON Schema.
8
8
  class Generator < Thor # rubocop:disable Metrics/ClassLength
9
9
  include Thor::Actions
10
10
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi schema
5
+ # Class for generating from a JSON schema
6
6
  class Schema < SchemaBase
7
7
  def schema_properties
8
8
  @schema_properties ||= (properties + all_of_properties + one_of_properties).uniq(&:key)
@@ -112,7 +112,7 @@ module Cocina
112
112
  key: key,
113
113
  # The property does less validation because it may vary between
114
114
  # different oneOf schemas. Validation is still performed
115
- # by openAPI.
115
+ # by JSON Schema.
116
116
  relaxed: true,
117
117
  parent: self,
118
118
  schemas: schemas)
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi array
5
+ # Class for generating from a JSON Schema array
6
6
  class SchemaArray < SchemaBase
7
7
  GENERIC_ITEMS_NAME = 'items'
8
8
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Base class for generating from openapi
5
+ # Base class for generating from a JSON Schema
6
6
  class SchemaBase
7
7
  attr_reader :schema_doc, :key, :required, :nullable, :parent, :relaxed, :schemas, :lite
8
8
 
@@ -74,7 +74,7 @@ module Cocina
74
74
  def relaxed_comment
75
75
  return '' unless relaxed
76
76
 
77
- "# Validation of this property is relaxed. See the openapi for full validation.\n"
77
+ "# Validation of this property is relaxed. See the schema.json for full validation.\n"
78
78
  end
79
79
 
80
80
  # dry-types-based types contain the word `Types` (e.g., `Types::String`), and custom types (e.g., `SourceId`) do not
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi reference
5
+ # Class for generating from a JSON Schema reference
6
6
  class SchemaRef < SchemaBase
7
7
  def generate
8
8
  if required && !relaxed
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Cocina
4
4
  module Generator
5
- # Class for generating from an openapi value
5
+ # Class for generating from a JSON Schema value
6
6
  class SchemaValue < SchemaBase
7
7
  def generate
8
8
  if required && !relaxed
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Cocina
4
- # Module for generating Cocina models from openapi.
4
+ # Module for generating Cocina models from a JSON Schema.
5
5
  module Generator
6
6
  end
7
7
  end
@@ -3,8 +3,7 @@
3
3
  module Cocina
4
4
  # Wrapper for JSON Schema support using json_schemer
5
5
  class JsonSchemaWrapper
6
- class OpenApiError < StandardError; end
7
- class MissingReferenceError < OpenApiError; end
6
+ class MissingReferenceError < StandardError; end
8
7
 
9
8
  def initialize(spec_hash, strict_reference_validation: true)
10
9
  @spec = spec_hash