cocina-models 0.119.0 → 0.121.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +1 -1
  3. data/.claude/skills/cocina-jq-query/SKILL.md +8 -0
  4. data/.gitignore +5 -0
  5. data/.rubocop.yml +6 -0
  6. data/AGENTS.md +208 -0
  7. data/Gemfile +2 -0
  8. data/Gemfile.lock +40 -28
  9. data/README.md +4 -1
  10. data/bin/enhance-report-csv +90 -0
  11. data/bin/validate-data +7 -0
  12. data/bin/validate-schema +6 -1
  13. data/cocina-models.gemspec +1 -2
  14. data/lib/cocina/models/contributor.rb +0 -3
  15. data/lib/cocina/models/mapping/from_mods/event.rb +12 -3
  16. data/lib/cocina/models/related_resource.rb +1 -1
  17. data/lib/cocina/models/validators/base_description_visitor_validator.rb +33 -0
  18. data/lib/cocina/models/validators/base_structural_visitor_validator.rb +23 -0
  19. data/lib/cocina/models/validators/composite_description_validator.rb +62 -0
  20. data/lib/cocina/models/validators/composite_structural_validator.rb +48 -0
  21. data/lib/cocina/models/validators/dark_visitor_validator.rb +46 -0
  22. data/lib/cocina/models/validators/description_date_time_visitor_validator.rb +132 -0
  23. data/lib/cocina/models/validators/{description_types_validator.rb → description_types_visitor_validator.rb} +9 -55
  24. data/lib/cocina/models/validators/{description_values_validator.rb → description_values_visitor_validator.rb} +14 -51
  25. data/lib/cocina/models/validators/json_schema_validator.rb +54 -102
  26. data/lib/cocina/models/validators/language_tag_visitor_validator.rb +32 -0
  27. data/lib/cocina/models/validators/reserved_filename_visitor_validator.rb +40 -0
  28. data/lib/cocina/models/validators/validator.rb +5 -9
  29. data/lib/cocina/models/version.rb +1 -1
  30. data/lib/cocina/models.rb +1 -1
  31. data/schema.json +114 -59
  32. metadata +16 -24
  33. data/lib/cocina/models/descriptive_parallel_contributor.rb +0 -29
  34. data/lib/cocina/models/validators/dark_validator.rb +0 -76
  35. data/lib/cocina/models/validators/date_time_validator.rb +0 -100
  36. data/lib/cocina/models/validators/language_tag_validator.rb +0 -76
  37. data/lib/cocina/models/validators/reserved_filename_validator.rb +0 -60
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 95fdb985d82245efee3898558df726439ee7d27e7fa45bd47815b44d52f58679
4
- data.tar.gz: 0a7644f423957828280316d0c5709847f260919e1ceb384c130e8196f4d5eb6a
3
+ metadata.gz: cfcb2c2a393845b1c9129ab48f5fc7340beb8067936694798e800a5935bf1ac9
4
+ data.tar.gz: f9dc80c520ddecb670a5df20273c635146f75d75025132402d4d1329090e29fd
5
5
  SHA512:
6
- metadata.gz: ce4a6079617e05fd8d5b1547d16e983e06f0f53914339f9f19dcb4a870d52c1c565844bb39b0ed5decffe2c68f2a4fc4f2b385b9d8dd4796700f2c1f9aed5c7b
7
- data.tar.gz: eb72b867dcab3ad0ee2e63d3b68276c17fb831365f4c6a785a95fa8250998b663c6bcca2d50d4ffc6786044a17df7321847c069595587a9ae3ac96120f093020
6
+ metadata.gz: 2f7ccadc053ba81117275251401432ec92c65fefd86da94c25e7c862f8680c868d3f392a84b3966fb48609a5c383abd8dc90b8a51e457fd2fe8d3792371bd4a6
7
+ data.tar.gz: c1355245a0571f8933155c9de04b38226b55b96a3ffdb899af3d2af553e30f53ae7986a41e6293339a90cc2a017fcd6eec273fcd1ce64df57b26cbbb5f161193
data/.circleci/config.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  version: 2.1
2
2
  orbs:
3
- ruby-rails: sul-dlss/ruby-rails@4.8.0
3
+ ruby-rails: sul-dlss/ruby-rails@4.10.0
4
4
  workflows:
5
5
  build:
6
6
  jobs:
@@ -0,0 +1,8 @@
1
+ ---
2
+ name: cocina-jq-query
3
+ description: Build and validate a jq query against a Cocina Model JSON serialization. Use when the user wants to query, filter, or transform a Cocina object (DRO, Collection, AdminPolicy) using jq, or asks for help writing a jq expression for Cocina data.
4
+ ---
5
+
6
+ # cocina-jq-query
7
+
8
+ Follow the workflow defined in [AGENTS.md](../../../AGENTS.md) under **cocina-jq**.
data/.gitignore CHANGED
@@ -9,3 +9,8 @@
9
9
 
10
10
  # rspec failure tracking
11
11
  .rspec_status
12
+
13
+ *.jq.txt
14
+ *.jsonl.xz
15
+ *.csv
16
+ *-playground.html
data/.rubocop.yml CHANGED
@@ -126,6 +126,12 @@ RSpec/BeEq: # new in 2.9.0
126
126
  RSpec/BeNil: # new in 2.9.0
127
127
  Enabled: true
128
128
 
129
+ RSpec/DiscardedMatcher: # new in 3.10
130
+ Enabled: true
131
+
132
+ RSpec/MatchWithSimpleRegex: # new in 3.10
133
+ Enabled: true
134
+
129
135
  RSpec/MultipleExpectations:
130
136
  Enabled: false
131
137
 
data/AGENTS.md ADDED
@@ -0,0 +1,208 @@
1
+ # Agent Instructions for cocina-models
2
+
3
+ ## cocina-jq — Build jq queries for Cocina Model JSON
4
+
5
+ Use this workflow when the user wants to query, filter, or transform a Cocina object (DRO, Collection, AdminPolicy) using jq.
6
+
7
+ ### Step 1 — Check prerequisites
8
+
9
+ #### Check jq
10
+ Run `jq --version`. If jq is not installed, tell the user:
11
+ > `jq` is not installed. Install it with `brew install jq`, then retry.
12
+
13
+ Stop here if jq is missing.
14
+
15
+ #### Check parallel
16
+ Run `parallel --version`. If parallel is not installed, tell the user:
17
+ > `parallel` is not installed. Install it with `brew install parallel`, then retry.
18
+
19
+ #### Check pv
20
+ Run `pv --version`. If pv is not installed, tell the user:
21
+ > `pv` is not installed. Install it with `brew install pv`, then retry.
22
+
23
+ ### Output format (always apply)
24
+
25
+ Every jq query produced by this skill **must output a CSV line** using `@csv`. The **first field must always be the external identifier** (`externalIdentifier`). Additional fields follow based on the user's query. Example:
26
+
27
+ ```
28
+ "druid:bc123df4567","some value","another value"
29
+ ```
30
+
31
+ Use `[.externalIdentifier, ...] | @csv` as the output expression. Apply this constraint automatically — do not ask the user whether to include the external identifier.
32
+
33
+ ### Step 2 — Resume or collect inputs
34
+
35
+ First, ask the user:
36
+
37
+ > Do you want to resume an existing query?
38
+
39
+ **If yes:** Ask for the filename of the `.jq.txt` file (e.g., `contributor-name-uri-non-loc.jq.txt`). Read that file from the project root. The file header contains the original inputs as comments (query description, expected output description, example JSON, example output). Parse those comments to reconstruct the inputs. Confirm with the user that the loaded values look correct, then proceed to Step 3 with those inputs (skip re-asking for them).
40
+
41
+ **If no:** Ask the user for each input, one at a time:
42
+
43
+ 1. **Query description** — what should the query do? (e.g., "extract all file labels from structural")
44
+ 2. **Expected output description** — what additional values (beyond the external identifier) should appear in the output?
45
+ 3. **Example Cocina object** — paste JSON directly
46
+ 4. **Example output** — paste the exact expected CSV output (must start with the external identifier as the first field)
47
+
48
+ Explicitly ask for each input; do not infer or guess.
49
+
50
+ ### Step 3 — Clarify ambiguities
51
+
52
+ Review the inputs from Step 2. If anything is unclear or underspecified, ask the user targeted questions before proceeding. Examples of things to clarify:
53
+
54
+ - Is the query meant to return one value per object, or aggregate across many objects?
55
+ - Should missing or null fields be skipped, returned as null, or cause an error?
56
+ - Are there edge cases in the data structure the query must handle (e.g., empty arrays, nested arrays, optional fields)?
57
+ - Is the output format exactly as shown, or is there flexibility (e.g., flat vs. nested)?
58
+
59
+ Ask only questions that would change how the query is written. Do not ask about things already clear from the inputs. If everything is unambiguous, skip this step silently and proceed.
60
+
61
+ ### Step 4 — Load relevant schema portion
62
+
63
+ Read `schema.json` from the project root. Extract only the `$defs` entries relevant to the Cocina object type found in the example's `type` field:
64
+
65
+ - `https://cocina.sul.stanford.edu/models/object` → DRO-related defs
66
+ - `https://cocina.sul.stanford.edu/models/collection` → Collection-related defs
67
+ - `https://cocina.sul.stanford.edu/models/admin_policy` → AdminPolicy-related defs
68
+
69
+ Include only the defs actually referenced (follow `$ref` chains up to 2 levels deep). Do not load the entire schema.
70
+
71
+ ### Step 5 — Generate and validate the query (up to 3 attempts)
72
+
73
+ **Attempt 1:** Use the schema excerpt, example JSON, query description, and expected output to write a jq query.
74
+
75
+ Run it:
76
+ ```bash
77
+ echo '<example_json>' | jq '<query>'
78
+ ```
79
+
80
+ Compare actual output to the example output. If it matches → proceed to Step 6.
81
+
82
+ **Attempt 2 (if attempt 1 fails):** Run `man jq` to load the jq manual. Use it to refine the query. Re-run and validate.
83
+
84
+ **Attempt 3 (if attempt 2 fails):** Make a final attempt using all context. Re-run and validate.
85
+
86
+ **After 3 failures:** Present the best attempt, explain what is wrong, and ask the user to clarify.
87
+
88
+ ### Step 6 — Generate local HTML playground
89
+
90
+ Write `<slug>-playground.html` in the project root using the template below.
91
+
92
+ Substitute:
93
+ - Every occurrence of `SLUG` → the actual slug string
94
+ - `JSON_PLACEHOLDER` → the example JSON (pretty-printed) passed through `JSON.stringify` a second time, producing a valid JS string literal (e.g. `"{\"foo\":\"bar\"}"`)
95
+ - `QUERY_PLACEHOLDER` → the validated jq query passed through `JSON.stringify`, producing a valid JS string literal (e.g. `".foo"`)
96
+
97
+ ```html
98
+ <!DOCTYPE html>
99
+ <html lang="en">
100
+ <head>
101
+ <meta charset="UTF-8">
102
+ <title>jq playground — SLUG</title>
103
+ <style>
104
+ body { font-family: monospace; margin: 2rem; background: #1e1e1e; color: #d4d4d4; }
105
+ h2 { color: #9cdcfe; }
106
+ textarea, input { width: 100%; box-sizing: border-box; background: #252526; color: #d4d4d4; border: 1px solid #444; padding: 8px; font-family: monospace; font-size: 13px; border-radius: 3px; }
107
+ textarea { height: 260px; resize: vertical; }
108
+ input { height: 36px; }
109
+ button { margin-top: 8px; background: #0e639c; color: white; border: none; padding: 8px 20px; cursor: pointer; font-size: 14px; border-radius: 3px; }
110
+ button:hover { background: #1177bb; }
111
+ label { display: block; margin-top: 16px; margin-bottom: 4px; font-size: 12px; color: #9cdcfe; text-transform: uppercase; letter-spacing: 0.05em; }
112
+ #output { background: #252526; border: 1px solid #444; padding: 12px; min-height: 80px; white-space: pre-wrap; word-break: break-all; border-radius: 3px; }
113
+ .error { color: #f44747; }
114
+ </style>
115
+ </head>
116
+ <body>
117
+ <h2>jq playground — SLUG</h2>
118
+ <label>JSON Input</label>
119
+ <textarea id="json"></textarea>
120
+ <label>jq Filter</label>
121
+ <input id="query" type="text" />
122
+ <button onclick="run()">&#9654; Run</button>
123
+ <label>Output</label>
124
+ <pre id="output">(click Run)</pre>
125
+
126
+ <script src="https://cdn.jsdelivr.net/npm/jq-web@0.5.1/jq.wasm.js"></script>
127
+ <script>
128
+ const INITIAL_JSON = JSON_PLACEHOLDER;
129
+ const INITIAL_QUERY = QUERY_PLACEHOLDER;
130
+ document.getElementById('json').value = JSON.stringify(JSON.parse(INITIAL_JSON), null, 2);
131
+ document.getElementById('query').value = INITIAL_QUERY;
132
+
133
+ function run() {
134
+ const json = document.getElementById('json').value;
135
+ const query = document.getElementById('query').value;
136
+ const out = document.getElementById('output');
137
+ out.className = '';
138
+ out.textContent = 'Running…';
139
+ jq.promised.raw(json, query)
140
+ .then(r => { out.textContent = r || '(empty output)'; })
141
+ .catch(e => { out.className = 'error'; out.textContent = String(e); });
142
+ }
143
+ </script>
144
+ </body>
145
+ </html>
146
+ ```
147
+
148
+ ### Step 7 — Write the query to a .jq.txt file
149
+
150
+ Generate a short kebab-case slug summarizing the query (e.g., `invalid-encoding`, `file-label-extract`). Write the file `<slug>.jq.txt` in the project root with the following structure:
151
+
152
+ 1. A comment header containing the user's inputs:
153
+
154
+ ```
155
+ # Query description: <query description from Step 2>
156
+ #
157
+ # Expected output: <expected output description from Step 2>
158
+ #
159
+ # Example input:
160
+ # <example Cocina JSON, each line prefixed with "# ">
161
+ #
162
+ # Example output:
163
+ # <example output CSV, each line prefixed with "# ">
164
+ ```
165
+
166
+ 2. A blank line, then the validated jq query.
167
+
168
+ The comment lines must use `#` so the file remains valid jq syntax. When resuming (Step 2 resume path), parse these comment sections by their labels to reconstruct the inputs.
169
+
170
+ Find the most recent `.jsonl.xz` file in the project root by listing `*.jsonl.xz` files sorted by name descending and taking the first result. Its filename is used in the Step 8 shell snippet.
171
+
172
+ ### Step 8 — Output
173
+
174
+ Present:
175
+ 1. The jq query in a code block
176
+ 2. A 1–3 sentence explanation of how it works
177
+ 3. A markdown link to the local playground file using a `file://` URL (e.g. `[Open playground](file:///Users/someuser/data/sdr/cocina-models/<slug>-playground.html)`) — substitute the actual absolute path — plus the equivalent shell command (`open <slug>-playground.html`) for reference
178
+ 4. A ready-to-run shell snippet:
179
+
180
+ ```
181
+ xzcat <most-recent .jsonl.xz filename> \
182
+ | pv -l -s 5500000 \
183
+ | parallel -j$(sysctl -n hw.logicalcpu) --pipe --block 50M --recend '\n' \
184
+ jq -rcf <slug>.jq.txt \
185
+ | bundle exec bin/enhance-report-csv \
186
+ | tee <slug>.csv
187
+ ```
188
+
189
+ Substitute the actual filenames — do not leave placeholders.
190
+
191
+ Also, remind the user to tunnel to Solr in a separate terminal with:
192
+ ```
193
+ ssh -L 8990:sul-solr-prod-a.stanford.edu:80 lyberadmin@argo-prod-02.stanford.edu
194
+ ```
195
+
196
+ ### Step 9 — Iterate
197
+
198
+ After presenting the Step 8 output, prompt the user:
199
+
200
+ > Want to refine the query? You can describe a change (e.g., "also filter by `type`") or paste a modified jq expression directly.
201
+
202
+ **If the user describes a change:** Update the query to satisfy the new requirement, re-run against the example JSON (same validation loop as Step 5, up to 3 attempts), then repeat Steps 6–8 with the updated query and slug.
203
+
204
+ **If the user pastes a modified query directly:** Validate it by running against the example JSON. If it produces valid output, skip straight to repeating Steps 6–8. If it errors, diagnose and fix (up to 3 attempts), then repeat Steps 6–8.
205
+
206
+ **Each iteration overwrites the `.jq.txt` file and `<slug>-playground.html`** (same slug unless the query purpose changed significantly, in which case generate a new slug) **and replaces all previous outputs** with updated versions.
207
+
208
+ Continue offering to iterate after each round until the user is satisfied.
data/Gemfile CHANGED
@@ -5,6 +5,8 @@ source 'https://rubygems.org'
5
5
  # Specify your gem's dependencies in cocina-models.gemspec
6
6
  gemspec
7
7
 
8
+ gem 'csv'
8
9
  gem 'debug'
10
+ gem 'rsolr'
9
11
  gem 'rspec_junit_formatter' # For CircleCI
10
12
  gem 'ruby-progressbar'
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- cocina-models (0.119.0)
4
+ cocina-models (0.121.0)
5
5
  activesupport
6
6
  deprecation
7
7
  dry-struct (~> 1.0)
@@ -9,8 +9,7 @@ PATH
9
9
  edtf
10
10
  equivalent-xml
11
11
  i18n
12
- json_schemer (~> 2.0)
13
- jsonpath
12
+ jsonschema_rs
14
13
  nokogiri
15
14
  super_diff
16
15
  thor
@@ -36,8 +35,10 @@ GEM
36
35
  attr_extras (7.1.0)
37
36
  base64 (0.3.0)
38
37
  bigdecimal (4.1.2)
38
+ builder (3.3.0)
39
39
  concurrent-ruby (1.3.6)
40
40
  connection_pool (3.0.2)
41
+ csv (3.3.5)
41
42
  date (3.5.1)
42
43
  debug (1.11.1)
43
44
  irb (~> 1.10)
@@ -74,7 +75,12 @@ GEM
74
75
  equivalent-xml (0.6.0)
75
76
  nokogiri (>= 1.4.3)
76
77
  erb (6.0.4)
77
- hana (1.3.7)
78
+ faraday (2.14.2)
79
+ faraday-net_http (>= 2.0, < 3.5)
80
+ json
81
+ logger
82
+ faraday-net_http (3.4.4)
83
+ net-http (~> 0.5)
78
84
  i18n (1.14.8)
79
85
  concurrent-ruby (~> 1.0)
80
86
  ice_nine (0.11.2)
@@ -84,21 +90,19 @@ GEM
84
90
  prism (>= 1.3.0)
85
91
  rdoc (>= 4.0.0)
86
92
  reline (>= 0.4.2)
87
- json (2.19.7)
88
- json_schemer (2.5.0)
89
- bigdecimal
90
- hana (~> 1.3)
91
- regexp_parser (~> 2.0)
92
- simpleidn (~> 0.2)
93
- jsonpath (1.1.5)
94
- multi_json
93
+ json (2.19.8)
94
+ jsonschema_rs (0.46.5-arm64-darwin)
95
+ bigdecimal (>= 3.1, < 5)
96
+ jsonschema_rs (0.46.5-x86_64-linux)
97
+ bigdecimal (>= 3.1, < 5)
95
98
  language_server-protocol (3.17.0.5)
96
99
  lint_roller (1.1.0)
97
100
  logger (1.7.0)
98
101
  minitest (6.0.6)
99
102
  drb (~> 2.0)
100
103
  prism (~> 1.5)
101
- multi_json (1.21.1)
104
+ net-http (0.9.1)
105
+ uri (>= 0.11.1)
102
106
  nokogiri (1.19.3-arm64-darwin)
103
107
  racc (~> 1.4)
104
108
  nokogiri (1.19.3-x86_64-linux-gnu)
@@ -114,7 +118,7 @@ GEM
114
118
  prettyprint
115
119
  prettyprint (0.2.0)
116
120
  prism (1.9.0)
117
- psych (5.3.1)
121
+ psych (5.4.0)
118
122
  date
119
123
  stringio
120
124
  racc (1.8.1)
@@ -127,6 +131,9 @@ GEM
127
131
  regexp_parser (2.12.0)
128
132
  reline (0.6.3)
129
133
  io-console (~> 0.5)
134
+ rsolr (2.6.0)
135
+ builder (>= 2.1.2)
136
+ faraday (>= 0.9, < 3, != 2.0.0)
130
137
  rspec (3.13.2)
131
138
  rspec-core (~> 3.13.0)
132
139
  rspec-expectations (~> 3.13.0)
@@ -159,9 +166,10 @@ GEM
159
166
  rubocop-rake (0.7.1)
160
167
  lint_roller (~> 1.1)
161
168
  rubocop (>= 1.72.1)
162
- rubocop-rspec (3.9.0)
169
+ rubocop-rspec (3.10.2)
163
170
  lint_roller (~> 1.1)
164
- rubocop (~> 1.81)
171
+ regexp_parser (>= 2.0)
172
+ rubocop (~> 1.86, >= 1.86.2)
165
173
  ruby-progressbar (1.13.0)
166
174
  securerandom (0.4.1)
167
175
  simplecov (0.22.0)
@@ -170,7 +178,6 @@ GEM
170
178
  simplecov_json_formatter (~> 0.1)
171
179
  simplecov-html (0.13.2)
172
180
  simplecov_json_formatter (0.1.4)
173
- simpleidn (0.2.3)
174
181
  stringio (3.2.0)
175
182
  super_diff (0.19.0)
176
183
  attr_extras (>= 6.2.4, < 8)
@@ -193,8 +200,10 @@ PLATFORMS
193
200
  DEPENDENCIES
194
201
  bundler (>= 2.0, < 5)
195
202
  cocina-models!
203
+ csv
196
204
  debug
197
205
  rake (~> 13.0)
206
+ rsolr
198
207
  rspec (~> 3.0)
199
208
  rspec_junit_formatter
200
209
  rubocop (~> 1.24)
@@ -209,10 +218,12 @@ CHECKSUMS
209
218
  attr_extras (7.1.0) sha256=d96fc9a9dd5d85ba2d37762440a816f840093959ae26bb90da994c2d9f1fc827
210
219
  base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
211
220
  bigdecimal (4.1.2) sha256=53d217666027eab4280346fba98e7d5b66baaae1b9c3c1c0ffe89d48188a3fbd
212
- bundler (4.0.12) sha256=7f8b757d28dfb636e7b24fba2344ac6dd13b5b24f4b46d62573d483f211825ac
213
- cocina-models (0.119.0)
221
+ builder (3.3.0) sha256=497918d2f9dca528fdca4b88d84e4ef4387256d984b8154e9d5d3fe5a9c8835f
222
+ bundler (4.0.13) sha256=19f08be7f27022cf0b89f27da0b044ae075e8270a9ef44ad248a932614e1ca3b
223
+ cocina-models (0.121.0)
214
224
  concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab
215
225
  connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a
226
+ csv (3.3.5) sha256=6e5134ac3383ef728b7f02725d9872934f523cb40b961479f69cf3afa6c8e73f
216
227
  date (3.5.1) sha256=750d06384d7b9c15d562c76291407d89e368dda4d4fff957eb94962d325a0dc0
217
228
  debug (1.11.1) sha256=2e0b0ac6119f2207a6f8ac7d4a73ca8eb4e440f64da0a3136c30343146e952b6
218
229
  deprecation (1.1.0) sha256=01707cea9a6ed2d7270377457941f43394a345e6dd8048e1be6d18ff2f2a01e1
@@ -227,19 +238,20 @@ CHECKSUMS
227
238
  edtf (3.2.0) sha256=a15a0ee274e49c8047a3ebb5d61d793ba44f7f8ffbf0595392c467e3ea8d2447
228
239
  equivalent-xml (0.6.0) sha256=8919761efa848ad0846369ff8be1f646b17e5061698c4867b09829000cc3f487
229
240
  erb (6.0.4) sha256=38e3803694be357fe2bfe312487c74beaf9fb4e5beb3e22498952fe1645b95d9
230
- hana (1.3.7) sha256=5425db42d651fea08859811c29d20446f16af196308162894db208cac5ce9b0d
241
+ faraday (2.14.2) sha256=73ccb9994a9e8648f010e32eca2ae82e41c57860aa10932cda29418b9e0223ad
242
+ faraday-net_http (3.4.4) sha256=0e78af151747ed1b00f33e25973b4bc220d7f16c00c39676817c8b12331eb588
231
243
  i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
232
244
  ice_nine (0.11.2) sha256=5d506a7d2723d5592dc121b9928e4931742730131f22a1a37649df1c1e2e63db
233
245
  io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
234
246
  irb (1.18.0) sha256=de9454a0703a54704b9811a5ef31a60c86949fbf4013fcf244fabc7c775248e3
235
- json (2.19.7) sha256=fe432c8639f6efff69f9d73b518a3705d9581ab93156f981ea72806e1e5bcc3e
236
- json_schemer (2.5.0) sha256=2f01fb4cce721a4e08dd068fc2030cffd0702a7f333f1ea2be6e8991f00ae396
237
- jsonpath (1.1.5) sha256=29f70467193a2dc93ab864ec3d3326d54267961acc623f487340eb9c34931dbe
247
+ json (2.19.8) sha256=6354310fd76ef69b87d5bd1f38b40d730613baf90b6803d2d0a48f618d32dfaa
248
+ jsonschema_rs (0.46.5-arm64-darwin) sha256=e80414ed67f0956d3e06474a2fa076fc4a7b722f00e5d7142b70289c016ac6f1
249
+ jsonschema_rs (0.46.5-x86_64-linux) sha256=345c65ec7a5abf8879b9c9356752f0fdf4c9926f6480458fc32803a871b5cbb3
238
250
  language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
239
251
  lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
240
252
  logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
241
253
  minitest (6.0.6) sha256=153ea36d1d987a62942382b61075745042a2b3123b1cd48f4c3675af9cc7d6f1
242
- multi_json (1.21.1) sha256=e6126a31808e3b4d19f483c775ceac34df190dffa62adfb63a165ee14ba68080
254
+ net-http (0.9.1) sha256=25ba0b67c63e89df626ed8fac771d0ad24ad151a858af2cc8e6a716ca4336996
243
255
  nokogiri (1.19.3-arm64-darwin) sha256=71b9bd424b1b7abc18b05052a1a3cfd3627abdca62be280854cc411791357e42
244
256
  nokogiri (1.19.3-x86_64-linux-gnu) sha256=2f5078620fe12e83669b5b17311b32532a8153d02eee7ad06948b926d6080976
245
257
  optimist (3.2.1) sha256=8cf8a0fd69f3aa24ab48885d3a666717c27bc3d9edd6e976e18b9d771e72e34e
@@ -249,13 +261,14 @@ CHECKSUMS
249
261
  pp (0.6.3) sha256=2951d514450b93ccfeb1df7d021cae0da16e0a7f95ee1e2273719669d0ab9df6
250
262
  prettyprint (0.2.0) sha256=2bc9e15581a94742064a3cc8b0fb9d45aae3d03a1baa6ef80922627a0766f193
251
263
  prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
252
- psych (5.3.1) sha256=eb7a57cef10c9d70173ff74e739d843ac3b2c019a003de48447b2963d81b1974
264
+ psych (5.4.0) sha256=14f72d69a611af663d7d70e4a7b67d9eb1f3ae9f8d916b478961d5a0075ba5b7
253
265
  racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
254
266
  rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
255
267
  rake (13.4.2) sha256=cb825b2bd5f1f8e91ca37bddb4b9aaf345551b4731da62949be002fa89283701
256
268
  rdoc (7.2.0) sha256=8650f76cd4009c3b54955eb5d7e3a075c60a57276766ebf36f9085e8c9f23192
257
269
  regexp_parser (2.12.0) sha256=35a916a1d63190ab5c9009457136ae5f3c0c7512d60291d0d1378ba18ce08ebb
258
270
  reline (0.6.3) sha256=1198b04973565b36ec0f11542ab3f5cfeeec34823f4e54cebde90968092b1835
271
+ rsolr (2.6.0) sha256=4b3bcea772cac300562775c20eeddedf63a6b7516a070cb6fbde000b09cfe12b
259
272
  rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
260
273
  rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
261
274
  rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
@@ -265,13 +278,12 @@ CHECKSUMS
265
278
  rubocop (1.87.0) sha256=b9d9ddf55116a513f8ef2c7ae660662d8b49301f118d3f0df61865b33a5c188d
266
279
  rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035
267
280
  rubocop-rake (0.7.1) sha256=3797f2b6810c3e9df7376c26d5f44f3475eda59eb1adc38e6f62ecf027cbae4d
268
- rubocop-rspec (3.9.0) sha256=8fa70a3619408237d789aeecfb9beef40576acc855173e60939d63332fdb55e2
281
+ rubocop-rspec (3.10.2) sha256=0b3e2ecc592cd10ecbf0095bb58d1e357905276e069643523cc19eb7495f65e2
269
282
  ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
270
283
  securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
271
284
  simplecov (0.22.0) sha256=fe2622c7834ff23b98066bb0a854284b2729a569ac659f82621fc22ef36213a5
272
285
  simplecov-html (0.13.2) sha256=bd0b8e54e7c2d7685927e8d6286466359b6f16b18cb0df47b508e8d73c777246
273
286
  simplecov_json_formatter (0.1.4) sha256=529418fbe8de1713ac2b2d612aa3daa56d316975d307244399fa4838c601b428
274
- simpleidn (0.2.3) sha256=08ce96f03fa1605286be22651ba0fc9c0b2d6272c9b27a260bc88be05b0d2c29
275
287
  stringio (3.2.0) sha256=c37cb2e58b4ffbd33fe5cd948c05934af997b36e0b6ca6fdf43afa234cf222e1
276
288
  super_diff (0.19.0) sha256=c35fc1c0daa223d67b203fe3fb49a6cfd67850a53920319565c3c654e03ec902
277
289
  thor (1.5.0) sha256=e3a9e55fe857e44859ce104a84675ab6e8cd59c650a49106a05f55f136425e73
@@ -283,4 +295,4 @@ CHECKSUMS
283
295
  zeitwerk (2.8.2) sha256=7212a61311083c604184b1ea2574b9aa05cd14f855a0841c06985cabe9181d12
284
296
 
285
297
  BUNDLED WITH
286
- 4.0.12
298
+ 4.0.13
data/README.md CHANGED
@@ -51,6 +51,9 @@ exe/generator generate_vocab
51
51
  exe/generator generate_descriptive_docs
52
52
  ```
53
53
 
54
+ ## Reports / querying
55
+ jq-based queries can be authored against a [local data export](https://github.com/sul-dlss/dor-services-app#export-data) using the `/cocina-jq-query` skill. This will help with constructing and efficiently running the query.
56
+
54
57
  ## Testing
55
58
 
56
59
  The generator is tested via its output when run against `schema.json`, viz., the Cocina model classes. Thus, `generate` should be run after any changes to `schema.json`.
@@ -158,7 +161,7 @@ This list of services is known to include:
158
161
  * [sul-dlss/sdr-api](https://github.com/sul-dlss/sdr-api)
159
162
  * [sul-dlss/dor-services-app](https://github.com/sul-dlss/dor-services-app/)
160
163
 
161
- Perform `bundle update --conservative cocina-models dor-services-client` in the services above and make PRs for those repos. You may first need to update how these gems are pinned in the `Gemfile` in order to bump them.
164
+ Perform `bundle update cocina-models dor-services-client --conservative` in the services above and make PRs for those repos. You may first need to update how these gems are pinned in the `Gemfile` in order to bump them.
162
165
 
163
166
  Get the directly coupled services PRs merged before the deploy in step 5.
164
167
 
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'csv'
6
+ require 'optparse'
7
+ require 'rsolr'
8
+
9
+ # This script reads a CSV from standard input, where the first column is expected to be a druid.
10
+ # It queries Solr for each druid to fetch additional fields, then outputs an enhanced CSV to standard output.
11
+ # Usage: enhance-report-csv [options] < input.csv > output.csv
12
+ # To use locally, tunnel to solr with: ssh -L 8990:sul-solr-prod-a.stanford.edu:80 lyberadmin@argo-prod-02.stanford.edu
13
+
14
+ FIELD_HEADERS = {
15
+ 'display_title_ss' => 'title',
16
+ 'member_of_collection_ssim' => 'collection_druids',
17
+ 'collection_title_ssimdv' => 'collection_titles',
18
+ 'governed_by_ssim' => 'apo_druid',
19
+ 'apo_title_ssimdv' => 'apo_title',
20
+ 'folio_instance_hrid_ssim' => 'folio_hrid'
21
+ }.freeze
22
+
23
+ def parse_options # rubocop:disable Metrics/MethodLength
24
+ options = {
25
+ solr_url: 'http://localhost:8990/solr/argo_prod',
26
+ batch_size: 100
27
+ }
28
+
29
+ OptionParser.new do |opts|
30
+ opts.banner = 'Usage: enhance-report-csv [options] < input.csv > output.csv'
31
+
32
+ opts.on('--solr-url URL', 'Solr URL (default: http://localhost:8990/solr/argo_prod)') do |url|
33
+ options[:solr_url] = url
34
+ end
35
+
36
+ opts.on('--batch-size NUM', Integer, 'Solr batch size (default: 100)') do |n|
37
+ options[:batch_size] = n
38
+ end
39
+
40
+ opts.on('-h', '--help', 'Display this help message') do
41
+ puts opts
42
+ exit
43
+ end
44
+ end.parse!
45
+
46
+ options
47
+ end
48
+
49
+ def fetch_solr_docs(solr, druids)
50
+ druids.map { |d| "id:(#{d})" }.join(' OR ')
51
+ response = solr.get('select', params: {
52
+ q: '*:*',
53
+ fq: "{!terms f=id}#{druids.join(',')}",
54
+ fl: "id,#{FIELD_HEADERS.keys.join(',')}",
55
+ rows: druids.size
56
+ })
57
+ response['response']['docs'].to_h do |doc|
58
+ [doc['id'], doc]
59
+ end
60
+ end
61
+
62
+ def extract_fields(doc)
63
+ FIELD_HEADERS.keys.map do |field|
64
+ value = doc&.fetch(field, nil)
65
+ value.is_a?(Array) ? value.join(';') : value.to_s
66
+ end
67
+ end
68
+
69
+ def build_output(solr, rows, batch_size)
70
+ extra_col_count = (rows.first&.size || 1) - 1
71
+ extra_headers = extra_col_count.times.map { |i| "col#{i + 2}" }
72
+
73
+ CSV.generate do |out|
74
+ out << (['druid'] + FIELD_HEADERS.values + extra_headers)
75
+
76
+ rows.each_slice(batch_size) do |batch|
77
+ docs = fetch_solr_docs(solr, batch.map { |row| row[0] })
78
+
79
+ batch.each do |row|
80
+ druid = row[0]
81
+ out << ([druid] + extract_fields(docs[druid]) + row[1..])
82
+ end
83
+ end
84
+ end
85
+ end
86
+
87
+ options = parse_options
88
+ solr = RSolr.connect(url: options[:solr_url])
89
+ rows = CSV.parse($stdin.read, headers: false)
90
+ print build_output(solr, rows, options[:batch_size])
data/bin/validate-data CHANGED
@@ -72,13 +72,18 @@ end
72
72
 
73
73
  # Get total line count (either from option or by counting)
74
74
  def get_total_lines(filename, provided_count)
75
+ count_filename = filename.sub(/\.xz$/, '.count.txt')
75
76
  if provided_count
76
77
  puts "Using provided line count: #{provided_count}"
77
78
  provided_count
79
+ elsif File.exist?(count_filename)
80
+ puts "Reading line count from #{count_filename}..."
81
+ File.read(count_filename).to_i
78
82
  else
79
83
  puts 'Counting lines...'
80
84
  total = count_lines(filename)
81
85
  puts "Total lines to validate: #{total}"
86
+ File.write(count_filename, total)
82
87
  total
83
88
  end
84
89
  end
@@ -191,6 +196,8 @@ def distribute_work(filename, workers, batch_size, total_lines) # rubocop:disabl
191
196
 
192
197
  # Update progress bar
193
198
  progressbar.increment
199
+
200
+ break if line_number >= total_lines
194
201
  end
195
202
  end
196
203
 
data/bin/validate-schema CHANGED
@@ -5,4 +5,9 @@ require 'bundler/setup'
5
5
  require 'cocina/models'
6
6
 
7
7
  filepath = ARGV[0]
8
- exit(1) unless JSONSchemer.valid_schema?(Pathname.new(filepath))
8
+ begin
9
+ JSONSchema.validator_for(JSON.parse(File.read(filepath)))
10
+ rescue StandardError => e
11
+ warn e.message
12
+ exit(1)
13
+ end
@@ -31,8 +31,7 @@ Gem::Specification.new do |spec|
31
31
  spec.add_dependency 'edtf' # used for date/time validation
32
32
  spec.add_dependency 'equivalent-xml' # for diffing MODS
33
33
  spec.add_dependency 'i18n' # for validating BCP 47 language tags, according to RFC 4646
34
- spec.add_dependency 'jsonpath' # used for date/time validation
35
- spec.add_dependency 'json_schemer', '~> 2.0'
34
+ spec.add_dependency 'jsonschema_rs'
36
35
  spec.add_dependency 'nokogiri'
37
36
  spec.add_dependency 'super_diff'
38
37
  spec.add_dependency 'thor'
@@ -23,9 +23,6 @@ module Cocina
23
23
  attribute :note, Types::Strict::Array.of(DescriptiveValue).default([].freeze)
24
24
  # URL or other pointer to the location of the contributor information.
25
25
  attribute? :valueAt, Types::Strict::String
26
- # For multiple representations of information about the same contributor (e.g. in different
27
- # languages).
28
- attribute :parallelContributor, Types::Strict::Array.of(DescriptiveParallelContributor).default([].freeze)
29
26
  end
30
27
  end
31
28
  end