cocina-models 0.119.0 → 0.121.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +1 -1
- data/.claude/skills/cocina-jq-query/SKILL.md +8 -0
- data/.gitignore +5 -0
- data/.rubocop.yml +6 -0
- data/AGENTS.md +208 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +40 -28
- data/README.md +4 -1
- data/bin/enhance-report-csv +90 -0
- data/bin/validate-data +7 -0
- data/bin/validate-schema +6 -1
- data/cocina-models.gemspec +1 -2
- data/lib/cocina/models/contributor.rb +0 -3
- data/lib/cocina/models/mapping/from_mods/event.rb +12 -3
- data/lib/cocina/models/related_resource.rb +1 -1
- data/lib/cocina/models/validators/base_description_visitor_validator.rb +33 -0
- data/lib/cocina/models/validators/base_structural_visitor_validator.rb +23 -0
- data/lib/cocina/models/validators/composite_description_validator.rb +62 -0
- data/lib/cocina/models/validators/composite_structural_validator.rb +48 -0
- data/lib/cocina/models/validators/dark_visitor_validator.rb +46 -0
- data/lib/cocina/models/validators/description_date_time_visitor_validator.rb +132 -0
- data/lib/cocina/models/validators/{description_types_validator.rb → description_types_visitor_validator.rb} +9 -55
- data/lib/cocina/models/validators/{description_values_validator.rb → description_values_visitor_validator.rb} +14 -51
- data/lib/cocina/models/validators/json_schema_validator.rb +54 -102
- data/lib/cocina/models/validators/language_tag_visitor_validator.rb +32 -0
- data/lib/cocina/models/validators/reserved_filename_visitor_validator.rb +40 -0
- data/lib/cocina/models/validators/validator.rb +5 -9
- data/lib/cocina/models/version.rb +1 -1
- data/lib/cocina/models.rb +1 -1
- data/schema.json +114 -59
- metadata +16 -24
- data/lib/cocina/models/descriptive_parallel_contributor.rb +0 -29
- data/lib/cocina/models/validators/dark_validator.rb +0 -76
- data/lib/cocina/models/validators/date_time_validator.rb +0 -100
- data/lib/cocina/models/validators/language_tag_validator.rb +0 -76
- data/lib/cocina/models/validators/reserved_filename_validator.rb +0 -60
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cfcb2c2a393845b1c9129ab48f5fc7340beb8067936694798e800a5935bf1ac9
|
|
4
|
+
data.tar.gz: f9dc80c520ddecb670a5df20273c635146f75d75025132402d4d1329090e29fd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2f7ccadc053ba81117275251401432ec92c65fefd86da94c25e7c862f8680c868d3f392a84b3966fb48609a5c383abd8dc90b8a51e457fd2fe8d3792371bd4a6
|
|
7
|
+
data.tar.gz: c1355245a0571f8933155c9de04b38226b55b96a3ffdb899af3d2af553e30f53ae7986a41e6293339a90cc2a017fcd6eec273fcd1ce64df57b26cbbb5f161193
|
data/.circleci/config.yml
CHANGED
data/.claude/skills/cocina-jq-query/SKILL.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: cocina-jq-query
|
|
3
|
+
description: Build and validate a jq query against a Cocina Model JSON serialization. Use when the user wants to query, filter, or transform a Cocina object (DRO, Collection, AdminPolicy) using jq, or asks for help writing a jq expression for Cocina data.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# cocina-jq-query
|
|
7
|
+
|
|
8
|
+
Follow the workflow defined in [AGENTS.md](../../../AGENTS.md) under **cocina-jq**.
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
|
@@ -126,6 +126,12 @@ RSpec/BeEq: # new in 2.9.0
|
|
|
126
126
|
RSpec/BeNil: # new in 2.9.0
|
|
127
127
|
Enabled: true
|
|
128
128
|
|
|
129
|
+
RSpec/DiscardedMatcher: # new in 3.10
|
|
130
|
+
Enabled: true
|
|
131
|
+
|
|
132
|
+
RSpec/MatchWithSimpleRegex: # new in 3.10
|
|
133
|
+
Enabled: true
|
|
134
|
+
|
|
129
135
|
RSpec/MultipleExpectations:
|
|
130
136
|
Enabled: false
|
|
131
137
|
|
data/AGENTS.md
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
# Agent Instructions for cocina-models
|
|
2
|
+
|
|
3
|
+
## cocina-jq — Build jq queries for Cocina Model JSON
|
|
4
|
+
|
|
5
|
+
Use this workflow when the user wants to query, filter, or transform a Cocina object (DRO, Collection, AdminPolicy) using jq.
|
|
6
|
+
|
|
7
|
+
### Step 1 — Check prerequisites
|
|
8
|
+
|
|
9
|
+
#### Check jq
|
|
10
|
+
Run `jq --version`. If jq is not installed, tell the user:
|
|
11
|
+
> `jq` is not installed. Install it with `brew install jq`, then retry.
|
|
12
|
+
|
|
13
|
+
Stop here if jq is missing.
|
|
14
|
+
|
|
15
|
+
#### Check parallel
|
|
16
|
+
Run `parallel --version`. If parallel is not installed, tell the user:
|
|
17
|
+
> `parallel` is not installed. Install it with `brew install parallel`, then retry.
|
|
18
|
+
|
|
19
|
+
#### Check pv
|
|
20
|
+
Run `pv --version`. If pv is not installed, tell the user:
|
|
21
|
+
> `pv` is not installed. Install it with `brew install pv`, then retry.
|
|
22
|
+
|
|
23
|
+
### Output format (always apply)
|
|
24
|
+
|
|
25
|
+
Every jq query produced by this skill **must output a CSV line** using `@csv`. The **first field must always be the external identifier** (`externalIdentifier`). Additional fields follow based on the user's query. Example:
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
"druid:bc123df4567","some value","another value"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Use `[.externalIdentifier, ...] | @csv` as the output expression. Apply this constraint automatically — do not ask the user whether to include the external identifier.
|
|
32
|
+
|
|
33
|
+
### Step 2 — Resume or collect inputs
|
|
34
|
+
|
|
35
|
+
First, ask the user:
|
|
36
|
+
|
|
37
|
+
> Do you want to resume an existing query?
|
|
38
|
+
|
|
39
|
+
**If yes:** Ask for the filename of the `.jq.txt` file (e.g., `contributor-name-uri-non-loc.jq.txt`). Read that file from the project root. The file header contains the original inputs as comments (query description, expected output description, example JSON, example output). Parse those comments to reconstruct the inputs. Confirm with the user that the loaded values look correct, then proceed to Step 3 with those inputs (skip re-asking for them).
|
|
40
|
+
|
|
41
|
+
**If no:** Ask the user for each input, one at a time:
|
|
42
|
+
|
|
43
|
+
1. **Query description** — what should the query do? (e.g., "extract all file labels from structural")
|
|
44
|
+
2. **Expected output description** — what additional values (beyond the external identifier) should appear in the output?
|
|
45
|
+
3. **Example Cocina object** — paste JSON directly
|
|
46
|
+
4. **Example output** — paste the exact expected CSV output (must start with the external identifier as the first field)
|
|
47
|
+
|
|
48
|
+
Explicitly ask for each input; do not infer or guess.
|
|
49
|
+
|
|
50
|
+
### Step 3 — Clarify ambiguities
|
|
51
|
+
|
|
52
|
+
Review the inputs from Step 2. If anything is unclear or underspecified, ask the user targeted questions before proceeding. Examples of things to clarify:
|
|
53
|
+
|
|
54
|
+
- Is the query meant to return one value per object, or aggregate across many objects?
|
|
55
|
+
- Should missing or null fields be skipped, returned as null, or cause an error?
|
|
56
|
+
- Are there edge cases in the data structure the query must handle (e.g., empty arrays, nested arrays, optional fields)?
|
|
57
|
+
- Is the output format exactly as shown, or is there flexibility (e.g., flat vs. nested)?
|
|
58
|
+
|
|
59
|
+
Ask only questions that would change how the query is written. Do not ask about things already clear from the inputs. If everything is unambiguous, skip this step silently and proceed.
|
|
60
|
+
|
|
61
|
+
### Step 4 — Load relevant schema portion
|
|
62
|
+
|
|
63
|
+
Read `schema.json` from the project root. Extract only the `$defs` entries relevant to the Cocina object type found in the example's `type` field:
|
|
64
|
+
|
|
65
|
+
- `https://cocina.sul.stanford.edu/models/object` → DRO-related defs
|
|
66
|
+
- `https://cocina.sul.stanford.edu/models/collection` → Collection-related defs
|
|
67
|
+
- `https://cocina.sul.stanford.edu/models/admin_policy` → AdminPolicy-related defs
|
|
68
|
+
|
|
69
|
+
Include only the defs actually referenced (follow `$ref` chains up to 2 levels deep). Do not load the entire schema.
|
|
70
|
+
|
|
71
|
+
### Step 5 — Generate and validate the query (up to 3 attempts)
|
|
72
|
+
|
|
73
|
+
**Attempt 1:** Use the schema excerpt, example JSON, query description, and expected output to write a jq query.
|
|
74
|
+
|
|
75
|
+
Run it:
|
|
76
|
+
```bash
|
|
77
|
+
echo '<example_json>' | jq '<query>'
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Compare actual output to the example output. If it matches → proceed to Step 6.
|
|
81
|
+
|
|
82
|
+
**Attempt 2 (if attempt 1 fails):** Run `man jq` to load the jq manual. Use it to refine the query. Re-run and validate.
|
|
83
|
+
|
|
84
|
+
**Attempt 3 (if attempt 2 fails):** Make a final attempt using all context. Re-run and validate.
|
|
85
|
+
|
|
86
|
+
**After 3 failures:** Present the best attempt, explain what is wrong, and ask the user to clarify.
|
|
87
|
+
|
|
88
|
+
### Step 6 — Generate local HTML playground
|
|
89
|
+
|
|
90
|
+
Write `<slug>-playground.html` in the project root using the template below.
|
|
91
|
+
|
|
92
|
+
Substitute:
|
|
93
|
+
- Every occurrence of `SLUG` → the actual slug string
|
|
94
|
+
- `JSON_PLACEHOLDER` → the example JSON (pretty-printed) passed through `JSON.stringify` a second time, producing a valid JS string literal (e.g. `"{\"foo\":\"bar\"}"`)
|
|
95
|
+
- `QUERY_PLACEHOLDER` → the validated jq query passed through `JSON.stringify`, producing a valid JS string literal (e.g. `".foo"`)
|
|
96
|
+
|
|
97
|
+
```html
|
|
98
|
+
<!DOCTYPE html>
|
|
99
|
+
<html lang="en">
|
|
100
|
+
<head>
|
|
101
|
+
<meta charset="UTF-8">
|
|
102
|
+
<title>jq playground — SLUG</title>
|
|
103
|
+
<style>
|
|
104
|
+
body { font-family: monospace; margin: 2rem; background: #1e1e1e; color: #d4d4d4; }
|
|
105
|
+
h2 { color: #9cdcfe; }
|
|
106
|
+
textarea, input { width: 100%; box-sizing: border-box; background: #252526; color: #d4d4d4; border: 1px solid #444; padding: 8px; font-family: monospace; font-size: 13px; border-radius: 3px; }
|
|
107
|
+
textarea { height: 260px; resize: vertical; }
|
|
108
|
+
input { height: 36px; }
|
|
109
|
+
button { margin-top: 8px; background: #0e639c; color: white; border: none; padding: 8px 20px; cursor: pointer; font-size: 14px; border-radius: 3px; }
|
|
110
|
+
button:hover { background: #1177bb; }
|
|
111
|
+
label { display: block; margin-top: 16px; margin-bottom: 4px; font-size: 12px; color: #9cdcfe; text-transform: uppercase; letter-spacing: 0.05em; }
|
|
112
|
+
#output { background: #252526; border: 1px solid #444; padding: 12px; min-height: 80px; white-space: pre-wrap; word-break: break-all; border-radius: 3px; }
|
|
113
|
+
.error { color: #f44747; }
|
|
114
|
+
</style>
|
|
115
|
+
</head>
|
|
116
|
+
<body>
|
|
117
|
+
<h2>jq playground — SLUG</h2>
|
|
118
|
+
<label>JSON Input</label>
|
|
119
|
+
<textarea id="json"></textarea>
|
|
120
|
+
<label>jq Filter</label>
|
|
121
|
+
<input id="query" type="text" />
|
|
122
|
+
<button onclick="run()">▶ Run</button>
|
|
123
|
+
<label>Output</label>
|
|
124
|
+
<pre id="output">(click Run)</pre>
|
|
125
|
+
|
|
126
|
+
<script src="https://cdn.jsdelivr.net/npm/jq-web@0.5.1/jq.wasm.js"></script>
|
|
127
|
+
<script>
|
|
128
|
+
const INITIAL_JSON = JSON_PLACEHOLDER;
|
|
129
|
+
const INITIAL_QUERY = QUERY_PLACEHOLDER;
|
|
130
|
+
document.getElementById('json').value = JSON.stringify(JSON.parse(INITIAL_JSON), null, 2);
|
|
131
|
+
document.getElementById('query').value = INITIAL_QUERY;
|
|
132
|
+
|
|
133
|
+
function run() {
|
|
134
|
+
const json = document.getElementById('json').value;
|
|
135
|
+
const query = document.getElementById('query').value;
|
|
136
|
+
const out = document.getElementById('output');
|
|
137
|
+
out.className = '';
|
|
138
|
+
out.textContent = 'Running…';
|
|
139
|
+
jq.promised.raw(json, query)
|
|
140
|
+
.then(r => { out.textContent = r || '(empty output)'; })
|
|
141
|
+
.catch(e => { out.className = 'error'; out.textContent = String(e); });
|
|
142
|
+
}
|
|
143
|
+
</script>
|
|
144
|
+
</body>
|
|
145
|
+
</html>
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Step 7 — Write the query to a .jq.txt file
|
|
149
|
+
|
|
150
|
+
Generate a short kebab-case slug summarizing the query (e.g., `invalid-encoding`, `file-label-extract`). Write the file `<slug>.jq.txt` in the project root with the following structure:
|
|
151
|
+
|
|
152
|
+
1. A comment header containing the user's inputs:
|
|
153
|
+
|
|
154
|
+
```
|
|
155
|
+
# Query description: <query description from Step 2>
|
|
156
|
+
#
|
|
157
|
+
# Expected output: <expected output description from Step 2>
|
|
158
|
+
#
|
|
159
|
+
# Example input:
|
|
160
|
+
# <example Cocina JSON, each line prefixed with "# ">
|
|
161
|
+
#
|
|
162
|
+
# Example output:
|
|
163
|
+
# <example output CSV, each line prefixed with "# ">
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
2. A blank line, then the validated jq query.
|
|
167
|
+
|
|
168
|
+
The comment lines must use `#` so the file remains valid jq syntax. When resuming (Step 2 resume path), parse these comment sections by their labels to reconstruct the inputs.
|
|
169
|
+
|
|
170
|
+
Find the most recent `.jsonl.xz` file in the project root by listing `*.jsonl.xz` files sorted by name descending and taking the first result. This filename is used in the shell snippet in Step 8.
|
|
171
|
+
|
|
172
|
+
### Step 8 — Output
|
|
173
|
+
|
|
174
|
+
Present:
|
|
175
|
+
1. The jq query in a code block
|
|
176
|
+
2. A 1–3 sentence explanation of how it works
|
|
177
|
+
3. A markdown link to the local playground file using a `file://` URL (e.g. `[Open playground](file:///Users/someuser/data/sdr/cocina-models/<slug>-playground.html)`) — substitute the actual absolute path — plus the equivalent shell command (`open <slug>-playground.html`) for reference
|
|
178
|
+
4. A ready-to-run shell snippet:
|
|
179
|
+
|
|
180
|
+
```
|
|
181
|
+
xzcat <most-recent .jsonl.xz filename> \
|
|
182
|
+
| pv -l -s 5500000 \
|
|
183
|
+
| parallel -j$(sysctl -n hw.logicalcpu) --pipe --block 50M --recend '\n' \
|
|
184
|
+
jq -rcf <slug>.jq.txt \
|
|
185
|
+
| bundle exec bin/enhance-report-csv \
|
|
186
|
+
| tee <slug>.csv
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Substitute the actual filenames — do not leave placeholders.
|
|
190
|
+
|
|
191
|
+
Also, remind the user to tunnel to Solr in a separate terminal with:
|
|
192
|
+
```
|
|
193
|
+
ssh -L 8990:sul-solr-prod-a.stanford.edu:80 lyberadmin@argo-prod-02.stanford.edu
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Step 9 — Iterate
|
|
197
|
+
|
|
198
|
+
After presenting the Step 8 output, prompt the user:
|
|
199
|
+
|
|
200
|
+
> Want to refine the query? You can describe a change (e.g., "also filter by `type`") or paste a modified jq expression directly.
|
|
201
|
+
|
|
202
|
+
**If the user describes a change:** Update the query to satisfy the new requirement, re-run against the example JSON (same validation loop as Step 5, up to 3 attempts), then repeat Steps 6–8 with the updated query and slug.
|
|
203
|
+
|
|
204
|
+
**If the user pastes a modified query directly:** Validate it by running against the example JSON. If it produces valid output, skip straight to repeating Steps 6–8. If it errors, diagnose and fix (up to 3 attempts), then repeat Steps 6–8.
|
|
205
|
+
|
|
206
|
+
**Each iteration overwrites the `.jq.txt` file and `<slug>-playground.html`** (same slug unless the query purpose changed significantly, in which case generate a new slug) **and replaces all previous outputs** with updated versions.
|
|
207
|
+
|
|
208
|
+
Continue offering to iterate after each round until the user is satisfied.
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
cocina-models (0.119.0)
|
|
4
|
+
cocina-models (0.121.0)
|
|
5
5
|
activesupport
|
|
6
6
|
deprecation
|
|
7
7
|
dry-struct (~> 1.0)
|
|
@@ -9,8 +9,7 @@ PATH
|
|
|
9
9
|
edtf
|
|
10
10
|
equivalent-xml
|
|
11
11
|
i18n
|
|
12
|
-
|
|
13
|
-
jsonpath
|
|
12
|
+
jsonschema_rs
|
|
14
13
|
nokogiri
|
|
15
14
|
super_diff
|
|
16
15
|
thor
|
|
@@ -36,8 +35,10 @@ GEM
|
|
|
36
35
|
attr_extras (7.1.0)
|
|
37
36
|
base64 (0.3.0)
|
|
38
37
|
bigdecimal (4.1.2)
|
|
38
|
+
builder (3.3.0)
|
|
39
39
|
concurrent-ruby (1.3.6)
|
|
40
40
|
connection_pool (3.0.2)
|
|
41
|
+
csv (3.3.5)
|
|
41
42
|
date (3.5.1)
|
|
42
43
|
debug (1.11.1)
|
|
43
44
|
irb (~> 1.10)
|
|
@@ -74,7 +75,12 @@ GEM
|
|
|
74
75
|
equivalent-xml (0.6.0)
|
|
75
76
|
nokogiri (>= 1.4.3)
|
|
76
77
|
erb (6.0.4)
|
|
77
|
-
|
|
78
|
+
faraday (2.14.2)
|
|
79
|
+
faraday-net_http (>= 2.0, < 3.5)
|
|
80
|
+
json
|
|
81
|
+
logger
|
|
82
|
+
faraday-net_http (3.4.4)
|
|
83
|
+
net-http (~> 0.5)
|
|
78
84
|
i18n (1.14.8)
|
|
79
85
|
concurrent-ruby (~> 1.0)
|
|
80
86
|
ice_nine (0.11.2)
|
|
@@ -84,21 +90,19 @@ GEM
|
|
|
84
90
|
prism (>= 1.3.0)
|
|
85
91
|
rdoc (>= 4.0.0)
|
|
86
92
|
reline (>= 0.4.2)
|
|
87
|
-
json (2.19.
|
|
88
|
-
|
|
89
|
-
bigdecimal
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
simpleidn (~> 0.2)
|
|
93
|
-
jsonpath (1.1.5)
|
|
94
|
-
multi_json
|
|
93
|
+
json (2.19.8)
|
|
94
|
+
jsonschema_rs (0.46.5-arm64-darwin)
|
|
95
|
+
bigdecimal (>= 3.1, < 5)
|
|
96
|
+
jsonschema_rs (0.46.5-x86_64-linux)
|
|
97
|
+
bigdecimal (>= 3.1, < 5)
|
|
95
98
|
language_server-protocol (3.17.0.5)
|
|
96
99
|
lint_roller (1.1.0)
|
|
97
100
|
logger (1.7.0)
|
|
98
101
|
minitest (6.0.6)
|
|
99
102
|
drb (~> 2.0)
|
|
100
103
|
prism (~> 1.5)
|
|
101
|
-
|
|
104
|
+
net-http (0.9.1)
|
|
105
|
+
uri (>= 0.11.1)
|
|
102
106
|
nokogiri (1.19.3-arm64-darwin)
|
|
103
107
|
racc (~> 1.4)
|
|
104
108
|
nokogiri (1.19.3-x86_64-linux-gnu)
|
|
@@ -114,7 +118,7 @@ GEM
|
|
|
114
118
|
prettyprint
|
|
115
119
|
prettyprint (0.2.0)
|
|
116
120
|
prism (1.9.0)
|
|
117
|
-
psych (5.
|
|
121
|
+
psych (5.4.0)
|
|
118
122
|
date
|
|
119
123
|
stringio
|
|
120
124
|
racc (1.8.1)
|
|
@@ -127,6 +131,9 @@ GEM
|
|
|
127
131
|
regexp_parser (2.12.0)
|
|
128
132
|
reline (0.6.3)
|
|
129
133
|
io-console (~> 0.5)
|
|
134
|
+
rsolr (2.6.0)
|
|
135
|
+
builder (>= 2.1.2)
|
|
136
|
+
faraday (>= 0.9, < 3, != 2.0.0)
|
|
130
137
|
rspec (3.13.2)
|
|
131
138
|
rspec-core (~> 3.13.0)
|
|
132
139
|
rspec-expectations (~> 3.13.0)
|
|
@@ -159,9 +166,10 @@ GEM
|
|
|
159
166
|
rubocop-rake (0.7.1)
|
|
160
167
|
lint_roller (~> 1.1)
|
|
161
168
|
rubocop (>= 1.72.1)
|
|
162
|
-
rubocop-rspec (3.
|
|
169
|
+
rubocop-rspec (3.10.2)
|
|
163
170
|
lint_roller (~> 1.1)
|
|
164
|
-
|
|
171
|
+
regexp_parser (>= 2.0)
|
|
172
|
+
rubocop (~> 1.86, >= 1.86.2)
|
|
165
173
|
ruby-progressbar (1.13.0)
|
|
166
174
|
securerandom (0.4.1)
|
|
167
175
|
simplecov (0.22.0)
|
|
@@ -170,7 +178,6 @@ GEM
|
|
|
170
178
|
simplecov_json_formatter (~> 0.1)
|
|
171
179
|
simplecov-html (0.13.2)
|
|
172
180
|
simplecov_json_formatter (0.1.4)
|
|
173
|
-
simpleidn (0.2.3)
|
|
174
181
|
stringio (3.2.0)
|
|
175
182
|
super_diff (0.19.0)
|
|
176
183
|
attr_extras (>= 6.2.4, < 8)
|
|
@@ -193,8 +200,10 @@ PLATFORMS
|
|
|
193
200
|
DEPENDENCIES
|
|
194
201
|
bundler (>= 2.0, < 5)
|
|
195
202
|
cocina-models!
|
|
203
|
+
csv
|
|
196
204
|
debug
|
|
197
205
|
rake (~> 13.0)
|
|
206
|
+
rsolr
|
|
198
207
|
rspec (~> 3.0)
|
|
199
208
|
rspec_junit_formatter
|
|
200
209
|
rubocop (~> 1.24)
|
|
@@ -209,10 +218,12 @@ CHECKSUMS
|
|
|
209
218
|
attr_extras (7.1.0) sha256=d96fc9a9dd5d85ba2d37762440a816f840093959ae26bb90da994c2d9f1fc827
|
|
210
219
|
base64 (0.3.0) sha256=27337aeabad6ffae05c265c450490628ef3ebd4b67be58257393227588f5a97b
|
|
211
220
|
bigdecimal (4.1.2) sha256=53d217666027eab4280346fba98e7d5b66baaae1b9c3c1c0ffe89d48188a3fbd
|
|
212
|
-
|
|
213
|
-
|
|
221
|
+
builder (3.3.0) sha256=497918d2f9dca528fdca4b88d84e4ef4387256d984b8154e9d5d3fe5a9c8835f
|
|
222
|
+
bundler (4.0.13) sha256=19f08be7f27022cf0b89f27da0b044ae075e8270a9ef44ad248a932614e1ca3b
|
|
223
|
+
cocina-models (0.121.0)
|
|
214
224
|
concurrent-ruby (1.3.6) sha256=6b56837e1e7e5292f9864f34b69c5a2cbc75c0cf5338f1ce9903d10fa762d5ab
|
|
215
225
|
connection_pool (3.0.2) sha256=33fff5ba71a12d2aa26cb72b1db8bba2a1a01823559fb01d29eb74c286e62e0a
|
|
226
|
+
csv (3.3.5) sha256=6e5134ac3383ef728b7f02725d9872934f523cb40b961479f69cf3afa6c8e73f
|
|
216
227
|
date (3.5.1) sha256=750d06384d7b9c15d562c76291407d89e368dda4d4fff957eb94962d325a0dc0
|
|
217
228
|
debug (1.11.1) sha256=2e0b0ac6119f2207a6f8ac7d4a73ca8eb4e440f64da0a3136c30343146e952b6
|
|
218
229
|
deprecation (1.1.0) sha256=01707cea9a6ed2d7270377457941f43394a345e6dd8048e1be6d18ff2f2a01e1
|
|
@@ -227,19 +238,20 @@ CHECKSUMS
|
|
|
227
238
|
edtf (3.2.0) sha256=a15a0ee274e49c8047a3ebb5d61d793ba44f7f8ffbf0595392c467e3ea8d2447
|
|
228
239
|
equivalent-xml (0.6.0) sha256=8919761efa848ad0846369ff8be1f646b17e5061698c4867b09829000cc3f487
|
|
229
240
|
erb (6.0.4) sha256=38e3803694be357fe2bfe312487c74beaf9fb4e5beb3e22498952fe1645b95d9
|
|
230
|
-
|
|
241
|
+
faraday (2.14.2) sha256=73ccb9994a9e8648f010e32eca2ae82e41c57860aa10932cda29418b9e0223ad
|
|
242
|
+
faraday-net_http (3.4.4) sha256=0e78af151747ed1b00f33e25973b4bc220d7f16c00c39676817c8b12331eb588
|
|
231
243
|
i18n (1.14.8) sha256=285778639134865c5e0f6269e0b818256017e8cde89993fdfcbfb64d088824a5
|
|
232
244
|
ice_nine (0.11.2) sha256=5d506a7d2723d5592dc121b9928e4931742730131f22a1a37649df1c1e2e63db
|
|
233
245
|
io-console (0.8.2) sha256=d6e3ae7a7cc7574f4b8893b4fca2162e57a825b223a177b7afa236c5ef9814cc
|
|
234
246
|
irb (1.18.0) sha256=de9454a0703a54704b9811a5ef31a60c86949fbf4013fcf244fabc7c775248e3
|
|
235
|
-
json (2.19.
|
|
236
|
-
|
|
237
|
-
|
|
247
|
+
json (2.19.8) sha256=6354310fd76ef69b87d5bd1f38b40d730613baf90b6803d2d0a48f618d32dfaa
|
|
248
|
+
jsonschema_rs (0.46.5-arm64-darwin) sha256=e80414ed67f0956d3e06474a2fa076fc4a7b722f00e5d7142b70289c016ac6f1
|
|
249
|
+
jsonschema_rs (0.46.5-x86_64-linux) sha256=345c65ec7a5abf8879b9c9356752f0fdf4c9926f6480458fc32803a871b5cbb3
|
|
238
250
|
language_server-protocol (3.17.0.5) sha256=fd1e39a51a28bf3eec959379985a72e296e9f9acfce46f6a79d31ca8760803cc
|
|
239
251
|
lint_roller (1.1.0) sha256=2c0c845b632a7d172cb849cc90c1bce937a28c5c8ccccb50dfd46a485003cc87
|
|
240
252
|
logger (1.7.0) sha256=196edec7cc44b66cfb40f9755ce11b392f21f7967696af15d274dde7edff0203
|
|
241
253
|
minitest (6.0.6) sha256=153ea36d1d987a62942382b61075745042a2b3123b1cd48f4c3675af9cc7d6f1
|
|
242
|
-
|
|
254
|
+
net-http (0.9.1) sha256=25ba0b67c63e89df626ed8fac771d0ad24ad151a858af2cc8e6a716ca4336996
|
|
243
255
|
nokogiri (1.19.3-arm64-darwin) sha256=71b9bd424b1b7abc18b05052a1a3cfd3627abdca62be280854cc411791357e42
|
|
244
256
|
nokogiri (1.19.3-x86_64-linux-gnu) sha256=2f5078620fe12e83669b5b17311b32532a8153d02eee7ad06948b926d6080976
|
|
245
257
|
optimist (3.2.1) sha256=8cf8a0fd69f3aa24ab48885d3a666717c27bc3d9edd6e976e18b9d771e72e34e
|
|
@@ -249,13 +261,14 @@ CHECKSUMS
|
|
|
249
261
|
pp (0.6.3) sha256=2951d514450b93ccfeb1df7d021cae0da16e0a7f95ee1e2273719669d0ab9df6
|
|
250
262
|
prettyprint (0.2.0) sha256=2bc9e15581a94742064a3cc8b0fb9d45aae3d03a1baa6ef80922627a0766f193
|
|
251
263
|
prism (1.9.0) sha256=7b530c6a9f92c24300014919c9dcbc055bf4cdf51ec30aed099b06cd6674ef85
|
|
252
|
-
psych (5.
|
|
264
|
+
psych (5.4.0) sha256=14f72d69a611af663d7d70e4a7b67d9eb1f3ae9f8d916b478961d5a0075ba5b7
|
|
253
265
|
racc (1.8.1) sha256=4a7f6929691dbec8b5209a0b373bc2614882b55fc5d2e447a21aaa691303d62f
|
|
254
266
|
rainbow (3.1.1) sha256=039491aa3a89f42efa1d6dec2fc4e62ede96eb6acd95e52f1ad581182b79bc6a
|
|
255
267
|
rake (13.4.2) sha256=cb825b2bd5f1f8e91ca37bddb4b9aaf345551b4731da62949be002fa89283701
|
|
256
268
|
rdoc (7.2.0) sha256=8650f76cd4009c3b54955eb5d7e3a075c60a57276766ebf36f9085e8c9f23192
|
|
257
269
|
regexp_parser (2.12.0) sha256=35a916a1d63190ab5c9009457136ae5f3c0c7512d60291d0d1378ba18ce08ebb
|
|
258
270
|
reline (0.6.3) sha256=1198b04973565b36ec0f11542ab3f5cfeeec34823f4e54cebde90968092b1835
|
|
271
|
+
rsolr (2.6.0) sha256=4b3bcea772cac300562775c20eeddedf63a6b7516a070cb6fbde000b09cfe12b
|
|
259
272
|
rspec (3.13.2) sha256=206284a08ad798e61f86d7ca3e376718d52c0bc944626b2349266f239f820587
|
|
260
273
|
rspec-core (3.13.6) sha256=a8823c6411667b60a8bca135364351dda34cd55e44ff94c4be4633b37d828b2d
|
|
261
274
|
rspec-expectations (3.13.5) sha256=33a4d3a1d95060aea4c94e9f237030a8f9eae5615e9bd85718fe3a09e4b58836
|
|
@@ -265,13 +278,12 @@ CHECKSUMS
|
|
|
265
278
|
rubocop (1.87.0) sha256=b9d9ddf55116a513f8ef2c7ae660662d8b49301f118d3f0df61865b33a5c188d
|
|
266
279
|
rubocop-ast (1.49.1) sha256=4412f3ee70f6fe4546cc489548e0f6fcf76cafcfa80fa03af67098ffed755035
|
|
267
280
|
rubocop-rake (0.7.1) sha256=3797f2b6810c3e9df7376c26d5f44f3475eda59eb1adc38e6f62ecf027cbae4d
|
|
268
|
-
rubocop-rspec (3.
|
|
281
|
+
rubocop-rspec (3.10.2) sha256=0b3e2ecc592cd10ecbf0095bb58d1e357905276e069643523cc19eb7495f65e2
|
|
269
282
|
ruby-progressbar (1.13.0) sha256=80fc9c47a9b640d6834e0dc7b3c94c9df37f08cb072b7761e4a71e22cff29b33
|
|
270
283
|
securerandom (0.4.1) sha256=cc5193d414a4341b6e225f0cb4446aceca8e50d5e1888743fac16987638ea0b1
|
|
271
284
|
simplecov (0.22.0) sha256=fe2622c7834ff23b98066bb0a854284b2729a569ac659f82621fc22ef36213a5
|
|
272
285
|
simplecov-html (0.13.2) sha256=bd0b8e54e7c2d7685927e8d6286466359b6f16b18cb0df47b508e8d73c777246
|
|
273
286
|
simplecov_json_formatter (0.1.4) sha256=529418fbe8de1713ac2b2d612aa3daa56d316975d307244399fa4838c601b428
|
|
274
|
-
simpleidn (0.2.3) sha256=08ce96f03fa1605286be22651ba0fc9c0b2d6272c9b27a260bc88be05b0d2c29
|
|
275
287
|
stringio (3.2.0) sha256=c37cb2e58b4ffbd33fe5cd948c05934af997b36e0b6ca6fdf43afa234cf222e1
|
|
276
288
|
super_diff (0.19.0) sha256=c35fc1c0daa223d67b203fe3fb49a6cfd67850a53920319565c3c654e03ec902
|
|
277
289
|
thor (1.5.0) sha256=e3a9e55fe857e44859ce104a84675ab6e8cd59c650a49106a05f55f136425e73
|
|
@@ -283,4 +295,4 @@ CHECKSUMS
|
|
|
283
295
|
zeitwerk (2.8.2) sha256=7212a61311083c604184b1ea2574b9aa05cd14f855a0841c06985cabe9181d12
|
|
284
296
|
|
|
285
297
|
BUNDLED WITH
|
|
286
|
-
4.0.
|
|
298
|
+
4.0.13
|
data/README.md
CHANGED
|
@@ -51,6 +51,9 @@ exe/generator generate_vocab
|
|
|
51
51
|
exe/generator generate_descriptive_docs
|
|
52
52
|
```
|
|
53
53
|
|
|
54
|
+
## Reports / querying
|
|
55
|
+
jq-based queries can be authored against a [local data export](https://github.com/sul-dlss/dor-services-app#export-data) using the `/cocina-jq-query` skill. The skill helps with constructing and efficiently running the query.
|
|
56
|
+
|
|
54
57
|
## Testing
|
|
55
58
|
|
|
56
59
|
The generator is tested via its output when run against `schema.json`, viz., the Cocina model classes. Thus, `generate` should be run after any changes to `schema.json`.
|
|
@@ -158,7 +161,7 @@ This list of services is known to include:
|
|
|
158
161
|
* [sul-dlss/sdr-api](https://github.com/sul-dlss/sdr-api)
|
|
159
162
|
* [sul-dlss/dor-services-app](https://github.com/sul-dlss/dor-services-app/)
|
|
160
163
|
|
|
161
|
-
Perform `bundle update
|
|
164
|
+
Perform `bundle update cocina-models dor-services-client --conservative` in the services above and make PRs for those repos. You may first need to update how these gems are pinned in the `Gemfile` in order to bump them.
|
|
162
165
|
|
|
163
166
|
Get the directly coupled services PRs merged before the deploy in step 5.
|
|
164
167
|
|
data/bin/enhance-report-csv
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'bundler/setup'
|
|
5
|
+
require 'csv'
|
|
6
|
+
require 'optparse'
|
|
7
|
+
require 'rsolr'
|
|
8
|
+
|
|
9
|
+
# This script reads a CSV from standard input, where the first column is expected to be a druid.
|
|
10
|
+
# It queries Solr for each druid to fetch additional fields, then outputs an enhanced CSV to standard output.
|
|
11
|
+
# Usage: enhance-report-csv [options] < input.csv > output.csv
|
|
12
|
+
# To use locally, tunnel to solr with: ssh -L 8990:sul-solr-prod-a.stanford.edu:80 lyberadmin@argo-prod-02.stanford.edu
|
|
13
|
+
|
|
14
|
+
FIELD_HEADERS = {
|
|
15
|
+
'display_title_ss' => 'title',
|
|
16
|
+
'member_of_collection_ssim' => 'collection_druids',
|
|
17
|
+
'collection_title_ssimdv' => 'collection_titles',
|
|
18
|
+
'governed_by_ssim' => 'apo_druid',
|
|
19
|
+
'apo_title_ssimdv' => 'apo_title',
|
|
20
|
+
'folio_instance_hrid_ssim' => 'folio_hrid'
|
|
21
|
+
}.freeze
|
|
22
|
+
|
|
23
|
+
def parse_options # rubocop:disable Metrics/MethodLength
|
|
24
|
+
options = {
|
|
25
|
+
solr_url: 'http://localhost:8990/solr/argo_prod',
|
|
26
|
+
batch_size: 100
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
OptionParser.new do |opts|
|
|
30
|
+
opts.banner = 'Usage: enhance-report-csv [options] < input.csv > output.csv'
|
|
31
|
+
|
|
32
|
+
opts.on('--solr-url URL', 'Solr URL (default: http://localhost:8990/solr/argo_prod)') do |url|
|
|
33
|
+
options[:solr_url] = url
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
opts.on('--batch-size NUM', Integer, 'Solr batch size (default: 100)') do |n|
|
|
37
|
+
options[:batch_size] = n
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
opts.on('-h', '--help', 'Display this help message') do
|
|
41
|
+
puts opts
|
|
42
|
+
exit
|
|
43
|
+
end
|
|
44
|
+
end.parse!
|
|
45
|
+
|
|
46
|
+
options
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def fetch_solr_docs(solr, druids)
|
|
50
|
+
druids.map { |d| "id:(#{d})" }.join(' OR ')
|
|
51
|
+
response = solr.get('select', params: {
|
|
52
|
+
q: '*:*',
|
|
53
|
+
fq: "{!terms f=id}#{druids.join(',')}",
|
|
54
|
+
fl: "id,#{FIELD_HEADERS.keys.join(',')}",
|
|
55
|
+
rows: druids.size
|
|
56
|
+
})
|
|
57
|
+
response['response']['docs'].to_h do |doc|
|
|
58
|
+
[doc['id'], doc]
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def extract_fields(doc)
|
|
63
|
+
FIELD_HEADERS.keys.map do |field|
|
|
64
|
+
value = doc&.fetch(field, nil)
|
|
65
|
+
value.is_a?(Array) ? value.join(';') : value.to_s
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def build_output(solr, rows, batch_size)
|
|
70
|
+
extra_col_count = (rows.first&.size || 1) - 1
|
|
71
|
+
extra_headers = extra_col_count.times.map { |i| "col#{i + 2}" }
|
|
72
|
+
|
|
73
|
+
CSV.generate do |out|
|
|
74
|
+
out << (['druid'] + FIELD_HEADERS.values + extra_headers)
|
|
75
|
+
|
|
76
|
+
rows.each_slice(batch_size) do |batch|
|
|
77
|
+
docs = fetch_solr_docs(solr, batch.map { |row| row[0] })
|
|
78
|
+
|
|
79
|
+
batch.each do |row|
|
|
80
|
+
druid = row[0]
|
|
81
|
+
out << ([druid] + extract_fields(docs[druid]) + row[1..])
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
options = parse_options
|
|
88
|
+
solr = RSolr.connect(url: options[:solr_url])
|
|
89
|
+
rows = CSV.parse($stdin.read, headers: false)
|
|
90
|
+
print build_output(solr, rows, options[:batch_size])
|
data/bin/validate-data
CHANGED
|
@@ -72,13 +72,18 @@ end
|
|
|
72
72
|
|
|
73
73
|
# Get total line count (either from option or by counting)
|
|
74
74
|
def get_total_lines(filename, provided_count)
|
|
75
|
+
count_filename = filename.sub(/\.xz$/, '.count.txt')
|
|
75
76
|
if provided_count
|
|
76
77
|
puts "Using provided line count: #{provided_count}"
|
|
77
78
|
provided_count
|
|
79
|
+
elsif File.exist?(count_filename)
|
|
80
|
+
puts "Reading line count from #{count_filename}..."
|
|
81
|
+
File.read(count_filename).to_i
|
|
78
82
|
else
|
|
79
83
|
puts 'Counting lines...'
|
|
80
84
|
total = count_lines(filename)
|
|
81
85
|
puts "Total lines to validate: #{total}"
|
|
86
|
+
File.write(count_filename, total)
|
|
82
87
|
total
|
|
83
88
|
end
|
|
84
89
|
end
|
|
@@ -191,6 +196,8 @@ def distribute_work(filename, workers, batch_size, total_lines) # rubocop:disabl
|
|
|
191
196
|
|
|
192
197
|
# Update progress bar
|
|
193
198
|
progressbar.increment
|
|
199
|
+
|
|
200
|
+
break if line_number >= total_lines
|
|
194
201
|
end
|
|
195
202
|
end
|
|
196
203
|
|
data/bin/validate-schema
CHANGED
|
@@ -5,4 +5,9 @@ require 'bundler/setup'
|
|
|
5
5
|
require 'cocina/models'
|
|
6
6
|
|
|
7
7
|
filepath = ARGV[0]
|
|
8
|
-
|
|
8
|
+
begin
|
|
9
|
+
JSONSchema.validator_for(JSON.parse(File.read(filepath)))
|
|
10
|
+
rescue StandardError => e
|
|
11
|
+
warn e.message
|
|
12
|
+
exit(1)
|
|
13
|
+
end
|
data/cocina-models.gemspec
CHANGED
|
@@ -31,8 +31,7 @@ Gem::Specification.new do |spec|
|
|
|
31
31
|
spec.add_dependency 'edtf' # used for date/time validation
|
|
32
32
|
spec.add_dependency 'equivalent-xml' # for diffing MODS
|
|
33
33
|
spec.add_dependency 'i18n' # for validating BCP 47 language tags, according to RFC 4646
|
|
34
|
-
spec.add_dependency '
|
|
35
|
-
spec.add_dependency 'json_schemer', '~> 2.0'
|
|
34
|
+
spec.add_dependency 'jsonschema_rs'
|
|
36
35
|
spec.add_dependency 'nokogiri'
|
|
37
36
|
spec.add_dependency 'super_diff'
|
|
38
37
|
spec.add_dependency 'thor'
|
data/lib/cocina/models/contributor.rb
CHANGED
|
@@ -23,9 +23,6 @@ module Cocina
|
|
|
23
23
|
attribute :note, Types::Strict::Array.of(DescriptiveValue).default([].freeze)
|
|
24
24
|
# URL or other pointer to the location of the contributor information.
|
|
25
25
|
attribute? :valueAt, Types::Strict::String
|
|
26
|
-
# For multiple representations of information about the same contributor (e.g. in different
|
|
27
|
-
# languages).
|
|
28
|
-
attribute :parallelContributor, Types::Strict::Array.of(DescriptiveParallelContributor).default([].freeze)
|
|
29
26
|
end
|
|
30
27
|
end
|
|
31
28
|
end
|