cocina_display 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -16
- data/lib/cocina_display/cocina_record.rb +22 -7
- data/lib/cocina_display/concerns/contributors.rb +60 -41
- data/lib/cocina_display/concerns/events.rb +37 -25
- data/lib/cocina_display/concerns/forms.rb +134 -0
- data/lib/cocina_display/concerns/subjects.rb +34 -1
- data/lib/cocina_display/contributor.rb +54 -5
- data/lib/cocina_display/dates/date.rb +9 -8
- data/lib/cocina_display/dates/date_range.rb +8 -0
- data/lib/cocina_display/events/event.rb +78 -0
- data/lib/cocina_display/events/imprint.rb +101 -0
- data/lib/cocina_display/events/location.rb +56 -0
- data/lib/cocina_display/marc_relator_codes.rb +314 -0
- data/lib/cocina_display/title_builder.rb +2 -1
- data/lib/cocina_display/utils.rb +25 -2
- data/lib/cocina_display/version.rb +1 -1
- data/script/find_records.rb +85 -0
- metadata +22 -3
- data/lib/cocina_display/imprint.rb +0 -123
@@ -0,0 +1,314 @@
|
|
1
|
+
# Map of MARC relator codes to human-readable terms.
|
2
|
+
# https://www.loc.gov/marc/relators/relaterm.html
|
3
|
+
|
4
|
+
module CocinaDisplay
  # Lookup table from three-letter MARC relator code to its lowercase,
  # human-readable term. Entries marked "discontinued" are codes the
  # Library of Congress has retired; they are kept so older records that
  # still carry them can be decoded.
  #
  # The hash is frozen because it is shared, read-only reference data.
  MARC_RELATOR = {
    "abr" => "abridger",
    "acp" => "art copyist",
    "act" => "actor",
    "adi" => "art director",
    "adp" => "adapter",
    "aft" => "author of afterword, colophon, etc.", # discontinued
    "anc" => "announcer",
    "anl" => "analyst",
    "anm" => "animator",
    "ann" => "annotator",
    "ant" => "bibliographic antecedent",
    "ape" => "appellee",
    "apl" => "appellant",
    "app" => "applicant",
    "aqt" => "author in quotations or text abstracts",
    "arc" => "architect",
    "ard" => "artistic director",
    "arr" => "arranger",
    "art" => "artist",
    "asg" => "assignee",
    "asn" => "associated name",
    "ato" => "autographer",
    "att" => "attributed name",
    "auc" => "auctioneer",
    "aud" => "author of dialog",
    "aue" => "audio engineer",
    "aui" => "author of introduction, etc.", # discontinued
    "aup" => "audio producer",
    "aus" => "screenwriter",
    "aut" => "author",
    "bdd" => "binding designer",
    "bjd" => "bookjacket designer",
    "bka" => "book artist",
    "bkd" => "book designer",
    "bkp" => "book producer",
    "blw" => "blurb writer",
    "bnd" => "binder",
    "bpd" => "bookplate designer",
    "brd" => "broadcaster",
    "brl" => "braille embosser",
    "bsl" => "bookseller",
    "cad" => "casting director",
    "cas" => "caster",
    "ccp" => "conceptor",
    "chr" => "choreographer",
    "clb" => "collaborator", # discontinued
    "cli" => "client",
    "cll" => "calligrapher",
    "clr" => "colorist",
    "clt" => "collotyper",
    "cmm" => "commentator",
    "cmp" => "composer",
    "cmt" => "compositor",
    "cnd" => "conductor",
    "cng" => "cinematographer",
    "cns" => "censor",
    "coe" => "contestant-appellee",
    "col" => "collector",
    "com" => "compiler",
    "con" => "conservator",
    "cop" => "camera operator",
    "cor" => "collection registrar",
    "cos" => "contestant",
    "cot" => "contestant-appellant",
    "cou" => "court governed",
    "cov" => "cover designer",
    "cpc" => "copyright claimant",
    "cpe" => "complainant-appellee",
    "cph" => "copyright holder",
    "cpl" => "complainant",
    "cpt" => "complainant-appellant",
    "cre" => "creator",
    "crp" => "correspondent",
    "crr" => "corrector",
    "crt" => "court reporter",
    "csl" => "consultant",
    "csp" => "consultant to a project",
    "cst" => "costume designer",
    "ctb" => "contributor",
    "cte" => "contestee-appellee",
    "ctg" => "cartographer",
    "ctr" => "contractor",
    "cts" => "contestee",
    "ctt" => "contestee-appellant",
    "cur" => "curator",
    "cwt" => "commentator for written text",
    "dbd" => "dubbing director",
    "dbp" => "distribution place",
    "dfd" => "defendant",
    "dfe" => "defendant-appellee",
    "dft" => "defendant-appellant",
    "dgc" => "degree committee member",
    "dgg" => "degree granting institution",
    "dgs" => "degree supervisor",
    "dis" => "dissertant",
    "djo" => "dj",
    "dln" => "delineator",
    "dnc" => "dancer",
    "dnr" => "donor",
    "dpc" => "depicted",
    "dpt" => "depositor",
    "drm" => "draftsman",
    "drt" => "director",
    "dsr" => "designer",
    "dst" => "distributor",
    "dtc" => "data contributor",
    "dte" => "dedicatee",
    "dtm" => "data manager",
    "dto" => "dedicator",
    "dub" => "dubious author",
    "edc" => "editor of compilation",
    "edd" => "editorial director",
    "edm" => "editor of moving image work",
    "edt" => "editor",
    "egr" => "engraver",
    "elg" => "electrician",
    "elt" => "electrotyper",
    "eng" => "engineer",
    "enj" => "enacting jurisdiction",
    "etr" => "etcher",
    "evp" => "event place",
    "exp" => "expert",
    "fac" => "facsimilist",
    "fds" => "film distributor",
    "fld" => "field director",
    "flm" => "film editor",
    "fmd" => "film director",
    "fmk" => "filmmaker",
    "fmo" => "former owner",
    "fmp" => "film producer",
    "fnd" => "funder",
    "fon" => "founder",
    "fpy" => "first party",
    "frg" => "forger",
    "gdv" => "game developer",
    "gis" => "geographic information specialist",
    "grt" => "graphic technician", # discontinued
    "his" => "host institution",
    "hnr" => "honoree",
    "hst" => "host",
    "ill" => "illustrator",
    "ilu" => "illuminator",
    "ink" => "inker",
    "ins" => "inscriber",
    "inv" => "inventor",
    "isb" => "issuing body",
    "itr" => "instrumentalist",
    "ive" => "interviewee",
    "ivr" => "interviewer",
    "jud" => "judge",
    "jug" => "jurisdiction governed",
    "lbr" => "laboratory",
    "lbt" => "librettist",
    "ldr" => "laboratory director",
    "led" => "lead",
    "lee" => "libelee-appellee",
    "lel" => "libelee",
    "len" => "lender",
    "let" => "libelee-appellant",
    "lgd" => "lighting designer",
    "lie" => "libelant-appellee",
    "lil" => "libelant",
    "lit" => "libelant-appellant",
    "lsa" => "landscape architect",
    "lse" => "licensee",
    "lso" => "licensor",
    "ltg" => "lithographer",
    "ltr" => "letterer",
    "lyr" => "lyricist",
    "mcp" => "music copyist",
    "mdc" => "metadata contact",
    "med" => "medium",
    "mfp" => "manufacture place",
    "mfr" => "manufacturer",
    "mka" => "makeup artist",
    "mod" => "moderator",
    "mon" => "monitor",
    "mrb" => "marbler",
    "mrk" => "markup editor",
    "msd" => "musical director",
    "mte" => "metal-engraver",
    "mtk" => "minute taker",
    "mup" => "music programmer",
    "mus" => "musician",
    "mxe" => "mixing engineer",
    "nan" => "news anchor",
    "nrt" => "narrator",
    "onp" => "onscreen participant",
    "opn" => "opponent",
    "org" => "originator",
    "orm" => "organizer",
    "osp" => "onscreen presenter",
    "oth" => "other",
    "own" => "owner",
    "pad" => "place of address",
    "pan" => "panelist",
    "pat" => "patron",
    "pbd" => "publishing director",
    "pbl" => "publisher",
    "pdr" => "project director",
    "pfr" => "proofreader",
    "pht" => "photographer",
    "plt" => "platemaker",
    "pma" => "permitting agency",
    "pmn" => "production manager",
    "pnc" => "penciller",
    "pop" => "printer of plates",
    "ppm" => "papermaker",
    "ppt" => "puppeteer",
    "pra" => "praeses",
    "prc" => "process contact",
    "prd" => "production personnel",
    "pre" => "presenter",
    "prf" => "performer",
    "prg" => "programmer",
    "prm" => "printmaker",
    "prn" => "production company",
    "pro" => "producer",
    "prp" => "production place",
    "prs" => "production designer",
    "prt" => "printer",
    "prv" => "provider",
    "pta" => "patent applicant",
    "pte" => "plaintiff-appellee",
    "ptf" => "plaintiff",
    "pth" => "patent holder",
    "ptt" => "plaintiff-appellant",
    "pup" => "publication place",
    "rap" => "rapporteur",
    "rbr" => "rubricator",
    "rcd" => "recordist",
    "rce" => "recording engineer",
    "rcp" => "addressee",
    "rdd" => "radio director",
    "red" => "redaktor",
    "ren" => "renderer",
    "res" => "researcher",
    "rev" => "reviewer",
    "rpc" => "radio producer",
    "rps" => "repository",
    "rpt" => "reporter",
    "rpy" => "responsible party",
    "rse" => "respondent-appellee",
    "rsg" => "restager",
    "rsp" => "respondent",
    "rsr" => "restorationist",
    "rst" => "respondent-appellant",
    "rth" => "research team head",
    "rtm" => "research team member",
    "rxa" => "remix artist",
    "sad" => "scientific advisor",
    "sce" => "scenarist",
    "scl" => "sculptor",
    "scr" => "scribe",
    "sde" => "sound engineer",
    "sds" => "sound designer",
    "sec" => "secretary",
    "sfx" => "special effects provider",
    "sgd" => "stage director",
    "sgn" => "signer",
    "sht" => "supporting host",
    "sll" => "seller",
    "sng" => "singer",
    "spk" => "speaker",
    "spn" => "sponsor",
    "spy" => "second party",
    "srv" => "surveyor",
    "std" => "set designer",
    "stg" => "setting",
    "stl" => "storyteller",
    "stm" => "stage manager",
    "stn" => "standards body",
    "str" => "stereotyper",
    "swd" => "software developer",
    "tad" => "technical advisor",
    "tau" => "television writer",
    "tcd" => "technical director",
    "tch" => "teacher",
    "ths" => "thesis advisor",
    "tld" => "television director",
    "tlg" => "television guest",
    "tlh" => "television host",
    "tlp" => "television producer",
    "trc" => "transcriber",
    "trl" => "translator",
    "tyd" => "type designer",
    "tyg" => "typographer",
    "uvp" => "university place",
    "vac" => "voice actor",
    "vdg" => "videographer",
    "vfx" => "visual effects provider",
    "voc" => "vocalist", # discontinued
    "wac" => "writer of added commentary",
    "wal" => "writer of added lyrics",
    "wam" => "writer of accompanying material",
    "wat" => "writer of added text",
    "waw" => "writer of afterword",
    "wdc" => "woodcutter",
    "wde" => "wood engraver",
    "wfs" => "writer of film story",
    "wft" => "writer of intertitles",
    "wfw" => "writer of foreword",
    "win" => "writer of introduction",
    "wit" => "witness",
    "wpr" => "writer of preface",
    "wst" => "writer of supplementary textual content",
    "wts" => "writer of television story"
  }.freeze
end
|
@@ -53,7 +53,7 @@ module CocinaDisplay
|
|
53
53
|
def self.sort_title(titles, catalog_links: [])
|
54
54
|
part_label = catalog_links.find { |link| link["catalog"] == "folio" }&.fetch("partLabel", nil)
|
55
55
|
[new(strategy: :first, add_punctuation: false, only_one_parallel_value: false, part_label: part_label, sortable: true).build(titles)]
|
56
|
-
.flatten.compact.map { |title| title.gsub(/[[:punct:]]*/, "").strip }
|
56
|
+
.flatten.compact.map { |title| title.gsub(/[[:punct:]]*/, "").squeeze(" ").strip }
|
57
57
|
end
|
58
58
|
|
59
59
|
# @param strategy [Symbol] ":first" selects a single title value based on precedence of
|
@@ -116,6 +116,7 @@ module CocinaDisplay
|
|
116
116
|
end
|
117
117
|
|
118
118
|
def extract_title(cocina_title)
|
119
|
+
return if cocina_title.blank?
|
119
120
|
title_values = if cocina_title["value"]
|
120
121
|
cocina_title["value"]
|
121
122
|
elsif cocina_title["structuredValue"].present?
|
data/lib/cocina_display/utils.rb
CHANGED
@@ -19,7 +19,7 @@ module CocinaDisplay
|
|
19
19
|
end.delete_suffix(delimiter)
|
20
20
|
end
|
21
21
|
|
22
|
-
# Recursively flatten structured and
|
22
|
+
# Recursively flatten structured, parallel, and grouped values in Cocina metadata.
|
23
23
|
# Returns a list of hashes representing the "leaf" nodes with +value+ key.
|
24
24
|
# @return [Array<Hash>] List of node hashes with "value" present
|
25
25
|
# @param cocina [Hash] The Cocina structured data to flatten
|
@@ -40,10 +40,33 @@ module CocinaDisplay
|
|
40
40
|
return [cocina] if cocina["value"].present?
|
41
41
|
return cocina.flat_map { |node| flatten_nested_values(node, output) } if cocina.is_a?(Array)
|
42
42
|
|
43
|
-
nested_values = Array(cocina["structuredValue"]) + Array(cocina["parallelValue"])
|
43
|
+
nested_values = Array(cocina["structuredValue"]) + Array(cocina["parallelValue"]) + Array(cocina["groupedValue"])
|
44
44
|
return output unless nested_values.any?
|
45
45
|
|
46
46
|
nested_values.flat_map { |node| flatten_nested_values(node, output) }
|
47
47
|
end
|
48
|
+
|
49
|
+
# Recursively remove empty values from a hash, including nested hashes and arrays.
|
50
|
+
# @param hash [Hash] The hash to process
|
51
|
+
# @param output [Hash] Used for recursion, should be empty on first call
|
52
|
+
# @return [Hash] The hash with empty values removed
|
53
|
+
# @example
|
54
|
+
# hash = { "name" => "", "age" => nil, "address" => { "city" => "Anytown", "state" => [] } }
|
55
|
+
# Utils.deep_compact_blank(hash)
|
56
|
+
# #=> { "address" => { "city" => "Anytown" } }
|
57
|
+
def self.deep_compact_blank(hash, output = {})
  # Array elements are fed back through this method and are not always
  # hashes (e.g. ["a", "", nil]). A non-hash node has no keys to prune, so
  # return it untouched and let the caller's blank check decide whether
  # to keep it. Previously this raised NoMethodError on such elements.
  return hash unless hash.is_a?(Hash)

  hash.each do |key, value|
    compacted =
      case value
      when Hash
        deep_compact_blank(value)
      when Array
        value.map { |item| deep_compact_blank(item) }.reject(&:blank?)
      else
        value
      end

    # An emptied hash or array is itself blank, so a single check covers
    # nested containers and scalar values alike.
    output[key] = compacted if compacted.present?
  end

  output
end
|
48
71
|
end
|
49
72
|
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This script is a simple, brute-force method for finding records that
|
4
|
+
# exhibit certain characteristics in the public Cocina JSON for testing.
|
5
|
+
#
|
6
|
+
# It queries purl-fetcher for all DRUIDs released to a specific target and
|
7
|
+
# then fetches each corresponding public Cocina record from PURL and examines it.
|
8
|
+
#
|
9
|
+
# You need to be on VPN to do this, as the purl-fetcher API is only accessible
|
10
|
+
# from within the Stanford network.
|
11
|
+
#
|
12
|
+
# To use, modify any of the noted items below, then run:
|
13
|
+
# $ bundle exec ruby script/find_records.rb
|
14
|
+
#
|
15
|
+
# You can exit early with Ctrl-C, and it will report how many records were
|
16
|
+
# checked before exiting. Running through an entire target will take a while,
|
17
|
+
# on the order of 30 minutes or more.
|
18
|
+
|
19
|
+
require "benchmark"
|
20
|
+
require "pp"
|
21
|
+
require "purl_fetcher/client"
|
22
|
+
require "cocina_display"
|
23
|
+
require "cocina_display/utils"
|
24
|
+
|
25
|
+
# This should correspond to one of the release targets available in purl-fetcher,
|
26
|
+
# i.e. "Searchworks", "Earthworks", etc.
|
27
|
+
RELEASE_TARGET = "Searchworks"
|
28
|
+
|
29
|
+
# Modify this expression to match the JSON path you want to search, or just
|
30
|
+
# modify the `examine_record` method directly.
|
31
|
+
PATH_EXPR = "$..[?length(@.groupedValue) > 0]"
|
32
|
+
|
33
|
+
# Modify this method as needed to change what you're looking for in each record.
|
34
|
+
# It takes a CocinaRecord object and should return an array of [path, result] pairs.
|
35
|
+
def examine_record(record)
  # Each match from record.path is destructured into four values; only the
  # matched value and its path are used here.
  matches = record.path(PATH_EXPR)
  matches.map do |matched_value, _parent, _key, json_path|
    compacted = CocinaDisplay::Utils.deep_compact_blank(matched_value)
    [json_path, compacted]
  end
end
|
38
|
+
|
39
|
+
# Track total records in target and how many we've seen
|
40
|
+
released_to_target = []
processed_records = 0

# On Ctrl-C, report progress so far and stop.
Signal.trap("INT") do
  puts "\nExiting after processing #{processed_records} records."
  exit
end

# A single purl-fetcher request returns every DRUID for the target; the
# response is large, so the call is timed and reported below.
puts "Finding records released to #{RELEASE_TARGET}..."
client = PurlFetcher::Client::Reader.new
query_time = Benchmark.realtime do
  begin
    client.released_to(RELEASE_TARGET).each do |record|
      released_to_target.push(record["druid"].delete_prefix("druid:"))
    end
  rescue Faraday::ConnectionFailed => e
    puts "Connection failed: #{e.message}; are you on VPN?"
    exit 1
  end
end
puts "Found #{released_to_target.size} records released to #{RELEASE_TARGET} in #{query_time.round(2)} seconds"

# Fetch each DRUID's public Cocina record, run examine_record on it, and
# print the DRUID with its results whenever anything matched.
released_to_target.each do |druid|
  cocina_record =
    begin
      fetched = CocinaDisplay::CocinaRecord.fetch(druid)
      processed_records += 1
      fetched
    rescue => e
      # A failed fetch is reported and skipped; it is not counted as processed.
      puts "Error fetching record #{druid}: #{e.message}"
      next
    end

  results = examine_record(cocina_record)
  next if results.empty?

  puts "Druid: #{druid}"
  results.each do |path, result|
    puts " Path: #{path}"
    puts " Result: #{result.pretty_inspect}\n"
  end

  puts "-" * 80
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cocina_display
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nick Budak
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-07-
|
11
|
+
date: 2025-07-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: janeway-jsonpath
|
@@ -156,6 +156,20 @@ dependencies:
|
|
156
156
|
- - ">="
|
157
157
|
- !ruby/object:Gem::Version
|
158
158
|
version: 1.9.1
|
159
|
+
- !ruby/object:Gem::Dependency
|
160
|
+
name: purl_fetcher-client
|
161
|
+
requirement: !ruby/object:Gem::Requirement
|
162
|
+
requirements:
|
163
|
+
- - "~>"
|
164
|
+
- !ruby/object:Gem::Version
|
165
|
+
version: '3.1'
|
166
|
+
type: :development
|
167
|
+
prerelease: false
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
169
|
+
requirements:
|
170
|
+
- - "~>"
|
171
|
+
- !ruby/object:Gem::Version
|
172
|
+
version: '3.1'
|
159
173
|
description:
|
160
174
|
email:
|
161
175
|
- budak@stanford.edu
|
@@ -173,18 +187,23 @@ files:
|
|
173
187
|
- lib/cocina_display/concerns/access.rb
|
174
188
|
- lib/cocina_display/concerns/contributors.rb
|
175
189
|
- lib/cocina_display/concerns/events.rb
|
190
|
+
- lib/cocina_display/concerns/forms.rb
|
176
191
|
- lib/cocina_display/concerns/identifiers.rb
|
177
192
|
- lib/cocina_display/concerns/subjects.rb
|
178
193
|
- lib/cocina_display/concerns/titles.rb
|
179
194
|
- lib/cocina_display/contributor.rb
|
180
195
|
- lib/cocina_display/dates/date.rb
|
181
196
|
- lib/cocina_display/dates/date_range.rb
|
182
|
-
- lib/cocina_display/
|
197
|
+
- lib/cocina_display/events/event.rb
|
198
|
+
- lib/cocina_display/events/imprint.rb
|
199
|
+
- lib/cocina_display/events/location.rb
|
183
200
|
- lib/cocina_display/marc_country_codes.rb
|
201
|
+
- lib/cocina_display/marc_relator_codes.rb
|
184
202
|
- lib/cocina_display/subject.rb
|
185
203
|
- lib/cocina_display/title_builder.rb
|
186
204
|
- lib/cocina_display/utils.rb
|
187
205
|
- lib/cocina_display/version.rb
|
206
|
+
- script/find_records.rb
|
188
207
|
- sig/cocina_display.rbs
|
189
208
|
homepage: https://sul-dlss.github.io/cocina_display/
|
190
209
|
licenses:
|
@@ -1,123 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "edtf"
|
4
|
-
require "active_support"
|
5
|
-
require "active_support/core_ext/enumerable"
|
6
|
-
require "active_support/core_ext/object/blank"
|
7
|
-
|
8
|
-
require_relative "utils"
|
9
|
-
require_relative "marc_country_codes"
|
10
|
-
require_relative "dates/date"
|
11
|
-
require_relative "dates/date_range"
|
12
|
-
|
13
|
-
module CocinaDisplay
  # Wrapper for Cocina events used to generate an imprint statement for display.
  class Imprint
    # Parse Cocina dates and convert any non-single dates to DateRange objects.
    # This ensures that we can correctly deduplicate ranges against single dates.
    # @param cocina_dates [Array<Hash>] Array of Cocina date hashes
    # @return [Array<CocinaDisplay::Dates::Date | CocinaDisplay::Dates::DateRange>]
    def self.parse_dates(cocina_dates)
      # Discard nils before calling parsable? on each element; with the
      # previous order (filter, then compact) a nil element raised
      # NoMethodError before compact could remove it.
      # NOTE(review): whether Date.from_cocina can actually return nil is not
      # visible here — confirm against its implementation.
      cocina_dates
        .map { |cd| CocinaDisplay::Dates::Date.from_cocina(cd) }
        .compact
        .filter(&:parsable?)
    end

    attr_reader :cocina, :dates

    # Initialize the imprint with Cocina event data.
    # @param cocina [Hash] Cocina structured data for a single event
    def initialize(cocina)
      @cocina = cocina
      @dates = self.class.parse_dates(Array(cocina["date"]))
    end

    # The entire imprint statement formatted as a string for display.
    # Built up as "edition - place : publisher, date".
    # @return [String]
    def display_str
      place_pub = Utils.compact_and_join([place_str, publisher_str], delimiter: " : ")
      edition_place_pub = Utils.compact_and_join([edition_str, place_pub], delimiter: " - ")
      Utils.compact_and_join([edition_place_pub, date_str], delimiter: ", ")
    end

    # Were any of the dates encoded?
    # Used to detect which event(s) most likely represent the actual imprint(s).
    # @return [Boolean]
    def date_encoding?
      @dates.any?(&:encoding?)
    end

    private

    # The date portion of the imprint statement, comprising all unique dates.
    # @return [String]
    def date_str
      Utils.compact_and_join(unique_dates_for_display.map(&:qualified_value))
    end

    # The editions portion of the imprint statement, combining all edition notes.
    # @return [String]
    def edition_str
      Utils.compact_and_join(Janeway.enum_for("$.note[?@.type == 'edition'].value", cocina))
    end

    # The place of publication, combining all location values.
    # @return [String]
    def place_str
      Utils.compact_and_join(locations_for_display, delimiter: " : ")
    end

    # The publisher information, combining all name values for publishers.
    # @return [String]
    def publisher_str
      Utils.compact_and_join(Janeway.enum_for("$.contributor[?@.role[?@.value == 'publisher']].name[*].value", cocina), delimiter: " : ")
    end

    # Get the place name for a location, decoding from MARC if necessary.
    # Ignores the unknown/"various locations" country codes and returns nil.
    # @param location [Hash] A location hash parsed from Cocina
    # @return [String] The decoded location name
    # @return [nil] If no valid location value is found
    def decoded_location(location)
      return location["value"] if location["value"].present?

      if location.dig("source", "code") == "marccountry" &&
          location["code"].present? &&
          ["xx", "vp"].exclude?(location["code"])
        CocinaDisplay::MARC_COUNTRY[location["code"]]
      end
    end

    # Filter locations to display according to predefined rules.
    # 1. Prefer unencoded locations (plain value) over encoded ones
    # 2. If no unencoded locations but there are MARC country codes, decode them
    # 3. Keep only unique locations after decoding
    # @return [Array<String>]
    def locations_for_display
      unencoded_locs, encoded_locs = Array(cocina["location"]).partition { |loc| loc["value"].present? }
      locs_for_display = unencoded_locs.presence || encoded_locs
      locs_for_display.map { |loc| decoded_location(loc) }.compact_blank.uniq
    end

    # Filter dates for uniqueness using base value according to predefined rules.
    # 1. For a group of dates with the same base value, choose a single one
    # 2. Prefer unencoded dates over encoded ones when choosing a single date
    # 3. Remove date ranges that duplicate any unencoded non-range dates
    # @return [Array<CocinaDisplay::Dates::Date>]
    # @see CocinaDisplay::Dates::Date#base_value
    # @see https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
    def unique_dates_for_display
      # Choose a single date for each group with the same base value
      deduped_dates = dates.group_by(&:base_value).map do |_base_value, group|
        group.reject(&:encoding?).first || group.first
      end

      # Remove any ranges that duplicate part of an unencoded non-range date
      ranges, singles = deduped_dates.partition { |date| date.is_a?(CocinaDisplay::Dates::DateRange) }
      unencoded_singles_dates = singles.reject(&:encoding?).flat_map(&:to_a)
      ranges.reject! { |range| unencoded_singles_dates.any? { |date| range.as_interval.include?(date) } }

      (singles + ranges).sort
    end
  end
end
|