cocina_display 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,56 @@
1
+ require_relative "../marc_country_codes"
2
+
3
module CocinaDisplay
  module Events
    # A single location represented in a Cocina event, like a publication place.
    class Location
      # MARC country codes that never decode to a displayable place name:
      # "xx" (unknown) and "vp" (various places).
      UNDECODABLE_COUNTRY_CODES = %w[xx vp].freeze
      private_constant :UNDECODABLE_COUNTRY_CODES

      attr_reader :cocina

      # Initialize a Location object with Cocina structured data.
      # @param cocina [Hash] The Cocina structured data for the location.
      def initialize(cocina)
        @cocina = cocina
      end

      # The name of the location.
      # Falls back to decoding a MARC country code when no non-blank value
      # is present, so an empty-string value does not mask a decodable code.
      # @return [String, nil]
      def display_str
        cocina["value"].presence || decoded_country
      end

      # Is there an unencoded value (name) for this location?
      # @return [Boolean]
      def unencoded_value?
        cocina["value"].present?
      end

      private

      # A code, like a MARC country code, representing the location.
      # @return [String, nil]
      def code
        cocina["code"]
      end

      # Decoded country name if the location is encoded with a MARC country code.
      # @return [String, nil]
      def decoded_country
        MARC_COUNTRY[code] if marc_country? && valid_country_code?
      end

      # Is this a decodable country code?
      # Excludes blank values and "xx" (unknown) and "vp" (various places).
      # @return [Boolean]
      def valid_country_code?
        code.present? && !UNDECODABLE_COUNTRY_CODES.include?(code)
      end

      # Is this location encoded with a MARC country code?
      # @return [Boolean]
      def marc_country?
        cocina.dig("source", "code") == "marccountry"
      end
    end
  end
end
@@ -0,0 +1,314 @@
1
+ # Map of MARC relator codes to human-readable terms.
2
+ # https://www.loc.gov/marc/relators/relaterm.html
3
+
4
module CocinaDisplay
  # Lookup table from three-letter MARC relator codes to lowercase,
  # human-readable role terms. Entries marked "discontinued" are kept so
  # that older records using those codes still resolve to a term.
  # Frozen because it is shared, read-only reference data.
  MARC_RELATOR = {
    "abr" => "abridger",
    "acp" => "art copyist",
    "act" => "actor",
    "adi" => "art director",
    "adp" => "adapter",
    "aft" => "author of afterword, colophon, etc.", # discontinued
    "anc" => "announcer",
    "anl" => "analyst",
    "anm" => "animator",
    "ann" => "annotator",
    "ant" => "bibliographic antecedent",
    "ape" => "appellee",
    "apl" => "appellant",
    "app" => "applicant",
    "aqt" => "author in quotations or text abstracts",
    "arc" => "architect",
    "ard" => "artistic director",
    "arr" => "arranger",
    "art" => "artist",
    "asg" => "assignee",
    "asn" => "associated name",
    "ato" => "autographer",
    "att" => "attributed name",
    "auc" => "auctioneer",
    "aud" => "author of dialog",
    "aue" => "audio engineer",
    "aui" => "author of introduction, etc.", # discontinued
    "aup" => "audio producer",
    "aus" => "screenwriter",
    "aut" => "author",
    "bdd" => "binding designer",
    "bjd" => "bookjacket designer",
    "bka" => "book artist",
    "bkd" => "book designer",
    "bkp" => "book producer",
    "blw" => "blurb writer",
    "bnd" => "binder",
    "bpd" => "bookplate designer",
    "brd" => "broadcaster",
    "brl" => "braille embosser",
    "bsl" => "bookseller",
    "cad" => "casting director",
    "cas" => "caster",
    "ccp" => "conceptor",
    "chr" => "choreographer",
    "clb" => "collaborator", # discontinued
    "cli" => "client",
    "cll" => "calligrapher",
    "clr" => "colorist",
    "clt" => "collotyper",
    "cmm" => "commentator",
    "cmp" => "composer",
    "cmt" => "compositor",
    "cnd" => "conductor",
    "cng" => "cinematographer",
    "cns" => "censor",
    "coe" => "contestant-appellee",
    "col" => "collector",
    "com" => "compiler",
    "con" => "conservator",
    "cop" => "camera operator",
    "cor" => "collection registrar",
    "cos" => "contestant",
    "cot" => "contestant-appellant",
    "cou" => "court governed",
    "cov" => "cover designer",
    "cpc" => "copyright claimant",
    "cpe" => "complainant-appellee",
    "cph" => "copyright holder",
    "cpl" => "complainant",
    "cpt" => "complainant-appellant",
    "cre" => "creator",
    "crp" => "correspondent",
    "crr" => "corrector",
    "crt" => "court reporter",
    "csl" => "consultant",
    "csp" => "consultant to a project",
    "cst" => "costume designer",
    "ctb" => "contributor",
    "cte" => "contestee-appellee",
    "ctg" => "cartographer",
    "ctr" => "contractor",
    "cts" => "contestee",
    "ctt" => "contestee-appellant",
    "cur" => "curator",
    "cwt" => "commentator for written text",
    "dbd" => "dubbing director",
    "dbp" => "distribution place",
    "dfd" => "defendant",
    "dfe" => "defendant-appellee",
    "dft" => "defendant-appellant",
    "dgc" => "degree committee member",
    "dgg" => "degree granting institution",
    "dgs" => "degree supervisor",
    "dis" => "dissertant",
    "djo" => "dj",
    "dln" => "delineator",
    "dnc" => "dancer",
    "dnr" => "donor",
    "dpc" => "depicted",
    "dpt" => "depositor",
    "drm" => "draftsman",
    "drt" => "director",
    "dsr" => "designer",
    "dst" => "distributor",
    "dtc" => "data contributor",
    "dte" => "dedicatee",
    "dtm" => "data manager",
    "dto" => "dedicator",
    "dub" => "dubious author",
    "edc" => "editor of compilation",
    "edd" => "editorial director",
    "edm" => "editor of moving image work",
    "edt" => "editor",
    "egr" => "engraver",
    "elg" => "electrician",
    "elt" => "electrotyper",
    "eng" => "engineer",
    "enj" => "enacting jurisdiction",
    "etr" => "etcher",
    "evp" => "event place",
    "exp" => "expert",
    "fac" => "facsimilist",
    "fds" => "film distributor",
    "fld" => "field director",
    "flm" => "film editor",
    "fmd" => "film director",
    "fmk" => "filmmaker",
    "fmo" => "former owner",
    "fmp" => "film producer",
    "fnd" => "funder",
    "fon" => "founder",
    "fpy" => "first party",
    "frg" => "forger",
    "gdv" => "game developer",
    "gis" => "geographic information specialist",
    "grt" => "graphic technician", # discontinued
    "his" => "host institution",
    "hnr" => "honoree",
    "hst" => "host",
    "ill" => "illustrator",
    "ilu" => "illuminator",
    "ink" => "inker",
    "ins" => "inscriber",
    "inv" => "inventor",
    "isb" => "issuing body",
    "itr" => "instrumentalist",
    "ive" => "interviewee",
    "ivr" => "interviewer",
    "jud" => "judge",
    "jug" => "jurisdiction governed",
    "lbr" => "laboratory",
    "lbt" => "librettist",
    "ldr" => "laboratory director",
    "led" => "lead",
    "lee" => "libelee-appellee",
    "lel" => "libelee",
    "len" => "lender",
    "let" => "libelee-appellant",
    "lgd" => "lighting designer",
    "lie" => "libelant-appellee",
    "lil" => "libelant",
    "lit" => "libelant-appellant",
    "lsa" => "landscape architect",
    "lse" => "licensee",
    "lso" => "licensor",
    "ltg" => "lithographer",
    "ltr" => "letterer",
    "lyr" => "lyricist",
    "mcp" => "music copyist",
    "mdc" => "metadata contact",
    "med" => "medium",
    "mfp" => "manufacture place",
    "mfr" => "manufacturer",
    "mka" => "makeup artist",
    "mod" => "moderator",
    "mon" => "monitor",
    "mrb" => "marbler",
    "mrk" => "markup editor",
    "msd" => "musical director",
    "mte" => "metal-engraver",
    "mtk" => "minute taker",
    "mup" => "music programmer",
    "mus" => "musician",
    "mxe" => "mixing engineer",
    "nan" => "news anchor",
    "nrt" => "narrator",
    "onp" => "onscreen participant",
    "opn" => "opponent",
    "org" => "originator",
    "orm" => "organizer",
    "osp" => "onscreen presenter",
    "oth" => "other",
    "own" => "owner",
    "pad" => "place of address",
    "pan" => "panelist",
    "pat" => "patron",
    "pbd" => "publishing director",
    "pbl" => "publisher",
    "pdr" => "project director",
    "pfr" => "proofreader",
    "pht" => "photographer",
    "plt" => "platemaker",
    "pma" => "permitting agency",
    "pmn" => "production manager",
    "pnc" => "penciller",
    "pop" => "printer of plates",
    "ppm" => "papermaker",
    "ppt" => "puppeteer",
    "pra" => "praeses",
    "prc" => "process contact",
    "prd" => "production personnel",
    "pre" => "presenter",
    "prf" => "performer",
    "prg" => "programmer",
    "prm" => "printmaker",
    "prn" => "production company",
    "pro" => "producer",
    "prp" => "production place",
    "prs" => "production designer",
    "prt" => "printer",
    "prv" => "provider",
    "pta" => "patent applicant",
    "pte" => "plaintiff-appellee",
    "ptf" => "plaintiff",
    "pth" => "patent holder",
    "ptt" => "plaintiff-appellant",
    "pup" => "publication place",
    "rap" => "rapporteur",
    "rbr" => "rubricator",
    "rcd" => "recordist",
    "rce" => "recording engineer",
    "rcp" => "addressee",
    "rdd" => "radio director",
    "red" => "redaktor",
    "ren" => "renderer",
    "res" => "researcher",
    "rev" => "reviewer",
    "rpc" => "radio producer",
    "rps" => "repository",
    "rpt" => "reporter",
    "rpy" => "responsible party",
    "rse" => "respondent-appellee",
    "rsg" => "restager",
    "rsp" => "respondent",
    "rsr" => "restorationist",
    "rst" => "respondent-appellant",
    "rth" => "research team head",
    "rtm" => "research team member",
    "rxa" => "remix artist",
    "sad" => "scientific advisor",
    "sce" => "scenarist",
    "scl" => "sculptor",
    "scr" => "scribe",
    "sde" => "sound engineer",
    "sds" => "sound designer",
    "sec" => "secretary",
    "sfx" => "special effects provider",
    "sgd" => "stage director",
    "sgn" => "signer",
    "sht" => "supporting host",
    "sll" => "seller",
    "sng" => "singer",
    "spk" => "speaker",
    "spn" => "sponsor",
    "spy" => "second party",
    "srv" => "surveyor",
    "std" => "set designer",
    "stg" => "setting",
    "stl" => "storyteller",
    "stm" => "stage manager",
    "stn" => "standards body",
    "str" => "stereotyper",
    "swd" => "software developer",
    "tad" => "technical advisor",
    "tau" => "television writer",
    "tcd" => "technical director",
    "tch" => "teacher",
    "ths" => "thesis advisor",
    "tld" => "television director",
    "tlg" => "television guest",
    "tlh" => "television host",
    "tlp" => "television producer",
    "trc" => "transcriber",
    "trl" => "translator",
    "tyd" => "type designer",
    "tyg" => "typographer",
    "uvp" => "university place",
    "vac" => "voice actor",
    "vdg" => "videographer",
    "vfx" => "visual effects provider",
    "voc" => "vocalist", # discontinued
    "wac" => "writer of added commentary",
    "wal" => "writer of added lyrics",
    "wam" => "writer of accompanying material",
    "wat" => "writer of added text",
    "waw" => "writer of afterword",
    "wdc" => "woodcutter",
    "wde" => "wood engraver",
    "wfs" => "writer of film story",
    "wft" => "writer of intertitles",
    "wfw" => "writer of foreword",
    "win" => "writer of introduction",
    "wit" => "witness",
    "wpr" => "writer of preface",
    "wst" => "writer of supplementary textual content",
    "wts" => "writer of television story"
  }.freeze
end
@@ -0,0 +1,127 @@
1
+ require_relative "utils"
2
+ require_relative "contributor"
3
+ require_relative "title_builder"
4
+ require_relative "dates/date"
5
+
6
module CocinaDisplay
  # Base class for subjects in Cocina structured data.
  class Subject
    attr_reader :cocina

    # Work out the subject type from Cocina structured data.
    # Prefers the top-level "type"; otherwise takes the "type" of the first
    # flattened nested value.
    # @param cocina [Hash] The Cocina structured data for the subject.
    # @return [String, nil] The type of the subject, or nil if none
    # @see https://github.com/sul-dlss/cocina-models/blob/main/docs/description_types.md#subject-types
    def self.detect_type(cocina)
      top_level_type = cocina["type"]
      return top_level_type if top_level_type

      Utils.flatten_nested_values(cocina).pick("type")
    end

    # Build the Subject subclass that matches the detected type.
    # Unrecognized (or missing) types produce a plain Subject.
    # @param cocina [Hash] The Cocina structured data for the subject.
    # @return [Subject]
    # @see detect_type
    def self.from_cocina(cocina)
      subject_class =
        case detect_type(cocina)
        when "person", "family", "organization", "conference", "event", "name" then NameSubject
        when "title" then TitleSubject
        when "time" then TemporalSubject
        # TODO: special handling for geospatial subjects
        # when "map coordinates", "bounding box coordinates", "point coordinates"
        else Subject
        end

      subject_class.new(cocina)
    end

    # Initialize a Subject object with Cocina structured data.
    # @param cocina [Hash] The Cocina structured data for the subject.
    def initialize(cocina)
      @cocina = cocina
    end

    # The type of the subject, as determined by {detect_type}.
    # @return [String, nil] The type of the subject, or nil if none
    # @see detect_type
    def type
      self.class.detect_type(cocina)
    end

    # A string representation of the subject, formatted for display.
    # Joins the subject's values with the delimiter chosen by #delimiter.
    # Subclasses may override this for more specific formatting.
    # @return [String]
    def display_str
      values = descriptive_values
      Utils.compact_and_join(values, delimiter: delimiter)
    end

    private

    # The "value" entry of every node produced by flattening the subject.
    # @see Utils.flatten_nested_values
    # @return [Array] The plucked "value" of each flattened node.
    def descriptive_values
      flattened_nodes = Utils.flatten_nested_values(cocina)
      flattened_nodes.pluck("value")
    end

    # Delimiter to use for joining structured subject values.
    # Subjects whose displayLabel is "catalog heading" (case-insensitive)
    # use " > "; everything else uses a comma and space.
    # @return [String]
    def delimiter
      catalog_heading = cocina["displayLabel"]&.downcase == "catalog heading"
      catalog_heading ? " > " : ", "
    end
  end

  # A subject representing a named entity.
  class NameSubject < Subject
    attr_reader :name

    # Initialize a NameSubject object with Cocina structured data.
    # Wraps the same data in a Contributor::Name for formatting.
    # @param cocina [Hash] The Cocina structured data for the subject.
    def initialize(cocina)
      super
      @name = Contributor::Name.new(cocina)
    end

    # Delegate display to the contributor name, requesting dates.
    # @return [String] The formatted name string, including life dates
    # @see CocinaDisplay::Contributor::Name#display_str
    def display_str
      name.display_str(with_date: true)
    end
  end

  # A subject representing an entity with a title.
  class TitleSubject < Subject
    # Build a display title from this subject's Cocina data.
    # @see CocinaDisplay::TitleBuilder.build
    # @note Unclear how often structured title subjects occur "in the wild".
    # @return [String]
    def display_str
      titles = [cocina]
      TitleBuilder.build(titles)
    end
  end

  # A subject representing a date and/or time.
  class TemporalSubject < Subject
    attr_reader :date

    # Initialize a TemporalSubject and parse its data into a Dates::Date.
    # @param cocina [Hash] The Cocina structured data for the subject.
    def initialize(cocina)
      super
      @date = Dates::Date.from_cocina(cocina)
    end

    # @return [String] The formatted date/time string for display
    def display_str
      date.qualified_value
    end
  end
end
@@ -53,7 +53,7 @@ module CocinaDisplay
53
53
  def self.sort_title(titles, catalog_links: [])
54
54
  part_label = catalog_links.find { |link| link["catalog"] == "folio" }&.fetch("partLabel", nil)
55
55
  [new(strategy: :first, add_punctuation: false, only_one_parallel_value: false, part_label: part_label, sortable: true).build(titles)]
56
- .flatten.compact.map { |title| title.gsub(/[[:punct:]]*/, "").strip }
56
+ .flatten.compact.map { |title| title.gsub(/[[:punct:]]*/, "").squeeze(" ").strip }
57
57
  end
58
58
 
59
59
  # @param strategy [Symbol] ":first" selects a single title value based on precedence of
@@ -116,6 +116,7 @@ module CocinaDisplay
116
116
  end
117
117
 
118
118
  def extract_title(cocina_title)
119
+ return if cocina_title.blank?
119
120
  title_values = if cocina_title["value"]
120
121
  cocina_title["value"]
121
122
  elsif cocina_title["structuredValue"].present?
@@ -19,15 +19,54 @@ module CocinaDisplay
19
19
  end.delete_suffix(delimiter)
20
20
  end
21
21
 
22
- # Recursively flatten structured values in Cocina metadata.
23
- # Returns a list of hashes representing the "leaf" nodes with values.
22
+ # Recursively flatten structured, parallel, and grouped values in Cocina metadata.
23
+ # Returns a list of hashes representing the "leaf" nodes with +value+ key.
24
24
  # @return [Array<Hash>] List of node hashes with "value" present
25
- def self.flatten_structured_values(cocina, output = [])
25
+ # @param cocina [Hash] The Cocina structured data to flatten
26
+ # @param output [Array] Used for recursion, should be empty on first call
27
+ # @example simple value
28
+ # cocina = { "value" => "John Doe", "type" => "name" }
29
+ # Utils.flatten_nested_values(cocina)
30
+ # #=> [{"value" => "John Doe", "type" => "name"}]
31
+ # @example structured values
32
+ # cocina = { "structuredValue" => [{"value" => "foo"}, {"value" => "bar"}] }
33
+ # Utils.flatten_nested_values(cocina)
34
+ # #=> [{"value" => "foo"}, {"value" => "bar"}]
35
+ # @example parallel structured and simple values
36
+ # cocina = { "parallelValue" => [{"value" => "foo" }, { "structuredValue" => [{"value" => "bar"}, {"value" => "baz"}] }] }
37
+ # Utils.flatten_nested_values(cocina)
38
+ # #=> [{"value" => "foo"}, {"value" => "bar"}, {"value" => "baz"}]
39
def self.flatten_nested_values(cocina, output = [])
  # Arrays must be handled before any Hash-style lookup: Array#[] raises
  # TypeError when given a String key, so checking "value" first would make
  # this branch unreachable.
  return cocina.flat_map { |node| flatten_nested_values(node, output) } if cocina.is_a?(Array)

  # A node that carries its own value is a leaf; return it as-is.
  return [cocina] if cocina["value"].present?

  # Gather children in a fixed order: structured, then parallel, then grouped.
  nested_values = %w[structuredValue parallelValue groupedValue].flat_map { |key| Array(cocina[key]) }
  return output unless nested_values.any?

  nested_values.flat_map { |node| flatten_nested_values(node, output) }
end
48
+
49
+ # Recursively remove empty values from a hash, including nested hashes and arrays.
50
+ # @param hash [Hash] The hash to process
51
+ # @param output [Hash] Used for recursion, should be empty on first call
52
+ # @return [Hash] The hash with empty values removed
53
+ # @example
54
+ # hash = { "name" => "", "age" => nil, "address" => { "city" => "Anytown", "state" => [] } }
55
+ # Utils.deep_compact_blank(hash)
56
+ # #=> { "address" => { "city" => "Anytown" } }
57
def self.deep_compact_blank(hash, output = {})
  # Compacts one value of any kind. Hashes recurse through this method;
  # arrays are compacted element by element, so scalar or nested-array
  # elements are handled instead of being treated as hashes; anything else
  # is returned unchanged for the blank check below.
  compact_value = lambda do |value|
    case value
    when Hash then deep_compact_blank(value)
    when Array then value.map { |item| compact_value.call(item) }.reject(&:blank?)
    else value
    end
  end

  hash.each do |key, value|
    compacted = compact_value.call(value)
    # Drop keys whose compacted value is blank (nil, "", {}, [], ...).
    output[key] = compacted unless compacted.blank?
  end
  output
end
32
71
  end
33
72
  end
@@ -2,5 +2,5 @@
2
2
 
3
3
  # :nodoc:
4
4
  module CocinaDisplay
5
- VERSION = "0.4.0" # :nodoc:
5
+ VERSION = "0.6.0" # :nodoc:
6
6
  end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This script is a simple, brute-force method for finding records that
4
+ # exhibit certain characteristics in the public Cocina JSON for testing.
5
+ #
6
+ # It queries purl-fetcher for all DRUIDs released to a specific target and
7
+ # then fetches each corresponding public Cocina record from PURL and examines it.
8
+ #
9
+ # You need to be on VPN to do this, as the purl-fetcher API is only accessible
10
+ # from within the Stanford network.
11
+ #
12
+ # To use, modify any of the noted items below, then run:
13
+ # $ bundle exec ruby script/find_records.rb
14
+ #
15
+ # You can exit early with Ctrl-C, and it will report how many records were
16
+ # checked before exiting. Running through an entire target will take a while,
17
+ # on the order of 30 minutes or more.
18
+
19
+ require "benchmark"
20
+ require "pp"
21
+ require "purl_fetcher/client"
22
+ require "cocina_display"
23
+ require "cocina_display/utils"
24
+
25
+ # This should correspond to one of the release targets available in purl-fetcher,
26
+ # i.e. "Searchworks", "Earthworks", etc.
27
+ RELEASE_TARGET = "Searchworks"
28
+
29
+ # Modify this expression to match the JSON path you want to search, or just
30
+ # modify the `examine_record` method directly.
31
+ PATH_EXPR = "$..[?length(@.groupedValue) > 0]"
32
+
33
# Modify this method as needed to change what you're looking for in each record.
# It receives a CocinaRecord, evaluates PATH_EXPR against it, and returns an
# array of [path, result] pairs where each result is the matched value with
# blank entries removed.
def examine_record(record)
  matches = record.path(PATH_EXPR)
  matches.map do |value, _node, _key, path|
    [path, CocinaDisplay::Utils.deep_compact_blank(value)]
  end
end
38
+
39
# DRUIDs released to the target, and a count of records fetched so far
released_to_target = []
processed_records = 0

# On Ctrl-C, report progress and stop instead of dumping a backtrace
Signal.trap("INT") do
  puts "\nExiting after processing #{processed_records} records."
  exit
end

# Ask purl-fetcher for everything released to the target. This is a single
# HTTP request returning a very large JSON response, so it can be slow.
puts "Finding records released to #{RELEASE_TARGET}..."
client = PurlFetcher::Client::Reader.new
query_time = Benchmark.realtime do
  begin
    client.released_to(RELEASE_TARGET).each do |record|
      bare_druid = record["druid"].delete_prefix("druid:")
      released_to_target << bare_druid
    end
  rescue Faraday::ConnectionFailed => e
    puts "Connection failed: #{e.message}; are you on VPN?"
    exit 1
  end
end
puts "Found #{released_to_target.size} records released to #{RELEASE_TARGET} in #{query_time.round(2)} seconds"

# Fetch each DRUID's CocinaRecord from PURL, run examine_record on it, and
# print the DRUID with its results whenever anything was returned.
released_to_target.each do |druid|
  cocina_record =
    begin
      CocinaDisplay::CocinaRecord.fetch(druid)
    rescue => e
      # A record that cannot be fetched is reported and skipped, not counted
      puts "Error fetching record #{druid}: #{e.message}"
      next
    end
  processed_records += 1

  results = examine_record(cocina_record)
  unless results.empty?
    puts "Druid: #{druid}"
    results.each do |path, result|
      puts " Path: #{path}"
      puts " Result: #{result.pretty_inspect}\n"
    end

    puts "-" * 80
  end
end