cocina_display 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -16
- data/lib/cocina_display/cocina_record.rb +24 -7
- data/lib/cocina_display/concerns/contributors.rb +64 -41
- data/lib/cocina_display/concerns/events.rb +37 -25
- data/lib/cocina_display/concerns/forms.rb +134 -0
- data/lib/cocina_display/concerns/languages.rb +20 -0
- data/lib/cocina_display/concerns/subjects.rb +63 -16
- data/lib/cocina_display/contributor.rb +57 -8
- data/lib/cocina_display/dates/date.rb +9 -8
- data/lib/cocina_display/dates/date_range.rb +29 -9
- data/lib/cocina_display/events/event.rb +78 -0
- data/lib/cocina_display/events/imprint.rb +100 -0
- data/lib/cocina_display/events/location.rb +56 -0
- data/lib/cocina_display/language.rb +47 -0
- data/lib/cocina_display/subjects/subject.rb +63 -0
- data/lib/cocina_display/subjects/subject_value.rb +104 -0
- data/lib/cocina_display/title_builder.rb +2 -1
- data/lib/cocina_display/utils.rb +30 -5
- data/lib/cocina_display/version.rb +1 -1
- data/lib/cocina_display/vocabularies/marc_country_codes.rb +393 -0
- data/lib/cocina_display/vocabularies/marc_relator_codes.rb +318 -0
- data/lib/cocina_display/vocabularies/searchworks_languages.rb +526 -0
- data/script/find_records.rb +85 -0
- metadata +42 -5
- data/lib/cocina_display/imprint.rb +0 -123
- data/lib/cocina_display/marc_country_codes.rb +0 -394
- data/lib/cocina_display/subject.rb +0 -127
@@ -0,0 +1,526 @@
|
|
1
|
+
module CocinaDisplay
  module Vocabularies
    # Map of language codes to language names used in Searchworks.
    #
    # Keys are lowercase language codes (one entry, "egy-Egyd", carries a
    # script suffix); values are the display names. The hash itself is frozen.
    #
    # The codes "mis", "mul", "und" and "zxx" are deliberately left out: they
    # are kept below as comments so that they never resolve to a display name.
    #
    # @see https://github.com/solrmarc/stanford-solr-marc/blob/master/stanford-sw/translation_maps/language_map.properties
    # @note Ported from stanford-mods gem.
    SEARCHWORKS_LANGUAGES = {
      "aaa" => "Afar",
      "abk" => "Abkhaz",
      "ace" => "Achinese",
      "ach" => "Acoli",
      "ada" => "Adangme",
      "ady" => "Adygei",
      "afa" => "Afroasiatic (Other)",
      "afh" => "Afrihili (Artificial language)",
      "afr" => "Afrikaans",
      "ain" => "Ainu",
      "ajm" => "Aljamia",
      "aka" => "Akan",
      "akk" => "Akkadian",
      "alb" => "Albanian",
      "ale" => "Aleut",
      "alg" => "Algonquian (Other)",
      "alt" => "Altai",
      "amh" => "Amharic",
      "ang" => "English, Old (ca. 450-1100)",
      "anp" => "Angika",
      "apa" => "Apache languages",
      "ara" => "Arabic",
      "arc" => "Aramaic",
      "arg" => "Aragonese Spanish",
      "arm" => "Armenian",
      "arn" => "Mapuche",
      "arp" => "Arapaho",
      "art" => "Artificial (Other)",
      "arw" => "Arawak",
      "ase" => "American Sign Language",
      "asm" => "Assamese",
      "ast" => "Bable",
      "ath" => "Athapascan (Other)",
      "aus" => "Australian languages",
      "ava" => "Avaric",
      "ave" => "Avestan",
      "awa" => "Awadhi",
      "aym" => "Aymara",
      "aze" => "Azerbaijani",
      "bad" => "Banda",
      "bai" => "Bamileke languages",
      "bak" => "Bashkir",
      "bal" => "Baluchi",
      "bam" => "Bambara",
      "ban" => "Balinese",
      "baq" => "Basque",
      "bas" => "Basa",
      "bat" => "Baltic (Other)",
      "bej" => "Beja",
      "bel" => "Belarusian",
      "bem" => "Bemba",
      "ben" => "Bengali",
      "ber" => "Berber (Other)",
      "bho" => "Bhojpuri",
      "bih" => "Bihari",
      "bik" => "Bikol",
      "bin" => "Edo",
      "bis" => "Bislama",
      "bla" => "Siksika",
      "bnt" => "Bantu (Other)",
      "bos" => "Bosnian",
      "bra" => "Braj",
      "bre" => "Breton",
      "btk" => "Batak",
      "bua" => "Buriat",
      "bug" => "Bugis",
      "bul" => "Bulgarian",
      "bur" => "Burmese",
      "byn" => "Bilin",
      "cad" => "Caddo",
      "cai" => "Central American Indian (Other)",
      "cam" => "Khmer",
      "car" => "Carib",
      "cat" => "Catalan",
      "cau" => "Caucasian (Other)",
      "ceb" => "Cebuano",
      "cel" => "Celtic (Other)",
      "cha" => "Chamorro",
      "chb" => "Chibcha",
      "che" => "Chechen",
      "chg" => "Chagatai",
      "chi" => "Chinese",
      "chk" => "Truk",
      "chm" => "Mari",
      "chn" => "Chinook jargon",
      "cho" => "Choctaw",
      "chp" => "Chipewyan",
      "chr" => "Cherokee",
      "chu" => "Church Slavic",
      "chv" => "Chuvash",
      "chy" => "Cheyenne",
      "cmc" => "Chamic languages",
      "cop" => "Coptic",
      "cor" => "Cornish",
      "cos" => "Corsican",
      "cpe" => "Creoles and Pidgins, English-based (Other)",
      "cpf" => "Creoles and Pidgins, French-based (Other)",
      "cpp" => "Creoles and Pidgins, Portuguese-based (Other)",
      "cre" => "Cree",
      "crh" => "Crimean Tatar",
      "crp" => "Creoles and Pidgins (Other)",
      "csb" => "Kashubian",
      "cus" => "Cushitic (Other)",
      "cze" => "Czech",
      "dak" => "Dakota",
      "dan" => "Danish",
      "dar" => "Dargwa",
      "day" => "Dayak",
      "del" => "Delaware",
      "den" => "Slave",
      "dgr" => "Dogrib",
      "din" => "Dinka",
      "div" => "Divehi",
      "doi" => "Dogri",
      "dra" => "Dravidian (Other)",
      "dsb" => "Lower Sorbian",
      "dua" => "Duala",
      "dum" => "Dutch, Middle (ca. 1050-1350)",
      "dut" => "Dutch",
      "dyu" => "Dyula",
      "dzo" => "Dzongkha",
      "efi" => "Efik",
      "egy" => "Egyptian",
      "egy-Egyd" => "Egyptian, Demotic",
      "eka" => "Ekajuk",
      "elx" => "Elamite",
      "eng" => "English",
      "enm" => "English, Middle (1100-1500)",
      "epo" => "Esperanto",
      "esk" => "Eskimo languages",
      "esp" => "Esperanto",
      "est" => "Estonian",
      "eth" => "Ethiopic",
      "ewe" => "Ewe",
      "ewo" => "Ewondo",
      "fan" => "Fang",
      "fao" => "Faroese",
      "far" => "Faroese",
      "fat" => "Fanti",
      "fij" => "Fijian",
      "fil" => "Filipino",
      "fin" => "Finnish",
      "fiu" => "Finno-Ugrian (Other)",
      "fon" => "Fon",
      "fre" => "French",
      "fri" => "Frisian",
      "frm" => "French, Middle (ca. 1400-1600)",
      "fro" => "French, Old (ca. 842-1400)",
      "frr" => "North Frisian",
      "frs" => "East Frisian",
      "fry" => "Frisian",
      "ful" => "Fula",
      "fur" => "Friulian",
      "gaa" => "Ga",
      "gae" => "Scottish Gaelic",
      "gag" => "Galician",
      "gal" => "Oromo",
      "gay" => "Gayo",
      "gba" => "Gbaya",
      "gem" => "Germanic (Other)",
      "geo" => "Georgian",
      "ger" => "German",
      "gez" => "Ethiopic",
      "gil" => "Gilbertese",
      "gla" => "Scottish Gaelic",
      "gle" => "Irish",
      "glg" => "Galician",
      "glv" => "Manx",
      "gmh" => "German, Middle High (ca. 1050-1500)",
      "goh" => "German, Old High (ca. 750-1050)",
      "gon" => "Gondi",
      "gor" => "Gorontalo",
      "got" => "Gothic",
      "grb" => "Grebo",
      "grc" => "Greek, Ancient (to 1453)",
      "gre" => "Greek, Modern (1453- )",
      "grn" => "Guarani",
      "gsw" => "Swiss German",
      "gua" => "Guarani",
      "guj" => "Gujarati",
      "gwi" => "Gwich'in",
      "hai" => "Haida",
      "hat" => "Haitian French Creole",
      "hau" => "Hausa",
      "haw" => "Hawaiian",
      "heb" => "Hebrew",
      "her" => "Herero",
      "hil" => "Hiligaynon",
      "him" => "Himachali",
      "hin" => "Hindi",
      "hit" => "Hittite",
      "hmn" => "Hmong",
      "hmo" => "Hiri Motu",
      "hrv" => "Croatian",
      "hsb" => "Upper Sorbian",
      "hun" => "Hungarian",
      "hup" => "Hupa",
      "iba" => "Iban",
      "ibo" => "Igbo",
      "ice" => "Icelandic",
      "ido" => "Ido",
      "iii" => "Sichuan Yi",
      "ijo" => "Ijo",
      "iku" => "Inuktitut",
      "ile" => "Interlingue",
      "ilo" => "Iloko",
      "ina" => "Interlingua (International Auxiliary Language Association)",
      "inc" => "Indic (Other)",
      "ind" => "Indonesian",
      "ine" => "Indo-European (Other)",
      "inh" => "Ingush",
      "int" => "Interlingua (International Auxiliary Language Association)",
      "ipk" => "Inupiaq",
      "ira" => "Iranian (Other)",
      "iri" => "Irish",
      "iro" => "Iroquoian (Other)",
      "ita" => "Italian",
      "jav" => "Javanese",
      "jbo" => "Lojban (Artificial language)",
      "jpn" => "Japanese",
      "jpr" => "Judeo-Persian",
      "jrb" => "Judeo-Arabic",
      "kaa" => "Kara-Kalpak",
      "kab" => "Kabyle",
      "kac" => "Kachin",
      "kal" => "Kalatdlisut",
      "kam" => "Kamba",
      "kan" => "Kannada",
      "kar" => "Karen",
      "kas" => "Kashmiri",
      "kau" => "Kanuri",
      "kaw" => "Kawi",
      "kaz" => "Kazakh",
      "kbd" => "Kabardian",
      "kha" => "Khasi",
      "khi" => "Khoisan (Other)",
      "khm" => "Khmer",
      "kho" => "Khotanese",
      "kik" => "Kikuyu",
      "kin" => "Kinyarwanda",
      "kir" => "Kyrgyz",
      "kmb" => "Kimbundu",
      "kok" => "Konkani",
      "kom" => "Komi",
      "kon" => "Kongo",
      "kor" => "Korean",
      "kos" => "Kusaie",
      "kpe" => "Kpelle",
      "krc" => "Karachay-Balkar",
      "krl" => "Karelian",
      "kro" => "Kru",
      "kru" => "Kurukh",
      "kua" => "Kuanyama",
      "kum" => "Kumyk",
      "kur" => "Kurdish",
      "kus" => "Kusaie",
      "kut" => "Kutenai",
      "lad" => "Ladino",
      "lah" => "Lahnda",
      "lam" => "Lamba",
      "lan" => "Occitan (post-1500)",
      "lao" => "Lao",
      "lap" => "Sami",
      "lat" => "Latin",
      "lav" => "Latvian",
      "lez" => "Lezgian",
      "lim" => "Limburgish",
      "lin" => "Lingala",
      "lit" => "Lithuanian",
      "lol" => "Mongo-Nkundu",
      "loz" => "Lozi",
      "ltz" => "Letzeburgesch",
      "lua" => "Luba-Lulua",
      "lub" => "Luba-Katanga",
      "lug" => "Ganda",
      "lui" => "Luiseno",
      "lun" => "Lunda",
      "luo" => "Luo (Kenya and Tanzania)",
      "lus" => "Lushai",
      "mac" => "Macedonian",
      "mad" => "Madurese",
      "mag" => "Magahi",
      "mah" => "Marshallese",
      "mai" => "Maithili",
      "mak" => "Makasar",
      "mal" => "Malayalam",
      "man" => "Mandingo",
      "mao" => "Maori",
      "map" => "Austronesian (Other)",
      "mar" => "Marathi",
      "mas" => "Masai",
      "max" => "Manx",
      "may" => "Malay",
      "mdf" => "Moksha",
      "mdr" => "Mandar",
      "men" => "Mende",
      "mga" => "Irish, Middle (ca. 1100-1550)",
      "mic" => "Micmac",
      "min" => "Minangkabau",
      # "mis" => "Miscellaneous languages",
      "mkh" => "Mon-Khmer (Other)",
      "mla" => "Malagasy",
      "mlg" => "Malagasy",
      "mlt" => "Maltese",
      "mnc" => "Manchu",
      "mni" => "Manipuri",
      "mno" => "Manobo languages",
      "moh" => "Mohawk",
      "mol" => "Moldavian",
      "mon" => "Mongolian",
      "mos" => "Moore",
      # "mul" => "Multiple languages",
      "mun" => "Munda (Other)",
      "mus" => "Creek",
      "mwl" => "Mirandese",
      "mwr" => "Marwari",
      "myn" => "Mayan languages",
      "myv" => "Erzya",
      "nah" => "Nahuatl",
      "nai" => "North American Indian (Other)",
      "nap" => "Neapolitan Italian",
      "nau" => "Nauru",
      "nav" => "Navajo",
      "nbl" => "Ndebele (South Africa)",
      "nde" => "Ndebele (Zimbabwe)",
      "ndo" => "Ndonga",
      "nds" => "Low German",
      "nep" => "Nepali",
      "new" => "Newari",
      "nia" => "Nias",
      "nic" => "Niger-Kordofanian (Other)",
      "niu" => "Niuean",
      "nno" => "Norwegian (Nynorsk)",
      "nob" => "Norwegian (Bokmal)",
      "nog" => "Nogai",
      "non" => "Old Norse",
      "nor" => "Norwegian",
      "nqo" => "N'Ko",
      "nso" => "Northern Sotho",
      "nub" => "Nubian languages",
      "nwc" => "Newari, Old",
      "nya" => "Nyanja",
      "nym" => "Nyamwezi",
      "nyn" => "Nyankole",
      "nyo" => "Nyoro",
      "nzi" => "Nzima",
      "oci" => "Occitan (post-1500)",
      "oji" => "Ojibwa",
      "ori" => "Oriya",
      "orm" => "Oromo",
      "osa" => "Osage",
      "oss" => "Ossetic",
      "ota" => "Turkish, Ottoman",
      "oto" => "Otomian languages",
      "paa" => "Papuan (Other)",
      "pag" => "Pangasinan",
      "pal" => "Pahlavi",
      "pam" => "Pampanga",
      "pan" => "Panjabi",
      "pap" => "Papiamento",
      "pau" => "Palauan",
      "peo" => "Old Persian (ca. 600-400 B.C.)",
      "per" => "Persian",
      "phi" => "Philippine (Other)",
      "phn" => "Phoenician",
      "pli" => "Pali",
      "pol" => "Polish",
      "pon" => "Ponape",
      "por" => "Portuguese",
      "pra" => "Prakrit languages",
      "pro" => "Provencal (to 1500)",
      "pus" => "Pushto",
      "que" => "Quechua",
      "raj" => "Rajasthani",
      "rap" => "Rapanui",
      "rar" => "Rarotongan",
      "roa" => "Romance (Other)",
      "roh" => "Raeto-Romance",
      "rom" => "Romani",
      "rum" => "Romanian",
      "run" => "Rundi",
      "rup" => "Aromanian",
      "rus" => "Russian",
      "sad" => "Sandawe",
      "sag" => "Sango (Ubangi Creole)",
      "sah" => "Yakut",
      "sai" => "South American Indian (Other)",
      "sal" => "Salishan languages",
      "sam" => "Samaritan Aramaic",
      "san" => "Sanskrit",
      "sao" => "Samoan",
      "sas" => "Sasak",
      "sat" => "Santali",
      "scc" => "Serbian",
      "scn" => "Sicilian Italian",
      "sco" => "Scots",
      "scr" => "Croatian",
      "sel" => "Selkup",
      "sem" => "Semitic (Other)",
      "sga" => "Irish, Old (to 1100)",
      "sgn" => "Sign languages",
      "shn" => "Shan",
      "sho" => "Shona",
      "sid" => "Sidamo",
      "sin" => "Sinhalese",
      "sio" => "Siouan (Other)",
      "sit" => "Sino-Tibetan (Other)",
      "sla" => "Slavic (Other)",
      "slo" => "Slovak",
      "slv" => "Slovenian",
      "sma" => "Southern Sami",
      "sme" => "Northern Sami",
      "smi" => "Sami",
      "smj" => "Lule Sami",
      "smn" => "Inari Sami",
      "smo" => "Samoan",
      "sms" => "Skolt Sami",
      "sna" => "Shona",
      "snd" => "Sindhi",
      "snh" => "Sinhalese",
      "snk" => "Soninke",
      "sog" => "Sogdian",
      "som" => "Somali",
      "son" => "Songhai",
      "sot" => "Sotho",
      "spa" => "Spanish",
      "srd" => "Sardinian",
      "srn" => "Sranan",
      "srp" => "Serbian",
      "srr" => "Serer",
      "ssa" => "Nilo-Saharan (Other)",
      "sso" => "Sotho",
      "ssw" => "Swazi",
      "suk" => "Sukuma",
      "sun" => "Sundanese",
      "sus" => "Susu",
      "sux" => "Sumerian",
      "swa" => "Swahili",
      "swe" => "Swedish",
      "swz" => "Swazi",
      "syc" => "Syriac",
      "syr" => "Syriac, Modern",
      "tag" => "Tagalog",
      "tah" => "Tahitian",
      "tai" => "Tai (Other)",
      "taj" => "Tajik",
      "tam" => "Tamil",
      "tar" => "Tatar",
      "tat" => "Tatar",
      "tel" => "Telugu",
      "tem" => "Temne",
      "ter" => "Terena",
      "tet" => "Tetum",
      "tgk" => "Tajik",
      "tgl" => "Tagalog",
      "tha" => "Thai",
      "tib" => "Tibetan",
      "tig" => "Tigre",
      "tir" => "Tigrinya",
      "tiv" => "Tiv",
      "tkl" => "Tokelauan",
      "tlh" => "Klingon (Artificial language)",
      "tli" => "Tlingit",
      "tmh" => "Tamashek",
      "tog" => "Tonga (Nyasa)",
      "ton" => "Tongan",
      "tpi" => "Tok Pisin",
      "tru" => "Truk",
      "tsi" => "Tsimshian",
      "tsn" => "Tswana",
      "tso" => "Tsonga",
      "tsw" => "Tswana",
      "tuk" => "Turkmen",
      "tum" => "Tumbuka",
      "tup" => "Tupi languages",
      "tur" => "Turkish",
      "tut" => "Altaic (Other)",
      "tvl" => "Tuvaluan",
      "twi" => "Twi",
      "tyv" => "Tuvinian",
      "udm" => "Udmurt",
      "uga" => "Ugaritic",
      "uig" => "Uighur",
      "ukr" => "Ukrainian",
      "umb" => "Umbundu",
      # "und" => "Undetermined",
      "urd" => "Urdu",
      "uzb" => "Uzbek",
      "vai" => "Vai",
      "ven" => "Venda",
      "vie" => "Vietnamese",
      "vol" => "Volapuk",
      "vot" => "Votic",
      "wak" => "Wakashan languages",
      "wal" => "Walamo",
      "war" => "Waray",
      "was" => "Washo",
      "wel" => "Welsh",
      "wen" => "Sorbian languages",
      "wln" => "Walloon",
      "wol" => "Wolof",
      "xal" => "Kalmyk",
      "xho" => "Xhosa",
      "yao" => "Yao (Africa)",
      "yap" => "Yapese",
      "yid" => "Yiddish",
      "yor" => "Yoruba",
      "ypk" => "Yupik languages",
      "zap" => "Zapotec",
      "zbl" => "Blissymbolics",
      "zen" => "Zenaga",
      "zha" => "Zhuang",
      "znd" => "Zande",
      "zul" => "Zulu",
      "zun" => "Zuni",
      # "zxx" => "null",
      "zza" => "Zaza"
    }.freeze
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This script is a simple, brute-force method for finding records that
|
4
|
+
# exhibit certain characteristics in the public Cocina JSON for testing.
|
5
|
+
#
|
6
|
+
# It queries purl-fetcher for all DRUIDs released to a specific target and
|
7
|
+
# then fetches each corresponding public Cocina record from PURL and examines it.
|
8
|
+
#
|
9
|
+
# You need to be on VPN to do this, as the purl-fetcher API is only accessible
|
10
|
+
# from within the Stanford network.
|
11
|
+
#
|
12
|
+
# To use, modify any of the noted items below, then run:
|
13
|
+
# $ bundle exec ruby script/find_records.rb
|
14
|
+
#
|
15
|
+
# You can exit early with Ctrl-C, and it will report how many records were
|
16
|
+
# checked before exiting. Running through an entire target will take a while,
|
17
|
+
# on the order of 30 minutes or more.
|
18
|
+
|
19
|
+
require "benchmark"
|
20
|
+
require "pp"
|
21
|
+
require "purl_fetcher/client"
|
22
|
+
require "cocina_display"
|
23
|
+
require "cocina_display/utils"
|
24
|
+
|
25
|
+
# This should correspond to one of the release targets available in purl-fetcher,
|
26
|
+
# e.g. "Searchworks", "Earthworks", etc.
|
27
|
+
RELEASE_TARGET = "Searchworks"
|
28
|
+
|
29
|
+
# Modify this expression to match the JSON path you want to search, or just
|
30
|
+
# modify the `examine_record` method directly.
|
31
|
+
PATH_EXPR = "$..[?length(@.groupedValue) > 0]"
|
32
|
+
|
33
|
+
# Pull the interesting parts out of a single record. Edit this method to
# change what the script looks for in each record.
#
# @param record [CocinaDisplay::CocinaRecord] the record to inspect
# @return [Array<Array>] one [path, result] pair per PATH_EXPR match, where
#   result is the matched value run through Utils.deep_compact_blank
def examine_record(record)
  matches = record.path(PATH_EXPR)
  matches.map do |value, _node, _key, path|
    [path, CocinaDisplay::Utils.deep_compact_blank(value)]
  end
end
|
38
|
+
|
39
|
+
# Bookkeeping: how many records have been fetched so far, and the full list
# of DRUIDs released to the target
processed_records = 0
released_to_target = []

# On Ctrl-C, report how many records were processed and then exit
trap("INT") do
  puts "\nExiting after processing #{processed_records} records."
  exit
end
|
48
|
+
|
49
|
+
# Ask purl-fetcher for every DRUID released to the target. This is one single
# HTTP request that returns a massive JSON response, so it can be quite slow.
puts "Finding records released to #{RELEASE_TARGET}..."
client = PurlFetcher::Client::Reader.new
query_time = Benchmark.realtime do
  begin
    client.released_to(RELEASE_TARGET).each do |record|
      # Keep only the bare identifier, without the "druid:" prefix
      released_to_target.push(record["druid"].delete_prefix("druid:"))
    end
  rescue Faraday::ConnectionFailed => e
    puts "Connection failed: #{e.message}; are you on VPN?"
    exit 1
  end
end
elapsed = query_time.round(2)
puts "Found #{released_to_target.size} records released to #{RELEASE_TARGET} in #{elapsed} seconds"
|
62
|
+
|
63
|
+
# Walk the list of DRUIDs, fetching each one from PURL as a CocinaRecord and
# passing it to examine_record. Records that yield nothing are skipped
# silently; for the rest, print the DRUID followed by each path and result.
released_to_target.each do |druid|
  cocina_record =
    begin
      CocinaDisplay::CocinaRecord.fetch(druid)
    rescue => e
      # A record that cannot be fetched is reported and left uncounted
      puts "Error fetching record #{druid}: #{e.message}"
      next
    end
  processed_records += 1

  findings = examine_record(cocina_record)
  next if findings.empty?

  puts "Druid: #{druid}"
  findings.each do |path, result|
    puts " Path: #{path}"
    puts " Result: #{result.pretty_inspect}\n"
  end

  puts "-" * 80
end
|