stanford-mods 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -39,6 +39,16 @@ Or install it yourself as:
39
39
  in foo!
40
40
  => nil
41
41
 
42
+ Example Using SearchWorks Mixins:
43
+
44
+ > require 'stanford-mods/searchworks'
45
+ > m = Stanford::Mods::Record.new
46
+ > m.from_str('<mods><language><languageTerm authority="iso639-2b" type="code">dut</languageTerm></language></mods>')
47
+ > m.language_facet <-- from SearchWorks mixin
48
+ => ['Dutch']
49
+ > m.languages <-- from mods gem
50
+ => ['Dutch; Flemish']
51
+
42
52
  ## Contributing
43
53
 
44
54
  1. Fork it
@@ -50,5 +60,6 @@ Or install it yourself as:
50
60
 
51
61
  == Releases
52
62
 
63
+ 0.0.3 began SearchWorks mixins
53
64
  0.0.2 add usage instructions to readme
54
65
  0.0.1 Initial commit - grab name
@@ -0,0 +1,55 @@
1
+ require 'stanford-mods/searchworks_languages'
2
+
3
+ # SearchWorks-specific wrangling of MODS metadata, as an extension of the Mods::Record object
4
+ module Stanford
5
+ module Mods
6
+
7
+ class Record < ::Mods::Record
8
+
9
+ # if it's coming from DOR, then it is available online
10
+ def access_facet
11
+ ['Online']
12
+ end
13
+
14
+ # include languages known to SearchWorks; try to error-correct when possible (e.g. when ISO-639 disagrees with MARC standard)
15
+ def language_facet
16
+ result = []
17
+ @mods_ng_xml.language.each { |n|
18
+ # get languageTerm codes and add their translations to the result
19
+ n.code_term.each { |ct|
20
+ if ct.authority.match(/^iso639/)
21
+ begin
22
+ vals = ct.text.split(/[,|\ ]/).reject {|x| x.strip.length == 0 }
23
+ vals.each do |v|
24
+ iso639_val = ISO_639.find(v.strip).english_name
25
+ if SEARCHWORKS_LANGUAGES.has_value?(iso639_val)
26
+ result << iso639_val
27
+ else
28
+ result << SEARCHWORKS_LANGUAGES[v.strip]
29
+ end
30
+ end
31
+ rescue => e
32
+ p "Couldn't find english name for #{ct.text}"
33
+ result << SEARCHWORKS_LANGUAGES[v.strip]
34
+ end
35
+ else
36
+ result << SEARCHWORKS_LANGUAGES[v.strip]
37
+ end
38
+ }
39
+ # add languageTerm text values
40
+ n.text_term.each { |tt|
41
+ val = tt.text.strip
42
+ result << val if val.length > 0 && SEARCHWORKS_LANGUAGES.has_value?(val)
43
+ }
44
+
45
+ # add language values that aren't in languageTerm subelement
46
+ if n.languageTerm.size == 0
47
+ result << n.text if SEARCHWORKS_LANGUAGES.has_value?(n.text)
48
+ end
49
+ }
50
+ result.uniq
51
+ end
52
+
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,519 @@
1
+ # Language Values used by SearchWorks
2
+ # From https://github.com/solrmarc/stanford-solr-marc/blob/master/stanford-sw/translation_maps/language_map.properties
3
+ SEARCHWORKS_LANGUAGES = {
4
+ 'aaa' => 'Afar',
5
+ 'abk' => 'Abkhaz',
6
+ 'ace' => 'Achinese',
7
+ 'ach' => 'Acoli',
8
+ 'ada' => 'Adangme',
9
+ 'ady' => 'Adygei',
10
+ 'afa' => 'Afroasiatic (Other)',
11
+ 'afh' => 'Afrihili (Artificial language)',
12
+ 'afr' => 'Afrikaans',
13
+ 'ain' => 'Ainu',
14
+ 'ajm' => 'Aljamia',
15
+ 'aka' => 'Akan',
16
+ 'akk' => 'Akkadian',
17
+ 'alb' => 'Albanian',
18
+ 'ale' => 'Aleut',
19
+ 'alg' => 'Algonquian (Other)',
20
+ 'alt' => 'Altai',
21
+ 'amh' => 'Amharic',
22
+ 'ang' => 'English, Old (ca. 450-1100)',
23
+ 'anp' => 'Angika',
24
+ 'apa' => 'Apache languages',
25
+ 'ara' => 'Arabic',
26
+ 'arc' => 'Aramaic',
27
+ 'arg' => 'Aragonese Spanish',
28
+ 'arm' => 'Armenian',
29
+ 'arn' => 'Mapuche',
30
+ 'arp' => 'Arapaho',
31
+ 'art' => 'Artificial (Other)',
32
+ 'arw' => 'Arawak',
33
+ 'asm' => 'Assamese',
34
+ 'ast' => 'Bable',
35
+ 'ath' => 'Athapascan (Other)',
36
+ 'aus' => 'Australian languages',
37
+ 'ava' => 'Avaric',
38
+ 'ave' => 'Avestan',
39
+ 'awa' => 'Awadhi',
40
+ 'aym' => 'Aymara',
41
+ 'aze' => 'Azerbaijani',
42
+ 'bad' => 'Banda',
43
+ 'bai' => 'Bamileke languages',
44
+ 'bak' => 'Bashkir',
45
+ 'bal' => 'Baluchi',
46
+ 'bam' => 'Bambara',
47
+ 'ban' => 'Balinese',
48
+ 'baq' => 'Basque',
49
+ 'bas' => 'Basa',
50
+ 'bat' => 'Baltic (Other)',
51
+ 'bej' => 'Beja',
52
+ 'bel' => 'Belarusian',
53
+ 'bem' => 'Bemba',
54
+ 'ben' => 'Bengali',
55
+ 'ber' => 'Berber (Other)',
56
+ 'bho' => 'Bhojpuri',
57
+ 'bih' => 'Bihari',
58
+ 'bik' => 'Bikol',
59
+ 'bin' => 'Edo',
60
+ 'bis' => 'Bislama',
61
+ 'bla' => 'Siksika',
62
+ 'bnt' => 'Bantu (Other)',
63
+ 'bos' => 'Bosnian',
64
+ 'bra' => 'Braj',
65
+ 'bre' => 'Breton',
66
+ 'btk' => 'Batak',
67
+ 'bua' => 'Buriat',
68
+ 'bug' => 'Bugis',
69
+ 'bul' => 'Bulgarian',
70
+ 'bur' => 'Burmese',
71
+ 'byn' => 'Bilin',
72
+ 'cad' => 'Caddo',
73
+ 'cai' => 'Central American Indian (Other)',
74
+ 'cam' => 'Khmer',
75
+ 'car' => 'Carib',
76
+ 'cat' => 'Catalan',
77
+ 'cau' => 'Caucasian (Other)',
78
+ 'ceb' => 'Cebuano',
79
+ 'cel' => 'Celtic (Other)',
80
+ 'cha' => 'Chamorro',
81
+ 'chb' => 'Chibcha',
82
+ 'che' => 'Chechen',
83
+ 'chg' => 'Chagatai',
84
+ 'chi' => 'Chinese',
85
+ 'chk' => 'Truk',
86
+ 'chm' => 'Mari',
87
+ 'chn' => 'Chinook jargon',
88
+ 'cho' => 'Choctaw',
89
+ 'chp' => 'Chipewyan',
90
+ 'chr' => 'Cherokee',
91
+ 'chu' => 'Church Slavic',
92
+ 'chv' => 'Chuvash',
93
+ 'chy' => 'Cheyenne',
94
+ 'cmc' => 'Chamic languages',
95
+ 'cop' => 'Coptic',
96
+ 'cor' => 'Cornish',
97
+ 'cos' => 'Corsican',
98
+ 'cpe' => 'Creoles and Pidgins, English-based (Other)',
99
+ 'cpf' => 'Creoles and Pidgins, French-based (Other)',
100
+ 'cpp' => 'Creoles and Pidgins, Portuguese-based (Other)',
101
+ 'cre' => 'Cree',
102
+ 'crh' => 'Crimean Tatar',
103
+ 'crp' => 'Creoles and Pidgins (Other)',
104
+ 'csb' => 'Kashubian',
105
+ 'cus' => 'Cushitic (Other)',
106
+ 'cze' => 'Czech',
107
+ 'dak' => 'Dakota',
108
+ 'dan' => 'Danish',
109
+ 'dar' => 'Dargwa',
110
+ 'day' => 'Dayak',
111
+ 'del' => 'Delaware',
112
+ 'den' => 'Slave',
113
+ 'dgr' => 'Dogrib',
114
+ 'din' => 'Dinka',
115
+ 'div' => 'Divehi',
116
+ 'doi' => 'Dogri',
117
+ 'dra' => 'Dravidian (Other)',
118
+ 'dsb' => 'Lower Sorbian',
119
+ 'dua' => 'Duala',
120
+ 'dum' => 'Dutch, Middle (ca. 1050-1350)',
121
+ 'dut' => 'Dutch',
122
+ 'dyu' => 'Dyula',
123
+ 'dzo' => 'Dzongkha',
124
+ 'efi' => 'Efik',
125
+ 'egy' => 'Egyptian',
126
+ 'eka' => 'Ekajuk',
127
+ 'elx' => 'Elamite',
128
+ 'eng' => 'English',
129
+ 'enm' => 'English, Middle (1100-1500)',
130
+ 'epo' => 'Esperanto',
131
+ 'esk' => 'Eskimo languages',
132
+ 'esp' => 'Esperanto',
133
+ 'est' => 'Estonian',
134
+ 'eth' => 'Ethiopic',
135
+ 'ewe' => 'Ewe',
136
+ 'ewo' => 'Ewondo',
137
+ 'fan' => 'Fang',
138
+ 'fao' => 'Faroese',
139
+ 'far' => 'Faroese',
140
+ 'fat' => 'Fanti',
141
+ 'fij' => 'Fijian',
142
+ 'fil' => 'Filipino',
143
+ 'fin' => 'Finnish',
144
+ 'fiu' => 'Finno-Ugrian (Other)',
145
+ 'fon' => 'Fon',
146
+ 'fre' => 'French',
147
+ 'fri' => 'Frisian',
148
+ 'frm' => 'French, Middle (ca. 1400-1600)',
149
+ 'fro' => 'French, Old (ca. 842-1400)',
150
+ 'frr' => 'North Frisian',
151
+ 'frs' => 'East Frisian',
152
+ 'fry' => 'Frisian',
153
+ 'ful' => 'Fula',
154
+ 'fur' => 'Friulian',
155
+ 'gaa' => 'Ga',
156
+ 'gae' => 'Scottish Gaelic',
157
+ 'gag' => 'Galician',
158
+ 'gal' => 'Oromo',
159
+ 'gay' => 'Gayo',
160
+ 'gba' => 'Gbaya',
161
+ 'gem' => 'Germanic (Other)',
162
+ 'geo' => 'Georgian',
163
+ 'ger' => 'German',
164
+ 'gez' => 'Ethiopic',
165
+ 'gil' => 'Gilbertese',
166
+ 'gla' => 'Scottish Gaelic',
167
+ 'gle' => 'Irish',
168
+ 'glg' => 'Galician',
169
+ 'glv' => 'Manx',
170
+ 'gmh' => 'German, Middle High (ca. 1050-1500)',
171
+ 'goh' => 'German, Old High (ca. 750-1050)',
172
+ 'gon' => 'Gondi',
173
+ 'gor' => 'Gorontalo',
174
+ 'got' => 'Gothic',
175
+ 'grb' => 'Grebo',
176
+ 'grc' => 'Greek, Ancient (to 1453)',
177
+ 'gre' => 'Greek, Modern (1453- )',
178
+ 'grn' => 'Guarani',
179
+ 'gsw' => 'Swiss German',
180
+ 'gua' => 'Guarani',
181
+ 'guj' => 'Gujarati',
182
+ 'gwi' => "Gwich'in ",
183
+ 'hai' => 'Haida',
184
+ 'hat' => 'Haitian French Creole',
185
+ 'hau' => 'Hausa',
186
+ 'haw' => 'Hawaiian',
187
+ 'heb' => 'Hebrew',
188
+ 'her' => 'Herero',
189
+ 'hil' => 'Hiligaynon',
190
+ 'him' => 'Himachali',
191
+ 'hin' => 'Hindi',
192
+ 'hit' => 'Hittite',
193
+ 'hmn' => 'Hmong',
194
+ 'hmo' => 'Hiri Motu',
195
+ 'hrv' => 'Croatian',
196
+ 'hsb' => 'Upper Sorbian',
197
+ 'hun' => 'Hungarian',
198
+ 'hup' => 'Hupa',
199
+ 'iba' => 'Iban',
200
+ 'ibo' => 'Igbo',
201
+ 'ice' => 'Icelandic',
202
+ 'ido' => 'Ido',
203
+ 'iii' => 'Sichuan Yi',
204
+ 'ijo' => 'Ijo',
205
+ 'iku' => 'Inuktitut',
206
+ 'ile' => 'Interlingue',
207
+ 'ilo' => 'Iloko',
208
+ 'ina' => 'Interlingua (International Auxiliary Language Association)',
209
+ 'inc' => 'Indic (Other)',
210
+ 'ind' => 'Indonesian',
211
+ 'ine' => 'Indo-European (Other)',
212
+ 'inh' => 'Ingush',
213
+ 'int' => 'Interlingua (International Auxiliary Language Association)',
214
+ 'ipk' => 'Inupiaq',
215
+ 'ira' => 'Iranian (Other)',
216
+ 'iri' => 'Irish',
217
+ 'iro' => 'Iroquoian (Other)',
218
+ 'ita' => 'Italian',
219
+ 'jav' => 'Javanese',
220
+ 'jbo' => 'Lojban (Artificial language)',
221
+ 'jpn' => 'Japanese',
222
+ 'jpr' => 'Judeo-Persian',
223
+ 'jrb' => 'Judeo-Arabic',
224
+ 'kaa' => 'Kara-Kalpak',
225
+ 'kab' => 'Kabyle',
226
+ 'kac' => 'Kachin',
227
+ 'kal' => 'Kalatdlisut',
228
+ 'kam' => 'Kamba',
229
+ 'kan' => 'Kannada',
230
+ 'kar' => 'Karen',
231
+ 'kas' => 'Kashmiri',
232
+ 'kau' => 'Kanuri',
233
+ 'kaw' => 'Kawi',
234
+ 'kaz' => 'Kazakh',
235
+ 'kbd' => 'Kabardian',
236
+ 'kha' => 'Khasi',
237
+ 'khi' => 'Khoisan (Other)',
238
+ 'khm' => 'Khmer',
239
+ 'kho' => 'Khotanese',
240
+ 'kik' => 'Kikuyu',
241
+ 'kin' => 'Kinyarwanda',
242
+ 'kir' => 'Kyrgyz',
243
+ 'kmb' => 'Kimbundu',
244
+ 'kok' => 'Konkani',
245
+ 'kom' => 'Komi',
246
+ 'kon' => 'Kongo',
247
+ 'kor' => 'Korean',
248
+ 'kos' => 'Kusaie',
249
+ 'kpe' => 'Kpelle',
250
+ 'krc' => 'Karachay-Balkar',
251
+ 'krl' => 'Karelian',
252
+ 'kro' => 'Kru',
253
+ 'kru' => 'Kurukh',
254
+ 'kua' => 'Kuanyama',
255
+ 'kum' => 'Kumyk',
256
+ 'kur' => 'Kurdish',
257
+ 'kus' => 'Kusaie',
258
+ 'kut' => 'Kutenai',
259
+ 'lad' => 'Ladino',
260
+ 'lah' => 'Lahnda',
261
+ 'lam' => 'Lamba',
262
+ 'lan' => 'Occitan (post-1500)',
263
+ 'lao' => 'Lao',
264
+ 'lap' => 'Sami',
265
+ 'lat' => 'Latin',
266
+ 'lav' => 'Latvian',
267
+ 'lez' => 'Lezgian',
268
+ 'lim' => 'Limburgish',
269
+ 'lin' => 'Lingala',
270
+ 'lit' => 'Lithuanian',
271
+ 'lol' => 'Mongo-Nkundu',
272
+ 'loz' => 'Lozi',
273
+ 'ltz' => 'Letzeburgesch',
274
+ 'lua' => 'Luba-Lulua',
275
+ 'lub' => 'Luba-Katanga',
276
+ 'lug' => 'Ganda',
277
+ 'lui' => 'Luiseno',
278
+ 'lun' => 'Lunda',
279
+ 'luo' => 'Luo (Kenya and Tanzania)',
280
+ 'lus' => 'Lushai',
281
+ 'mac' => 'Macedonian',
282
+ 'mad' => 'Madurese',
283
+ 'mag' => 'Magahi',
284
+ 'mah' => 'Marshallese',
285
+ 'mai' => 'Maithili',
286
+ 'mak' => 'Makasar',
287
+ 'mal' => 'Malayalam',
288
+ 'man' => 'Mandingo',
289
+ 'mao' => 'Maori',
290
+ 'map' => 'Austronesian (Other)',
291
+ 'mar' => 'Marathi',
292
+ 'mas' => 'Masai',
293
+ 'max' => 'Manx',
294
+ 'may' => 'Malay',
295
+ 'mdf' => 'Moksha',
296
+ 'mdr' => 'Mandar',
297
+ 'men' => 'Mende',
298
+ 'mga' => 'Irish, Middle (ca. 1100-1550)',
299
+ 'mic' => 'Micmac',
300
+ 'min' => 'Minangkabau',
301
+ '#mis' => 'Miscellaneous languages',
302
+ 'mkh' => 'Mon-Khmer (Other)',
303
+ 'mla' => 'Malagasy',
304
+ 'mlg' => 'Malagasy',
305
+ 'mlt' => 'Maltese',
306
+ 'mnc' => 'Manchu',
307
+ 'mni' => 'Manipuri',
308
+ 'mno' => 'Manobo languages',
309
+ 'moh' => 'Mohawk',
310
+ 'mol' => 'Moldavian',
311
+ 'mon' => 'Mongolian',
312
+ 'mos' => 'Moore',
313
+ '#mul' => 'Multiple languages',
314
+ 'mun' => 'Munda (Other)',
315
+ 'mus' => 'Creek',
316
+ 'mwl' => 'Mirandese',
317
+ 'mwr' => 'Marwari',
318
+ 'myn' => 'Mayan languages',
319
+ 'myv' => 'Erzya',
320
+ 'nah' => 'Nahuatl',
321
+ 'nai' => 'North American Indian (Other)',
322
+ 'nap' => 'Neapolitan Italian',
323
+ 'nau' => 'Nauru',
324
+ 'nav' => 'Navajo',
325
+ 'nbl' => 'Ndebele (South Africa)',
326
+ 'nde' => 'Ndebele (Zimbabwe)',
327
+ 'ndo' => 'Ndonga',
328
+ 'nds' => 'Low German',
329
+ 'nep' => 'Nepali',
330
+ 'new' => 'Newari',
331
+ 'nia' => 'Nias',
332
+ 'nic' => 'Niger-Kordofanian (Other)',
333
+ 'niu' => 'Niuean',
334
+ 'nno' => 'Norwegian (Nynorsk)',
335
+ 'nob' => 'Norwegian (Bokmal)',
336
+ 'nog' => 'Nogai',
337
+ 'non' => 'Old Norse',
338
+ 'nor' => 'Norwegian',
339
+ 'nqo' => "N'Ko",
340
+ 'nso' => 'Northern Sotho',
341
+ 'nub' => 'Nubian languages',
342
+ 'nwc' => 'Newari, Old',
343
+ 'nya' => 'Nyanja',
344
+ 'nym' => 'Nyamwezi',
345
+ 'nyn' => 'Nyankole',
346
+ 'nyo' => 'Nyoro',
347
+ 'nzi' => 'Nzima',
348
+ 'oci' => 'Occitan (post-1500)',
349
+ 'oji' => 'Ojibwa',
350
+ 'ori' => 'Oriya',
351
+ 'orm' => 'Oromo',
352
+ 'osa' => 'Osage',
353
+ 'oss' => 'Ossetic',
354
+ 'ota' => 'Turkish, Ottoman',
355
+ 'oto' => 'Otomian languages',
356
+ 'paa' => 'Papuan (Other)',
357
+ 'pag' => 'Pangasinan',
358
+ 'pal' => 'Pahlavi',
359
+ 'pam' => 'Pampanga',
360
+ 'pan' => 'Panjabi',
361
+ 'pap' => 'Papiamento',
362
+ 'pau' => 'Palauan',
363
+ 'peo' => 'Old Persian (ca. 600-400 B.C.)',
364
+ 'per' => 'Persian',
365
+ 'phi' => 'Philippine (Other)',
366
+ 'phn' => 'Phoenician',
367
+ 'pli' => 'Pali',
368
+ 'pol' => 'Polish',
369
+ 'pon' => 'Ponape',
370
+ 'por' => 'Portuguese',
371
+ 'pra' => 'Prakrit languages',
372
+ 'pro' => 'Provencal (to 1500)',
373
+ 'pus' => 'Pushto',
374
+ 'que' => 'Quechua',
375
+ 'raj' => 'Rajasthani',
376
+ 'rap' => 'Rapanui',
377
+ 'rar' => 'Rarotongan',
378
+ 'roa' => 'Romance (Other)',
379
+ 'roh' => 'Raeto-Romance',
380
+ 'rom' => 'Romani',
381
+ 'rum' => 'Romanian',
382
+ 'run' => 'Rundi',
383
+ 'rup' => 'Aromanian',
384
+ 'rus' => 'Russian',
385
+ 'sad' => 'Sandawe',
386
+ 'sag' => 'Sango (Ubangi Creole)',
387
+ 'sah' => 'Yakut',
388
+ 'sai' => 'South American Indian (Other)',
389
+ 'sal' => 'Salishan languages',
390
+ 'sam' => 'Samaritan Aramaic',
391
+ 'san' => 'Sanskrit',
392
+ 'sao' => 'Samoan',
393
+ 'sas' => 'Sasak',
394
+ 'sat' => 'Santali',
395
+ 'scc' => 'Serbian',
396
+ 'scn' => 'Sicilian Italian',
397
+ 'sco' => 'Scots',
398
+ 'scr' => 'Croatian',
399
+ 'sel' => 'Selkup',
400
+ 'sem' => 'Semitic (Other)',
401
+ 'sga' => 'Irish, Old (to 1100)',
402
+ 'sgn' => 'Sign languages',
403
+ 'shn' => 'Shan',
404
+ 'sho' => 'Shona',
405
+ 'sid' => 'Sidamo',
406
+ 'sin' => 'Sinhalese',
407
+ 'sio' => 'Siouan (Other)',
408
+ 'sit' => 'Sino-Tibetan (Other)',
409
+ 'sla' => 'Slavic (Other)',
410
+ 'slo' => 'Slovak',
411
+ 'slv' => 'Slovenian',
412
+ 'sma' => 'Southern Sami',
413
+ 'sme' => 'Northern Sami',
414
+ 'smi' => 'Sami',
415
+ 'smj' => 'Lule Sami',
416
+ 'smn' => 'Inari Sami',
417
+ 'smo' => 'Samoan',
418
+ 'sms' => 'Skolt Sami',
419
+ 'sna' => 'Shona',
420
+ 'snd' => 'Sindhi',
421
+ 'snh' => 'Sinhalese',
422
+ 'snk' => 'Soninke',
423
+ 'sog' => 'Sogdian',
424
+ 'som' => 'Somali',
425
+ 'son' => 'Songhai',
426
+ 'sot' => 'Sotho',
427
+ 'spa' => 'Spanish',
428
+ 'srd' => 'Sardinian',
429
+ 'srn' => 'Sranan',
430
+ 'srp' => 'Serbian',
431
+ 'srr' => 'Serer',
432
+ 'ssa' => 'Nilo-Saharan (Other)',
433
+ 'sso' => 'Sotho',
434
+ 'ssw' => 'Swazi',
435
+ 'suk' => 'Sukuma',
436
+ 'sun' => 'Sundanese',
437
+ 'sus' => 'Susu',
438
+ 'sux' => 'Sumerian',
439
+ 'swa' => 'Swahili',
440
+ 'swe' => 'Swedish',
441
+ 'swz' => 'Swazi',
442
+ 'syc' => 'Syriac',
443
+ 'syr' => 'Syriac, Modern',
444
+ 'tag' => 'Tagalog',
445
+ 'tah' => 'Tahitian',
446
+ 'tai' => 'Tai (Other)',
447
+ 'taj' => 'Tajik',
448
+ 'tam' => 'Tamil',
449
+ 'tar' => 'Tatar',
450
+ 'tat' => 'Tatar',
451
+ 'tel' => 'Telugu',
452
+ 'tem' => 'Temne',
453
+ 'ter' => 'Terena',
454
+ 'tet' => 'Tetum',
455
+ 'tgk' => 'Tajik',
456
+ 'tgl' => 'Tagalog',
457
+ 'tha' => 'Thai',
458
+ 'tib' => 'Tibetan',
459
+ 'tig' => 'Tigre',
460
+ 'tir' => 'Tigrinya',
461
+ 'tiv' => 'Tiv',
462
+ 'tkl' => 'Tokelauan',
463
+ 'tlh' => 'Klingon (Artificial language)',
464
+ 'tli' => 'Tlingit',
465
+ 'tmh' => 'Tamashek',
466
+ 'tog' => 'Tonga (Nyasa)',
467
+ 'ton' => 'Tongan',
468
+ 'tpi' => 'Tok Pisin',
469
+ 'tru' => 'Truk',
470
+ 'tsi' => 'Tsimshian',
471
+ 'tsn' => 'Tswana',
472
+ 'tso' => 'Tsonga',
473
+ 'tsw' => 'Tswana',
474
+ 'tuk' => 'Turkmen',
475
+ 'tum' => 'Tumbuka',
476
+ 'tup' => 'Tupi languages',
477
+ 'tur' => 'Turkish',
478
+ 'tut' => 'Altaic (Other)',
479
+ 'tvl' => 'Tuvaluan',
480
+ 'twi' => 'Twi',
481
+ 'tyv' => 'Tuvinian',
482
+ 'udm' => 'Udmurt',
483
+ 'uga' => 'Ugaritic',
484
+ 'uig' => 'Uighur',
485
+ 'ukr' => 'Ukrainian',
486
+ 'umb' => 'Umbundu',
487
+ #'und' => 'Undetermined',
488
+ 'urd' => 'Urdu',
489
+ 'uzb' => 'Uzbek',
490
+ 'vai' => 'Vai',
491
+ 'ven' => 'Venda',
492
+ 'vie' => 'Vietnamese',
493
+ 'vol' => 'Volapuk',
494
+ 'vot' => 'Votic',
495
+ 'wak' => 'Wakashan languages',
496
+ 'wal' => 'Walamo',
497
+ 'war' => 'Waray',
498
+ 'was' => 'Washo',
499
+ 'wel' => 'Welsh',
500
+ 'wen' => 'Sorbian languages',
501
+ 'wln' => 'Walloon',
502
+ 'wol' => 'Wolof',
503
+ 'xal' => 'Kalmyk',
504
+ 'xho' => 'Xhosa',
505
+ 'yao' => 'Yao (Africa)',
506
+ 'yap' => 'Yapese',
507
+ 'yid' => 'Yiddish',
508
+ 'yor' => 'Yoruba',
509
+ 'ypk' => 'Yupik languages',
510
+ 'zap' => 'Zapotec',
511
+ 'zbl' => 'Blissymbolics',
512
+ 'zen' => 'Zenaga',
513
+ 'zha' => 'Zhuang',
514
+ 'znd' => 'Zande',
515
+ 'zul' => 'Zulu',
516
+ 'zun' => 'Zuni',
517
+ #'zxx' => 'null',
518
+ 'zza' => 'Zaza'
519
+ }
@@ -1,5 +1,5 @@
1
1
  module Stanford
2
2
  module Mods
3
- VERSION = "0.0.2"
3
+ VERSION = "0.0.3"
4
4
  end
5
5
  end
data/lib/stanford-mods.rb CHANGED
@@ -7,12 +7,7 @@ module Stanford
7
7
  module Mods
8
8
 
9
9
  class Record < ::Mods::Record
10
-
11
- # proof of concept method
12
- def to_be_removed
13
- puts "in to_be_removed!"
14
- end
15
-
10
+
16
11
  end
17
12
  end
18
13
  end
@@ -0,0 +1,21 @@
1
+ # for test coverage
2
+ require 'simplecov'
3
+ require 'simplecov-rcov'
4
+ class SimpleCov::Formatter::MergedFormatter
5
+ def format(result)
6
+ SimpleCov::Formatter::HTMLFormatter.new.format(result)
7
+ SimpleCov::Formatter::RcovFormatter.new.format(result)
8
+ end
9
+ end
10
+ SimpleCov.formatter = SimpleCov::Formatter::MergedFormatter
11
+ SimpleCov.start do
12
+ add_filter "/spec/"
13
+ end
14
+
15
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
16
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
17
+
18
+ require 'stanford-mods'
19
+
20
+ #RSpec.configure do |config|
21
+ #end
@@ -0,0 +1,119 @@
1
+ require 'spec_helper'
2
+ require 'stanford-mods/searchworks'
3
+
4
+ describe "Values for SearchWorks Solr" do
5
+ # from https://consul.stanford.edu/display/NGDE/Required+and+Recommended+Solr+Fields+for+SearchWorks+documents
6
+
7
+ context "required fields" do
8
+ context "DOR specific" do
9
+ it "druid" do
10
+ pending "to be implemented in harvestdor"
11
+ end
12
+ it "url_fulltext" do
13
+ pending "to be implemented"
14
+ end
15
+ it "mods_xml" do
16
+ pending "to be implemented"
17
+ end
18
+ it "parent_coll_ckey if item object" do
19
+ pending "to be implemented in harvestdor"
20
+ end
21
+ end
22
+
23
+ it "id" do
24
+ pending "to be implemented in harvestdor"
25
+ end
26
+
27
+ it "all_search" do
28
+ pending "to be implemented"
29
+ end
30
+
31
+ it "format" do
32
+ pending "to be implemented, using SearchWorks controlled vocab"
33
+ end
34
+
35
+ # FIXME: update per gryphDOR code / SearchWorks code / new schema
36
+
37
+ it "collection" do
38
+ pending "to be implemented, using controlled vocab, in harvestdor"
39
+ end
40
+
41
+ it "display_type" do
42
+ pending "to be implemented, using controlled vocab"
43
+ end
44
+
45
+ end
46
+
47
+ context "strongly recommended fields" do
48
+ it "access_facet" do
49
+ Stanford::Mods::Record.new.access_facet.should == ['Online']
50
+ end
51
+ context "title fields" do
52
+ context "for display" do
53
+ it "short title" do
54
+ pending "to be implemented"
55
+ end
56
+ it "full title" do
57
+ pending "to be implemented"
58
+ end
59
+ end
60
+ context "for searching" do
61
+ it "short title" do
62
+ pending "to be implemented"
63
+ end
64
+ it "full title" do
65
+ pending "to be implemented"
66
+ end
67
+ end
68
+ it "sortable title" do
69
+ pending "to be implemented"
70
+ end
71
+ end
72
+ end
73
+
74
+ context "recommended fields" do
75
+ context "publication date" do
76
+ it "for searching and facet" do
77
+ pending "to be implemented"
78
+ end
79
+ it "for sorting" do
80
+ pending "to be implemented"
81
+ end
82
+ it "for pub date grouping (hierarchical / date slider?)" do
83
+ pending "to be implemented"
84
+ end
85
+ end
86
+ context "language" do
87
+ it "should use the SearchWorks controlled vocabulary" do
88
+ m = '<mods><language><languageTerm authority="iso639-2b" type="code">per ara, dut</languageTerm></language></mods>'
89
+ r = Stanford::Mods::Record.new
90
+ r.from_str(m)
91
+ r.language_facet.size.should == 3
92
+ r.language_facet.should include("Persian")
93
+ r.language_facet.should include("Arabic")
94
+ r.language_facet.should include("Dutch")
95
+ r.language_facet.should_not include("Dutch; Flemish")
96
+ end
97
+ it "should not have duplicates" do
98
+ m = '<mods><language><languageTerm type="code" authority="iso639-2b">eng</languageTerm><languageTerm type="text">English</languageTerm></language></mods>'
99
+ r = Stanford::Mods::Record.new
100
+ r.from_str(m)
101
+ r.language_facet.size.should == 1
102
+ r.language_facet.should include("English")
103
+ end
104
+
105
+ end
106
+ context "authors" do
107
+ it "main author" do
108
+ pending "to be implemented"
109
+ end
110
+ it "additional authors" do
111
+ pending "to be implemented"
112
+ end
113
+ it "author sort" do
114
+ pending "to be implemented"
115
+ end
116
+ end
117
+ end
118
+
119
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stanford-mods
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-11-12 00:00:00.000000000 Z
13
+ date: 2012-11-13 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: mods
@@ -146,7 +146,11 @@ files:
146
146
  - config/mappings_hash.rb
147
147
  - lib/stanford-mods.rb
148
148
  - lib/stanford-mods/mappings.rb
149
+ - lib/stanford-mods/searchworks.rb
150
+ - lib/stanford-mods/searchworks_languages.rb
149
151
  - lib/stanford-mods/version.rb
152
+ - spec/spec_helper.rb
153
+ - spec/values_for_req_sw_spec.rb
150
154
  - stanford-mods.gemspec
151
155
  homepage: https://github.com/sul-dlss/stanford-mods
152
156
  licenses: []
@@ -162,7 +166,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
162
166
  version: '0'
163
167
  segments:
164
168
  - 0
165
- hash: 893390290114598282
169
+ hash: -3428048468669990853
166
170
  required_rubygems_version: !ruby/object:Gem::Requirement
167
171
  none: false
168
172
  requirements:
@@ -171,12 +175,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
171
175
  version: '0'
172
176
  segments:
173
177
  - 0
174
- hash: 893390290114598282
178
+ hash: -3428048468669990853
175
179
  requirements: []
176
180
  rubyforge_project:
177
181
  rubygems_version: 1.8.24
178
182
  signing_key:
179
183
  specification_version: 3
180
184
  summary: Stanford specific wrangling of MODS metadata
181
- test_files: []
185
+ test_files:
186
+ - spec/spec_helper.rb
187
+ - spec/values_for_req_sw_spec.rb
182
188
  has_rdoc: