stanford-mods 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -39,6 +39,16 @@ Or install it yourself as:
39
39
  in foo!
40
40
  => nil
41
41
 
42
+ Example Using SearchWorks Mixins:
43
+
44
+ > require 'stanford-mods/searchworks'
45
+ > m = Stanford::Mods::Record.new
46
+ > m.from_str('<mods><language><languageTerm authority="iso639-2b" type="code">dut</languageTerm></language></mods>')
47
+ > m.language_facet <-- from SearchWorks mixin
48
+ => ['Dutch']
49
+ > m.languages <-- from mods gem
50
+ => ['Dutch; Flemish']
51
+
42
52
  ## Contributing
43
53
 
44
54
  1. Fork it
@@ -50,5 +60,6 @@ Or install it yourself as:
50
60
 
51
61
  == Releases
52
62
 
63
+ 0.0.3 began SearchWorks mixins
53
64
  0.0.2 add usage instructions to readme
54
65
  0.0.1 Initial commit - grab name
@@ -0,0 +1,55 @@
1
+ require 'stanford-mods/searchworks_languages'
2
+
3
+ # SearchWorks-specific wrangling of MODS metadata, as an extension of the Mods::Record object
4
+ module Stanford
5
+ module Mods
6
+
7
+ class Record < ::Mods::Record
8
+
9
+ # if it's coming from DOR, then it is available online
10
+ def access_facet
11
+ ['Online']
12
+ end
13
+
14
+ # include languages known to SearchWorks; try to error-correct when possible (e.g. when ISO-639 disagrees with the MARC standard)
15
# Collects the SearchWorks language values for this record.
#
# For every language element of @mods_ng_xml, in this order:
#   1. each code languageTerm is split on commas, pipes and spaces and every
#      code is translated on its own: for an iso639 authority the ISO-639
#      english name is used when SearchWorks knows that name, otherwise the
#      code is looked up directly in SEARCHWORKS_LANGUAGES;
#   2. each text languageTerm is added when its stripped text is a
#      SearchWorks value;
#   3. when the language element has no languageTerm children, its own text
#      is added when it is a SearchWorks value.
#
# Codes with no SearchWorks translation are skipped, so the result never
# contains nil. A failed ISO-639 lookup prints a diagnostic via `p` and then
# falls back to the SEARCHWORKS_LANGUAGES lookup for that code only.
#
# NOTE(review): ct.authority is assumed to be present and to respond to
# #match for every code languageTerm -- confirm for terms that carry no
# authority attribute.
#
# @return [Array<String>] de-duplicated values, all taken from SEARCHWORKS_LANGUAGES
def language_facet
  result = []
  @mods_ng_xml.language.each { |n|
    # get languageTerm codes and add their translations to the result
    n.code_term.each { |ct|
      iso639_authority = ct.authority.match(/^iso639/)
      # one languageTerm may hold several codes, e.g. "per ara, dut"
      codes = ct.text.split(/[,|\ ]/).map { |c| c.strip }.reject { |c| c.empty? }
      codes.each do |code|
        sw_val = nil
        if iso639_authority
          # the lookup is rescued per code so one unknown code cannot
          # discard the other codes of the same languageTerm
          iso639_val = begin
            ISO_639.find(code).english_name
          rescue StandardError
            p "Couldn't find english name for #{ct.text}"
            nil
          end
          # ISO-639 names can differ from SearchWorks (e.g. "Dutch; Flemish"),
          # so only accept a name SearchWorks already uses
          sw_val = iso639_val if SEARCHWORKS_LANGUAGES.has_value?(iso639_val)
        end
        sw_val ||= SEARCHWORKS_LANGUAGES[code]
        result << sw_val unless sw_val.nil?
      end
    }
    # add languageTerm text values
    n.text_term.each { |tt|
      val = tt.text.strip
      result << val if val.length > 0 && SEARCHWORKS_LANGUAGES.has_value?(val)
    }

    # add language values that aren't in languageTerm subelement
    if n.languageTerm.size == 0
      result << n.text if SEARCHWORKS_LANGUAGES.has_value?(n.text)
    end
  }
  result.uniq
end
52
+
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,519 @@
1
+ # Language Values used by SearchWorks
2
+ # From https://github.com/solrmarc/stanford-solr-marc/blob/master/stanford-sw/translation_maps/language_map.properties
3
+ SEARCHWORKS_LANGUAGES = {
4
+ 'aaa' => 'Afar',
5
+ 'abk' => 'Abkhaz',
6
+ 'ace' => 'Achinese',
7
+ 'ach' => 'Acoli',
8
+ 'ada' => 'Adangme',
9
+ 'ady' => 'Adygei',
10
+ 'afa' => 'Afroasiatic (Other)',
11
+ 'afh' => 'Afrihili (Artificial language)',
12
+ 'afr' => 'Afrikaans',
13
+ 'ain' => 'Ainu',
14
+ 'ajm' => 'Aljamia',
15
+ 'aka' => 'Akan',
16
+ 'akk' => 'Akkadian',
17
+ 'alb' => 'Albanian',
18
+ 'ale' => 'Aleut',
19
+ 'alg' => 'Algonquian (Other)',
20
+ 'alt' => 'Altai',
21
+ 'amh' => 'Amharic',
22
+ 'ang' => 'English, Old (ca. 450-1100)',
23
+ 'anp' => 'Angika',
24
+ 'apa' => 'Apache languages',
25
+ 'ara' => 'Arabic',
26
+ 'arc' => 'Aramaic',
27
+ 'arg' => 'Aragonese Spanish',
28
+ 'arm' => 'Armenian',
29
+ 'arn' => 'Mapuche',
30
+ 'arp' => 'Arapaho',
31
+ 'art' => 'Artificial (Other)',
32
+ 'arw' => 'Arawak',
33
+ 'asm' => 'Assamese',
34
+ 'ast' => 'Bable',
35
+ 'ath' => 'Athapascan (Other)',
36
+ 'aus' => 'Australian languages',
37
+ 'ava' => 'Avaric',
38
+ 'ave' => 'Avestan',
39
+ 'awa' => 'Awadhi',
40
+ 'aym' => 'Aymara',
41
+ 'aze' => 'Azerbaijani',
42
+ 'bad' => 'Banda',
43
+ 'bai' => 'Bamileke languages',
44
+ 'bak' => 'Bashkir',
45
+ 'bal' => 'Baluchi',
46
+ 'bam' => 'Bambara',
47
+ 'ban' => 'Balinese',
48
+ 'baq' => 'Basque',
49
+ 'bas' => 'Basa',
50
+ 'bat' => 'Baltic (Other)',
51
+ 'bej' => 'Beja',
52
+ 'bel' => 'Belarusian',
53
+ 'bem' => 'Bemba',
54
+ 'ben' => 'Bengali',
55
+ 'ber' => 'Berber (Other)',
56
+ 'bho' => 'Bhojpuri',
57
+ 'bih' => 'Bihari',
58
+ 'bik' => 'Bikol',
59
+ 'bin' => 'Edo',
60
+ 'bis' => 'Bislama',
61
+ 'bla' => 'Siksika',
62
+ 'bnt' => 'Bantu (Other)',
63
+ 'bos' => 'Bosnian',
64
+ 'bra' => 'Braj',
65
+ 'bre' => 'Breton',
66
+ 'btk' => 'Batak',
67
+ 'bua' => 'Buriat',
68
+ 'bug' => 'Bugis',
69
+ 'bul' => 'Bulgarian',
70
+ 'bur' => 'Burmese',
71
+ 'byn' => 'Bilin',
72
+ 'cad' => 'Caddo',
73
+ 'cai' => 'Central American Indian (Other)',
74
+ 'cam' => 'Khmer',
75
+ 'car' => 'Carib',
76
+ 'cat' => 'Catalan',
77
+ 'cau' => 'Caucasian (Other)',
78
+ 'ceb' => 'Cebuano',
79
+ 'cel' => 'Celtic (Other)',
80
+ 'cha' => 'Chamorro',
81
+ 'chb' => 'Chibcha',
82
+ 'che' => 'Chechen',
83
+ 'chg' => 'Chagatai',
84
+ 'chi' => 'Chinese',
85
+ 'chk' => 'Truk',
86
+ 'chm' => 'Mari',
87
+ 'chn' => 'Chinook jargon',
88
+ 'cho' => 'Choctaw',
89
+ 'chp' => 'Chipewyan',
90
+ 'chr' => 'Cherokee',
91
+ 'chu' => 'Church Slavic',
92
+ 'chv' => 'Chuvash',
93
+ 'chy' => 'Cheyenne',
94
+ 'cmc' => 'Chamic languages',
95
+ 'cop' => 'Coptic',
96
+ 'cor' => 'Cornish',
97
+ 'cos' => 'Corsican',
98
+ 'cpe' => 'Creoles and Pidgins, English-based (Other)',
99
+ 'cpf' => 'Creoles and Pidgins, French-based (Other)',
100
+ 'cpp' => 'Creoles and Pidgins, Portuguese-based (Other)',
101
+ 'cre' => 'Cree',
102
+ 'crh' => 'Crimean Tatar',
103
+ 'crp' => 'Creoles and Pidgins (Other)',
104
+ 'csb' => 'Kashubian',
105
+ 'cus' => 'Cushitic (Other)',
106
+ 'cze' => 'Czech',
107
+ 'dak' => 'Dakota',
108
+ 'dan' => 'Danish',
109
+ 'dar' => 'Dargwa',
110
+ 'day' => 'Dayak',
111
+ 'del' => 'Delaware',
112
+ 'den' => 'Slave',
113
+ 'dgr' => 'Dogrib',
114
+ 'din' => 'Dinka',
115
+ 'div' => 'Divehi',
116
+ 'doi' => 'Dogri',
117
+ 'dra' => 'Dravidian (Other)',
118
+ 'dsb' => 'Lower Sorbian',
119
+ 'dua' => 'Duala',
120
+ 'dum' => 'Dutch, Middle (ca. 1050-1350)',
121
+ 'dut' => 'Dutch',
122
+ 'dyu' => 'Dyula',
123
+ 'dzo' => 'Dzongkha',
124
+ 'efi' => 'Efik',
125
+ 'egy' => 'Egyptian',
126
+ 'eka' => 'Ekajuk',
127
+ 'elx' => 'Elamite',
128
+ 'eng' => 'English',
129
+ 'enm' => 'English, Middle (1100-1500)',
130
+ 'epo' => 'Esperanto',
131
+ 'esk' => 'Eskimo languages',
132
+ 'esp' => 'Esperanto',
133
+ 'est' => 'Estonian',
134
+ 'eth' => 'Ethiopic',
135
+ 'ewe' => 'Ewe',
136
+ 'ewo' => 'Ewondo',
137
+ 'fan' => 'Fang',
138
+ 'fao' => 'Faroese',
139
+ 'far' => 'Faroese',
140
+ 'fat' => 'Fanti',
141
+ 'fij' => 'Fijian',
142
+ 'fil' => 'Filipino',
143
+ 'fin' => 'Finnish',
144
+ 'fiu' => 'Finno-Ugrian (Other)',
145
+ 'fon' => 'Fon',
146
+ 'fre' => 'French',
147
+ 'fri' => 'Frisian',
148
+ 'frm' => 'French, Middle (ca. 1400-1600)',
149
+ 'fro' => 'French, Old (ca. 842-1400)',
150
+ 'frr' => 'North Frisian',
151
+ 'frs' => 'East Frisian',
152
+ 'fry' => 'Frisian',
153
+ 'ful' => 'Fula',
154
+ 'fur' => 'Friulian',
155
+ 'gaa' => 'Ga',
156
+ 'gae' => 'Scottish Gaelic',
157
+ 'gag' => 'Galician',
158
+ 'gal' => 'Oromo',
159
+ 'gay' => 'Gayo',
160
+ 'gba' => 'Gbaya',
161
+ 'gem' => 'Germanic (Other)',
162
+ 'geo' => 'Georgian',
163
+ 'ger' => 'German',
164
+ 'gez' => 'Ethiopic',
165
+ 'gil' => 'Gilbertese',
166
+ 'gla' => 'Scottish Gaelic',
167
+ 'gle' => 'Irish',
168
+ 'glg' => 'Galician',
169
+ 'glv' => 'Manx',
170
+ 'gmh' => 'German, Middle High (ca. 1050-1500)',
171
+ 'goh' => 'German, Old High (ca. 750-1050)',
172
+ 'gon' => 'Gondi',
173
+ 'gor' => 'Gorontalo',
174
+ 'got' => 'Gothic',
175
+ 'grb' => 'Grebo',
176
+ 'grc' => 'Greek, Ancient (to 1453)',
177
+ 'gre' => 'Greek, Modern (1453- )',
178
+ 'grn' => 'Guarani',
179
+ 'gsw' => 'Swiss German',
180
+ 'gua' => 'Guarani',
181
+ 'guj' => 'Gujarati',
182
+ 'gwi' => "Gwich'in ",
183
+ 'hai' => 'Haida',
184
+ 'hat' => 'Haitian French Creole',
185
+ 'hau' => 'Hausa',
186
+ 'haw' => 'Hawaiian',
187
+ 'heb' => 'Hebrew',
188
+ 'her' => 'Herero',
189
+ 'hil' => 'Hiligaynon',
190
+ 'him' => 'Himachali',
191
+ 'hin' => 'Hindi',
192
+ 'hit' => 'Hittite',
193
+ 'hmn' => 'Hmong',
194
+ 'hmo' => 'Hiri Motu',
195
+ 'hrv' => 'Croatian',
196
+ 'hsb' => 'Upper Sorbian',
197
+ 'hun' => 'Hungarian',
198
+ 'hup' => 'Hupa',
199
+ 'iba' => 'Iban',
200
+ 'ibo' => 'Igbo',
201
+ 'ice' => 'Icelandic',
202
+ 'ido' => 'Ido',
203
+ 'iii' => 'Sichuan Yi',
204
+ 'ijo' => 'Ijo',
205
+ 'iku' => 'Inuktitut',
206
+ 'ile' => 'Interlingue',
207
+ 'ilo' => 'Iloko',
208
+ 'ina' => 'Interlingua (International Auxiliary Language Association)',
209
+ 'inc' => 'Indic (Other)',
210
+ 'ind' => 'Indonesian',
211
+ 'ine' => 'Indo-European (Other)',
212
+ 'inh' => 'Ingush',
213
+ 'int' => 'Interlingua (International Auxiliary Language Association)',
214
+ 'ipk' => 'Inupiaq',
215
+ 'ira' => 'Iranian (Other)',
216
+ 'iri' => 'Irish',
217
+ 'iro' => 'Iroquoian (Other)',
218
+ 'ita' => 'Italian',
219
+ 'jav' => 'Javanese',
220
+ 'jbo' => 'Lojban (Artificial language)',
221
+ 'jpn' => 'Japanese',
222
+ 'jpr' => 'Judeo-Persian',
223
+ 'jrb' => 'Judeo-Arabic',
224
+ 'kaa' => 'Kara-Kalpak',
225
+ 'kab' => 'Kabyle',
226
+ 'kac' => 'Kachin',
227
+ 'kal' => 'Kalatdlisut',
228
+ 'kam' => 'Kamba',
229
+ 'kan' => 'Kannada',
230
+ 'kar' => 'Karen',
231
+ 'kas' => 'Kashmiri',
232
+ 'kau' => 'Kanuri',
233
+ 'kaw' => 'Kawi',
234
+ 'kaz' => 'Kazakh',
235
+ 'kbd' => 'Kabardian',
236
+ 'kha' => 'Khasi',
237
+ 'khi' => 'Khoisan (Other)',
238
+ 'khm' => 'Khmer',
239
+ 'kho' => 'Khotanese',
240
+ 'kik' => 'Kikuyu',
241
+ 'kin' => 'Kinyarwanda',
242
+ 'kir' => 'Kyrgyz',
243
+ 'kmb' => 'Kimbundu',
244
+ 'kok' => 'Konkani',
245
+ 'kom' => 'Komi',
246
+ 'kon' => 'Kongo',
247
+ 'kor' => 'Korean',
248
+ 'kos' => 'Kusaie',
249
+ 'kpe' => 'Kpelle',
250
+ 'krc' => 'Karachay-Balkar',
251
+ 'krl' => 'Karelian',
252
+ 'kro' => 'Kru',
253
+ 'kru' => 'Kurukh',
254
+ 'kua' => 'Kuanyama',
255
+ 'kum' => 'Kumyk',
256
+ 'kur' => 'Kurdish',
257
+ 'kus' => 'Kusaie',
258
+ 'kut' => 'Kutenai',
259
+ 'lad' => 'Ladino',
260
+ 'lah' => 'Lahnda',
261
+ 'lam' => 'Lamba',
262
+ 'lan' => 'Occitan (post-1500)',
263
+ 'lao' => 'Lao',
264
+ 'lap' => 'Sami',
265
+ 'lat' => 'Latin',
266
+ 'lav' => 'Latvian',
267
+ 'lez' => 'Lezgian',
268
+ 'lim' => 'Limburgish',
269
+ 'lin' => 'Lingala',
270
+ 'lit' => 'Lithuanian',
271
+ 'lol' => 'Mongo-Nkundu',
272
+ 'loz' => 'Lozi',
273
+ 'ltz' => 'Letzeburgesch',
274
+ 'lua' => 'Luba-Lulua',
275
+ 'lub' => 'Luba-Katanga',
276
+ 'lug' => 'Ganda',
277
+ 'lui' => 'Luiseno',
278
+ 'lun' => 'Lunda',
279
+ 'luo' => 'Luo (Kenya and Tanzania)',
280
+ 'lus' => 'Lushai',
281
+ 'mac' => 'Macedonian',
282
+ 'mad' => 'Madurese',
283
+ 'mag' => 'Magahi',
284
+ 'mah' => 'Marshallese',
285
+ 'mai' => 'Maithili',
286
+ 'mak' => 'Makasar',
287
+ 'mal' => 'Malayalam',
288
+ 'man' => 'Mandingo',
289
+ 'mao' => 'Maori',
290
+ 'map' => 'Austronesian (Other)',
291
+ 'mar' => 'Marathi',
292
+ 'mas' => 'Masai',
293
+ 'max' => 'Manx',
294
+ 'may' => 'Malay',
295
+ 'mdf' => 'Moksha',
296
+ 'mdr' => 'Mandar',
297
+ 'men' => 'Mende',
298
+ 'mga' => 'Irish, Middle (ca. 1100-1550)',
299
+ 'mic' => 'Micmac',
300
+ 'min' => 'Minangkabau',
301
+ '#mis' => 'Miscellaneous languages',
302
+ 'mkh' => 'Mon-Khmer (Other)',
303
+ 'mla' => 'Malagasy',
304
+ 'mlg' => 'Malagasy',
305
+ 'mlt' => 'Maltese',
306
+ 'mnc' => 'Manchu',
307
+ 'mni' => 'Manipuri',
308
+ 'mno' => 'Manobo languages',
309
+ 'moh' => 'Mohawk',
310
+ 'mol' => 'Moldavian',
311
+ 'mon' => 'Mongolian',
312
+ 'mos' => 'Moore',
313
+ '#mul' => 'Multiple languages',
314
+ 'mun' => 'Munda (Other)',
315
+ 'mus' => 'Creek',
316
+ 'mwl' => 'Mirandese',
317
+ 'mwr' => 'Marwari',
318
+ 'myn' => 'Mayan languages',
319
+ 'myv' => 'Erzya',
320
+ 'nah' => 'Nahuatl',
321
+ 'nai' => 'North American Indian (Other)',
322
+ 'nap' => 'Neapolitan Italian',
323
+ 'nau' => 'Nauru',
324
+ 'nav' => 'Navajo',
325
+ 'nbl' => 'Ndebele (South Africa)',
326
+ 'nde' => 'Ndebele (Zimbabwe)',
327
+ 'ndo' => 'Ndonga',
328
+ 'nds' => 'Low German',
329
+ 'nep' => 'Nepali',
330
+ 'new' => 'Newari',
331
+ 'nia' => 'Nias',
332
+ 'nic' => 'Niger-Kordofanian (Other)',
333
+ 'niu' => 'Niuean',
334
+ 'nno' => 'Norwegian (Nynorsk)',
335
+ 'nob' => 'Norwegian (Bokmal)',
336
+ 'nog' => 'Nogai',
337
+ 'non' => 'Old Norse',
338
+ 'nor' => 'Norwegian',
339
+ 'nqo' => "N'Ko",
340
+ 'nso' => 'Northern Sotho',
341
+ 'nub' => 'Nubian languages',
342
+ 'nwc' => 'Newari, Old',
343
+ 'nya' => 'Nyanja',
344
+ 'nym' => 'Nyamwezi',
345
+ 'nyn' => 'Nyankole',
346
+ 'nyo' => 'Nyoro',
347
+ 'nzi' => 'Nzima',
348
+ 'oci' => 'Occitan (post-1500)',
349
+ 'oji' => 'Ojibwa',
350
+ 'ori' => 'Oriya',
351
+ 'orm' => 'Oromo',
352
+ 'osa' => 'Osage',
353
+ 'oss' => 'Ossetic',
354
+ 'ota' => 'Turkish, Ottoman',
355
+ 'oto' => 'Otomian languages',
356
+ 'paa' => 'Papuan (Other)',
357
+ 'pag' => 'Pangasinan',
358
+ 'pal' => 'Pahlavi',
359
+ 'pam' => 'Pampanga',
360
+ 'pan' => 'Panjabi',
361
+ 'pap' => 'Papiamento',
362
+ 'pau' => 'Palauan',
363
+ 'peo' => 'Old Persian (ca. 600-400 B.C.)',
364
+ 'per' => 'Persian',
365
+ 'phi' => 'Philippine (Other)',
366
+ 'phn' => 'Phoenician',
367
+ 'pli' => 'Pali',
368
+ 'pol' => 'Polish',
369
+ 'pon' => 'Ponape',
370
+ 'por' => 'Portuguese',
371
+ 'pra' => 'Prakrit languages',
372
+ 'pro' => 'Provencal (to 1500)',
373
+ 'pus' => 'Pushto',
374
+ 'que' => 'Quechua',
375
+ 'raj' => 'Rajasthani',
376
+ 'rap' => 'Rapanui',
377
+ 'rar' => 'Rarotongan',
378
+ 'roa' => 'Romance (Other)',
379
+ 'roh' => 'Raeto-Romance',
380
+ 'rom' => 'Romani',
381
+ 'rum' => 'Romanian',
382
+ 'run' => 'Rundi',
383
+ 'rup' => 'Aromanian',
384
+ 'rus' => 'Russian',
385
+ 'sad' => 'Sandawe',
386
+ 'sag' => 'Sango (Ubangi Creole)',
387
+ 'sah' => 'Yakut',
388
+ 'sai' => 'South American Indian (Other)',
389
+ 'sal' => 'Salishan languages',
390
+ 'sam' => 'Samaritan Aramaic',
391
+ 'san' => 'Sanskrit',
392
+ 'sao' => 'Samoan',
393
+ 'sas' => 'Sasak',
394
+ 'sat' => 'Santali',
395
+ 'scc' => 'Serbian',
396
+ 'scn' => 'Sicilian Italian',
397
+ 'sco' => 'Scots',
398
+ 'scr' => 'Croatian',
399
+ 'sel' => 'Selkup',
400
+ 'sem' => 'Semitic (Other)',
401
+ 'sga' => 'Irish, Old (to 1100)',
402
+ 'sgn' => 'Sign languages',
403
+ 'shn' => 'Shan',
404
+ 'sho' => 'Shona',
405
+ 'sid' => 'Sidamo',
406
+ 'sin' => 'Sinhalese',
407
+ 'sio' => 'Siouan (Other)',
408
+ 'sit' => 'Sino-Tibetan (Other)',
409
+ 'sla' => 'Slavic (Other)',
410
+ 'slo' => 'Slovak',
411
+ 'slv' => 'Slovenian',
412
+ 'sma' => 'Southern Sami',
413
+ 'sme' => 'Northern Sami',
414
+ 'smi' => 'Sami',
415
+ 'smj' => 'Lule Sami',
416
+ 'smn' => 'Inari Sami',
417
+ 'smo' => 'Samoan',
418
+ 'sms' => 'Skolt Sami',
419
+ 'sna' => 'Shona',
420
+ 'snd' => 'Sindhi',
421
+ 'snh' => 'Sinhalese',
422
+ 'snk' => 'Soninke',
423
+ 'sog' => 'Sogdian',
424
+ 'som' => 'Somali',
425
+ 'son' => 'Songhai',
426
+ 'sot' => 'Sotho',
427
+ 'spa' => 'Spanish',
428
+ 'srd' => 'Sardinian',
429
+ 'srn' => 'Sranan',
430
+ 'srp' => 'Serbian',
431
+ 'srr' => 'Serer',
432
+ 'ssa' => 'Nilo-Saharan (Other)',
433
+ 'sso' => 'Sotho',
434
+ 'ssw' => 'Swazi',
435
+ 'suk' => 'Sukuma',
436
+ 'sun' => 'Sundanese',
437
+ 'sus' => 'Susu',
438
+ 'sux' => 'Sumerian',
439
+ 'swa' => 'Swahili',
440
+ 'swe' => 'Swedish',
441
+ 'swz' => 'Swazi',
442
+ 'syc' => 'Syriac',
443
+ 'syr' => 'Syriac, Modern',
444
+ 'tag' => 'Tagalog',
445
+ 'tah' => 'Tahitian',
446
+ 'tai' => 'Tai (Other)',
447
+ 'taj' => 'Tajik',
448
+ 'tam' => 'Tamil',
449
+ 'tar' => 'Tatar',
450
+ 'tat' => 'Tatar',
451
+ 'tel' => 'Telugu',
452
+ 'tem' => 'Temne',
453
+ 'ter' => 'Terena',
454
+ 'tet' => 'Tetum',
455
+ 'tgk' => 'Tajik',
456
+ 'tgl' => 'Tagalog',
457
+ 'tha' => 'Thai',
458
+ 'tib' => 'Tibetan',
459
+ 'tig' => 'Tigre',
460
+ 'tir' => 'Tigrinya',
461
+ 'tiv' => 'Tiv',
462
+ 'tkl' => 'Tokelauan',
463
+ 'tlh' => 'Klingon (Artificial language)',
464
+ 'tli' => 'Tlingit',
465
+ 'tmh' => 'Tamashek',
466
+ 'tog' => 'Tonga (Nyasa)',
467
+ 'ton' => 'Tongan',
468
+ 'tpi' => 'Tok Pisin',
469
+ 'tru' => 'Truk',
470
+ 'tsi' => 'Tsimshian',
471
+ 'tsn' => 'Tswana',
472
+ 'tso' => 'Tsonga',
473
+ 'tsw' => 'Tswana',
474
+ 'tuk' => 'Turkmen',
475
+ 'tum' => 'Tumbuka',
476
+ 'tup' => 'Tupi languages',
477
+ 'tur' => 'Turkish',
478
+ 'tut' => 'Altaic (Other)',
479
+ 'tvl' => 'Tuvaluan',
480
+ 'twi' => 'Twi',
481
+ 'tyv' => 'Tuvinian',
482
+ 'udm' => 'Udmurt',
483
+ 'uga' => 'Ugaritic',
484
+ 'uig' => 'Uighur',
485
+ 'ukr' => 'Ukrainian',
486
+ 'umb' => 'Umbundu',
487
+ #'und' => 'Undetermined',
488
+ 'urd' => 'Urdu',
489
+ 'uzb' => 'Uzbek',
490
+ 'vai' => 'Vai',
491
+ 'ven' => 'Venda',
492
+ 'vie' => 'Vietnamese',
493
+ 'vol' => 'Volapuk',
494
+ 'vot' => 'Votic',
495
+ 'wak' => 'Wakashan languages',
496
+ 'wal' => 'Walamo',
497
+ 'war' => 'Waray',
498
+ 'was' => 'Washo',
499
+ 'wel' => 'Welsh',
500
+ 'wen' => 'Sorbian languages',
501
+ 'wln' => 'Walloon',
502
+ 'wol' => 'Wolof',
503
+ 'xal' => 'Kalmyk',
504
+ 'xho' => 'Xhosa',
505
+ 'yao' => 'Yao (Africa)',
506
+ 'yap' => 'Yapese',
507
+ 'yid' => 'Yiddish',
508
+ 'yor' => 'Yoruba',
509
+ 'ypk' => 'Yupik languages',
510
+ 'zap' => 'Zapotec',
511
+ 'zbl' => 'Blissymbolics',
512
+ 'zen' => 'Zenaga',
513
+ 'zha' => 'Zhuang',
514
+ 'znd' => 'Zande',
515
+ 'zul' => 'Zulu',
516
+ 'zun' => 'Zuni',
517
+ #'zxx' => 'null',
518
+ 'zza' => 'Zaza'
519
+ }
@@ -1,5 +1,5 @@
1
1
  module Stanford
2
2
  module Mods
3
- VERSION = "0.0.2"
3
+ VERSION = "0.0.3"
4
4
  end
5
5
  end
data/lib/stanford-mods.rb CHANGED
@@ -7,12 +7,7 @@ module Stanford
7
7
  module Mods
8
8
 
9
9
  class Record < ::Mods::Record
10
-
11
- # proof of concept method
12
- def to_be_removed
13
- puts "in to_be_removed!"
14
- end
15
-
10
+
16
11
  end
17
12
  end
18
13
  end
@@ -0,0 +1,21 @@
1
+ # for test coverage
2
+ require 'simplecov'
3
+ require 'simplecov-rcov'
4
+ class SimpleCov::Formatter::MergedFormatter
5
+ def format(result)
6
+ SimpleCov::Formatter::HTMLFormatter.new.format(result)
7
+ SimpleCov::Formatter::RcovFormatter.new.format(result)
8
+ end
9
+ end
10
+ SimpleCov.formatter = SimpleCov::Formatter::MergedFormatter
11
+ SimpleCov.start do
12
+ add_filter "/spec/"
13
+ end
14
+
15
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
16
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
17
+
18
+ require 'stanford-mods'
19
+
20
+ #RSpec.configure do |config|
21
+ #end
@@ -0,0 +1,119 @@
1
+ require 'spec_helper'
2
+ require 'stanford-mods/searchworks'
3
+
4
+ describe "Values for SearchWorks Solr" do
5
+ # from https://consul.stanford.edu/display/NGDE/Required+and+Recommended+Solr+Fields+for+SearchWorks+documents
6
+
7
+ context "required fields" do
8
+ context "DOR specific" do
9
+ it "druid" do
10
+ pending "to be implemented in harvestdor"
11
+ end
12
+ it "url_fulltext" do
13
+ pending "to be implemented"
14
+ end
15
+ it "mods_xml" do
16
+ pending "to be implemented"
17
+ end
18
+ it "parent_coll_ckey if item object" do
19
+ pending "to be implemented in harvestdor"
20
+ end
21
+ end
22
+
23
+ it "id" do
24
+ pending "to be implemented in harvestdor"
25
+ end
26
+
27
+ it "all_search" do
28
+ pending "to be implemented"
29
+ end
30
+
31
+ it "format" do
32
+ pending "to be implemented, using SearchWorks controlled vocab"
33
+ end
34
+
35
+ # FIXME: update per gryphDOR code / SearchWorks code / new schema
36
+
37
+ it "collection" do
38
+ pending "to be implemented, using controlled vocab, in harvestdor"
39
+ end
40
+
41
+ it "display_type" do
42
+ pending "to be implemented, using controlled vocab"
43
+ end
44
+
45
+ end
46
+
47
+ context "strongly recommended fields" do
48
+ it "access_facet" do
49
+ Stanford::Mods::Record.new.access_facet.should == ['Online']
50
+ end
51
+ context "title fields" do
52
+ context "for display" do
53
+ it "short title" do
54
+ pending "to be implemented"
55
+ end
56
+ it "full title" do
57
+ pending "to be implemented"
58
+ end
59
+ end
60
+ context "for searching" do
61
+ it "short title" do
62
+ pending "to be implemented"
63
+ end
64
+ it "full title" do
65
+ pending "to be implemented"
66
+ end
67
+ end
68
+ it "sortable title" do
69
+ pending "to be implemented"
70
+ end
71
+ end
72
+ end
73
+
74
+ context "recommended fields" do
75
+ context "publication date" do
76
+ it "for searching and facet" do
77
+ pending "to be implemented"
78
+ end
79
+ it "for sorting" do
80
+ pending "to be implemented"
81
+ end
82
+ it "for pub date grouping (hierarchical / date slider?)" do
83
+ pending "to be implemented"
84
+ end
85
+ end
86
+ context "language" do
87
+ it "should use the SearchWorks controlled vocabulary" do
88
+ m = '<mods><language><languageTerm authority="iso639-2b" type="code">per ara, dut</languageTerm></language></mods>'
89
+ r = Stanford::Mods::Record.new
90
+ r.from_str(m)
91
+ r.language_facet.size.should == 3
92
+ r.language_facet.should include("Persian")
93
+ r.language_facet.should include("Arabic")
94
+ r.language_facet.should include("Dutch")
95
+ r.language_facet.should_not include("Dutch; Flemish")
96
+ end
97
+ it "should not have duplicates" do
98
+ m = '<mods><language><languageTerm type="code" authority="iso639-2b">eng</languageTerm><languageTerm type="text">English</languageTerm></language></mods>'
99
+ r = Stanford::Mods::Record.new
100
+ r.from_str(m)
101
+ r.language_facet.size.should == 1
102
+ r.language_facet.should include("English")
103
+ end
104
+
105
+ end
106
+ context "authors" do
107
+ it "main author" do
108
+ pending "to be implemented"
109
+ end
110
+ it "additional authors" do
111
+ pending "to be implemented"
112
+ end
113
+ it "author sort" do
114
+ pending "to be implemented"
115
+ end
116
+ end
117
+ end
118
+
119
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: stanford-mods
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-11-12 00:00:00.000000000 Z
13
+ date: 2012-11-13 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: mods
@@ -146,7 +146,11 @@ files:
146
146
  - config/mappings_hash.rb
147
147
  - lib/stanford-mods.rb
148
148
  - lib/stanford-mods/mappings.rb
149
+ - lib/stanford-mods/searchworks.rb
150
+ - lib/stanford-mods/searchworks_languages.rb
149
151
  - lib/stanford-mods/version.rb
152
+ - spec/spec_helper.rb
153
+ - spec/values_for_req_sw_spec.rb
150
154
  - stanford-mods.gemspec
151
155
  homepage: https://github.com/sul-dlss/stanford-mods
152
156
  licenses: []
@@ -162,7 +166,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
162
166
  version: '0'
163
167
  segments:
164
168
  - 0
165
- hash: 893390290114598282
169
+ hash: -3428048468669990853
166
170
  required_rubygems_version: !ruby/object:Gem::Requirement
167
171
  none: false
168
172
  requirements:
@@ -171,12 +175,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
171
175
  version: '0'
172
176
  segments:
173
177
  - 0
174
- hash: 893390290114598282
178
+ hash: -3428048468669990853
175
179
  requirements: []
176
180
  rubyforge_project:
177
181
  rubygems_version: 1.8.24
178
182
  signing_key:
179
183
  specification_version: 3
180
184
  summary: Stanford specific wrangling of MODS metadata
181
- test_files: []
185
+ test_files:
186
+ - spec/spec_helper.rb
187
+ - spec/values_for_req_sw_spec.rb
182
188
  has_rdoc: