stanford-mods 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +11 -0
- data/lib/stanford-mods/searchworks.rb +55 -0
- data/lib/stanford-mods/searchworks_languages.rb +519 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/lib/stanford-mods.rb +1 -6
- data/spec/spec_helper.rb +21 -0
- data/spec/values_for_req_sw_spec.rb +119 -0
- metadata +11 -5
data/README.rdoc
CHANGED
@@ -39,6 +39,16 @@ Or install it yourself as:
|
|
39
39
|
in foo!
|
40
40
|
=> nil
|
41
41
|
|
42
|
+
Example Using SearchWorks Mixins:
|
43
|
+
|
44
|
+
> require 'stanford-mods/searchworks'
|
45
|
+
> m = Stanford::Mods::Record.new
|
46
|
+
> m.from_str('<mods><language><languageTerm authority="iso639-2b" type="code">dut</languageTerm></language></mods>')
|
47
|
+
> m.language_facet <-- from Searchworks mixin
|
48
|
+
=> ['Dutch']
|
49
|
+
> m.languages <-- from mods gem
|
50
|
+
=> ['Dutch; Flemish']
|
51
|
+
|
42
52
|
## Contributing
|
43
53
|
|
44
54
|
1. Fork it
|
@@ -50,5 +60,6 @@ Or install it yourself as:
|
|
50
60
|
|
51
61
|
== Releases
|
52
62
|
|
63
|
+
0.0.3 began SearchWorks mixins
|
53
64
|
0.0.2 add usage instructions to readme
|
54
65
|
0.0.1 Initial commit - grab name
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'stanford-mods/searchworks_languages'
|
2
|
+
|
3
|
+
# SearchWorks-specific wrangling of MODS metadata, as an extension of the Mods::Record object
|
4
|
+
module Stanford
|
5
|
+
module Mods
|
6
|
+
|
7
|
+
class Record < ::Mods::Record

  # if it's coming from DOR, then it is available online
  #
  # @return [Array<String>] always ['Online']
  def access_facet
    ['Online']
  end

  # Language values for this record, restricted to the languages known to SearchWorks
  # (the values of SEARCHWORKS_LANGUAGES); tries to error correct when possible
  # (e.g. when ISO-639 disagrees with the MARC standard).
  #
  # Values are gathered from three places in each <language> element:
  # 1. languageTerm codes: a single term may hold several codes separated by
  #    commas, pipes or spaces.  For iso639 authorities the ISO_639 english name is
  #    used when SearchWorks knows that name; otherwise the code is looked up in
  #    SEARCHWORKS_LANGUAGES.
  # 2. languageTerm text values that are SearchWorks language names.
  # 3. the text of the <language> element itself when it has no languageTerm children.
  #
  # @return [Array<String>] unique language names; codes that cannot be translated are skipped
  def language_facet
    result = []
    @mods_ng_xml.language.each do |n|
      # get languageTerm codes and add their translations to the result
      n.code_term.each do |ct|
        # to_s guards against a code term without an authority attribute
        # NOTE(review): assumes a missing authority comes back as nil - confirm against the mods gem
        iso639_authority = ct.authority.to_s.match(/^iso639/)
        codes = ct.text.split(/[,|\ ]/).map { |x| x.strip }.reject { |x| x.length == 0 }
        codes.each do |code|
          english_name = nil
          if iso639_authority
            # rescue per code so that one unknown code does not discard the other codes of this term
            begin
              english_name = ISO_639.find(code).english_name
            rescue StandardError
              p "Couldn't find english name for #{ct.text}"
            end
          end
          if SEARCHWORKS_LANGUAGES.has_value?(english_name)
            result << english_name
          else
            # fall back to the SearchWorks translation of the code itself (nil when unknown)
            result << SEARCHWORKS_LANGUAGES[code]
          end
        end
      end

      # add languageTerm text values
      n.text_term.each do |tt|
        val = tt.text.strip
        result << val if val.length > 0 && SEARCHWORKS_LANGUAGES.has_value?(val)
      end

      # add language values that aren't in languageTerm subelement
      if n.languageTerm.size == 0
        result << n.text if SEARCHWORKS_LANGUAGES.has_value?(n.text)
      end
    end
    # failed lookups leave nil entries behind; drop them before de-duplicating
    result.compact.uniq
  end

end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,519 @@
|
|
1
|
+
# Language Values used by SearchWorks
|
2
|
+
# From https://github.com/solrmarc/stanford-solr-marc/blob/master/stanford-sw/translation_maps/language_map.properties
|
3
|
+
SEARCHWORKS_LANGUAGES = {
|
4
|
+
'aaa' => 'Afar',
|
5
|
+
'abk' => 'Abkhaz',
|
6
|
+
'ace' => 'Achinese',
|
7
|
+
'ach' => 'Acoli',
|
8
|
+
'ada' => 'Adangme',
|
9
|
+
'ady' => 'Adygei',
|
10
|
+
'afa' => 'Afroasiatic (Other)',
|
11
|
+
'afh' => 'Afrihili (Artificial language)',
|
12
|
+
'afr' => 'Afrikaans',
|
13
|
+
'ain' => 'Ainu',
|
14
|
+
'ajm' => 'Aljamia',
|
15
|
+
'aka' => 'Akan',
|
16
|
+
'akk' => 'Akkadian',
|
17
|
+
'alb' => 'Albanian',
|
18
|
+
'ale' => 'Aleut',
|
19
|
+
'alg' => 'Algonquian (Other)',
|
20
|
+
'alt' => 'Altai',
|
21
|
+
'amh' => 'Amharic',
|
22
|
+
'ang' => 'English, Old (ca. 450-1100)',
|
23
|
+
'anp' => 'Angika',
|
24
|
+
'apa' => 'Apache languages',
|
25
|
+
'ara' => 'Arabic',
|
26
|
+
'arc' => 'Aramaic',
|
27
|
+
'arg' => 'Aragonese Spanish',
|
28
|
+
'arm' => 'Armenian',
|
29
|
+
'arn' => 'Mapuche',
|
30
|
+
'arp' => 'Arapaho',
|
31
|
+
'art' => 'Artificial (Other)',
|
32
|
+
'arw' => 'Arawak',
|
33
|
+
'asm' => 'Assamese',
|
34
|
+
'ast' => 'Bable',
|
35
|
+
'ath' => 'Athapascan (Other)',
|
36
|
+
'aus' => 'Australian languages',
|
37
|
+
'ava' => 'Avaric',
|
38
|
+
'ave' => 'Avestan',
|
39
|
+
'awa' => 'Awadhi',
|
40
|
+
'aym' => 'Aymara',
|
41
|
+
'aze' => 'Azerbaijani',
|
42
|
+
'bad' => 'Banda',
|
43
|
+
'bai' => 'Bamileke languages',
|
44
|
+
'bak' => 'Bashkir',
|
45
|
+
'bal' => 'Baluchi',
|
46
|
+
'bam' => 'Bambara',
|
47
|
+
'ban' => 'Balinese',
|
48
|
+
'baq' => 'Basque',
|
49
|
+
'bas' => 'Basa',
|
50
|
+
'bat' => 'Baltic (Other)',
|
51
|
+
'bej' => 'Beja',
|
52
|
+
'bel' => 'Belarusian',
|
53
|
+
'bem' => 'Bemba',
|
54
|
+
'ben' => 'Bengali',
|
55
|
+
'ber' => 'Berber (Other)',
|
56
|
+
'bho' => 'Bhojpuri',
|
57
|
+
'bih' => 'Bihari',
|
58
|
+
'bik' => 'Bikol',
|
59
|
+
'bin' => 'Edo',
|
60
|
+
'bis' => 'Bislama',
|
61
|
+
'bla' => 'Siksika',
|
62
|
+
'bnt' => 'Bantu (Other)',
|
63
|
+
'bos' => 'Bosnian',
|
64
|
+
'bra' => 'Braj',
|
65
|
+
'bre' => 'Breton',
|
66
|
+
'btk' => 'Batak',
|
67
|
+
'bua' => 'Buriat',
|
68
|
+
'bug' => 'Bugis',
|
69
|
+
'bul' => 'Bulgarian',
|
70
|
+
'bur' => 'Burmese',
|
71
|
+
'byn' => 'Bilin',
|
72
|
+
'cad' => 'Caddo',
|
73
|
+
'cai' => 'Central American Indian (Other)',
|
74
|
+
'cam' => 'Khmer',
|
75
|
+
'car' => 'Carib',
|
76
|
+
'cat' => 'Catalan',
|
77
|
+
'cau' => 'Caucasian (Other)',
|
78
|
+
'ceb' => 'Cebuano',
|
79
|
+
'cel' => 'Celtic (Other)',
|
80
|
+
'cha' => 'Chamorro',
|
81
|
+
'chb' => 'Chibcha',
|
82
|
+
'che' => 'Chechen',
|
83
|
+
'chg' => 'Chagatai',
|
84
|
+
'chi' => 'Chinese',
|
85
|
+
'chk' => 'Truk',
|
86
|
+
'chm' => 'Mari',
|
87
|
+
'chn' => 'Chinook jargon',
|
88
|
+
'cho' => 'Choctaw',
|
89
|
+
'chp' => 'Chipewyan',
|
90
|
+
'chr' => 'Cherokee',
|
91
|
+
'chu' => 'Church Slavic',
|
92
|
+
'chv' => 'Chuvash',
|
93
|
+
'chy' => 'Cheyenne',
|
94
|
+
'cmc' => 'Chamic languages',
|
95
|
+
'cop' => 'Coptic',
|
96
|
+
'cor' => 'Cornish',
|
97
|
+
'cos' => 'Corsican',
|
98
|
+
'cpe' => 'Creoles and Pidgins, English-based (Other)',
|
99
|
+
'cpf' => 'Creoles and Pidgins, French-based (Other)',
|
100
|
+
'cpp' => 'Creoles and Pidgins, Portuguese-based (Other)',
|
101
|
+
'cre' => 'Cree',
|
102
|
+
'crh' => 'Crimean Tatar',
|
103
|
+
'crp' => 'Creoles and Pidgins (Other)',
|
104
|
+
'csb' => 'Kashubian',
|
105
|
+
'cus' => 'Cushitic (Other)',
|
106
|
+
'cze' => 'Czech',
|
107
|
+
'dak' => 'Dakota',
|
108
|
+
'dan' => 'Danish',
|
109
|
+
'dar' => 'Dargwa',
|
110
|
+
'day' => 'Dayak',
|
111
|
+
'del' => 'Delaware',
|
112
|
+
'den' => 'Slave',
|
113
|
+
'dgr' => 'Dogrib',
|
114
|
+
'din' => 'Dinka',
|
115
|
+
'div' => 'Divehi',
|
116
|
+
'doi' => 'Dogri',
|
117
|
+
'dra' => 'Dravidian (Other)',
|
118
|
+
'dsb' => 'Lower Sorbian',
|
119
|
+
'dua' => 'Duala',
|
120
|
+
'dum' => 'Dutch, Middle (ca. 1050-1350)',
|
121
|
+
'dut' => 'Dutch',
|
122
|
+
'dyu' => 'Dyula',
|
123
|
+
'dzo' => 'Dzongkha',
|
124
|
+
'efi' => 'Efik',
|
125
|
+
'egy' => 'Egyptian',
|
126
|
+
'eka' => 'Ekajuk',
|
127
|
+
'elx' => 'Elamite',
|
128
|
+
'eng' => 'English',
|
129
|
+
'enm' => 'English, Middle (1100-1500)',
|
130
|
+
'epo' => 'Esperanto',
|
131
|
+
'esk' => 'Eskimo languages',
|
132
|
+
'esp' => 'Esperanto',
|
133
|
+
'est' => 'Estonian',
|
134
|
+
'eth' => 'Ethiopic',
|
135
|
+
'ewe' => 'Ewe',
|
136
|
+
'ewo' => 'Ewondo',
|
137
|
+
'fan' => 'Fang',
|
138
|
+
'fao' => 'Faroese',
|
139
|
+
'far' => 'Faroese',
|
140
|
+
'fat' => 'Fanti',
|
141
|
+
'fij' => 'Fijian',
|
142
|
+
'fil' => 'Filipino',
|
143
|
+
'fin' => 'Finnish',
|
144
|
+
'fiu' => 'Finno-Ugrian (Other)',
|
145
|
+
'fon' => 'Fon',
|
146
|
+
'fre' => 'French',
|
147
|
+
'fri' => 'Frisian',
|
148
|
+
'frm' => 'French, Middle (ca. 1400-1600)',
|
149
|
+
'fro' => 'French, Old (ca. 842-1400)',
|
150
|
+
'frr' => 'North Frisian',
|
151
|
+
'frs' => 'East Frisian',
|
152
|
+
'fry' => 'Frisian',
|
153
|
+
'ful' => 'Fula',
|
154
|
+
'fur' => 'Friulian',
|
155
|
+
'gaa' => 'Ga',
|
156
|
+
'gae' => 'Scottish Gaelic',
|
157
|
+
'gag' => 'Galician',
|
158
|
+
'gal' => 'Oromo',
|
159
|
+
'gay' => 'Gayo',
|
160
|
+
'gba' => 'Gbaya',
|
161
|
+
'gem' => 'Germanic (Other)',
|
162
|
+
'geo' => 'Georgian',
|
163
|
+
'ger' => 'German',
|
164
|
+
'gez' => 'Ethiopic',
|
165
|
+
'gil' => 'Gilbertese',
|
166
|
+
'gla' => 'Scottish Gaelic',
|
167
|
+
'gle' => 'Irish',
|
168
|
+
'glg' => 'Galician',
|
169
|
+
'glv' => 'Manx',
|
170
|
+
'gmh' => 'German, Middle High (ca. 1050-1500)',
|
171
|
+
'goh' => 'German, Old High (ca. 750-1050)',
|
172
|
+
'gon' => 'Gondi',
|
173
|
+
'gor' => 'Gorontalo',
|
174
|
+
'got' => 'Gothic',
|
175
|
+
'grb' => 'Grebo',
|
176
|
+
'grc' => 'Greek, Ancient (to 1453)',
|
177
|
+
'gre' => 'Greek, Modern (1453- )',
|
178
|
+
'grn' => 'Guarani',
|
179
|
+
'gsw' => 'Swiss German',
|
180
|
+
'gua' => 'Guarani',
|
181
|
+
'guj' => 'Gujarati',
|
182
|
+
'gwi' => "Gwich'in ",
|
183
|
+
'hai' => 'Haida',
|
184
|
+
'hat' => 'Haitian French Creole',
|
185
|
+
'hau' => 'Hausa',
|
186
|
+
'haw' => 'Hawaiian',
|
187
|
+
'heb' => 'Hebrew',
|
188
|
+
'her' => 'Herero',
|
189
|
+
'hil' => 'Hiligaynon',
|
190
|
+
'him' => 'Himachali',
|
191
|
+
'hin' => 'Hindi',
|
192
|
+
'hit' => 'Hittite',
|
193
|
+
'hmn' => 'Hmong',
|
194
|
+
'hmo' => 'Hiri Motu',
|
195
|
+
'hrv' => 'Croatian',
|
196
|
+
'hsb' => 'Upper Sorbian',
|
197
|
+
'hun' => 'Hungarian',
|
198
|
+
'hup' => 'Hupa',
|
199
|
+
'iba' => 'Iban',
|
200
|
+
'ibo' => 'Igbo',
|
201
|
+
'ice' => 'Icelandic',
|
202
|
+
'ido' => 'Ido',
|
203
|
+
'iii' => 'Sichuan Yi',
|
204
|
+
'ijo' => 'Ijo',
|
205
|
+
'iku' => 'Inuktitut',
|
206
|
+
'ile' => 'Interlingue',
|
207
|
+
'ilo' => 'Iloko',
|
208
|
+
'ina' => 'Interlingua (International Auxiliary Language Association)',
|
209
|
+
'inc' => 'Indic (Other)',
|
210
|
+
'ind' => 'Indonesian',
|
211
|
+
'ine' => 'Indo-European (Other)',
|
212
|
+
'inh' => 'Ingush',
|
213
|
+
'int' => 'Interlingua (International Auxiliary Language Association)',
|
214
|
+
'ipk' => 'Inupiaq',
|
215
|
+
'ira' => 'Iranian (Other)',
|
216
|
+
'iri' => 'Irish',
|
217
|
+
'iro' => 'Iroquoian (Other)',
|
218
|
+
'ita' => 'Italian',
|
219
|
+
'jav' => 'Javanese',
|
220
|
+
'jbo' => 'Lojban (Artificial language)',
|
221
|
+
'jpn' => 'Japanese',
|
222
|
+
'jpr' => 'Judeo-Persian',
|
223
|
+
'jrb' => 'Judeo-Arabic',
|
224
|
+
'kaa' => 'Kara-Kalpak',
|
225
|
+
'kab' => 'Kabyle',
|
226
|
+
'kac' => 'Kachin',
|
227
|
+
'kal' => 'Kalatdlisut',
|
228
|
+
'kam' => 'Kamba',
|
229
|
+
'kan' => 'Kannada',
|
230
|
+
'kar' => 'Karen',
|
231
|
+
'kas' => 'Kashmiri',
|
232
|
+
'kau' => 'Kanuri',
|
233
|
+
'kaw' => 'Kawi',
|
234
|
+
'kaz' => 'Kazakh',
|
235
|
+
'kbd' => 'Kabardian',
|
236
|
+
'kha' => 'Khasi',
|
237
|
+
'khi' => 'Khoisan (Other)',
|
238
|
+
'khm' => 'Khmer',
|
239
|
+
'kho' => 'Khotanese',
|
240
|
+
'kik' => 'Kikuyu',
|
241
|
+
'kin' => 'Kinyarwanda',
|
242
|
+
'kir' => 'Kyrgyz',
|
243
|
+
'kmb' => 'Kimbundu',
|
244
|
+
'kok' => 'Konkani',
|
245
|
+
'kom' => 'Komi',
|
246
|
+
'kon' => 'Kongo',
|
247
|
+
'kor' => 'Korean',
|
248
|
+
'kos' => 'Kusaie',
|
249
|
+
'kpe' => 'Kpelle',
|
250
|
+
'krc' => 'Karachay-Balkar',
|
251
|
+
'krl' => 'Karelian',
|
252
|
+
'kro' => 'Kru',
|
253
|
+
'kru' => 'Kurukh',
|
254
|
+
'kua' => 'Kuanyama',
|
255
|
+
'kum' => 'Kumyk',
|
256
|
+
'kur' => 'Kurdish',
|
257
|
+
'kus' => 'Kusaie',
|
258
|
+
'kut' => 'Kutenai',
|
259
|
+
'lad' => 'Ladino',
|
260
|
+
'lah' => 'Lahnda',
|
261
|
+
'lam' => 'Lamba',
|
262
|
+
'lan' => 'Occitan (post-1500)',
|
263
|
+
'lao' => 'Lao',
|
264
|
+
'lap' => 'Sami',
|
265
|
+
'lat' => 'Latin',
|
266
|
+
'lav' => 'Latvian',
|
267
|
+
'lez' => 'Lezgian',
|
268
|
+
'lim' => 'Limburgish',
|
269
|
+
'lin' => 'Lingala',
|
270
|
+
'lit' => 'Lithuanian',
|
271
|
+
'lol' => 'Mongo-Nkundu',
|
272
|
+
'loz' => 'Lozi',
|
273
|
+
'ltz' => 'Letzeburgesch',
|
274
|
+
'lua' => 'Luba-Lulua',
|
275
|
+
'lub' => 'Luba-Katanga',
|
276
|
+
'lug' => 'Ganda',
|
277
|
+
'lui' => 'Luiseno',
|
278
|
+
'lun' => 'Lunda',
|
279
|
+
'luo' => 'Luo (Kenya and Tanzania)',
|
280
|
+
'lus' => 'Lushai',
|
281
|
+
'mac' => 'Macedonian',
|
282
|
+
'mad' => 'Madurese',
|
283
|
+
'mag' => 'Magahi',
|
284
|
+
'mah' => 'Marshallese',
|
285
|
+
'mai' => 'Maithili',
|
286
|
+
'mak' => 'Makasar',
|
287
|
+
'mal' => 'Malayalam',
|
288
|
+
'man' => 'Mandingo',
|
289
|
+
'mao' => 'Maori',
|
290
|
+
'map' => 'Austronesian (Other)',
|
291
|
+
'mar' => 'Marathi',
|
292
|
+
'mas' => 'Masai',
|
293
|
+
'max' => 'Manx',
|
294
|
+
'may' => 'Malay',
|
295
|
+
'mdf' => 'Moksha',
|
296
|
+
'mdr' => 'Mandar',
|
297
|
+
'men' => 'Mende',
|
298
|
+
'mga' => 'Irish, Middle (ca. 1100-1550)',
|
299
|
+
'mic' => 'Micmac',
|
300
|
+
'min' => 'Minangkabau',
|
301
|
+
'#mis' => 'Miscellaneous languages',
|
302
|
+
'mkh' => 'Mon-Khmer (Other)',
|
303
|
+
'mla' => 'Malagasy',
|
304
|
+
'mlg' => 'Malagasy',
|
305
|
+
'mlt' => 'Maltese',
|
306
|
+
'mnc' => 'Manchu',
|
307
|
+
'mni' => 'Manipuri',
|
308
|
+
'mno' => 'Manobo languages',
|
309
|
+
'moh' => 'Mohawk',
|
310
|
+
'mol' => 'Moldavian',
|
311
|
+
'mon' => 'Mongolian',
|
312
|
+
'mos' => 'Moore',
|
313
|
+
'#mul' => 'Multiple languages',
|
314
|
+
'mun' => 'Munda (Other)',
|
315
|
+
'mus' => 'Creek',
|
316
|
+
'mwl' => 'Mirandese',
|
317
|
+
'mwr' => 'Marwari',
|
318
|
+
'myn' => 'Mayan languages',
|
319
|
+
'myv' => 'Erzya',
|
320
|
+
'nah' => 'Nahuatl',
|
321
|
+
'nai' => 'North American Indian (Other)',
|
322
|
+
'nap' => 'Neapolitan Italian',
|
323
|
+
'nau' => 'Nauru',
|
324
|
+
'nav' => 'Navajo',
|
325
|
+
'nbl' => 'Ndebele (South Africa)',
|
326
|
+
'nde' => 'Ndebele (Zimbabwe)',
|
327
|
+
'ndo' => 'Ndonga',
|
328
|
+
'nds' => 'Low German',
|
329
|
+
'nep' => 'Nepali',
|
330
|
+
'new' => 'Newari',
|
331
|
+
'nia' => 'Nias',
|
332
|
+
'nic' => 'Niger-Kordofanian (Other)',
|
333
|
+
'niu' => 'Niuean',
|
334
|
+
'nno' => 'Norwegian (Nynorsk)',
|
335
|
+
'nob' => 'Norwegian (Bokmal)',
|
336
|
+
'nog' => 'Nogai',
|
337
|
+
'non' => 'Old Norse',
|
338
|
+
'nor' => 'Norwegian',
|
339
|
+
'nqo' => "N'Ko",
|
340
|
+
'nso' => 'Northern Sotho',
|
341
|
+
'nub' => 'Nubian languages',
|
342
|
+
'nwc' => 'Newari, Old',
|
343
|
+
'nya' => 'Nyanja',
|
344
|
+
'nym' => 'Nyamwezi',
|
345
|
+
'nyn' => 'Nyankole',
|
346
|
+
'nyo' => 'Nyoro',
|
347
|
+
'nzi' => 'Nzima',
|
348
|
+
'oci' => 'Occitan (post-1500)',
|
349
|
+
'oji' => 'Ojibwa',
|
350
|
+
'ori' => 'Oriya',
|
351
|
+
'orm' => 'Oromo',
|
352
|
+
'osa' => 'Osage',
|
353
|
+
'oss' => 'Ossetic',
|
354
|
+
'ota' => 'Turkish, Ottoman',
|
355
|
+
'oto' => 'Otomian languages',
|
356
|
+
'paa' => 'Papuan (Other)',
|
357
|
+
'pag' => 'Pangasinan',
|
358
|
+
'pal' => 'Pahlavi',
|
359
|
+
'pam' => 'Pampanga',
|
360
|
+
'pan' => 'Panjabi',
|
361
|
+
'pap' => 'Papiamento',
|
362
|
+
'pau' => 'Palauan',
|
363
|
+
'peo' => 'Old Persian (ca. 600-400 B.C.)',
|
364
|
+
'per' => 'Persian',
|
365
|
+
'phi' => 'Philippine (Other)',
|
366
|
+
'phn' => 'Phoenician',
|
367
|
+
'pli' => 'Pali',
|
368
|
+
'pol' => 'Polish',
|
369
|
+
'pon' => 'Ponape',
|
370
|
+
'por' => 'Portuguese',
|
371
|
+
'pra' => 'Prakrit languages',
|
372
|
+
'pro' => 'Provencal (to 1500)',
|
373
|
+
'pus' => 'Pushto',
|
374
|
+
'que' => 'Quechua',
|
375
|
+
'raj' => 'Rajasthani',
|
376
|
+
'rap' => 'Rapanui',
|
377
|
+
'rar' => 'Rarotongan',
|
378
|
+
'roa' => 'Romance (Other)',
|
379
|
+
'roh' => 'Raeto-Romance',
|
380
|
+
'rom' => 'Romani',
|
381
|
+
'rum' => 'Romanian',
|
382
|
+
'run' => 'Rundi',
|
383
|
+
'rup' => 'Aromanian',
|
384
|
+
'rus' => 'Russian',
|
385
|
+
'sad' => 'Sandawe',
|
386
|
+
'sag' => 'Sango (Ubangi Creole)',
|
387
|
+
'sah' => 'Yakut',
|
388
|
+
'sai' => 'South American Indian (Other)',
|
389
|
+
'sal' => 'Salishan languages',
|
390
|
+
'sam' => 'Samaritan Aramaic',
|
391
|
+
'san' => 'Sanskrit',
|
392
|
+
'sao' => 'Samoan',
|
393
|
+
'sas' => 'Sasak',
|
394
|
+
'sat' => 'Santali',
|
395
|
+
'scc' => 'Serbian',
|
396
|
+
'scn' => 'Sicilian Italian',
|
397
|
+
'sco' => 'Scots',
|
398
|
+
'scr' => 'Croatian',
|
399
|
+
'sel' => 'Selkup',
|
400
|
+
'sem' => 'Semitic (Other)',
|
401
|
+
'sga' => 'Irish, Old (to 1100)',
|
402
|
+
'sgn' => 'Sign languages',
|
403
|
+
'shn' => 'Shan',
|
404
|
+
'sho' => 'Shona',
|
405
|
+
'sid' => 'Sidamo',
|
406
|
+
'sin' => 'Sinhalese',
|
407
|
+
'sio' => 'Siouan (Other)',
|
408
|
+
'sit' => 'Sino-Tibetan (Other)',
|
409
|
+
'sla' => 'Slavic (Other)',
|
410
|
+
'slo' => 'Slovak',
|
411
|
+
'slv' => 'Slovenian',
|
412
|
+
'sma' => 'Southern Sami',
|
413
|
+
'sme' => 'Northern Sami',
|
414
|
+
'smi' => 'Sami',
|
415
|
+
'smj' => 'Lule Sami',
|
416
|
+
'smn' => 'Inari Sami',
|
417
|
+
'smo' => 'Samoan',
|
418
|
+
'sms' => 'Skolt Sami',
|
419
|
+
'sna' => 'Shona',
|
420
|
+
'snd' => 'Sindhi',
|
421
|
+
'snh' => 'Sinhalese',
|
422
|
+
'snk' => 'Soninke',
|
423
|
+
'sog' => 'Sogdian',
|
424
|
+
'som' => 'Somali',
|
425
|
+
'son' => 'Songhai',
|
426
|
+
'sot' => 'Sotho',
|
427
|
+
'spa' => 'Spanish',
|
428
|
+
'srd' => 'Sardinian',
|
429
|
+
'srn' => 'Sranan',
|
430
|
+
'srp' => 'Serbian',
|
431
|
+
'srr' => 'Serer',
|
432
|
+
'ssa' => 'Nilo-Saharan (Other)',
|
433
|
+
'sso' => 'Sotho',
|
434
|
+
'ssw' => 'Swazi',
|
435
|
+
'suk' => 'Sukuma',
|
436
|
+
'sun' => 'Sundanese',
|
437
|
+
'sus' => 'Susu',
|
438
|
+
'sux' => 'Sumerian',
|
439
|
+
'swa' => 'Swahili',
|
440
|
+
'swe' => 'Swedish',
|
441
|
+
'swz' => 'Swazi',
|
442
|
+
'syc' => 'Syriac',
|
443
|
+
'syr' => 'Syriac, Modern',
|
444
|
+
'tag' => 'Tagalog',
|
445
|
+
'tah' => 'Tahitian',
|
446
|
+
'tai' => 'Tai (Other)',
|
447
|
+
'taj' => 'Tajik',
|
448
|
+
'tam' => 'Tamil',
|
449
|
+
'tar' => 'Tatar',
|
450
|
+
'tat' => 'Tatar',
|
451
|
+
'tel' => 'Telugu',
|
452
|
+
'tem' => 'Temne',
|
453
|
+
'ter' => 'Terena',
|
454
|
+
'tet' => 'Tetum',
|
455
|
+
'tgk' => 'Tajik',
|
456
|
+
'tgl' => 'Tagalog',
|
457
|
+
'tha' => 'Thai',
|
458
|
+
'tib' => 'Tibetan',
|
459
|
+
'tig' => 'Tigre',
|
460
|
+
'tir' => 'Tigrinya',
|
461
|
+
'tiv' => 'Tiv',
|
462
|
+
'tkl' => 'Tokelauan',
|
463
|
+
'tlh' => 'Klingon (Artificial language)',
|
464
|
+
'tli' => 'Tlingit',
|
465
|
+
'tmh' => 'Tamashek',
|
466
|
+
'tog' => 'Tonga (Nyasa)',
|
467
|
+
'ton' => 'Tongan',
|
468
|
+
'tpi' => 'Tok Pisin',
|
469
|
+
'tru' => 'Truk',
|
470
|
+
'tsi' => 'Tsimshian',
|
471
|
+
'tsn' => 'Tswana',
|
472
|
+
'tso' => 'Tsonga',
|
473
|
+
'tsw' => 'Tswana',
|
474
|
+
'tuk' => 'Turkmen',
|
475
|
+
'tum' => 'Tumbuka',
|
476
|
+
'tup' => 'Tupi languages',
|
477
|
+
'tur' => 'Turkish',
|
478
|
+
'tut' => 'Altaic (Other)',
|
479
|
+
'tvl' => 'Tuvaluan',
|
480
|
+
'twi' => 'Twi',
|
481
|
+
'tyv' => 'Tuvinian',
|
482
|
+
'udm' => 'Udmurt',
|
483
|
+
'uga' => 'Ugaritic',
|
484
|
+
'uig' => 'Uighur',
|
485
|
+
'ukr' => 'Ukrainian',
|
486
|
+
'umb' => 'Umbundu',
|
487
|
+
#'und' => 'Undetermined',
|
488
|
+
'urd' => 'Urdu',
|
489
|
+
'uzb' => 'Uzbek',
|
490
|
+
'vai' => 'Vai',
|
491
|
+
'ven' => 'Venda',
|
492
|
+
'vie' => 'Vietnamese',
|
493
|
+
'vol' => 'Volapuk',
|
494
|
+
'vot' => 'Votic',
|
495
|
+
'wak' => 'Wakashan languages',
|
496
|
+
'wal' => 'Walamo',
|
497
|
+
'war' => 'Waray',
|
498
|
+
'was' => 'Washo',
|
499
|
+
'wel' => 'Welsh',
|
500
|
+
'wen' => 'Sorbian languages',
|
501
|
+
'wln' => 'Walloon',
|
502
|
+
'wol' => 'Wolof',
|
503
|
+
'xal' => 'Kalmyk',
|
504
|
+
'xho' => 'Xhosa',
|
505
|
+
'yao' => 'Yao (Africa)',
|
506
|
+
'yap' => 'Yapese',
|
507
|
+
'yid' => 'Yiddish',
|
508
|
+
'yor' => 'Yoruba',
|
509
|
+
'ypk' => 'Yupik languages',
|
510
|
+
'zap' => 'Zapotec',
|
511
|
+
'zbl' => 'Blissymbolics',
|
512
|
+
'zen' => 'Zenaga',
|
513
|
+
'zha' => 'Zhuang',
|
514
|
+
'znd' => 'Zande',
|
515
|
+
'zul' => 'Zulu',
|
516
|
+
'zun' => 'Zuni',
|
517
|
+
#'zxx' => 'null',
|
518
|
+
'zza' => 'Zaza'
|
519
|
+
}
|
data/lib/stanford-mods.rb
CHANGED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# for test coverage
|
2
|
+
require 'simplecov'
|
3
|
+
require 'simplecov-rcov'
|
4
|
+
# SimpleCov formatter that feeds one coverage result to both the HTML and the rcov formatters.
class SimpleCov::Formatter::MergedFormatter
  # Formats +result+ with HTMLFormatter first, then with RcovFormatter.
  #
  # @param result the coverage result object handed over by SimpleCov
  # @return the value returned by the RcovFormatter (the last formatter run)
  def format(result)
    formatter_classes = [SimpleCov::Formatter::HTMLFormatter, SimpleCov::Formatter::RcovFormatter]
    formatter_classes.map { |formatter_class| formatter_class.new.format(result) }.last
  end
end
|
10
|
+
SimpleCov.formatter = SimpleCov::Formatter::MergedFormatter
|
11
|
+
SimpleCov.start do
|
12
|
+
add_filter "/spec/"
|
13
|
+
end
|
14
|
+
|
15
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
16
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
17
|
+
|
18
|
+
require 'stanford-mods'
|
19
|
+
|
20
|
+
#RSpec.configure do |config|
|
21
|
+
#end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'stanford-mods/searchworks'
|
3
|
+
|
4
|
+
describe "Values for SearchWorks Solr" do
|
5
|
+
# from https://consul.stanford.edu/display/NGDE/Required+and+Recommended+Solr+Fields+for+SearchWorks+documents
|
6
|
+
|
7
|
+
context "required fields" do
|
8
|
+
context "DOR specific" do
|
9
|
+
it "druid" do
|
10
|
+
pending "to be implemented in harvestdor"
|
11
|
+
end
|
12
|
+
it "url_fulltext" do
|
13
|
+
pending "to be implemented"
|
14
|
+
end
|
15
|
+
it "mods_xml" do
|
16
|
+
pending "to be implemented"
|
17
|
+
end
|
18
|
+
it "parent_coll_ckey if item object" do
|
19
|
+
pending "to be implemented in harvestdor"
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
it "id" do
|
24
|
+
pending "to be implemented in harvestdor"
|
25
|
+
end
|
26
|
+
|
27
|
+
it "all_search" do
|
28
|
+
pending "to be implemented"
|
29
|
+
end
|
30
|
+
|
31
|
+
it "format" do
|
32
|
+
pending "to be implemented, using SearchWorks controlled vocab"
|
33
|
+
end
|
34
|
+
|
35
|
+
# FIXME: update per gryphDOR code / SearchWorks code / new schema
|
36
|
+
|
37
|
+
it "collection" do
|
38
|
+
pending "to be implemented, using controlled vocab, in harvestdor"
|
39
|
+
end
|
40
|
+
|
41
|
+
it "display_type" do
|
42
|
+
pending "to be implemented, using controlled vocab"
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
context "strongly recommended fields" do
|
48
|
+
it "access_facet" do
|
49
|
+
Stanford::Mods::Record.new.access_facet.should == ['Online']
|
50
|
+
end
|
51
|
+
context "title fields" do
|
52
|
+
context "for display" do
|
53
|
+
it "short title" do
|
54
|
+
pending "to be implemented"
|
55
|
+
end
|
56
|
+
it "full title" do
|
57
|
+
pending "to be implemented"
|
58
|
+
end
|
59
|
+
end
|
60
|
+
context "for searching" do
|
61
|
+
it "short title" do
|
62
|
+
pending "to be implemented"
|
63
|
+
end
|
64
|
+
it "full title" do
|
65
|
+
pending "to be implemented"
|
66
|
+
end
|
67
|
+
end
|
68
|
+
it "sortable title" do
|
69
|
+
pending "to be implemented"
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
context "recommended fields" do
|
75
|
+
context "publication date" do
|
76
|
+
it "for searching and facet" do
|
77
|
+
pending "to be implemented"
|
78
|
+
end
|
79
|
+
it "for sorting" do
|
80
|
+
pending "to be implemented"
|
81
|
+
end
|
82
|
+
it "for pub date grouping (hierarchical / date slider?)" do
|
83
|
+
pending "to be implemented"
|
84
|
+
end
|
85
|
+
end
|
86
|
+
context "language" do
|
87
|
+
it "should use the SearchWorks controlled vocabulary" do
|
88
|
+
m = '<mods><language><languageTerm authority="iso639-2b" type="code">per ara, dut</languageTerm></language></mods>'
|
89
|
+
r = Stanford::Mods::Record.new
|
90
|
+
r.from_str(m)
|
91
|
+
r.language_facet.size.should == 3
|
92
|
+
r.language_facet.should include("Persian")
|
93
|
+
r.language_facet.should include("Arabic")
|
94
|
+
r.language_facet.should include("Dutch")
|
95
|
+
r.language_facet.should_not include("Dutch; Flemish")
|
96
|
+
end
|
97
|
+
it "should not have duplicates" do
|
98
|
+
m = '<mods><language><languageTerm type="code" authority="iso639-2b">eng</languageTerm><languageTerm type="text">English</languageTerm></language></mods>'
|
99
|
+
r = Stanford::Mods::Record.new
|
100
|
+
r.from_str(m)
|
101
|
+
r.language_facet.size.should == 1
|
102
|
+
r.language_facet.should include("English")
|
103
|
+
end
|
104
|
+
|
105
|
+
end
|
106
|
+
context "authors" do
|
107
|
+
it "main author" do
|
108
|
+
pending "to be implemented"
|
109
|
+
end
|
110
|
+
it "additional authors" do
|
111
|
+
pending "to be implemented"
|
112
|
+
end
|
113
|
+
it "author sort" do
|
114
|
+
pending "to be implemented"
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stanford-mods
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-11-
|
13
|
+
date: 2012-11-13 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: mods
|
@@ -146,7 +146,11 @@ files:
|
|
146
146
|
- config/mappings_hash.rb
|
147
147
|
- lib/stanford-mods.rb
|
148
148
|
- lib/stanford-mods/mappings.rb
|
149
|
+
- lib/stanford-mods/searchworks.rb
|
150
|
+
- lib/stanford-mods/searchworks_languages.rb
|
149
151
|
- lib/stanford-mods/version.rb
|
152
|
+
- spec/spec_helper.rb
|
153
|
+
- spec/values_for_req_sw_spec.rb
|
150
154
|
- stanford-mods.gemspec
|
151
155
|
homepage: https://github.com/sul-dlss/stanford-mods
|
152
156
|
licenses: []
|
@@ -162,7 +166,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
162
166
|
version: '0'
|
163
167
|
segments:
|
164
168
|
- 0
|
165
|
-
hash:
|
169
|
+
hash: -3428048468669990853
|
166
170
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
171
|
none: false
|
168
172
|
requirements:
|
@@ -171,12 +175,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
171
175
|
version: '0'
|
172
176
|
segments:
|
173
177
|
- 0
|
174
|
-
hash:
|
178
|
+
hash: -3428048468669990853
|
175
179
|
requirements: []
|
176
180
|
rubyforge_project:
|
177
181
|
rubygems_version: 1.8.24
|
178
182
|
signing_key:
|
179
183
|
specification_version: 3
|
180
184
|
summary: Stanford specific wrangling of MODS metadata
|
181
|
-
test_files:
|
185
|
+
test_files:
|
186
|
+
- spec/spec_helper.rb
|
187
|
+
- spec/values_for_req_sw_spec.rb
|
182
188
|
has_rdoc:
|