stanford-mods 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +11 -0
- data/lib/stanford-mods/searchworks.rb +55 -0
- data/lib/stanford-mods/searchworks_languages.rb +519 -0
- data/lib/stanford-mods/version.rb +1 -1
- data/lib/stanford-mods.rb +1 -6
- data/spec/spec_helper.rb +21 -0
- data/spec/values_for_req_sw_spec.rb +119 -0
- metadata +11 -5
data/README.rdoc
CHANGED
@@ -39,6 +39,16 @@ Or install it yourself as:
|
|
39
39
|
in foo!
|
40
40
|
=> nil
|
41
41
|
|
42
|
+
Example Using SearchWorks Mixins:
|
43
|
+
|
44
|
+
> require 'stanford-mods/searchworks'
|
45
|
+
> m = Stanford::Mods::Record.new
|
46
|
+
> m.from_str('<mods><language><languageTerm authority="iso639-2b" type="code">dut</languageTerm></language></mods>')
|
47
|
+
> m.language_facet <-- from Searchworks mixin
|
48
|
+
=> ['Dutch']
|
49
|
+
> m.languages <-- from mods gem
|
50
|
+
=> ['Dutch; Flemish']
|
51
|
+
|
42
52
|
## Contributing
|
43
53
|
|
44
54
|
1. Fork it
|
@@ -50,5 +60,6 @@ Or install it yourself as:
|
|
50
60
|
|
51
61
|
== Releases
|
52
62
|
|
63
|
+
0.0.3 began SearchWorks mixins
|
53
64
|
0.0.2 add usage instructions to readme
|
54
65
|
0.0.1 Initial commit - grab name
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'stanford-mods/searchworks_languages'
|
2
|
+
|
3
|
+
# SearchWorks-specific wrangling of MODS metadata, as an extension of the Mods::Record object
|
4
|
+
module Stanford
|
5
|
+
module Mods
|
6
|
+
|
7
|
+
class Record < ::Mods::Record
|
8
|
+
|
9
|
+
# if it's coming from DOR, then it is available online
|
10
|
+
# Access facet value for SearchWorks.
# Takes no arguments and reads no record state.
# @return [Array<String>] always the single-element array ['Online']
def access_facet
|
11
|
+
['Online']
|
12
|
+
end
|
13
|
+
|
14
|
+
# include languages known to SearchWorks; try to error-correct when possible (e.g. when ISO-639 disagrees with the MARC standard)
|
15
|
+
# Language facet values restricted to the SearchWorks controlled vocabulary
# (the values of SEARCHWORKS_LANGUAGES).
#
# For every <language> element of the parsed MODS document (@mods_ng_xml):
# * each code in a code-type languageTerm (codes may be separated by commas,
#   pipes or spaces) is translated to a language name:
#   - when the term's authority starts with "iso639", the ISO-639 English
#     name is used if SearchWorks knows that name; otherwise the code is
#     looked up in SEARCHWORKS_LANGUAGES (e.g. 'dut' => 'Dutch' rather than
#     ISO's 'Dutch; Flemish');
#   - for any other (or missing) authority the code is looked up directly
#     in SEARCHWORKS_LANGUAGES;
# * text-type languageTerm values are kept when they are SearchWorks names;
# * when the element has no languageTerm children, its own text is kept if
#   it is a SearchWorks name.
#
# Codes that cannot be translated are skipped, so the result never contains
# nil. A message is printed for each iso639 code that ISO_639 cannot resolve.
#
# @return [Array<String>] unique language names, in document order
def language_facet
  result = []
  @mods_ng_xml.language.each do |n|
    # get languageTerm codes and add their translations to the result
    n.code_term.each do |ct|
      # to_s guards against a missing authority attribute.
      # NOTE(review): assumes authority is nil or String-like -- confirm against the mods gem
      iso639_authority = !(ct.authority.to_s =~ /^iso639/).nil?
      codes = ct.text.split(/[,|\ ]/).map { |x| x.strip }.reject { |x| x.empty? }

      codes.each do |code|
        name = nil
        if iso639_authority
          english_name = nil
          begin
            entry = ISO_639.find(code)
            english_name = entry.english_name unless entry.nil?
          rescue StandardError
            # best effort: a failing lookup must not abort the remaining codes
            english_name = nil
          end
          if english_name.nil?
            p "Couldn't find english name for #{ct.text}"
          elsif SEARCHWORKS_LANGUAGES.has_value?(english_name)
            name = english_name
          end
        end
        # fall back to the SearchWorks table keyed by the code itself
        name = SEARCHWORKS_LANGUAGES[code] if name.nil?
        result << name unless name.nil?
      end
    end

    # add languageTerm text values
    n.text_term.each do |tt|
      val = tt.text.strip
      result << val if val.length > 0 && SEARCHWORKS_LANGUAGES.has_value?(val)
    end

    # add language values that aren't in languageTerm subelement
    if n.languageTerm.size == 0
      result << n.text if SEARCHWORKS_LANGUAGES.has_value?(n.text)
    end
  end
  result.uniq
end
|
52
|
+
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -0,0 +1,519 @@
|
|
1
|
+
# Language Values used by SearchWorks
|
2
|
+
# From https://github.com/solrmarc/stanford-solr-marc/blob/master/stanford-sw/translation_maps/language_map.properties
|
3
|
+
SEARCHWORKS_LANGUAGES = {
|
4
|
+
'aaa' => 'Afar',
|
5
|
+
'abk' => 'Abkhaz',
|
6
|
+
'ace' => 'Achinese',
|
7
|
+
'ach' => 'Acoli',
|
8
|
+
'ada' => 'Adangme',
|
9
|
+
'ady' => 'Adygei',
|
10
|
+
'afa' => 'Afroasiatic (Other)',
|
11
|
+
'afh' => 'Afrihili (Artificial language)',
|
12
|
+
'afr' => 'Afrikaans',
|
13
|
+
'ain' => 'Ainu',
|
14
|
+
'ajm' => 'Aljamia',
|
15
|
+
'aka' => 'Akan',
|
16
|
+
'akk' => 'Akkadian',
|
17
|
+
'alb' => 'Albanian',
|
18
|
+
'ale' => 'Aleut',
|
19
|
+
'alg' => 'Algonquian (Other)',
|
20
|
+
'alt' => 'Altai',
|
21
|
+
'amh' => 'Amharic',
|
22
|
+
'ang' => 'English, Old (ca. 450-1100)',
|
23
|
+
'anp' => 'Angika',
|
24
|
+
'apa' => 'Apache languages',
|
25
|
+
'ara' => 'Arabic',
|
26
|
+
'arc' => 'Aramaic',
|
27
|
+
'arg' => 'Aragonese Spanish',
|
28
|
+
'arm' => 'Armenian',
|
29
|
+
'arn' => 'Mapuche',
|
30
|
+
'arp' => 'Arapaho',
|
31
|
+
'art' => 'Artificial (Other)',
|
32
|
+
'arw' => 'Arawak',
|
33
|
+
'asm' => 'Assamese',
|
34
|
+
'ast' => 'Bable',
|
35
|
+
'ath' => 'Athapascan (Other)',
|
36
|
+
'aus' => 'Australian languages',
|
37
|
+
'ava' => 'Avaric',
|
38
|
+
'ave' => 'Avestan',
|
39
|
+
'awa' => 'Awadhi',
|
40
|
+
'aym' => 'Aymara',
|
41
|
+
'aze' => 'Azerbaijani',
|
42
|
+
'bad' => 'Banda',
|
43
|
+
'bai' => 'Bamileke languages',
|
44
|
+
'bak' => 'Bashkir',
|
45
|
+
'bal' => 'Baluchi',
|
46
|
+
'bam' => 'Bambara',
|
47
|
+
'ban' => 'Balinese',
|
48
|
+
'baq' => 'Basque',
|
49
|
+
'bas' => 'Basa',
|
50
|
+
'bat' => 'Baltic (Other)',
|
51
|
+
'bej' => 'Beja',
|
52
|
+
'bel' => 'Belarusian',
|
53
|
+
'bem' => 'Bemba',
|
54
|
+
'ben' => 'Bengali',
|
55
|
+
'ber' => 'Berber (Other)',
|
56
|
+
'bho' => 'Bhojpuri',
|
57
|
+
'bih' => 'Bihari',
|
58
|
+
'bik' => 'Bikol',
|
59
|
+
'bin' => 'Edo',
|
60
|
+
'bis' => 'Bislama',
|
61
|
+
'bla' => 'Siksika',
|
62
|
+
'bnt' => 'Bantu (Other)',
|
63
|
+
'bos' => 'Bosnian',
|
64
|
+
'bra' => 'Braj',
|
65
|
+
'bre' => 'Breton',
|
66
|
+
'btk' => 'Batak',
|
67
|
+
'bua' => 'Buriat',
|
68
|
+
'bug' => 'Bugis',
|
69
|
+
'bul' => 'Bulgarian',
|
70
|
+
'bur' => 'Burmese',
|
71
|
+
'byn' => 'Bilin',
|
72
|
+
'cad' => 'Caddo',
|
73
|
+
'cai' => 'Central American Indian (Other)',
|
74
|
+
'cam' => 'Khmer',
|
75
|
+
'car' => 'Carib',
|
76
|
+
'cat' => 'Catalan',
|
77
|
+
'cau' => 'Caucasian (Other)',
|
78
|
+
'ceb' => 'Cebuano',
|
79
|
+
'cel' => 'Celtic (Other)',
|
80
|
+
'cha' => 'Chamorro',
|
81
|
+
'chb' => 'Chibcha',
|
82
|
+
'che' => 'Chechen',
|
83
|
+
'chg' => 'Chagatai',
|
84
|
+
'chi' => 'Chinese',
|
85
|
+
'chk' => 'Truk',
|
86
|
+
'chm' => 'Mari',
|
87
|
+
'chn' => 'Chinook jargon',
|
88
|
+
'cho' => 'Choctaw',
|
89
|
+
'chp' => 'Chipewyan',
|
90
|
+
'chr' => 'Cherokee',
|
91
|
+
'chu' => 'Church Slavic',
|
92
|
+
'chv' => 'Chuvash',
|
93
|
+
'chy' => 'Cheyenne',
|
94
|
+
'cmc' => 'Chamic languages',
|
95
|
+
'cop' => 'Coptic',
|
96
|
+
'cor' => 'Cornish',
|
97
|
+
'cos' => 'Corsican',
|
98
|
+
'cpe' => 'Creoles and Pidgins, English-based (Other)',
|
99
|
+
'cpf' => 'Creoles and Pidgins, French-based (Other)',
|
100
|
+
'cpp' => 'Creoles and Pidgins, Portuguese-based (Other)',
|
101
|
+
'cre' => 'Cree',
|
102
|
+
'crh' => 'Crimean Tatar',
|
103
|
+
'crp' => 'Creoles and Pidgins (Other)',
|
104
|
+
'csb' => 'Kashubian',
|
105
|
+
'cus' => 'Cushitic (Other)',
|
106
|
+
'cze' => 'Czech',
|
107
|
+
'dak' => 'Dakota',
|
108
|
+
'dan' => 'Danish',
|
109
|
+
'dar' => 'Dargwa',
|
110
|
+
'day' => 'Dayak',
|
111
|
+
'del' => 'Delaware',
|
112
|
+
'den' => 'Slave',
|
113
|
+
'dgr' => 'Dogrib',
|
114
|
+
'din' => 'Dinka',
|
115
|
+
'div' => 'Divehi',
|
116
|
+
'doi' => 'Dogri',
|
117
|
+
'dra' => 'Dravidian (Other)',
|
118
|
+
'dsb' => 'Lower Sorbian',
|
119
|
+
'dua' => 'Duala',
|
120
|
+
'dum' => 'Dutch, Middle (ca. 1050-1350)',
|
121
|
+
'dut' => 'Dutch',
|
122
|
+
'dyu' => 'Dyula',
|
123
|
+
'dzo' => 'Dzongkha',
|
124
|
+
'efi' => 'Efik',
|
125
|
+
'egy' => 'Egyptian',
|
126
|
+
'eka' => 'Ekajuk',
|
127
|
+
'elx' => 'Elamite',
|
128
|
+
'eng' => 'English',
|
129
|
+
'enm' => 'English, Middle (1100-1500)',
|
130
|
+
'epo' => 'Esperanto',
|
131
|
+
'esk' => 'Eskimo languages',
|
132
|
+
'esp' => 'Esperanto',
|
133
|
+
'est' => 'Estonian',
|
134
|
+
'eth' => 'Ethiopic',
|
135
|
+
'ewe' => 'Ewe',
|
136
|
+
'ewo' => 'Ewondo',
|
137
|
+
'fan' => 'Fang',
|
138
|
+
'fao' => 'Faroese',
|
139
|
+
'far' => 'Faroese',
|
140
|
+
'fat' => 'Fanti',
|
141
|
+
'fij' => 'Fijian',
|
142
|
+
'fil' => 'Filipino',
|
143
|
+
'fin' => 'Finnish',
|
144
|
+
'fiu' => 'Finno-Ugrian (Other)',
|
145
|
+
'fon' => 'Fon',
|
146
|
+
'fre' => 'French',
|
147
|
+
'fri' => 'Frisian',
|
148
|
+
'frm' => 'French, Middle (ca. 1400-1600)',
|
149
|
+
'fro' => 'French, Old (ca. 842-1400)',
|
150
|
+
'frr' => 'North Frisian',
|
151
|
+
'frs' => 'East Frisian',
|
152
|
+
'fry' => 'Frisian',
|
153
|
+
'ful' => 'Fula',
|
154
|
+
'fur' => 'Friulian',
|
155
|
+
'gaa' => 'Ga',
|
156
|
+
'gae' => 'Scottish Gaelic',
|
157
|
+
'gag' => 'Galician',
|
158
|
+
'gal' => 'Oromo',
|
159
|
+
'gay' => 'Gayo',
|
160
|
+
'gba' => 'Gbaya',
|
161
|
+
'gem' => 'Germanic (Other)',
|
162
|
+
'geo' => 'Georgian',
|
163
|
+
'ger' => 'German',
|
164
|
+
'gez' => 'Ethiopic',
|
165
|
+
'gil' => 'Gilbertese',
|
166
|
+
'gla' => 'Scottish Gaelic',
|
167
|
+
'gle' => 'Irish',
|
168
|
+
'glg' => 'Galician',
|
169
|
+
'glv' => 'Manx',
|
170
|
+
'gmh' => 'German, Middle High (ca. 1050-1500)',
|
171
|
+
'goh' => 'German, Old High (ca. 750-1050)',
|
172
|
+
'gon' => 'Gondi',
|
173
|
+
'gor' => 'Gorontalo',
|
174
|
+
'got' => 'Gothic',
|
175
|
+
'grb' => 'Grebo',
|
176
|
+
'grc' => 'Greek, Ancient (to 1453)',
|
177
|
+
'gre' => 'Greek, Modern (1453- )',
|
178
|
+
'grn' => 'Guarani',
|
179
|
+
'gsw' => 'Swiss German',
|
180
|
+
'gua' => 'Guarani',
|
181
|
+
'guj' => 'Gujarati',
|
182
|
+
'gwi' => "Gwich'in ",
|
183
|
+
'hai' => 'Haida',
|
184
|
+
'hat' => 'Haitian French Creole',
|
185
|
+
'hau' => 'Hausa',
|
186
|
+
'haw' => 'Hawaiian',
|
187
|
+
'heb' => 'Hebrew',
|
188
|
+
'her' => 'Herero',
|
189
|
+
'hil' => 'Hiligaynon',
|
190
|
+
'him' => 'Himachali',
|
191
|
+
'hin' => 'Hindi',
|
192
|
+
'hit' => 'Hittite',
|
193
|
+
'hmn' => 'Hmong',
|
194
|
+
'hmo' => 'Hiri Motu',
|
195
|
+
'hrv' => 'Croatian',
|
196
|
+
'hsb' => 'Upper Sorbian',
|
197
|
+
'hun' => 'Hungarian',
|
198
|
+
'hup' => 'Hupa',
|
199
|
+
'iba' => 'Iban',
|
200
|
+
'ibo' => 'Igbo',
|
201
|
+
'ice' => 'Icelandic',
|
202
|
+
'ido' => 'Ido',
|
203
|
+
'iii' => 'Sichuan Yi',
|
204
|
+
'ijo' => 'Ijo',
|
205
|
+
'iku' => 'Inuktitut',
|
206
|
+
'ile' => 'Interlingue',
|
207
|
+
'ilo' => 'Iloko',
|
208
|
+
'ina' => 'Interlingua (International Auxiliary Language Association)',
|
209
|
+
'inc' => 'Indic (Other)',
|
210
|
+
'ind' => 'Indonesian',
|
211
|
+
'ine' => 'Indo-European (Other)',
|
212
|
+
'inh' => 'Ingush',
|
213
|
+
'int' => 'Interlingua (International Auxiliary Language Association)',
|
214
|
+
'ipk' => 'Inupiaq',
|
215
|
+
'ira' => 'Iranian (Other)',
|
216
|
+
'iri' => 'Irish',
|
217
|
+
'iro' => 'Iroquoian (Other)',
|
218
|
+
'ita' => 'Italian',
|
219
|
+
'jav' => 'Javanese',
|
220
|
+
'jbo' => 'Lojban (Artificial language)',
|
221
|
+
'jpn' => 'Japanese',
|
222
|
+
'jpr' => 'Judeo-Persian',
|
223
|
+
'jrb' => 'Judeo-Arabic',
|
224
|
+
'kaa' => 'Kara-Kalpak',
|
225
|
+
'kab' => 'Kabyle',
|
226
|
+
'kac' => 'Kachin',
|
227
|
+
'kal' => 'Kalatdlisut',
|
228
|
+
'kam' => 'Kamba',
|
229
|
+
'kan' => 'Kannada',
|
230
|
+
'kar' => 'Karen',
|
231
|
+
'kas' => 'Kashmiri',
|
232
|
+
'kau' => 'Kanuri',
|
233
|
+
'kaw' => 'Kawi',
|
234
|
+
'kaz' => 'Kazakh',
|
235
|
+
'kbd' => 'Kabardian',
|
236
|
+
'kha' => 'Khasi',
|
237
|
+
'khi' => 'Khoisan (Other)',
|
238
|
+
'khm' => 'Khmer',
|
239
|
+
'kho' => 'Khotanese',
|
240
|
+
'kik' => 'Kikuyu',
|
241
|
+
'kin' => 'Kinyarwanda',
|
242
|
+
'kir' => 'Kyrgyz',
|
243
|
+
'kmb' => 'Kimbundu',
|
244
|
+
'kok' => 'Konkani',
|
245
|
+
'kom' => 'Komi',
|
246
|
+
'kon' => 'Kongo',
|
247
|
+
'kor' => 'Korean',
|
248
|
+
'kos' => 'Kusaie',
|
249
|
+
'kpe' => 'Kpelle',
|
250
|
+
'krc' => 'Karachay-Balkar',
|
251
|
+
'krl' => 'Karelian',
|
252
|
+
'kro' => 'Kru',
|
253
|
+
'kru' => 'Kurukh',
|
254
|
+
'kua' => 'Kuanyama',
|
255
|
+
'kum' => 'Kumyk',
|
256
|
+
'kur' => 'Kurdish',
|
257
|
+
'kus' => 'Kusaie',
|
258
|
+
'kut' => 'Kutenai',
|
259
|
+
'lad' => 'Ladino',
|
260
|
+
'lah' => 'Lahnda',
|
261
|
+
'lam' => 'Lamba',
|
262
|
+
'lan' => 'Occitan (post-1500)',
|
263
|
+
'lao' => 'Lao',
|
264
|
+
'lap' => 'Sami',
|
265
|
+
'lat' => 'Latin',
|
266
|
+
'lav' => 'Latvian',
|
267
|
+
'lez' => 'Lezgian',
|
268
|
+
'lim' => 'Limburgish',
|
269
|
+
'lin' => 'Lingala',
|
270
|
+
'lit' => 'Lithuanian',
|
271
|
+
'lol' => 'Mongo-Nkundu',
|
272
|
+
'loz' => 'Lozi',
|
273
|
+
'ltz' => 'Letzeburgesch',
|
274
|
+
'lua' => 'Luba-Lulua',
|
275
|
+
'lub' => 'Luba-Katanga',
|
276
|
+
'lug' => 'Ganda',
|
277
|
+
'lui' => 'Luiseno',
|
278
|
+
'lun' => 'Lunda',
|
279
|
+
'luo' => 'Luo (Kenya and Tanzania)',
|
280
|
+
'lus' => 'Lushai',
|
281
|
+
'mac' => 'Macedonian',
|
282
|
+
'mad' => 'Madurese',
|
283
|
+
'mag' => 'Magahi',
|
284
|
+
'mah' => 'Marshallese',
|
285
|
+
'mai' => 'Maithili',
|
286
|
+
'mak' => 'Makasar',
|
287
|
+
'mal' => 'Malayalam',
|
288
|
+
'man' => 'Mandingo',
|
289
|
+
'mao' => 'Maori',
|
290
|
+
'map' => 'Austronesian (Other)',
|
291
|
+
'mar' => 'Marathi',
|
292
|
+
'mas' => 'Masai',
|
293
|
+
'max' => 'Manx',
|
294
|
+
'may' => 'Malay',
|
295
|
+
'mdf' => 'Moksha',
|
296
|
+
'mdr' => 'Mandar',
|
297
|
+
'men' => 'Mende',
|
298
|
+
'mga' => 'Irish, Middle (ca. 1100-1550)',
|
299
|
+
'mic' => 'Micmac',
|
300
|
+
'min' => 'Minangkabau',
|
301
|
+
'#mis' => 'Miscellaneous languages',
|
302
|
+
'mkh' => 'Mon-Khmer (Other)',
|
303
|
+
'mla' => 'Malagasy',
|
304
|
+
'mlg' => 'Malagasy',
|
305
|
+
'mlt' => 'Maltese',
|
306
|
+
'mnc' => 'Manchu',
|
307
|
+
'mni' => 'Manipuri',
|
308
|
+
'mno' => 'Manobo languages',
|
309
|
+
'moh' => 'Mohawk',
|
310
|
+
'mol' => 'Moldavian',
|
311
|
+
'mon' => 'Mongolian',
|
312
|
+
'mos' => 'Moore',
|
313
|
+
'#mul' => 'Multiple languages',
|
314
|
+
'mun' => 'Munda (Other)',
|
315
|
+
'mus' => 'Creek',
|
316
|
+
'mwl' => 'Mirandese',
|
317
|
+
'mwr' => 'Marwari',
|
318
|
+
'myn' => 'Mayan languages',
|
319
|
+
'myv' => 'Erzya',
|
320
|
+
'nah' => 'Nahuatl',
|
321
|
+
'nai' => 'North American Indian (Other)',
|
322
|
+
'nap' => 'Neapolitan Italian',
|
323
|
+
'nau' => 'Nauru',
|
324
|
+
'nav' => 'Navajo',
|
325
|
+
'nbl' => 'Ndebele (South Africa)',
|
326
|
+
'nde' => 'Ndebele (Zimbabwe)',
|
327
|
+
'ndo' => 'Ndonga',
|
328
|
+
'nds' => 'Low German',
|
329
|
+
'nep' => 'Nepali',
|
330
|
+
'new' => 'Newari',
|
331
|
+
'nia' => 'Nias',
|
332
|
+
'nic' => 'Niger-Kordofanian (Other)',
|
333
|
+
'niu' => 'Niuean',
|
334
|
+
'nno' => 'Norwegian (Nynorsk)',
|
335
|
+
'nob' => 'Norwegian (Bokmal)',
|
336
|
+
'nog' => 'Nogai',
|
337
|
+
'non' => 'Old Norse',
|
338
|
+
'nor' => 'Norwegian',
|
339
|
+
'nqo' => "N'Ko",
|
340
|
+
'nso' => 'Northern Sotho',
|
341
|
+
'nub' => 'Nubian languages',
|
342
|
+
'nwc' => 'Newari, Old',
|
343
|
+
'nya' => 'Nyanja',
|
344
|
+
'nym' => 'Nyamwezi',
|
345
|
+
'nyn' => 'Nyankole',
|
346
|
+
'nyo' => 'Nyoro',
|
347
|
+
'nzi' => 'Nzima',
|
348
|
+
'oci' => 'Occitan (post-1500)',
|
349
|
+
'oji' => 'Ojibwa',
|
350
|
+
'ori' => 'Oriya',
|
351
|
+
'orm' => 'Oromo',
|
352
|
+
'osa' => 'Osage',
|
353
|
+
'oss' => 'Ossetic',
|
354
|
+
'ota' => 'Turkish, Ottoman',
|
355
|
+
'oto' => 'Otomian languages',
|
356
|
+
'paa' => 'Papuan (Other)',
|
357
|
+
'pag' => 'Pangasinan',
|
358
|
+
'pal' => 'Pahlavi',
|
359
|
+
'pam' => 'Pampanga',
|
360
|
+
'pan' => 'Panjabi',
|
361
|
+
'pap' => 'Papiamento',
|
362
|
+
'pau' => 'Palauan',
|
363
|
+
'peo' => 'Old Persian (ca. 600-400 B.C.)',
|
364
|
+
'per' => 'Persian',
|
365
|
+
'phi' => 'Philippine (Other)',
|
366
|
+
'phn' => 'Phoenician',
|
367
|
+
'pli' => 'Pali',
|
368
|
+
'pol' => 'Polish',
|
369
|
+
'pon' => 'Ponape',
|
370
|
+
'por' => 'Portuguese',
|
371
|
+
'pra' => 'Prakrit languages',
|
372
|
+
'pro' => 'Provencal (to 1500)',
|
373
|
+
'pus' => 'Pushto',
|
374
|
+
'que' => 'Quechua',
|
375
|
+
'raj' => 'Rajasthani',
|
376
|
+
'rap' => 'Rapanui',
|
377
|
+
'rar' => 'Rarotongan',
|
378
|
+
'roa' => 'Romance (Other)',
|
379
|
+
'roh' => 'Raeto-Romance',
|
380
|
+
'rom' => 'Romani',
|
381
|
+
'rum' => 'Romanian',
|
382
|
+
'run' => 'Rundi',
|
383
|
+
'rup' => 'Aromanian',
|
384
|
+
'rus' => 'Russian',
|
385
|
+
'sad' => 'Sandawe',
|
386
|
+
'sag' => 'Sango (Ubangi Creole)',
|
387
|
+
'sah' => 'Yakut',
|
388
|
+
'sai' => 'South American Indian (Other)',
|
389
|
+
'sal' => 'Salishan languages',
|
390
|
+
'sam' => 'Samaritan Aramaic',
|
391
|
+
'san' => 'Sanskrit',
|
392
|
+
'sao' => 'Samoan',
|
393
|
+
'sas' => 'Sasak',
|
394
|
+
'sat' => 'Santali',
|
395
|
+
'scc' => 'Serbian',
|
396
|
+
'scn' => 'Sicilian Italian',
|
397
|
+
'sco' => 'Scots',
|
398
|
+
'scr' => 'Croatian',
|
399
|
+
'sel' => 'Selkup',
|
400
|
+
'sem' => 'Semitic (Other)',
|
401
|
+
'sga' => 'Irish, Old (to 1100)',
|
402
|
+
'sgn' => 'Sign languages',
|
403
|
+
'shn' => 'Shan',
|
404
|
+
'sho' => 'Shona',
|
405
|
+
'sid' => 'Sidamo',
|
406
|
+
'sin' => 'Sinhalese',
|
407
|
+
'sio' => 'Siouan (Other)',
|
408
|
+
'sit' => 'Sino-Tibetan (Other)',
|
409
|
+
'sla' => 'Slavic (Other)',
|
410
|
+
'slo' => 'Slovak',
|
411
|
+
'slv' => 'Slovenian',
|
412
|
+
'sma' => 'Southern Sami',
|
413
|
+
'sme' => 'Northern Sami',
|
414
|
+
'smi' => 'Sami',
|
415
|
+
'smj' => 'Lule Sami',
|
416
|
+
'smn' => 'Inari Sami',
|
417
|
+
'smo' => 'Samoan',
|
418
|
+
'sms' => 'Skolt Sami',
|
419
|
+
'sna' => 'Shona',
|
420
|
+
'snd' => 'Sindhi',
|
421
|
+
'snh' => 'Sinhalese',
|
422
|
+
'snk' => 'Soninke',
|
423
|
+
'sog' => 'Sogdian',
|
424
|
+
'som' => 'Somali',
|
425
|
+
'son' => 'Songhai',
|
426
|
+
'sot' => 'Sotho',
|
427
|
+
'spa' => 'Spanish',
|
428
|
+
'srd' => 'Sardinian',
|
429
|
+
'srn' => 'Sranan',
|
430
|
+
'srp' => 'Serbian',
|
431
|
+
'srr' => 'Serer',
|
432
|
+
'ssa' => 'Nilo-Saharan (Other)',
|
433
|
+
'sso' => 'Sotho',
|
434
|
+
'ssw' => 'Swazi',
|
435
|
+
'suk' => 'Sukuma',
|
436
|
+
'sun' => 'Sundanese',
|
437
|
+
'sus' => 'Susu',
|
438
|
+
'sux' => 'Sumerian',
|
439
|
+
'swa' => 'Swahili',
|
440
|
+
'swe' => 'Swedish',
|
441
|
+
'swz' => 'Swazi',
|
442
|
+
'syc' => 'Syriac',
|
443
|
+
'syr' => 'Syriac, Modern',
|
444
|
+
'tag' => 'Tagalog',
|
445
|
+
'tah' => 'Tahitian',
|
446
|
+
'tai' => 'Tai (Other)',
|
447
|
+
'taj' => 'Tajik',
|
448
|
+
'tam' => 'Tamil',
|
449
|
+
'tar' => 'Tatar',
|
450
|
+
'tat' => 'Tatar',
|
451
|
+
'tel' => 'Telugu',
|
452
|
+
'tem' => 'Temne',
|
453
|
+
'ter' => 'Terena',
|
454
|
+
'tet' => 'Tetum',
|
455
|
+
'tgk' => 'Tajik',
|
456
|
+
'tgl' => 'Tagalog',
|
457
|
+
'tha' => 'Thai',
|
458
|
+
'tib' => 'Tibetan',
|
459
|
+
'tig' => 'Tigre',
|
460
|
+
'tir' => 'Tigrinya',
|
461
|
+
'tiv' => 'Tiv',
|
462
|
+
'tkl' => 'Tokelauan',
|
463
|
+
'tlh' => 'Klingon (Artificial language)',
|
464
|
+
'tli' => 'Tlingit',
|
465
|
+
'tmh' => 'Tamashek',
|
466
|
+
'tog' => 'Tonga (Nyasa)',
|
467
|
+
'ton' => 'Tongan',
|
468
|
+
'tpi' => 'Tok Pisin',
|
469
|
+
'tru' => 'Truk',
|
470
|
+
'tsi' => 'Tsimshian',
|
471
|
+
'tsn' => 'Tswana',
|
472
|
+
'tso' => 'Tsonga',
|
473
|
+
'tsw' => 'Tswana',
|
474
|
+
'tuk' => 'Turkmen',
|
475
|
+
'tum' => 'Tumbuka',
|
476
|
+
'tup' => 'Tupi languages',
|
477
|
+
'tur' => 'Turkish',
|
478
|
+
'tut' => 'Altaic (Other)',
|
479
|
+
'tvl' => 'Tuvaluan',
|
480
|
+
'twi' => 'Twi',
|
481
|
+
'tyv' => 'Tuvinian',
|
482
|
+
'udm' => 'Udmurt',
|
483
|
+
'uga' => 'Ugaritic',
|
484
|
+
'uig' => 'Uighur',
|
485
|
+
'ukr' => 'Ukrainian',
|
486
|
+
'umb' => 'Umbundu',
|
487
|
+
#'und' => 'Undetermined',
|
488
|
+
'urd' => 'Urdu',
|
489
|
+
'uzb' => 'Uzbek',
|
490
|
+
'vai' => 'Vai',
|
491
|
+
'ven' => 'Venda',
|
492
|
+
'vie' => 'Vietnamese',
|
493
|
+
'vol' => 'Volapuk',
|
494
|
+
'vot' => 'Votic',
|
495
|
+
'wak' => 'Wakashan languages',
|
496
|
+
'wal' => 'Walamo',
|
497
|
+
'war' => 'Waray',
|
498
|
+
'was' => 'Washo',
|
499
|
+
'wel' => 'Welsh',
|
500
|
+
'wen' => 'Sorbian languages',
|
501
|
+
'wln' => 'Walloon',
|
502
|
+
'wol' => 'Wolof',
|
503
|
+
'xal' => 'Kalmyk',
|
504
|
+
'xho' => 'Xhosa',
|
505
|
+
'yao' => 'Yao (Africa)',
|
506
|
+
'yap' => 'Yapese',
|
507
|
+
'yid' => 'Yiddish',
|
508
|
+
'yor' => 'Yoruba',
|
509
|
+
'ypk' => 'Yupik languages',
|
510
|
+
'zap' => 'Zapotec',
|
511
|
+
'zbl' => 'Blissymbolics',
|
512
|
+
'zen' => 'Zenaga',
|
513
|
+
'zha' => 'Zhuang',
|
514
|
+
'znd' => 'Zande',
|
515
|
+
'zul' => 'Zulu',
|
516
|
+
'zun' => 'Zuni',
|
517
|
+
#'zxx' => 'null',
|
518
|
+
'zza' => 'Zaza'
|
519
|
+
}
|
data/lib/stanford-mods.rb
CHANGED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# for test coverage
|
2
|
+
require 'simplecov'
|
3
|
+
require 'simplecov-rcov'
|
4
|
+
class SimpleCov::Formatter::MergedFormatter
|
5
|
+
def format(result)
|
6
|
+
SimpleCov::Formatter::HTMLFormatter.new.format(result)
|
7
|
+
SimpleCov::Formatter::RcovFormatter.new.format(result)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
SimpleCov.formatter = SimpleCov::Formatter::MergedFormatter
|
11
|
+
SimpleCov.start do
|
12
|
+
add_filter "/spec/"
|
13
|
+
end
|
14
|
+
|
15
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
16
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
17
|
+
|
18
|
+
require 'stanford-mods'
|
19
|
+
|
20
|
+
#RSpec.configure do |config|
|
21
|
+
#end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'stanford-mods/searchworks'
|
3
|
+
|
4
|
+
describe "Values for SearchWorks Solr" do
|
5
|
+
# from https://consul.stanford.edu/display/NGDE/Required+and+Recommended+Solr+Fields+for+SearchWorks+documents
|
6
|
+
|
7
|
+
context "required fields" do
|
8
|
+
context "DOR specific" do
|
9
|
+
it "druid" do
|
10
|
+
pending "to be implemented in harvestdor"
|
11
|
+
end
|
12
|
+
it "url_fulltext" do
|
13
|
+
pending "to be implemented"
|
14
|
+
end
|
15
|
+
it "mods_xml" do
|
16
|
+
pending "to be implemented"
|
17
|
+
end
|
18
|
+
it "parent_coll_ckey if item object" do
|
19
|
+
pending "to be implemented in harvestdor"
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
it "id" do
|
24
|
+
pending "to be implemented in harvestdor"
|
25
|
+
end
|
26
|
+
|
27
|
+
it "all_search" do
|
28
|
+
pending "to be implemented"
|
29
|
+
end
|
30
|
+
|
31
|
+
it "format" do
|
32
|
+
pending "to be implemented, using SearchWorks controlled vocab"
|
33
|
+
end
|
34
|
+
|
35
|
+
# FIXME: update per gryphDOR code / SearchWorks code / new schema
|
36
|
+
|
37
|
+
it "collection" do
|
38
|
+
pending "to be implemented, using controlled vocab, in harvestdor"
|
39
|
+
end
|
40
|
+
|
41
|
+
it "display_type" do
|
42
|
+
pending "to be implemented, using controlled vocab"
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
46
|
+
|
47
|
+
context "strongly recommended fields" do
|
48
|
+
it "access_facet" do
|
49
|
+
Stanford::Mods::Record.new.access_facet.should == ['Online']
|
50
|
+
end
|
51
|
+
context "title fields" do
|
52
|
+
context "for display" do
|
53
|
+
it "short title" do
|
54
|
+
pending "to be implemented"
|
55
|
+
end
|
56
|
+
it "full title" do
|
57
|
+
pending "to be implemented"
|
58
|
+
end
|
59
|
+
end
|
60
|
+
context "for searching" do
|
61
|
+
it "short title" do
|
62
|
+
pending "to be implemented"
|
63
|
+
end
|
64
|
+
it "full title" do
|
65
|
+
pending "to be implemented"
|
66
|
+
end
|
67
|
+
end
|
68
|
+
it "sortable title" do
|
69
|
+
pending "to be implemented"
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
context "recommended fields" do
|
75
|
+
context "publication date" do
|
76
|
+
it "for searching and facet" do
|
77
|
+
pending "to be implemented"
|
78
|
+
end
|
79
|
+
it "for sorting" do
|
80
|
+
pending "to be implemented"
|
81
|
+
end
|
82
|
+
it "for pub date grouping (hierarchical / date slider?)" do
|
83
|
+
pending "to be implemented"
|
84
|
+
end
|
85
|
+
end
|
86
|
+
context "language" do
|
87
|
+
it "should use the SearchWorks controlled vocabulary" do
|
88
|
+
m = '<mods><language><languageTerm authority="iso639-2b" type="code">per ara, dut</languageTerm></language></mods>'
|
89
|
+
r = Stanford::Mods::Record.new
|
90
|
+
r.from_str(m)
|
91
|
+
r.language_facet.size.should == 3
|
92
|
+
r.language_facet.should include("Persian")
|
93
|
+
r.language_facet.should include("Arabic")
|
94
|
+
r.language_facet.should include("Dutch")
|
95
|
+
r.language_facet.should_not include("Dutch; Flemish")
|
96
|
+
end
|
97
|
+
it "should not have duplicates" do
|
98
|
+
m = '<mods><language><languageTerm type="code" authority="iso639-2b">eng</languageTerm><languageTerm type="text">English</languageTerm></language></mods>'
|
99
|
+
r = Stanford::Mods::Record.new
|
100
|
+
r.from_str(m)
|
101
|
+
r.language_facet.size.should == 1
|
102
|
+
r.language_facet.should include("English")
|
103
|
+
end
|
104
|
+
|
105
|
+
end
|
106
|
+
context "authors" do
|
107
|
+
it "main author" do
|
108
|
+
pending "to be implemented"
|
109
|
+
end
|
110
|
+
it "additional authors" do
|
111
|
+
pending "to be implemented"
|
112
|
+
end
|
113
|
+
it "author sort" do
|
114
|
+
pending "to be implemented"
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: stanford-mods
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-11-
|
13
|
+
date: 2012-11-13 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: mods
|
@@ -146,7 +146,11 @@ files:
|
|
146
146
|
- config/mappings_hash.rb
|
147
147
|
- lib/stanford-mods.rb
|
148
148
|
- lib/stanford-mods/mappings.rb
|
149
|
+
- lib/stanford-mods/searchworks.rb
|
150
|
+
- lib/stanford-mods/searchworks_languages.rb
|
149
151
|
- lib/stanford-mods/version.rb
|
152
|
+
- spec/spec_helper.rb
|
153
|
+
- spec/values_for_req_sw_spec.rb
|
150
154
|
- stanford-mods.gemspec
|
151
155
|
homepage: https://github.com/sul-dlss/stanford-mods
|
152
156
|
licenses: []
|
@@ -162,7 +166,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
162
166
|
version: '0'
|
163
167
|
segments:
|
164
168
|
- 0
|
165
|
-
hash:
|
169
|
+
hash: -3428048468669990853
|
166
170
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
167
171
|
none: false
|
168
172
|
requirements:
|
@@ -171,12 +175,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
171
175
|
version: '0'
|
172
176
|
segments:
|
173
177
|
- 0
|
174
|
-
hash:
|
178
|
+
hash: -3428048468669990853
|
175
179
|
requirements: []
|
176
180
|
rubyforge_project:
|
177
181
|
rubygems_version: 1.8.24
|
178
182
|
signing_key:
|
179
183
|
specification_version: 3
|
180
184
|
summary: Stanford specific wrangling of MODS metadata
|
181
|
-
test_files:
|
185
|
+
test_files:
|
186
|
+
- spec/spec_helper.rb
|
187
|
+
- spec/values_for_req_sw_spec.rb
|
182
188
|
has_rdoc:
|