traject 0.0.2 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -0
- data/README.md +85 -61
- data/Rakefile +5 -0
- data/bin/traject +31 -3
- data/doc/settings.md +74 -13
- data/lib/tasks/load_maps.rake +48 -0
- data/lib/traject/indexer/settings.rb +75 -0
- data/lib/traject/indexer.rb +255 -45
- data/lib/traject/json_writer.rb +4 -2
- data/lib/traject/macros/marc21.rb +18 -6
- data/lib/traject/macros/marc21_semantics.rb +405 -0
- data/lib/traject/macros/marc_format_classifier.rb +180 -0
- data/lib/traject/marc4j_reader.rb +160 -0
- data/lib/traject/marc_extractor.rb +33 -17
- data/lib/traject/marc_reader.rb +14 -11
- data/lib/traject/solrj_writer.rb +247 -9
- data/lib/traject/thread_pool.rb +154 -0
- data/lib/traject/translation_map.rb +46 -4
- data/lib/traject/util.rb +30 -0
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/lcc_top_level.yaml +26 -0
- data/lib/translation_maps/marc_genre_007.yaml +9 -0
- data/lib/translation_maps/marc_genre_leader.yaml +22 -0
- data/lib/translation_maps/marc_geographic.yaml +589 -0
- data/lib/translation_maps/marc_instruments.yaml +102 -0
- data/lib/translation_maps/marc_languages.yaml +490 -0
- data/test/indexer/each_record_test.rb +34 -0
- data/test/indexer/macros_marc21_semantics_test.rb +206 -0
- data/test/indexer/macros_marc21_test.rb +10 -1
- data/test/indexer/map_record_test.rb +78 -8
- data/test/indexer/read_write_test.rb +43 -10
- data/test/indexer/settings_test.rb +60 -4
- data/test/indexer/to_field_test.rb +39 -0
- data/test/marc4j_reader_test.rb +75 -0
- data/test/marc_extractor_test.rb +62 -0
- data/test/marc_format_classifier_test.rb +91 -0
- data/test/marc_reader_test.rb +12 -0
- data/test/solrj_writer_test.rb +146 -43
- data/test/test_helper.rb +50 -0
- data/test/test_support/245_no_ab.marc +1 -0
- data/test/test_support/880_with_no_6.utf8.marc +1 -0
- data/test/test_support/bad_subfield_code.marc +1 -0
- data/test/test_support/date_resort_to_260.marc +1 -0
- data/test/test_support/date_type_r_missing_date2.marc +1 -0
- data/test/test_support/date_with_u.marc +1 -0
- data/test/test_support/demo_config.rb +153 -0
- data/test/test_support/emptyish_record.marc +1 -0
- data/test/test_support/louis_armstrong.marc +1 -0
- data/test/test_support/manuscript_online_thesis.marc +1 -0
- data/test/test_support/microform_online_conference.marc +1 -0
- data/test/test_support/multi_era.marc +1 -0
- data/test/test_support/multi_geo.marc +1 -0
- data/test/test_support/musical_cage.marc +1 -0
- data/test/test_support/one-marc8.mrc +1 -0
- data/test/test_support/online_only.marc +1 -0
- data/test/test_support/packed_041a_lang.marc +1 -0
- data/test/test_support/the_business_ren.marc +1 -0
- data/test/translation_map_test.rb +8 -0
- data/test/translation_maps/properties_map.properties +5 -0
- data/traject.gemspec +1 -1
- data/vendor/marc4j/README.md +17 -0
- data/vendor/marc4j/lib/marc4j-2.5.1-beta.jar +0 -0
- metadata +81 -2
@@ -0,0 +1,490 @@
|
|
1
|
+
# Map Language Codes (in 008[35-37], 041) to User Friendly Term
|
2
|
+
|
3
|
+
# ???: null
|
4
|
+
aar: Afar
|
5
|
+
abk: Abkhaz
|
6
|
+
ace: Achinese
|
7
|
+
ach: Acoli
|
8
|
+
ada: Adangme
|
9
|
+
ady: Adygei
|
10
|
+
afa: Afroasiatic (Other)
|
11
|
+
afh: Afrihili (Artificial language)
|
12
|
+
afr: Afrikaans
|
13
|
+
ajm: Aljamia
|
14
|
+
aka: Akan
|
15
|
+
akk: Akkadian
|
16
|
+
alb: Albanian
|
17
|
+
ale: Aleut
|
18
|
+
alg: Algonquian (Other)
|
19
|
+
amh: Amharic
|
20
|
+
ang: English, Old (ca. 450-1100)
|
21
|
+
apa: Apache languages
|
22
|
+
ara: Arabic
|
23
|
+
arc: Aramaic
|
24
|
+
arg: Aragonese Spanish
|
25
|
+
arm: Armenian
|
26
|
+
arn: Mapuche
|
27
|
+
arp: Arapaho
|
28
|
+
art: Artificial (Other)
|
29
|
+
arw: Arawak
|
30
|
+
asm: Assamese
|
31
|
+
ast: Bable
|
32
|
+
ath: Athapascan (Other)
|
33
|
+
aus: Australian languages
|
34
|
+
ava: Avaric
|
35
|
+
ave: Avestan
|
36
|
+
awa: Awadhi
|
37
|
+
aym: Aymara
|
38
|
+
aze: Azerbaijani
|
39
|
+
bad: Banda
|
40
|
+
bai: Bamileke languages
|
41
|
+
bak: Bashkir
|
42
|
+
bal: Baluchi
|
43
|
+
bam: Bambara
|
44
|
+
ban: Balinese
|
45
|
+
baq: Basque
|
46
|
+
bas: Basa
|
47
|
+
bat: Baltic (Other)
|
48
|
+
bej: Beja
|
49
|
+
bel: Belarusian
|
50
|
+
bem: Bemba
|
51
|
+
ben: Bengali
|
52
|
+
ber: Berber (Other)
|
53
|
+
bho: Bhojpuri
|
54
|
+
bih: Bihari
|
55
|
+
bik: Bikol
|
56
|
+
bin: Edo
|
57
|
+
bis: Bislama
|
58
|
+
bla: Siksika
|
59
|
+
bnt: Bantu (Other)
|
60
|
+
bos: Bosnian
|
61
|
+
bra: Braj
|
62
|
+
bre: Breton
|
63
|
+
btk: Batak
|
64
|
+
bua: Buriat
|
65
|
+
bug: Bugis
|
66
|
+
bul: Bulgarian
|
67
|
+
bur: Burmese
|
68
|
+
cad: Caddo
|
69
|
+
cai: Central American Indian (Other)
|
70
|
+
cam: Khmer
|
71
|
+
car: Carib
|
72
|
+
cat: Catalan
|
73
|
+
cau: Caucasian (Other)
|
74
|
+
ceb: Cebuano
|
75
|
+
cel: Celtic (Other)
|
76
|
+
cha: Chamorro
|
77
|
+
chb: Chibcha
|
78
|
+
che: Chechen
|
79
|
+
chg: Chagatai
|
80
|
+
chi: Chinese
|
81
|
+
chk: Truk
|
82
|
+
chm: Mari
|
83
|
+
chn: Chinook jargon
|
84
|
+
cho: Choctaw
|
85
|
+
chp: Chipewyan
|
86
|
+
chr: Cherokee
|
87
|
+
chu: Church Slavic
|
88
|
+
chv: Chuvash
|
89
|
+
chy: Cheyenne
|
90
|
+
cmc: Chamic languages
|
91
|
+
cop: Coptic
|
92
|
+
cor: Cornish
|
93
|
+
cos: Corsican
|
94
|
+
cpe: Creoles and Pidgins, English-based (Other)
|
95
|
+
cpf: Creoles and Pidgins, French-based (Other)
|
96
|
+
cpp: Creoles and Pidgins, Portuguese-based (Other)
|
97
|
+
cre: Cree
|
98
|
+
crh: Crimean Tatar
|
99
|
+
crp: Creoles and Pidgins (Other)
|
100
|
+
cus: Cushitic (Other)
|
101
|
+
cze: Czech
|
102
|
+
dak: Dakota
|
103
|
+
dan: Danish
|
104
|
+
dar: Dargwa
|
105
|
+
day: Dayak
|
106
|
+
del: Delaware
|
107
|
+
den: Slave
|
108
|
+
dgr: Dogrib
|
109
|
+
din: Dinka
|
110
|
+
div: Divehi
|
111
|
+
doi: Dogri
|
112
|
+
dra: Dravidian (Other)
|
113
|
+
dua: Duala
|
114
|
+
dum: Dutch, Middle (ca. 1050-1350)
|
115
|
+
dut: Dutch
|
116
|
+
dyu: Dyula
|
117
|
+
dzo: Dzongkha
|
118
|
+
efi: Efik
|
119
|
+
egy: Egyptian
|
120
|
+
eka: Ekajuk
|
121
|
+
elx: Elamite
|
122
|
+
eng: English
|
123
|
+
enm: English, Middle (1100-1500)
|
124
|
+
epo: Esperanto
|
125
|
+
esk: Eskimo languages
|
126
|
+
esp: Esperanto
|
127
|
+
est: Estonian
|
128
|
+
eth: Ethiopic
|
129
|
+
ewe: Ewe
|
130
|
+
ewo: Ewondo
|
131
|
+
fan: Fang
|
132
|
+
fao: Faroese
|
133
|
+
far: Faroese
|
134
|
+
fat: Fanti
|
135
|
+
fij: Fijian
|
136
|
+
fin: Finnish
|
137
|
+
fiu: Finno-Ugrian (Other)
|
138
|
+
fon: Fon
|
139
|
+
fre: French
|
140
|
+
fri: Frisian
|
141
|
+
frm: French, Middle (ca. 1400-1600)
|
142
|
+
fro: French, Old (ca. 842-1400)
|
143
|
+
fry: Frisian
|
144
|
+
ful: Fula
|
145
|
+
fur: Friulian
|
146
|
+
gaa: Ga
|
147
|
+
gae: Scottish Gaelic
|
148
|
+
gag: Galician
|
149
|
+
gal: Oromo
|
150
|
+
gay: Gayo
|
151
|
+
gba: Gbaya
|
152
|
+
gem: Germanic (Other)
|
153
|
+
geo: Georgian
|
154
|
+
ger: German
|
155
|
+
gez: Ethiopic
|
156
|
+
gil: Gilbertese
|
157
|
+
gla: Scottish Gaelic
|
158
|
+
gle: Irish
|
159
|
+
glg: Galician
|
160
|
+
glv: Manx
|
161
|
+
gmh: German, Middle High (ca. 1050-1500)
|
162
|
+
goh: German, Old High (ca. 750-1050)
|
163
|
+
gon: Gondi
|
164
|
+
gor: Gorontalo
|
165
|
+
got: Gothic
|
166
|
+
grb: Grebo
|
167
|
+
grc: Greek, Ancient (to 1453)
|
168
|
+
gre: Greek, Modern (1453- )
|
169
|
+
grn: Guarani
|
170
|
+
gua: Guarani
|
171
|
+
guj: Gujarati
|
172
|
+
gwi: Gwich'in
|
173
|
+
hai: Haida
|
174
|
+
hat: Haitian French Creole
|
175
|
+
hau: Hausa
|
176
|
+
haw: Hawaiian
|
177
|
+
heb: Hebrew
|
178
|
+
her: Herero
|
179
|
+
hil: Hiligaynon
|
180
|
+
him: Himachali
|
181
|
+
hin: Hindi
|
182
|
+
hit: Hittite
|
183
|
+
hmn: Hmong
|
184
|
+
hmo: Hiri Motu
|
185
|
+
hun: Hungarian
|
186
|
+
hup: Hupa
|
187
|
+
iba: Iban
|
188
|
+
ibo: Igbo
|
189
|
+
ice: Icelandic
|
190
|
+
ido: Ido
|
191
|
+
iii: Sichuan Yi
|
192
|
+
ijo: Ijo
|
193
|
+
iku: Inuktitut
|
194
|
+
ile: Interlingue
|
195
|
+
ilo: Iloko
|
196
|
+
ina: Interlingua (International Auxiliary Language Association)
|
197
|
+
inc: Indic (Other)
|
198
|
+
ind: Indonesian
|
199
|
+
ine: Indo-European (Other)
|
200
|
+
inh: Ingush
|
201
|
+
int: Interlingua (International Auxiliary Language Association)
|
202
|
+
ipk: Inupiaq
|
203
|
+
ira: Iranian (Other)
|
204
|
+
iri: Irish
|
205
|
+
iro: Iroquoian (Other)
|
206
|
+
ita: Italian
|
207
|
+
jav: Javanese
|
208
|
+
jpn: Japanese
|
209
|
+
jpr: Judeo-Persian
|
210
|
+
jrb: Judeo-Arabic
|
211
|
+
kaa: Kara-Kalpak
|
212
|
+
kab: Kabyle
|
213
|
+
kac: Kachin
|
214
|
+
kal: Kalatdlisut
|
215
|
+
kam: Kamba
|
216
|
+
kan: Kannada
|
217
|
+
kar: Karen
|
218
|
+
kas: Kashmiri
|
219
|
+
kau: Kanuri
|
220
|
+
kaw: Kawi
|
221
|
+
kaz: Kazakh
|
222
|
+
kbd: Kabardian
|
223
|
+
kha: Khasi
|
224
|
+
khi: Khoisan (Other)
|
225
|
+
khm: Khmer
|
226
|
+
kho: Khotanese
|
227
|
+
kik: Kikuyu
|
228
|
+
kin: Kinyarwanda
|
229
|
+
kir: Kyrgyz
|
230
|
+
kmb: Kimbundu
|
231
|
+
kok: Konkani
|
232
|
+
kom: Komi
|
233
|
+
kon: Kongo
|
234
|
+
kor: Korean
|
235
|
+
kos: Kusaie
|
236
|
+
kpe: Kpelle
|
237
|
+
kro: Kru
|
238
|
+
kru: Kurukh
|
239
|
+
kua: Kuanyama
|
240
|
+
kum: Kumyk
|
241
|
+
kur: Kurdish
|
242
|
+
kus: Kusaie
|
243
|
+
kut: Kutenai
|
244
|
+
lad: Ladino
|
245
|
+
lah: Lahnda
|
246
|
+
lam: Lamba
|
247
|
+
lan: Occitan (post-1500)
|
248
|
+
lao: Lao
|
249
|
+
lap: Sami
|
250
|
+
lat: Latin
|
251
|
+
lav: Latvian
|
252
|
+
lez: Lezgian
|
253
|
+
lim: Limburgish
|
254
|
+
lin: Lingala
|
255
|
+
lit: Lithuanian
|
256
|
+
lol: Mongo-Nkundu
|
257
|
+
loz: Lozi
|
258
|
+
ltz: Letzeburgesch
|
259
|
+
lua: Luba-Lulua
|
260
|
+
lub: Luba-Katanga
|
261
|
+
lug: Ganda
|
262
|
+
lui: Luiseno
|
263
|
+
lun: Lunda
|
264
|
+
luo: Luo (Kenya and Tanzania)
|
265
|
+
lus: Lushai
|
266
|
+
mac: Macedonian
|
267
|
+
mad: Madurese
|
268
|
+
mag: Magahi
|
269
|
+
mah: Marshallese
|
270
|
+
mai: Maithili
|
271
|
+
mak: Makasar
|
272
|
+
mal: Malayalam
|
273
|
+
man: Mandingo
|
274
|
+
mao: Maori
|
275
|
+
map: Austronesian (Other)
|
276
|
+
mar: Marathi
|
277
|
+
mas: Masai
|
278
|
+
max: Manx
|
279
|
+
may: Malay
|
280
|
+
mdr: Mandar
|
281
|
+
men: Mende
|
282
|
+
mga: Irish, Middle (ca. 1100-1550)
|
283
|
+
mic: Micmac
|
284
|
+
min: Minangkabau
|
285
|
+
mis: Miscellaneous languages
|
286
|
+
mkh: Mon-Khmer (Other)
|
287
|
+
mla: Malagasy
|
288
|
+
mlg: Malagasy
|
289
|
+
mlt: Maltese
|
290
|
+
mnc: Manchu
|
291
|
+
mni: Manipuri
|
292
|
+
mno: Manobo languages
|
293
|
+
moh: Mohawk
|
294
|
+
mol: Moldavian
|
295
|
+
mon: Mongolian
|
296
|
+
mos: Moore
|
297
|
+
mul: Multiple languages
|
298
|
+
mun: Munda (Other)
|
299
|
+
mus: Creek
|
300
|
+
mwr: Marwari
|
301
|
+
myn: Mayan languages
|
302
|
+
nah: Nahuatl
|
303
|
+
nai: North American Indian (Other)
|
304
|
+
nap: Neapolitan Italian
|
305
|
+
nau: Nauru
|
306
|
+
nav: Navajo
|
307
|
+
nbl: Ndebele (South Africa)
|
308
|
+
nde: Ndebele (Zimbabwe)
|
309
|
+
ndo: Ndonga
|
310
|
+
nds: Low German
|
311
|
+
nep: Nepali
|
312
|
+
new: Newari
|
313
|
+
nia: Nias
|
314
|
+
nic: Niger-Kordofanian (Other)
|
315
|
+
niu: Niuean
|
316
|
+
nno: Norwegian (Nynorsk)
|
317
|
+
nob: Norwegian (Bokmal)
|
318
|
+
nog: Nogai
|
319
|
+
non: Old Norse
|
320
|
+
nor: Norwegian
|
321
|
+
nso: Northern Sotho
|
322
|
+
nub: Nubian languages
|
323
|
+
nya: Nyanja
|
324
|
+
nym: Nyamwezi
|
325
|
+
nyn: Nyankole
|
326
|
+
nyo: Nyoro
|
327
|
+
nzi: Nzima
|
328
|
+
oci: Occitan (post-1500)
|
329
|
+
oji: Ojibwa
|
330
|
+
ori: Oriya
|
331
|
+
orm: Oromo
|
332
|
+
osa: Osage
|
333
|
+
oss: Ossetic
|
334
|
+
ota: Turkish, Ottoman
|
335
|
+
oto: Otomian languages
|
336
|
+
paa: Papuan (Other)
|
337
|
+
pag: Pangasinan
|
338
|
+
pal: Pahlavi
|
339
|
+
pam: Pampanga
|
340
|
+
pan: Panjabi
|
341
|
+
pap: Papiamento
|
342
|
+
pau: Palauan
|
343
|
+
peo: Old Persian (ca. 600-400 B.C.)
|
344
|
+
per: Persian
|
345
|
+
phi: Philippine (Other)
|
346
|
+
phn: Phoenician
|
347
|
+
pli: Pali
|
348
|
+
pol: Polish
|
349
|
+
pon: Ponape
|
350
|
+
por: Portuguese
|
351
|
+
pra: Prakrit languages
|
352
|
+
pro: Provencal (to 1500)
|
353
|
+
pus: Pushto
|
354
|
+
que: Quechua
|
355
|
+
raj: Rajasthani
|
356
|
+
rap: Rapanui
|
357
|
+
rar: Rarotongan
|
358
|
+
roa: Romance (Other)
|
359
|
+
roh: Raeto-Romance
|
360
|
+
rom: Romani
|
361
|
+
rum: Romanian
|
362
|
+
run: Rundi
|
363
|
+
rus: Russian
|
364
|
+
sad: Sandawe
|
365
|
+
sag: Sango (Ubangi Creole)
|
366
|
+
sah: Yakut
|
367
|
+
sai: South American Indian (Other)
|
368
|
+
sal: Salishan languages
|
369
|
+
sam: Samaritan Aramaic
|
370
|
+
san: Sanskrit
|
371
|
+
sao: Samoan
|
372
|
+
sas: Sasak
|
373
|
+
sat: Santali
|
374
|
+
scc: Serbian
|
375
|
+
sco: Scots
|
376
|
+
scr: Croatian
|
377
|
+
sel: Selkup
|
378
|
+
sem: Semitic (Other)
|
379
|
+
sga: Irish, Old (to 1100)
|
380
|
+
sgn: Sign languages
|
381
|
+
shn: Shan
|
382
|
+
sho: Shona
|
383
|
+
sid: Sidamo
|
384
|
+
sin: Sinhalese
|
385
|
+
sio: Siouan (Other)
|
386
|
+
sit: Sino-Tibetan (Other)
|
387
|
+
sla: Slavic (Other)
|
388
|
+
slo: Slovak
|
389
|
+
slv: Slovenian
|
390
|
+
sma: Southern Sami
|
391
|
+
sme: Northern Sami
|
392
|
+
smi: Sami
|
393
|
+
smj: Lule Sami
|
394
|
+
smn: Inari Sami
|
395
|
+
smo: Samoan
|
396
|
+
sms: Skolt Sami
|
397
|
+
sna: Shona
|
398
|
+
snd: Sindhi
|
399
|
+
snh: Sinhalese
|
400
|
+
snk: Soninke
|
401
|
+
sog: Sogdian
|
402
|
+
som: Somali
|
403
|
+
son: Songhai
|
404
|
+
sot: Sotho
|
405
|
+
spa: Spanish
|
406
|
+
srd: Sardinian
|
407
|
+
srr: Serer
|
408
|
+
ssa: Nilo-Saharan (Other)
|
409
|
+
sso: Sotho
|
410
|
+
ssw: Swazi
|
411
|
+
suk: Sukuma
|
412
|
+
sun: Sundanese
|
413
|
+
sus: Susu
|
414
|
+
sux: Sumerian
|
415
|
+
swa: Swahili
|
416
|
+
swe: Swedish
|
417
|
+
swz: Swazi
|
418
|
+
syr: Syriac
|
419
|
+
tag: Tagalog
|
420
|
+
tah: Tahitian
|
421
|
+
tai: Tai (Other)
|
422
|
+
taj: Tajik
|
423
|
+
tam: Tamil
|
424
|
+
tar: Tatar
|
425
|
+
tat: Tatar
|
426
|
+
tel: Telugu
|
427
|
+
tem: Temne
|
428
|
+
ter: Terena
|
429
|
+
tet: Tetum
|
430
|
+
tgk: Tajik
|
431
|
+
tgl: Tagalog
|
432
|
+
tha: Thai
|
433
|
+
tib: Tibetan
|
434
|
+
tig: Tigre
|
435
|
+
tir: Tigrinya
|
436
|
+
tiv: Tiv
|
437
|
+
tkl: Tokelauan
|
438
|
+
tli: Tlingit
|
439
|
+
tmh: Tamashek
|
440
|
+
tog: Tonga (Nyasa)
|
441
|
+
ton: Tongan
|
442
|
+
tpi: Tok Pisin
|
443
|
+
tru: Truk
|
444
|
+
tsi: Tsimshian
|
445
|
+
tsn: Tswana
|
446
|
+
tso: Tsonga
|
447
|
+
tsw: Tswana
|
448
|
+
tuk: Turkmen
|
449
|
+
tum: Tumbuka
|
450
|
+
tup: Tupi languages
|
451
|
+
tur: Turkish
|
452
|
+
tut: Altaic (Other)
|
453
|
+
tvl: Tuvaluan
|
454
|
+
twi: Twi
|
455
|
+
tyv: Tuvinian
|
456
|
+
udm: Udmurt
|
457
|
+
uga: Ugaritic
|
458
|
+
uig: Uighur
|
459
|
+
ukr: Ukrainian
|
460
|
+
umb: Umbundu
|
461
|
+
# und: Undetermined
|
462
|
+
urd: Urdu
|
463
|
+
uzb: Uzbek
|
464
|
+
vai: Vai
|
465
|
+
ven: Venda
|
466
|
+
vie: Vietnamese
|
467
|
+
vol: Volapuk
|
468
|
+
vot: Votic
|
469
|
+
wak: Wakashan languages
|
470
|
+
wal: Walamo
|
471
|
+
war: Waray
|
472
|
+
was: Washo
|
473
|
+
wel: Welsh
|
474
|
+
wen: Sorbian languages
|
475
|
+
wln: Walloon
|
476
|
+
wol: Wolof
|
477
|
+
xal: Kalmyk
|
478
|
+
xho: Xhosa
|
479
|
+
yao: Yao (Africa)
|
480
|
+
yap: Yapese
|
481
|
+
yid: Yiddish
|
482
|
+
yor: Yoruba
|
483
|
+
ypk: Yupik languages
|
484
|
+
zap: Zapotec
|
485
|
+
zen: Zenaga
|
486
|
+
zha: Zhuang
|
487
|
+
znd: Zande
|
488
|
+
zul: Zulu
|
489
|
+
zun: Zuni
|
490
|
+
# zxx: null
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Traject::Indexer#each_record" do
|
4
|
+
before do
|
5
|
+
@indexer = Traject::Indexer.new
|
6
|
+
end
|
7
|
+
|
8
|
+
describe "checks arguments" do
|
9
|
+
it "rejects no-arg block" do
|
10
|
+
assert_raises(ArgumentError) do
|
11
|
+
@indexer.each_record do
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
it "rejects three-arg block" do
|
16
|
+
assert_raises(ArgumentError) do
|
17
|
+
@indexer.each_record do |one, two, three|
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
it "accepts one-arg block" do
|
22
|
+
@indexer.each_record do |record|
|
23
|
+
end
|
24
|
+
end
|
25
|
+
it "accepts two-arg block" do
|
26
|
+
@indexer.each_record do |record, context|
|
27
|
+
end
|
28
|
+
end
|
29
|
+
it "accepts variable arity block" do
|
30
|
+
@indexer.each_record do |*variable|
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,206 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
require 'traject/indexer'
|
4
|
+
require 'traject/macros/marc21_semantics'
|
5
|
+
|
6
|
+
require 'json'
|
7
|
+
require 'marc/record'
|
8
|
+
|
9
|
+
# See also marc_extractor_test.rb for more detailed tests on marc extraction,
|
10
|
+
# this is just a basic test to make sure our macro works passing through to there
|
11
|
+
# and other options.
|
12
|
+
describe "Traject::Macros::Marc21Semantics" do
|
13
|
+
Marc21Semantics = Traject::Macros::Marc21Semantics # shortcut
|
14
|
+
|
15
|
+
before do
|
16
|
+
@indexer = Traject::Indexer.new
|
17
|
+
@indexer.extend Marc21Semantics
|
18
|
+
|
19
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
20
|
+
end
|
21
|
+
|
22
|
+
it "oclcnum" do
|
23
|
+
@indexer.instance_eval do
|
24
|
+
to_field "oclcnum", oclcnum
|
25
|
+
end
|
26
|
+
output = @indexer.map_record(@record)
|
27
|
+
|
28
|
+
assert_equal %w{2710183 47971712}, output["oclcnum"]
|
29
|
+
end
|
30
|
+
|
31
|
+
describe "marc_sortable_author" do
|
32
|
+
# these probably should be taking only certain subfields, but we're copying
|
33
|
+
# from SolrMarc that didn't do so either and nobody noticed, so not bothering for now.
|
34
|
+
before do
|
35
|
+
@indexer.instance_eval do
|
36
|
+
to_field "author_sort", marc_sortable_author
|
37
|
+
end
|
38
|
+
end
|
39
|
+
it "collates author and title" do
|
40
|
+
output = @indexer.map_record(@record)
|
41
|
+
|
42
|
+
assert_equal ["Herman, Edward S.Manufacturing consent : the political economy of the mass media / Edward S. Herman and Noam Chomsky ; with a new introduction by the authors."], output["author_sort"]
|
43
|
+
end
|
44
|
+
it "respects non-filing" do
|
45
|
+
@record = MARC::Reader.new(support_file_path "the_business_ren.marc").to_a.first
|
46
|
+
|
47
|
+
output = @indexer.map_record(@record)
|
48
|
+
|
49
|
+
assert_equal ["Business renaissance quarterly [electronic resource]."], output["author_sort"]
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe "marc_sortable_title" do
|
54
|
+
before do
|
55
|
+
@indexer.instance_eval { to_field "title_sort", marc_sortable_title }
|
56
|
+
end
|
57
|
+
it "works" do
|
58
|
+
output = @indexer.map_record(@record)
|
59
|
+
assert_equal ["Manufacturing consent : the political economy of the mass media"], output["title_sort"]
|
60
|
+
end
|
61
|
+
it "respects non-filing" do
|
62
|
+
@record = MARC::Reader.new(support_file_path "the_business_ren.marc").to_a.first
|
63
|
+
output = @indexer.map_record(@record)
|
64
|
+
|
65
|
+
assert_equal ["Business renaissance quarterly"], output["title_sort"]
|
66
|
+
end
|
67
|
+
it "works with a record with no 245$ab" do
|
68
|
+
@record = MARC::Reader.new(support_file_path "245_no_ab.marc").to_a.first
|
69
|
+
output = @indexer.map_record(@record)
|
70
|
+
assert_equal ["Papers"], output["title_sort"]
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
describe "marc_languages" do
|
75
|
+
before do
|
76
|
+
@indexer.instance_eval {to_field "languages", marc_languages() }
|
77
|
+
end
|
78
|
+
|
79
|
+
it "unpacks packed 041a and translates" do
|
80
|
+
@record = MARC::Reader.new(support_file_path "packed_041a_lang.marc").to_a.first
|
81
|
+
output = @indexer.map_record(@record)
|
82
|
+
|
83
|
+
assert_equal ["English", "French", "German", "Italian", "Spanish", "Russian"], output["languages"]
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
describe "marc_instrumentation_humanized" do
|
88
|
+
before do
|
89
|
+
@record = MARC::Reader.new(support_file_path "musical_cage.marc").to_a.first
|
90
|
+
@indexer.instance_eval {to_field "instrumentation", marc_instrumentation_humanized }
|
91
|
+
end
|
92
|
+
|
93
|
+
it "translates, de-duping" do
|
94
|
+
output = @indexer.map_record(@record)
|
95
|
+
|
96
|
+
assert_equal ["Larger ensemble, Unspecified", "Piano", "Soprano voice", "Tenor voice", "Violin", "Larger ensemble, Ethnic", "Guitar", "Voices, Unspecified"], output["instrumentation"]
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
describe "marc_instrument_codes_normalized" do
|
101
|
+
before do
|
102
|
+
@record = MARC::Reader.new(support_file_path "musical_cage.marc").to_a.first
|
103
|
+
@indexer.instance_eval {to_field "instrument_codes", marc_instrument_codes_normalized }
|
104
|
+
end
|
105
|
+
it "normalizes, de-duping" do
|
106
|
+
output = @indexer.map_record(@record)
|
107
|
+
|
108
|
+
assert_equal ["on", "ka01", "ka", "va01", "va", "vd01", "vd", "sa01", "sa", "oy", "tb01", "tb", "vn12", "vn"],
|
109
|
+
output["instrument_codes"]
|
110
|
+
end
|
111
|
+
it "codes soloist 048$b" do
|
112
|
+
@record = MARC::Reader.new(support_file_path "louis_armstrong.marc").to_a.first
|
113
|
+
output = @indexer.map_record(@record)
|
114
|
+
|
115
|
+
assert_equal ["bb01", "bb01.s", "bb", "bb.s", "oe"],
|
116
|
+
output["instrument_codes"]
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
describe "publication_date" do
|
121
|
+
# there are way too many edge cases for us to test em all, but we'll test some of em.
|
122
|
+
it "pulls out 008 date_type s" do
|
123
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
124
|
+
assert_equal 2002, Marc21Semantics.publication_date(@record)
|
125
|
+
end
|
126
|
+
it "uses start date for date_type c continuing resource" do
|
127
|
+
@record = MARC::Reader.new(support_file_path "the_business_ren.marc").to_a.first
|
128
|
+
assert_equal 2006, Marc21Semantics.publication_date(@record)
|
129
|
+
end
|
130
|
+
it "returns nil when the records really got nothing" do
|
131
|
+
@record = MARC::Reader.new(support_file_path "emptyish_record.marc").to_a.first
|
132
|
+
assert_equal nil, Marc21Semantics.publication_date(@record)
|
133
|
+
end
|
134
|
+
it "estimates with a single 'u'" do
|
135
|
+
@record = MARC::Reader.new(support_file_path "date_with_u.marc").to_a.first
|
136
|
+
# was 184u as date1 on a continuing resource. For continuing resources,
|
137
|
+
# we take the first date. And need to deal with the u.
|
138
|
+
assert_equal 1845, Marc21Semantics.publication_date(@record)
|
139
|
+
end
|
140
|
+
it "resorts to 260c" do
|
141
|
+
@record = MARC::Reader.new(support_file_path "date_resort_to_260.marc").to_a.first
|
142
|
+
assert_equal 1980, Marc21Semantics.publication_date(@record)
|
143
|
+
end
|
144
|
+
it "works with date type r missing date2" do
|
145
|
+
@record = MARC::Reader.new(support_file_path "date_type_r_missing_date2.marc").to_a.first
|
146
|
+
assert_equal 1957, Marc21Semantics.publication_date(@record)
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
describe "marc_lcc_to_broad_category" do
|
151
|
+
before do
|
152
|
+
@indexer.instance_eval {to_field "discipline_facet", marc_lcc_to_broad_category }
|
153
|
+
end
|
154
|
+
it "maps a simple example" do
|
155
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
156
|
+
output = @indexer.map_record(@record)
|
157
|
+
|
158
|
+
assert_equal ["Language & Literature"], output["discipline_facet"]
|
159
|
+
end
|
160
|
+
it "maps to default" do
|
161
|
+
@record = MARC::Reader.new(support_file_path "musical_cage.marc").to_a.first
|
162
|
+
output = @indexer.map_record(@record)
|
163
|
+
assert_equal ["Unknown"], output["discipline_facet"]
|
164
|
+
end
|
165
|
+
it "maps to nothing if none and no default" do
|
166
|
+
@indexer.instance_eval {to_field "discipline_no_default", marc_lcc_to_broad_category(:default => nil)}
|
167
|
+
@record = MARC::Reader.new(support_file_path "musical_cage.marc").to_a.first
|
168
|
+
output = @indexer.map_record(@record)
|
169
|
+
|
170
|
+
assert_nil output["discipline_no_default"]
|
171
|
+
end
|
172
|
+
end
|
173
|
+
|
174
|
+
describe "marc_geo_facet" do
|
175
|
+
before do
|
176
|
+
@indexer.instance_eval {to_field "geo_facet", marc_geo_facet }
|
177
|
+
end
|
178
|
+
it "maps a complicated record" do
|
179
|
+
@record = MARC::Reader.new(support_file_path "multi_geo.marc").to_a.first
|
180
|
+
output = @indexer.map_record(@record)
|
181
|
+
|
182
|
+
assert_equal ["Europe", "Middle East", "Africa, North", "Agora (Athens, Greece)", "Rome (Italy)", "Italy"],
|
183
|
+
output["geo_facet"]
|
184
|
+
end
|
185
|
+
it "maps nothing on a record with no geo" do
|
186
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
187
|
+
output = @indexer.map_record(@record)
|
188
|
+
assert_nil output["geo_facet"]
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
describe "marc_era_facet" do
|
193
|
+
before do
|
194
|
+
@indexer.instance_eval {to_field "era_facet", marc_era_facet}
|
195
|
+
end
|
196
|
+
it "maps a complicated record" do
|
197
|
+
@record = MARC::Reader.new(support_file_path "multi_era.marc").to_a.first
|
198
|
+
output = @indexer.map_record(@record)
|
199
|
+
|
200
|
+
assert_equal ["Early modern, 1500-1700", "17th century", "Great Britain: Puritan Revolution, 1642-1660", "Great Britain: Civil War, 1642-1649", "1642-1660"],
|
201
|
+
output["era_facet"]
|
202
|
+
end
|
203
|
+
|
204
|
+
end
|
205
|
+
|
206
|
+
end
|