traject 0.0.2 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/README.md +85 -61
- data/Rakefile +5 -0
- data/bin/traject +31 -3
- data/doc/settings.md +74 -13
- data/lib/tasks/load_maps.rake +48 -0
- data/lib/traject/indexer/settings.rb +75 -0
- data/lib/traject/indexer.rb +255 -45
- data/lib/traject/json_writer.rb +4 -2
- data/lib/traject/macros/marc21.rb +18 -6
- data/lib/traject/macros/marc21_semantics.rb +405 -0
- data/lib/traject/macros/marc_format_classifier.rb +180 -0
- data/lib/traject/marc4j_reader.rb +160 -0
- data/lib/traject/marc_extractor.rb +33 -17
- data/lib/traject/marc_reader.rb +14 -11
- data/lib/traject/solrj_writer.rb +247 -9
- data/lib/traject/thread_pool.rb +154 -0
- data/lib/traject/translation_map.rb +46 -4
- data/lib/traject/util.rb +30 -0
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/lcc_top_level.yaml +26 -0
- data/lib/translation_maps/marc_genre_007.yaml +9 -0
- data/lib/translation_maps/marc_genre_leader.yaml +22 -0
- data/lib/translation_maps/marc_geographic.yaml +589 -0
- data/lib/translation_maps/marc_instruments.yaml +102 -0
- data/lib/translation_maps/marc_languages.yaml +490 -0
- data/test/indexer/each_record_test.rb +34 -0
- data/test/indexer/macros_marc21_semantics_test.rb +206 -0
- data/test/indexer/macros_marc21_test.rb +10 -1
- data/test/indexer/map_record_test.rb +78 -8
- data/test/indexer/read_write_test.rb +43 -10
- data/test/indexer/settings_test.rb +60 -4
- data/test/indexer/to_field_test.rb +39 -0
- data/test/marc4j_reader_test.rb +75 -0
- data/test/marc_extractor_test.rb +62 -0
- data/test/marc_format_classifier_test.rb +91 -0
- data/test/marc_reader_test.rb +12 -0
- data/test/solrj_writer_test.rb +146 -43
- data/test/test_helper.rb +50 -0
- data/test/test_support/245_no_ab.marc +1 -0
- data/test/test_support/880_with_no_6.utf8.marc +1 -0
- data/test/test_support/bad_subfield_code.marc +1 -0
- data/test/test_support/date_resort_to_260.marc +1 -0
- data/test/test_support/date_type_r_missing_date2.marc +1 -0
- data/test/test_support/date_with_u.marc +1 -0
- data/test/test_support/demo_config.rb +153 -0
- data/test/test_support/emptyish_record.marc +1 -0
- data/test/test_support/louis_armstrong.marc +1 -0
- data/test/test_support/manuscript_online_thesis.marc +1 -0
- data/test/test_support/microform_online_conference.marc +1 -0
- data/test/test_support/multi_era.marc +1 -0
- data/test/test_support/multi_geo.marc +1 -0
- data/test/test_support/musical_cage.marc +1 -0
- data/test/test_support/one-marc8.mrc +1 -0
- data/test/test_support/online_only.marc +1 -0
- data/test/test_support/packed_041a_lang.marc +1 -0
- data/test/test_support/the_business_ren.marc +1 -0
- data/test/translation_map_test.rb +8 -0
- data/test/translation_maps/properties_map.properties +5 -0
- data/traject.gemspec +1 -1
- data/vendor/marc4j/README.md +17 -0
- data/vendor/marc4j/lib/marc4j-2.5.1-beta.jar +0 -0
- metadata +81 -2
@@ -0,0 +1,490 @@
|
|
1
|
+
# Map MARC language codes (from 008[35-37] and 041) to user-friendly terms
|
2
|
+
|
3
|
+
# ???: null
|
4
|
+
aar: Afar
|
5
|
+
abk: Abkhaz
|
6
|
+
ace: Achinese
|
7
|
+
ach: Acoli
|
8
|
+
ada: Adangme
|
9
|
+
ady: Adygei
|
10
|
+
afa: Afroasiatic (Other)
|
11
|
+
afh: Afrihili (Artificial language)
|
12
|
+
afr: Afrikaans
|
13
|
+
ajm: Aljamia
|
14
|
+
aka: Akan
|
15
|
+
akk: Akkadian
|
16
|
+
alb: Albanian
|
17
|
+
ale: Aleut
|
18
|
+
alg: Algonquian (Other)
|
19
|
+
amh: Amharic
|
20
|
+
ang: English, Old (ca. 450-1100)
|
21
|
+
apa: Apache languages
|
22
|
+
ara: Arabic
|
23
|
+
arc: Aramaic
|
24
|
+
arg: Aragonese Spanish
|
25
|
+
arm: Armenian
|
26
|
+
arn: Mapuche
|
27
|
+
arp: Arapaho
|
28
|
+
art: Artificial (Other)
|
29
|
+
arw: Arawak
|
30
|
+
asm: Assamese
|
31
|
+
ast: Bable
|
32
|
+
ath: Athapascan (Other)
|
33
|
+
aus: Australian languages
|
34
|
+
ava: Avaric
|
35
|
+
ave: Avestan
|
36
|
+
awa: Awadhi
|
37
|
+
aym: Aymara
|
38
|
+
aze: Azerbaijani
|
39
|
+
bad: Banda
|
40
|
+
bai: Bamileke languages
|
41
|
+
bak: Bashkir
|
42
|
+
bal: Baluchi
|
43
|
+
bam: Bambara
|
44
|
+
ban: Balinese
|
45
|
+
baq: Basque
|
46
|
+
bas: Basa
|
47
|
+
bat: Baltic (Other)
|
48
|
+
bej: Beja
|
49
|
+
bel: Belarusian
|
50
|
+
bem: Bemba
|
51
|
+
ben: Bengali
|
52
|
+
ber: Berber (Other)
|
53
|
+
bho: Bhojpuri
|
54
|
+
bih: Bihari
|
55
|
+
bik: Bikol
|
56
|
+
bin: Edo
|
57
|
+
bis: Bislama
|
58
|
+
bla: Siksika
|
59
|
+
bnt: Bantu (Other)
|
60
|
+
bos: Bosnian
|
61
|
+
bra: Braj
|
62
|
+
bre: Breton
|
63
|
+
btk: Batak
|
64
|
+
bua: Buriat
|
65
|
+
bug: Bugis
|
66
|
+
bul: Bulgarian
|
67
|
+
bur: Burmese
|
68
|
+
cad: Caddo
|
69
|
+
cai: Central American Indian (Other)
|
70
|
+
cam: Khmer
|
71
|
+
car: Carib
|
72
|
+
cat: Catalan
|
73
|
+
cau: Caucasian (Other)
|
74
|
+
ceb: Cebuano
|
75
|
+
cel: Celtic (Other)
|
76
|
+
cha: Chamorro
|
77
|
+
chb: Chibcha
|
78
|
+
che: Chechen
|
79
|
+
chg: Chagatai
|
80
|
+
chi: Chinese
|
81
|
+
chk: Truk
|
82
|
+
chm: Mari
|
83
|
+
chn: Chinook jargon
|
84
|
+
cho: Choctaw
|
85
|
+
chp: Chipewyan
|
86
|
+
chr: Cherokee
|
87
|
+
chu: Church Slavic
|
88
|
+
chv: Chuvash
|
89
|
+
chy: Cheyenne
|
90
|
+
cmc: Chamic languages
|
91
|
+
cop: Coptic
|
92
|
+
cor: Cornish
|
93
|
+
cos: Corsican
|
94
|
+
cpe: Creoles and Pidgins, English-based (Other)
|
95
|
+
cpf: Creoles and Pidgins, French-based (Other)
|
96
|
+
cpp: Creoles and Pidgins, Portuguese-based (Other)
|
97
|
+
cre: Cree
|
98
|
+
crh: Crimean Tatar
|
99
|
+
crp: Creoles and Pidgins (Other)
|
100
|
+
cus: Cushitic (Other)
|
101
|
+
cze: Czech
|
102
|
+
dak: Dakota
|
103
|
+
dan: Danish
|
104
|
+
dar: Dargwa
|
105
|
+
day: Dayak
|
106
|
+
del: Delaware
|
107
|
+
den: Slave
|
108
|
+
dgr: Dogrib
|
109
|
+
din: Dinka
|
110
|
+
div: Divehi
|
111
|
+
doi: Dogri
|
112
|
+
dra: Dravidian (Other)
|
113
|
+
dua: Duala
|
114
|
+
dum: Dutch, Middle (ca. 1050-1350)
|
115
|
+
dut: Dutch
|
116
|
+
dyu: Dyula
|
117
|
+
dzo: Dzongkha
|
118
|
+
efi: Efik
|
119
|
+
egy: Egyptian
|
120
|
+
eka: Ekajuk
|
121
|
+
elx: Elamite
|
122
|
+
eng: English
|
123
|
+
enm: English, Middle (1100-1500)
|
124
|
+
epo: Esperanto
|
125
|
+
esk: Eskimo languages
|
126
|
+
esp: Esperanto
|
127
|
+
est: Estonian
|
128
|
+
eth: Ethiopic
|
129
|
+
ewe: Ewe
|
130
|
+
ewo: Ewondo
|
131
|
+
fan: Fang
|
132
|
+
fao: Faroese
|
133
|
+
far: Faroese
|
134
|
+
fat: Fanti
|
135
|
+
fij: Fijian
|
136
|
+
fin: Finnish
|
137
|
+
fiu: Finno-Ugrian (Other)
|
138
|
+
fon: Fon
|
139
|
+
fre: French
|
140
|
+
fri: Frisian
|
141
|
+
frm: French, Middle (ca. 1400-1600)
|
142
|
+
fro: French, Old (ca. 842-1400)
|
143
|
+
fry: Frisian
|
144
|
+
ful: Fula
|
145
|
+
fur: Friulian
|
146
|
+
gaa: Ga
|
147
|
+
gae: Scottish Gaelic
|
148
|
+
gag: Galician
|
149
|
+
gal: Oromo
|
150
|
+
gay: Gayo
|
151
|
+
gba: Gbaya
|
152
|
+
gem: Germanic (Other)
|
153
|
+
geo: Georgian
|
154
|
+
ger: German
|
155
|
+
gez: Ethiopic
|
156
|
+
gil: Gilbertese
|
157
|
+
gla: Scottish Gaelic
|
158
|
+
gle: Irish
|
159
|
+
glg: Galician
|
160
|
+
glv: Manx
|
161
|
+
gmh: German, Middle High (ca. 1050-1500)
|
162
|
+
goh: German, Old High (ca. 750-1050)
|
163
|
+
gon: Gondi
|
164
|
+
gor: Gorontalo
|
165
|
+
got: Gothic
|
166
|
+
grb: Grebo
|
167
|
+
grc: Greek, Ancient (to 1453)
|
168
|
+
gre: Greek, Modern (1453- )
|
169
|
+
grn: Guarani
|
170
|
+
gua: Guarani
|
171
|
+
guj: Gujarati
|
172
|
+
gwi: Gwich'in
|
173
|
+
hai: Haida
|
174
|
+
hat: Haitian French Creole
|
175
|
+
hau: Hausa
|
176
|
+
haw: Hawaiian
|
177
|
+
heb: Hebrew
|
178
|
+
her: Herero
|
179
|
+
hil: Hiligaynon
|
180
|
+
him: Himachali
|
181
|
+
hin: Hindi
|
182
|
+
hit: Hittite
|
183
|
+
hmn: Hmong
|
184
|
+
hmo: Hiri Motu
|
185
|
+
hun: Hungarian
|
186
|
+
hup: Hupa
|
187
|
+
iba: Iban
|
188
|
+
ibo: Igbo
|
189
|
+
ice: Icelandic
|
190
|
+
ido: Ido
|
191
|
+
iii: Sichuan Yi
|
192
|
+
ijo: Ijo
|
193
|
+
iku: Inuktitut
|
194
|
+
ile: Interlingue
|
195
|
+
ilo: Iloko
|
196
|
+
ina: Interlingua (International Auxiliary Language Association)
|
197
|
+
inc: Indic (Other)
|
198
|
+
ind: Indonesian
|
199
|
+
ine: Indo-European (Other)
|
200
|
+
inh: Ingush
|
201
|
+
int: Interlingua (International Auxiliary Language Association)
|
202
|
+
ipk: Inupiaq
|
203
|
+
ira: Iranian (Other)
|
204
|
+
iri: Irish
|
205
|
+
iro: Iroquoian (Other)
|
206
|
+
ita: Italian
|
207
|
+
jav: Javanese
|
208
|
+
jpn: Japanese
|
209
|
+
jpr: Judeo-Persian
|
210
|
+
jrb: Judeo-Arabic
|
211
|
+
kaa: Kara-Kalpak
|
212
|
+
kab: Kabyle
|
213
|
+
kac: Kachin
|
214
|
+
kal: Kalatdlisut
|
215
|
+
kam: Kamba
|
216
|
+
kan: Kannada
|
217
|
+
kar: Karen
|
218
|
+
kas: Kashmiri
|
219
|
+
kau: Kanuri
|
220
|
+
kaw: Kawi
|
221
|
+
kaz: Kazakh
|
222
|
+
kbd: Kabardian
|
223
|
+
kha: Khasi
|
224
|
+
khi: Khoisan (Other)
|
225
|
+
khm: Khmer
|
226
|
+
kho: Khotanese
|
227
|
+
kik: Kikuyu
|
228
|
+
kin: Kinyarwanda
|
229
|
+
kir: Kyrgyz
|
230
|
+
kmb: Kimbundu
|
231
|
+
kok: Konkani
|
232
|
+
kom: Komi
|
233
|
+
kon: Kongo
|
234
|
+
kor: Korean
|
235
|
+
kos: Kusaie
|
236
|
+
kpe: Kpelle
|
237
|
+
kro: Kru
|
238
|
+
kru: Kurukh
|
239
|
+
kua: Kuanyama
|
240
|
+
kum: Kumyk
|
241
|
+
kur: Kurdish
|
242
|
+
kus: Kusaie
|
243
|
+
kut: Kutenai
|
244
|
+
lad: Ladino
|
245
|
+
lah: Lahnda
|
246
|
+
lam: Lamba
|
247
|
+
lan: Occitan (post-1500)
|
248
|
+
lao: Lao
|
249
|
+
lap: Sami
|
250
|
+
lat: Latin
|
251
|
+
lav: Latvian
|
252
|
+
lez: Lezgian
|
253
|
+
lim: Limburgish
|
254
|
+
lin: Lingala
|
255
|
+
lit: Lithuanian
|
256
|
+
lol: Mongo-Nkundu
|
257
|
+
loz: Lozi
|
258
|
+
ltz: Letzeburgesch
|
259
|
+
lua: Luba-Lulua
|
260
|
+
lub: Luba-Katanga
|
261
|
+
lug: Ganda
|
262
|
+
lui: Luiseno
|
263
|
+
lun: Lunda
|
264
|
+
luo: Luo (Kenya and Tanzania)
|
265
|
+
lus: Lushai
|
266
|
+
mac: Macedonian
|
267
|
+
mad: Madurese
|
268
|
+
mag: Magahi
|
269
|
+
mah: Marshallese
|
270
|
+
mai: Maithili
|
271
|
+
mak: Makasar
|
272
|
+
mal: Malayalam
|
273
|
+
man: Mandingo
|
274
|
+
mao: Maori
|
275
|
+
map: Austronesian (Other)
|
276
|
+
mar: Marathi
|
277
|
+
mas: Masai
|
278
|
+
max: Manx
|
279
|
+
may: Malay
|
280
|
+
mdr: Mandar
|
281
|
+
men: Mende
|
282
|
+
mga: Irish, Middle (ca. 1100-1550)
|
283
|
+
mic: Micmac
|
284
|
+
min: Minangkabau
|
285
|
+
mis: Miscellaneous languages
|
286
|
+
mkh: Mon-Khmer (Other)
|
287
|
+
mla: Malagasy
|
288
|
+
mlg: Malagasy
|
289
|
+
mlt: Maltese
|
290
|
+
mnc: Manchu
|
291
|
+
mni: Manipuri
|
292
|
+
mno: Manobo languages
|
293
|
+
moh: Mohawk
|
294
|
+
mol: Moldavian
|
295
|
+
mon: Mongolian
|
296
|
+
mos: Moore
|
297
|
+
mul: Multiple languages
|
298
|
+
mun: Munda (Other)
|
299
|
+
mus: Creek
|
300
|
+
mwr: Marwari
|
301
|
+
myn: Mayan languages
|
302
|
+
nah: Nahuatl
|
303
|
+
nai: North American Indian (Other)
|
304
|
+
nap: Neapolitan Italian
|
305
|
+
nau: Nauru
|
306
|
+
nav: Navajo
|
307
|
+
nbl: Ndebele (South Africa)
|
308
|
+
nde: Ndebele (Zimbabwe)
|
309
|
+
ndo: Ndonga
|
310
|
+
nds: Low German
|
311
|
+
nep: Nepali
|
312
|
+
new: Newari
|
313
|
+
nia: Nias
|
314
|
+
nic: Niger-Kordofanian (Other)
|
315
|
+
niu: Niuean
|
316
|
+
nno: Norwegian (Nynorsk)
|
317
|
+
nob: Norwegian (Bokmal)
|
318
|
+
nog: Nogai
|
319
|
+
non: Old Norse
|
320
|
+
nor: Norwegian
|
321
|
+
nso: Northern Sotho
|
322
|
+
nub: Nubian languages
|
323
|
+
nya: Nyanja
|
324
|
+
nym: Nyamwezi
|
325
|
+
nyn: Nyankole
|
326
|
+
nyo: Nyoro
|
327
|
+
nzi: Nzima
|
328
|
+
oci: Occitan (post-1500)
|
329
|
+
oji: Ojibwa
|
330
|
+
ori: Oriya
|
331
|
+
orm: Oromo
|
332
|
+
osa: Osage
|
333
|
+
oss: Ossetic
|
334
|
+
ota: Turkish, Ottoman
|
335
|
+
oto: Otomian languages
|
336
|
+
paa: Papuan (Other)
|
337
|
+
pag: Pangasinan
|
338
|
+
pal: Pahlavi
|
339
|
+
pam: Pampanga
|
340
|
+
pan: Panjabi
|
341
|
+
pap: Papiamento
|
342
|
+
pau: Palauan
|
343
|
+
peo: Old Persian (ca. 600-400 B.C.)
|
344
|
+
per: Persian
|
345
|
+
phi: Philippine (Other)
|
346
|
+
phn: Phoenician
|
347
|
+
pli: Pali
|
348
|
+
pol: Polish
|
349
|
+
pon: Ponape
|
350
|
+
por: Portuguese
|
351
|
+
pra: Prakrit languages
|
352
|
+
pro: Provencal (to 1500)
|
353
|
+
pus: Pushto
|
354
|
+
que: Quechua
|
355
|
+
raj: Rajasthani
|
356
|
+
rap: Rapanui
|
357
|
+
rar: Rarotongan
|
358
|
+
roa: Romance (Other)
|
359
|
+
roh: Raeto-Romance
|
360
|
+
rom: Romani
|
361
|
+
rum: Romanian
|
362
|
+
run: Rundi
|
363
|
+
rus: Russian
|
364
|
+
sad: Sandawe
|
365
|
+
sag: Sango (Ubangi Creole)
|
366
|
+
sah: Yakut
|
367
|
+
sai: South American Indian (Other)
|
368
|
+
sal: Salishan languages
|
369
|
+
sam: Samaritan Aramaic
|
370
|
+
san: Sanskrit
|
371
|
+
sao: Samoan
|
372
|
+
sas: Sasak
|
373
|
+
sat: Santali
|
374
|
+
scc: Serbian
|
375
|
+
sco: Scots
|
376
|
+
scr: Croatian
|
377
|
+
sel: Selkup
|
378
|
+
sem: Semitic (Other)
|
379
|
+
sga: Irish, Old (to 1100)
|
380
|
+
sgn: Sign languages
|
381
|
+
shn: Shan
|
382
|
+
sho: Shona
|
383
|
+
sid: Sidamo
|
384
|
+
sin: Sinhalese
|
385
|
+
sio: Siouan (Other)
|
386
|
+
sit: Sino-Tibetan (Other)
|
387
|
+
sla: Slavic (Other)
|
388
|
+
slo: Slovak
|
389
|
+
slv: Slovenian
|
390
|
+
sma: Southern Sami
|
391
|
+
sme: Northern Sami
|
392
|
+
smi: Sami
|
393
|
+
smj: Lule Sami
|
394
|
+
smn: Inari Sami
|
395
|
+
smo: Samoan
|
396
|
+
sms: Skolt Sami
|
397
|
+
sna: Shona
|
398
|
+
snd: Sindhi
|
399
|
+
snh: Sinhalese
|
400
|
+
snk: Soninke
|
401
|
+
sog: Sogdian
|
402
|
+
som: Somali
|
403
|
+
son: Songhai
|
404
|
+
sot: Sotho
|
405
|
+
spa: Spanish
|
406
|
+
srd: Sardinian
|
407
|
+
srr: Serer
|
408
|
+
ssa: Nilo-Saharan (Other)
|
409
|
+
sso: Sotho
|
410
|
+
ssw: Swazi
|
411
|
+
suk: Sukuma
|
412
|
+
sun: Sundanese
|
413
|
+
sus: Susu
|
414
|
+
sux: Sumerian
|
415
|
+
swa: Swahili
|
416
|
+
swe: Swedish
|
417
|
+
swz: Swazi
|
418
|
+
syr: Syriac
|
419
|
+
tag: Tagalog
|
420
|
+
tah: Tahitian
|
421
|
+
tai: Tai (Other)
|
422
|
+
taj: Tajik
|
423
|
+
tam: Tamil
|
424
|
+
tar: Tatar
|
425
|
+
tat: Tatar
|
426
|
+
tel: Telugu
|
427
|
+
tem: Temne
|
428
|
+
ter: Terena
|
429
|
+
tet: Tetum
|
430
|
+
tgk: Tajik
|
431
|
+
tgl: Tagalog
|
432
|
+
tha: Thai
|
433
|
+
tib: Tibetan
|
434
|
+
tig: Tigre
|
435
|
+
tir: Tigrinya
|
436
|
+
tiv: Tiv
|
437
|
+
tkl: Tokelauan
|
438
|
+
tli: Tlingit
|
439
|
+
tmh: Tamashek
|
440
|
+
tog: Tonga (Nyasa)
|
441
|
+
ton: Tongan
|
442
|
+
tpi: Tok Pisin
|
443
|
+
tru: Truk
|
444
|
+
tsi: Tsimshian
|
445
|
+
tsn: Tswana
|
446
|
+
tso: Tsonga
|
447
|
+
tsw: Tswana
|
448
|
+
tuk: Turkmen
|
449
|
+
tum: Tumbuka
|
450
|
+
tup: Tupi languages
|
451
|
+
tur: Turkish
|
452
|
+
tut: Altaic (Other)
|
453
|
+
tvl: Tuvaluan
|
454
|
+
twi: Twi
|
455
|
+
tyv: Tuvinian
|
456
|
+
udm: Udmurt
|
457
|
+
uga: Ugaritic
|
458
|
+
uig: Uighur
|
459
|
+
ukr: Ukrainian
|
460
|
+
umb: Umbundu
|
461
|
+
# und: Undetermined
|
462
|
+
urd: Urdu
|
463
|
+
uzb: Uzbek
|
464
|
+
vai: Vai
|
465
|
+
ven: Venda
|
466
|
+
vie: Vietnamese
|
467
|
+
vol: Volapuk
|
468
|
+
vot: Votic
|
469
|
+
wak: Wakashan languages
|
470
|
+
wal: Walamo
|
471
|
+
war: Waray
|
472
|
+
was: Washo
|
473
|
+
wel: Welsh
|
474
|
+
wen: Sorbian languages
|
475
|
+
wln: Walloon
|
476
|
+
wol: Wolof
|
477
|
+
xal: Kalmyk
|
478
|
+
xho: Xhosa
|
479
|
+
yao: Yao (Africa)
|
480
|
+
yap: Yapese
|
481
|
+
yid: Yiddish
|
482
|
+
yor: Yoruba
|
483
|
+
ypk: Yupik languages
|
484
|
+
zap: Zapotec
|
485
|
+
zen: Zenaga
|
486
|
+
zha: Zhuang
|
487
|
+
znd: Zande
|
488
|
+
zul: Zulu
|
489
|
+
zun: Zuni
|
490
|
+
# zxx: null
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Spec for the block-arity validation done by Traject::Indexer#each_record.
# The examples below show that blocks declaring one argument, two arguments,
# or a splat are accepted, while zero-argument and three-argument blocks
# raise ArgumentError.
describe "Traject::Indexer#each_record" do
  before { @indexer = Traject::Indexer.new }

  describe "checks arguments" do
    # --- rejected arities -------------------------------------------------
    it "rejects no-arg block" do
      assert_raises(ArgumentError) { @indexer.each_record { } }
    end

    it "rejects three-arg block" do
      assert_raises(ArgumentError) { @indexer.each_record { |one, two, three| } }
    end

    # --- accepted arities: these pass simply by not raising ---------------
    it "accepts one-arg block" do
      @indexer.each_record { |record| }
    end

    it "accepts two-arg block" do
      @indexer.each_record { |record, context| }
    end

    it "accepts variable arity block" do
      @indexer.each_record { |*variable| }
    end
  end
end
|
@@ -0,0 +1,206 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
require 'traject/indexer'
|
4
|
+
require 'traject/macros/marc21_semantics'
|
5
|
+
|
6
|
+
require 'json'
|
7
|
+
require 'marc/record'
|
8
|
+
|
9
|
+
# See also marc_extractor_test.rb for more detailed tests on marc extraction,
|
10
|
+
# this is just a basic test to make sure our macro works passing through to there
|
11
|
+
# and other options.
|
12
|
+
# Spec for the Traject::Macros::Marc21Semantics macros. Each nested group
# registers one macro on a fresh indexer with to_field, maps a fixture MARC
# record through it, and compares the resulting output field against the
# expected values.
describe "Traject::Macros::Marc21Semantics" do
  # Shortcut. NOTE: a block does not open a new constant scope, so this
  # constant is defined in the enclosing lexical scope of this file.
  Marc21Semantics = Traject::Macros::Marc21Semantics

  # Returns the first MARC record read from the named fixture file.
  # The file location is resolved by support_file_path (defined outside
  # this file -- presumably in test_helper; confirm).
  def first_record(filename)
    MARC::Reader.new(support_file_path(filename)).to_a.first
  end

  before do
    @indexer = Traject::Indexer.new
    @indexer.extend Marc21Semantics

    # Default fixture; individual examples override @record as needed.
    @record = first_record("manufacturing_consent.marc")
  end

  it "oclcnum" do
    @indexer.instance_eval do
      to_field "oclcnum", oclcnum
    end
    output = @indexer.map_record(@record)

    assert_equal %w{2710183 47971712}, output["oclcnum"]
  end

  describe "marc_sortable_author" do
    # these probably should be taking only certain subfields, but we're copying
    # from SolrMarc that didn't do so either and nobody noticed, so not bothering for now.
    before do
      @indexer.instance_eval do
        to_field "author_sort", marc_sortable_author
      end
    end

    it "collates author and title" do
      output = @indexer.map_record(@record)

      assert_equal ["Herman, Edward S.Manufacturing consent : the political economy of the mass media / Edward S. Herman and Noam Chomsky ; with a new introduction by the authors."], output["author_sort"]
    end

    it "respects non-filing" do
      @record = first_record("the_business_ren.marc")

      output = @indexer.map_record(@record)

      assert_equal ["Business renaissance quarterly [electronic resource]."], output["author_sort"]
    end
  end

  describe "marc_sortable_title" do
    before do
      @indexer.instance_eval { to_field "title_sort", marc_sortable_title }
    end

    it "works" do
      output = @indexer.map_record(@record)
      assert_equal ["Manufacturing consent : the political economy of the mass media"], output["title_sort"]
    end

    it "respects non-filing" do
      @record = first_record("the_business_ren.marc")
      output = @indexer.map_record(@record)

      assert_equal ["Business renaissance quarterly"], output["title_sort"]
    end

    it "works with a record with no 245$ab" do
      @record = first_record("245_no_ab.marc")
      output = @indexer.map_record(@record)
      assert_equal ["Papers"], output["title_sort"]
    end
  end

  describe "marc_languages" do
    before do
      @indexer.instance_eval { to_field "languages", marc_languages }
    end

    it "unpacks packed 041a and translates" do
      @record = first_record("packed_041a_lang.marc")
      output = @indexer.map_record(@record)

      assert_equal ["English", "French", "German", "Italian", "Spanish", "Russian"], output["languages"]
    end
  end

  describe "marc_instrumentation_humanized" do
    before do
      @record = first_record("musical_cage.marc")
      @indexer.instance_eval { to_field "instrumentation", marc_instrumentation_humanized }
    end

    it "translates, de-duping" do
      output = @indexer.map_record(@record)

      assert_equal ["Larger ensemble, Unspecified", "Piano", "Soprano voice", "Tenor voice", "Violin", "Larger ensemble, Ethnic", "Guitar", "Voices, Unspecified"], output["instrumentation"]
    end
  end

  describe "marc_instrument_codes_normalized" do
    before do
      @record = first_record("musical_cage.marc")
      @indexer.instance_eval { to_field "instrument_codes", marc_instrument_codes_normalized }
    end

    it "normalizes, de-duping" do
      output = @indexer.map_record(@record)

      assert_equal ["on", "ka01", "ka", "va01", "va", "vd01", "vd", "sa01", "sa", "oy", "tb01", "tb", "vn12", "vn"],
        output["instrument_codes"]
    end

    it "codes soloist 048$b" do
      @record = first_record("louis_armstrong.marc")
      output = @indexer.map_record(@record)

      assert_equal ["bb01", "bb01.s", "bb", "bb.s", "oe"],
        output["instrument_codes"]
    end
  end

  describe "publication_date" do
    # there are way too many edge cases for us to test em all, but we'll test some of em.
    it "pulls out 008 date_type s" do
      @record = first_record("manufacturing_consent.marc")
      assert_equal 2002, Marc21Semantics.publication_date(@record)
    end

    it "uses start date for date_type c continuing resource" do
      @record = first_record("the_business_ren.marc")
      assert_equal 2006, Marc21Semantics.publication_date(@record)
    end

    it "returns nil when the records really got nothing" do
      @record = first_record("emptyish_record.marc")
      # assert_nil rather than assert_equal nil: the latter is deprecated in
      # Minitest 5 and will fail in Minitest 6.
      assert_nil Marc21Semantics.publication_date(@record)
    end

    it "estimates with a single 'u'" do
      @record = first_record("date_with_u.marc")
      # was 184u as date1 on a continuing resource. For continuing resources,
      # we take the first date. And need to deal with the u.
      assert_equal 1845, Marc21Semantics.publication_date(@record)
    end

    it "resorts to 260c" do
      @record = first_record("date_resort_to_260.marc")
      assert_equal 1980, Marc21Semantics.publication_date(@record)
    end

    it "works with date type r missing date2" do
      @record = first_record("date_type_r_missing_date2.marc")
      assert_equal 1957, Marc21Semantics.publication_date(@record)
    end
  end

  describe "marc_lcc_to_broad_category" do
    before do
      @indexer.instance_eval { to_field "discipline_facet", marc_lcc_to_broad_category }
    end

    it "maps a simple example" do
      @record = first_record("manufacturing_consent.marc")
      output = @indexer.map_record(@record)

      assert_equal ["Language & Literature"], output["discipline_facet"]
    end

    it "maps to default" do
      @record = first_record("musical_cage.marc")
      output = @indexer.map_record(@record)
      assert_equal ["Unknown"], output["discipline_facet"]
    end

    it "maps to nothing if none and no default" do
      # Registers a second field with the default explicitly disabled.
      @indexer.instance_eval { to_field "discipline_no_default", marc_lcc_to_broad_category(:default => nil) }
      @record = first_record("musical_cage.marc")
      output = @indexer.map_record(@record)

      assert_nil output["discipline_no_default"]
    end
  end

  describe "marc_geo_facet" do
    before do
      @indexer.instance_eval { to_field "geo_facet", marc_geo_facet }
    end

    it "maps a complicated record" do
      @record = first_record("multi_geo.marc")
      output = @indexer.map_record(@record)

      assert_equal ["Europe", "Middle East", "Africa, North", "Agora (Athens, Greece)", "Rome (Italy)", "Italy"],
        output["geo_facet"]
    end

    it "maps nothing on a record with no geo" do
      @record = first_record("manufacturing_consent.marc")
      output = @indexer.map_record(@record)
      assert_nil output["geo_facet"]
    end
  end

  describe "marc_era_facet" do
    before do
      @indexer.instance_eval { to_field "era_facet", marc_era_facet }
    end

    it "maps a complicated record" do
      @record = first_record("multi_era.marc")
      output = @indexer.map_record(@record)

      assert_equal ["Early modern, 1500-1700", "17th century", "Great Britain: Puritan Revolution, 1642-1660", "Great Britain: Civil War, 1642-1649", "1642-1660"],
        output["era_facet"]
    end
  end
end
|