traject 3.3.0 → 3.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/CHANGES.md +23 -2
- data/README.md +23 -2
- data/doc/settings.md +4 -2
- data/doc/xml.md +12 -0
- data/examples/marc/tiny.xml +35 -0
- data/lib/traject/command_line.rb +34 -43
- data/lib/traject/debug_writer.rb +1 -1
- data/lib/traject/macros/marc21.rb +3 -3
- data/lib/traject/macros/marc21_semantics.rb +7 -3
- data/lib/traject/macros/nokogiri_macros.rb +9 -3
- data/lib/traject/macros/transformation.rb +30 -0
- data/lib/traject/marc_extractor.rb +3 -3
- data/lib/traject/nokogiri_reader.rb +2 -0
- data/lib/traject/solr_json_writer.rb +28 -10
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/marc_languages.yaml +77 -48
- data/test/command_line_test.rb +52 -0
- data/test/debug_writer_test.rb +13 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +4 -0
- data/test/indexer/macros/transformation_test.rb +110 -0
- data/test/indexer/nokogiri_indexer_test.rb +35 -0
- data/test/indexer/read_write_test.rb +14 -3
- data/test/solr_json_writer_test.rb +45 -10
- data/test/test_support/missing-second-date.marc +1 -0
- data/traject.gemspec +3 -3
- metadata +19 -21
- data/.travis.yml +0 -16
@@ -10,18 +10,21 @@ ady: Adygei
|
|
10
10
|
afa: Afroasiatic (Other)
|
11
11
|
afh: Afrihili (Artificial language)
|
12
12
|
afr: Afrikaans
|
13
|
-
|
13
|
+
ain: Ainu
|
14
|
+
ajm: Aljamía
|
14
15
|
aka: Akan
|
15
16
|
akk: Akkadian
|
16
17
|
alb: Albanian
|
17
18
|
ale: Aleut
|
18
19
|
alg: Algonquian (Other)
|
20
|
+
alt: Altai
|
19
21
|
amh: Amharic
|
20
|
-
ang: English, Old (ca. 450-1100)
|
22
|
+
ang: "English, Old (ca. 450-1100)"
|
23
|
+
anp: Angika
|
21
24
|
apa: Apache languages
|
22
25
|
ara: Arabic
|
23
26
|
arc: Aramaic
|
24
|
-
arg: Aragonese
|
27
|
+
arg: Aragonese
|
25
28
|
arm: Armenian
|
26
29
|
arn: Mapuche
|
27
30
|
arp: Arapaho
|
@@ -36,7 +39,7 @@ ave: Avestan
|
|
36
39
|
awa: Awadhi
|
37
40
|
aym: Aymara
|
38
41
|
aze: Azerbaijani
|
39
|
-
bad: Banda
|
42
|
+
bad: Banda languages
|
40
43
|
bai: Bamileke languages
|
41
44
|
bak: Bashkir
|
42
45
|
bal: Baluchi
|
@@ -51,7 +54,7 @@ bem: Bemba
|
|
51
54
|
ben: Bengali
|
52
55
|
ber: Berber (Other)
|
53
56
|
bho: Bhojpuri
|
54
|
-
bih: Bihari
|
57
|
+
bih: Bihari (Other)
|
55
58
|
bik: Bikol
|
56
59
|
bin: Edo
|
57
60
|
bis: Bislama
|
@@ -65,6 +68,7 @@ bua: Buriat
|
|
65
68
|
bug: Bugis
|
66
69
|
bul: Bulgarian
|
67
70
|
bur: Burmese
|
71
|
+
byn: Bilin
|
68
72
|
cad: Caddo
|
69
73
|
cai: Central American Indian (Other)
|
70
74
|
cam: Khmer
|
@@ -78,7 +82,7 @@ chb: Chibcha
|
|
78
82
|
che: Chechen
|
79
83
|
chg: Chagatai
|
80
84
|
chi: Chinese
|
81
|
-
chk:
|
85
|
+
chk: Chuukese
|
82
86
|
chm: Mari
|
83
87
|
chn: Chinook jargon
|
84
88
|
cho: Choctaw
|
@@ -88,15 +92,17 @@ chu: Church Slavic
|
|
88
92
|
chv: Chuvash
|
89
93
|
chy: Cheyenne
|
90
94
|
cmc: Chamic languages
|
95
|
+
cnr: Montenegrin
|
91
96
|
cop: Coptic
|
92
97
|
cor: Cornish
|
93
98
|
cos: Corsican
|
94
|
-
cpe: Creoles and Pidgins, English-based (Other)
|
95
|
-
cpf: Creoles and Pidgins, French-based (Other)
|
96
|
-
cpp: Creoles and Pidgins, Portuguese-based (Other)
|
99
|
+
cpe: "Creoles and Pidgins, English-based (Other)"
|
100
|
+
cpf: "Creoles and Pidgins, French-based (Other)"
|
101
|
+
cpp: "Creoles and Pidgins, Portuguese-based (Other)"
|
97
102
|
cre: Cree
|
98
103
|
crh: Crimean Tatar
|
99
104
|
crp: Creoles and Pidgins (Other)
|
105
|
+
csb: Kashubian
|
100
106
|
cus: Cushitic (Other)
|
101
107
|
cze: Czech
|
102
108
|
dak: Dakota
|
@@ -104,14 +110,15 @@ dan: Danish
|
|
104
110
|
dar: Dargwa
|
105
111
|
day: Dayak
|
106
112
|
del: Delaware
|
107
|
-
den:
|
113
|
+
den: Slavey
|
108
114
|
dgr: Dogrib
|
109
115
|
din: Dinka
|
110
116
|
div: Divehi
|
111
117
|
doi: Dogri
|
112
118
|
dra: Dravidian (Other)
|
119
|
+
dsb: Lower Sorbian
|
113
120
|
dua: Duala
|
114
|
-
dum: Dutch, Middle (ca. 1050-1350)
|
121
|
+
dum: "Dutch, Middle (ca. 1050-1350)"
|
115
122
|
dut: Dutch
|
116
123
|
dyu: Dyula
|
117
124
|
dzo: Dzongkha
|
@@ -120,7 +127,7 @@ egy: Egyptian
|
|
120
127
|
eka: Ekajuk
|
121
128
|
elx: Elamite
|
122
129
|
eng: English
|
123
|
-
enm: English, Middle (1100-1500)
|
130
|
+
enm: "English, Middle (1100-1500)"
|
124
131
|
epo: Esperanto
|
125
132
|
esk: Eskimo languages
|
126
133
|
esp: Esperanto
|
@@ -133,18 +140,21 @@ fao: Faroese
|
|
133
140
|
far: Faroese
|
134
141
|
fat: Fanti
|
135
142
|
fij: Fijian
|
143
|
+
fil: Filipino
|
136
144
|
fin: Finnish
|
137
145
|
fiu: Finno-Ugrian (Other)
|
138
146
|
fon: Fon
|
139
147
|
fre: French
|
140
148
|
fri: Frisian
|
141
|
-
frm: French, Middle (ca.
|
142
|
-
fro: French, Old (ca. 842-
|
149
|
+
frm: "French, Middle (ca. 1300-1600)"
|
150
|
+
fro: "French, Old (ca. 842-1300)"
|
151
|
+
frr: North Frisian
|
152
|
+
frs: East Frisian
|
143
153
|
fry: Frisian
|
144
154
|
ful: Fula
|
145
155
|
fur: Friulian
|
146
|
-
gaa:
|
147
|
-
gae: Scottish
|
156
|
+
gaa: Gã
|
157
|
+
gae: Scottish Gaelix
|
148
158
|
gag: Galician
|
149
159
|
gal: Oromo
|
150
160
|
gay: Gayo
|
@@ -158,15 +168,16 @@ gla: Scottish Gaelic
|
|
158
168
|
gle: Irish
|
159
169
|
glg: Galician
|
160
170
|
glv: Manx
|
161
|
-
gmh: German, Middle High (ca. 1050-1500)
|
162
|
-
goh: German, Old High (ca. 750-1050)
|
171
|
+
gmh: "German, Middle High (ca. 1050-1500)"
|
172
|
+
goh: "German, Old High (ca. 750-1050)"
|
163
173
|
gon: Gondi
|
164
174
|
gor: Gorontalo
|
165
175
|
got: Gothic
|
166
176
|
grb: Grebo
|
167
|
-
grc: Greek, Ancient (to 1453)
|
168
|
-
gre: Greek, Modern (1453-
|
177
|
+
grc: "Greek, Ancient (to 1453)"
|
178
|
+
gre: "Greek, Modern (1453-)"
|
169
179
|
grn: Guarani
|
180
|
+
gsw: Swiss German
|
170
181
|
gua: Guarani
|
171
182
|
guj: Gujarati
|
172
183
|
gwi: Gwich'in
|
@@ -177,11 +188,13 @@ haw: Hawaiian
|
|
177
188
|
heb: Hebrew
|
178
189
|
her: Herero
|
179
190
|
hil: Hiligaynon
|
180
|
-
him:
|
191
|
+
him: Western Pahari languages
|
181
192
|
hin: Hindi
|
182
193
|
hit: Hittite
|
183
194
|
hmn: Hmong
|
184
195
|
hmo: Hiri Motu
|
196
|
+
hrv: Croatian
|
197
|
+
hsb: Upper Sorbian
|
185
198
|
hun: Hungarian
|
186
199
|
hup: Hupa
|
187
200
|
iba: Iban
|
@@ -205,16 +218,17 @@ iri: Irish
|
|
205
218
|
iro: Iroquoian (Other)
|
206
219
|
ita: Italian
|
207
220
|
jav: Javanese
|
221
|
+
jbo: Lojban (Artificial language)
|
208
222
|
jpn: Japanese
|
209
223
|
jpr: Judeo-Persian
|
210
224
|
jrb: Judeo-Arabic
|
211
225
|
kaa: Kara-Kalpak
|
212
226
|
kab: Kabyle
|
213
227
|
kac: Kachin
|
214
|
-
kal:
|
228
|
+
kal: Kalâtdlisut
|
215
229
|
kam: Kamba
|
216
230
|
kan: Kannada
|
217
|
-
kar: Karen
|
231
|
+
kar: Karen languages
|
218
232
|
kas: Kashmiri
|
219
233
|
kau: Kanuri
|
220
234
|
kaw: Kawi
|
@@ -232,19 +246,21 @@ kok: Konkani
|
|
232
246
|
kom: Komi
|
233
247
|
kon: Kongo
|
234
248
|
kor: Korean
|
235
|
-
kos:
|
249
|
+
kos: Kosraean
|
236
250
|
kpe: Kpelle
|
237
|
-
|
251
|
+
krc: Karachay-Balkar
|
252
|
+
krl: Karelian
|
253
|
+
kro: Kru (Other)
|
238
254
|
kru: Kurukh
|
239
255
|
kua: Kuanyama
|
240
256
|
kum: Kumyk
|
241
257
|
kur: Kurdish
|
242
258
|
kus: Kusaie
|
243
|
-
kut:
|
259
|
+
kut: Kootenai
|
244
260
|
lad: Ladino
|
245
|
-
lah:
|
246
|
-
lam: Lamba
|
247
|
-
lan: Occitan (post
|
261
|
+
lah: Lahndā
|
262
|
+
lam: Lamba (Zambia and Congo)
|
263
|
+
lan: Occitan (post 1500)
|
248
264
|
lao: Lao
|
249
265
|
lap: Sami
|
250
266
|
lat: Latin
|
@@ -255,11 +271,11 @@ lin: Lingala
|
|
255
271
|
lit: Lithuanian
|
256
272
|
lol: Mongo-Nkundu
|
257
273
|
loz: Lozi
|
258
|
-
ltz:
|
274
|
+
ltz: Luxembourgish
|
259
275
|
lua: Luba-Lulua
|
260
276
|
lub: Luba-Katanga
|
261
277
|
lug: Ganda
|
262
|
-
lui:
|
278
|
+
lui: Luiseño
|
263
279
|
lun: Lunda
|
264
280
|
luo: Luo (Kenya and Tanzania)
|
265
281
|
lus: Lushai
|
@@ -274,12 +290,13 @@ man: Mandingo
|
|
274
290
|
mao: Maori
|
275
291
|
map: Austronesian (Other)
|
276
292
|
mar: Marathi
|
277
|
-
mas:
|
293
|
+
mas: Maasai
|
278
294
|
max: Manx
|
279
295
|
may: Malay
|
296
|
+
mdf: Moksha
|
280
297
|
mdr: Mandar
|
281
298
|
men: Mende
|
282
|
-
mga: Irish, Middle (ca. 1100-1550)
|
299
|
+
mga: "Irish, Middle (ca. 1100-1550)"
|
283
300
|
mic: Micmac
|
284
301
|
min: Minangkabau
|
285
302
|
mis: Miscellaneous languages
|
@@ -293,12 +310,14 @@ mno: Manobo languages
|
|
293
310
|
moh: Mohawk
|
294
311
|
mol: Moldavian
|
295
312
|
mon: Mongolian
|
296
|
-
mos:
|
313
|
+
mos: Mooré
|
297
314
|
mul: Multiple languages
|
298
315
|
mun: Munda (Other)
|
299
316
|
mus: Creek
|
317
|
+
mwl: Mirandese
|
300
318
|
mwr: Marwari
|
301
319
|
myn: Mayan languages
|
320
|
+
myv: Erzya
|
302
321
|
nah: Nahuatl
|
303
322
|
nai: North American Indian (Other)
|
304
323
|
nap: Neapolitan Italian
|
@@ -314,12 +333,14 @@ nia: Nias
|
|
314
333
|
nic: Niger-Kordofanian (Other)
|
315
334
|
niu: Niuean
|
316
335
|
nno: Norwegian (Nynorsk)
|
317
|
-
nob: Norwegian (
|
336
|
+
nob: Norwegian (Bokmål)
|
318
337
|
nog: Nogai
|
319
338
|
non: Old Norse
|
320
339
|
nor: Norwegian
|
340
|
+
nqo: N'Ko
|
321
341
|
nso: Northern Sotho
|
322
342
|
nub: Nubian languages
|
343
|
+
nwc: "Newari, Old"
|
323
344
|
nya: Nyanja
|
324
345
|
nym: Nyamwezi
|
325
346
|
nyn: Nyankole
|
@@ -331,7 +352,7 @@ ori: Oriya
|
|
331
352
|
orm: Oromo
|
332
353
|
osa: Osage
|
333
354
|
oss: Ossetic
|
334
|
-
ota: Turkish, Ottoman
|
355
|
+
ota: "Turkish, Ottoman"
|
335
356
|
oto: Otomian languages
|
336
357
|
paa: Papuan (Other)
|
337
358
|
pag: Pangasinan
|
@@ -346,10 +367,10 @@ phi: Philippine (Other)
|
|
346
367
|
phn: Phoenician
|
347
368
|
pli: Pali
|
348
369
|
pol: Polish
|
349
|
-
pon:
|
370
|
+
pon: Pohnpeian
|
350
371
|
por: Portuguese
|
351
372
|
pra: Prakrit languages
|
352
|
-
pro:
|
373
|
+
pro: Provençal (to 1500)
|
353
374
|
pus: Pushto
|
354
375
|
que: Quechua
|
355
376
|
raj: Rajasthani
|
@@ -360,6 +381,7 @@ roh: Raeto-Romance
|
|
360
381
|
rom: Romani
|
361
382
|
rum: Romanian
|
362
383
|
run: Rundi
|
384
|
+
rup: Aromanian
|
363
385
|
rus: Russian
|
364
386
|
sad: Sandawe
|
365
387
|
sag: Sango (Ubangi Creole)
|
@@ -372,11 +394,12 @@ sao: Samoan
|
|
372
394
|
sas: Sasak
|
373
395
|
sat: Santali
|
374
396
|
scc: Serbian
|
397
|
+
scn: Sicilian Italian
|
375
398
|
sco: Scots
|
376
399
|
scr: Croatian
|
377
400
|
sel: Selkup
|
378
401
|
sem: Semitic (Other)
|
379
|
-
sga: Irish, Old (to 1100)
|
402
|
+
sga: "Irish, Old (to 1100)"
|
380
403
|
sgn: Sign languages
|
381
404
|
shn: Shan
|
382
405
|
sho: Shona
|
@@ -404,6 +427,8 @@ son: Songhai
|
|
404
427
|
sot: Sotho
|
405
428
|
spa: Spanish
|
406
429
|
srd: Sardinian
|
430
|
+
srn: Sranan
|
431
|
+
srp: Serbian
|
407
432
|
srr: Serer
|
408
433
|
ssa: Nilo-Saharan (Other)
|
409
434
|
sso: Sotho
|
@@ -415,7 +440,8 @@ sux: Sumerian
|
|
415
440
|
swa: Swahili
|
416
441
|
swe: Swedish
|
417
442
|
swz: Swazi
|
418
|
-
|
443
|
+
syc: Syriac
|
444
|
+
syr: "Syriac, Modern"
|
419
445
|
tag: Tagalog
|
420
446
|
tah: Tahitian
|
421
447
|
tai: Tai (Other)
|
@@ -431,10 +457,11 @@ tgk: Tajik
|
|
431
457
|
tgl: Tagalog
|
432
458
|
tha: Thai
|
433
459
|
tib: Tibetan
|
434
|
-
tig:
|
460
|
+
tig: Tigré
|
435
461
|
tir: Tigrinya
|
436
462
|
tiv: Tiv
|
437
463
|
tkl: Tokelauan
|
464
|
+
tlh: Klingon (Artificial language)
|
438
465
|
tli: Tlingit
|
439
466
|
tmh: Tamashek
|
440
467
|
tog: Tonga (Nyasa)
|
@@ -464,17 +491,17 @@ uzb: Uzbek
|
|
464
491
|
vai: Vai
|
465
492
|
ven: Venda
|
466
493
|
vie: Vietnamese
|
467
|
-
vol:
|
494
|
+
vol: Volapük
|
468
495
|
vot: Votic
|
469
496
|
wak: Wakashan languages
|
470
|
-
wal:
|
497
|
+
wal: Wolayta
|
471
498
|
war: Waray
|
472
|
-
was:
|
499
|
+
was: Washoe
|
473
500
|
wel: Welsh
|
474
|
-
wen: Sorbian
|
501
|
+
wen: Sorbian (Other)
|
475
502
|
wln: Walloon
|
476
503
|
wol: Wolof
|
477
|
-
xal:
|
504
|
+
xal: Oirat
|
478
505
|
xho: Xhosa
|
479
506
|
yao: Yao (Africa)
|
480
507
|
yap: Yapese
|
@@ -482,9 +509,11 @@ yid: Yiddish
|
|
482
509
|
yor: Yoruba
|
483
510
|
ypk: Yupik languages
|
484
511
|
zap: Zapotec
|
512
|
+
zbl: Blissymbolics
|
485
513
|
zen: Zenaga
|
486
514
|
zha: Zhuang
|
487
|
-
znd: Zande
|
515
|
+
znd: Zande languages
|
488
516
|
zul: Zulu
|
489
517
|
zun: Zuni
|
490
|
-
# zxx:
|
518
|
+
# zxx: No linguistic content
|
519
|
+
zza: Zaza
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# we mostly unit test with a Traject::Indexer itself and lower-level, but
|
2
|
+
# we need at least some basic top-level integration actually command line tests,
|
3
|
+
# this is a start, we can add more.
|
4
|
+
#
|
5
|
+
# Should we be testing Traject::CommandLine as an object instead of/in addition to
|
6
|
+
# actually testing shell-out to command line call? Maybe.
|
7
|
+
|
8
|
+
require 'test_helper'
|
9
|
+
|
10
|
+
describe "Shell out to command line" do
|
11
|
+
# just encapsuluate using the minitest capture helper, but also
|
12
|
+
# getting and returning exit code
|
13
|
+
#
|
14
|
+
# out, err, result = execute_with_args("-c configuration")
|
15
|
+
def execute_with_args(args)
|
16
|
+
out, err = capture_subprocess_io do
|
17
|
+
system("./bin/traject #{args}")
|
18
|
+
end
|
19
|
+
|
20
|
+
return out, err, $?
|
21
|
+
end
|
22
|
+
|
23
|
+
it "can display version" do
|
24
|
+
out, err, result = execute_with_args("-v")
|
25
|
+
|
26
|
+
assert result.success?, "Expected subprocess exit code to be success.\nSTDERR:\n#{err}\n\nSTDOUT:\n#{out}"
|
27
|
+
assert_equal err, "traject version #{Traject::VERSION}\n"
|
28
|
+
end
|
29
|
+
|
30
|
+
it "can display help text" do
|
31
|
+
out, err, result = execute_with_args("-h")
|
32
|
+
|
33
|
+
assert result.success?, "Expected subprocess exit code to be success.\nSTDERR:\n#{err}\n\nSTDOUT:\n#{out}"
|
34
|
+
assert err.start_with?("traject [options] -c configuration.rb [-c config2.rb] file.mrc")
|
35
|
+
end
|
36
|
+
|
37
|
+
it "handles bad argument" do
|
38
|
+
out, err, result = execute_with_args("--no-such-arg")
|
39
|
+
refute result.success?
|
40
|
+
|
41
|
+
assert err.start_with?("Error: unknown option `--no-such-arg'\nExiting...\n")
|
42
|
+
end
|
43
|
+
|
44
|
+
it "does basic dry run" do
|
45
|
+
out, err, result = execute_with_args("--debug-mode -s one=two -s three=four -c test/test_support/demo_config.rb test/test_support/emptyish_record.marc")
|
46
|
+
|
47
|
+
assert result.success?, "Expected subprocess exit code to be success.\nSTDERR:\n#{err}\n\nSTDOUT:\n#{out}"
|
48
|
+
assert_includes err, "executing with: `--debug-mode -s one=two -s three=four"
|
49
|
+
assert_match /bib_1000165 +author_sort +Collection la/, out
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
data/test/debug_writer_test.rb
CHANGED
@@ -73,6 +73,19 @@ describe 'Simple output' do
|
|
73
73
|
|
74
74
|
end
|
75
75
|
|
76
|
+
it "deals ok with nil values" do
|
77
|
+
record_with_nil_value = {"id"=>["2710183"], "title"=>["Manufacturing consent : the political economy of the mass media /"], "xyz"=>nil}
|
78
|
+
@writer.put Traject::Indexer::Context.new(:output_hash => record_with_nil_value)
|
79
|
+
expected = [
|
80
|
+
"#{@id} id #{@id}",
|
81
|
+
"#{@id} title #{@title}",
|
82
|
+
"#{@id} xyz",
|
83
|
+
"\n"
|
84
|
+
]
|
85
|
+
assert_equal expected.join("\n").gsub(/\s/, ''), @io.string.gsub(/\s/, '')
|
86
|
+
@writer.close
|
87
|
+
|
88
|
+
end
|
76
89
|
end
|
77
90
|
|
78
91
|
|
@@ -209,6 +209,10 @@ describe "Traject::Macros::Marc21Semantics" do
|
|
209
209
|
@record = MARC::Reader.new(support_file_path "date_type_r_missing_date2.marc").to_a.first
|
210
210
|
assert_equal 1957, Marc21Semantics.publication_date(@record)
|
211
211
|
end
|
212
|
+
it "provides a fallback for a missing second date" do
|
213
|
+
@record = MARC::Reader.new(support_file_path "missing-second-date.marc").to_a.first
|
214
|
+
assert_equal 1678, Marc21Semantics.publication_date(@record)
|
215
|
+
end
|
212
216
|
|
213
217
|
it "works correctly with date type 'q'" do
|
214
218
|
val = @record['008'].value
|
@@ -174,4 +174,114 @@ describe "Traject::Macros::Transformation" do
|
|
174
174
|
end
|
175
175
|
end
|
176
176
|
|
177
|
+
describe "delete_if" do
|
178
|
+
|
179
|
+
describe "argument is an Array" do
|
180
|
+
it "filters out selected values from accumulatd values" do
|
181
|
+
arg = [ "one", "three"]
|
182
|
+
|
183
|
+
@indexer.configure do
|
184
|
+
to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
|
185
|
+
end
|
186
|
+
|
187
|
+
output = @indexer.map_record(@record)
|
188
|
+
assert_equal ["two"], output["test"]
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
describe "argument is a Set" do
|
193
|
+
it "filters out selected values from accumulatd values" do
|
194
|
+
arg = [ "one", "three"].to_set
|
195
|
+
|
196
|
+
@indexer.configure do
|
197
|
+
to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
|
198
|
+
end
|
199
|
+
|
200
|
+
output = @indexer.map_record(@record)
|
201
|
+
assert_equal ["two"], output["test"]
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
describe "argument is a Regex" do
|
206
|
+
it "filters out selected values from accumulatd values" do
|
207
|
+
arg = /^t/
|
208
|
+
|
209
|
+
@indexer.configure do
|
210
|
+
to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
|
211
|
+
end
|
212
|
+
|
213
|
+
output = @indexer.map_record(@record)
|
214
|
+
assert_equal ["one"], output["test"]
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
describe "argument is a Procedure or Lambda" do
|
219
|
+
it "filters out selected values from accumulatd values" do
|
220
|
+
arg = ->(v) { v == "one" }
|
221
|
+
|
222
|
+
@indexer.configure do
|
223
|
+
to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
|
224
|
+
end
|
225
|
+
|
226
|
+
output = @indexer.map_record(@record)
|
227
|
+
assert_equal ["two", "three"], output["test"]
|
228
|
+
end
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
232
|
+
describe "select" do
|
233
|
+
|
234
|
+
describe "argument is an Array" do
|
235
|
+
it "selects a subset of values from accumulatd values" do
|
236
|
+
arg = [ "one", "three", "four"]
|
237
|
+
|
238
|
+
@indexer.configure do
|
239
|
+
to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
|
240
|
+
end
|
241
|
+
|
242
|
+
output = @indexer.map_record(@record)
|
243
|
+
assert_equal ["one", "three"], output["test"]
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
describe "argument is a Set" do
|
248
|
+
it "selects a subset of values from accumulatd values" do
|
249
|
+
arg = [ "one", "three", "four"].to_set
|
250
|
+
|
251
|
+
@indexer.configure do
|
252
|
+
to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
|
253
|
+
end
|
254
|
+
|
255
|
+
output = @indexer.map_record(@record)
|
256
|
+
assert_equal ["one", "three"], output["test"]
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
describe "argument is a Regex" do
|
261
|
+
it "selects a subset of values from accumulatd values" do
|
262
|
+
arg = /^t/
|
263
|
+
|
264
|
+
@indexer.configure do
|
265
|
+
to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
|
266
|
+
end
|
267
|
+
|
268
|
+
output = @indexer.map_record(@record)
|
269
|
+
assert_equal ["two", "three"], output["test"]
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
describe "argument is a Procedure or Lambda" do
|
274
|
+
it "selects a subset of values from accumulatd values" do
|
275
|
+
arg = ->(v) { v != "one" }
|
276
|
+
|
277
|
+
@indexer.configure do
|
278
|
+
to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
|
279
|
+
end
|
280
|
+
|
281
|
+
output = @indexer.map_record(@record)
|
282
|
+
assert_equal ["two", "three"], output["test"]
|
283
|
+
end
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
177
287
|
end
|
@@ -109,6 +109,41 @@ describe "Traject::NokogiriIndexer" do
|
|
109
109
|
result["name"].name == "name"
|
110
110
|
})
|
111
111
|
end
|
112
|
+
end
|
112
113
|
|
114
|
+
describe "xpath to attribute" do
|
115
|
+
let(:indexer) do
|
116
|
+
namespaces = @namespaces
|
117
|
+
Traject::Indexer::NokogiriIndexer.new("nokogiri.namespaces" => namespaces,
|
118
|
+
"nokogiri.each_record_xpath" => "//oai:record") do
|
119
|
+
to_field "status", extract_xpath("//oai:record/oai:header/@status")
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
let(:records) { Traject::NokogiriReader.new(StringIO.new(
|
124
|
+
<<-XML
|
125
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
126
|
+
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
|
127
|
+
<responseDate>2020-03-03T04:16:09Z</responseDate>
|
128
|
+
<request verb="ListRecords" metadataPrefix="marc21" set="blacklight" from="2020-03-02T20:47:11Z">https://na02.alma.exlibrisgroup.com/view/oai/01TULI_INST/request</request>
|
129
|
+
<ListRecords>
|
130
|
+
<record>
|
131
|
+
<header status="deleted">
|
132
|
+
<identifier>oai:alma.01TULI_INST:991025803889703811</identifier>
|
133
|
+
<datestamp>2020-03-03T03:54:35Z</datestamp>
|
134
|
+
<setSpec>blacklight</setSpec>
|
135
|
+
<setSpec>rapid_print_journals</setSpec>
|
136
|
+
<setSpec>blacklight_qa</setSpec>
|
137
|
+
</header>
|
138
|
+
</record>
|
139
|
+
</ListRecords>
|
140
|
+
</OAI-PMH>
|
141
|
+
XML
|
142
|
+
), []).to_a }
|
143
|
+
|
144
|
+
it "extracts the correct attribute" do
|
145
|
+
statuses = indexer.map_record(records.first)["status"]
|
146
|
+
assert_equal ["deleted"], statuses
|
147
|
+
end
|
113
148
|
end
|
114
149
|
end
|
@@ -7,7 +7,8 @@ memory_writer_class = Class.new do
|
|
7
7
|
# store them in a class variable so we can test em later
|
8
8
|
# Supress the warning message
|
9
9
|
original_verbose, $VERBOSE = $VERBOSE, nil
|
10
|
-
|
10
|
+
@settings = settings
|
11
|
+
self.class.store_last_writer_settings(@settings)
|
11
12
|
# Activate warning messages again.
|
12
13
|
$VERBOSE = original_verbose
|
13
14
|
@settings["memory_writer.added"] = []
|
@@ -20,6 +21,16 @@ memory_writer_class = Class.new do
|
|
20
21
|
def close
|
21
22
|
@settings["memory_writer.closed"] = true
|
22
23
|
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
def self.store_last_writer_settings(settings)
|
28
|
+
@last_writer_settings = settings
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.last_writer_settings
|
32
|
+
@last_writer_settings
|
33
|
+
end
|
23
34
|
end
|
24
35
|
|
25
36
|
describe "Traject::Indexer#process" do
|
@@ -53,7 +64,7 @@ describe "Traject::Indexer#process" do
|
|
53
64
|
|
54
65
|
# Grab the settings out of a class variable where we left em,
|
55
66
|
# as a convenient place to store outcomes so we can test em.
|
56
|
-
writer_settings = memory_writer_class.
|
67
|
+
writer_settings = memory_writer_class.last_writer_settings
|
57
68
|
|
58
69
|
assert writer_settings["memory_writer.added"]
|
59
70
|
assert_equal 30, writer_settings["memory_writer.added"].length
|
@@ -146,7 +157,7 @@ describe "Traject::Indexer#process" do
|
|
146
157
|
it "parses and loads" do
|
147
158
|
@indexer.process([@file1, @file2])
|
148
159
|
# kinda ridic, yeah.
|
149
|
-
output_hashes = memory_writer_class.
|
160
|
+
output_hashes = memory_writer_class.last_writer_settings["memory_writer.added"].collect(&:output_hash)
|
150
161
|
|
151
162
|
assert_length 2, output_hashes
|
152
163
|
assert output_hashes.all? { |hash| hash["title"].length > 0 }
|