traject 3.3.0 → 3.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +35 -0
- data/CHANGES.md +23 -2
- data/README.md +23 -2
- data/doc/settings.md +4 -2
- data/doc/xml.md +12 -0
- data/examples/marc/tiny.xml +35 -0
- data/lib/traject/command_line.rb +34 -43
- data/lib/traject/debug_writer.rb +1 -1
- data/lib/traject/macros/marc21.rb +3 -3
- data/lib/traject/macros/marc21_semantics.rb +7 -3
- data/lib/traject/macros/nokogiri_macros.rb +9 -3
- data/lib/traject/macros/transformation.rb +30 -0
- data/lib/traject/marc_extractor.rb +3 -3
- data/lib/traject/nokogiri_reader.rb +2 -0
- data/lib/traject/solr_json_writer.rb +28 -10
- data/lib/traject/version.rb +1 -1
- data/lib/translation_maps/marc_languages.yaml +77 -48
- data/test/command_line_test.rb +52 -0
- data/test/debug_writer_test.rb +13 -0
- data/test/indexer/macros/macros_marc21_semantics_test.rb +4 -0
- data/test/indexer/macros/transformation_test.rb +110 -0
- data/test/indexer/nokogiri_indexer_test.rb +35 -0
- data/test/indexer/read_write_test.rb +14 -3
- data/test/solr_json_writer_test.rb +45 -10
- data/test/test_support/missing-second-date.marc +1 -0
- data/traject.gemspec +3 -3
- metadata +19 -21
- data/.travis.yml +0 -16
@@ -10,18 +10,21 @@ ady: Adygei
|
|
10
10
|
afa: Afroasiatic (Other)
|
11
11
|
afh: Afrihili (Artificial language)
|
12
12
|
afr: Afrikaans
|
13
|
-
|
13
|
+
ain: Ainu
|
14
|
+
ajm: Aljamía
|
14
15
|
aka: Akan
|
15
16
|
akk: Akkadian
|
16
17
|
alb: Albanian
|
17
18
|
ale: Aleut
|
18
19
|
alg: Algonquian (Other)
|
20
|
+
alt: Altai
|
19
21
|
amh: Amharic
|
20
|
-
ang: English, Old (ca. 450-1100)
|
22
|
+
ang: "English, Old (ca. 450-1100)"
|
23
|
+
anp: Angika
|
21
24
|
apa: Apache languages
|
22
25
|
ara: Arabic
|
23
26
|
arc: Aramaic
|
24
|
-
arg: Aragonese
|
27
|
+
arg: Aragonese
|
25
28
|
arm: Armenian
|
26
29
|
arn: Mapuche
|
27
30
|
arp: Arapaho
|
@@ -36,7 +39,7 @@ ave: Avestan
|
|
36
39
|
awa: Awadhi
|
37
40
|
aym: Aymara
|
38
41
|
aze: Azerbaijani
|
39
|
-
bad: Banda
|
42
|
+
bad: Banda languages
|
40
43
|
bai: Bamileke languages
|
41
44
|
bak: Bashkir
|
42
45
|
bal: Baluchi
|
@@ -51,7 +54,7 @@ bem: Bemba
|
|
51
54
|
ben: Bengali
|
52
55
|
ber: Berber (Other)
|
53
56
|
bho: Bhojpuri
|
54
|
-
bih: Bihari
|
57
|
+
bih: Bihari (Other)
|
55
58
|
bik: Bikol
|
56
59
|
bin: Edo
|
57
60
|
bis: Bislama
|
@@ -65,6 +68,7 @@ bua: Buriat
|
|
65
68
|
bug: Bugis
|
66
69
|
bul: Bulgarian
|
67
70
|
bur: Burmese
|
71
|
+
byn: Bilin
|
68
72
|
cad: Caddo
|
69
73
|
cai: Central American Indian (Other)
|
70
74
|
cam: Khmer
|
@@ -78,7 +82,7 @@ chb: Chibcha
|
|
78
82
|
che: Chechen
|
79
83
|
chg: Chagatai
|
80
84
|
chi: Chinese
|
81
|
-
chk:
|
85
|
+
chk: Chuukese
|
82
86
|
chm: Mari
|
83
87
|
chn: Chinook jargon
|
84
88
|
cho: Choctaw
|
@@ -88,15 +92,17 @@ chu: Church Slavic
|
|
88
92
|
chv: Chuvash
|
89
93
|
chy: Cheyenne
|
90
94
|
cmc: Chamic languages
|
95
|
+
cnr: Montenegrin
|
91
96
|
cop: Coptic
|
92
97
|
cor: Cornish
|
93
98
|
cos: Corsican
|
94
|
-
cpe: Creoles and Pidgins, English-based (Other)
|
95
|
-
cpf: Creoles and Pidgins, French-based (Other)
|
96
|
-
cpp: Creoles and Pidgins, Portuguese-based (Other)
|
99
|
+
cpe: "Creoles and Pidgins, English-based (Other)"
|
100
|
+
cpf: "Creoles and Pidgins, French-based (Other)"
|
101
|
+
cpp: "Creoles and Pidgins, Portuguese-based (Other)"
|
97
102
|
cre: Cree
|
98
103
|
crh: Crimean Tatar
|
99
104
|
crp: Creoles and Pidgins (Other)
|
105
|
+
csb: Kashubian
|
100
106
|
cus: Cushitic (Other)
|
101
107
|
cze: Czech
|
102
108
|
dak: Dakota
|
@@ -104,14 +110,15 @@ dan: Danish
|
|
104
110
|
dar: Dargwa
|
105
111
|
day: Dayak
|
106
112
|
del: Delaware
|
107
|
-
den:
|
113
|
+
den: Slavey
|
108
114
|
dgr: Dogrib
|
109
115
|
din: Dinka
|
110
116
|
div: Divehi
|
111
117
|
doi: Dogri
|
112
118
|
dra: Dravidian (Other)
|
119
|
+
dsb: Lower Sorbian
|
113
120
|
dua: Duala
|
114
|
-
dum: Dutch, Middle (ca. 1050-1350)
|
121
|
+
dum: "Dutch, Middle (ca. 1050-1350)"
|
115
122
|
dut: Dutch
|
116
123
|
dyu: Dyula
|
117
124
|
dzo: Dzongkha
|
@@ -120,7 +127,7 @@ egy: Egyptian
|
|
120
127
|
eka: Ekajuk
|
121
128
|
elx: Elamite
|
122
129
|
eng: English
|
123
|
-
enm: English, Middle (1100-1500)
|
130
|
+
enm: "English, Middle (1100-1500)"
|
124
131
|
epo: Esperanto
|
125
132
|
esk: Eskimo languages
|
126
133
|
esp: Esperanto
|
@@ -133,18 +140,21 @@ fao: Faroese
|
|
133
140
|
far: Faroese
|
134
141
|
fat: Fanti
|
135
142
|
fij: Fijian
|
143
|
+
fil: Filipino
|
136
144
|
fin: Finnish
|
137
145
|
fiu: Finno-Ugrian (Other)
|
138
146
|
fon: Fon
|
139
147
|
fre: French
|
140
148
|
fri: Frisian
|
141
|
-
frm: French, Middle (ca.
|
142
|
-
fro: French, Old (ca. 842-
|
149
|
+
frm: "French, Middle (ca. 1300-1600)"
|
150
|
+
fro: "French, Old (ca. 842-1300)"
|
151
|
+
frr: North Frisian
|
152
|
+
frs: East Frisian
|
143
153
|
fry: Frisian
|
144
154
|
ful: Fula
|
145
155
|
fur: Friulian
|
146
|
-
gaa:
|
147
|
-
gae: Scottish
|
156
|
+
gaa: Gã
|
157
|
+
gae: Scottish Gaelix
|
148
158
|
gag: Galician
|
149
159
|
gal: Oromo
|
150
160
|
gay: Gayo
|
@@ -158,15 +168,16 @@ gla: Scottish Gaelic
|
|
158
168
|
gle: Irish
|
159
169
|
glg: Galician
|
160
170
|
glv: Manx
|
161
|
-
gmh: German, Middle High (ca. 1050-1500)
|
162
|
-
goh: German, Old High (ca. 750-1050)
|
171
|
+
gmh: "German, Middle High (ca. 1050-1500)"
|
172
|
+
goh: "German, Old High (ca. 750-1050)"
|
163
173
|
gon: Gondi
|
164
174
|
gor: Gorontalo
|
165
175
|
got: Gothic
|
166
176
|
grb: Grebo
|
167
|
-
grc: Greek, Ancient (to 1453)
|
168
|
-
gre: Greek, Modern (1453-
|
177
|
+
grc: "Greek, Ancient (to 1453)"
|
178
|
+
gre: "Greek, Modern (1453-)"
|
169
179
|
grn: Guarani
|
180
|
+
gsw: Swiss German
|
170
181
|
gua: Guarani
|
171
182
|
guj: Gujarati
|
172
183
|
gwi: Gwich'in
|
@@ -177,11 +188,13 @@ haw: Hawaiian
|
|
177
188
|
heb: Hebrew
|
178
189
|
her: Herero
|
179
190
|
hil: Hiligaynon
|
180
|
-
him:
|
191
|
+
him: Western Pahari languages
|
181
192
|
hin: Hindi
|
182
193
|
hit: Hittite
|
183
194
|
hmn: Hmong
|
184
195
|
hmo: Hiri Motu
|
196
|
+
hrv: Croatian
|
197
|
+
hsb: Upper Sorbian
|
185
198
|
hun: Hungarian
|
186
199
|
hup: Hupa
|
187
200
|
iba: Iban
|
@@ -205,16 +218,17 @@ iri: Irish
|
|
205
218
|
iro: Iroquoian (Other)
|
206
219
|
ita: Italian
|
207
220
|
jav: Javanese
|
221
|
+
jbo: Lojban (Artificial language)
|
208
222
|
jpn: Japanese
|
209
223
|
jpr: Judeo-Persian
|
210
224
|
jrb: Judeo-Arabic
|
211
225
|
kaa: Kara-Kalpak
|
212
226
|
kab: Kabyle
|
213
227
|
kac: Kachin
|
214
|
-
kal:
|
228
|
+
kal: Kalâtdlisut
|
215
229
|
kam: Kamba
|
216
230
|
kan: Kannada
|
217
|
-
kar: Karen
|
231
|
+
kar: Karen languages
|
218
232
|
kas: Kashmiri
|
219
233
|
kau: Kanuri
|
220
234
|
kaw: Kawi
|
@@ -232,19 +246,21 @@ kok: Konkani
|
|
232
246
|
kom: Komi
|
233
247
|
kon: Kongo
|
234
248
|
kor: Korean
|
235
|
-
kos:
|
249
|
+
kos: Kosraean
|
236
250
|
kpe: Kpelle
|
237
|
-
|
251
|
+
krc: Karachay-Balkar
|
252
|
+
krl: Karelian
|
253
|
+
kro: Kru (Other)
|
238
254
|
kru: Kurukh
|
239
255
|
kua: Kuanyama
|
240
256
|
kum: Kumyk
|
241
257
|
kur: Kurdish
|
242
258
|
kus: Kusaie
|
243
|
-
kut:
|
259
|
+
kut: Kootenai
|
244
260
|
lad: Ladino
|
245
|
-
lah:
|
246
|
-
lam: Lamba
|
247
|
-
lan: Occitan (post
|
261
|
+
lah: Lahndā
|
262
|
+
lam: Lamba (Zambia and Congo)
|
263
|
+
lan: Occitan (post 1500)
|
248
264
|
lao: Lao
|
249
265
|
lap: Sami
|
250
266
|
lat: Latin
|
@@ -255,11 +271,11 @@ lin: Lingala
|
|
255
271
|
lit: Lithuanian
|
256
272
|
lol: Mongo-Nkundu
|
257
273
|
loz: Lozi
|
258
|
-
ltz:
|
274
|
+
ltz: Luxembourgish
|
259
275
|
lua: Luba-Lulua
|
260
276
|
lub: Luba-Katanga
|
261
277
|
lug: Ganda
|
262
|
-
lui:
|
278
|
+
lui: Luiseño
|
263
279
|
lun: Lunda
|
264
280
|
luo: Luo (Kenya and Tanzania)
|
265
281
|
lus: Lushai
|
@@ -274,12 +290,13 @@ man: Mandingo
|
|
274
290
|
mao: Maori
|
275
291
|
map: Austronesian (Other)
|
276
292
|
mar: Marathi
|
277
|
-
mas:
|
293
|
+
mas: Maasai
|
278
294
|
max: Manx
|
279
295
|
may: Malay
|
296
|
+
mdf: Moksha
|
280
297
|
mdr: Mandar
|
281
298
|
men: Mende
|
282
|
-
mga: Irish, Middle (ca. 1100-1550)
|
299
|
+
mga: "Irish, Middle (ca. 1100-1550)"
|
283
300
|
mic: Micmac
|
284
301
|
min: Minangkabau
|
285
302
|
mis: Miscellaneous languages
|
@@ -293,12 +310,14 @@ mno: Manobo languages
|
|
293
310
|
moh: Mohawk
|
294
311
|
mol: Moldavian
|
295
312
|
mon: Mongolian
|
296
|
-
mos:
|
313
|
+
mos: Mooré
|
297
314
|
mul: Multiple languages
|
298
315
|
mun: Munda (Other)
|
299
316
|
mus: Creek
|
317
|
+
mwl: Mirandese
|
300
318
|
mwr: Marwari
|
301
319
|
myn: Mayan languages
|
320
|
+
myv: Erzya
|
302
321
|
nah: Nahuatl
|
303
322
|
nai: North American Indian (Other)
|
304
323
|
nap: Neapolitan Italian
|
@@ -314,12 +333,14 @@ nia: Nias
|
|
314
333
|
nic: Niger-Kordofanian (Other)
|
315
334
|
niu: Niuean
|
316
335
|
nno: Norwegian (Nynorsk)
|
317
|
-
nob: Norwegian (
|
336
|
+
nob: Norwegian (Bokmål)
|
318
337
|
nog: Nogai
|
319
338
|
non: Old Norse
|
320
339
|
nor: Norwegian
|
340
|
+
nqo: N'Ko
|
321
341
|
nso: Northern Sotho
|
322
342
|
nub: Nubian languages
|
343
|
+
nwc: "Newari, Old"
|
323
344
|
nya: Nyanja
|
324
345
|
nym: Nyamwezi
|
325
346
|
nyn: Nyankole
|
@@ -331,7 +352,7 @@ ori: Oriya
|
|
331
352
|
orm: Oromo
|
332
353
|
osa: Osage
|
333
354
|
oss: Ossetic
|
334
|
-
ota: Turkish, Ottoman
|
355
|
+
ota: "Turkish, Ottoman"
|
335
356
|
oto: Otomian languages
|
336
357
|
paa: Papuan (Other)
|
337
358
|
pag: Pangasinan
|
@@ -346,10 +367,10 @@ phi: Philippine (Other)
|
|
346
367
|
phn: Phoenician
|
347
368
|
pli: Pali
|
348
369
|
pol: Polish
|
349
|
-
pon:
|
370
|
+
pon: Pohnpeian
|
350
371
|
por: Portuguese
|
351
372
|
pra: Prakrit languages
|
352
|
-
pro:
|
373
|
+
pro: Provençal (to 1500)
|
353
374
|
pus: Pushto
|
354
375
|
que: Quechua
|
355
376
|
raj: Rajasthani
|
@@ -360,6 +381,7 @@ roh: Raeto-Romance
|
|
360
381
|
rom: Romani
|
361
382
|
rum: Romanian
|
362
383
|
run: Rundi
|
384
|
+
rup: Aromanian
|
363
385
|
rus: Russian
|
364
386
|
sad: Sandawe
|
365
387
|
sag: Sango (Ubangi Creole)
|
@@ -372,11 +394,12 @@ sao: Samoan
|
|
372
394
|
sas: Sasak
|
373
395
|
sat: Santali
|
374
396
|
scc: Serbian
|
397
|
+
scn: Sicilian Italian
|
375
398
|
sco: Scots
|
376
399
|
scr: Croatian
|
377
400
|
sel: Selkup
|
378
401
|
sem: Semitic (Other)
|
379
|
-
sga: Irish, Old (to 1100)
|
402
|
+
sga: "Irish, Old (to 1100)"
|
380
403
|
sgn: Sign languages
|
381
404
|
shn: Shan
|
382
405
|
sho: Shona
|
@@ -404,6 +427,8 @@ son: Songhai
|
|
404
427
|
sot: Sotho
|
405
428
|
spa: Spanish
|
406
429
|
srd: Sardinian
|
430
|
+
srn: Sranan
|
431
|
+
srp: Serbian
|
407
432
|
srr: Serer
|
408
433
|
ssa: Nilo-Saharan (Other)
|
409
434
|
sso: Sotho
|
@@ -415,7 +440,8 @@ sux: Sumerian
|
|
415
440
|
swa: Swahili
|
416
441
|
swe: Swedish
|
417
442
|
swz: Swazi
|
418
|
-
|
443
|
+
syc: Syriac
|
444
|
+
syr: "Syriac, Modern"
|
419
445
|
tag: Tagalog
|
420
446
|
tah: Tahitian
|
421
447
|
tai: Tai (Other)
|
@@ -431,10 +457,11 @@ tgk: Tajik
|
|
431
457
|
tgl: Tagalog
|
432
458
|
tha: Thai
|
433
459
|
tib: Tibetan
|
434
|
-
tig:
|
460
|
+
tig: Tigré
|
435
461
|
tir: Tigrinya
|
436
462
|
tiv: Tiv
|
437
463
|
tkl: Tokelauan
|
464
|
+
tlh: Klingon (Artificial language)
|
438
465
|
tli: Tlingit
|
439
466
|
tmh: Tamashek
|
440
467
|
tog: Tonga (Nyasa)
|
@@ -464,17 +491,17 @@ uzb: Uzbek
|
|
464
491
|
vai: Vai
|
465
492
|
ven: Venda
|
466
493
|
vie: Vietnamese
|
467
|
-
vol:
|
494
|
+
vol: Volapük
|
468
495
|
vot: Votic
|
469
496
|
wak: Wakashan languages
|
470
|
-
wal:
|
497
|
+
wal: Wolayta
|
471
498
|
war: Waray
|
472
|
-
was:
|
499
|
+
was: Washoe
|
473
500
|
wel: Welsh
|
474
|
-
wen: Sorbian
|
501
|
+
wen: Sorbian (Other)
|
475
502
|
wln: Walloon
|
476
503
|
wol: Wolof
|
477
|
-
xal:
|
504
|
+
xal: Oirat
|
478
505
|
xho: Xhosa
|
479
506
|
yao: Yao (Africa)
|
480
507
|
yap: Yapese
|
@@ -482,9 +509,11 @@ yid: Yiddish
|
|
482
509
|
yor: Yoruba
|
483
510
|
ypk: Yupik languages
|
484
511
|
zap: Zapotec
|
512
|
+
zbl: Blissymbolics
|
485
513
|
zen: Zenaga
|
486
514
|
zha: Zhuang
|
487
|
-
znd: Zande
|
515
|
+
znd: Zande languages
|
488
516
|
zul: Zulu
|
489
517
|
zun: Zuni
|
490
|
-
# zxx:
|
518
|
+
# zxx: No linguistic content
|
519
|
+
zza: Zaza
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# we mostly unit test with a Traject::Indexer itself and lower-level, but
|
2
|
+
# we need at least some basic top-level integration tests of the actual command line,
|
3
|
+
# this is a start, we can add more.
|
4
|
+
#
|
5
|
+
# Should we be testing Traject::CommandLine as an object instead of/in addition to
|
6
|
+
# actually testing shell-out to command line call? Maybe.
|
7
|
+
|
8
|
+
require 'test_helper'
|
9
|
+
|
10
|
+
describe "Shell out to command line" do
|
11
|
+
# just encapsulate using the minitest capture helper, but also
|
12
|
+
# getting and returning exit code
|
13
|
+
#
|
14
|
+
# out, err, result = execute_with_args("-c configuration")
|
15
|
+
def execute_with_args(args)
|
16
|
+
out, err = capture_subprocess_io do
|
17
|
+
system("./bin/traject #{args}")
|
18
|
+
end
|
19
|
+
|
20
|
+
return out, err, $?
|
21
|
+
end
|
22
|
+
|
23
|
+
it "can display version" do
|
24
|
+
out, err, result = execute_with_args("-v")
|
25
|
+
|
26
|
+
assert result.success?, "Expected subprocess exit code to be success.\nSTDERR:\n#{err}\n\nSTDOUT:\n#{out}"
|
27
|
+
assert_equal err, "traject version #{Traject::VERSION}\n"
|
28
|
+
end
|
29
|
+
|
30
|
+
it "can display help text" do
|
31
|
+
out, err, result = execute_with_args("-h")
|
32
|
+
|
33
|
+
assert result.success?, "Expected subprocess exit code to be success.\nSTDERR:\n#{err}\n\nSTDOUT:\n#{out}"
|
34
|
+
assert err.start_with?("traject [options] -c configuration.rb [-c config2.rb] file.mrc")
|
35
|
+
end
|
36
|
+
|
37
|
+
it "handles bad argument" do
|
38
|
+
out, err, result = execute_with_args("--no-such-arg")
|
39
|
+
refute result.success?
|
40
|
+
|
41
|
+
assert err.start_with?("Error: unknown option `--no-such-arg'\nExiting...\n")
|
42
|
+
end
|
43
|
+
|
44
|
+
it "does basic dry run" do
|
45
|
+
out, err, result = execute_with_args("--debug-mode -s one=two -s three=four -c test/test_support/demo_config.rb test/test_support/emptyish_record.marc")
|
46
|
+
|
47
|
+
assert result.success?, "Expected subprocess exit code to be success.\nSTDERR:\n#{err}\n\nSTDOUT:\n#{out}"
|
48
|
+
assert_includes err, "executing with: `--debug-mode -s one=two -s three=four"
|
49
|
+
assert_match /bib_1000165 +author_sort +Collection la/, out
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
data/test/debug_writer_test.rb
CHANGED
@@ -73,6 +73,19 @@ describe 'Simple output' do
|
|
73
73
|
|
74
74
|
end
|
75
75
|
|
76
|
+
it "deals ok with nil values" do
|
77
|
+
record_with_nil_value = {"id"=>["2710183"], "title"=>["Manufacturing consent : the political economy of the mass media /"], "xyz"=>nil}
|
78
|
+
@writer.put Traject::Indexer::Context.new(:output_hash => record_with_nil_value)
|
79
|
+
expected = [
|
80
|
+
"#{@id} id #{@id}",
|
81
|
+
"#{@id} title #{@title}",
|
82
|
+
"#{@id} xyz",
|
83
|
+
"\n"
|
84
|
+
]
|
85
|
+
assert_equal expected.join("\n").gsub(/\s/, ''), @io.string.gsub(/\s/, '')
|
86
|
+
@writer.close
|
87
|
+
|
88
|
+
end
|
76
89
|
end
|
77
90
|
|
78
91
|
|
@@ -209,6 +209,10 @@ describe "Traject::Macros::Marc21Semantics" do
|
|
209
209
|
@record = MARC::Reader.new(support_file_path "date_type_r_missing_date2.marc").to_a.first
|
210
210
|
assert_equal 1957, Marc21Semantics.publication_date(@record)
|
211
211
|
end
|
212
|
+
it "provides a fallback for a missing second date" do
|
213
|
+
@record = MARC::Reader.new(support_file_path "missing-second-date.marc").to_a.first
|
214
|
+
assert_equal 1678, Marc21Semantics.publication_date(@record)
|
215
|
+
end
|
212
216
|
|
213
217
|
it "works correctly with date type 'q'" do
|
214
218
|
val = @record['008'].value
|
@@ -174,4 +174,114 @@ describe "Traject::Macros::Transformation" do
|
|
174
174
|
end
|
175
175
|
end
|
176
176
|
|
177
|
+
describe "delete_if" do
|
178
|
+
|
179
|
+
describe "argument is an Array" do
|
180
|
+
it "filters out selected values from accumulatd values" do
|
181
|
+
arg = [ "one", "three"]
|
182
|
+
|
183
|
+
@indexer.configure do
|
184
|
+
to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
|
185
|
+
end
|
186
|
+
|
187
|
+
output = @indexer.map_record(@record)
|
188
|
+
assert_equal ["two"], output["test"]
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
describe "argument is a Set" do
|
193
|
+
it "filters out selected values from accumulatd values" do
|
194
|
+
arg = [ "one", "three"].to_set
|
195
|
+
|
196
|
+
@indexer.configure do
|
197
|
+
to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
|
198
|
+
end
|
199
|
+
|
200
|
+
output = @indexer.map_record(@record)
|
201
|
+
assert_equal ["two"], output["test"]
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
describe "argument is a Regex" do
|
206
|
+
it "filters out selected values from accumulatd values" do
|
207
|
+
arg = /^t/
|
208
|
+
|
209
|
+
@indexer.configure do
|
210
|
+
to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
|
211
|
+
end
|
212
|
+
|
213
|
+
output = @indexer.map_record(@record)
|
214
|
+
assert_equal ["one"], output["test"]
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
describe "argument is a Procedure or Lambda" do
|
219
|
+
it "filters out selected values from accumulatd values" do
|
220
|
+
arg = ->(v) { v == "one" }
|
221
|
+
|
222
|
+
@indexer.configure do
|
223
|
+
to_field "test", literal("one"), literal("two"), literal("three"), delete_if(arg)
|
224
|
+
end
|
225
|
+
|
226
|
+
output = @indexer.map_record(@record)
|
227
|
+
assert_equal ["two", "three"], output["test"]
|
228
|
+
end
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
232
|
+
describe "select" do
|
233
|
+
|
234
|
+
describe "argument is an Array" do
|
235
|
+
it "selects a subset of values from accumulatd values" do
|
236
|
+
arg = [ "one", "three", "four"]
|
237
|
+
|
238
|
+
@indexer.configure do
|
239
|
+
to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
|
240
|
+
end
|
241
|
+
|
242
|
+
output = @indexer.map_record(@record)
|
243
|
+
assert_equal ["one", "three"], output["test"]
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
describe "argument is a Set" do
|
248
|
+
it "selects a subset of values from accumulatd values" do
|
249
|
+
arg = [ "one", "three", "four"].to_set
|
250
|
+
|
251
|
+
@indexer.configure do
|
252
|
+
to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
|
253
|
+
end
|
254
|
+
|
255
|
+
output = @indexer.map_record(@record)
|
256
|
+
assert_equal ["one", "three"], output["test"]
|
257
|
+
end
|
258
|
+
end
|
259
|
+
|
260
|
+
describe "argument is a Regex" do
|
261
|
+
it "selects a subset of values from accumulatd values" do
|
262
|
+
arg = /^t/
|
263
|
+
|
264
|
+
@indexer.configure do
|
265
|
+
to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
|
266
|
+
end
|
267
|
+
|
268
|
+
output = @indexer.map_record(@record)
|
269
|
+
assert_equal ["two", "three"], output["test"]
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
273
|
+
describe "argument is a Procedure or Lambda" do
|
274
|
+
it "selects a subset of values from accumulatd values" do
|
275
|
+
arg = ->(v) { v != "one" }
|
276
|
+
|
277
|
+
@indexer.configure do
|
278
|
+
to_field "test", literal("one"), literal("two"), literal("three"), select(arg)
|
279
|
+
end
|
280
|
+
|
281
|
+
output = @indexer.map_record(@record)
|
282
|
+
assert_equal ["two", "three"], output["test"]
|
283
|
+
end
|
284
|
+
end
|
285
|
+
end
|
286
|
+
|
177
287
|
end
|
@@ -109,6 +109,41 @@ describe "Traject::NokogiriIndexer" do
|
|
109
109
|
result["name"].name == "name"
|
110
110
|
})
|
111
111
|
end
|
112
|
+
end
|
112
113
|
|
114
|
+
describe "xpath to attribute" do
|
115
|
+
let(:indexer) do
|
116
|
+
namespaces = @namespaces
|
117
|
+
Traject::Indexer::NokogiriIndexer.new("nokogiri.namespaces" => namespaces,
|
118
|
+
"nokogiri.each_record_xpath" => "//oai:record") do
|
119
|
+
to_field "status", extract_xpath("//oai:record/oai:header/@status")
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
let(:records) { Traject::NokogiriReader.new(StringIO.new(
|
124
|
+
<<-XML
|
125
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
126
|
+
<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
|
127
|
+
<responseDate>2020-03-03T04:16:09Z</responseDate>
|
128
|
+
<request verb="ListRecords" metadataPrefix="marc21" set="blacklight" from="2020-03-02T20:47:11Z">https://na02.alma.exlibrisgroup.com/view/oai/01TULI_INST/request</request>
|
129
|
+
<ListRecords>
|
130
|
+
<record>
|
131
|
+
<header status="deleted">
|
132
|
+
<identifier>oai:alma.01TULI_INST:991025803889703811</identifier>
|
133
|
+
<datestamp>2020-03-03T03:54:35Z</datestamp>
|
134
|
+
<setSpec>blacklight</setSpec>
|
135
|
+
<setSpec>rapid_print_journals</setSpec>
|
136
|
+
<setSpec>blacklight_qa</setSpec>
|
137
|
+
</header>
|
138
|
+
</record>
|
139
|
+
</ListRecords>
|
140
|
+
</OAI-PMH>
|
141
|
+
XML
|
142
|
+
), []).to_a }
|
143
|
+
|
144
|
+
it "extracts the correct attribute" do
|
145
|
+
statuses = indexer.map_record(records.first)["status"]
|
146
|
+
assert_equal ["deleted"], statuses
|
147
|
+
end
|
113
148
|
end
|
114
149
|
end
|
@@ -7,7 +7,8 @@ memory_writer_class = Class.new do
|
|
7
7
|
# store them in a class variable so we can test em later
|
8
8
|
# Suppress the warning message
|
9
9
|
original_verbose, $VERBOSE = $VERBOSE, nil
|
10
|
-
|
10
|
+
@settings = settings
|
11
|
+
self.class.store_last_writer_settings(@settings)
|
11
12
|
# Activate warning messages again.
|
12
13
|
$VERBOSE = original_verbose
|
13
14
|
@settings["memory_writer.added"] = []
|
@@ -20,6 +21,16 @@ memory_writer_class = Class.new do
|
|
20
21
|
def close
|
21
22
|
@settings["memory_writer.closed"] = true
|
22
23
|
end
|
24
|
+
|
25
|
+
private
|
26
|
+
|
27
|
+
def self.store_last_writer_settings(settings)
|
28
|
+
@last_writer_settings = settings
|
29
|
+
end
|
30
|
+
|
31
|
+
def self.last_writer_settings
|
32
|
+
@last_writer_settings
|
33
|
+
end
|
23
34
|
end
|
24
35
|
|
25
36
|
describe "Traject::Indexer#process" do
|
@@ -53,7 +64,7 @@ describe "Traject::Indexer#process" do
|
|
53
64
|
|
54
65
|
# Grab the settings out of a class variable where we left em,
|
55
66
|
# as a convenient place to store outcomes so we can test em.
|
56
|
-
writer_settings = memory_writer_class.
|
67
|
+
writer_settings = memory_writer_class.last_writer_settings
|
57
68
|
|
58
69
|
assert writer_settings["memory_writer.added"]
|
59
70
|
assert_equal 30, writer_settings["memory_writer.added"].length
|
@@ -146,7 +157,7 @@ describe "Traject::Indexer#process" do
|
|
146
157
|
it "parses and loads" do
|
147
158
|
@indexer.process([@file1, @file2])
|
148
159
|
# kinda ridic, yeah.
|
149
|
-
output_hashes = memory_writer_class.
|
160
|
+
output_hashes = memory_writer_class.last_writer_settings["memory_writer.added"].collect(&:output_hash)
|
150
161
|
|
151
162
|
assert_length 2, output_hashes
|
152
163
|
assert output_hashes.all? { |hash| hash["title"].length > 0 }
|