traject 2.0.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.travis.yml +27 -0
- data/.yardopts +3 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +20 -0
- data/README.md +461 -0
- data/Rakefile +21 -0
- data/bench/bench.rb +30 -0
- data/bin/traject +16 -0
- data/doc/batch_execution.md +243 -0
- data/doc/extending.md +190 -0
- data/doc/indexing_rules.md +265 -0
- data/doc/other_commands.md +47 -0
- data/doc/settings.md +101 -0
- data/lib/tasks/load_maps.rake +48 -0
- data/lib/traject.rb +11 -0
- data/lib/traject/command_line.rb +301 -0
- data/lib/traject/csv_writer.rb +34 -0
- data/lib/traject/debug_writer.rb +47 -0
- data/lib/traject/delimited_writer.rb +110 -0
- data/lib/traject/indexer.rb +613 -0
- data/lib/traject/indexer/settings.rb +110 -0
- data/lib/traject/json_writer.rb +51 -0
- data/lib/traject/line_writer.rb +63 -0
- data/lib/traject/macros/basic.rb +9 -0
- data/lib/traject/macros/marc21.rb +223 -0
- data/lib/traject/macros/marc21_semantics.rb +584 -0
- data/lib/traject/macros/marc_format_classifier.rb +197 -0
- data/lib/traject/marc_extractor.rb +410 -0
- data/lib/traject/marc_reader.rb +89 -0
- data/lib/traject/mock_reader.rb +97 -0
- data/lib/traject/ndj_reader.rb +40 -0
- data/lib/traject/null_writer.rb +22 -0
- data/lib/traject/qualified_const_get.rb +40 -0
- data/lib/traject/solr_json_writer.rb +277 -0
- data/lib/traject/thread_pool.rb +161 -0
- data/lib/traject/translation_map.rb +267 -0
- data/lib/traject/util.rb +52 -0
- data/lib/traject/version.rb +3 -0
- data/lib/traject/yaml_writer.rb +9 -0
- data/lib/translation_maps/lcc_top_level.yaml +26 -0
- data/lib/translation_maps/marc_genre_007.yaml +9 -0
- data/lib/translation_maps/marc_genre_leader.yaml +22 -0
- data/lib/translation_maps/marc_geographic.yaml +589 -0
- data/lib/translation_maps/marc_instruments.yaml +102 -0
- data/lib/translation_maps/marc_languages.yaml +490 -0
- data/test/debug_writer_test.rb +38 -0
- data/test/delimited_writer_test.rb +104 -0
- data/test/indexer/each_record_test.rb +59 -0
- data/test/indexer/macros_marc21_semantics_test.rb +391 -0
- data/test/indexer/macros_marc21_test.rb +190 -0
- data/test/indexer/macros_test.rb +40 -0
- data/test/indexer/map_record_test.rb +209 -0
- data/test/indexer/read_write_test.rb +101 -0
- data/test/indexer/settings_test.rb +152 -0
- data/test/indexer/to_field_test.rb +77 -0
- data/test/marc_extractor_test.rb +412 -0
- data/test/marc_format_classifier_test.rb +98 -0
- data/test/marc_reader_test.rb +110 -0
- data/test/solr_json_writer_test.rb +248 -0
- data/test/test_helper.rb +90 -0
- data/test/test_support/245_no_ab.marc +1 -0
- data/test/test_support/880_with_no_6.utf8.marc +1 -0
- data/test/test_support/bad_subfield_code.marc +1 -0
- data/test/test_support/bad_utf_byte.utf8.marc +1 -0
- data/test/test_support/date_resort_to_260.marc +1 -0
- data/test/test_support/date_type_r_missing_date2.marc +1 -0
- data/test/test_support/date_with_u.marc +1 -0
- data/test/test_support/demo_config.rb +155 -0
- data/test/test_support/emptyish_record.marc +1 -0
- data/test/test_support/escaped_character_reference.marc8.marc +1 -0
- data/test/test_support/george_eliot.marc +1 -0
- data/test/test_support/hebrew880s.marc +1 -0
- data/test/test_support/louis_armstrong.marc +1 -0
- data/test/test_support/manufacturing_consent.marc +1 -0
- data/test/test_support/manuscript_online_thesis.marc +1 -0
- data/test/test_support/microform_online_conference.marc +1 -0
- data/test/test_support/multi_era.marc +1 -0
- data/test/test_support/multi_geo.marc +1 -0
- data/test/test_support/musical_cage.marc +1 -0
- data/test/test_support/nature.marc +1 -0
- data/test/test_support/one-marc8.mrc +1 -0
- data/test/test_support/online_only.marc +1 -0
- data/test/test_support/packed_041a_lang.marc +1 -0
- data/test/test_support/test_data.utf8.json +30 -0
- data/test/test_support/test_data.utf8.marc.xml +2609 -0
- data/test/test_support/test_data.utf8.mrc +1 -0
- data/test/test_support/test_data.utf8.mrc.gz +0 -0
- data/test/test_support/the_business_ren.marc +1 -0
- data/test/translation_map_test.rb +225 -0
- data/test/translation_maps/bad_ruby.rb +8 -0
- data/test/translation_maps/bad_yaml.yaml +1 -0
- data/test/translation_maps/both_map.rb +1 -0
- data/test/translation_maps/both_map.yaml +1 -0
- data/test/translation_maps/default_literal.rb +10 -0
- data/test/translation_maps/default_passthrough.rb +10 -0
- data/test/translation_maps/marc_040a_translate_test.yaml +1 -0
- data/test/translation_maps/properties_map.properties +5 -0
- data/test/translation_maps/ruby_map.rb +10 -0
- data/test/translation_maps/translate_array_test.yaml +8 -0
- data/test/translation_maps/yaml_map.yaml +7 -0
- data/traject.gemspec +47 -0
- metadata +382 -0
@@ -0,0 +1,190 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
require 'traject/indexer'
|
4
|
+
require 'traject/macros/marc21'
|
5
|
+
|
6
|
+
require 'json'
|
7
|
+
require 'marc'
|
8
|
+
|
9
|
+
# See also marc_extractor_test.rb for more detailed tests on marc extraction,
|
10
|
+
# this is just a basic test to make sure our macro works passing through to there
|
11
|
+
# and other options.
|
12
|
+
describe "Traject::Macros::Marc21" do
|
13
|
+
Marc21 = Traject::Macros::Marc21 # shortcut
|
14
|
+
|
15
|
+
before do
|
16
|
+
@indexer = Traject::Indexer.new
|
17
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
18
|
+
end
|
19
|
+
|
20
|
+
describe "extract_marc" do
|
21
|
+
it "extracts marc" do
|
22
|
+
@indexer.instance_eval do
|
23
|
+
to_field "title", extract_marc("245ab")
|
24
|
+
end
|
25
|
+
|
26
|
+
output = @indexer.map_record(@record)
|
27
|
+
|
28
|
+
assert_equal ["Manufacturing consent : the political economy of the mass media /"], output["title"]
|
29
|
+
assert_equal({}, @indexer.map_record(empty_record))
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
it "respects :first=>true option" do
|
34
|
+
@indexer.instance_eval do
|
35
|
+
to_field "other_id", extract_marc("035a", :first => true)
|
36
|
+
end
|
37
|
+
|
38
|
+
output = @indexer.map_record(@record)
|
39
|
+
|
40
|
+
assert_length 1, output["other_id"]
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
it "trims punctuation with :trim_punctuation => true" do
|
45
|
+
@indexer.instance_eval do
|
46
|
+
to_field "title", extract_marc("245ab", :trim_punctuation => true)
|
47
|
+
end
|
48
|
+
|
49
|
+
output = @indexer.map_record(@record)
|
50
|
+
|
51
|
+
assert_equal ["Manufacturing consent : the political economy of the mass media"], output["title"]
|
52
|
+
assert_equal({}, @indexer.map_record(empty_record))
|
53
|
+
|
54
|
+
end
|
55
|
+
|
56
|
+
it "respects :default option" do
|
57
|
+
@indexer.instance_eval do
|
58
|
+
to_field "only_default", extract_marc("9999", :default => "DEFAULT VALUE")
|
59
|
+
end
|
60
|
+
output = @indexer.map_record(@record)
|
61
|
+
|
62
|
+
assert_equal ["DEFAULT VALUE"], output["only_default"]
|
63
|
+
end
|
64
|
+
|
65
|
+
it "de-duplicates by default, respects :allow_duplicates" do
|
66
|
+
# Add a second 008
|
67
|
+
f = @record.fields('008').first
|
68
|
+
@record.append(f)
|
69
|
+
|
70
|
+
@indexer.instance_eval do
|
71
|
+
to_field "lang1", extract_marc('008[35-37]')
|
72
|
+
to_field "lang2", extract_marc('008[35-37]', :allow_duplicates=>true)
|
73
|
+
end
|
74
|
+
|
75
|
+
output = @indexer.map_record(@record)
|
76
|
+
assert_equal ["eng"], output['lang1']
|
77
|
+
assert_equal ["eng", "eng"], output['lang2']
|
78
|
+
assert_equal({}, @indexer.map_record(empty_record))
|
79
|
+
end
|
80
|
+
|
81
|
+
it "fails on an extra/misspelled argument to extract_marc" do
|
82
|
+
assert_raises(RuntimeError) do
|
83
|
+
@indexer.instance_eval do
|
84
|
+
to_field "foo", extract_marc("9999", :misspelled => "Who cares")
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
|
90
|
+
|
91
|
+
|
92
|
+
it "Marc21::trim_punctuation class method" do
|
93
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three")
|
94
|
+
|
95
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three,")
|
96
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three/")
|
97
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three;")
|
98
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three:")
|
99
|
+
assert_equal "one two three .", Marc21.trim_punctuation("one two three .")
|
100
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three.")
|
101
|
+
|
102
|
+
assert_equal "one two [three]", Marc21.trim_punctuation("one two [three]")
|
103
|
+
assert_equal "one two three", Marc21.trim_punctuation("one two three]")
|
104
|
+
assert_equal "one two three", Marc21.trim_punctuation("[one two three")
|
105
|
+
assert_equal "one two three", Marc21.trim_punctuation("[one two three]")
|
106
|
+
|
107
|
+
# This one was a bug before
|
108
|
+
assert_equal "Feminism and art", Marc21.trim_punctuation("Feminism and art.")
|
109
|
+
end
|
110
|
+
|
111
|
+
it "uses :translation_map" do
|
112
|
+
@indexer.instance_eval do
|
113
|
+
to_field "cataloging_agency", extract_marc("040a", :separator => nil, :translation_map => "marc_040a_translate_test")
|
114
|
+
end
|
115
|
+
output = @indexer.map_record(@record)
|
116
|
+
|
117
|
+
assert_equal ["Library of Congress"], output["cataloging_agency"]
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
describe "serialized_marc" do
|
122
|
+
it "serializes xml" do
|
123
|
+
@indexer.instance_eval do
|
124
|
+
to_field "marc_record", serialized_marc(:format => "xml")
|
125
|
+
end
|
126
|
+
output = @indexer.map_record(@record)
|
127
|
+
|
128
|
+
assert_length 1, output["marc_record"]
|
129
|
+
assert_kind_of String, output["marc_record"].first
|
130
|
+
roundtrip_record = MARC::XMLReader.new(StringIO.new(output["marc_record"].first)).first
|
131
|
+
assert_equal @record, roundtrip_record
|
132
|
+
end
|
133
|
+
|
134
|
+
it "serializes binary UUEncoded" do
|
135
|
+
@indexer.instance_eval do
|
136
|
+
to_field "marc_record", serialized_marc(:format => "binary")
|
137
|
+
end
|
138
|
+
output = @indexer.map_record(@record)
|
139
|
+
|
140
|
+
assert_length 1, output["marc_record"]
|
141
|
+
assert_kind_of String, output["marc_record"].first
|
142
|
+
|
143
|
+
decoded = Base64.decode64( output["marc_record"].first )
|
144
|
+
|
145
|
+
# just check the marc header for now
|
146
|
+
assert_start_with "02067cam a2200469", decoded
|
147
|
+
end
|
148
|
+
|
149
|
+
it "serializes binary raw" do
|
150
|
+
@indexer.instance_eval do
|
151
|
+
to_field "marc_record", serialized_marc(:format => "binary", :binary_escape => false)
|
152
|
+
end
|
153
|
+
output = @indexer.map_record(@record)
|
154
|
+
|
155
|
+
assert_length 1, output["marc_record"]
|
156
|
+
assert_kind_of String, output["marc_record"].first
|
157
|
+
|
158
|
+
# just check the marc header for now
|
159
|
+
assert_start_with "02067cam a2200469", output["marc_record"].first
|
160
|
+
end
|
161
|
+
|
162
|
+
it "serializes json" do
|
163
|
+
@indexer.instance_eval do
|
164
|
+
to_field "marc_record", serialized_marc(:format => "json")
|
165
|
+
end
|
166
|
+
output = @indexer.map_record(@record)
|
167
|
+
|
168
|
+
assert_length 1, output["marc_record"]
|
169
|
+
|
170
|
+
# okay, let's actually deserialize it, why not
|
171
|
+
|
172
|
+
hash = JSON.parse( output["marc_record"].first )
|
173
|
+
|
174
|
+
deserialized = MARC::Record.new_from_hash(hash)
|
175
|
+
|
176
|
+
assert_equal @record, deserialized
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
it "#extract_all_marc_values" do
|
181
|
+
@indexer.instance_eval do
|
182
|
+
to_field "text", extract_all_marc_values
|
183
|
+
end
|
184
|
+
output = @indexer.map_record(@record)
|
185
|
+
|
186
|
+
assert_length 13, output["text"]
|
187
|
+
end
|
188
|
+
|
189
|
+
|
190
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Indexer Macros:" do
|
4
|
+
before do
|
5
|
+
@indexer = Traject::Indexer.new
|
6
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
7
|
+
end
|
8
|
+
|
9
|
+
it "works with simple literal" do
|
10
|
+
@indexer.instance_eval do
|
11
|
+
extend Traject::Macros::Basic
|
12
|
+
|
13
|
+
to_field "source", literal("MY LIBRARY")
|
14
|
+
end
|
15
|
+
|
16
|
+
output = @indexer.map_record(@record)
|
17
|
+
|
18
|
+
assert_equal ["MY LIBRARY"], output["source"]
|
19
|
+
end
|
20
|
+
|
21
|
+
it "works with macro AND block" do
|
22
|
+
called = false
|
23
|
+
|
24
|
+
@indexer.instance_eval do
|
25
|
+
extend Traject::Macros::Basic
|
26
|
+
to_field "source", literal("MY LIBRARY") do |record, accumulator, context|
|
27
|
+
called = true
|
28
|
+
accumulator << "SECOND VALUE"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
output = @indexer.map_record(@record)
|
33
|
+
|
34
|
+
assert called
|
35
|
+
assert_equal ["MY LIBRARY", "SECOND VALUE"], output["source"]
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
|
40
|
+
end
|
@@ -0,0 +1,209 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe "Traject::Indexer#map_record" do
|
4
|
+
before do
|
5
|
+
@indexer = Traject::Indexer.new
|
6
|
+
@record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
describe "with no indexing rules" do
|
11
|
+
it "returns empty hash" do
|
12
|
+
output = @indexer.map_record(@record)
|
13
|
+
|
14
|
+
assert_kind_of Hash, output
|
15
|
+
assert_empty output
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
describe "#to_field" do
|
20
|
+
it "works with block" do
|
21
|
+
called = false
|
22
|
+
|
23
|
+
@indexer.to_field("title") do |record, accumulator|
|
24
|
+
assert_kind_of MARC::Record, record
|
25
|
+
assert_kind_of Array, accumulator
|
26
|
+
|
27
|
+
called = true # by the power of closure!
|
28
|
+
accumulator << "Some Title"
|
29
|
+
end
|
30
|
+
|
31
|
+
output = @indexer.map_record(@record)
|
32
|
+
|
33
|
+
assert called
|
34
|
+
assert_kind_of Hash, output
|
35
|
+
assert_equal ["Some Title"], output["title"]
|
36
|
+
end
|
37
|
+
|
38
|
+
it "works with a lambda arg" do
|
39
|
+
called = false
|
40
|
+
|
41
|
+
logic = lambda do |record, accumulator|
|
42
|
+
assert_kind_of MARC::Record, record
|
43
|
+
assert_kind_of Array, accumulator
|
44
|
+
|
45
|
+
called = true # by the power of closure!
|
46
|
+
accumulator << "Some Title"
|
47
|
+
end
|
48
|
+
|
49
|
+
@indexer.to_field("title", logic)
|
50
|
+
|
51
|
+
output = @indexer.map_record(@record)
|
52
|
+
|
53
|
+
assert called
|
54
|
+
assert_kind_of Hash, output
|
55
|
+
assert_equal ["Some Title"], output["title"]
|
56
|
+
end
|
57
|
+
|
58
|
+
it "works with both lambda and Proc" do
|
59
|
+
block_called = false
|
60
|
+
|
61
|
+
lambda_arg = lambda do |record, accumulator|
|
62
|
+
accumulator << "Lambda-provided Value"
|
63
|
+
end
|
64
|
+
|
65
|
+
@indexer.to_field("title", lambda_arg) do |record, accumulator|
|
66
|
+
assert_includes accumulator, "Lambda-provided Value"
|
67
|
+
accumulator << "Block-provided Value"
|
68
|
+
|
69
|
+
block_called = true
|
70
|
+
end
|
71
|
+
|
72
|
+
output = @indexer.map_record(@record)
|
73
|
+
|
74
|
+
assert block_called
|
75
|
+
assert_includes output["title"], "Lambda-provided Value"
|
76
|
+
assert_includes output["title"], "Block-provided Value"
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
describe "multiple to_field blocks" do
|
81
|
+
it "get called in order" do
|
82
|
+
order = []
|
83
|
+
@indexer.to_field("title") do |rec, acc|
|
84
|
+
order << :first_one
|
85
|
+
acc << "First"
|
86
|
+
end
|
87
|
+
@indexer.to_field("title") do |rec, acc|
|
88
|
+
order << :second_one
|
89
|
+
acc << "Second"
|
90
|
+
end
|
91
|
+
|
92
|
+
output = @indexer.map_record(@record)
|
93
|
+
|
94
|
+
assert_equal [:first_one, :second_one], order
|
95
|
+
assert_equal ["First", "Second"], output["title"]
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
describe "context argument" do
|
100
|
+
it "is third argument to block" do
|
101
|
+
called = false
|
102
|
+
@indexer.to_field("title") do |record, accumulator, context|
|
103
|
+
called = true
|
104
|
+
|
105
|
+
assert_kind_of Traject::Indexer::Context, context
|
106
|
+
|
107
|
+
assert_kind_of Hash, context.clipboard
|
108
|
+
assert_kind_of Hash, context.output_hash
|
109
|
+
|
110
|
+
assert_same @record, record
|
111
|
+
assert_same record, context.source_record
|
112
|
+
assert_same @indexer.settings, context.settings
|
113
|
+
end
|
114
|
+
|
115
|
+
@indexer.map_record @record
|
116
|
+
|
117
|
+
assert called
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
describe "#each_record" do
|
122
|
+
it "is called with one-arg record" do
|
123
|
+
called = false
|
124
|
+
@indexer.each_record do |record|
|
125
|
+
called = true
|
126
|
+
assert_kind_of MARC::Record, record
|
127
|
+
end
|
128
|
+
@indexer.map_record(@record)
|
129
|
+
|
130
|
+
assert called, "each_record was called"
|
131
|
+
end
|
132
|
+
it "is called with two-arg record and context" do
|
133
|
+
called = false
|
134
|
+
@indexer.each_record do |record, context|
|
135
|
+
called = true
|
136
|
+
assert_kind_of MARC::Record, record
|
137
|
+
assert_kind_of Traject::Indexer::Context, context
|
138
|
+
end
|
139
|
+
@indexer.map_record(@record)
|
140
|
+
|
141
|
+
assert called, "each_record was called"
|
142
|
+
end
|
143
|
+
it "accepts lambda AND block" do
|
144
|
+
lambda_arg = lambda do |record, context|
|
145
|
+
context.output_hash["field"] ||= []
|
146
|
+
context.output_hash["field"] << "first"
|
147
|
+
end
|
148
|
+
|
149
|
+
@indexer.each_record(lambda_arg) do |record, context|
|
150
|
+
context.output_hash["field"] ||= []
|
151
|
+
context.output_hash["field"] << "second"
|
152
|
+
end
|
153
|
+
|
154
|
+
output = @indexer.map_record(@record)
|
155
|
+
|
156
|
+
assert_equal %w{first second}, output["field"]
|
157
|
+
end
|
158
|
+
it "is called in order with #to_field" do
|
159
|
+
@indexer.to_field("foo") {|record, accumulator| accumulator << "first"}
|
160
|
+
@indexer.each_record {|record, context| context.output_hash["foo"] << "second" }
|
161
|
+
@indexer.to_field("foo") {|record, accumulator| accumulator << "third"}
|
162
|
+
|
163
|
+
output = @indexer.map_record(@record)
|
164
|
+
|
165
|
+
assert_equal %w{first second third}, output["foo"]
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
describe "map_to_context!" do
|
170
|
+
before do
|
171
|
+
@context = Traject::Indexer::Context.new(:source_record => @record, :settings => @indexer.settings, :position => 10 )
|
172
|
+
end
|
173
|
+
it "passes context to indexing routines" do
|
174
|
+
called = false
|
175
|
+
@indexer.to_field("title") do |record, accumulator, context|
|
176
|
+
called = true
|
177
|
+
assert_kind_of Traject::Indexer::Context, context
|
178
|
+
assert_same @context, context
|
179
|
+
end
|
180
|
+
|
181
|
+
context = @indexer.map_to_context!(@context)
|
182
|
+
|
183
|
+
assert_same @context, context
|
184
|
+
|
185
|
+
assert called, "Called mapping routine"
|
186
|
+
end
|
187
|
+
|
188
|
+
it "skips records" do
|
189
|
+
|
190
|
+
@indexer.to_field("beforeSkip") do |rec, acc|
|
191
|
+
acc << "Before"
|
192
|
+
end
|
193
|
+
|
194
|
+
@indexer.to_field('radical') do |rec, acc, context|
|
195
|
+
context.skip!("Chomsky!") if rec['245'].to_s =~ /Chomsky/
|
196
|
+
end
|
197
|
+
|
198
|
+
@indexer.to_field('afterSkip') do |rec, acc|
|
199
|
+
acc << "After. Should never happen"
|
200
|
+
end
|
201
|
+
|
202
|
+
output = @indexer.map_record(@record)
|
203
|
+
assert_equal ['Before'], output['beforeSkip']
|
204
|
+
assert_nil output['afterSkip']
|
205
|
+
end
|
206
|
+
|
207
|
+
end
|
208
|
+
|
209
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# A little Traject Writer that just keeps everything
|
4
|
+
# in an array, just added to settings for easy access
|
5
|
+
memory_writer_class = Class.new do
|
6
|
+
def initialize(settings)
|
7
|
+
# store them in a class variable so we can test em later
|
8
|
+
@@last_writer_settings = @settings = settings
|
9
|
+
@settings["memory_writer.added"] = []
|
10
|
+
end
|
11
|
+
|
12
|
+
def put(hash)
|
13
|
+
@settings["memory_writer.added"] << hash
|
14
|
+
end
|
15
|
+
|
16
|
+
def close
|
17
|
+
@settings["memory_writer.closed"] = true
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
describe "Traject::Indexer#process" do
|
22
|
+
before do
|
23
|
+
# no threading for these tests
|
24
|
+
@indexer = Traject::Indexer.new("processing_thread_pool" => nil)
|
25
|
+
@indexer.writer_class = memory_writer_class
|
26
|
+
@file = File.open(support_file_path "test_data.utf8.mrc")
|
27
|
+
end
|
28
|
+
|
29
|
+
it "works" do
|
30
|
+
# oops, this times_called counter isn't thread-safe under multi-threading
|
31
|
+
# is why this fails sometimes.
|
32
|
+
# fixed to be single-threaded for these tests.
|
33
|
+
times_called = 0
|
34
|
+
@indexer.to_field("title") do |record, accumulator, context|
|
35
|
+
times_called += 1
|
36
|
+
accumulator << "ADDED TITLE"
|
37
|
+
|
38
|
+
assert context.index_step, "Context has #index_step set"
|
39
|
+
assert_equal "title", context.index_step.field_name
|
40
|
+
|
41
|
+
assert context.logger, "Context knows #logger"
|
42
|
+
|
43
|
+
assert_equal times_called, context.position
|
44
|
+
end
|
45
|
+
|
46
|
+
return_value = @indexer.process( @file )
|
47
|
+
|
48
|
+
assert return_value, "Returns `true` on success"
|
49
|
+
|
50
|
+
# Grab the settings out of a class variable where we left em,
|
51
|
+
# as a convenient place to store outcomes so we can test em.
|
52
|
+
writer_settings = memory_writer_class.class_variable_get("@@last_writer_settings")
|
53
|
+
|
54
|
+
assert writer_settings["memory_writer.added"]
|
55
|
+
assert_equal 30, writer_settings["memory_writer.added"].length
|
56
|
+
assert_kind_of Traject::Indexer::Context, writer_settings["memory_writer.added"].first
|
57
|
+
assert_equal ["ADDED TITLE"], writer_settings["memory_writer.added"].first.output_hash["title"]
|
58
|
+
|
59
|
+
# logger provided in settings
|
60
|
+
assert writer_settings["logger"]
|
61
|
+
|
62
|
+
assert writer_settings["memory_writer.closed"]
|
63
|
+
end
|
64
|
+
|
65
|
+
require 'traject/null_writer'
|
66
|
+
it "calls after_processing after processing" do
|
67
|
+
@indexer = Traject::Indexer.new(
|
68
|
+
"writer_class_name" => "Traject::NullWriter"
|
69
|
+
)
|
70
|
+
@file = File.open(support_file_path "test_data.utf8.mrc")
|
71
|
+
|
72
|
+
called = []
|
73
|
+
|
74
|
+
@indexer.after_processing do
|
75
|
+
called << :one
|
76
|
+
end
|
77
|
+
@indexer.after_processing do
|
78
|
+
called << :two
|
79
|
+
end
|
80
|
+
|
81
|
+
@indexer.process(@file)
|
82
|
+
|
83
|
+
assert_equal [:one, :two], called, "Both after_processing hooks called, in order"
|
84
|
+
end
|
85
|
+
|
86
|
+
describe "demo_config.rb" do
|
87
|
+
before do
|
88
|
+
@indexer = Traject::Indexer.new(
|
89
|
+
"writer_class_name" => "Traject::NullWriter"
|
90
|
+
)
|
91
|
+
end
|
92
|
+
|
93
|
+
it "parses and loads" do
|
94
|
+
conf_path = support_file_path "demo_config.rb"
|
95
|
+
File.open(conf_path) do |file_io|
|
96
|
+
@indexer.instance_eval(file_io.read, conf_path)
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
end
|