traject 2.3.3 → 2.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -7
- data/CHANGES.md +3 -0
- data/lib/traject/indexer.rb +4 -38
- data/lib/traject/indexer/step.rb +17 -0
- data/lib/traject/version.rb +1 -1
- data/test/indexer/load_config_file_test.rb +26 -6
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70d8b265e2e00866a63fdc067172ee1174efb068
|
4
|
+
data.tar.gz: 6588b8231b636d268765a5428a607b731a34fe7f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 564a834087b4b5d0b032a9a0797cd1587f241afcbf35bf0c3aa928724f9f26fb8d3e7220d43e909c9bf4c03dd91873113e4d2c081ee68ad12ed151ff3ba34284
|
7
|
+
data.tar.gz: 9cbc506f6f2ab5bcbf77811c5018a91d8b588eff2d0c75960b3aada514756e6d90bce441da3db59d1aaa0a9a0f62b3e0c56d1be2c494aae091b06a61f94de46c
|
data/.travis.yml
CHANGED
data/CHANGES.md
CHANGED
data/lib/traject/indexer.rb
CHANGED
@@ -184,7 +184,7 @@ class Traject::Indexer
|
|
184
184
|
@after_processing_steps = []
|
185
185
|
end
|
186
186
|
|
187
|
-
# Pass a string file path, or a File object, for
|
187
|
+
# Pass a string file path, a Pathname, or a File object, for
|
188
188
|
# a config file to load into indexer.
|
189
189
|
#
|
190
190
|
# Can raise:
|
@@ -195,9 +195,9 @@ class Traject::Indexer
|
|
195
195
|
def load_config_file(file_path)
|
196
196
|
File.open(file_path) do |file|
|
197
197
|
begin
|
198
|
-
self.instance_eval(file.read, file_path)
|
198
|
+
self.instance_eval(file.read, file_path.to_s)
|
199
199
|
rescue ScriptError, StandardError => e
|
200
|
-
raise ConfigLoadError.new(file_path, e)
|
200
|
+
raise ConfigLoadError.new(file_path.to_s, e)
|
201
201
|
end
|
202
202
|
end
|
203
203
|
end
|
@@ -342,12 +342,10 @@ class Traject::Indexer
|
|
342
342
|
|
343
343
|
# Set the index step for error reporting
|
344
344
|
context.index_step = index_step
|
345
|
-
|
345
|
+
log_mapping_errors(context, index_step) do
|
346
346
|
index_step.execute(context) # will always return [] for an each_record step
|
347
347
|
end
|
348
348
|
|
349
|
-
add_accumulator_to_context!(accumulator, context) if index_step.to_field_step?
|
350
|
-
|
351
349
|
# And unset the index step now that we're finished
|
352
350
|
context.index_step = nil
|
353
351
|
end
|
@@ -355,37 +353,6 @@ class Traject::Indexer
|
|
355
353
|
return context
|
356
354
|
end
|
357
355
|
|
358
|
-
|
359
|
-
# Add the accumulator to the context with the correct field name
|
360
|
-
# Do post-processing on the accumulator (remove nil values, allow empty
|
361
|
-
# fields, etc)
|
362
|
-
#
|
363
|
-
# Only get here if we've got a to_field step; otherwise the
|
364
|
-
# call to get a field_name will throw an error
|
365
|
-
|
366
|
-
ALLOW_NIL_VALUES = "allow_nil_values".freeze
|
367
|
-
ALLOW_EMPTY_FIELDS = "allow_empty_fields".freeze
|
368
|
-
ALLOW_DUPLICATE_VALUES = "allow_duplicate_values".freeze
|
369
|
-
|
370
|
-
def add_accumulator_to_context!(accumulator, context)
|
371
|
-
|
372
|
-
accumulator.compact! unless settings[ALLOW_NIL_VALUES]
|
373
|
-
return if accumulator.empty? and not (settings[ALLOW_EMPTY_FIELDS])
|
374
|
-
|
375
|
-
field_name = context.index_step.field_name
|
376
|
-
context.output_hash[field_name] ||= []
|
377
|
-
|
378
|
-
existing_accumulator = context.output_hash[field_name].concat(accumulator)
|
379
|
-
existing_accumulator.uniq! unless settings[ALLOW_DUPLICATE_VALUES]
|
380
|
-
|
381
|
-
rescue NameError => e
|
382
|
-
msg = "Tried to call add_accumulator_to_context with a non-to_field step"
|
383
|
-
msg += context.index_step.inspect
|
384
|
-
logger.error msg
|
385
|
-
raise ArgumentError.new(msg)
|
386
|
-
end
|
387
|
-
|
388
|
-
|
389
356
|
# just a wrapper that captures and records any unexpected
|
390
357
|
# errors raised in mapping, along with contextual information
|
391
358
|
# on record and location in source file of mapping rule.
|
@@ -414,7 +381,6 @@ class Traject::Indexer
|
|
414
381
|
end
|
415
382
|
end
|
416
383
|
|
417
|
-
|
418
384
|
# Processes a stream of records, reading from the configured Reader,
|
419
385
|
# mapping according to configured mapping rules, and then writing
|
420
386
|
# to configured Writer.
|
data/lib/traject/indexer/step.rb
CHANGED
@@ -150,9 +150,26 @@ class Traject::Indexer
|
|
150
150
|
end
|
151
151
|
|
152
152
|
|
153
|
+
add_accumulator_to_context!(accumulator, context)
|
153
154
|
return accumulator
|
154
155
|
end
|
155
156
|
|
157
|
+
# Add the accumulator to the context with the correct field name
|
158
|
+
# Do post-processing on the accumulator (remove nil values, allow empty
|
159
|
+
# fields, etc)
|
160
|
+
ALLOW_NIL_VALUES = "allow_nil_values".freeze
|
161
|
+
ALLOW_EMPTY_FIELDS = "allow_empty_fields".freeze
|
162
|
+
ALLOW_DUPLICATE_VALUES = "allow_duplicate_values".freeze
|
163
|
+
|
164
|
+
def add_accumulator_to_context!(accumulator, context)
|
165
|
+
accumulator.compact! unless context.settings[ALLOW_NIL_VALUES]
|
166
|
+
return if accumulator.empty? and not (context.settings[ALLOW_EMPTY_FIELDS])
|
167
|
+
|
168
|
+
context.output_hash[field_name] ||= []
|
169
|
+
|
170
|
+
existing_accumulator = context.output_hash[field_name].concat(accumulator)
|
171
|
+
existing_accumulator.uniq! unless context.settings[ALLOW_DUPLICATE_VALUES]
|
172
|
+
end
|
156
173
|
end
|
157
174
|
|
158
175
|
# A class representing a block of logic called after
|
data/lib/traject/version.rb
CHANGED
data/test/indexer/load_config_file_test.rb
CHANGED
@@ -20,7 +20,7 @@ describe "Traject::Indexer#load_config_path" do
|
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
-
describe "with good config" do
|
23
|
+
describe "with good config provided" do
|
24
24
|
before do
|
25
25
|
@config_file = tmp_config_file_with(%Q{
|
26
26
|
settings do
|
@@ -32,11 +32,19 @@ describe "Traject::Indexer#load_config_path" do
|
|
32
32
|
after do
|
33
33
|
@config_file.unlink
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
|
+
|
37
|
+
it "loads config file by path (as a String)" do
|
36
38
|
@indexer.load_config_file(@config_file.path)
|
37
39
|
|
38
40
|
assert_equal "our_value", @indexer.settings["our_key"]
|
39
41
|
end
|
42
|
+
|
43
|
+
it "loads config file by path (as a Pathname)" do
|
44
|
+
@indexer.load_config_file(Pathname.new(@config_file.path))
|
45
|
+
|
46
|
+
assert_equal "our_value", @indexer.settings["our_key"]
|
47
|
+
end
|
40
48
|
end
|
41
49
|
|
42
50
|
describe "with error in config" do
|
@@ -60,7 +68,7 @@ describe "Traject::Indexer#load_config_path" do
|
|
60
68
|
assert_equal 4, e.config_file_lineno
|
61
69
|
end
|
62
70
|
|
63
|
-
it "raises good error on StandardError type" do
|
71
|
+
it "raises good error on StandardError type (when passing String)" do
|
64
72
|
@config_file = tmp_config_file_with(%Q{
|
65
73
|
# Intentional non-syntax error, bad extract_marc spec
|
66
74
|
to_field "foo", extract_marc("#%^%^%^")
|
@@ -74,6 +82,20 @@ describe "Traject::Indexer#load_config_path" do
|
|
74
82
|
assert_equal @config_file.path, e.config_file
|
75
83
|
assert_equal 3, e.config_file_lineno
|
76
84
|
end
|
85
|
+
|
86
|
+
it "raises good error on StandardError type (when passing Pathname)" do
|
87
|
+
@config_file = tmp_config_file_with(%Q{
|
88
|
+
# Intentional non-syntax error, bad extract_marc spec
|
89
|
+
to_field "foo", extract_marc("#%^%^%^")
|
90
|
+
})
|
91
|
+
|
92
|
+
e = assert_raises(Traject::Indexer::ConfigLoadError) do
|
93
|
+
@indexer.load_config_file(Pathname.new(@config_file.path))
|
94
|
+
end
|
95
|
+
|
96
|
+
assert_kind_of StandardError, e.original
|
97
|
+
assert_equal @config_file.path, e.config_file
|
98
|
+
end
|
77
99
|
end
|
78
100
|
|
79
101
|
|
@@ -84,6 +106,4 @@ describe "Traject::Indexer#load_config_path" do
|
|
84
106
|
|
85
107
|
return file
|
86
108
|
end
|
87
|
-
|
88
|
-
|
89
|
-
end
|
109
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.3
|
4
|
+
version: 2.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Rochkind
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-
|
12
|
+
date: 2017-12-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: concurrent-ruby
|
@@ -331,7 +331,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
331
331
|
version: '0'
|
332
332
|
requirements: []
|
333
333
|
rubyforge_project:
|
334
|
-
rubygems_version: 2.6.
|
334
|
+
rubygems_version: 2.6.13
|
335
335
|
signing_key:
|
336
336
|
specification_version: 4
|
337
337
|
summary: Index MARC to Solr; or generally process source records to hash-like structures
|