traject 2.3.3 → 2.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -7
- data/CHANGES.md +3 -0
- data/lib/traject/indexer.rb +4 -38
- data/lib/traject/indexer/step.rb +17 -0
- data/lib/traject/version.rb +1 -1
- data/test/indexer/load_config_file_test.rb +26 -6
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70d8b265e2e00866a63fdc067172ee1174efb068
|
4
|
+
data.tar.gz: 6588b8231b636d268765a5428a607b731a34fe7f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 564a834087b4b5d0b032a9a0797cd1587f241afcbf35bf0c3aa928724f9f26fb8d3e7220d43e909c9bf4c03dd91873113e4d2c081ee68ad12ed151ff3ba34284
|
7
|
+
data.tar.gz: 9cbc506f6f2ab5bcbf77811c5018a91d8b588eff2d0c75960b3aada514756e6d90bce441da3db59d1aaa0a9a0f62b3e0c56d1be2c494aae091b06a61f94de46c
|
data/.travis.yml
CHANGED
data/CHANGES.md
CHANGED
data/lib/traject/indexer.rb
CHANGED
@@ -184,7 +184,7 @@ class Traject::Indexer
|
|
184
184
|
@after_processing_steps = []
|
185
185
|
end
|
186
186
|
|
187
|
-
# Pass a string file path, or a File object, for
|
187
|
+
# Pass a string file path, a Pathname, or a File object, for
|
188
188
|
# a config file to load into indexer.
|
189
189
|
#
|
190
190
|
# Can raise:
|
@@ -195,9 +195,9 @@ class Traject::Indexer
|
|
195
195
|
def load_config_file(file_path)
|
196
196
|
File.open(file_path) do |file|
|
197
197
|
begin
|
198
|
-
self.instance_eval(file.read, file_path)
|
198
|
+
self.instance_eval(file.read, file_path.to_s)
|
199
199
|
rescue ScriptError, StandardError => e
|
200
|
-
raise ConfigLoadError.new(file_path, e)
|
200
|
+
raise ConfigLoadError.new(file_path.to_s, e)
|
201
201
|
end
|
202
202
|
end
|
203
203
|
end
|
@@ -342,12 +342,10 @@ class Traject::Indexer
|
|
342
342
|
|
343
343
|
# Set the index step for error reporting
|
344
344
|
context.index_step = index_step
|
345
|
-
|
345
|
+
log_mapping_errors(context, index_step) do
|
346
346
|
index_step.execute(context) # will always return [] for an each_record step
|
347
347
|
end
|
348
348
|
|
349
|
-
add_accumulator_to_context!(accumulator, context) if index_step.to_field_step?
|
350
|
-
|
351
349
|
# And unset the index step now that we're finished
|
352
350
|
context.index_step = nil
|
353
351
|
end
|
@@ -355,37 +353,6 @@ class Traject::Indexer
|
|
355
353
|
return context
|
356
354
|
end
|
357
355
|
|
358
|
-
|
359
|
-
# Add the accumulator to the context with the correct field name
|
360
|
-
# Do post-processing on the accumulator (remove nil values, allow empty
|
361
|
-
# fields, etc)
|
362
|
-
#
|
363
|
-
# Only get here if we've got a to_field step; otherwise the
|
364
|
-
# call to get a field_name will throw an error
|
365
|
-
|
366
|
-
ALLOW_NIL_VALUES = "allow_nil_values".freeze
|
367
|
-
ALLOW_EMPTY_FIELDS = "allow_empty_fields".freeze
|
368
|
-
ALLOW_DUPLICATE_VALUES = "allow_duplicate_values".freeze
|
369
|
-
|
370
|
-
def add_accumulator_to_context!(accumulator, context)
|
371
|
-
|
372
|
-
accumulator.compact! unless settings[ALLOW_NIL_VALUES]
|
373
|
-
return if accumulator.empty? and not (settings[ALLOW_EMPTY_FIELDS])
|
374
|
-
|
375
|
-
field_name = context.index_step.field_name
|
376
|
-
context.output_hash[field_name] ||= []
|
377
|
-
|
378
|
-
existing_accumulator = context.output_hash[field_name].concat(accumulator)
|
379
|
-
existing_accumulator.uniq! unless settings[ALLOW_DUPLICATE_VALUES]
|
380
|
-
|
381
|
-
rescue NameError => e
|
382
|
-
msg = "Tried to call add_accumulator_to_context with a non-to_field step"
|
383
|
-
msg += context.index_step.inspect
|
384
|
-
logger.error msg
|
385
|
-
raise ArgumentError.new(msg)
|
386
|
-
end
|
387
|
-
|
388
|
-
|
389
356
|
# just a wrapper that captures and records any unexpected
|
390
357
|
# errors raised in mapping, along with contextual information
|
391
358
|
# on record and location in source file of mapping rule.
|
@@ -414,7 +381,6 @@ class Traject::Indexer
|
|
414
381
|
end
|
415
382
|
end
|
416
383
|
|
417
|
-
|
418
384
|
# Processes a stream of records, reading from the configured Reader,
|
419
385
|
# mapping according to configured mapping rules, and then writing
|
420
386
|
# to configured Writer.
|
data/lib/traject/indexer/step.rb
CHANGED
@@ -150,9 +150,26 @@ class Traject::Indexer
|
|
150
150
|
end
|
151
151
|
|
152
152
|
|
153
|
+
add_accumulator_to_context!(accumulator, context)
|
153
154
|
return accumulator
|
154
155
|
end
|
155
156
|
|
157
|
+
# Add the accumulator to the context with the correct field name
|
158
|
+
# Do post-processing on the accumulator (remove nil values, allow empty
|
159
|
+
# fields, etc)
|
160
|
+
ALLOW_NIL_VALUES = "allow_nil_values".freeze
|
161
|
+
ALLOW_EMPTY_FIELDS = "allow_empty_fields".freeze
|
162
|
+
ALLOW_DUPLICATE_VALUES = "allow_duplicate_values".freeze
|
163
|
+
|
164
|
+
def add_accumulator_to_context!(accumulator, context)
|
165
|
+
accumulator.compact! unless context.settings[ALLOW_NIL_VALUES]
|
166
|
+
return if accumulator.empty? and not (context.settings[ALLOW_EMPTY_FIELDS])
|
167
|
+
|
168
|
+
context.output_hash[field_name] ||= []
|
169
|
+
|
170
|
+
existing_accumulator = context.output_hash[field_name].concat(accumulator)
|
171
|
+
existing_accumulator.uniq! unless context.settings[ALLOW_DUPLICATE_VALUES]
|
172
|
+
end
|
156
173
|
end
|
157
174
|
|
158
175
|
# A class representing a block of logic called after
|
data/lib/traject/version.rb
CHANGED
data/test/indexer/load_config_file_test.rb
CHANGED
@@ -20,7 +20,7 @@ describe "Traject::Indexer#load_config_path" do
|
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
-
describe "with good config" do
|
23
|
+
describe "with good config provided" do
|
24
24
|
before do
|
25
25
|
@config_file = tmp_config_file_with(%Q{
|
26
26
|
settings do
|
@@ -32,11 +32,19 @@ describe "Traject::Indexer#load_config_path" do
|
|
32
32
|
after do
|
33
33
|
@config_file.unlink
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
|
+
|
37
|
+
it "loads config file by path (as a String)" do
|
36
38
|
@indexer.load_config_file(@config_file.path)
|
37
39
|
|
38
40
|
assert_equal "our_value", @indexer.settings["our_key"]
|
39
41
|
end
|
42
|
+
|
43
|
+
it "loads config file by path (as a Pathname)" do
|
44
|
+
@indexer.load_config_file(Pathname.new(@config_file.path))
|
45
|
+
|
46
|
+
assert_equal "our_value", @indexer.settings["our_key"]
|
47
|
+
end
|
40
48
|
end
|
41
49
|
|
42
50
|
describe "with error in config" do
|
@@ -60,7 +68,7 @@ describe "Traject::Indexer#load_config_path" do
|
|
60
68
|
assert_equal 4, e.config_file_lineno
|
61
69
|
end
|
62
70
|
|
63
|
-
it "raises good error on StandardError type" do
|
71
|
+
it "raises good error on StandardError type (when passing String)" do
|
64
72
|
@config_file = tmp_config_file_with(%Q{
|
65
73
|
# Intentional non-syntax error, bad extract_marc spec
|
66
74
|
to_field "foo", extract_marc("#%^%^%^")
|
@@ -74,6 +82,20 @@ describe "Traject::Indexer#load_config_path" do
|
|
74
82
|
assert_equal @config_file.path, e.config_file
|
75
83
|
assert_equal 3, e.config_file_lineno
|
76
84
|
end
|
85
|
+
|
86
|
+
it "raises good error on StandardError type (when passing Pathname)" do
|
87
|
+
@config_file = tmp_config_file_with(%Q{
|
88
|
+
# Intentional non-syntax error, bad extract_marc spec
|
89
|
+
to_field "foo", extract_marc("#%^%^%^")
|
90
|
+
})
|
91
|
+
|
92
|
+
e = assert_raises(Traject::Indexer::ConfigLoadError) do
|
93
|
+
@indexer.load_config_file(Pathname.new(@config_file.path))
|
94
|
+
end
|
95
|
+
|
96
|
+
assert_kind_of StandardError, e.original
|
97
|
+
assert_equal @config_file.path, e.config_file
|
98
|
+
end
|
77
99
|
end
|
78
100
|
|
79
101
|
|
@@ -84,6 +106,4 @@ describe "Traject::Indexer#load_config_path" do
|
|
84
106
|
|
85
107
|
return file
|
86
108
|
end
|
87
|
-
|
88
|
-
|
89
|
-
end
|
109
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: traject
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.3
|
4
|
+
version: 2.3.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Rochkind
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-
|
12
|
+
date: 2017-12-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: concurrent-ruby
|
@@ -331,7 +331,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
331
331
|
version: '0'
|
332
332
|
requirements: []
|
333
333
|
rubyforge_project:
|
334
|
-
rubygems_version: 2.6.
|
334
|
+
rubygems_version: 2.6.13
|
335
335
|
signing_key:
|
336
336
|
specification_version: 4
|
337
337
|
summary: Index MARC to Solr; or generally process source records to hash-like structures
|