traject_sequel_writer 0.10.0 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -0
- data/lib/traject/sequel_writer.rb +6 -1
- data/lib/traject_sequel_writer/version.rb +1 -1
- data/test/test_traject_sequel_writer.rb +56 -6
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81efee56a46a47e26c17eff759d4cc0a7dc36726
|
4
|
+
data.tar.gz: 72a8923c02d9e6052342c4c953fbc9cc34857ca2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7e5a7a0710a5e8fa437bf4a8976119ef8f4a8a8995a7eb4633364eb17ef666e6472d44f5bef965c434d208ed9c3cd101638276f74572164bd53c3c03ae1629c3
|
7
|
+
data.tar.gz: ba50260b19700091db4d1827bcf80446595b40a7b53933312028c4524da1143af425c65f7a6bd481fb41b7322bb87355661effbc7cf25c1ddf2b691ed75cb616
|
data/README.md
CHANGED
@@ -58,6 +58,12 @@ Still, your Context output_hash's must provide output key/values for every colum
|
|
58
58
|
null will be inserted for that column. Keys in the output_hash that don't match output columns
|
59
59
|
will be ignored.
|
60
60
|
|
61
|
+
Note that traject output_hashes have values that are arrays of potentially multiple values. If
|
62
|
+
multiple values are present, they will be joined with a comma, or with the delimiter set in `sequel_writer.internal_delimiter`.
|
63
|
+
For non-string db column types, inserting the joined string will probably raise an error.
|
64
|
+
`traject_sequel_writer` also accepts single values in output_hash as an alternative, which isn't really traject's
|
65
|
+
API, but we are experimenting to see whether accepting this alternative is helpful rather than confusing.
|
66
|
+
|
61
67
|
### All settings
|
62
68
|
|
63
69
|
* `sequel_writer.connection_string` : [Sequel connection string](http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html)
|
@@ -67,6 +73,7 @@ will be ignored.
|
|
67
73
|
column_names not present in the Traject::Context#output_hash will end up with SQL `null` inserted.
|
68
74
|
* `sequel_writer.thread_pool_size` Number of threads to use for writing to DB. Default 1, should be good.
|
69
75
|
* `sequel_writer.batch_size` Count of records to batch together in a single multi-row SQL `INSERT`. Default 100. Should be good.
|
76
|
+
* `sequel_writer.internal_delimiter` -- Delimiter _within_ a field, for multiple values. Default is comma.
|
70
77
|
|
71
78
|
### Using as a side-channel additional output
|
72
79
|
|
@@ -58,6 +58,8 @@ module Traject
|
|
58
58
|
@thread_pool = Traject::ThreadPool.new(@thread_pool_size)
|
59
59
|
|
60
60
|
@after_send_batch_callbacks = Array(@settings["sequel_writer.after_send_batch"] || [])
|
61
|
+
|
62
|
+
@internal_delimiter = @settings["sequel_writer.internal_delimiter"] || ","
|
61
63
|
end
|
62
64
|
|
63
65
|
# Get the logger from the settings, or default to an effectively null logger
|
@@ -137,7 +139,10 @@ module Traject
|
|
137
139
|
end
|
138
140
|
|
139
141
|
def hash_to_array(column_names, hash)
|
140
|
-
column_names.collect
|
142
|
+
column_names.collect do |c|
|
143
|
+
v = hash[c.to_s]
|
144
|
+
v.kind_of?(Array) ? v.join(@internal_delimiter) : v
|
145
|
+
end
|
141
146
|
end
|
142
147
|
|
143
148
|
def after_send_batch(&block)
|
@@ -26,6 +26,56 @@ describe "Traject::SequelWriter" do
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
|
29
|
+
describe "with multiple values" do
|
30
|
+
it "joins multiple string values" do
|
31
|
+
@writer = self.writer
|
32
|
+
|
33
|
+
context = Traject::Indexer::Context.new
|
34
|
+
context.output_hash.merge!(
|
35
|
+
"string_a" => ["String_One", "String_Two"],
|
36
|
+
"string_b" => ["String_B_One"]
|
37
|
+
)
|
38
|
+
@writer.put context
|
39
|
+
@writer.close
|
40
|
+
|
41
|
+
assert @writer.db_table.where(:string_a => "String_One,String_Two", :string_b => "String_B_One").count == 1, "Expected written row with expected values"
|
42
|
+
end
|
43
|
+
|
44
|
+
after do
|
45
|
+
@writer.db_table.delete
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
describe "with only single values in output hash" do
|
50
|
+
# not really traject API, but we allow it anyway.
|
51
|
+
it "still writes" do
|
52
|
+
@writer = self.writer
|
53
|
+
|
54
|
+
context = Traject::Indexer::Context.new
|
55
|
+
context.output_hash.merge!(
|
56
|
+
"id" => "ignore_me", # should ignore pk by default
|
57
|
+
"string_a" => "String_a",
|
58
|
+
"string_b" => "String_b",
|
59
|
+
"no_such_column" => "ignore me",
|
60
|
+
"boolean_a" => true,
|
61
|
+
"int_a" => 1001
|
62
|
+
)
|
63
|
+
@writer.put context
|
64
|
+
@writer.close
|
65
|
+
|
66
|
+
row = @writer.db_table.first
|
67
|
+
|
68
|
+
assert_equal "String_a", row[:string_a]
|
69
|
+
assert_equal "String_b", row[:string_b]
|
70
|
+
assert_equal true, row[:boolean_a]
|
71
|
+
assert_equal 1001, row[:int_a]
|
72
|
+
end
|
73
|
+
|
74
|
+
after do
|
75
|
+
@writer.db_table.delete
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
29
79
|
|
30
80
|
it "writes with sequel.database parameter instead of connection_str" do
|
31
81
|
sequel_db = Sequel.connect(TEST_SEQUEL_CONNECT_STR)
|
@@ -90,12 +140,12 @@ describe "Traject::SequelWriter" do
|
|
90
140
|
(1..num).each do |i|
|
91
141
|
context = Traject::Indexer::Context.new
|
92
142
|
context.output_hash.merge!(
|
93
|
-
"id" => "ignore_me", # should ignore pk by default
|
94
|
-
"string_a" => "String_a #{i}",
|
95
|
-
"string_b" => "String_b #{i}",
|
96
|
-
"no_such_column" => "ignore me",
|
97
|
-
"boolean_a" => (i % 2 == 0) ? true : false,
|
98
|
-
"int_a" => i
|
143
|
+
"id" => ["ignore_me"], # should ignore pk by default
|
144
|
+
"string_a" => ["String_a #{i}"],
|
145
|
+
"string_b" => ["String_b #{i}"],
|
146
|
+
"no_such_column" => ["ignore me"],
|
147
|
+
"boolean_a" => [(i % 2 == 0) ? true : false],
|
148
|
+
"int_a" => [i]
|
99
149
|
)
|
100
150
|
writer.put context
|
101
151
|
end
|