traject_sequel_writer 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -0
- data/lib/traject/sequel_writer.rb +6 -1
- data/lib/traject_sequel_writer/version.rb +1 -1
- data/test/test_traject_sequel_writer.rb +56 -6
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 81efee56a46a47e26c17eff759d4cc0a7dc36726
|
|
4
|
+
data.tar.gz: 72a8923c02d9e6052342c4c953fbc9cc34857ca2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7e5a7a0710a5e8fa437bf4a8976119ef8f4a8a8995a7eb4633364eb17ef666e6472d44f5bef965c434d208ed9c3cd101638276f74572164bd53c3c03ae1629c3
|
|
7
|
+
data.tar.gz: ba50260b19700091db4d1827bcf80446595b40a7b53933312028c4524da1143af425c65f7a6bd481fb41b7322bb87355661effbc7cf25c1ddf2b691ed75cb616
|
data/README.md
CHANGED
|
@@ -58,6 +58,12 @@ Still, your Context output_hash's must provide output key/values for every colum
|
|
|
58
58
|
null will be inserted for that column. Keys in the output_hash that don't match output columns
|
|
59
59
|
will be ignored.
|
|
60
60
|
|
|
61
|
+
Note that the values in a traject output_hash are arrays, each potentially holding multiple values. If
|
|
62
|
+
multiple values are present, they will be joined with a comma, or with the delimiter set in `sequel_writer.internal_delimiter`.
|
|
63
|
+
For non-string db column types, inserting the joined multi-value string will probably raise an error.
|
|
64
|
+
`traject_sequel_writer` also accepts single values in output_hash as an alternative, which isn't really traject's
|
|
65
|
+
API; this is an experiment to see whether accepting this alternative is helpful rather than confusing.
|
|
66
|
+
|
|
61
67
|
### All settings
|
|
62
68
|
|
|
63
69
|
* `sequel_writer.connection_string` : [Sequel connection string](http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html)
|
|
@@ -67,6 +73,7 @@ will be ignored.
|
|
|
67
73
|
column_names not present in the Traject::Context#output_hash will end up with SQL `null` inserted.
|
|
68
74
|
* `sequel_writer.thread_pool_size` Number of threads to use for writing to DB. Default 1, should be good.
|
|
69
75
|
* `sequel_writer.batch_size` Count of records to batch together in a single multi-row SQL `INSERT`. Default 100. Should be good.
|
|
76
|
+
* `sequel_writer.internal_delimiter` Delimiter used _within_ a field to join multiple values. Default is a comma (`,`).
|
|
70
77
|
|
|
71
78
|
### Using as a side-channel additional output
|
|
72
79
|
|
|
@@ -58,6 +58,8 @@ module Traject
|
|
|
58
58
|
@thread_pool = Traject::ThreadPool.new(@thread_pool_size)
|
|
59
59
|
|
|
60
60
|
@after_send_batch_callbacks = Array(@settings["sequel_writer.after_send_batch"] || [])
|
|
61
|
+
|
|
62
|
+
@internal_delimiter = @settings["sequel_writer.internal_delimiter"] || ","
|
|
61
63
|
end
|
|
62
64
|
|
|
63
65
|
# Get the logger from the settings, or default to an effectively null logger
|
|
@@ -137,7 +139,10 @@ module Traject
|
|
|
137
139
|
end
|
|
138
140
|
|
|
139
141
|
def hash_to_array(column_names, hash)
|
|
140
|
-
column_names.collect
|
|
142
|
+
column_names.collect do |c|
|
|
143
|
+
v = hash[c.to_s]
|
|
144
|
+
v.kind_of?(Array) ? v.join(@internal_delimiter) : v
|
|
145
|
+
end
|
|
141
146
|
end
|
|
142
147
|
|
|
143
148
|
def after_send_batch(&block)
|
|
@@ -26,6 +26,56 @@ describe "Traject::SequelWriter" do
|
|
|
26
26
|
end
|
|
27
27
|
end
|
|
28
28
|
|
|
29
|
+
describe "with multiple values" do
|
|
30
|
+
it "joins multiple string values" do
|
|
31
|
+
@writer = self.writer
|
|
32
|
+
|
|
33
|
+
context = Traject::Indexer::Context.new
|
|
34
|
+
context.output_hash.merge!(
|
|
35
|
+
"string_a" => ["String_One", "String_Two"],
|
|
36
|
+
"string_b" => ["String_B_One"]
|
|
37
|
+
)
|
|
38
|
+
@writer.put context
|
|
39
|
+
@writer.close
|
|
40
|
+
|
|
41
|
+
assert @writer.db_table.where(:string_a => "String_One,String_Two", :string_b => "String_B_One").count == 1, "Expected written row with expected values"
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
after do
|
|
45
|
+
@writer.db_table.delete
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
describe "with only single values in output hash" do
|
|
50
|
+
# not really traject API, but we allow it anyway.
|
|
51
|
+
it "still writes" do
|
|
52
|
+
@writer = self.writer
|
|
53
|
+
|
|
54
|
+
context = Traject::Indexer::Context.new
|
|
55
|
+
context.output_hash.merge!(
|
|
56
|
+
"id" => "ignore_me", # should ignore pk by default
|
|
57
|
+
"string_a" => "String_a",
|
|
58
|
+
"string_b" => "String_b",
|
|
59
|
+
"no_such_column" => "ignore me",
|
|
60
|
+
"boolean_a" => true,
|
|
61
|
+
"int_a" => 1001
|
|
62
|
+
)
|
|
63
|
+
@writer.put context
|
|
64
|
+
@writer.close
|
|
65
|
+
|
|
66
|
+
row = @writer.db_table.first
|
|
67
|
+
|
|
68
|
+
assert_equal "String_a", row[:string_a]
|
|
69
|
+
assert_equal "String_b", row[:string_b]
|
|
70
|
+
assert_equal true, row[:boolean_a]
|
|
71
|
+
assert_equal 1001, row[:int_a]
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
after do
|
|
75
|
+
@writer.db_table.delete
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
29
79
|
|
|
30
80
|
it "writes with sequel.database parameter instead of connection_str" do
|
|
31
81
|
sequel_db = Sequel.connect(TEST_SEQUEL_CONNECT_STR)
|
|
@@ -90,12 +140,12 @@ describe "Traject::SequelWriter" do
|
|
|
90
140
|
(1..num).each do |i|
|
|
91
141
|
context = Traject::Indexer::Context.new
|
|
92
142
|
context.output_hash.merge!(
|
|
93
|
-
"id" => "ignore_me", # should ignore pk by default
|
|
94
|
-
"string_a" => "String_a #{i}",
|
|
95
|
-
"string_b" => "String_b #{i}",
|
|
96
|
-
"no_such_column" => "ignore me",
|
|
97
|
-
"boolean_a" => (i % 2 == 0) ? true : false,
|
|
98
|
-
"int_a" => i
|
|
143
|
+
"id" => ["ignore_me"], # should ignore pk by default
|
|
144
|
+
"string_a" => ["String_a #{i}"],
|
|
145
|
+
"string_b" => ["String_b #{i}"],
|
|
146
|
+
"no_such_column" => ["ignore me"],
|
|
147
|
+
"boolean_a" => [(i % 2 == 0) ? true : false],
|
|
148
|
+
"int_a" => [i]
|
|
99
149
|
)
|
|
100
150
|
writer.put context
|
|
101
151
|
end
|