traject_sequel_writer 0.10.0 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 42e56a44ac7e5553dda9631506785b5a042946fd
4
- data.tar.gz: 5ad685bb36e80fb1b3484a3a5595d7f9d26119eb
3
+ metadata.gz: 81efee56a46a47e26c17eff759d4cc0a7dc36726
4
+ data.tar.gz: 72a8923c02d9e6052342c4c953fbc9cc34857ca2
5
5
  SHA512:
6
- metadata.gz: 00ed6ac0f25b29168bacd10dec92617ec11078eb9820542c607721d6c16c93849ab29571787a0fae6caeadca408e816e1cdba0d8cc0ce5e48e601232f903a6e2
7
- data.tar.gz: 2daf97c5855a8d5f2d74c20a1957a89de0c66d20a0a9bdd373cd14e4141eb06644c8622fd32ea192d0924cfb03e0567516d3cb8c8edf97b615f14874c7860faa
6
+ metadata.gz: 7e5a7a0710a5e8fa437bf4a8976119ef8f4a8a8995a7eb4633364eb17ef666e6472d44f5bef965c434d208ed9c3cd101638276f74572164bd53c3c03ae1629c3
7
+ data.tar.gz: ba50260b19700091db4d1827bcf80446595b40a7b53933312028c4524da1143af425c65f7a6bd481fb41b7322bb87355661effbc7cf25c1ddf2b691ed75cb616
data/README.md CHANGED
@@ -58,6 +58,12 @@ Still, your Context output_hash's must provide output key/values for every colum
58
58
  null will be inserted for that column. Keys in the output_hash that don't match output columns
59
59
  will be ignored.
60
60
 
61
+ Note that traject output_hash's have values that are arrays of potentially multiple values. If
62
+ multiple values are present, they will be joined with a comma, or with the delimiter set in `sequel_writer.internal_delimiter`.
63
+ For db fields of a non-string type, this will probably raise an error.
64
+ `traject_sequel_writer` also accepts single values in output_hash as an alternative, which isn't really traject's
65
+ API, but we are experimenting to see whether accepting this alternative is helpful rather than confusing.
66
+
61
67
  ### All settings
62
68
 
63
69
  * `sequel_writer.connection_string` : [Sequel connection string](http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html)
@@ -67,6 +73,7 @@ will be ignored.
67
73
  column_names not present in the Traject::Context#output_hash will end up with SQL `null` inserted.
68
74
  * `sequel_writer.thread_pool_size` Number of threads to use for writing to DB. Default 1, should be good.
69
75
  * `sequel_writer.batch_size` Count of records to batch together in a single multi-row SQL `INSERT`. Default 100. Should be good.
76
+ * `sequel_writer.internal_delimiter` -- Delimiter _within_ a field, for multiple values. Default is comma.
70
77
 
71
78
  ### Using as a side-channel additional output
72
79
 
@@ -58,6 +58,8 @@ module Traject
58
58
  @thread_pool = Traject::ThreadPool.new(@thread_pool_size)
59
59
 
60
60
  @after_send_batch_callbacks = Array(@settings["sequel_writer.after_send_batch"] || [])
61
+
62
+ @internal_delimiter = @settings["sequel_writer.internal_delimiter"] || ","
61
63
  end
62
64
 
63
65
  # Get the logger from the settings, or default to an effectively null logger
@@ -137,7 +139,10 @@ module Traject
137
139
  end
138
140
 
139
141
  def hash_to_array(column_names, hash)
140
- column_names.collect {|c| hash[c.to_s]}
142
+ column_names.collect do |c|
143
+ v = hash[c.to_s]
144
+ v.kind_of?(Array) ? v.join(@internal_delimiter) : v
145
+ end
141
146
  end
142
147
 
143
148
  def after_send_batch(&block)
@@ -1,3 +1,3 @@
1
1
  module TrajectSequelWriter
2
- VERSION = "0.10.0"
2
+ VERSION = "0.11.0"
3
3
  end
@@ -26,6 +26,56 @@ describe "Traject::SequelWriter" do
26
26
  end
27
27
  end
28
28
 
29
+ describe "with multiple values" do
30
+ it "joins multiple string values" do
31
+ @writer = self.writer
32
+
33
+ context = Traject::Indexer::Context.new
34
+ context.output_hash.merge!(
35
+ "string_a" => ["String_One", "String_Two"],
36
+ "string_b" => ["String_B_One"]
37
+ )
38
+ @writer.put context
39
+ @writer.close
40
+
41
+ assert @writer.db_table.where(:string_a => "String_One,String_Two", :string_b => "String_B_One").count == 1, "Expected written row with expected values"
42
+ end
43
+
44
+ after do
45
+ @writer.db_table.delete
46
+ end
47
+ end
48
+
49
+ describe "with only single values in output hash" do
50
+ # not really traject API, but we allow it anyway.
51
+ it "still writes" do
52
+ @writer = self.writer
53
+
54
+ context = Traject::Indexer::Context.new
55
+ context.output_hash.merge!(
56
+ "id" => "ignore_me", # should ignore pk by default
57
+ "string_a" => "String_a",
58
+ "string_b" => "String_b",
59
+ "no_such_column" => "ignore me",
60
+ "boolean_a" => true,
61
+ "int_a" => 1001
62
+ )
63
+ @writer.put context
64
+ @writer.close
65
+
66
+ row = @writer.db_table.first
67
+
68
+ assert_equal "String_a", row[:string_a]
69
+ assert_equal "String_b", row[:string_b]
70
+ assert_equal true, row[:boolean_a]
71
+ assert_equal 1001, row[:int_a]
72
+ end
73
+
74
+ after do
75
+ @writer.db_table.delete
76
+ end
77
+ end
78
+
29
79
 
30
80
  it "writes with sequel.database parameter instead of connection_str" do
31
81
  sequel_db = Sequel.connect(TEST_SEQUEL_CONNECT_STR)
@@ -90,12 +140,12 @@ describe "Traject::SequelWriter" do
90
140
  (1..num).each do |i|
91
141
  context = Traject::Indexer::Context.new
92
142
  context.output_hash.merge!(
93
- "id" => "ignore_me", # should ignore pk by default
94
- "string_a" => "String_a #{i}",
95
- "string_b" => "String_b #{i}",
96
- "no_such_column" => "ignore me",
97
- "boolean_a" => (i % 2 == 0) ? true : false,
98
- "int_a" => i
143
+ "id" => ["ignore_me"], # should ignore pk by default
144
+ "string_a" => ["String_a #{i}"],
145
+ "string_b" => ["String_b #{i}"],
146
+ "no_such_column" => ["ignore me"],
147
+ "boolean_a" => [(i % 2 == 0) ? true : false],
148
+ "int_a" => [i]
99
149
  )
100
150
  writer.put context
101
151
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: traject_sequel_writer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Rochkind