traject_sequel_writer 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 42e56a44ac7e5553dda9631506785b5a042946fd
4
- data.tar.gz: 5ad685bb36e80fb1b3484a3a5595d7f9d26119eb
3
+ metadata.gz: 81efee56a46a47e26c17eff759d4cc0a7dc36726
4
+ data.tar.gz: 72a8923c02d9e6052342c4c953fbc9cc34857ca2
5
5
  SHA512:
6
- metadata.gz: 00ed6ac0f25b29168bacd10dec92617ec11078eb9820542c607721d6c16c93849ab29571787a0fae6caeadca408e816e1cdba0d8cc0ce5e48e601232f903a6e2
7
- data.tar.gz: 2daf97c5855a8d5f2d74c20a1957a89de0c66d20a0a9bdd373cd14e4141eb06644c8622fd32ea192d0924cfb03e0567516d3cb8c8edf97b615f14874c7860faa
6
+ metadata.gz: 7e5a7a0710a5e8fa437bf4a8976119ef8f4a8a8995a7eb4633364eb17ef666e6472d44f5bef965c434d208ed9c3cd101638276f74572164bd53c3c03ae1629c3
7
+ data.tar.gz: ba50260b19700091db4d1827bcf80446595b40a7b53933312028c4524da1143af425c65f7a6bd481fb41b7322bb87355661effbc7cf25c1ddf2b691ed75cb616
data/README.md CHANGED
@@ -58,6 +58,12 @@ Still, your Context output_hash's must provide output key/values for every colum
58
58
  null will be inserted for that column. Keys in the output_hash that don't match output columns
59
59
  will be ignored.
60
60
 
61
+ Note that traject output_hash's have values that are arrays of potentially multiple values. If
62
+ multiple values are present, they will be joined with a comma, or with the delimiter set in `sequel_writer.internal_delimiter`.
63
+ For non-string db column types, joining multiple values this way will probably raise an error.
64
+ `traject_sequel_writer` also accepts single values in output_hash as an alternative, which isn't really traject's
65
+ API, but we are experimenting to see whether accepting this alternative too is helpful rather than confusing.
66
+
61
67
  ### All settings
62
68
 
63
69
  * `sequel_writer.connection_string` : [Sequel connection string](http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html)
@@ -67,6 +73,7 @@ will be ignored.
67
73
  column_names not present in the Traject::Context#output_hash will end up with SQL `null` inserted.
68
74
  * `sequel_writer.thread_pool_size` Number of threads to use for writing to DB. Default 1, should be good.
69
75
  * `sequel_writer.batch_size` Count of records to batch together in a single multi-row SQL `INSERT`. Default 100. Should be good.
76
+ * `sequel_writer.internal_delimiter` -- Delimiter _within_ a field, for multiple values. Default is comma.
70
77
 
71
78
  ### Using as a side-channel additional output
72
79
 
@@ -58,6 +58,8 @@ module Traject
58
58
  @thread_pool = Traject::ThreadPool.new(@thread_pool_size)
59
59
 
60
60
  @after_send_batch_callbacks = Array(@settings["sequel_writer.after_send_batch"] || [])
61
+
62
+ @internal_delimiter = @settings["sequel_writer.internal_delimiter"] || ","
61
63
  end
62
64
 
63
65
  # Get the logger from the settings, or default to an effectively null logger
@@ -137,7 +139,10 @@ module Traject
137
139
  end
138
140
 
139
141
  def hash_to_array(column_names, hash)
140
- column_names.collect {|c| hash[c.to_s]}
142
+ column_names.collect do |c|
143
+ v = hash[c.to_s]
144
+ v.kind_of?(Array) ? v.join(@internal_delimiter) : v
145
+ end
141
146
  end
142
147
 
143
148
  def after_send_batch(&block)
@@ -1,3 +1,3 @@
1
1
  module TrajectSequelWriter
2
- VERSION = "0.10.0"
2
+ VERSION = "0.11.0"
3
3
  end
@@ -26,6 +26,56 @@ describe "Traject::SequelWriter" do
26
26
  end
27
27
  end
28
28
 
29
+ describe "with multiple values" do
30
+ it "joins multiple string values" do
31
+ @writer = self.writer
32
+
33
+ context = Traject::Indexer::Context.new
34
+ context.output_hash.merge!(
35
+ "string_a" => ["String_One", "String_Two"],
36
+ "string_b" => ["String_B_One"]
37
+ )
38
+ @writer.put context
39
+ @writer.close
40
+
41
+ assert @writer.db_table.where(:string_a => "String_One,String_Two", :string_b => "String_B_One").count == 1, "Expected written row with expected values"
42
+ end
43
+
44
+ after do
45
+ @writer.db_table.delete
46
+ end
47
+ end
48
+
49
+ describe "with only single values in output hash" do
50
+ # not really traject API, but we allow it anyway.
51
+ it "still writes" do
52
+ @writer = self.writer
53
+
54
+ context = Traject::Indexer::Context.new
55
+ context.output_hash.merge!(
56
+ "id" => "ignore_me", # should ignore pk by default
57
+ "string_a" => "String_a",
58
+ "string_b" => "String_b",
59
+ "no_such_column" => "ignore me",
60
+ "boolean_a" => true,
61
+ "int_a" => 1001
62
+ )
63
+ @writer.put context
64
+ @writer.close
65
+
66
+ row = @writer.db_table.first
67
+
68
+ assert_equal "String_a", row[:string_a]
69
+ assert_equal "String_b", row[:string_b]
70
+ assert_equal true, row[:boolean_a]
71
+ assert_equal 1001, row[:int_a]
72
+ end
73
+
74
+ after do
75
+ @writer.db_table.delete
76
+ end
77
+ end
78
+
29
79
 
30
80
  it "writes with sequel.database parameter instead of connection_str" do
31
81
  sequel_db = Sequel.connect(TEST_SEQUEL_CONNECT_STR)
@@ -90,12 +140,12 @@ describe "Traject::SequelWriter" do
90
140
  (1..num).each do |i|
91
141
  context = Traject::Indexer::Context.new
92
142
  context.output_hash.merge!(
93
- "id" => "ignore_me", # should ignore pk by default
94
- "string_a" => "String_a #{i}",
95
- "string_b" => "String_b #{i}",
96
- "no_such_column" => "ignore me",
97
- "boolean_a" => (i % 2 == 0) ? true : false,
98
- "int_a" => i
143
+ "id" => ["ignore_me"], # should ignore pk by default
144
+ "string_a" => ["String_a #{i}"],
145
+ "string_b" => ["String_b #{i}"],
146
+ "no_such_column" => ["ignore me"],
147
+ "boolean_a" => [(i % 2 == 0) ? true : false],
148
+ "int_a" => [i]
99
149
  )
100
150
  writer.put context
101
151
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: traject_sequel_writer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Rochkind