RubyGems - traject_sequel_writer - Versions diffs - 0.10.0 → 0.11.0 - Mend

traject_sequel_writer 0.10.0 → 0.11.0

Files changed (6) hide show

checksums.yaml +4 -4
data/README.md +7 -0
data/lib/traject/sequel_writer.rb +6 -1
data/lib/traject_sequel_writer/version.rb +1 -1
data/test/test_traject_sequel_writer.rb +56 -6
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 42e56a44ac7e5553dda9631506785b5a042946fd
-  data.tar.gz: 5ad685bb36e80fb1b3484a3a5595d7f9d26119eb
+  metadata.gz: 81efee56a46a47e26c17eff759d4cc0a7dc36726
+  data.tar.gz: 72a8923c02d9e6052342c4c953fbc9cc34857ca2
 SHA512:
-  metadata.gz: 00ed6ac0f25b29168bacd10dec92617ec11078eb9820542c607721d6c16c93849ab29571787a0fae6caeadca408e816e1cdba0d8cc0ce5e48e601232f903a6e2
-  data.tar.gz: 2daf97c5855a8d5f2d74c20a1957a89de0c66d20a0a9bdd373cd14e4141eb06644c8622fd32ea192d0924cfb03e0567516d3cb8c8edf97b615f14874c7860faa
+  metadata.gz: 7e5a7a0710a5e8fa437bf4a8976119ef8f4a8a8995a7eb4633364eb17ef666e6472d44f5bef965c434d208ed9c3cd101638276f74572164bd53c3c03ae1629c3
+  data.tar.gz: ba50260b19700091db4d1827bcf80446595b40a7b53933312028c4524da1143af425c65f7a6bd481fb41b7322bb87355661effbc7cf25c1ddf2b691ed75cb616

data/README.md CHANGED Viewed

@@ -58,6 +58,12 @@ Still, your Context output_hash's must provide output key/values for every colum
 null will be inserted for that column. Keys in the output_hash that don't match output columns
 will be ignored.
+Note that the values in a traject output_hash are arrays of potentially multiple values. If
+multiple values are present, they will be joined with a comma, or with the delimiter set in `sequel_writer.internal_delimiter`.
+For non-string db column types, this will probably raise an error.
+`traject_sequel_writer` also accepts single (non-array) values in the output_hash as an alternative. This isn't really traject's
+API; it is an experiment to see whether accepting this alternate form is helpful rather than confusing.
 ### All settings
 * `sequel_writer.connection_string` : [Sequel connection string](http://sequel.jeremyevans.net/rdoc/files/doc/opening_databases_rdoc.html)
@@ -67,6 +73,7 @@ will be ignored.
   column_names not present in the Traject::Context#output_hash will end up with SQL `null` inserted.
 * `sequel_writer.thread_pool_size` Number of threads to use for writing to DB. Default 1, should be good.
 * `sequel_writer.batch_size` Count of records to batch together in a single multi-row SQL `INSERT`. Default 100. Should be good.
+* `sequel_writer.internal_delimiter` -- Delimiter _within_ a field, for multiple values. Default is comma.
 ### Using as a side-channel additional output

data/lib/traject/sequel_writer.rb CHANGED Viewed

@@ -58,6 +58,8 @@ module Traject
       @thread_pool = Traject::ThreadPool.new(@thread_pool_size)
       @after_send_batch_callbacks = Array(@settings["sequel_writer.after_send_batch"] || [])
+      @internal_delimiter = @settings["sequel_writer.internal_delimiter"] || ","
     end
     # Get the logger from the settings, or default to an effectively null logger
@@ -137,7 +139,10 @@ module Traject
     end
     def hash_to_array(column_names, hash)
-      column_names.collect {|c| hash[c.to_s]}
+      column_names.collect do |c|
+        v = hash[c.to_s]
+        v.kind_of?(Array) ? v.join(@internal_delimiter) : v
+      end
     end
     def after_send_batch(&block)

data/lib/traject_sequel_writer/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module TrajectSequelWriter
-  VERSION = "0.10.0"
+  VERSION = "0.11.0"
 end

data/test/test_traject_sequel_writer.rb CHANGED Viewed

@@ -26,6 +26,56 @@ describe "Traject::SequelWriter" do
     end
   end
+  describe "with multiple values" do
+    it "joins multiple string values" do
+      @writer = self.writer
+      context = Traject::Indexer::Context.new
+      context.output_hash.merge!(
+        "string_a" => ["String_One", "String_Two"],
+        "string_b" => ["String_B_One"]
+      )
+      @writer.put context
+      @writer.close
+      assert @writer.db_table.where(:string_a => "String_One,String_Two", :string_b => "String_B_One").count == 1, "Expected written row with expected values"
+    end
+    after do
+      @writer.db_table.delete
+    end
+  end
+  describe "with only single values in output hash" do
+    # not really traject API, but we allow it anyway.
+    it "still writes" do
+     @writer = self.writer
+      context = Traject::Indexer::Context.new
+      context.output_hash.merge!(
+        "id" => "ignore_me", # should ignore pk by default
+        "string_a" => "String_a",
+        "string_b" => "String_b",
+        "no_such_column" => "ignore me",
+        "boolean_a" => true,
+        "int_a" => 1001
+      )
+      @writer.put context
+      @writer.close
+      row = @writer.db_table.first
+      assert_equal "String_a", row[:string_a]
+      assert_equal "String_b", row[:string_b]
+      assert_equal true, row[:boolean_a]
+      assert_equal 1001, row[:int_a]
+    end
+    after do
+      @writer.db_table.delete
+    end
+  end
   it "writes with sequel.database parameter instead of connection_str" do
     sequel_db = Sequel.connect(TEST_SEQUEL_CONNECT_STR)
@@ -90,12 +140,12 @@ describe "Traject::SequelWriter" do
     (1..num).each do |i|
         context = Traject::Indexer::Context.new
         context.output_hash.merge!(
-          "id" => "ignore_me", # should ignore pk by default
-          "string_a" => "String_a #{i}",
-          "string_b" => "String_b #{i}",
-          "no_such_column" => "ignore me",
-          "boolean_a" => (i % 2 == 0) ? true : false,
-          "int_a" => i
+          "id" => ["ignore_me"], # should ignore pk by default
+          "string_a" => ["String_a #{i}"],
+          "string_b" => ["String_b #{i}"],
+          "no_such_column" => ["ignore me"],
+          "boolean_a" => [(i % 2 == 0) ? true : false],
+          "int_a" => [i]
         )
         writer.put context
       end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: traject_sequel_writer
 version: !ruby/object:Gem::Version
-  version: 0.10.0
+  version: 0.11.0
 platform: ruby
 authors:
 - Jonathan Rochkind