RubyGems - active_data_frame - Versions diffs - 0.1.8 → 0.1.9 - Mend

active_data_frame 0.1.8 → 0.1.9

This diff shows the differences between the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.

Files changed (5)

checksums.yaml +4 -4
data/lib/active_data_frame/database.rb +60 -18
data/lib/active_data_frame/row.rb +3 -13
data/lib/active_data_frame/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 3b36c2b8ec3874ed2b924e3f53b7d9ee35adda6a
-  data.tar.gz: 6607e3ca1023d6bb28162c773b9852a13f1f2271
+  metadata.gz: b24b99c34980b2896aa5fa479037d6bb4741c372
+  data.tar.gz: f77a43a58a962f5be8c84e7541494b55ba3a7550
 SHA512:
-  metadata.gz: 7e05379d9e8c5d91adc2c237dac5fdba7cc739a129bdb377119bbfcac9cfdc24ad5c4c5529d4a5c957d1e13366fb800971a0d5196bf7d8bf14286b3bd88fd259
-  data.tar.gz: 7fb1f15a7d9e66920e938f6a5ec9fcf11a56a4beacac2c85b8f208c10528a318ececc708130c82722bcdf23fd54335bb8e5cbcd9f10cee9c3c0ef60c4f95c51b
+  metadata.gz: 7a7cf7e447b22b1c91bcc2d643d54316c76cc64d7ff4cb6062b228cd4a9c92d8d39372f659c80eca79aa4912408f96519ac7df90e6673ec55a6691932ad9fdc7
+  data.tar.gz: ba0a86aa56d65689669097d1544f26b312ac03ad5203e36800dea227eb19471b6a407ac1fd43ff7e268cdd7cd8bd73c97bb1d4d69b27dd8a0cba547e675784ef

data/lib/active_data_frame/database.rb CHANGED

@@ -59,20 +59,51 @@ module ActiveDataFrame
       flush! unless self.batching
     end
-    def bulk_upsert(updates, inserts)
+    def bulk_upsert(upserts, scope=nil)
       Database.batch do
-        updates.group_by(&:keys).transform_values{|v| v.map(&:values) }.each do |columns, rows|
-          update = rows.map{|df_id, period_index, *values| [period_index, [values, df_id]] }
-          bulk_update(update, columns - [:data_frame_id, :period_index])
-        end
-        inserts.group_by(&:keys).transform_values{|v| v.map(&:values) }.each do |columns, rows|
-          insert = rows.map{|df_id, period_index, *values| [period_index, [values, df_id]] }
-          bulk_insert(insert, columns - [:data_frame_id, :period_index])
+        case ActiveRecord::Base.connection_config[:adapter]
+        when 'postgresql'.freeze
+          upserts.group_by(&:keys).each do |columns, value_list|
+            columns = columns - [:data_frame_id, :period_index]
+            inserts = ''
+            value_list.each do |row|
+              df_id, period_index, *values = row.values
+              inserts <<  "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
+            end
+            sql = %Q{
+              INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type)
+              VALUES #{inserts[0..-2]}
+              ON CONFLICT(data_frame_id, period_index, data_frame_type) DO UPDATE
+              SET #{columns.map{|c| "#{c} = excluded.#{c} "}.join(',')}
+            }
+            Database.execute sql
+          end
+        when 'mysql2'.freeze
+          upserts.group_by(&:keys).each do |columns, rows|
+            update = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
+            bulk_update(update, columns - [:data_frame_id, :period_index])
+          end
+        else
+          all_update_indices     = scope[].pluck(:data_frame_id, :period_index)
+          grouped_update_indices = all_update_indices.group_by(&:first).transform_values{|value| Set.new(value.map!(&:last)) }
+          updates, inserts = upserts.partition{|upsert| grouped_update_indices[upsert[:data_frame_id]]&.include?(upsert[:period_index]) }
+          updates.group_by(&:keys).each do |columns, rows|
+            update = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
+            bulk_update(update, columns - [:data_frame_id, :period_index])
+          end
+          inserts.group_by(&:keys).each do |columns, rows|
+            insert = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
+            bulk_insert(insert, columns - [:data_frame_id, :period_index])
+          end
         end
       end
     end
     ##
-    # Update block data for all blocks in a single call
+    # Fast update block data for all blocks in a single call.
+    # Uses UPDATE + SET in PostgreSQL
+    # Uses INSERT ON CONFLICT for MySQL (Upsert)
+    # Uses UPDATE with CASE on others
     ##
     def bulk_update(existing, columns=block_type::COLUMNS)
       existing.each_slice(ActiveDataFrame.update_max_batch_size) do |existing_slice|
@@ -143,6 +174,7 @@ module ActiveDataFrame
       end
     end
     def bulk_delete(id, indices)
       indices.each_slice(ActiveDataFrame.delete_max_batch_size) do |slice|
         # puts "Deleting slice of #{slice.length}"
@@ -152,20 +184,30 @@ module ActiveDataFrame
     ##
     # Insert block data for all blocks in a single call
+    # PostgreSQL uses COPY, others use multi-statement insert
     ##
     def bulk_insert(new_blocks, columns=block_type::COLUMNS)
       new_blocks.each_slice(ActiveDataFrame.insert_max_batch_size) do |new_blocks_slice|
-        # puts "Inserting slice of #{new_blocks_slice.length}"
-        inserts = ''
-        new_blocks_slice.each do |period_index, (values, df_id)|
-          inserts << \
-          case ActiveRecord::Base.connection_config[:adapter]
-          when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
-          else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
+        if ActiveRecord::Base.connection_config[:adapter] == 'postgresql'
+          copy_statement = "COPY #{block_type.table_name} (#{columns.join(',')},data_frame_id,period_index,data_frame_type) FROM STDIN CSV"
+          db_conn = ActiveRecord::Base.connection.raw_connection
+          db_conn.copy_data(copy_statement) do
+            new_blocks_slice.each do |period_index, (values, df_id)|
+              db_conn.put_copy_data((values + [df_id, period_index, data_frame_type.name]).join(',') << "\n")
+            end
+          end
+        else
+          inserts = ''
+          new_blocks_slice.each do |period_index, (values, df_id)|
+            inserts << \
+            case ActiveRecord::Base.connection_config[:adapter]
+            when 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
+            else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
+            end
           end
+          sql = "INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
+          Database.execute sql
         end
-        sql = "INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
-        Database.execute sql
       end
     end
   end

data/lib/active_data_frame/row.rb CHANGED

@@ -31,9 +31,7 @@ module ActiveDataFrame
       bounds                 = get_bounds(from, to, block_type)
       scope                  = block_type.where(data_frame_type: data_frame_type.name, data_frame_id: rows.select(:id))
       scope                  = scope.where(data_frame_id: values.keys) if values.kind_of?(Hash)
-      all_update_indices     = scope.where(period_index: bounds.from.index..bounds.to.index).order(data_frame_id: :asc, period_index: :asc).pluck(:data_frame_id, :period_index)
-      grouped_update_indices = all_update_indices.group_by(&:first).transform_values{|value| Set.new(value.map!(&:last)) }
-      instance_ids           = rows.pluck(:id)
+      instance_ids           = rows.loaded? ? rows.map(&:id) : rows.pluck(:id)
       instance_ids           &= values.keys if values.kind_of?(Hash)
       upserts = to_enum(:iterate_bounds, [bounds], block_type).flat_map do |index, left, right, cursor, size|
         instance_ids.map do |instance_id|
@@ -42,8 +40,7 @@ module ActiveDataFrame
         end
       end
-      update, insert = upserts.partition{|upsert| grouped_update_indices[upsert[:data_frame_id]]&.include?(upsert[:period_index]) }
-      Database.for_types(block: block_type, df: data_frame_type).bulk_upsert(update, insert)
+      Database.for_types(block: block_type, df: data_frame_type).bulk_upsert(upserts, ->{scope.where(period_index: bounds.from.index..bounds.to.index)})
       values
     end
@@ -58,16 +55,10 @@ module ActiveDataFrame
     def upsert(from, values)
       to             = (from + values.length) - 1
       bounds         = get_bounds(from, to)
-      update_indices = Set.new(scope.where(period_index: bounds.from.index..bounds.to.index).order(period_index: :asc).pluck(:period_index))
-      # Detect blocks in bounds:
-      # - If existing and covered, do an update without load
-      # - If existing and uncovered, do a small write (without load)
-      # - If not existing, insert!
       upserts = to_enum(:iterate_bounds, [bounds]).map do |index, left, right, cursor, size|
         [[:data_frame_id, self.instance.id], [:period_index, index], *(left.succ..right.succ).map{|v| :"t#{v}" }.zip(values[cursor...cursor + size])].to_h
       end
-      update, insert = upserts.partition{|upsert| update_indices.include?(upsert[:period_index]) }
-      database.bulk_upsert(update, insert)
+      database.bulk_upsert(upserts, ->{ scope.where(period_index: bounds.from.index..bounds.to.index)})
       values
     end
@@ -99,7 +90,6 @@ module ActiveDataFrame
         end
       end
       database.bulk_delete(self.instance.id, deleted_indices) unless deleted_indices.size.zero?
       database.bulk_update(existing)       unless existing.size.zero?
       database.bulk_insert(new_blocks)     unless new_blocks.size.zero?

data/lib/active_data_frame/version.rb CHANGED

@@ -1,3 +1,3 @@
 module ActiveDataFrame
-  VERSION = "0.1.8"
+  VERSION = "0.1.9"
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: active_data_frame
 version: !ruby/object:Gem::Version
-  version: 0.1.8
+  version: 0.1.9
 platform: ruby
 authors:
 - Wouter Coppieters
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-08-12 00:00:00.000000000 Z
+date: 2019-10-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler