active_data_frame 0.1.8 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3b36c2b8ec3874ed2b924e3f53b7d9ee35adda6a
4
- data.tar.gz: 6607e3ca1023d6bb28162c773b9852a13f1f2271
3
+ metadata.gz: b24b99c34980b2896aa5fa479037d6bb4741c372
4
+ data.tar.gz: f77a43a58a962f5be8c84e7541494b55ba3a7550
5
5
  SHA512:
6
- metadata.gz: 7e05379d9e8c5d91adc2c237dac5fdba7cc739a129bdb377119bbfcac9cfdc24ad5c4c5529d4a5c957d1e13366fb800971a0d5196bf7d8bf14286b3bd88fd259
7
- data.tar.gz: 7fb1f15a7d9e66920e938f6a5ec9fcf11a56a4beacac2c85b8f208c10528a318ececc708130c82722bcdf23fd54335bb8e5cbcd9f10cee9c3c0ef60c4f95c51b
6
+ metadata.gz: 7a7cf7e447b22b1c91bcc2d643d54316c76cc64d7ff4cb6062b228cd4a9c92d8d39372f659c80eca79aa4912408f96519ac7df90e6673ec55a6691932ad9fdc7
7
+ data.tar.gz: ba0a86aa56d65689669097d1544f26b312ac03ad5203e36800dea227eb19471b6a407ac1fd43ff7e268cdd7cd8bd73c97bb1d4d69b27dd8a0cba547e675784ef
@@ -59,20 +59,51 @@ module ActiveDataFrame
59
59
  flush! unless self.batching
60
60
  end
61
61
 
62
- def bulk_upsert(updates, inserts)
62
+ def bulk_upsert(upserts, scope=nil)
63
63
  Database.batch do
64
- updates.group_by(&:keys).transform_values{|v| v.map(&:values) }.each do |columns, rows|
65
- update = rows.map{|df_id, period_index, *values| [period_index, [values, df_id]] }
66
- bulk_update(update, columns - [:data_frame_id, :period_index])
67
- end
68
- inserts.group_by(&:keys).transform_values{|v| v.map(&:values) }.each do |columns, rows|
69
- insert = rows.map{|df_id, period_index, *values| [period_index, [values, df_id]] }
70
- bulk_insert(insert, columns - [:data_frame_id, :period_index])
64
+ case ActiveRecord::Base.connection_config[:adapter]
65
+ when 'postgresql'.freeze
66
+ upserts.group_by(&:keys).each do |columns, value_list|
67
+ columns = columns - [:data_frame_id, :period_index]
68
+ inserts = ''
69
+ value_list.each do |row|
70
+ df_id, period_index, *values = row.values
71
+ inserts << "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
72
+ end
73
+ sql = %Q{
74
+ INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type)
75
+ VALUES #{inserts[0..-2]}
76
+ ON CONFLICT(data_frame_id, period_index, data_frame_type) DO UPDATE
77
+ SET #{columns.map{|c| "#{c} = excluded.#{c} "}.join(',')}
78
+ }
79
+ Database.execute sql
80
+ end
81
+ when 'mysql2'.freeze
82
+ upserts.group_by(&:keys).each do |columns, rows|
83
+ update = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
84
+ bulk_update(update, columns - [:data_frame_id, :period_index])
85
+ end
86
+ else
87
+ all_update_indices = scope[].pluck(:data_frame_id, :period_index)
88
+ grouped_update_indices = all_update_indices.group_by(&:first).transform_values{|value| Set.new(value.map!(&:last)) }
89
+ updates, inserts = upserts.partition{|upsert| grouped_update_indices[upsert[:data_frame_id]]&.include?(upsert[:period_index]) }
90
+ updates.group_by(&:keys).each do |columns, rows|
91
+ update = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
92
+ bulk_update(update, columns - [:data_frame_id, :period_index])
93
+ end
94
+ inserts.group_by(&:keys).each do |columns, rows|
95
+ insert = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
96
+ bulk_insert(insert, columns - [:data_frame_id, :period_index])
97
+ end
71
98
  end
72
99
  end
73
100
  end
101
+
74
102
  ##
75
- # Update block data for all blocks in a single call
103
+ # Fast update block data for all blocks in a single call.
104
+ # Uses UPDATE + SET in PostgreSQL
105
+ # Uses INSERT ON CONFLICT for MySQL (Upsert)
106
+ # Uses UPDATE with CASE on others
76
107
  ##
77
108
  def bulk_update(existing, columns=block_type::COLUMNS)
78
109
  existing.each_slice(ActiveDataFrame.update_max_batch_size) do |existing_slice|
@@ -143,6 +174,7 @@ module ActiveDataFrame
143
174
  end
144
175
  end
145
176
 
177
+
146
178
  def bulk_delete(id, indices)
147
179
  indices.each_slice(ActiveDataFrame.delete_max_batch_size) do |slice|
148
180
  # puts "Deleting slice of #{slice.length}"
@@ -152,20 +184,30 @@ module ActiveDataFrame
152
184
 
153
185
  ##
154
186
  # Insert block data for all blocks in a single call
187
+ # PostgreSQL uses COPY; other adapters use a single multi-row INSERT statement per slice
155
188
  ##
156
189
  def bulk_insert(new_blocks, columns=block_type::COLUMNS)
157
190
  new_blocks.each_slice(ActiveDataFrame.insert_max_batch_size) do |new_blocks_slice|
158
- # puts "Inserting slice of #{new_blocks_slice.length}"
159
- inserts = ''
160
- new_blocks_slice.each do |period_index, (values, df_id)|
161
- inserts << \
162
- case ActiveRecord::Base.connection_config[:adapter]
163
- when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
164
- else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
191
+ if ActiveRecord::Base.connection_config[:adapter] == 'postgresql'
192
+ copy_statement = "COPY #{block_type.table_name} (#{columns.join(',')},data_frame_id,period_index,data_frame_type) FROM STDIN CSV"
193
+ db_conn = ActiveRecord::Base.connection.raw_connection
194
+ db_conn.copy_data(copy_statement) do
195
+ new_blocks_slice.each do |period_index, (values, df_id)|
196
+ db_conn.put_copy_data((values + [df_id, period_index, data_frame_type.name]).join(',') << "\n")
197
+ end
198
+ end
199
+ else
200
+ inserts = ''
201
+ new_blocks_slice.each do |period_index, (values, df_id)|
202
+ inserts << \
203
+ case ActiveRecord::Base.connection_config[:adapter]
204
+ when 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
205
+ else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
206
+ end
165
207
  end
208
+ sql = "INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
209
+ Database.execute sql
166
210
  end
167
- sql = "INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
168
- Database.execute sql
169
211
  end
170
212
  end
171
213
  end
@@ -31,9 +31,7 @@ module ActiveDataFrame
31
31
  bounds = get_bounds(from, to, block_type)
32
32
  scope = block_type.where(data_frame_type: data_frame_type.name, data_frame_id: rows.select(:id))
33
33
  scope = scope.where(data_frame_id: values.keys) if values.kind_of?(Hash)
34
- all_update_indices = scope.where(period_index: bounds.from.index..bounds.to.index).order(data_frame_id: :asc, period_index: :asc).pluck(:data_frame_id, :period_index)
35
- grouped_update_indices = all_update_indices.group_by(&:first).transform_values{|value| Set.new(value.map!(&:last)) }
36
- instance_ids = rows.pluck(:id)
34
+ instance_ids = rows.loaded? ? rows.map(&:id) : rows.pluck(:id)
37
35
  instance_ids &= values.keys if values.kind_of?(Hash)
38
36
  upserts = to_enum(:iterate_bounds, [bounds], block_type).flat_map do |index, left, right, cursor, size|
39
37
  instance_ids.map do |instance_id|
@@ -42,8 +40,7 @@ module ActiveDataFrame
42
40
  end
43
41
  end
44
42
 
45
- update, insert = upserts.partition{|upsert| grouped_update_indices[upsert[:data_frame_id]]&.include?(upsert[:period_index]) }
46
- Database.for_types(block: block_type, df: data_frame_type).bulk_upsert(update, insert)
43
+ Database.for_types(block: block_type, df: data_frame_type).bulk_upsert(upserts, ->{scope.where(period_index: bounds.from.index..bounds.to.index)})
47
44
  values
48
45
  end
49
46
 
@@ -58,16 +55,10 @@ module ActiveDataFrame
58
55
  def upsert(from, values)
59
56
  to = (from + values.length) - 1
60
57
  bounds = get_bounds(from, to)
61
- update_indices = Set.new(scope.where(period_index: bounds.from.index..bounds.to.index).order(period_index: :asc).pluck(:period_index))
62
- # Detect blocks in bounds:
63
- # - If existing and covered, do an update without load
64
- # - If existing and uncovered, do a small write (without load)
65
- # - If not existing, insert!
66
58
  upserts = to_enum(:iterate_bounds, [bounds]).map do |index, left, right, cursor, size|
67
59
  [[:data_frame_id, self.instance.id], [:period_index, index], *(left.succ..right.succ).map{|v| :"t#{v}" }.zip(values[cursor...cursor + size])].to_h
68
60
  end
69
- update, insert = upserts.partition{|upsert| update_indices.include?(upsert[:period_index]) }
70
- database.bulk_upsert(update, insert)
61
+ database.bulk_upsert(upserts, ->{ scope.where(period_index: bounds.from.index..bounds.to.index)})
71
62
  values
72
63
  end
73
64
 
@@ -99,7 +90,6 @@ module ActiveDataFrame
99
90
  end
100
91
  end
101
92
 
102
-
103
93
  database.bulk_delete(self.instance.id, deleted_indices) unless deleted_indices.size.zero?
104
94
  database.bulk_update(existing) unless existing.size.zero?
105
95
  database.bulk_insert(new_blocks) unless new_blocks.size.zero?
@@ -1,3 +1,3 @@
1
1
  module ActiveDataFrame
2
- VERSION = "0.1.8"
2
+ VERSION = "0.1.9"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: active_data_frame
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wouter Coppieters
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-08-12 00:00:00.000000000 Z
11
+ date: 2019-10-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler