active_data_frame 0.1.8 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3b36c2b8ec3874ed2b924e3f53b7d9ee35adda6a
4
- data.tar.gz: 6607e3ca1023d6bb28162c773b9852a13f1f2271
3
+ metadata.gz: b24b99c34980b2896aa5fa479037d6bb4741c372
4
+ data.tar.gz: f77a43a58a962f5be8c84e7541494b55ba3a7550
5
5
  SHA512:
6
- metadata.gz: 7e05379d9e8c5d91adc2c237dac5fdba7cc739a129bdb377119bbfcac9cfdc24ad5c4c5529d4a5c957d1e13366fb800971a0d5196bf7d8bf14286b3bd88fd259
7
- data.tar.gz: 7fb1f15a7d9e66920e938f6a5ec9fcf11a56a4beacac2c85b8f208c10528a318ececc708130c82722bcdf23fd54335bb8e5cbcd9f10cee9c3c0ef60c4f95c51b
6
+ metadata.gz: 7a7cf7e447b22b1c91bcc2d643d54316c76cc64d7ff4cb6062b228cd4a9c92d8d39372f659c80eca79aa4912408f96519ac7df90e6673ec55a6691932ad9fdc7
7
+ data.tar.gz: ba0a86aa56d65689669097d1544f26b312ac03ad5203e36800dea227eb19471b6a407ac1fd43ff7e268cdd7cd8bd73c97bb1d4d69b27dd8a0cba547e675784ef
@@ -59,20 +59,51 @@ module ActiveDataFrame
59
59
  flush! unless self.batching
60
60
  end
61
61
 
62
- def bulk_upsert(updates, inserts)
62
+ def bulk_upsert(upserts, scope=nil)
63
63
  Database.batch do
64
- updates.group_by(&:keys).transform_values{|v| v.map(&:values) }.each do |columns, rows|
65
- update = rows.map{|df_id, period_index, *values| [period_index, [values, df_id]] }
66
- bulk_update(update, columns - [:data_frame_id, :period_index])
67
- end
68
- inserts.group_by(&:keys).transform_values{|v| v.map(&:values) }.each do |columns, rows|
69
- insert = rows.map{|df_id, period_index, *values| [period_index, [values, df_id]] }
70
- bulk_insert(insert, columns - [:data_frame_id, :period_index])
64
+ case ActiveRecord::Base.connection_config[:adapter]
65
+ when 'postgresql'.freeze
66
+ upserts.group_by(&:keys).each do |columns, value_list|
67
+ columns = columns - [:data_frame_id, :period_index]
68
+ inserts = ''
69
+ value_list.each do |row|
70
+ df_id, period_index, *values = row.values
71
+ inserts << "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
72
+ end
73
+ sql = %Q{
74
+ INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type)
75
+ VALUES #{inserts[0..-2]}
76
+ ON CONFLICT(data_frame_id, period_index, data_frame_type) DO UPDATE
77
+ SET #{columns.map{|c| "#{c} = excluded.#{c} "}.join(',')}
78
+ }
79
+ Database.execute sql
80
+ end
81
+ when 'mysql2'.freeze
82
+ upserts.group_by(&:keys).each do |columns, rows|
83
+ update = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
84
+ bulk_update(update, columns - [:data_frame_id, :period_index])
85
+ end
86
+ else
87
+ all_update_indices = scope[].pluck(:data_frame_id, :period_index)
88
+ grouped_update_indices = all_update_indices.group_by(&:first).transform_values{|value| Set.new(value.map!(&:last)) }
89
+ updates, inserts = upserts.partition{|upsert| grouped_update_indices[upsert[:data_frame_id]]&.include?(upsert[:period_index]) }
90
+ updates.group_by(&:keys).each do |columns, rows|
91
+ update = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
92
+ bulk_update(update, columns - [:data_frame_id, :period_index])
93
+ end
94
+ inserts.group_by(&:keys).each do |columns, rows|
95
+ insert = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
96
+ bulk_insert(insert, columns - [:data_frame_id, :period_index])
97
+ end
71
98
  end
72
99
  end
73
100
  end
101
+
74
102
  ##
75
- # Update block data for all blocks in a single call
103
+ # Fast update block data for all blocks in a single call.
104
+ # Uses UPDATE + SET in PostgreSQL
105
+ # Uses INSERT ON CONFLICT for MySQL (Upsert)
106
+ # Uses UPDATE with CASE on others
76
107
  ##
77
108
  def bulk_update(existing, columns=block_type::COLUMNS)
78
109
  existing.each_slice(ActiveDataFrame.update_max_batch_size) do |existing_slice|
@@ -143,6 +174,7 @@ module ActiveDataFrame
143
174
  end
144
175
  end
145
176
 
177
+
146
178
  def bulk_delete(id, indices)
147
179
  indices.each_slice(ActiveDataFrame.delete_max_batch_size) do |slice|
148
180
  # puts "Deleting slice of #{slice.length}"
@@ -152,20 +184,30 @@ module ActiveDataFrame
152
184
 
153
185
  ##
154
186
  # Insert block data for all blocks in a single call
187
+ # PostgreSQL uses COPY, others use multi-statement insert
155
188
  ##
156
189
  def bulk_insert(new_blocks, columns=block_type::COLUMNS)
157
190
  new_blocks.each_slice(ActiveDataFrame.insert_max_batch_size) do |new_blocks_slice|
158
- # puts "Inserting slice of #{new_blocks_slice.length}"
159
- inserts = ''
160
- new_blocks_slice.each do |period_index, (values, df_id)|
161
- inserts << \
162
- case ActiveRecord::Base.connection_config[:adapter]
163
- when 'postgresql', 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
164
- else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
191
+ if ActiveRecord::Base.connection_config[:adapter] == 'postgresql'
192
+ copy_statement = "COPY #{block_type.table_name} (#{columns.join(',')},data_frame_id,period_index,data_frame_type) FROM STDIN CSV"
193
+ db_conn = ActiveRecord::Base.connection.raw_connection
194
+ db_conn.copy_data(copy_statement) do
195
+ new_blocks_slice.each do |period_index, (values, df_id)|
196
+ db_conn.put_copy_data((values + [df_id, period_index, data_frame_type.name]).join(',') << "\n")
197
+ end
198
+ end
199
+ else
200
+ inserts = ''
201
+ new_blocks_slice.each do |period_index, (values, df_id)|
202
+ inserts << \
203
+ case ActiveRecord::Base.connection_config[:adapter]
204
+ when 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
205
+ else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
206
+ end
165
207
  end
208
+ sql = "INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
209
+ Database.execute sql
166
210
  end
167
- sql = "INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
168
- Database.execute sql
169
211
  end
170
212
  end
171
213
  end
@@ -31,9 +31,7 @@ module ActiveDataFrame
31
31
  bounds = get_bounds(from, to, block_type)
32
32
  scope = block_type.where(data_frame_type: data_frame_type.name, data_frame_id: rows.select(:id))
33
33
  scope = scope.where(data_frame_id: values.keys) if values.kind_of?(Hash)
34
- all_update_indices = scope.where(period_index: bounds.from.index..bounds.to.index).order(data_frame_id: :asc, period_index: :asc).pluck(:data_frame_id, :period_index)
35
- grouped_update_indices = all_update_indices.group_by(&:first).transform_values{|value| Set.new(value.map!(&:last)) }
36
- instance_ids = rows.pluck(:id)
34
+ instance_ids = rows.loaded? ? rows.map(&:id) : rows.pluck(:id)
37
35
  instance_ids &= values.keys if values.kind_of?(Hash)
38
36
  upserts = to_enum(:iterate_bounds, [bounds], block_type).flat_map do |index, left, right, cursor, size|
39
37
  instance_ids.map do |instance_id|
@@ -42,8 +40,7 @@ module ActiveDataFrame
42
40
  end
43
41
  end
44
42
 
45
- update, insert = upserts.partition{|upsert| grouped_update_indices[upsert[:data_frame_id]]&.include?(upsert[:period_index]) }
46
- Database.for_types(block: block_type, df: data_frame_type).bulk_upsert(update, insert)
43
+ Database.for_types(block: block_type, df: data_frame_type).bulk_upsert(upserts, ->{scope.where(period_index: bounds.from.index..bounds.to.index)})
47
44
  values
48
45
  end
49
46
 
@@ -58,16 +55,10 @@ module ActiveDataFrame
58
55
  def upsert(from, values)
59
56
  to = (from + values.length) - 1
60
57
  bounds = get_bounds(from, to)
61
- update_indices = Set.new(scope.where(period_index: bounds.from.index..bounds.to.index).order(period_index: :asc).pluck(:period_index))
62
- # Detect blocks in bounds:
63
- # - If existing and covered, do an update without load
64
- # - If existing and uncovered, do a small write (without load)
65
- # - If not existing, insert!
66
58
  upserts = to_enum(:iterate_bounds, [bounds]).map do |index, left, right, cursor, size|
67
59
  [[:data_frame_id, self.instance.id], [:period_index, index], *(left.succ..right.succ).map{|v| :"t#{v}" }.zip(values[cursor...cursor + size])].to_h
68
60
  end
69
- update, insert = upserts.partition{|upsert| update_indices.include?(upsert[:period_index]) }
70
- database.bulk_upsert(update, insert)
61
+ database.bulk_upsert(upserts, ->{ scope.where(period_index: bounds.from.index..bounds.to.index)})
71
62
  values
72
63
  end
73
64
 
@@ -99,7 +90,6 @@ module ActiveDataFrame
99
90
  end
100
91
  end
101
92
 
102
-
103
93
  database.bulk_delete(self.instance.id, deleted_indices) unless deleted_indices.size.zero?
104
94
  database.bulk_update(existing) unless existing.size.zero?
105
95
  database.bulk_insert(new_blocks) unless new_blocks.size.zero?
@@ -1,3 +1,3 @@
1
1
  module ActiveDataFrame
2
- VERSION = "0.1.8"
2
+ VERSION = "0.1.9"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: active_data_frame
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.8
4
+ version: 0.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Wouter Coppieters
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-08-12 00:00:00.000000000 Z
11
+ date: 2019-10-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler