active_data_frame 0.1.8 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/active_data_frame/database.rb +60 -18
- data/lib/active_data_frame/row.rb +3 -13
- data/lib/active_data_frame/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b24b99c34980b2896aa5fa479037d6bb4741c372
|
4
|
+
data.tar.gz: f77a43a58a962f5be8c84e7541494b55ba3a7550
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a7cf7e447b22b1c91bcc2d643d54316c76cc64d7ff4cb6062b228cd4a9c92d8d39372f659c80eca79aa4912408f96519ac7df90e6673ec55a6691932ad9fdc7
|
7
|
+
data.tar.gz: ba0a86aa56d65689669097d1544f26b312ac03ad5203e36800dea227eb19471b6a407ac1fd43ff7e268cdd7cd8bd73c97bb1d4d69b27dd8a0cba547e675784ef
|
@@ -59,20 +59,51 @@ module ActiveDataFrame
|
|
59
59
|
flush! unless self.batching
|
60
60
|
end
|
61
61
|
|
62
|
-
def bulk_upsert(
|
62
|
+
def bulk_upsert(upserts, scope=nil)
|
63
63
|
Database.batch do
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
64
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
65
|
+
when 'postgresql'.freeze
|
66
|
+
upserts.group_by(&:keys).each do |columns, value_list|
|
67
|
+
columns = columns - [:data_frame_id, :period_index]
|
68
|
+
inserts = ''
|
69
|
+
value_list.each do |row|
|
70
|
+
df_id, period_index, *values = row.values
|
71
|
+
inserts << "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
|
72
|
+
end
|
73
|
+
sql = %Q{
|
74
|
+
INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type)
|
75
|
+
VALUES #{inserts[0..-2]}
|
76
|
+
ON CONFLICT(data_frame_id, period_index, data_frame_type) DO UPDATE
|
77
|
+
SET #{columns.map{|c| "#{c} = excluded.#{c} "}.join(',')}
|
78
|
+
}
|
79
|
+
Database.execute sql
|
80
|
+
end
|
81
|
+
when 'mysql2'.freeze
|
82
|
+
upserts.group_by(&:keys).each do |columns, rows|
|
83
|
+
update = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
|
84
|
+
bulk_update(update, columns - [:data_frame_id, :period_index])
|
85
|
+
end
|
86
|
+
else
|
87
|
+
all_update_indices = scope[].pluck(:data_frame_id, :period_index)
|
88
|
+
grouped_update_indices = all_update_indices.group_by(&:first).transform_values{|value| Set.new(value.map!(&:last)) }
|
89
|
+
updates, inserts = upserts.partition{|upsert| grouped_update_indices[upsert[:data_frame_id]]&.include?(upsert[:period_index]) }
|
90
|
+
updates.group_by(&:keys).each do |columns, rows|
|
91
|
+
update = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
|
92
|
+
bulk_update(update, columns - [:data_frame_id, :period_index])
|
93
|
+
end
|
94
|
+
inserts.group_by(&:keys).each do |columns, rows|
|
95
|
+
insert = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
|
96
|
+
bulk_insert(insert, columns - [:data_frame_id, :period_index])
|
97
|
+
end
|
71
98
|
end
|
72
99
|
end
|
73
100
|
end
|
101
|
+
|
74
102
|
##
|
75
|
-
#
|
103
|
+
# Fast update of block data for all blocks in a single call.
|
104
|
+
# Uses UPDATE + SET in PostgreSQL
|
105
|
+
# Uses INSERT ... ON DUPLICATE KEY UPDATE for MySQL (upsert)
|
106
|
+
# Uses UPDATE with CASE on others
|
76
107
|
##
|
77
108
|
def bulk_update(existing, columns=block_type::COLUMNS)
|
78
109
|
existing.each_slice(ActiveDataFrame.update_max_batch_size) do |existing_slice|
|
@@ -143,6 +174,7 @@ module ActiveDataFrame
|
|
143
174
|
end
|
144
175
|
end
|
145
176
|
|
177
|
+
|
146
178
|
def bulk_delete(id, indices)
|
147
179
|
indices.each_slice(ActiveDataFrame.delete_max_batch_size) do |slice|
|
148
180
|
# puts "Deleting slice of #{slice.length}"
|
@@ -152,20 +184,30 @@ module ActiveDataFrame
|
|
152
184
|
|
153
185
|
##
|
154
186
|
# Insert block data for all blocks in a single call
|
187
|
+
# PostgreSQL uses COPY; other adapters use a single multi-row INSERT statement
|
155
188
|
##
|
156
189
|
def bulk_insert(new_blocks, columns=block_type::COLUMNS)
|
157
190
|
new_blocks.each_slice(ActiveDataFrame.insert_max_batch_size) do |new_blocks_slice|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
191
|
+
if ActiveRecord::Base.connection_config[:adapter] == 'postgresql'
|
192
|
+
copy_statement = "COPY #{block_type.table_name} (#{columns.join(',')},data_frame_id,period_index,data_frame_type) FROM STDIN CSV"
|
193
|
+
db_conn = ActiveRecord::Base.connection.raw_connection
|
194
|
+
db_conn.copy_data(copy_statement) do
|
195
|
+
new_blocks_slice.each do |period_index, (values, df_id)|
|
196
|
+
db_conn.put_copy_data((values + [df_id, period_index, data_frame_type.name]).join(',') << "\n")
|
197
|
+
end
|
198
|
+
end
|
199
|
+
else
|
200
|
+
inserts = ''
|
201
|
+
new_blocks_slice.each do |period_index, (values, df_id)|
|
202
|
+
inserts << \
|
203
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
204
|
+
when 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
|
205
|
+
else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
|
206
|
+
end
|
165
207
|
end
|
208
|
+
sql = "INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
|
209
|
+
Database.execute sql
|
166
210
|
end
|
167
|
-
sql = "INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
|
168
|
-
Database.execute sql
|
169
211
|
end
|
170
212
|
end
|
171
213
|
end
|
@@ -31,9 +31,7 @@ module ActiveDataFrame
|
|
31
31
|
bounds = get_bounds(from, to, block_type)
|
32
32
|
scope = block_type.where(data_frame_type: data_frame_type.name, data_frame_id: rows.select(:id))
|
33
33
|
scope = scope.where(data_frame_id: values.keys) if values.kind_of?(Hash)
|
34
|
-
|
35
|
-
grouped_update_indices = all_update_indices.group_by(&:first).transform_values{|value| Set.new(value.map!(&:last)) }
|
36
|
-
instance_ids = rows.pluck(:id)
|
34
|
+
instance_ids = rows.loaded? ? rows.map(&:id) : rows.pluck(:id)
|
37
35
|
instance_ids &= values.keys if values.kind_of?(Hash)
|
38
36
|
upserts = to_enum(:iterate_bounds, [bounds], block_type).flat_map do |index, left, right, cursor, size|
|
39
37
|
instance_ids.map do |instance_id|
|
@@ -42,8 +40,7 @@ module ActiveDataFrame
|
|
42
40
|
end
|
43
41
|
end
|
44
42
|
|
45
|
-
|
46
|
-
Database.for_types(block: block_type, df: data_frame_type).bulk_upsert(update, insert)
|
43
|
+
Database.for_types(block: block_type, df: data_frame_type).bulk_upsert(upserts, ->{scope.where(period_index: bounds.from.index..bounds.to.index)})
|
47
44
|
values
|
48
45
|
end
|
49
46
|
|
@@ -58,16 +55,10 @@ module ActiveDataFrame
|
|
58
55
|
def upsert(from, values)
|
59
56
|
to = (from + values.length) - 1
|
60
57
|
bounds = get_bounds(from, to)
|
61
|
-
update_indices = Set.new(scope.where(period_index: bounds.from.index..bounds.to.index).order(period_index: :asc).pluck(:period_index))
|
62
|
-
# Detect blocks in bounds:
|
63
|
-
# - If existing and covered, do an update without load
|
64
|
-
# - If existing and uncovered, do a small write (without load)
|
65
|
-
# - If not existing, insert!
|
66
58
|
upserts = to_enum(:iterate_bounds, [bounds]).map do |index, left, right, cursor, size|
|
67
59
|
[[:data_frame_id, self.instance.id], [:period_index, index], *(left.succ..right.succ).map{|v| :"t#{v}" }.zip(values[cursor...cursor + size])].to_h
|
68
60
|
end
|
69
|
-
|
70
|
-
database.bulk_upsert(update, insert)
|
61
|
+
database.bulk_upsert(upserts, ->{ scope.where(period_index: bounds.from.index..bounds.to.index)})
|
71
62
|
values
|
72
63
|
end
|
73
64
|
|
@@ -99,7 +90,6 @@ module ActiveDataFrame
|
|
99
90
|
end
|
100
91
|
end
|
101
92
|
|
102
|
-
|
103
93
|
database.bulk_delete(self.instance.id, deleted_indices) unless deleted_indices.size.zero?
|
104
94
|
database.bulk_update(existing) unless existing.size.zero?
|
105
95
|
database.bulk_insert(new_blocks) unless new_blocks.size.zero?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_data_frame
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Wouter Coppieters
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|