active_data_frame 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/active_data_frame/database.rb +60 -18
- data/lib/active_data_frame/row.rb +3 -13
- data/lib/active_data_frame/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b24b99c34980b2896aa5fa479037d6bb4741c372
|
4
|
+
data.tar.gz: f77a43a58a962f5be8c84e7541494b55ba3a7550
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a7cf7e447b22b1c91bcc2d643d54316c76cc64d7ff4cb6062b228cd4a9c92d8d39372f659c80eca79aa4912408f96519ac7df90e6673ec55a6691932ad9fdc7
|
7
|
+
data.tar.gz: ba0a86aa56d65689669097d1544f26b312ac03ad5203e36800dea227eb19471b6a407ac1fd43ff7e268cdd7cd8bd73c97bb1d4d69b27dd8a0cba547e675784ef
|
@@ -59,20 +59,51 @@ module ActiveDataFrame
|
|
59
59
|
flush! unless self.batching
|
60
60
|
end
|
61
61
|
|
62
|
-
def bulk_upsert(
|
62
|
+
def bulk_upsert(upserts, scope=nil)
|
63
63
|
Database.batch do
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
64
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
65
|
+
when 'postgresql'.freeze
|
66
|
+
upserts.group_by(&:keys).each do |columns, value_list|
|
67
|
+
columns = columns - [:data_frame_id, :period_index]
|
68
|
+
inserts = ''
|
69
|
+
value_list.each do |row|
|
70
|
+
df_id, period_index, *values = row.values
|
71
|
+
inserts << "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
|
72
|
+
end
|
73
|
+
sql = %Q{
|
74
|
+
INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type)
|
75
|
+
VALUES #{inserts[0..-2]}
|
76
|
+
ON CONFLICT(data_frame_id, period_index, data_frame_type) DO UPDATE
|
77
|
+
SET #{columns.map{|c| "#{c} = excluded.#{c} "}.join(',')}
|
78
|
+
}
|
79
|
+
Database.execute sql
|
80
|
+
end
|
81
|
+
when 'mysql2'.freeze
|
82
|
+
upserts.group_by(&:keys).each do |columns, rows|
|
83
|
+
update = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
|
84
|
+
bulk_update(update, columns - [:data_frame_id, :period_index])
|
85
|
+
end
|
86
|
+
else
|
87
|
+
all_update_indices = scope[].pluck(:data_frame_id, :period_index)
|
88
|
+
grouped_update_indices = all_update_indices.group_by(&:first).transform_values{|value| Set.new(value.map!(&:last)) }
|
89
|
+
updates, inserts = upserts.partition{|upsert| grouped_update_indices[upsert[:data_frame_id]]&.include?(upsert[:period_index]) }
|
90
|
+
updates.group_by(&:keys).each do |columns, rows|
|
91
|
+
update = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
|
92
|
+
bulk_update(update, columns - [:data_frame_id, :period_index])
|
93
|
+
end
|
94
|
+
inserts.group_by(&:keys).each do |columns, rows|
|
95
|
+
insert = rows.map(&:values).map{|df_id, period_index, *values| [period_index, [values, df_id]] }
|
96
|
+
bulk_insert(insert, columns - [:data_frame_id, :period_index])
|
97
|
+
end
|
71
98
|
end
|
72
99
|
end
|
73
100
|
end
|
101
|
+
|
74
102
|
##
|
75
|
-
#
|
103
|
+
# Fast update block data for all blocks in a single call.
|
104
|
+
# Uses UPDATE + SET in PostgreSQL
|
105
|
+
# Uses INSERT ON CONFLICT for MySQL (Upsert)
|
106
|
+
# Uses UPDATE with CASE on others
|
76
107
|
##
|
77
108
|
def bulk_update(existing, columns=block_type::COLUMNS)
|
78
109
|
existing.each_slice(ActiveDataFrame.update_max_batch_size) do |existing_slice|
|
@@ -143,6 +174,7 @@ module ActiveDataFrame
|
|
143
174
|
end
|
144
175
|
end
|
145
176
|
|
177
|
+
|
146
178
|
def bulk_delete(id, indices)
|
147
179
|
indices.each_slice(ActiveDataFrame.delete_max_batch_size) do |slice|
|
148
180
|
# puts "Deleting slice of #{slice.length}"
|
@@ -152,20 +184,30 @@ module ActiveDataFrame
|
|
152
184
|
|
153
185
|
##
|
154
186
|
# Insert block data for all blocks in a single call
|
187
|
+
# PostgreSQL uses COPY, others use multi-statement insert
|
155
188
|
##
|
156
189
|
def bulk_insert(new_blocks, columns=block_type::COLUMNS)
|
157
190
|
new_blocks.each_slice(ActiveDataFrame.insert_max_batch_size) do |new_blocks_slice|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
191
|
+
if ActiveRecord::Base.connection_config[:adapter] == 'postgresql'
|
192
|
+
copy_statement = "COPY #{block_type.table_name} (#{columns.join(',')},data_frame_id,period_index,data_frame_type) FROM STDIN CSV"
|
193
|
+
db_conn = ActiveRecord::Base.connection.raw_connection
|
194
|
+
db_conn.copy_data(copy_statement) do
|
195
|
+
new_blocks_slice.each do |period_index, (values, df_id)|
|
196
|
+
db_conn.put_copy_data((values + [df_id, period_index, data_frame_type.name]).join(',') << "\n")
|
197
|
+
end
|
198
|
+
end
|
199
|
+
else
|
200
|
+
inserts = ''
|
201
|
+
new_blocks_slice.each do |period_index, (values, df_id)|
|
202
|
+
inserts << \
|
203
|
+
case ActiveRecord::Base.connection_config[:adapter]
|
204
|
+
when 'mysql2' then "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
|
205
|
+
else "(#{values.map{|v| v.inspect.gsub('"',"'") }.join(',')}, #{df_id}, #{period_index}, '#{data_frame_type.name}'),"
|
206
|
+
end
|
165
207
|
end
|
208
|
+
sql = "INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
|
209
|
+
Database.execute sql
|
166
210
|
end
|
167
|
-
sql = "INSERT INTO #{block_type.table_name} (#{columns.join(',')}, data_frame_id, period_index, data_frame_type) VALUES #{inserts[0..-2]}"
|
168
|
-
Database.execute sql
|
169
211
|
end
|
170
212
|
end
|
171
213
|
end
|
@@ -31,9 +31,7 @@ module ActiveDataFrame
|
|
31
31
|
bounds = get_bounds(from, to, block_type)
|
32
32
|
scope = block_type.where(data_frame_type: data_frame_type.name, data_frame_id: rows.select(:id))
|
33
33
|
scope = scope.where(data_frame_id: values.keys) if values.kind_of?(Hash)
|
34
|
-
|
35
|
-
grouped_update_indices = all_update_indices.group_by(&:first).transform_values{|value| Set.new(value.map!(&:last)) }
|
36
|
-
instance_ids = rows.pluck(:id)
|
34
|
+
instance_ids = rows.loaded? ? rows.map(&:id) : rows.pluck(:id)
|
37
35
|
instance_ids &= values.keys if values.kind_of?(Hash)
|
38
36
|
upserts = to_enum(:iterate_bounds, [bounds], block_type).flat_map do |index, left, right, cursor, size|
|
39
37
|
instance_ids.map do |instance_id|
|
@@ -42,8 +40,7 @@ module ActiveDataFrame
|
|
42
40
|
end
|
43
41
|
end
|
44
42
|
|
45
|
-
|
46
|
-
Database.for_types(block: block_type, df: data_frame_type).bulk_upsert(update, insert)
|
43
|
+
Database.for_types(block: block_type, df: data_frame_type).bulk_upsert(upserts, ->{scope.where(period_index: bounds.from.index..bounds.to.index)})
|
47
44
|
values
|
48
45
|
end
|
49
46
|
|
@@ -58,16 +55,10 @@ module ActiveDataFrame
|
|
58
55
|
def upsert(from, values)
|
59
56
|
to = (from + values.length) - 1
|
60
57
|
bounds = get_bounds(from, to)
|
61
|
-
update_indices = Set.new(scope.where(period_index: bounds.from.index..bounds.to.index).order(period_index: :asc).pluck(:period_index))
|
62
|
-
# Detect blocks in bounds:
|
63
|
-
# - If existing and covered, do an update without load
|
64
|
-
# - If existing and uncovered, do a small write (without load)
|
65
|
-
# - If not existing, insert!
|
66
58
|
upserts = to_enum(:iterate_bounds, [bounds]).map do |index, left, right, cursor, size|
|
67
59
|
[[:data_frame_id, self.instance.id], [:period_index, index], *(left.succ..right.succ).map{|v| :"t#{v}" }.zip(values[cursor...cursor + size])].to_h
|
68
60
|
end
|
69
|
-
|
70
|
-
database.bulk_upsert(update, insert)
|
61
|
+
database.bulk_upsert(upserts, ->{ scope.where(period_index: bounds.from.index..bounds.to.index)})
|
71
62
|
values
|
72
63
|
end
|
73
64
|
|
@@ -99,7 +90,6 @@ module ActiveDataFrame
|
|
99
90
|
end
|
100
91
|
end
|
101
92
|
|
102
|
-
|
103
93
|
database.bulk_delete(self.instance.id, deleted_indices) unless deleted_indices.size.zero?
|
104
94
|
database.bulk_update(existing) unless existing.size.zero?
|
105
95
|
database.bulk_insert(new_blocks) unless new_blocks.size.zero?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: active_data_frame
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.8
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Wouter Coppieters
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|