blackstack-warehouse 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/blackstack-warehouse.rb +291 -0
- metadata +84 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ce62893601761762171f47413fc906df8912aafe4b3d3d897851534ecef2a5bc
|
4
|
+
data.tar.gz: d49308f2ee192b72f90300f42afa17e34d0332512a5d7855b4b9b4c2542d4745
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 924f0ddad141527c9de4d58fee6d4f03cff860af2cc347e7aeb3ca4e8f173a3d9141f44be018c8e7a07745908b5f4ca335366480e0826aa41263d9e6bef9794e
|
7
|
+
data.tar.gz: e73fe6e8d276c8f4ebb44148ab1a148a5ba0143a40254e54550241513f90c4b87449d35aee59f4ee6fdbc96eea8114b34c821b81168dc6e00df527bece38f0bc
|
@@ -0,0 +1,291 @@
|
|
1
|
+
require 'blackstack-db'
|
2
|
+
require 'simple_cloud_logging'
|
3
|
+
|
4
|
+
module BlackStack
|
5
|
+
class Warehouse
|
6
|
+
AGE_UNITS = [:minutes, :hours, :days, :weeks, :months, :years]
|
7
|
+
@@tables = []
|
8
|
+
|
9
|
+
# Make sure the archive table exists and carries every column of the origin table.
#
# Parameters:
# - origin: name of the table whose schema is copied. Mandatory.
# - archive: name of the table that receives the columns. Default: "#{origin}_archive".
# - logger: object answering `logs` / `logf`. Default: BlackStack::DummyLogger.new(nil).
#
# Each origin column is added to the archive one at a time. Any error raised
# while adding a column is rescued and reported as 'skipped', so one failing
# column does not stop the remaining ones.
def self.create(origin:, archive: nil, logger: nil)
  target = archive || "#{origin}_archive"
  log = logger || BlackStack::DummyLogger.new(nil)

  log.logs 'Creating archivement table... '
  already_there = DB.table_exists?(target)
  DB.create_table(target.to_sym) unless already_there
  log.logf(already_there ? 'already exists'.yellow : 'done'.green)

  log.logs 'Adding columns... '
  DB.schema(origin.to_sym).each do |column_name, details|
    log.logs "Adding column: #{column_name.to_s.blue}... "
    begin
      # The block is evaluated by the schema generator; the locals are captured by closure.
      DB.alter_table(target.to_sym) { add_column column_name, details[:db_type] }
      log.logf 'done'.green
    rescue StandardError
      log.logf 'skipped'.yellow
    end
  end
  log.logf 'done'.green
end # def self.create
|
39
|
+
|
40
|
+
# Move aged records from the origin table into the archive table, then delete
# from the origin every record that is already present in the archive.
#
# Parameters:
# - origin: Symbol. Name of the table to take data from. Example: :post. Mandatory.
# - archive: Symbol. Name of the table to store the data. Example: :post_archive. Default: :"#{origin}_archive".
# - primary_key: Symbol. Column holding the primary key. Example: :id. Default: :id.
# - age_field: Symbol. Column used to calculate the age of the record. Example: :create_time. Default: :create_time.
# - age_to_archive: Integer >= 0, expressed in age_units. Default: 1. 0 means never archive: the call returns nil without touching the database.
# - age_units: Symbol. :minutes, :hours, :days, :weeks, :months or :years. Default: :hours.
# - batch_size: Integer > 0. Number of records to move in each batch. Default: 1000.
# - logger: object answering `logs` / `logf`. Default: BlackStack::DummyLogger.new(nil).
#
# Raises RuntimeError:
# - listing every invalid parameter, one message per line;
# - when a batch contains a primary key that already exists in the archive.
#
# NOTE(review): `now` is not defined in this file; presumably a helper supplied
# by blackstack-db that returns the current database timestamp -- confirm.
def self.archive(
  origin:,
  archive: nil,
  primary_key: :id,
  age_field: :create_time,
  age_to_archive: 1,
  age_units: :hours,
  batch_size: 1000,
  logger: nil
)
  l = logger || BlackStack::DummyLogger.new(nil)
  archive ||= "#{origin}_archive".to_sym
  err = []

  err << 'origin must be a symbol' unless origin.is_a? Symbol
  err << 'archive must be a symbol' unless archive.is_a? Symbol
  err << 'primary_key must be a symbol' unless primary_key.is_a? Symbol
  err << 'age_field must be a symbol' unless age_field.is_a? Symbol
  # Only compare the numbers once we know they are Integers; otherwise the
  # comparison itself raises and hides the aggregated validation message.
  if !age_to_archive.is_a?(Integer)
    err << 'age_to_archive must be an integer'
  elsif age_to_archive < 0
    err << 'age_to_archive must be greater than or equal to 0'
  end
  err << "age_units must be #{AGE_UNITS.join(', ')}" unless AGE_UNITS.include? age_units
  if !batch_size.is_a?(Integer)
    err << 'batch_size must be an integer'
  elsif batch_size <= 0
    err << 'batch_size must be greater than 0'
  end

  raise err.join("\n") unless err.empty?

  # Documented contract: 0 disables archiving. Without this guard a zero
  # interval would select every record older than "now".
  if age_to_archive == 0
    l.logs 'Insert into the archive... '
    l.logf 'skipped'.yellow + ' (age_to_archive is 0)'
    return nil
  end

  # Select all origin records older than age_to_archive age_units that are not
  # already present (as identical rows) in the archive.
  l.logs 'Insert into the archive... '
  cutoff = "\"#{age_field}\" < CAST('#{now}' AS TIMESTAMP) - INTERVAL '#{age_to_archive} #{age_units}'"
  records = DB[origin].where(Sequel.lit(cutoff)).except(DB[archive])
  l.logf 'done'.green + "(#{records.count.to_s.blue} records)"

  # Copy the selected records into the archive, batch_size rows at a time.
  records.each_slice(batch_size).with_index(1) do |batch, i|
    l.logs "Inserting batch #{i} into archive... "
    keys = batch.map { |r| r[primary_key] }
    # A key already in the archive means a row with the same key but different
    # content exists there (identical rows were excluded above): refuse to insert.
    raise "Record(s) already exists in archive table." if DB[archive].where(primary_key => keys).count > 0

    DB[archive].multi_insert(batch)
    l.logf 'done'.green
  end

  # Select all records of the origin that already exist in the archive.
  l.logs 'Delete from the origin... '
  records = DB[origin].intersect(DB[archive])
  l.logf 'done'.green + "(#{records.count.to_s.blue} records)"

  # Delete those records from the origin, batch_size rows at a time.
  records.each_slice(batch_size).with_index(1) do |batch, i|
    l.logs "Deleting batch #{i} from origin... "
    DB[origin].where(primary_key => batch.map { |r| r[primary_key] }).delete
    l.logf 'done'.green
  end
end # def self.archive
|
116
|
+
|
117
|
+
# Delete aged records from the archive table permanently.
#
# Parameters:
# - origin: Symbol. Name of the origin table; here it is only used to derive the default archive name. Example: :post. Mandatory.
# - archive: Symbol. Name of the table to delete from. Example: :post_archive. Default: :"#{origin}_archive".
# - primary_key: Symbol. Column holding the primary key. Example: :id. Default: :id.
# - age_field: Symbol. Column used to calculate the age of the record. Example: :create_time. Default: :create_time.
# - age_to_drain: Integer >= 0, expressed in age_units. Default: 90. 0 means never drain: the call returns nil without touching the database.
# - age_units: Symbol. :minutes, :hours, :days, :weeks, :months or :years. Default: :hours.
# - batch_size: Integer > 0. Number of records to delete in each batch. Default: 1000.
# - logger: object answering `logs` / `logf`. Default: BlackStack::DummyLogger.new(nil).
#
# Raises RuntimeError listing every invalid parameter, one message per line.
#
# NOTE(review): `now` is not defined in this file; presumably a helper supplied
# by blackstack-db that returns the current database timestamp -- confirm.
def self.drain(
  origin:,
  archive: nil,
  primary_key: :id,
  age_field: :create_time,
  age_to_drain: 90,
  age_units: :hours,
  batch_size: 1000,
  logger: nil
)
  l = logger || BlackStack::DummyLogger.new(nil)
  archive ||= "#{origin}_archive".to_sym
  err = []

  err << 'origin must be a symbol' unless origin.is_a? Symbol
  err << 'archive must be a symbol' unless archive.is_a? Symbol
  err << 'primary_key must be a symbol' unless primary_key.is_a? Symbol
  err << 'age_field must be a symbol' unless age_field.is_a? Symbol
  # Only compare the numbers once we know they are Integers; otherwise the
  # comparison itself raises and hides the aggregated validation message.
  if !age_to_drain.is_a?(Integer)
    err << 'age_to_drain must be an integer'
  elsif age_to_drain < 0
    err << 'age_to_drain must be greater than or equal to 0'
  end
  err << "age_units must be #{AGE_UNITS.join(', ')}" unless AGE_UNITS.include? age_units
  if !batch_size.is_a?(Integer)
    err << 'batch_size must be an integer'
  elsif batch_size <= 0
    err << 'batch_size must be greater than 0'
  end

  raise err.join("\n") unless err.empty?

  # Documented contract: 0 disables draining. Without this guard a zero
  # interval would delete every archived record older than "now".
  if age_to_drain == 0
    l.logs 'Delete from the archive... '
    l.logf 'skipped'.yellow + ' (age_to_drain is 0)'
    return nil
  end

  # Select all archived records older than age_to_drain age_units.
  l.logs 'Delete from the archive... '
  cutoff = "\"#{age_field}\" < CAST('#{now}' AS TIMESTAMP) - INTERVAL '#{age_to_drain} #{age_units}'"
  records = DB[archive].where(Sequel.lit(cutoff))
  l.logf 'done'.green + "(#{records.count.to_s.blue} records)"

  # Delete the selected records from the archive, batch_size rows at a time.
  records.each_slice(batch_size).with_index(1) do |batch, i|
    l.logs "Deleting batch #{i} from archive... "
    DB[archive].where(primary_key => batch.map { |r| r[primary_key] }).delete
    l.logf 'done'.green
  end
end # def self.drain
|
171
|
+
|
172
|
+
# Register (or update) the retention settings of one table, then make sure
# its archive table exists by calling `create`.
#
# Parameters:
# - origin: Symbol. Name of the table to take data from. Example: :post. Mandatory.
# - archive: Symbol. Name of the table to store the data. Example: :post_archive. Default: :"#{origin}_archive".
# - primary_key: Symbol. Column holding the primary key. Example: :id. Default: :id.
# - age_field: Symbol. Column used to calculate the age of the record. Example: :create_time. Default: :create_time.
# - age_to_archive: Integer >= 0, expressed in age_units. Default: 1.
# - age_to_drain: Integer >= 0, expressed in age_units. Default: 90.
# - age_units: Symbol. :minutes, :hours, :days, :weeks, :months or :years. Default: :hours.
# - batch_size: Integer > 0. Number of records to process in each batch. Default: 1000.
# - logger: object answering `logs` / `logf`. Default: BlackStack::DummyLogger.new(nil).
#
# Raises RuntimeError listing every invalid parameter, one message per line.
# Nothing is registered when validation fails.
def self.set_one_table(
  origin:,
  archive: nil,
  primary_key: :id,
  age_field: :create_time,
  age_to_archive: 1,
  age_to_drain: 90,
  age_units: :hours,
  batch_size: 1000,
  logger: nil
)
  l = logger || BlackStack::DummyLogger.new(nil)

  archive ||= "#{origin}_archive".to_sym
  err = []
  err << 'origin must be a symbol' unless origin.is_a? Symbol
  err << 'archive must be a symbol' unless archive.is_a? Symbol
  err << 'primary_key must be a symbol' unless primary_key.is_a? Symbol
  err << 'age_field must be a symbol' unless age_field.is_a? Symbol
  # Only compare the numbers once we know they are Integers; otherwise the
  # comparison itself raises and hides the aggregated validation message.
  if !age_to_drain.is_a?(Integer)
    err << 'age_to_drain must be an integer'
  elsif age_to_drain < 0
    err << 'age_to_drain must be greater than or equal to 0'
  end
  if !age_to_archive.is_a?(Integer)
    err << 'age_to_archive must be an integer'
  elsif age_to_archive < 0
    err << 'age_to_archive must be greater than or equal to 0'
  end
  err << "age_units must be #{AGE_UNITS.join(', ')}" unless AGE_UNITS.include? age_units
  if !batch_size.is_a?(Integer)
    err << 'batch_size must be an integer'
  elsif batch_size <= 0
    err << 'batch_size must be greater than 0'
  end

  raise err.join("\n") unless err.empty?

  # Reuse the entry already registered for this origin, or add a new one.
  h = @@tables.find { |t| t[:origin] == origin }
  if h.nil?
    h = { origin: origin }
    @@tables << h
  end
  h.merge!(
    archive: archive,
    primary_key: primary_key,
    age_field: age_field,
    age_to_archive: age_to_archive,
    age_to_drain: age_to_drain,
    age_units: age_units,
    batch_size: batch_size
  )

  # h[:archive] is always a Symbol here (validated above), so no fallback is needed.
  self.create(
    origin: h[:origin],
    archive: h[:archive],
    logger: l
  )
end # def self.set_one_table
|
240
|
+
|
241
|
+
# Register several tables at once.
#
# `arr` must be an Array of Hashes. Each hash is forwarded to `set_one_table`,
# keyed by Symbol (:origin, :archive, :primary_key, :age_field,
# :age_to_archive, :age_to_drain, :age_units, :batch_size). Missing or nil
# entries fall back to the defaults written below. The same `logger` is
# passed to every call.
#
# Raises RuntimeError when `arr` is not an Array or holds a non-Hash element.
def self.set(arr, logger: nil)
  well_formed = arr.is_a?(Array) && arr.all? { |entry| entry.is_a?(Hash) }
  raise "Argument must be an array of hashes" unless well_formed

  fallbacks = {
    primary_key: :id,
    age_field: :create_time,
    age_to_archive: 1,
    age_to_drain: 90,
    age_units: :hours,
    batch_size: 1000
  }

  arr.each do |entry|
    source = entry[:origin]
    options = { origin: source, archive: entry[:archive] || "#{source}_archive".to_sym }
    fallbacks.each { |key, default| options[key] = entry[key] || default }
    options[:logger] = logger
    self.set_one_table(**options)
  end
end # def self.set
|
259
|
+
|
260
|
+
# Run `archive` once for every table registered in @@tables, in registration
# order. Missing or nil settings fall back to the defaults written below.
# The same `logger` is passed to every call.
def self.archive_all(logger: nil)
  @@tables.each do |table|
    source = table[:origin]
    options = {
      origin: source,
      archive: table[:archive] || "#{source}_archive".to_sym,
      primary_key: table[:primary_key] || :id,
      age_field: table[:age_field] || :create_time,
      age_to_archive: table[:age_to_archive] || 1,
      age_units: table[:age_units] || :hours,
      batch_size: table[:batch_size] || 1000,
      logger: logger
    }
    self.archive(**options)
  end
end # def self.archive_all
|
274
|
+
|
275
|
+
# Run `drain` once for every table registered in @@tables, in registration
# order. Missing or nil settings fall back to the defaults written below.
# The same `logger` is passed to every call.
def self.drain_all(logger: nil)
  @@tables.each do |table|
    source = table[:origin]
    options = {
      origin: source,
      archive: table[:archive] || "#{source}_archive".to_sym,
      primary_key: table[:primary_key] || :id,
      age_field: table[:age_field] || :create_time,
      age_to_drain: table[:age_to_drain] || 90,
      age_units: table[:age_units] || :hours,
      batch_size: table[:batch_size] || 1000,
      logger: logger
    }
    self.drain(**options)
  end
end # def self.drain_all
|
289
|
+
|
290
|
+
end # class Warehouse
|
291
|
+
end # module BlackStack
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: blackstack-warehouse
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Leandro Daniel Sardi
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-05-19 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: blackstack-db
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.0.9
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.0.9
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.0.9
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.0.9
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: simple_cloud_logging
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 1.2.2
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 1.2.2
|
43
|
+
type: :runtime
|
44
|
+
prerelease: false
|
45
|
+
version_requirements: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - "~>"
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: 1.2.2
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 1.2.2
|
53
|
+
description: 'Implement Data Retention Terms in your SaaS, easily. BlackStack Warehouse
|
54
|
+
performs data archiving into a replicated database schema automatically: https://github.com/leandrosardi/blackstack-warehouse.'
|
55
|
+
email: leandro@connectionsphere.com
|
56
|
+
executables: []
|
57
|
+
extensions: []
|
58
|
+
extra_rdoc_files: []
|
59
|
+
files:
|
60
|
+
- lib/blackstack-warehouse.rb
|
61
|
+
homepage: https://rubygems.org/gems/blackstack-warehouse
|
62
|
+
licenses:
|
63
|
+
- MIT
|
64
|
+
metadata: {}
|
65
|
+
post_install_message:
|
66
|
+
rdoc_options: []
|
67
|
+
require_paths:
|
68
|
+
- lib
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0'
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
79
|
+
requirements: []
|
80
|
+
rubygems_version: 3.3.7
|
81
|
+
signing_key:
|
82
|
+
specification_version: 4
|
83
|
+
summary: Implement Data Retention Terms in your SaaS, easily.
|
84
|
+
test_files: []
|