blackstack-warehouse 1.0.1

Files changed (3)
  1. checksums.yaml +7 -0
  2. data/lib/blackstack-warehouse.rb +291 -0
  3. metadata +84 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA256:
+   metadata.gz: ce62893601761762171f47413fc906df8912aafe4b3d3d897851534ecef2a5bc
+   data.tar.gz: d49308f2ee192b72f90300f42afa17e34d0332512a5d7855b4b9b4c2542d4745
+ SHA512:
+   metadata.gz: 924f0ddad141527c9de4d58fee6d4f03cff860af2cc347e7aeb3ca4e8f173a3d9141f44be018c8e7a07745908b5f4ca335366480e0826aa41263d9e6bef9794e
+   data.tar.gz: e73fe6e8d276c8f4ebb44148ab1a148a5ba0143a40254e54550241513f90c4b87449d35aee59f4ee6fdbc96eea8114b34c821b81168dc6e00df527bece38f0bc
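These digests cover metadata.gz and data.tar.gz, the two files packed inside the published .gem archive (a .gem file is a plain tar). A minimal verification sketch in Ruby, assuming the gem has been downloaded and unpacked into the current directory; the file names match the entries above, everything else is illustrative:

    require 'digest'

    # e.g. unpack first with: tar -xf blackstack-warehouse-1.0.1.gem
    %w[metadata.gz data.tar.gz].each do |f|
        puts "#{f} SHA256: #{Digest::SHA256.file(f).hexdigest}"
        puts "#{f} SHA512: #{Digest::SHA512.file(f).hexdigest}"
    end

The printed digests should match the SHA256 and SHA512 values listed above.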
data/lib/blackstack-warehouse.rb ADDED
@@ -0,0 +1,291 @@
+ require 'blackstack-db'
+ require 'simple_cloud_logging'
+
+ module BlackStack
+     class Warehouse
+         AGE_UNITS = [:minutes, :hours, :days, :weeks, :months, :years]
+         @@tables = []
+
+         def self.create(
+             origin: , # name of the table to pull the data from
+             archive: nil, # name of the table where the archived data is stored
+             logger: nil
+         )
+             archive ||= "#{origin.to_s}_archive"
+             l = logger || BlackStack::DummyLogger.new(nil)
+
+             l.logs 'Creating archive table... '
+             if DB.table_exists?(archive)
+                 l.logf 'already exists'.yellow
+             else
+                 DB.create_table archive.to_sym
+                 l.logf 'done'.green
+             end
+
+             l.logs 'Adding columns... '
+             DB.schema(origin.to_sym).each { |k, col|
+                 l.logs "Adding column: #{k.to_s.blue}... "
+                 begin
+                     DB.alter_table archive.to_sym do
+                         add_column k, col[:db_type]
+                     end
+                     l.logf 'done'.green
+                 rescue => e
+                     l.logf 'skipped'.yellow #+ " (error: #{e.message})"
+                 end
+             }
+             l.logf 'done'.green
+         end # def self.create
+
+         # Move data from origin to archive.
+         # Parameters:
+         # - origin: Symbol. Name of the table to take data from. Example: :post. Mandatory.
+         # - archive: Symbol. Name of the table to store the data. Example: :post_archive. Default: "#{origin.to_s}_archive".
+         # - primary_key: Symbol. Column of the primary key. Example: :id. Default: :id.
+         # - age_field: Symbol. Column to use to calculate the age of the record. Example: :create_time. Default: :create_time.
+         # - age_to_archive: Integer. Age (in age_units) after which a record is archived. Example: 1. 0 means never archive.
+         # - age_units: Symbol. :minutes, :hours, :days, :weeks, :months or :years. Default: :hours.
+         # - batch_size: Integer. Number of records to move in each batch. Default: 1000.
+         #
+         def self.archive(
+             origin: ,
+             archive: nil,
+             primary_key: :id,
+             age_field: :create_time,
+             age_to_archive: 1,
+             age_units: :hours,
+             batch_size: 1000,
+             logger: nil
+         )
+             l = logger || BlackStack::DummyLogger.new(nil)
+             archive ||= "#{origin.to_s}_archive".to_sym
+             err = []
+
+             err << 'origin must be a symbol' unless origin.is_a? Symbol
+             err << 'archive must be a symbol' unless archive.is_a? Symbol
+             err << 'primary_key must be a symbol' unless primary_key.is_a? Symbol
+             err << 'age_field must be a symbol' unless age_field.is_a? Symbol
+             err << 'age_to_archive must be an integer' unless age_to_archive.is_a? Integer
+             err << 'age_to_archive must be greater than or equal to 0' unless age_to_archive >= 0
+             err << "age_units must be #{AGE_UNITS.join(', ')}" unless AGE_UNITS.include? age_units
+             err << 'batch_size must be an integer' unless batch_size.is_a? Integer
+             err << 'batch_size must be greater than 0' unless batch_size > 0
+
+             raise err.join("\n") unless err.empty?
+
+             # select all records older than age_to_archive (expressed in age_units).
+             l.logs 'Insert into the archive... '
+             records = DB[origin.to_sym].where(Sequel.lit("
+                 \"#{age_field.to_s}\" < CAST('#{now}' AS TIMESTAMP) - INTERVAL '#{age_to_archive} #{age_units.to_s}'
+             ")).except(DB[archive])
+             l.logf 'done'.green + "(#{records.count.to_s.blue} records)"
+
+             # split records in batches of batch_size
+             # insert records into archive table.
+             i = 0
+             batches = records.each_slice(batch_size)
+             batches.each { |batch|
+                 i += 1
+                 # insert into the archive table only if no record with the same key already exists
+                 l.logs "Inserting batch #{i.to_s} into archive... "
+                 exists = DB[archive.to_sym].where(primary_key => batch.map { |r| r[primary_key] }).count > 0
+                 if exists
+                     raise "Record(s) already exist in the archive table."
+                 else
+                     DB[archive.to_sym].multi_insert(batch)
+                     l.logf 'done'.green
+                 end
+             }
+
+             # select all records in the origin that already exist in the archive.
+             l.logs 'Delete from the origin... '
+             records = DB[origin.to_sym].intersect(DB[archive])
+             l.logf 'done'.green + "(#{records.count.to_s.blue} records)"
+
+             # split records in batches of batch_size
+             # delete records from origin table.
+             i = 0
+             batches = records.each_slice(batch_size)
+             batches.each { |batch|
+                 i += 1
+                 l.logs "Deleting batch #{i.to_s} from origin... "
+                 DB[origin.to_sym].where(primary_key => batch.map { |r| r[primary_key] }).delete
+                 l.logf 'done'.green
+             }
+         end # def self.archive
+
+         # Delete data from the archive permanently.
+         # Parameters:
+         # - origin: Symbol. Name of the origin table; used to derive the default archive name. Example: :post. Mandatory.
+         # - archive: Symbol. Name of the archive table to delete from. Example: :post_archive. Default: "#{origin.to_s}_archive".
+         # - primary_key: Symbol. Column of the primary key. Example: :id. Default: :id.
+         # - age_field: Symbol. Column to use to calculate the age of the record. Example: :create_time. Default: :create_time.
+         # - age_to_drain: Integer. Age (in age_units) after which an archived record is deleted. Example: 90. 0 means never drain.
+         # - age_units: Symbol. :minutes, :hours, :days, :weeks, :months or :years. Default: :hours.
+         # - batch_size: Integer. Number of records to delete in each batch. Default: 1000.
+         #
+         def self.drain(
+             origin: ,
+             archive: nil,
+             primary_key: :id,
+             age_field: :create_time,
+             age_to_drain: 90,
+             age_units: :hours,
+             batch_size: 1000,
+             logger: nil
+         )
+             l = logger || BlackStack::DummyLogger.new(nil)
+             archive ||= "#{origin.to_s}_archive".to_sym
+             err = []
+
+             err << 'origin must be a symbol' unless origin.is_a? Symbol
+             err << 'archive must be a symbol' unless archive.is_a? Symbol
+             err << 'primary_key must be a symbol' unless primary_key.is_a? Symbol
+             err << 'age_field must be a symbol' unless age_field.is_a? Symbol
+             err << 'age_to_drain must be an integer' unless age_to_drain.is_a? Integer
+             err << 'age_to_drain must be greater than or equal to 0' unless age_to_drain >= 0
+             err << "age_units must be #{AGE_UNITS.join(', ')}" unless AGE_UNITS.include? age_units
+             err << 'batch_size must be an integer' unless batch_size.is_a? Integer
+             err << 'batch_size must be greater than 0' unless batch_size > 0
+
+             raise err.join("\n") unless err.empty?
+
+             # select all records older than age_to_drain (expressed in age_units).
+             l.logs 'Delete from the archive... '
+             records = DB[archive.to_sym].where(Sequel.lit("
+                 \"#{age_field.to_s}\" < CAST('#{now}' AS TIMESTAMP) - INTERVAL '#{age_to_drain} #{age_units.to_s}'
+             "))
+             l.logf 'done'.green + "(#{records.count.to_s.blue} records)"
+
+             # split records in batches of batch_size
+             # delete records from the archive table.
+             i = 0
+             batches = records.each_slice(batch_size)
+             batches.each { |batch|
+                 i += 1
+                 l.logs "Deleting batch #{i.to_s} from archive... "
+                 DB[archive.to_sym].where(primary_key => batch.map { |r| r[primary_key] }).delete
+                 l.logf 'done'.green
+             }
+         end # def self.drain
+
+         # Register a table to be archived and drained, and make sure its archive table exists.
+         # Parameters:
+         # - origin: Symbol. Name of the table to take data from. Example: :post. Mandatory.
+         # - archive: Symbol. Name of the table to store the data. Example: :post_archive. Default: "#{origin.to_s}_archive".
+         # - primary_key: Symbol. Column of the primary key. Example: :id. Default: :id.
+         # - age_field: Symbol. Column to use to calculate the age of the record. Example: :create_time. Default: :create_time.
+         # - age_to_drain: Integer. Age (in age_units) after which an archived record is deleted. Example: 90. 0 means never drain.
+         # - age_units: Symbol. :minutes, :hours, :days, :weeks, :months or :years. Default: :hours.
+         # - batch_size: Integer. Number of records to move in each batch. Default: 1000.
+         #
+         def self.set_one_table(
+             origin: ,
+             archive: nil,
+             primary_key: :id,
+             age_field: :create_time,
+             age_to_archive: 1,
+             age_to_drain: 90,
+             age_units: :hours,
+             batch_size: 1000,
+             logger: nil
+         )
+             l = logger || BlackStack::DummyLogger.new(nil)
+
+             archive ||= "#{origin.to_s}_archive".to_sym
+             err = []
+             err << 'origin must be a symbol' unless origin.is_a? Symbol
+             err << 'archive must be a symbol' unless archive.is_a? Symbol
+             err << 'primary_key must be a symbol' unless primary_key.is_a? Symbol
+             err << 'age_field must be a symbol' unless age_field.is_a? Symbol
+             err << 'age_to_drain must be an integer' unless age_to_drain.is_a? Integer
+             err << 'age_to_drain must be greater than or equal to 0' unless age_to_drain >= 0
+             err << 'age_to_archive must be an integer' unless age_to_archive.is_a? Integer
+             err << 'age_to_archive must be greater than or equal to 0' unless age_to_archive >= 0
+             err << "age_units must be #{AGE_UNITS.join(', ')}" unless AGE_UNITS.include? age_units
+             err << 'batch_size must be an integer' unless batch_size.is_a? Integer
+             err << 'batch_size must be greater than 0' unless batch_size > 0
+
+             raise err.join("\n") unless err.empty?
+
+             h = @@tables.find { |t| t[:origin] == origin }
+             if h.nil?
+                 h = {
+                     origin: origin,
+                     archive: archive,
+                     primary_key: primary_key,
+                     age_field: age_field,
+                     age_to_archive: age_to_archive,
+                     age_to_drain: age_to_drain,
+                     age_units: age_units,
+                     batch_size: batch_size
+                 }
+                 @@tables << h
+             else
+                 h[:archive] = archive
+                 h[:primary_key] = primary_key
+                 h[:age_field] = age_field
+                 h[:age_to_archive] = age_to_archive
+                 h[:age_to_drain] = age_to_drain
+                 h[:age_units] = age_units
+                 h[:batch_size] = batch_size
+             end
+
+             self.create(
+                 origin: h[:origin],
+                 archive: h[:archive] || "#{h[:origin].to_s}_archive".to_sym,
+                 logger: l
+             )
+         end # def self.set_one_table
+
+         # set a list of tables to archive and drain.
+         def self.set(arr, logger: nil)
+             raise "Argument must be an array of hashes" unless arr.is_a? Array
+             raise "Argument must be an array of hashes" unless arr.all? { |e| e.is_a? Hash }
+             arr.each { |h|
+                 self.set_one_table(
+                     origin: h[:origin],
+                     archive: h[:archive] || "#{h[:origin].to_s}_archive".to_sym,
+                     primary_key: h[:primary_key] || :id,
+                     age_field: h[:age_field] || :create_time,
+                     age_to_archive: h[:age_to_archive] || 1,
+                     age_to_drain: h[:age_to_drain] || 90,
+                     age_units: h[:age_units] || :hours,
+                     batch_size: h[:batch_size] || 1000,
+                     logger: logger
+                 )
+             }
+         end # def self.set
+
+         def self.archive_all(logger: nil)
+             @@tables.each { |h|
+                 self.archive(
+                     origin: h[:origin],
+                     archive: h[:archive] || "#{h[:origin].to_s}_archive".to_sym,
+                     primary_key: h[:primary_key] || :id,
+                     age_field: h[:age_field] || :create_time,
+                     age_to_archive: h[:age_to_archive] || 1,
+                     age_units: h[:age_units] || :hours,
+                     batch_size: h[:batch_size] || 1000,
+                     logger: logger
+                 )
+             }
+         end # def self.archive_all
+
+         def self.drain_all(logger: nil)
+             @@tables.each { |h|
+                 self.drain(
+                     origin: h[:origin],
+                     archive: h[:archive] || "#{h[:origin].to_s}_archive".to_sym,
+                     primary_key: h[:primary_key] || :id,
+                     age_field: h[:age_field] || :create_time,
+                     age_to_drain: h[:age_to_drain] || 90,
+                     age_units: h[:age_units] || :hours,
+                     batch_size: h[:batch_size] || 1000,
+                     logger: logger
+                 )
+             }
+         end # def self.drain_all
+
+     end # class Warehouse
+ end # module BlackStack
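Note that the age filters in both archive and drain interpolate a `now` value that is not defined anywhere in this file; it is evidently expected to be supplied by the surrounding application (for example as a helper returning the current database timestamp). With that in mind, here is a minimal usage sketch, assuming the DB connection required by blackstack-db is already established and such a `now` helper is in scope; the :post table and the retention values below are hypothetical:

    require 'blackstack-warehouse'

    # Register the tables to manage. Keys omitted here fall back to the defaults
    # applied by self.set (primary_key :id, age_field :create_time, etc.).
    BlackStack::Warehouse.set([
        {
            origin: :post,          # hypothetical table to archive
            age_to_archive: 24,     # move rows older than 24 hours into :post_archive
            age_to_drain: 2160,     # purge archived rows older than ~90 days (expressed in hours)
            age_units: :hours,
            batch_size: 1000
        }
    ])

    # Typically run from a scheduled job:
    BlackStack::Warehouse.archive_all  # copy aged rows into the *_archive tables and delete them from the origin
    BlackStack::Warehouse.drain_all    # permanently delete the oldest rows from the *_archive tables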
metadata ADDED
@@ -0,0 +1,84 @@
+ --- !ruby/object:Gem::Specification
+ name: blackstack-warehouse
+ version: !ruby/object:Gem::Version
+   version: 1.0.1
+ platform: ruby
+ authors:
+ - Leandro Daniel Sardi
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2024-05-19 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: blackstack-db
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 1.0.9
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 1.0.9
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 1.0.9
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 1.0.9
+ - !ruby/object:Gem::Dependency
+   name: simple_cloud_logging
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 1.2.2
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 1.2.2
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 1.2.2
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: 1.2.2
+ description: 'Implement Data Retention Terms in your SaaS, easily. BlackStack Warehouse
+   performs data archiving into a replicated database schema automatically: https://github.com/leandrosardi/blackstack-warehouse.'
+ email: leandro@connectionsphere.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - lib/blackstack-warehouse.rb
+ homepage: https://rubygems.org/gems/blackstack-warehouse
+ licenses:
+ - MIT
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubygems_version: 3.3.7
+ signing_key:
+ specification_version: 4
+ summary: Implement Data Retention Terms in your SaaS, easily.
+ test_files: []
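Per the dependency section above, installing this gem also pulls in blackstack-db (~> 1.0.9, >= 1.0.9) and simple_cloud_logging (~> 1.2.2, >= 1.2.2). A minimal Gemfile entry, with the version pin chosen here only as an example:

    source 'https://rubygems.org'
    gem 'blackstack-warehouse', '~> 1.0.1'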