blackstack-warehouse 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/blackstack-warehouse.rb +291 -0
  3. metadata +84 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ce62893601761762171f47413fc906df8912aafe4b3d3d897851534ecef2a5bc
4
+ data.tar.gz: d49308f2ee192b72f90300f42afa17e34d0332512a5d7855b4b9b4c2542d4745
5
+ SHA512:
6
+ metadata.gz: 924f0ddad141527c9de4d58fee6d4f03cff860af2cc347e7aeb3ca4e8f173a3d9141f44be018c8e7a07745908b5f4ca335366480e0826aa41263d9e6bef9794e
7
+ data.tar.gz: e73fe6e8d276c8f4ebb44148ab1a148a5ba0143a40254e54550241513f90c4b87449d35aee59f4ee6fdbc96eea8114b34c821b81168dc6e00df527bece38f0bc
@@ -0,0 +1,291 @@
1
+ require 'blackstack-db'
2
+ require 'simple_cloud_logging'
3
+
4
+ module BlackStack
5
+ class Warehouse
6
+ AGE_UNITS = [:minutes, :hours, :days, :weeks, :months, :years]
7
+ @@tables = []
8
+
9
+ def self.create(
10
+ origin: , # table name from where I will get the database
11
+ archive: nil, # table name where I will store the database
12
+ logger: nil
13
+ )
14
+ archive ||= "#{origin.to_s}_archive"
15
+ l = logger || BlackStack::DummyLogger.new(nil)
16
+
17
+ l.logs 'Creating archivement table... '
18
+ if DB.table_exists?(archive)
19
+ l.logf 'already exists'.yellow
20
+ else
21
+ DB.create_table archive.to_sym
22
+ l.logf 'done'.green
23
+ end
24
+
25
+ l.logs 'Adding columns... '
26
+ DB.schema(origin.to_sym).each { |k, col|
27
+ l.logs "Adding column: #{k.to_s.blue}... "
28
+ begin
29
+ DB.alter_table archive.to_sym do
30
+ add_column k, col[:db_type]
31
+ end
32
+ l.logf 'done'.green
33
+ rescue => e
34
+ l.logf 'skipped'.yellow #+ " (error: #{e.message})"
35
+ end
36
+ }
37
+ l.logf 'done'.green
38
+ end # def self.create
39
+
40
+ # Move data from origin to archive.
41
+ # Parameters:
42
+ # - origin: Symbol. Name of the table to take data from. Example: :post. Mandatory.
43
+ # - archive: Symbol. Name of the table to store the data. Example: :post_archive. Default: "#{origin.to_s}_archive".
44
+ # - primary_key: Symbol. Name of the primary key column. Example: :id. Default: :id.
45
+ # - age_field: Symbol. Column to use to calculate the age of the record. Example: :create_time. Default: :create_time.
46
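+ # - age_to_archive: Integer, greater than or equal to 0. Age threshold, measured in age_units. Example: 1. Default: 1. Documented as "0 means never archive", but no special case for 0 is visible in this method - TODO confirm.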
47
+ # - age_units: Symbol. :minutes, :hours, :days, :weeks, :months or :years. Default: :hours.
48
+ # - batch_size: Integer. Number of records to move in each batch. Default: 1000.
49
+ #
50
+ def self.archive(
51
+ origin: ,
52
+ archive: nil,
53
+ primary_key: :id,
54
+ age_field: :create_time,
55
+ age_to_archive: 1,
56
+ age_units: :hours,
57
+ batch_size: 1000,
58
+ logger: nil
59
+ )
60
+ l = logger || BlackStack::DummyLogger.new(nil)
61
+ archive ||= "#{origin.to_s}_archive".to_sym
62
+ err = []
63
+
64
+ err << 'origin must be a symbol' unless origin.is_a? Symbol
65
+ err << 'archive must be a symbol' unless archive.is_a? Symbol
66
+ err << 'primary_key must be a symbol' unless primary_key.is_a? Symbol
67
+ err << 'age_field must be a symbol' unless age_field.is_a? Symbol
68
+ err << 'age_to_archive must be an integer' unless age_to_archive.is_a? Integer
69
+ err << 'age_to_archive must be greater than or equal to 0' unless age_to_archive >= 0
70
+ err << "age_units must be #{AGE_UNITS.join(', ')}" unless AGE_UNITS.include? age_units
71
+ err << 'batch_size must be an integer' unless batch_size.is_a? Integer
72
+ err << 'batch_size must be greater than 0' unless batch_size > 0
73
+
74
+ raise err.join("\n") unless err.empty?
75
+
76
+ # select all records where age is greater than age_to_archive days.
77
+ l.logs 'Insert into the archive... '
78
+ records = DB[origin.to_sym].where(Sequel.lit("
79
+ \"#{age_field.to_s}\" < CAST('#{now}' AS TIMESTAMP) - INTERVAL '#{age_to_archive} #{age_units.to_s}'
80
+ ")).except(DB[archive])
81
+ l.logf 'done'.green + "(#{records.count.to_s.blue} records)"
82
+
83
+ # split records in batches of batch_size
84
+ # insert records into archive table.
85
+ i = 0
86
+ batches = records.each_slice(batch_size)
87
+ batches.each { |batch|
88
+ i += 1
89
+ # inserting in the archive table, only if doesn't exist a record with the same key
90
+ l.logs "Inserting batch #{i.to_s} into archive... "
91
+ exists = DB[archive.to_sym].where(primary_key => batch.map { |r| r[primary_key] }).count > 0
92
+ if exists
93
+ raise "Record(s) already exists in archive table."
94
+ else
95
+ DB[archive.to_sym].multi_insert(batch)
96
+ l.logf 'done'.green
97
+ end
98
+ }
99
+
100
+ # select all records in the origin that already exist in the archive.
101
+ l.logs 'Delete from the origin... '
102
+ records = DB[origin.to_sym].intersect(DB[archive])
103
+ l.logf 'done'.green + "(#{records.count.to_s.blue} records)"
104
+
105
+ # split records in batches of batch_size
106
+ # delete records from origin table.
107
+ i = 0
108
+ batches = records.each_slice(batch_size)
109
+ batches.each { |batch|
110
+ i += 1
111
+ l.logs "Deleting batch #{i.to_s} from origin... "
112
+ DB[origin.to_sym].where(primary_key => batch.map { |r| r[primary_key] }).delete
113
+ l.logf 'done'.green
114
+ }
115
+ end # def self.archive
116
+
117
+ # Delete data from the archive permanently.
118
+ # Parameters:
119
+ # - origin: Symbol. Name of the table to take data from. Example: :post. Mandatory.
120
+ # - archive: Symbol. Name of the table to store the data. Example: :post_archive. Default: "#{origin.to_s}_archive".
121
+ # - primary_key: Symbol. Name of the primary key column. Example: :id. Default: :id.
122
+ # - age_field: Symbol. Column to use to calculate the age of the record. Example: :create_time. Default: :create_time.
123
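+ # - age_to_drain: Integer, greater than or equal to 0. Age threshold, measured in age_units. Example: 90. Default: 90. Documented as "0 means never drain", but no special case for 0 is visible in this method - TODO confirm.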
124
+ # - age_units: Symbol. :minutes, :hours, :days, :weeks, :months or :years. Default: :hours.
125
+ # - batch_size: Integer. Number of records to move in each batch. Default: 1000.
126
+ #
127
+ def self.drain(
128
+ origin: ,
129
+ archive: nil,
130
+ primary_key: :id,
131
+ age_field: :create_time,
132
+ age_to_drain: 90,
133
+ age_units: :hours,
134
+ batch_size: 1000,
135
+ logger: nil
136
+ )
137
+ l = logger || BlackStack::DummyLogger.new(nil)
138
+ archive ||= "#{origin.to_s}_archive".to_sym
139
+ err = []
140
+
141
+ err << 'origin must be a symbol' unless origin.is_a? Symbol
142
+ err << 'archive must be a symbol' unless archive.is_a? Symbol
143
+ err << 'primary_key must be a symbol' unless primary_key.is_a? Symbol
144
+ err << 'age_field must be a symbol' unless age_field.is_a? Symbol
145
+ err << 'age_to_drain must be an integer' unless age_to_drain.is_a? Integer
146
+ err << 'age_to_drain must be greater than or equal to 0' unless age_to_drain >= 0
147
+ err << "age_units must be #{AGE_UNITS.join(', ')}" unless AGE_UNITS.include? age_units
148
+ err << 'batch_size must be an integer' unless batch_size.is_a? Integer
149
+ err << 'batch_size must be greater than 0' unless batch_size > 0
150
+
151
+ raise err.join("\n") unless err.empty?
152
+
153
+ # select all records where age is greater than age_to_drain days.
154
+ l.logs 'Delete from the archive... '
155
+ records = DB[archive.to_sym].where(Sequel.lit("
156
+ \"#{age_field.to_s}\" < CAST('#{now}' AS TIMESTAMP) - INTERVAL '#{age_to_drain} #{age_units.to_s}'
157
+ "))
158
+ l.logf 'done'.green + "(#{records.count.to_s.blue} records)"
159
+
160
+ # split records in batches of batch_size
161
+ # delete records from origin table.
162
+ i = 0
163
+ batches = records.each_slice(batch_size)
164
+ batches.each { |batch|
165
+ i += 1
166
+ l.logs "Deleting batch #{i.to_s} from origin... "
167
+ DB[archive.to_sym].where(primary_key => batch.map { |r| r[primary_key] }).delete
168
+ l.logf 'done'.green
169
+ }
170
+ end # def self.drain
171
+
172
+ # Register (or update) the archive and drain settings of one table, then create its archive table and columns if they are missing.
173
+ # Parameters:
174
+ # - origin: Symbol. Name of the table to take data from. Example: :post. Mandatory.
175
+ # - archive: Symbol. Name of the table to store the data. Example: :post_archive. Default: "#{origin.to_s}_archive".
176
+ # - primary_key: Symbol. Name of the primary key column. Example: :id. Default: :id.
177
+ # - age_field: Symbol. Column to use to calculate the age of the record. Example: :create_time. Default: :create_time.
178
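+ # - age_to_archive, age_to_drain: Integers, greater than or equal to 0. Age thresholds, measured in age_units, stored for later archive and drain runs. Defaults: 1 and 90.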
179
+ # - age_units: Symbol. :minutes, :hours, :days, :weeks, :months or :years. Default: :hours.
180
+ # - batch_size: Integer. Number of records to move in each batch. Default: 1000.
181
+ #
182
+ def self.set_one_table(
183
+ origin: ,
184
+ archive: nil,
185
+ primary_key: :id,
186
+ age_field: :create_time,
187
+ age_to_archive: 1,
188
+ age_to_drain: 90,
189
+ age_units: :hours,
190
+ batch_size: 1000,
191
+ logger: nil
192
+ )
193
+ l = logger || BlackStack::DummyLogger.new(nil)
194
+
195
+ archive ||= "#{origin.to_s}_archive".to_sym
196
+ err = []
197
+ err << 'origin must be a symbol' unless origin.is_a? Symbol
198
+ err << 'archive must be a symbol' unless archive.is_a? Symbol
199
+ err << 'primary_key must be a symbol' unless primary_key.is_a? Symbol
200
+ err << 'age_field must be a symbol' unless age_field.is_a? Symbol
201
+ err << 'age_to_drain must be an integer' unless age_to_drain.is_a? Integer
202
+ err << 'age_to_drain must be greater than or equal to 0' unless age_to_drain >= 0
203
+ err << 'age_to_archive must be an integer' unless age_to_archive.is_a? Integer
204
+ err << 'age_to_archive must be greater than or equal to 0' unless age_to_archive >= 0
205
+ err << "age_units must be #{AGE_UNITS.join(', ')}" unless AGE_UNITS.include? age_units
206
+ err << 'batch_size must be an integer' unless batch_size.is_a? Integer
207
+ err << 'batch_size must be greater than 0' unless batch_size > 0
208
+
209
+ raise err.join("\n") unless err.empty?
210
+
211
+ h = @@tables.find { |t| t[:origin] == origin }
212
+ if h.nil?
213
+ h = {
214
+ origin: origin,
215
+ archive: archive,
216
+ primary_key: primary_key,
217
+ age_field: age_field,
218
+ age_to_archive: age_to_archive,
219
+ age_to_drain: age_to_drain,
220
+ age_units: age_units,
221
+ batch_size: batch_size
222
+ }
223
+ @@tables << h
224
+ else
225
+ h[:archive] = archive
226
+ h[:primary_key] = primary_key
227
+ h[:age_field] = age_field
228
+ h[:age_to_archive] = age_to_archive
229
+ h[:age_to_drain] = age_to_drain
230
+ h[:age_units] = age_units
231
+ h[:batch_size] = batch_size
232
+ end
233
+
234
+ self.create(
235
+ origin: h[:origin],
236
+ archive: h[:archive] || "#{h[:origin].to_s}_archive".to_sym,
237
+ logger: l
238
+ )
239
+ end # def self.set_one_table
240
+
241
+ # set a list of tables to archive and drain.
242
+ def self.set(arr, logger: nil)
243
+ raise "Argument must be an array of hashes" unless arr.is_a? Array
244
+ raise "Argument must be an array of hashes" unless arr.all? { |e| e.is_a? Hash }
245
+ arr.each { |h|
246
+ self.set_one_table(
247
+ origin: h[:origin],
248
+ archive: h[:archive] || "#{h[:origin].to_s}_archive".to_sym,
249
+ primary_key: h[:primary_key] || :id,
250
+ age_field: h[:age_field] || :create_time,
251
+ age_to_archive: h[:age_to_archive] || 1,
252
+ age_to_drain: h[:age_to_drain] || 90,
253
+ age_units: h[:age_units] || :hours,
254
+ batch_size: h[:batch_size] || 1000,
255
+ logger: logger
256
+ )
257
+ }
258
+ end # def self.set
259
+
260
+ def self.archive_all(logger:nil)
261
+ @@tables.each { |h|
262
+ self.archive(
263
+ origin: h[:origin],
264
+ archive: h[:archive] || "#{h[:origin].to_s}_archive".to_sym,
265
+ primary_key: h[:primary_key] || :id,
266
+ age_field: h[:age_field] || :create_time,
267
+ age_to_archive: h[:age_to_archive] || 1,
268
+ age_units: h[:age_units] || :hours,
269
+ batch_size: h[:batch_size] || 1000,
270
+ logger: logger
271
+ )
272
+ }
273
+ end # def self.archive_all
274
+
275
+ def self.drain_all(logger:nil)
276
+ @@tables.each { |h|
277
+ self.drain(
278
+ origin: h[:origin],
279
+ archive: h[:archive] || "#{h[:origin].to_s}_archive".to_sym,
280
+ primary_key: h[:primary_key] || :id,
281
+ age_field: h[:age_field] || :create_time,
282
+ age_to_drain: h[:age_to_drain] || 90,
283
+ age_units: h[:age_units] || :hours,
284
+ batch_size: h[:batch_size] || 1000,
285
+ logger: logger
286
+ )
287
+ }
288
+ end # def self.drain_all
289
+
290
+ end # class Warehouse
291
+ end # module BlackStack
metadata ADDED
@@ -0,0 +1,84 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: blackstack-warehouse
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Leandro Daniel Sardi
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-05-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: blackstack-db
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.0.9
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 1.0.9
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: 1.0.9
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.0.9
33
+ - !ruby/object:Gem::Dependency
34
+ name: simple_cloud_logging
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: 1.2.2
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: 1.2.2
43
+ type: :runtime
44
+ prerelease: false
45
+ version_requirements: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - "~>"
48
+ - !ruby/object:Gem::Version
49
+ version: 1.2.2
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 1.2.2
53
+ description: 'Implement Data Retention Terms in your SaaS, easily. BlackStack Warehouse
54
+ performs data archiving into a replicated database schema automatically: https://github.com/leandrosardi/blackstack-warehouse.'
55
+ email: leandro@connectionsphere.com
56
+ executables: []
57
+ extensions: []
58
+ extra_rdoc_files: []
59
+ files:
60
+ - lib/blackstack-warehouse.rb
61
+ homepage: https://rubygems.org/gems/blackstack-warehouse
62
+ licenses:
63
+ - MIT
64
+ metadata: {}
65
+ post_install_message:
66
+ rdoc_options: []
67
+ require_paths:
68
+ - lib
69
+ required_ruby_version: !ruby/object:Gem::Requirement
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ version: '0'
74
+ required_rubygems_version: !ruby/object:Gem::Requirement
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: '0'
79
+ requirements: []
80
+ rubygems_version: 3.3.7
81
+ signing_key:
82
+ specification_version: 4
83
+ summary: Implement Data Retention Terms in your SaaS, easily.
84
+ test_files: []