tartarus-rb 0.2.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ffb5cafa941ec43d2c07a6745ef9b75b91c31784649bbf04187e90f6960812e0
4
- data.tar.gz: 9615766c2064a4e127c836ca4cf6ad9b0f2925d08246e4486b9f9fd382884e4f
3
+ metadata.gz: f7669ce816882e4f9607e41afde316cbe50027291a0dd89008d3baaa9ca6b308
4
+ data.tar.gz: d03d89b04491a2dce92820674e94c895cac9cad706b790927aab9129f8c15b26
5
5
  SHA512:
6
- metadata.gz: a97481429cc2c5faa977d75baae80c78e17f56cfa0323b76e613a91a57586bbfb6890442e340b7d9aff88eb7a155f694174109a92317d77e7b8c4c367224ae50
7
- data.tar.gz: 143dc43864c453873d6154f52d1504eff11f0f88e406e69893fd62937ea8b59899e4c1fdd344adc65fab4e4d6f15c8dcee4cd5c09c14e86a19d47be2e17d403d
6
+ metadata.gz: e3906591489650a6e4f9ce9f3b0eaa17ab2a95797096abcf8bba9f50855a7463f0a2f1354017f3df512519775765e807ece44b4c75cbc2b3b3bd326f67a6ffcf
7
+ data.tar.gz: 48d503bc4bbea2be0196778d5dca18b9d9770cf069685128860f4567fdc1727f46042e009b43a86dc9697b217c8a6b5816a45eae1ed935f362ce63b42c2301e2
data/.env.sample ADDED
@@ -0,0 +1,4 @@
1
+ AWS_REGION=""
2
+ AWS_SECRET=""
3
+ AWS_KEY=""
4
+ VAULT_NAME=""
data/.gitignore CHANGED
@@ -7,6 +7,8 @@
7
7
  /spec/reports/
8
8
  /tmp/
9
9
  /spec/test.db
10
+ /spec/tmp/
11
+ .env
10
12
 
11
13
  # rspec failure tracking
12
14
  .rspec_status
data/.travis.yml CHANGED
@@ -3,6 +3,13 @@ language: ruby
3
3
  cache: bundler
4
4
  rvm:
5
5
  - 2.7.2
6
+ env:
7
+ global:
8
+ - AWS_KEY=AWS_KEY
9
+ - AWS_SECRET=AWS_SECRET
10
+ - AWS_REGION="us-east-2"
11
+ - VAULT_NAME=VAULT_NAME
6
12
  before_install: gem install bundler -v 2.1.4
7
13
  services:
8
14
  - redis-server
15
+ - postgresql
data/Changelog.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # Changelog
2
2
 
3
+ ## Master
4
+
5
+ ## 0.5.0
6
+
7
+ - Provide the ability to explicitly set the name of an archivable item, so that the same model can be archived in multiple ways
8
+
9
+ ## 0.4.1
10
+
11
+ - Do not make Glacier a required dependency if not used
12
+
13
+ ## 0.4.0
14
+
15
+ - Add Glacier remote storage support to upload data before deleting it
16
+
17
+ ## 0.3.0
18
+
19
+ - Add `delete_all_using_limit_in_batches` strategy
20
+
3
21
  ## 0.2.0
4
22
  - Add support for deleting and destroying in batches
5
23
  - Add integration tests with a real database and ActiveRecord
data/Gemfile.lock CHANGED
@@ -1,13 +1,26 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tartarus-rb (0.2.0)
4
+ tartarus-rb (0.5.0)
5
5
  sidekiq (>= 5)
6
6
  sidekiq-cron (~> 1)
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
+ actionpack (6.1.0.rc1)
12
+ actionview (= 6.1.0.rc1)
13
+ activesupport (= 6.1.0.rc1)
14
+ rack (~> 2.0, >= 2.0.9)
15
+ rack-test (>= 0.6.3)
16
+ rails-dom-testing (~> 2.0)
17
+ rails-html-sanitizer (~> 1.0, >= 1.2.0)
18
+ actionview (6.1.0.rc1)
19
+ activesupport (= 6.1.0.rc1)
20
+ builder (~> 3.1)
21
+ erubi (~> 1.4)
22
+ rails-dom-testing (~> 2.0)
23
+ rails-html-sanitizer (~> 1.1, >= 1.2.0)
11
24
  activemodel (6.1.0.rc1)
12
25
  activesupport (= 6.1.0.rc1)
13
26
  activerecord (6.1.0.rc1)
@@ -19,21 +32,74 @@ GEM
19
32
  minitest (>= 5.1)
20
33
  tzinfo (~> 2.0)
21
34
  zeitwerk (~> 2.3)
35
+ addressable (2.7.0)
36
+ public_suffix (>= 2.0.2, < 5.0)
37
+ aws-eventstream (1.1.0)
38
+ aws-partitions (1.424.0)
39
+ aws-sdk-core (3.112.0)
40
+ aws-eventstream (~> 1, >= 1.0.2)
41
+ aws-partitions (~> 1, >= 1.239.0)
42
+ aws-sigv4 (~> 1.1)
43
+ jmespath (~> 1.0)
44
+ aws-sdk-glacier (1.36.0)
45
+ aws-sdk-core (~> 3, >= 3.112.0)
46
+ aws-sigv4 (~> 1.1)
47
+ aws-sigv4 (1.2.2)
48
+ aws-eventstream (~> 1, >= 1.0.2)
49
+ builder (3.2.4)
22
50
  concurrent-ruby (1.1.7)
23
51
  connection_pool (2.2.3)
52
+ crack (0.4.3)
53
+ safe_yaml (~> 1.0.0)
54
+ crass (1.0.6)
24
55
  diff-lcs (1.4.4)
56
+ dotenv (2.7.6)
57
+ erubi (1.10.0)
25
58
  et-orbi (1.2.4)
26
59
  tzinfo
27
- fugit (1.4.0)
60
+ fugit (1.4.5)
28
61
  et-orbi (~> 1.1, >= 1.1.8)
29
62
  raabro (~> 1.4)
63
+ hashdiff (1.0.0)
30
64
  i18n (1.8.5)
31
65
  concurrent-ruby (~> 1.0)
66
+ jmespath (1.4.0)
67
+ loofah (2.9.0)
68
+ crass (~> 1.0.2)
69
+ nokogiri (>= 1.5.9)
70
+ method_source (1.0.0)
71
+ mini_portile2 (2.5.0)
32
72
  minitest (5.14.2)
73
+ nokogiri (1.11.1)
74
+ mini_portile2 (~> 2.5.0)
75
+ racc (~> 1.4)
76
+ pg (1.2.3)
77
+ postgres-copy (1.5.0)
78
+ activerecord (>= 5.1)
79
+ pg (>= 0.17)
80
+ responders
81
+ public_suffix (4.0.4)
33
82
  raabro (1.4.0)
83
+ racc (1.5.2)
34
84
  rack (2.2.3)
85
+ rack-test (1.1.0)
86
+ rack (>= 1.0, < 3)
87
+ rails-dom-testing (2.0.3)
88
+ activesupport (>= 4.2.0)
89
+ nokogiri (>= 1.6)
90
+ rails-html-sanitizer (1.3.0)
91
+ loofah (~> 2.3)
92
+ railties (6.1.0.rc1)
93
+ actionpack (= 6.1.0.rc1)
94
+ activesupport (= 6.1.0.rc1)
95
+ method_source
96
+ rake (>= 0.8.7)
97
+ thor (~> 1.0)
35
98
  rake (13.0.1)
36
99
  redis (4.2.2)
100
+ responders (3.0.1)
101
+ actionpack (>= 5.0)
102
+ railties (>= 5.0)
37
103
  rspec (3.9.0)
38
104
  rspec-core (~> 3.9.0)
39
105
  rspec-expectations (~> 3.9.0)
@@ -50,6 +116,7 @@ GEM
50
116
  rspec-core (~> 3.0, >= 3.0.0)
51
117
  sidekiq (>= 2.4.0)
52
118
  rspec-support (3.9.3)
119
+ safe_yaml (1.0.5)
53
120
  sidekiq (6.1.2)
54
121
  connection_pool (>= 2.2.2)
55
122
  rack (~> 2.0)
@@ -57,9 +124,15 @@ GEM
57
124
  sidekiq-cron (1.2.0)
58
125
  fugit (~> 1.1)
59
126
  sidekiq (>= 4.2.1)
60
- sqlite3 (1.4.2)
127
+ thor (1.1.0)
128
+ timecop (0.9.2)
61
129
  tzinfo (2.0.2)
62
130
  concurrent-ruby (~> 1.0)
131
+ vcr (6.0.0)
132
+ webmock (3.7.6)
133
+ addressable (>= 2.3.6)
134
+ crack (>= 0.3.2)
135
+ hashdiff (>= 0.4.0, < 2.0.0)
63
136
  zeitwerk (2.4.1)
64
137
 
65
138
  PLATFORMS
@@ -67,11 +140,17 @@ PLATFORMS
67
140
 
68
141
  DEPENDENCIES
69
142
  activerecord (~> 6)
143
+ aws-sdk-glacier
144
+ dotenv
145
+ pg
146
+ postgres-copy
70
147
  rake (~> 13.0)
71
148
  rspec (~> 3.0)
72
149
  rspec-sidekiq
73
- sqlite3
74
150
  tartarus-rb!
151
+ timecop
152
+ vcr
153
+ webmock
75
154
 
76
155
  BUNDLED WITH
77
156
  2.1.4
data/README.md CHANGED
@@ -56,12 +56,23 @@ if File.exist?(schedule_file) && Sidekiq.server?
56
56
  item.timestamp_field = :created_at
57
57
  end
58
58
 
59
+ glacier_configuration = Tartarus::RemoteStorage::Glacier::Configuration.build(
60
+ aws_key: ENV.fetch("AWS_KEY"),
61
+ aws_secret: ENV.fetch("AWS_SECRET"),
62
+ aws_region: ENV.fetch("AWS_REGION"),
63
+ vault_name: ENV.fetch("GLACIER_VAULT_NAME"),
64
+ root_path: Rails.root.to_s,
65
+ archive_registry_factory: ArchiveRegistry,
66
+ )
67
+ # don't forget about installing `aws-sdk-glacier` gem
68
+
59
69
  tartarus.register do |item|
60
70
  item.model = YetAnotherModel
61
71
  item.cron = "5 6 * * *"
62
72
  item.queue = "default"
63
73
  item.timestamp_field = :created_at
64
74
  item.archive_items_older_than = -> { 1.week.ago }
75
+ item.remote_storage = Tartarus::RemoteStorage::Glacier.new(glacier_configuration)
65
76
  end
66
77
 
67
78
  tartarus.schedule # this method must be called to create jobs for sidekiq-cron!
@@ -71,6 +82,31 @@ end
71
82
 
72
83
  You can use the following config params:
73
84
  - `model` - the name of the ActiveRecord model you want to archive, required
85
+ - `name` - name of your strategy, optional. It falls back to `model.to_s`. It's important to set it when you have several strategies for the same model:
86
+ ```rb
87
+ tartarus.register do |item|
88
+ item.model = InternalEvent
89
+ item.name = "archive_account_and_user_internal_events"
90
+ item.cron = "5 5 * * *"
91
+ item.queue = "default"
92
+ item.tenants_range = -> { ["Account", "User"] }
93
+ item.tenant_id_field = :model_type
94
+ item.archive_items_older_than = -> { 30.days.ago }
95
+ item.timestamp_field = :created_at
96
+ end
97
+
98
+ tartarus.register do |item|
99
+ item.model = InternalEvent
100
+ item.name = "archive_post_and_comment_internal_events"
101
+ item.cron = "5 15 * * *"
102
+ item.queue = "default"
103
+ item.tenants_range = -> { ["Post", "Comment"] }
104
+ item.tenant_id_field = :model_type
105
+ item.archive_items_older_than = -> { 10.days.ago }
106
+ item.timestamp_field = :created_at
107
+ end
108
+ ```
109
+
74
110
  - `cron` - cron syntax, required
75
111
  - `queue` - name of the sidekiq queue you want to use for execution of the jobs, required
76
112
  - `tenants_range` - optional, use if you want to scope items by a tenant (or any field that can be used for partitioning). It doesn't have to be an ActiveRecord collection; it could be just an array. Must be a proc/lambda/object responding to the `call` method. For an ActiveRecord collection, a `find_each` loop will be used for optimization.
@@ -78,7 +114,92 @@ You can use the following config params:
78
114
  - `tenant_id_field` - required when using `tenants_range`/`tenant_value_source`. It's a DB column that will be used for scoping records by a tenant. For example, here it would be: `ModelThatYouWantToArchive.where(account_uuid: value_of_uuid_from_some_active_account)`
79
115
  - `archive_items_older_than` - required, for defining retention policy
80
116
  - `timestamp_field` - required, used for performing a query using the value from `archive_items_older_than`
81
- - `archive_with` - optional (defaults to `delete_all`). Could be `delete_all`, `destroy_all`, `delete_all_without_batches`, `destroy_all_without_batches`
117
+ - `archive_with` - optional (defaults to `delete_all`). Could be `delete_all`, `destroy_all`, `delete_all_without_batches`, `destroy_all_without_batches`, `delete_all_using_limit_in_batches`
118
+ - `batch_size` - optional (defaults to `10_000`, used with `delete_all_using_limit_in_batches` strategy)
119
+ - `remote_storage` - optional (defaults to `Tartarus::RemoteStorage::Null`, which does nothing). Use this option if you want to store the data somewhere before deleting it.
120
+
121
+ ### Remote Storage
122
+
123
+ Currently, only `Glacier` (for AWS Glacier) is supported. Also, it works only with a Postgres database and requires [postgres-copy](https://github.com/diogob/postgres-copy).
124
+
125
+ To take advantage of this feature you will need a couple of things:
126
+ 1. Apply `acts_as_copy_target` to the archivable model (from `postgres-copy` gem).
127
+ 2. Create a model that will be used as a registry for all uploads that happened.
128
+ 3. Install `aws-sdk-glacier` gem.
129
+
130
+ If you want to make `Version` model archivable and use `ArchiveRegistry` as the registry, you will need the following models and tables:
131
+
132
+ ``` rb
133
+ database.create_table(:archive_registries) do |t|
134
+ t.string :glacier_location, null: false
135
+ t.string :glacier_checksum, null: false
136
+ t.string :glacier_archive_id, null: false
137
+ t.string :archivable_model, null: false
138
+ t.string :tenant_id_field
139
+ t.string :tenant_id
140
+ t.datetime :completed_at, null: false
141
+ end
142
+
143
+ database.create_table(:versions) do |t|
144
+ end
145
+
146
+ class Version < ApplicationRecord
147
+ acts_as_copy_target
148
+ end
149
+
150
+ class ArchiveRegistry < ApplicationRecord
151
+ end
152
+ ```
153
+
154
+ You can use the above schema for the registry model as it contains all needed fields.
155
+
156
+ To initialize the service:
157
+
158
+ ``` rb
159
+ glacier_configuration = Tartarus::RemoteStorage::Glacier::Configuration.build(
160
+ aws_key: ENV.fetch("AWS_KEY"),
161
+ aws_secret: ENV.fetch("AWS_SECRET"),
162
+ aws_region: ENV.fetch("AWS_REGION"),
163
+ vault_name: ENV.fetch("GLACIER_VAULT_NAME"),
164
+ root_path: Rails.root.to_s,
165
+ archive_registry_factory: ArchiveRegistry,
166
+ )
167
+ Tartarus::RemoteStorage::Glacier.new(glacier_configuration)
168
+ ```
169
+
170
+ You can also pass `account_id` (by default "-" string will be used):
171
+
172
+ ``` rb
173
+ glacier_configuration = Tartarus::RemoteStorage::Glacier::Configuration.build(
174
+ aws_key: ENV.fetch("AWS_KEY"),
175
+ aws_secret: ENV.fetch("AWS_SECRET"),
176
+ aws_region: ENV.fetch("AWS_REGION"),
177
+ vault_name: ENV.fetch("GLACIER_VAULT_NAME"),
178
+ root_path: Rails.root.to_s,
179
+ archive_registry_factory: ArchiveRegistry,
180
+ account_id: "some_account_id"
181
+ )
182
+ Tartarus::RemoteStorage::Glacier.new(glacier_configuration)
183
+ ```
184
+
185
+ **Important** - do not use Glacier Storage for large batches (> 4 GB) as multipart uploads are not supported yet.
186
+
187
+
188
+ If you know what you are doing, you can add your own storage, as long as it complies with the following interface:
189
+
190
+ ``` rb
191
+ class Glacier
192
+ attr_reader :configuration
193
+ private :configuration
194
+
195
+ def initialize(configuration)
196
+ @configuration = configuration
197
+ end
198
+
199
+ def store(collection, archivable_model, tenant_id: nil, tenant_id_field: nil)
200
+ end
201
+ end
202
+ ```
82
203
 
83
204
  ### Testing before actually using it
84
205
 
@@ -87,7 +208,6 @@ You might want to verify that the gem works in the way you expect it to work. Fo
87
208
  1. scheduling/enqueueing: use `Tartarus::ScheduleArchivingModel#schedule` - for example, `Tartarus::ScheduleArchivingModel.new.schedule("PaperTrailVersion")`, it's going to enqueue either `Tartarus::Sidekiq::ArchiveModelWithTenantJob` or `Tartarus::Sidekiq::ArchiveModelWithoutTenantJob`, depending on the config.
88
209
  2. execution of the archiving logic: use `Tartarus::ArchiveModelWithTenant#archive` (for example, `Tartarus::ArchiveModelWithTenant.new.archive("PaperTrailVersion", "User")`) or `Tartarus::ArchiveModelWithoutTenant#archive` (for example, `Tartarus::ArchiveModelWithoutTenant.new.archive("PaperTrailVersion")`)
89
210
 
90
-
91
211
  You might also want to check `spec/integration` to get an idea how the integration tests were written.
92
212
 
93
213
  ## Development
@@ -96,10 +216,6 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
96
216
 
97
217
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
98
218
 
99
- ## TODO
100
-
101
- - add support for uploading archives to AWS Glacier before deleting items
102
-
103
219
  ## Contributing
104
220
 
105
221
  Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/tartarus-rb.
data/lib/tartarus.rb CHANGED
@@ -4,6 +4,7 @@ require "tartarus/archivable_item/sidekiq_cron_job_serializer"
4
4
  require "tartarus/archive_strategy"
5
5
  require "tartarus/archive_strategy/delete_all"
6
6
  require "tartarus/archive_strategy/delete_all_without_batches"
7
+ require "tartarus/archive_strategy/delete_all_using_limit_in_batches"
7
8
  require "tartarus/archive_strategy/destroy_all"
8
9
  require "tartarus/archive_strategy/destroy_all_without_batches"
9
10
  require "tartarus/archive_strategy/extract_batch"
@@ -17,6 +18,7 @@ require "tartarus/rb/version"
17
18
  require "tartarus/registry"
18
19
  require "tartarus/repository"
19
20
  require "tartarus/schedule_archiving_model"
21
+ require "tartarus/remote_storage"
20
22
  require "sidekiq/cron/job"
21
23
  require "sidekiq"
22
24
 
@@ -13,6 +13,7 @@ class Tartarus
13
13
  ensure_column_exists(collection, model_name, tenant_id_field)
14
14
 
15
15
  collection.where("#{timestamp_field} < ?", timestamp).where(tenant_id_field => tenant_id)
16
+ .order(tenant_id_field, timestamp_field)
16
17
  end
17
18
 
18
19
  def items_older_than(model_name, timestamp_field, timestamp)
@@ -1,7 +1,7 @@
1
1
  class Tartarus::ArchivableItem
2
2
  REQUIRED_ATTRIBUTES_NAMES = %i(model cron queue archive_items_older_than timestamp_field active_job
3
3
  archive_with tenant_value_source).freeze
4
- OPTIONAL_ATTRIBUTES_NAMES = %i(tenants_range tenant_id_field).freeze
4
+ OPTIONAL_ATTRIBUTES_NAMES = %i(tenants_range tenant_id_field batch_size remote_storage name).freeze
5
5
 
6
6
  attr_accessor *(REQUIRED_ATTRIBUTES_NAMES + OPTIONAL_ATTRIBUTES_NAMES)
7
7
 
@@ -45,6 +45,16 @@ class Tartarus::ArchivableItem
45
45
  @archive_with ||= :delete_all
46
46
  end
47
47
 
48
+ def batch_size
49
+ return @batch_size if defined?(@batch_size)
50
+
51
+ @batch_size ||= 10_000
52
+ end
53
+
54
+ def name
55
+ @name || @model.to_s
56
+ end
57
+
48
58
  def validate!
49
59
  validate_presence
50
60
  end
@@ -54,11 +64,11 @@ class Tartarus::ArchivableItem
54
64
  end
55
65
 
56
66
  def archive_strategy(factory: Tartarus::ArchiveStrategy.new)
57
- factory.for(archive_with)
67
+ factory.for(archive_with, batch_size: batch_size)
58
68
  end
59
69
 
60
- def for_model?(provided_model_name)
61
- model.to_s == provided_model_name.to_s
70
+ def remote_storage
71
+ @remote_storage || Tartarus::RemoteStorage::Null
62
72
  end
63
73
 
64
74
  private
@@ -7,7 +7,7 @@ class Tartarus
7
7
  description: description_for_item(archivable_item),
8
8
  cron: archivable_item.cron,
9
9
  class: Tartarus::Sidekiq::ScheduleArchivingModelJob,
10
- args: [archivable_item.model],
10
+ args: [archivable_item.name],
11
11
  queue: archivable_item.queue,
12
12
  active_job: archivable_item.active_job
13
13
  }
@@ -16,7 +16,7 @@ class Tartarus
16
16
  private
17
17
 
18
18
  def name_for_item(archivable_item)
19
- "TARTARUS_#{archivable_item.model}"
19
+ "TARTARUS_#{archivable_item.name}"
20
20
  end
21
21
 
22
22
  def description_for_item(archivable_item)
@@ -7,18 +7,20 @@ class Tartarus::ArchiveModelWithTenant
7
7
  @repository = repository
8
8
  end
9
9
 
10
- def archive(model_name, tenant_id)
11
- archivable_item = registry.find_by_model(model_name)
12
-
13
- archivable_item.archive_strategy.call(collection_to_archive(model_name, archivable_item, tenant_id))
10
+ def archive(archivable_item_name, tenant_id)
11
+ archivable_item = registry.find_by_name(archivable_item_name)
12
+ collection = collection_to_archive(archivable_item, tenant_id)
13
+ archivable_item.remote_storage.store(collection, archivable_item.name, tenant_id: tenant_id,
14
+ tenant_id_field: archivable_item.tenant_id_field)
15
+ archivable_item.archive_strategy.call(collection)
14
16
  end
15
17
 
16
18
  private
17
19
 
18
- def collection_to_archive(model_name, archivable_item, tenant_id)
20
+ def collection_to_archive(archivable_item, tenant_id)
19
21
  repository
20
22
  .items_older_than_for_tenant(
21
- model_name,
23
+ archivable_item.model,
22
24
  archivable_item.timestamp_field, archivable_item.archive_items_older_than.call,
23
25
  archivable_item.tenant_id_field, tenant_id
24
26
  )
@@ -7,18 +7,19 @@ class Tartarus::ArchiveModelWithoutTenant
7
7
  @repository = repository
8
8
  end
9
9
 
10
- def archive(model_name)
11
- archivable_item = registry.find_by_model(model_name)
12
-
13
- archivable_item.archive_strategy.call(collection_to_archive(model_name, archivable_item))
10
+ def archive(archivable_item_name)
11
+ archivable_item = registry.find_by_name(archivable_item_name)
12
+ collection = collection_to_archive(archivable_item)
13
+ archivable_item.remote_storage.store(collection, archivable_item.name)
14
+ archivable_item.archive_strategy.call(collection)
14
15
  end
15
16
 
16
17
  private
17
18
 
18
- def collection_to_archive(model_name, archivable_item)
19
+ def collection_to_archive(archivable_item)
19
20
  repository
20
21
  .items_older_than(
21
- model_name,
22
+ archivable_item.model,
22
23
  archivable_item.timestamp_field, archivable_item.archive_items_older_than.call
23
24
  )
24
25
  end
@@ -1,5 +1,5 @@
1
1
  class Tartarus::ArchiveStrategy
2
- def for(strategy_name)
2
+ def for(strategy_name, batch_size: 0)
3
3
  case strategy_name.to_sym
4
4
  when :delete_all
5
5
  Tartarus::ArchiveStrategy::DeleteAll.new
@@ -9,6 +9,8 @@ class Tartarus::ArchiveStrategy
9
9
  Tartarus::ArchiveStrategy::DeleteAllWithoutBatches.new
10
10
  when :destroy_all_without_batches
11
11
  Tartarus::ArchiveStrategy::DestroyAllWithoutBatches.new
12
+ when :delete_all_using_limit_in_batches
13
+ Tartarus::ArchiveStrategy::DeleteAllUsingLimitInBatches.new(batch_size: batch_size)
12
14
  else
13
15
  raise "unknown strategy: #{strategy_name}"
14
16
  end
@@ -0,0 +1,20 @@
1
+ class Tartarus
2
+ class ArchiveStrategy
3
+ class DeleteAllUsingLimitInBatches
4
+ attr_reader :batch_size
5
+ private :batch_size
6
+
7
+ def initialize(batch_size:)
8
+ @batch_size = batch_size
9
+ end
10
+
11
+ def call(collection)
12
+ num = 1
13
+
14
+ while num > 0
15
+ num = collection.limit(batch_size).delete_all
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -1,5 +1,5 @@
1
1
  class Tartarus
2
2
  module Rb
3
- VERSION = "0.2.0"
3
+ VERSION = "0.5.0"
4
4
  end
5
5
  end
@@ -16,8 +16,8 @@ class Tartarus::Registry
16
16
  @storage << item
17
17
  end
18
18
 
19
- def find_by_model(model)
20
- storage.find(->{ raise "#{model} not found in registry" }) { |item| item.for_model?(model) }
19
+ def find_by_name(name)
20
+ storage.find(->{ raise "#{name} not found in registry" }) { |item| item.name == name }
21
21
  end
22
22
 
23
23
  def reset
@@ -0,0 +1,4 @@
1
+ module Tartarus::RemoteStorage
2
+ autoload :Glacier, "tartarus/remote_storage/glacier"
3
+ autoload :Null, "tartarus/remote_storage/null"
4
+ end
@@ -0,0 +1,67 @@
1
+ require "aws-sdk-glacier"
2
+ require "tartarus/remote_storage/glacier/client"
3
+ require "tartarus/remote_storage/glacier/file"
4
+ require "tartarus/remote_storage/glacier/csv_export"
5
+ require "tartarus/remote_storage/glacier/register_upload"
6
+ require "tartarus/remote_storage/glacier/configuration"
7
+
8
+ class Tartarus
9
+ module RemoteStorage
10
+ class Glacier
11
+ attr_reader :configuration, :clock
12
+ private :configuration, :clock
13
+
14
+ def initialize(configuration, clock: Time)
15
+ @configuration = configuration
16
+ @clock = clock
17
+ end
18
+
19
+ def store(collection, archivable_model, tenant_id: nil, tenant_id_field: nil)
20
+ path_to_file = path_to_file_for(archivable_model, tenant_id_field, tenant_id)
21
+ export_to_csv(collection, path_to_file)
22
+ glacier_file = Tartarus::RemoteStorage::Glacier::File.new(::File.new(path_to_file))
23
+ glacier_response = upload(glacier_file)
24
+ register_upload(glacier_response, archivable_model, tenant_id_field, tenant_id)
25
+ ensure
26
+ glacier_file.delete_from_local_storage if glacier_file
27
+ end
28
+
29
+ private
30
+
31
+ def upload(file)
32
+ client.upload_archive(configuration.vault_name, file)
33
+ end
34
+
35
+ def client
36
+ @client ||= begin
37
+ Tartarus::RemoteStorage::Glacier::Client.new(
38
+ key: configuration.aws_key,
39
+ secret: configuration.aws_secret,
40
+ region: configuration.aws_region,
41
+ account_id: configuration.account_id,
42
+ )
43
+ end
44
+ end
45
+
46
+ def export_to_csv(collection, path_to_file)
47
+ Tartarus::RemoteStorage::Glacier::CsvExport
48
+ .new(configuration.storage_directory)
49
+ .export(collection, path_to_file)
50
+ end
51
+
52
+
53
+ def register_upload(glacier_response, archivable_model, tenant_id_field, tenant_id)
54
+ Tartarus::RemoteStorage::Glacier::RegisterUpload.new(configuration.archive_registry_factory).register(
55
+ glacier_response,
56
+ archivable_model,
57
+ tenant_id_field,
58
+ tenant_id
59
+ )
60
+ end
61
+
62
+ def path_to_file_for(archivable_model, tenant_id_field, tenant_id)
63
+ "#{configuration.storage_directory}/#{archivable_model}_#{tenant_id_field}_#{tenant_id}_#{clock.now.to_i}.csv"
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ class Tartarus
4
+ module RemoteStorage
5
+ class Glacier
6
+ class Client
7
+ attr_reader :client, :account_id
8
+ private :client, :account_id
9
+
10
+ def initialize(key:, secret:, region:, account_id:)
11
+ @client = Aws::Glacier::Client.new(credentials: Aws::Credentials.new(key, secret), region: region)
12
+ @account_id = account_id
13
+ end
14
+
15
+ def upload_archive(vault_name, file)
16
+ client.upload_archive(
17
+ account_id: account_id,
18
+ archive_description: file.description,
19
+ body: file.body,
20
+ vault_name: vault_name
21
+ )
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,43 @@
1
+ class Tartarus
2
+ module RemoteStorage
3
+ class Glacier
4
+ class Configuration
5
+ DEFAULT_ACCOUNT_ID = "-"
6
+ private_constant :DEFAULT_ACCOUNT_ID
7
+
8
+ REQUIRED_ATTRIBUTES_NAMES = %i(aws_key aws_secret aws_region account_id vault_name root_path
9
+ archive_registry_factory).freeze
10
+ attr_accessor *REQUIRED_ATTRIBUTES_NAMES
11
+
12
+ def self.build(aws_key:, aws_secret:, aws_region:, account_id: DEFAULT_ACCOUNT_ID, vault_name:, root_path:, archive_registry_factory:)
13
+ new.tap do |config|
14
+ config.aws_key = aws_key
15
+ config.aws_secret = aws_secret
16
+ config.aws_region = aws_region
17
+ config.account_id = account_id
18
+ config.vault_name = vault_name
19
+ config.root_path = root_path
20
+ config.archive_registry_factory = archive_registry_factory
21
+ config.validate!
22
+ end
23
+ end
24
+
25
+ def validate!
26
+ validate_presence
27
+ end
28
+
29
+ def storage_directory
30
+ "#{root_path}/tmp/tartarus/#{archive_registry_factory}"
31
+ end
32
+
33
+ private
34
+
35
+ def validate_presence
36
+ REQUIRED_ATTRIBUTES_NAMES.each do |attribute|
37
+ raise ":#{attribute} must be present" if public_send(attribute).nil?
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,38 @@
1
+ class Tartarus
2
+ module RemoteStorage
3
+ class Glacier
4
+ class CsvExport
5
+ FILE_MODE = "w"
6
+ DELIMITER = ";"
7
+ NO_PATH_FOR_EXPORT = nil
8
+ ENCODING = "UTF-8"
9
+ private_constant :FILE_MODE, :DELIMITER, :NO_PATH_FOR_EXPORT, :ENCODING
10
+
11
+ attr_reader :storage_directory, :file_service, :file_utils
12
+ private :storage_directory, :file_service, :file_utils
13
+
14
+ def initialize(storage_directory, file_service: ::File, file_utils: FileUtils)
15
+ @storage_directory = storage_directory
16
+ @file_service = file_service
17
+ @file_utils = file_utils
18
+ end
19
+
20
+ def export(collection, path_to_file)
21
+ with_csv_export_file(path_to_file) do |file|
22
+ collection.copy_to(NO_PATH_FOR_EXPORT, delimiter: DELIMITER) do |line|
23
+ file.write(line.force_encoding(ENCODING))
24
+ end
25
+ end
26
+ end
27
+
28
+ private
29
+
30
+ def with_csv_export_file(path_to_file, &block)
31
+ file_utils.mkdir_p(storage_directory) if !file_service.exist?(storage_directory)
32
+
33
+ file_service.open(path_to_file, FILE_MODE, &block)
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,31 @@
1
+ require "delegate"
2
+
3
+ class Tartarus
4
+ module RemoteStorage
5
+ class Glacier
6
+ class File < SimpleDelegator
7
+ def description
8
+ file_service.basename(self, ".*")
9
+ end
10
+
11
+ def body
12
+ self
13
+ end
14
+
15
+ def checksum
16
+ Digest::SHA256.file(path)
17
+ end
18
+
19
+ def delete_from_local_storage
20
+ file_service.delete(path)
21
+ end
22
+
23
+ private
24
+
25
+ def file_service
26
+ ::File
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,28 @@
1
+ class Tartarus
2
+ module RemoteStorage
3
+ class Glacier
4
+ class RegisterUpload
5
+ attr_reader :archive_registry_factory, :clock
6
+ private :archive_registry_factory, :clock
7
+
8
+ def initialize(archive_registry_factory, clock: Time)
9
+ @archive_registry_factory = archive_registry_factory
10
+ @clock = clock
11
+ end
12
+
13
+ def register(glacier_response, archivable_model, tenant_id_field, tenant_id)
14
+ archive_registry_factory.new.tap do |archive_registry|
15
+ archive_registry.glacier_location = glacier_response.location
16
+ archive_registry.glacier_checksum = glacier_response.checksum
17
+ archive_registry.glacier_archive_id = glacier_response.archive_id
18
+ archive_registry.archivable_model = archivable_model
19
+ archive_registry.tenant_id_field = tenant_id_field
20
+ archive_registry.tenant_id = tenant_id
21
+ archive_registry.completed_at = clock.now
22
+ archive_registry.save!
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,8 @@
1
+ class Tartarus
2
+ module RemoteStorage
3
+ class Null
4
+ def self.store(*)
5
+ end
6
+ end
7
+ end
8
+ end
@@ -6,15 +6,15 @@ class Tartarus::ScheduleArchivingModel
6
6
  @registry = registry
7
7
  end
8
8
 
9
- def schedule(model_name)
10
- archivable_item = registry.find_by_model(model_name)
9
+ def schedule(archivable_item_name)
10
+ archivable_item = registry.find_by_name(archivable_item_name)
11
11
 
12
12
  if archivable_item.scope_by_tenant?
13
13
  each_tenant(archivable_item) do |tenant|
14
- enqueue(Tartarus::Sidekiq::ArchiveModelWithTenantJob, archivable_item.queue, model_name, tenant)
14
+ enqueue(Tartarus::Sidekiq::ArchiveModelWithTenantJob, archivable_item.queue, archivable_item.name, tenant)
15
15
  end
16
16
  else
17
- enqueue(Tartarus::Sidekiq::ArchiveModelWithoutTenantJob, archivable_item.queue, model_name)
17
+ enqueue(Tartarus::Sidekiq::ArchiveModelWithoutTenantJob, archivable_item.queue, archivable_item.name)
18
18
  end
19
19
  end
20
20
 
@@ -4,8 +4,8 @@ class Tartarus
4
4
  class Sidekiq::ArchiveModelWithTenantJob
5
5
  include ::Sidekiq::Worker
6
6
 
7
- def perform(model_name, tenant_id)
8
- Tartarus::ArchiveModelWithTenant.new.archive(model_name, tenant_id)
7
+ def perform(archivable_item_name, tenant_id)
8
+ Tartarus::ArchiveModelWithTenant.new.archive(archivable_item_name, tenant_id)
9
9
  end
10
10
  end
11
11
  end
@@ -4,8 +4,8 @@ class Tartarus
4
4
  class Sidekiq::ArchiveModelWithoutTenantJob
5
5
  include ::Sidekiq::Worker
6
6
 
7
- def perform(model_name)
8
- Tartarus::ArchiveModelWithoutTenant.new.archive(model_name)
7
+ def perform(archivable_item_name)
8
+ Tartarus::ArchiveModelWithoutTenant.new.archive(archivable_item_name)
9
9
  end
10
10
  end
11
11
  end
@@ -4,8 +4,8 @@ class Tartarus
4
4
  class Sidekiq::ScheduleArchivingModelJob
5
5
  include ::Sidekiq::Worker
6
6
 
7
- def perform(model_name)
8
- Tartarus::ScheduleArchivingModel.new.schedule(model_name)
7
+ def perform(archivable_item_name)
8
+ Tartarus::ScheduleArchivingModel.new.schedule(archivable_item_name)
9
9
  end
10
10
  end
11
11
  end
data/tartarus-rb.gemspec CHANGED
@@ -30,10 +30,17 @@ Gem::Specification.new do |spec|
30
30
  spec.add_dependency "sidekiq", ">= 5"
31
31
  spec.add_dependency "sidekiq-cron", "~> 1"
32
32
 
33
+
33
34
  spec.add_development_dependency "rake", "~> 13.0"
34
35
  spec.add_development_dependency "rspec", "~> 3.0"
35
36
  spec.add_development_dependency "rspec-sidekiq"
37
+ spec.add_development_dependency "aws-sdk-glacier"
36
38
 
37
39
  spec.add_development_dependency "activerecord", "~> 6"
38
- spec.add_development_dependency "sqlite3"
40
+ spec.add_development_dependency "pg"
41
+ spec.add_development_dependency "vcr"
42
+ spec.add_development_dependency "webmock"
43
+ spec.add_development_dependency "dotenv"
44
+ spec.add_development_dependency "postgres-copy"
45
+ spec.add_development_dependency "timecop"
39
46
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tartarus-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Karol Galanciak
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-04 00:00:00.000000000 Z
11
+ date: 2021-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sidekiq
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: aws-sdk-glacier
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: activerecord
85
99
  requirement: !ruby/object:Gem::Requirement
@@ -95,7 +109,77 @@ dependencies:
95
109
  - !ruby/object:Gem::Version
96
110
  version: '6'
97
111
  - !ruby/object:Gem::Dependency
98
- name: sqlite3
112
+ name: pg
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: vcr
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: webmock
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: dotenv
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: postgres-copy
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ - !ruby/object:Gem::Dependency
182
+ name: timecop
99
183
  requirement: !ruby/object:Gem::Requirement
100
184
  requirements:
101
185
  - - ">="
@@ -116,6 +200,7 @@ executables: []
116
200
  extensions: []
117
201
  extra_rdoc_files: []
118
202
  files:
203
+ - ".env.sample"
119
204
  - ".gitignore"
120
205
  - ".rspec"
121
206
  - ".travis.yml"
@@ -136,6 +221,7 @@ files:
136
221
  - lib/tartarus/archive_model_without_tenant.rb
137
222
  - lib/tartarus/archive_strategy.rb
138
223
  - lib/tartarus/archive_strategy/delete_all.rb
224
+ - lib/tartarus/archive_strategy/delete_all_using_limit_in_batches.rb
139
225
  - lib/tartarus/archive_strategy/delete_all_without_batches.rb
140
226
  - lib/tartarus/archive_strategy/destroy_all.rb
141
227
  - lib/tartarus/archive_strategy/destroy_all_without_batches.rb
@@ -143,6 +229,14 @@ files:
143
229
  - lib/tartarus/rb.rb
144
230
  - lib/tartarus/rb/version.rb
145
231
  - lib/tartarus/registry.rb
232
+ - lib/tartarus/remote_storage.rb
233
+ - lib/tartarus/remote_storage/glacier.rb
234
+ - lib/tartarus/remote_storage/glacier/client.rb
235
+ - lib/tartarus/remote_storage/glacier/configuration.rb
236
+ - lib/tartarus/remote_storage/glacier/csv_export.rb
237
+ - lib/tartarus/remote_storage/glacier/file.rb
238
+ - lib/tartarus/remote_storage/glacier/register_upload.rb
239
+ - lib/tartarus/remote_storage/null.rb
146
240
  - lib/tartarus/repository.rb
147
241
  - lib/tartarus/schedule_archiving_model.rb
148
242
  - lib/tartarus/sidekiq.rb