tartarus-rb 0.2.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ffb5cafa941ec43d2c07a6745ef9b75b91c31784649bbf04187e90f6960812e0
4
- data.tar.gz: 9615766c2064a4e127c836ca4cf6ad9b0f2925d08246e4486b9f9fd382884e4f
3
+ metadata.gz: f7669ce816882e4f9607e41afde316cbe50027291a0dd89008d3baaa9ca6b308
4
+ data.tar.gz: d03d89b04491a2dce92820674e94c895cac9cad706b790927aab9129f8c15b26
5
5
  SHA512:
6
- metadata.gz: a97481429cc2c5faa977d75baae80c78e17f56cfa0323b76e613a91a57586bbfb6890442e340b7d9aff88eb7a155f694174109a92317d77e7b8c4c367224ae50
7
- data.tar.gz: 143dc43864c453873d6154f52d1504eff11f0f88e406e69893fd62937ea8b59899e4c1fdd344adc65fab4e4d6f15c8dcee4cd5c09c14e86a19d47be2e17d403d
6
+ metadata.gz: e3906591489650a6e4f9ce9f3b0eaa17ab2a95797096abcf8bba9f50855a7463f0a2f1354017f3df512519775765e807ece44b4c75cbc2b3b3bd326f67a6ffcf
7
+ data.tar.gz: 48d503bc4bbea2be0196778d5dca18b9d9770cf069685128860f4567fdc1727f46042e009b43a86dc9697b217c8a6b5816a45eae1ed935f362ce63b42c2301e2
data/.env.sample ADDED
@@ -0,0 +1,4 @@
1
+ AWS_REGION=""
2
+ AWS_SECRET=""
3
+ AWS_KEY=""
4
+ VAULT_NAME=""
data/.gitignore CHANGED
@@ -7,6 +7,8 @@
7
7
  /spec/reports/
8
8
  /tmp/
9
9
  /spec/test.db
10
+ /spec/tmp/
11
+ .env
10
12
 
11
13
  # rspec failure tracking
12
14
  .rspec_status
data/.travis.yml CHANGED
@@ -3,6 +3,13 @@ language: ruby
3
3
  cache: bundler
4
4
  rvm:
5
5
  - 2.7.2
6
+ env:
7
+ global:
8
+ - AWS_KEY=AWS_KEY
9
+ - AWS_SECRET=AWS_SECRET
10
+ - AWS_REGION="us-east-2"
11
+ - VAULT_NAME=VAULT_NAME
6
12
  before_install: gem install bundler -v 2.1.4
7
13
  services:
8
14
  - redis-server
15
+ - postgresql
data/Changelog.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # Changelog
2
2
 
3
+ ## Master
4
+
5
+ ## 0.5.0
6
+
7
+ - Provide ability to explicitly set the name of archivable item to have multiple ways of archiving the same model
8
+
9
+ ## 0.4.1
10
+
11
+ - Do not make Glacier a required dependency if not used
12
+
13
+ ## 0.4.0
14
+
15
+ - Add Glacier remote storage support to upload data before deleting it
16
+
17
+ ## 0.3.0
18
+
19
+ - Add `delete_all_using_limit_in_batches` strategy
20
+
3
21
  ## 0.2.0
4
22
  - Add support for deleting and destroying in batches
5
23
  - Add integration tests with a real database and ActiveRecord
data/Gemfile.lock CHANGED
@@ -1,13 +1,26 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tartarus-rb (0.2.0)
4
+ tartarus-rb (0.5.0)
5
5
  sidekiq (>= 5)
6
6
  sidekiq-cron (~> 1)
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
+ actionpack (6.1.0.rc1)
12
+ actionview (= 6.1.0.rc1)
13
+ activesupport (= 6.1.0.rc1)
14
+ rack (~> 2.0, >= 2.0.9)
15
+ rack-test (>= 0.6.3)
16
+ rails-dom-testing (~> 2.0)
17
+ rails-html-sanitizer (~> 1.0, >= 1.2.0)
18
+ actionview (6.1.0.rc1)
19
+ activesupport (= 6.1.0.rc1)
20
+ builder (~> 3.1)
21
+ erubi (~> 1.4)
22
+ rails-dom-testing (~> 2.0)
23
+ rails-html-sanitizer (~> 1.1, >= 1.2.0)
11
24
  activemodel (6.1.0.rc1)
12
25
  activesupport (= 6.1.0.rc1)
13
26
  activerecord (6.1.0.rc1)
@@ -19,21 +32,74 @@ GEM
19
32
  minitest (>= 5.1)
20
33
  tzinfo (~> 2.0)
21
34
  zeitwerk (~> 2.3)
35
+ addressable (2.7.0)
36
+ public_suffix (>= 2.0.2, < 5.0)
37
+ aws-eventstream (1.1.0)
38
+ aws-partitions (1.424.0)
39
+ aws-sdk-core (3.112.0)
40
+ aws-eventstream (~> 1, >= 1.0.2)
41
+ aws-partitions (~> 1, >= 1.239.0)
42
+ aws-sigv4 (~> 1.1)
43
+ jmespath (~> 1.0)
44
+ aws-sdk-glacier (1.36.0)
45
+ aws-sdk-core (~> 3, >= 3.112.0)
46
+ aws-sigv4 (~> 1.1)
47
+ aws-sigv4 (1.2.2)
48
+ aws-eventstream (~> 1, >= 1.0.2)
49
+ builder (3.2.4)
22
50
  concurrent-ruby (1.1.7)
23
51
  connection_pool (2.2.3)
52
+ crack (0.4.3)
53
+ safe_yaml (~> 1.0.0)
54
+ crass (1.0.6)
24
55
  diff-lcs (1.4.4)
56
+ dotenv (2.7.6)
57
+ erubi (1.10.0)
25
58
  et-orbi (1.2.4)
26
59
  tzinfo
27
- fugit (1.4.0)
60
+ fugit (1.4.5)
28
61
  et-orbi (~> 1.1, >= 1.1.8)
29
62
  raabro (~> 1.4)
63
+ hashdiff (1.0.0)
30
64
  i18n (1.8.5)
31
65
  concurrent-ruby (~> 1.0)
66
+ jmespath (1.4.0)
67
+ loofah (2.9.0)
68
+ crass (~> 1.0.2)
69
+ nokogiri (>= 1.5.9)
70
+ method_source (1.0.0)
71
+ mini_portile2 (2.5.0)
32
72
  minitest (5.14.2)
73
+ nokogiri (1.11.1)
74
+ mini_portile2 (~> 2.5.0)
75
+ racc (~> 1.4)
76
+ pg (1.2.3)
77
+ postgres-copy (1.5.0)
78
+ activerecord (>= 5.1)
79
+ pg (>= 0.17)
80
+ responders
81
+ public_suffix (4.0.4)
33
82
  raabro (1.4.0)
83
+ racc (1.5.2)
34
84
  rack (2.2.3)
85
+ rack-test (1.1.0)
86
+ rack (>= 1.0, < 3)
87
+ rails-dom-testing (2.0.3)
88
+ activesupport (>= 4.2.0)
89
+ nokogiri (>= 1.6)
90
+ rails-html-sanitizer (1.3.0)
91
+ loofah (~> 2.3)
92
+ railties (6.1.0.rc1)
93
+ actionpack (= 6.1.0.rc1)
94
+ activesupport (= 6.1.0.rc1)
95
+ method_source
96
+ rake (>= 0.8.7)
97
+ thor (~> 1.0)
35
98
  rake (13.0.1)
36
99
  redis (4.2.2)
100
+ responders (3.0.1)
101
+ actionpack (>= 5.0)
102
+ railties (>= 5.0)
37
103
  rspec (3.9.0)
38
104
  rspec-core (~> 3.9.0)
39
105
  rspec-expectations (~> 3.9.0)
@@ -50,6 +116,7 @@ GEM
50
116
  rspec-core (~> 3.0, >= 3.0.0)
51
117
  sidekiq (>= 2.4.0)
52
118
  rspec-support (3.9.3)
119
+ safe_yaml (1.0.5)
53
120
  sidekiq (6.1.2)
54
121
  connection_pool (>= 2.2.2)
55
122
  rack (~> 2.0)
@@ -57,9 +124,15 @@ GEM
57
124
  sidekiq-cron (1.2.0)
58
125
  fugit (~> 1.1)
59
126
  sidekiq (>= 4.2.1)
60
- sqlite3 (1.4.2)
127
+ thor (1.1.0)
128
+ timecop (0.9.2)
61
129
  tzinfo (2.0.2)
62
130
  concurrent-ruby (~> 1.0)
131
+ vcr (6.0.0)
132
+ webmock (3.7.6)
133
+ addressable (>= 2.3.6)
134
+ crack (>= 0.3.2)
135
+ hashdiff (>= 0.4.0, < 2.0.0)
63
136
  zeitwerk (2.4.1)
64
137
 
65
138
  PLATFORMS
@@ -67,11 +140,17 @@ PLATFORMS
67
140
 
68
141
  DEPENDENCIES
69
142
  activerecord (~> 6)
143
+ aws-sdk-glacier
144
+ dotenv
145
+ pg
146
+ postgres-copy
70
147
  rake (~> 13.0)
71
148
  rspec (~> 3.0)
72
149
  rspec-sidekiq
73
- sqlite3
74
150
  tartarus-rb!
151
+ timecop
152
+ vcr
153
+ webmock
75
154
 
76
155
  BUNDLED WITH
77
156
  2.1.4
data/README.md CHANGED
@@ -56,12 +56,23 @@ if File.exist?(schedule_file) && Sidekiq.server?
56
56
  item.timestamp_field = :created_at
57
57
  end
58
58
 
59
+ glacier_configuration = Tartarus::RemoteStorage::Glacier::Configuration.build(
60
+ aws_key: ENV.fetch("AWS_KEY"),
61
+ aws_secret: ENV.fetch("AWS_SECRET"),
62
+ aws_region: ENV.fetch("AWS_REGION"),
63
+ vault_name: ENV.fetch("GLACIER_VAULT_NAME"),
64
+ root_path: Rails.root.to_s,
65
+ archive_registry_factory: ArchiveRegistry,
66
+ )
67
+ # don't forget about installing `aws-sdk-glacier` gem
68
+
59
69
  tartarus.register do |item|
60
70
  item.model = YetAnotherModel
61
71
  item.cron = "5 6 * * *"
62
72
  item.queue = "default"
63
73
  item.timestamp_field = :created_at
64
74
  item.archive_items_older_than = -> { 1.week.ago }
75
+ item.remote_storage = Tartarus::RemoteStorage::Glacier.new(glacier_configuration)
65
76
  end
66
77
 
67
78
  tartarus.schedule # this method must be called to create jobs for sidekiq-cron!
@@ -71,6 +82,31 @@ end
71
82
 
72
83
  You can use the following config params:
73
84
  - `model` - a name of the ActiveRecord model you want to archive, required
85
+ - `name` - name of your strategy, optional. It falls back to `model.to_s`. It's important to set it in cases when you have several strategies for the same model:
86
+ ```rb
87
+ tartarus.register do |item|
88
+ item.model = InternalEvent
89
+ item.name = "archive_account_and_user_internal_events"
90
+ item.cron = "5 5 * * *"
91
+ item.queue = "default"
92
+ item.tenants_range = -> { ["Account", "User"] }
93
+ item.tenant_id_field = :model_type
94
+ item.archive_items_older_than = -> { 30.days.ago }
95
+ item.timestamp_field = :created_at
96
+ end
97
+
98
+ tartarus.register do |item|
99
+ item.model = InternalEvent
100
+ item.name = "archive_post_and_comment_internal_events"
101
+ item.cron = "5 15 * * *"
102
+ item.queue = "default"
103
+ item.tenants_range = -> { ["Post", "Comment"] }
104
+ item.tenant_id_field = :model_type
105
+ item.archive_items_older_than = -> { 10.days.ago }
106
+ item.timestamp_field = :created_at
107
+ end
108
+ ```
109
+
74
110
  - `cron` - cron syntax, required
75
111
  - `queue` - name of the sidekiq queue you want to use for execution of the jobs, required
76
112
  - `tenants_range` - optional, use if you want to scope items by a tenant (or any field that can be used for partitioning). It doesn't have to be an ActiveRecord collection, could be just an array. Must be a proc/lambda/object responding to `call` method. For an ActiveRecord collection, a `find_each` loop will be used for optimization.
@@ -78,7 +114,92 @@ You can use the following config params:
78
114
  - `tenant_id_field` - required when using tenants_range/tenant_value_source. It's a DB column that will be used for scoping records by a tenant. For example, here it would be: `ModelThatYouWantToArchive.where(account_uuid: value_of_uuid_from_some_active_account)`
79
115
  - `archive_items_older_than` - required, for defining retention policy
80
116
  - `timestamp_field` - required, used for performing a query using the value from `archive_items_older_than`
81
- - `archive_with` - optional (defaults to `delete_all`). Could be `delete_all`, `destroy_all`, `delete_all_without_batches`, `destroy_all_without_batches`
117
+ - `archive_with` - optional (defaults to `delete_all`). Could be `delete_all`, `destroy_all`, `delete_all_without_batches`, `destroy_all_without_batches`, `delete_all_using_limit_in_batches`
118
+ - `batch_size` - optional (defaults to `10_000`, used with `delete_all_using_limit_in_batches` strategy)
119
+ - `remote_storage` - optional (defaults to `Tartarus::RemoteStorage::Null` which does nothing). Use this option if you want to store the data somewhere before deleting it.
120
+
121
+ ### Remote Storage
122
+
123
+ Currently, only `Glacier` (for AWS Glacier) is supported. Also, it works only with a Postgres database and requires [postgres-copy](https://github.com/diogob/postgres-copy).
124
+
125
+ To take advantage of this feature you will need a couple of things:
126
+ 1. Apply `acts_as_copy_target` to the archivable model (from `postgres-copy` gem).
127
+ 2. Create a model that will be used as a registry for all uploads that happened.
128
+ 3. Install `aws-sdk-glacier` gem.
129
+
130
+ If you want to make `Version` model archivable and use `ArchiveRegistry` as the registry, you will need the following models and tables:
131
+
132
+ ``` rb
133
+ database.create_table(:archive_registries) do |t|
134
+ t.string :glacier_location, null: false
135
+ t.string :glacier_checksum, null: false
136
+ t.string :glacier_archive_id, null: false
137
+ t.string :archivable_model, null: false
138
+ t.string :tenant_id_field
139
+ t.string :tenant_id
140
+ t.datetime :completed_at, null: false
141
+ end
142
+
143
+ database.create_table(:versions) do |t|
144
+ end
145
+
146
+ class Version < ApplicationRecord
147
+ acts_as_copy_target
148
+ end
149
+
150
+ class ArchiveRegistry < ApplicationRecord
151
+ end
152
+ ```
153
+
154
+ You can use the above schema for the registry model as it contains all needed fields.
155
+
156
+ To initialize the service:
157
+
158
+ ``` rb
159
+ glacier_configuration = Tartarus::RemoteStorage::Glacier::Configuration.build(
160
+ aws_key: ENV.fetch("AWS_KEY"),
161
+ aws_secret: ENV.fetch("AWS_SECRET"),
162
+ aws_region: ENV.fetch("AWS_REGION"),
163
+ vault_name: ENV.fetch("GLACIER_VAULT_NAME"),
164
+ root_path: Rails.root.to_s,
165
+ archive_registry_factory: ArchiveRegistry,
166
+ )
167
+ Tartarus::RemoteStorage::Glacier.new(glacier_configuration)
168
+ ```
169
+
170
+ You can also pass `account_id` (by default "-" string will be used):
171
+
172
+ ``` rb
173
+ glacier_configuration = Tartarus::RemoteStorage::Glacier::Configuration.build(
174
+ aws_key: ENV.fetch("AWS_KEY"),
175
+ aws_secret: ENV.fetch("AWS_SECRET"),
176
+ aws_region: ENV.fetch("AWS_REGION"),
177
+ vault_name: ENV.fetch("GLACIER_VAULT_NAME"),
178
+ root_path: Rails.root.to_s,
179
+ archive_registry_factory: ArchiveRegistry,
180
+ account_id: "some_account_id"
181
+ )
182
+ Tartarus::RemoteStorage::Glacier.new(glacier_configuration)
183
+ ```
184
+
185
+ **Important** - do not use Glacier Storage for large batches (> 4 GB) as multipart uploads are not supported yet.
186
+
187
+
188
+ If you know what you are doing, you can add your own storage, as long as it complies with the following interface:
189
+
190
+ ``` rb
191
+ class Glacier
192
+ attr_reader :configuration
193
+ private :configuration
194
+
195
+ def initialize(configuration)
196
+ @configuration = configuration
197
+ end
198
+
199
+ def store(collection, archivable_model, tenant_id: nil, tenant_id_field: nil)
200
+ end
201
+ end
202
+ ```
82
203
 
83
204
  ### Testing before actually using it
84
205
 
@@ -87,7 +208,6 @@ You might want to verify that the gem works in the way you expect it to work. Fo
87
208
  1. scheduling/enqueueing: use `Tartarus::ScheduleArchivingModel#schedule` - for example, `Tartarus::ScheduleArchivingModel.new.schedule("PaperTrailVersion")`, it's going to enqueue either `Tartarus::Sidekiq::ArchiveModelWithTenantJob` or `Tartarus::Sidekiq::ArchiveModelWithoutTenantJob`, depending on the config.
88
209
  2. execution of the archiving logic: use `Tartarus::ArchiveModelWithTenant#archive` (for example, `Tartarus::ArchiveModelWithTenant.new.archive("PaperTrailVersion", "User")`) or `Tartarus::ArchiveModelWithoutTenant#archive` (for example, `Tartarus::ArchiveModelWithoutTenant.new.archive("PaperTrailVersion")`)
89
210
 
90
-
91
211
  You might also want to check `spec/integration` to get an idea how the integration tests were written.
92
212
 
93
213
  ## Development
@@ -96,10 +216,6 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
96
216
 
97
217
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
98
218
 
99
- ## TODO
100
-
101
- - add support for uploading archives to AWS Glacier before deleting items
102
-
103
219
  ## Contributing
104
220
 
105
221
  Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/tartarus-rb.
data/lib/tartarus.rb CHANGED
@@ -4,6 +4,7 @@ require "tartarus/archivable_item/sidekiq_cron_job_serializer"
4
4
  require "tartarus/archive_strategy"
5
5
  require "tartarus/archive_strategy/delete_all"
6
6
  require "tartarus/archive_strategy/delete_all_without_batches"
7
+ require "tartarus/archive_strategy/delete_all_using_limit_in_batches"
7
8
  require "tartarus/archive_strategy/destroy_all"
8
9
  require "tartarus/archive_strategy/destroy_all_without_batches"
9
10
  require "tartarus/archive_strategy/extract_batch"
@@ -17,6 +18,7 @@ require "tartarus/rb/version"
17
18
  require "tartarus/registry"
18
19
  require "tartarus/repository"
19
20
  require "tartarus/schedule_archiving_model"
21
+ require "tartarus/remote_storage"
20
22
  require "sidekiq/cron/job"
21
23
  require "sidekiq"
22
24
 
@@ -13,6 +13,7 @@ class Tartarus
13
13
  ensure_column_exists(collection, model_name, tenant_id_field)
14
14
 
15
15
  collection.where("#{timestamp_field} < ?", timestamp).where(tenant_id_field => tenant_id)
16
+ .order(tenant_id_field, timestamp_field)
16
17
  end
17
18
 
18
19
  def items_older_than(model_name, timestamp_field, timestamp)
@@ -1,7 +1,7 @@
1
1
  class Tartarus::ArchivableItem
2
2
  REQUIRED_ATTRIBUTES_NAMES = %i(model cron queue archive_items_older_than timestamp_field active_job
3
3
  archive_with tenant_value_source).freeze
4
- OPTIONAL_ATTRIBUTES_NAMES = %i(tenants_range tenant_id_field).freeze
4
+ OPTIONAL_ATTRIBUTES_NAMES = %i(tenants_range tenant_id_field batch_size remote_storage name).freeze
5
5
 
6
6
  attr_accessor *(REQUIRED_ATTRIBUTES_NAMES + OPTIONAL_ATTRIBUTES_NAMES)
7
7
 
@@ -45,6 +45,16 @@ class Tartarus::ArchivableItem
45
45
  @archive_with ||= :delete_all
46
46
  end
47
47
 
48
+ def batch_size
49
+ return @batch_size if defined?(@batch_size)
50
+
51
+ @batch_size ||= 10_000
52
+ end
53
+
54
+ def name
55
+ @name || @model.to_s
56
+ end
57
+
48
58
  def validate!
49
59
  validate_presence
50
60
  end
@@ -54,11 +64,11 @@ class Tartarus::ArchivableItem
54
64
  end
55
65
 
56
66
  def archive_strategy(factory: Tartarus::ArchiveStrategy.new)
57
- factory.for(archive_with)
67
+ factory.for(archive_with, batch_size: batch_size)
58
68
  end
59
69
 
60
- def for_model?(provided_model_name)
61
- model.to_s == provided_model_name.to_s
70
+ def remote_storage
71
+ @remote_storage || Tartarus::RemoteStorage::Null
62
72
  end
63
73
 
64
74
  private
@@ -7,7 +7,7 @@ class Tartarus
7
7
  description: description_for_item(archivable_item),
8
8
  cron: archivable_item.cron,
9
9
  class: Tartarus::Sidekiq::ScheduleArchivingModelJob,
10
- args: [archivable_item.model],
10
+ args: [archivable_item.name],
11
11
  queue: archivable_item.queue,
12
12
  active_job: archivable_item.active_job
13
13
  }
@@ -16,7 +16,7 @@ class Tartarus
16
16
  private
17
17
 
18
18
  def name_for_item(archivable_item)
19
- "TARTARUS_#{archivable_item.model}"
19
+ "TARTARUS_#{archivable_item.name}"
20
20
  end
21
21
 
22
22
  def description_for_item(archivable_item)
@@ -7,18 +7,20 @@ class Tartarus::ArchiveModelWithTenant
7
7
  @repository = repository
8
8
  end
9
9
 
10
- def archive(model_name, tenant_id)
11
- archivable_item = registry.find_by_model(model_name)
12
-
13
- archivable_item.archive_strategy.call(collection_to_archive(model_name, archivable_item, tenant_id))
10
+ def archive(archivable_item_name, tenant_id)
11
+ archivable_item = registry.find_by_name(archivable_item_name)
12
+ collection = collection_to_archive(archivable_item, tenant_id)
13
+ archivable_item.remote_storage.store(collection, archivable_item.name, tenant_id: tenant_id,
14
+ tenant_id_field: archivable_item.tenant_id_field)
15
+ archivable_item.archive_strategy.call(collection)
14
16
  end
15
17
 
16
18
  private
17
19
 
18
- def collection_to_archive(model_name, archivable_item, tenant_id)
20
+ def collection_to_archive(archivable_item, tenant_id)
19
21
  repository
20
22
  .items_older_than_for_tenant(
21
- model_name,
23
+ archivable_item.model,
22
24
  archivable_item.timestamp_field, archivable_item.archive_items_older_than.call,
23
25
  archivable_item.tenant_id_field, tenant_id
24
26
  )
@@ -7,18 +7,19 @@ class Tartarus::ArchiveModelWithoutTenant
7
7
  @repository = repository
8
8
  end
9
9
 
10
- def archive(model_name)
11
- archivable_item = registry.find_by_model(model_name)
12
-
13
- archivable_item.archive_strategy.call(collection_to_archive(model_name, archivable_item))
10
+ def archive(archivable_item_name)
11
+ archivable_item = registry.find_by_name(archivable_item_name)
12
+ collection = collection_to_archive(archivable_item)
13
+ archivable_item.remote_storage.store(collection, archivable_item.name)
14
+ archivable_item.archive_strategy.call(collection)
14
15
  end
15
16
 
16
17
  private
17
18
 
18
- def collection_to_archive(model_name, archivable_item)
19
+ def collection_to_archive(archivable_item)
19
20
  repository
20
21
  .items_older_than(
21
- model_name,
22
+ archivable_item.model,
22
23
  archivable_item.timestamp_field, archivable_item.archive_items_older_than.call
23
24
  )
24
25
  end
@@ -1,5 +1,5 @@
1
1
  class Tartarus::ArchiveStrategy
2
- def for(strategy_name)
2
+ def for(strategy_name, batch_size: 0)
3
3
  case strategy_name.to_sym
4
4
  when :delete_all
5
5
  Tartarus::ArchiveStrategy::DeleteAll.new
@@ -9,6 +9,8 @@ class Tartarus::ArchiveStrategy
9
9
  Tartarus::ArchiveStrategy::DeleteAllWithoutBatches.new
10
10
  when :destroy_all_without_batches
11
11
  Tartarus::ArchiveStrategy::DestroyAllWithoutBatches.new
12
+ when :delete_all_using_limit_in_batches
13
+ Tartarus::ArchiveStrategy::DeleteAllUsingLimitInBatches.new(batch_size: batch_size)
12
14
  else
13
15
  raise "unknown strategy: #{strategy_name}"
14
16
  end
@@ -0,0 +1,20 @@
1
+ class Tartarus
2
+ class ArchiveStrategy
3
+ class DeleteAllUsingLimitInBatches
4
+ attr_reader :batch_size
5
+ private :batch_size
6
+
7
+ def initialize(batch_size:)
8
+ @batch_size = batch_size
9
+ end
10
+
11
+ def call(collection)
12
+ num = 1
13
+
14
+ while num > 0
15
+ num = collection.limit(batch_size).delete_all
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -1,5 +1,5 @@
1
1
  class Tartarus
2
2
  module Rb
3
- VERSION = "0.2.0"
3
+ VERSION = "0.5.0"
4
4
  end
5
5
  end
@@ -16,8 +16,8 @@ class Tartarus::Registry
16
16
  @storage << item
17
17
  end
18
18
 
19
- def find_by_model(model)
20
- storage.find(->{ raise "#{model} not found in registry" }) { |item| item.for_model?(model) }
19
+ def find_by_name(name)
20
+ storage.find(->{ raise "#{name} not found in registry" }) { |item| item.name == name }
21
21
  end
22
22
 
23
23
  def reset
@@ -0,0 +1,4 @@
1
+ module Tartarus::RemoteStorage
2
+ autoload :Glacier, "tartarus/remote_storage/glacier"
3
+ autoload :Null, "tartarus/remote_storage/null"
4
+ end
@@ -0,0 +1,67 @@
1
+ require "aws-sdk-glacier"
2
+ require "tartarus/remote_storage/glacier/client"
3
+ require "tartarus/remote_storage/glacier/file"
4
+ require "tartarus/remote_storage/glacier/csv_export"
5
+ require "tartarus/remote_storage/glacier/register_upload"
6
+ require "tartarus/remote_storage/glacier/configuration"
7
+
8
+ class Tartarus
9
+ module RemoteStorage
10
+ class Glacier
11
+ attr_reader :configuration, :clock
12
+ private :configuration, :clock
13
+
14
+ def initialize(configuration, clock: Time)
15
+ @configuration = configuration
16
+ @clock = clock
17
+ end
18
+
19
+ def store(collection, archivable_model, tenant_id: nil, tenant_id_field: nil)
20
+ path_to_file = path_to_file_for(archivable_model, tenant_id_field, tenant_id)
21
+ export_to_csv(collection, path_to_file)
22
+ glacier_file = Tartarus::RemoteStorage::Glacier::File.new(::File.new(path_to_file))
23
+ glacier_response = upload(glacier_file)
24
+ register_upload(glacier_response, archivable_model, tenant_id_field, tenant_id)
25
+ ensure
26
+ glacier_file.delete_from_local_storage if glacier_file
27
+ end
28
+
29
+ private
30
+
31
+ def upload(file)
32
+ client.upload_archive(configuration.vault_name, file)
33
+ end
34
+
35
+ def client
36
+ @client ||= begin
37
+ Tartarus::RemoteStorage::Glacier::Client.new(
38
+ key: configuration.aws_key,
39
+ secret: configuration.aws_secret,
40
+ region: configuration.aws_region,
41
+ account_id: configuration.account_id,
42
+ )
43
+ end
44
+ end
45
+
46
+ def export_to_csv(collection, path_to_file)
47
+ Tartarus::RemoteStorage::Glacier::CsvExport
48
+ .new(configuration.storage_directory)
49
+ .export(collection, path_to_file)
50
+ end
51
+
52
+
53
+ def register_upload(glacier_response, archivable_model, tenant_id_field, tenant_id)
54
+ Tartarus::RemoteStorage::Glacier::RegisterUpload.new(configuration.archive_registry_factory).register(
55
+ glacier_response,
56
+ archivable_model,
57
+ tenant_id_field,
58
+ tenant_id
59
+ )
60
+ end
61
+
62
+ def path_to_file_for(archivable_model, tenant_id_field, tenant_id)
63
+ "#{configuration.storage_directory}/#{archivable_model}_#{tenant_id_field}_#{tenant_id}_#{clock.now.to_i}.csv"
64
+ end
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ class Tartarus
4
+ module RemoteStorage
5
+ class Glacier
6
+ class Client
7
+ attr_reader :client, :account_id
8
+ private :client, :account_id
9
+
10
+ def initialize(key:, secret:, region:, account_id:)
11
+ @client = Aws::Glacier::Client.new(credentials: Aws::Credentials.new(key, secret), region: region)
12
+ @account_id = account_id
13
+ end
14
+
15
+ def upload_archive(vault_name, file)
16
+ client.upload_archive(
17
+ account_id: account_id,
18
+ archive_description: file.description,
19
+ body: file.body,
20
+ vault_name: vault_name
21
+ )
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,43 @@
1
+ class Tartarus
2
+ module RemoteStorage
3
+ class Glacier
4
+ class Configuration
5
+ DEFAULT_ACCOUNT_ID = "-"
6
+ private_constant :DEFAULT_ACCOUNT_ID
7
+
8
+ REQUIRED_ATTRIBUTES_NAMES = %i(aws_key aws_secret aws_region account_id vault_name root_path
9
+ archive_registry_factory).freeze
10
+ attr_accessor *REQUIRED_ATTRIBUTES_NAMES
11
+
12
+ def self.build(aws_key:, aws_secret:, aws_region:, account_id: DEFAULT_ACCOUNT_ID, vault_name:, root_path:, archive_registry_factory:)
13
+ new.tap do |config|
14
+ config.aws_key = aws_key
15
+ config.aws_secret = aws_secret
16
+ config.aws_region = aws_region
17
+ config.account_id = account_id
18
+ config.vault_name = vault_name
19
+ config.root_path = root_path
20
+ config.archive_registry_factory = archive_registry_factory
21
+ config.validate!
22
+ end
23
+ end
24
+
25
+ def validate!
26
+ validate_presence
27
+ end
28
+
29
+ def storage_directory
30
+ "#{root_path}/tmp/tartarus/#{archive_registry_factory}"
31
+ end
32
+
33
+ private
34
+
35
+ def validate_presence
36
+ REQUIRED_ATTRIBUTES_NAMES.each do |attribute|
37
+ raise ":#{attribute} must be present" if public_send(attribute).nil?
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,38 @@
1
+ class Tartarus
2
+ module RemoteStorage
3
+ class Glacier
4
+ class CsvExport
5
+ FILE_MODE = "w"
6
+ DELIMITER = ";"
7
+ NO_PATH_FOR_EXPORT = nil
8
+ ENCODING = "UTF-8"
9
+ private_constant :FILE_MODE, :DELIMITER, :NO_PATH_FOR_EXPORT, :ENCODING
10
+
11
+ attr_reader :storage_directory, :file_service, :file_utils
12
+ private :storage_directory, :file_service, :file_utils
13
+
14
+ def initialize(storage_directory, file_service: ::File, file_utils: FileUtils)
15
+ @storage_directory = storage_directory
16
+ @file_service = file_service
17
+ @file_utils = file_utils
18
+ end
19
+
20
+ def export(collection, path_to_file)
21
+ with_csv_export_file(path_to_file) do |file|
22
+ collection.copy_to(NO_PATH_FOR_EXPORT, delimiter: DELIMITER) do |line|
23
+ file.write(line.force_encoding(ENCODING))
24
+ end
25
+ end
26
+ end
27
+
28
+ private
29
+
30
+ def with_csv_export_file(path_to_file, &block)
31
+ file_utils.mkdir_p(storage_directory) if !file_service.exist?(storage_directory)
32
+
33
+ file_service.open(path_to_file, FILE_MODE, &block)
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,31 @@
1
+ require "delegate"
2
+
3
+ class Tartarus
4
+ module RemoteStorage
5
+ class Glacier
6
+ class File < SimpleDelegator
7
+ def description
8
+ file_service.basename(self, ".*")
9
+ end
10
+
11
+ def body
12
+ self
13
+ end
14
+
15
+ def checksum
16
+ Digest::SHA256.file(path)
17
+ end
18
+
19
+ def delete_from_local_storage
20
+ file_service.delete(path)
21
+ end
22
+
23
+ private
24
+
25
+ def file_service
26
+ ::File
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,28 @@
1
+ class Tartarus
2
+ module RemoteStorage
3
+ class Glacier
4
+ class RegisterUpload
5
+ attr_reader :archive_registry_factory, :clock
6
+ private :archive_registry_factory, :clock
7
+
8
+ def initialize(archive_registry_factory, clock: Time)
9
+ @archive_registry_factory = archive_registry_factory
10
+ @clock = clock
11
+ end
12
+
13
+ def register(glacier_response, archivable_model, tenant_id_field, tenant_id)
14
+ archive_registry_factory.new.tap do |archive_registry|
15
+ archive_registry.glacier_location = glacier_response.location
16
+ archive_registry.glacier_checksum = glacier_response.checksum
17
+ archive_registry.glacier_archive_id = glacier_response.archive_id
18
+ archive_registry.archivable_model = archivable_model
19
+ archive_registry.tenant_id_field = tenant_id_field
20
+ archive_registry.tenant_id = tenant_id
21
+ archive_registry.completed_at = clock.now
22
+ archive_registry.save!
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,8 @@
1
+ class Tartarus
2
+ module RemoteStorage
3
+ class Null
4
+ def self.store(*)
5
+ end
6
+ end
7
+ end
8
+ end
@@ -6,15 +6,15 @@ class Tartarus::ScheduleArchivingModel
6
6
  @registry = registry
7
7
  end
8
8
 
9
- def schedule(model_name)
10
- archivable_item = registry.find_by_model(model_name)
9
+ def schedule(archivable_item_name)
10
+ archivable_item = registry.find_by_name(archivable_item_name)
11
11
 
12
12
  if archivable_item.scope_by_tenant?
13
13
  each_tenant(archivable_item) do |tenant|
14
- enqueue(Tartarus::Sidekiq::ArchiveModelWithTenantJob, archivable_item.queue, model_name, tenant)
14
+ enqueue(Tartarus::Sidekiq::ArchiveModelWithTenantJob, archivable_item.queue, archivable_item.name, tenant)
15
15
  end
16
16
  else
17
- enqueue(Tartarus::Sidekiq::ArchiveModelWithoutTenantJob, archivable_item.queue, model_name)
17
+ enqueue(Tartarus::Sidekiq::ArchiveModelWithoutTenantJob, archivable_item.queue, archivable_item.name)
18
18
  end
19
19
  end
20
20
 
@@ -4,8 +4,8 @@ class Tartarus
4
4
  class Sidekiq::ArchiveModelWithTenantJob
5
5
  include ::Sidekiq::Worker
6
6
 
7
- def perform(model_name, tenant_id)
8
- Tartarus::ArchiveModelWithTenant.new.archive(model_name, tenant_id)
7
+ def perform(archivable_item_name, tenant_id)
8
+ Tartarus::ArchiveModelWithTenant.new.archive(archivable_item_name, tenant_id)
9
9
  end
10
10
  end
11
11
  end
@@ -4,8 +4,8 @@ class Tartarus
4
4
  class Sidekiq::ArchiveModelWithoutTenantJob
5
5
  include ::Sidekiq::Worker
6
6
 
7
- def perform(model_name)
8
- Tartarus::ArchiveModelWithoutTenant.new.archive(model_name)
7
+ def perform(archivable_item_name)
8
+ Tartarus::ArchiveModelWithoutTenant.new.archive(archivable_item_name)
9
9
  end
10
10
  end
11
11
  end
@@ -4,8 +4,8 @@ class Tartarus
4
4
  class Sidekiq::ScheduleArchivingModelJob
5
5
  include ::Sidekiq::Worker
6
6
 
7
- def perform(model_name)
8
- Tartarus::ScheduleArchivingModel.new.schedule(model_name)
7
+ def perform(archivable_item_name)
8
+ Tartarus::ScheduleArchivingModel.new.schedule(archivable_item_name)
9
9
  end
10
10
  end
11
11
  end
data/tartarus-rb.gemspec CHANGED
@@ -30,10 +30,17 @@ Gem::Specification.new do |spec|
30
30
  spec.add_dependency "sidekiq", ">= 5"
31
31
  spec.add_dependency "sidekiq-cron", "~> 1"
32
32
 
33
+
33
34
  spec.add_development_dependency "rake", "~> 13.0"
34
35
  spec.add_development_dependency "rspec", "~> 3.0"
35
36
  spec.add_development_dependency "rspec-sidekiq"
37
+ spec.add_development_dependency "aws-sdk-glacier"
36
38
 
37
39
  spec.add_development_dependency "activerecord", "~> 6"
38
- spec.add_development_dependency "sqlite3"
40
+ spec.add_development_dependency "pg"
41
+ spec.add_development_dependency "vcr"
42
+ spec.add_development_dependency "webmock"
43
+ spec.add_development_dependency "dotenv"
44
+ spec.add_development_dependency "postgres-copy"
45
+ spec.add_development_dependency "timecop"
39
46
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tartarus-rb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Karol Galanciak
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-04 00:00:00.000000000 Z
11
+ date: 2021-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sidekiq
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: aws-sdk-glacier
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: activerecord
85
99
  requirement: !ruby/object:Gem::Requirement
@@ -95,7 +109,77 @@ dependencies:
95
109
  - !ruby/object:Gem::Version
96
110
  version: '6'
97
111
  - !ruby/object:Gem::Dependency
98
- name: sqlite3
112
+ name: pg
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: vcr
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: webmock
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: dotenv
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
167
+ - !ruby/object:Gem::Dependency
168
+ name: postgres-copy
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
181
+ - !ruby/object:Gem::Dependency
182
+ name: timecop
99
183
  requirement: !ruby/object:Gem::Requirement
100
184
  requirements:
101
185
  - - ">="
@@ -116,6 +200,7 @@ executables: []
116
200
  extensions: []
117
201
  extra_rdoc_files: []
118
202
  files:
203
+ - ".env.sample"
119
204
  - ".gitignore"
120
205
  - ".rspec"
121
206
  - ".travis.yml"
@@ -136,6 +221,7 @@ files:
136
221
  - lib/tartarus/archive_model_without_tenant.rb
137
222
  - lib/tartarus/archive_strategy.rb
138
223
  - lib/tartarus/archive_strategy/delete_all.rb
224
+ - lib/tartarus/archive_strategy/delete_all_using_limit_in_batches.rb
139
225
  - lib/tartarus/archive_strategy/delete_all_without_batches.rb
140
226
  - lib/tartarus/archive_strategy/destroy_all.rb
141
227
  - lib/tartarus/archive_strategy/destroy_all_without_batches.rb
@@ -143,6 +229,14 @@ files:
143
229
  - lib/tartarus/rb.rb
144
230
  - lib/tartarus/rb/version.rb
145
231
  - lib/tartarus/registry.rb
232
+ - lib/tartarus/remote_storage.rb
233
+ - lib/tartarus/remote_storage/glacier.rb
234
+ - lib/tartarus/remote_storage/glacier/client.rb
235
+ - lib/tartarus/remote_storage/glacier/configuration.rb
236
+ - lib/tartarus/remote_storage/glacier/csv_export.rb
237
+ - lib/tartarus/remote_storage/glacier/file.rb
238
+ - lib/tartarus/remote_storage/glacier/register_upload.rb
239
+ - lib/tartarus/remote_storage/null.rb
146
240
  - lib/tartarus/repository.rb
147
241
  - lib/tartarus/schedule_archiving_model.rb
148
242
  - lib/tartarus/sidekiq.rb