cws3chk 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5c5979e26ae5b8fdb283cb68dbd10e9270f5703c
4
+ data.tar.gz: 6781fede45e5a2864aa4d23782e85e23d6f451d9
5
+ SHA512:
6
+ metadata.gz: 677899d7077e3e11fb1eba084a7d70dd0aee54bf6b4229d382353094020328c84bde651a6effb5ccb57cd618224d89c29fbdd326620d4d5ee430eea0618597b2
7
+ data.tar.gz: d164a44ae4a6f325bf23c0c90d34fe83581f37e7da373c4be5d0233c7b9d419e046c0ff173a1f21cd721bc722ce86356c4b1daf71e07fc7c3306bcb359a4446a
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cws3chk.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Antoine Qu'hen
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ # Cws3chk
2
+
3
+ This gem checks the existence on S3 of the assets described by ActiveRecord
4
+ and Carrierwave.
5
+
6
+ * It loads the ids of the objects with assets in a task and splits them into
7
+ groups. Each group is going to be processed by a Resque Job.
8
+ * It studies the groups of objects by launching n threads. It checks for the
9
+ existence of the original file and the different versions.
10
+ * It stores the result of the check in Redis.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ gem 'cws3chk'
18
+ ```
19
+
20
+ And then execute:
21
+
22
+ $ bundle
23
+
24
+ ## Usage
25
+
26
+ $ bundle exec rake collect_missing_assets[User,avatar,100,250]
27
+ or
28
+
29
+ ```ruby
30
+ request = User.with_avatar
31
+ Cws3chk::Checker.new(request, :avatar, 250).check
32
+ ```
33
+ Then study your missing assets and fix them if needed:
34
+ ```ruby
35
+ redis.smembers 'Cws3chk::missing'
36
+ ```
37
+ You can also study the size of the resulting assets:
38
+ ```ruby
39
+ redis.smembers 'Cws3chk::metadata'
40
+ ```
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require "cws3chk/tasks"
4
+
5
+ require 'rake/testtask'
6
+
7
+ Rake::TestTask.new do |t|
8
+ t.libs << 'test'
9
+ end
10
+
11
+ desc "Run tests"
12
+ task :default => :test
data/cws3chk.gemspec ADDED
@@ -0,0 +1,32 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'cws3chk/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "cws3chk"
8
+ spec.version = Cws3chk::VERSION
9
+ spec.authors = ["Antoine Qu'hen"]
10
+ spec.email = ["antoinequhen@gmail.com"]
11
+ spec.summary = %q{Check assets are on S3 as Carrierwave says}
12
+ spec.description = %q{This gem studies the existency of AR model assets, including their versions, on S3 via Resque jobs.}
13
+ spec.homepage = "https://rubygems.org/gems/cws3chk"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_runtime_dependency 'rails', '>= 4.1.8'
22
+ spec.add_runtime_dependency 'aws', '>= 2.10.2'
23
+ spec.add_runtime_dependency 'resque', '>= 1.25.2'
24
+ spec.add_runtime_dependency 'retryable_block', '>= 0.0.1'
25
+ spec.add_runtime_dependency 'threadify_procs', '>= 0.0.5'
26
+ spec.add_runtime_dependency 'redis', '>= 3.1.0'
27
+ spec.add_runtime_dependency 'json', '>= 1.8.2'
28
+
29
+ spec.add_development_dependency "bundler", "~> 1.7"
30
+ spec.add_development_dependency "rake", "~> 10.0"
31
+ spec.add_development_dependency "mocha", ">= 1.1.0"
32
+ end
data/lib/cws3chk/checker.rb ADDED
@@ -0,0 +1,53 @@
1
+ require 'threadify_procs'
2
+ require 'Cws3chk/s3'
3
+ require 'Cws3chk/store'
4
+
5
+ # This class is in charge of checking the existence on S3 of the assets
6
+ # described by ActiveRecord + Carrierwave.
7
+ # It checks for the existence of the original file and the different versions.
8
+ # It stores the result of the check in Redis.
9
+ #
10
+ # After loading the data from the database, it performs the calls to S3 in
11
+ # parallel via threads.
12
+ class Cws3chk::Checker < Struct.new(:request, :mounted_column, :threads)
13
+ include ThreadifyProcs
14
+
15
+ def check
16
+ call_with_threads procs, number_of_threads: threads
17
+ end
18
+
19
+ private
20
+
21
+ def procs
22
+ [].tap do |_procs|
23
+ iterate_over_resources_and_versions do |resource, uploader, version|
24
+ _procs << Proc.new{ study resource, uploader, version }
25
+ end
26
+ end
27
+ end
28
+
29
+ def versions uploader
30
+ [nil] + uploader.versions.keys
31
+ end
32
+
33
+ def iterate_over_resources_and_versions
34
+ request.find_each do |resource|
35
+ next unless resource.public_send("#{mounted_column}?") # resource.image?
36
+ uploader = resource.public_send mounted_column # resource.image
37
+ versions(uploader).each do |version|
38
+ yield resource, uploader, version
39
+ end
40
+ end
41
+ end
42
+
43
+ def study resource, uploader, version
44
+ s3 = Cws3chk::S3.new uploader, version
45
+ store = Cws3chk::Store.new(
46
+ resource, mounted_column, version)
47
+ if s3.file_exists?
48
+ store.store_headers s3.headers
49
+ else
50
+ store.store_missing_asset
51
+ end
52
+ end
53
+ end
data/lib/cws3chk/job.rb ADDED
@@ -0,0 +1,12 @@
1
+ # This job takes an ActiveRecord model, a column and a bunch of ids, and runs
2
+ # the check on all the versions of those objects, including the original version.
3
+ class Cws3chk::Job
4
+ # TODO config
5
+ @queue = :z_low
6
+
7
+ def self.perform klass_name, ids_json, threads, mounted_column
8
+ ids = JSON.load ids_json
9
+ request = klass_name.constantize.where id: ids
10
+ Cws3chk::Checker.new(request, mounted_column, threads).check
11
+ end
12
+ end
data/lib/cws3chk/redis.rb ADDED
@@ -0,0 +1,7 @@
1
+ module Cws3chk::Redis
2
+
3
+ # TODO config
4
+ def redis
5
+ @redis ||= ::RedisProxy
6
+ end
7
+ end
data/lib/cws3chk/s3.rb ADDED
@@ -0,0 +1,43 @@
1
+ require 'retryable_block'
2
+
3
+ class Cws3chk::S3 < Struct.new(:uploader, :version)
4
+ include RetryableBlock
5
+
6
+ # TODO put 1.kilobyte in config
7
+ def file_exists?
8
+ headers.present? && headers['content-length'].to_i > 1.kilobyte
9
+ end
10
+
11
+ def headers
12
+ @headers ||= s3_key_headers
13
+ end
14
+
15
+ private
16
+
17
+ def s3_key_headers
18
+ s3_key.tap do |key|
19
+ begin
20
+ retryable{ key.head }
21
+ print '.'
22
+ rescue => e
23
+ Rails.logger.warn "Cws3chk::S3 #{e.message}"
24
+ end
25
+ end.headers # Headers is blank if the head request has failed.
26
+ end
27
+
28
+ def s3_key
29
+ Aws::S3::Key.create bucket, s3_key_path
30
+ end
31
+
32
+ def bucket
33
+ if defined?(S3::Helper) == 'constant' && S3::Helper.class == Class
34
+ @bucket ||= S3::Helper.current_bucket
35
+ else
36
+ #TODO config
37
+ end
38
+ end
39
+
40
+ def s3_key_path
41
+ @s3_key_path ||= (version ? uploader.public_send(version) : uploader).path
42
+ end
43
+ end
data/lib/cws3chk/store.rb ADDED
@@ -0,0 +1,24 @@
1
+ require 'cws3chk/redis'
2
+
3
+ class Cws3chk::Store < Struct.new(:resource, :mounted_column, :version)
4
+ include Cws3chk::Redis
5
+
6
+ # TODO redis or stdout > config
7
+ def store_missing_asset
8
+ redis.sadd 'Cws3chk::missing', base.to_json
9
+ end
10
+
11
+ def store_headers headers
12
+ redis.sadd 'Cws3chk::metadata',
13
+ (base + [headers['content-length']]).to_json
14
+ end
15
+
16
+ private
17
+
18
+ def base
19
+ [resource.class.name,
20
+ resource.id,
21
+ mounted_column,
22
+ version]
23
+ end
24
+ end
data/lib/cws3chk/tasks.rb ADDED
@@ -0,0 +1,29 @@
1
+ # 2X dynos support no more than 512 Processes or Threads
2
+ # An AWS connection needs its own thread.
3
+ desc <<-DESC
4
+ This task launchs Resque Jobs to study Carrierwave assets. The links to those
5
+ assets are comming from the database through ActiveRecord.
6
+
7
+ You need to provide the name of the ActiveRecord model, the mounted column, the
8
+ number of available workers and the number of threads per worker.
9
+
10
+ Default:
11
+ * number of workers = 100
12
+ * number of threads = 250
13
+
14
+ Remember:
15
+ * On Heroku a 2X dyno has a limit of 512 Processes or Threads.
16
+ * An AWS connection needs its own thread with the aws gem.
17
+ DESC
18
+ task :collect_missing_assets, :model, :mounted_column, :workers, :threads do
19
+ raise('No Rails environment !') unless Rake::Task[:environment]
20
+ Rake::Task[:environment].invoke
21
+ klass = args.model.camelize.constantize
22
+ ids = klass.where.not(args.mounted_column => nil).pluck(:id)
23
+ workers = args.workers.present? ? args.workers.to_i : 100
24
+ threads = args.threads.present? ? args.threads.to_i : 250
25
+ ids.each_slice(workers) do |ids_group|
26
+ Resque.enqueue Cws3chk::Job, klass.name, ids_group.to_json, threads,
27
+ mounted_column
28
+ end
29
+ end
data/lib/cws3chk/version.rb ADDED
@@ -0,0 +1,3 @@
1
+ module Cws3chk
2
+ VERSION = "0.0.1"
3
+ end
data/lib/cws3chk.rb ADDED
@@ -0,0 +1,8 @@
1
+ require 'Cws3chk/version'
2
+ require 'Cws3chk/job'
3
+ require 'Cws3chk/checker'
4
+ require 'active_support'
5
+ require 'active_support/core_ext'
6
+
7
+ module Cws3chk
8
+ end
data/test/test_cws3chk.rb ADDED
@@ -0,0 +1,116 @@
1
+ require 'minitest/autorun'
2
+ require 'cws3chk'
3
+ require 'mocha/mini_test'
4
+ require 'aws'
5
+
6
+ class Cws3chkTest < Minitest::Test
7
+
8
+ class Subject
9
+ def image?
10
+ true
11
+ end
12
+
13
+ def image
14
+ end
15
+ end
16
+
17
+ class FakeRedis
18
+ attr_reader :sets
19
+ def initialize
20
+ @sets = {}
21
+ end
22
+
23
+ def sadd key, value
24
+ (@sets[key] ||= []) << value
25
+ end
26
+ end
27
+
28
+ def setup
29
+ Cws3chk::Store.any_instance.stubs(:redis).returns(fake_redis)
30
+ end
31
+
32
+ def test_check_one_present_asset
33
+ set_s3_headers_to headers_of_present_asset
34
+ Cws3chk::Checker.new(request, :image, 2).check
35
+ keys = fake_redis.sets["Cws3chk::metadata"]
36
+ assert_equal keys.size, 2
37
+ assert_equal keys.sort, [
38
+ "[\"Cws3chkTest::Subject\",1,\"image\",\"pdf\",1999]",
39
+ "[\"Cws3chkTest::Subject\",1,\"image\",null,1999]"
40
+ ]
41
+ end
42
+
43
+ def test_check_one_missing_asset
44
+ set_s3_headers_to headers_of_missing_asset
45
+ Cws3chk::Checker.new(request, :image, 2).check
46
+ keys = fake_redis.sets["Cws3chk::missing"]
47
+ assert_equal keys.size, 2
48
+ assert_equal keys.sort, [
49
+ "[\"Cws3chkTest::Subject\",1,\"image\",\"pdf\"]",
50
+ "[\"Cws3chkTest::Subject\",1,\"image\",null]"
51
+ ]
52
+ end
53
+
54
+ private
55
+
56
+ def set_s3_headers_to headers
57
+ @headers = headers
58
+ Aws::S3::Key.stubs(:create).returns(s3_key)
59
+ end
60
+
61
+ def headers_of_present_asset
62
+ {'content-length' => 1999}
63
+ end
64
+
65
+ def headers_of_missing_asset
66
+ nil
67
+ end
68
+
69
+ def s3_key
70
+ mock('s3_key').tap do |_mock|
71
+ _mock.stubs(:head).returns(true)
72
+ _mock.stubs(:headers).returns(@headers)
73
+ end
74
+ end
75
+
76
+ def request
77
+ mock().tap do |_mock|
78
+ _mock.stubs(:find_each).yields(subject)
79
+ end
80
+ end
81
+
82
+ def subject
83
+ mock().tap do |_mock|
84
+ _mock.stubs(:image).returns(uploader)
85
+ _mock.stubs(:image?).returns(true)
86
+ _mock.stubs(:class).returns(Subject)
87
+ _mock.stubs(:id).returns(1)
88
+ end
89
+ end
90
+
91
+ def uploader
92
+ mock().tap do |_mock|
93
+ _mock.stubs(:versions).returns(mock keys: [:pdf])
94
+ _mock.stubs(:pdf).returns(pdf_version)
95
+ _mock.stubs(:path).returns(path)
96
+ end
97
+ end
98
+
99
+ def path
100
+ 'path_to_original'
101
+ end
102
+
103
+ def pdf_version
104
+ mock().tap do |_mock|
105
+ _mock.stubs(:path).returns(pdf_path)
106
+ end
107
+ end
108
+
109
+ def pdf_path
110
+ 'path_to_pdf_version'
111
+ end
112
+
113
+ def fake_redis
114
+ @fake_redis ||= FakeRedis.new
115
+ end
116
+ end
metadata ADDED
@@ -0,0 +1,202 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cws3chk
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Antoine Qu'hen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rails
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 4.1.8
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 4.1.8
27
+ - !ruby/object:Gem::Dependency
28
+ name: aws
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 2.10.2
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 2.10.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: resque
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 1.25.2
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 1.25.2
55
+ - !ruby/object:Gem::Dependency
56
+ name: retryable_block
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.0.1
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: 0.0.1
69
+ - !ruby/object:Gem::Dependency
70
+ name: threadify_procs
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: 0.0.5
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: 0.0.5
83
+ - !ruby/object:Gem::Dependency
84
+ name: redis
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: 3.1.0
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: 3.1.0
97
+ - !ruby/object:Gem::Dependency
98
+ name: json
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: 1.8.2
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 1.8.2
111
+ - !ruby/object:Gem::Dependency
112
+ name: bundler
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: '1.7'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ~>
123
+ - !ruby/object:Gem::Version
124
+ version: '1.7'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rake
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ~>
130
+ - !ruby/object:Gem::Version
131
+ version: '10.0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ~>
137
+ - !ruby/object:Gem::Version
138
+ version: '10.0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: mocha
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - '>='
144
+ - !ruby/object:Gem::Version
145
+ version: 1.1.0
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: 1.1.0
153
+ description: This gem studies the existency of AR model assets, including their versions,
154
+ on S3 via Resque jobs.
155
+ email:
156
+ - antoinequhen@gmail.com
157
+ executables: []
158
+ extensions: []
159
+ extra_rdoc_files: []
160
+ files:
161
+ - .gitignore
162
+ - Gemfile
163
+ - LICENSE.txt
164
+ - README.md
165
+ - Rakefile
166
+ - cws3chk.gemspec
167
+ - lib/cws3chk.rb
168
+ - lib/cws3chk/checker.rb
169
+ - lib/cws3chk/job.rb
170
+ - lib/cws3chk/redis.rb
171
+ - lib/cws3chk/s3.rb
172
+ - lib/cws3chk/store.rb
173
+ - lib/cws3chk/tasks.rb
174
+ - lib/cws3chk/version.rb
175
+ - test/test_cws3chk.rb
176
+ homepage: https://rubygems.org/gems/cws3chk
177
+ licenses:
178
+ - MIT
179
+ metadata: {}
180
+ post_install_message:
181
+ rdoc_options: []
182
+ require_paths:
183
+ - lib
184
+ required_ruby_version: !ruby/object:Gem::Requirement
185
+ requirements:
186
+ - - '>='
187
+ - !ruby/object:Gem::Version
188
+ version: '0'
189
+ required_rubygems_version: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - '>='
192
+ - !ruby/object:Gem::Version
193
+ version: '0'
194
+ requirements: []
195
+ rubyforge_project:
196
+ rubygems_version: 2.0.14
197
+ signing_key:
198
+ specification_version: 4
199
+ summary: Check assets are on S3 as Carrierwave says
200
+ test_files:
201
+ - test/test_cws3chk.rb
202
+ has_rdoc: