cws3chk 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5c5979e26ae5b8fdb283cb68dbd10e9270f5703c
4
+ data.tar.gz: 6781fede45e5a2864aa4d23782e85e23d6f451d9
5
+ SHA512:
6
+ metadata.gz: 677899d7077e3e11fb1eba084a7d70dd0aee54bf6b4229d382353094020328c84bde651a6effb5ccb57cd618224d89c29fbdd326620d4d5ee430eea0618597b2
7
+ data.tar.gz: d164a44ae4a6f325bf23c0c90d34fe83581f37e7da373c4be5d0233c7b9d419e046c0ff173a1f21cd721bc722ce86356c4b1daf71e07fc7c3306bcb359a4446a
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cws3chk.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Antoine Qu'hen
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,40 @@
1
+ # Cws3chk
2
+
3
+ This gem checks the existence on S3 of the assets described by ActiveRecord
4
+ and Carrierwave.
5
+
6
+ * It loads the ids of the objects with assets in a task and splits them into
7
+ groups. Each group is going to be processed by a Resque Job.
8
+ * It studies the groups of objects by launching n threads. It checks for the
9
+ existence of the original file and the different versions.
10
+ * It stores the result of the check in Redis.
11
+
12
+ ## Installation
13
+
14
+ Add this line to your application's Gemfile:
15
+
16
+ ```ruby
17
+ gem 'cws3chk'
18
+ ```
19
+
20
+ And then execute:
21
+
22
+ $ bundle
23
+
24
+ ## Usage
25
+
26
+ $ bundle exec rake collect_missing_assets[User,avatar]
27
+ or
28
+
29
+ ```ruby
30
+ request = User.with_avatar
31
+ Cws3chk::Checker.new(request, :avatar, 250).check
32
+ ```
33
+ Then study your missing assets and fix them if needed:
34
+ ```ruby
35
+ redis.smembers 'Cws3chk::missing'
36
+ ```
37
+ You can also study the size of the resulting assets:
38
+ ```ruby
39
+ redis.smembers 'Cws3chk::metadata'
40
+ ```
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require "Cws3chk/tasks"
4
+
5
+ require 'rake/testtask'
6
+
7
+ Rake::TestTask.new do |t|
8
+ t.libs << 'test'
9
+ end
10
+
11
+ desc "Run tests"
12
+ task :default => :test
data/cws3chk.gemspec ADDED
@@ -0,0 +1,32 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'Cws3chk/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "cws3chk"
8
+ spec.version = Cws3chk::VERSION
9
+ spec.authors = ["Antoine Qu'hen"]
10
+ spec.email = ["antoinequhen@gmail.com"]
11
+ spec.summary = %q{Check assets are on S3 as Carrierwave says}
12
+ spec.description = %q{This gem studies the existency of AR model assets, including their versions, on S3 via Resque jobs.}
13
+ spec.homepage = "https://rubygems.org/gems/cws3chk"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_runtime_dependency 'rails', '>= 4.1.8'
22
+ spec.add_runtime_dependency 'aws', '>= 2.10.2'
23
+ spec.add_runtime_dependency 'resque', '>= 1.25.2'
24
+ spec.add_runtime_dependency 'retryable_block', '>= 0.0.1'
25
+ spec.add_runtime_dependency 'threadify_procs', '>= 0.0.5'
26
+ spec.add_runtime_dependency 'redis', '>= 3.1.0'
27
+ spec.add_runtime_dependency 'json', '>= 1.8.2'
28
+
29
+ spec.add_development_dependency "bundler", "~> 1.7"
30
+ spec.add_development_dependency "rake", "~> 10.0"
31
+ spec.add_development_dependency "mocha", ">= 1.1.0"
32
+ end
@@ -0,0 +1,53 @@
1
+ require 'threadify_procs'
2
+ require 'Cws3chk/s3'
3
+ require 'Cws3chk/store'
4
+
5
+ # This class is in charge of checking the existance on S3 of the assets
6
+ # described by ActiveRecord + Carrierwave.
7
+ # It checks for the existence of the original file an the different versions.
8
+ # It stores the result of the check in Redis.
9
+ #
10
+ # After loading the data from the database, it performs the calls to S3 in
11
+ # parallel via threads.
12
+ class Cws3chk::Checker < Struct.new(:request, :mounted_column, :threads)
13
+ include ThreadifyProcs
14
+
15
+ def check
16
+ call_with_threads procs, number_of_threads: threads
17
+ end
18
+
19
+ private
20
+
21
+ def procs
22
+ [].tap do |_procs|
23
+ iterate_over_resources_and_versions do |resource, uploader, version|
24
+ _procs << Proc.new{ study resource, uploader, version }
25
+ end
26
+ end
27
+ end
28
+
29
+ def versions uploader
30
+ [nil] + uploader.versions.keys
31
+ end
32
+
33
+ def iterate_over_resources_and_versions
34
+ request.find_each do |resource|
35
+ next unless resource.public_send("#{mounted_column}?") # resource.image?
36
+ uploader = resource.public_send mounted_column # resource.image
37
+ versions(uploader).each do |version|
38
+ yield resource, uploader, version
39
+ end
40
+ end
41
+ end
42
+
43
+ def study resource, uploader, version
44
+ s3 = Cws3chk::S3.new uploader, version
45
+ store = Cws3chk::Store.new(
46
+ resource, mounted_column, version)
47
+ if s3.file_exists?
48
+ store.store_headers s3.headers
49
+ else
50
+ store.store_missing_asset
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,12 @@
1
+ # This job takes an ActiveRecord model, a column and a bunch of ids, and runs
2
+ # the check on all the versions of those objects. Including the original version.
3
+ class Cws3chk::Job
4
+ # TODO config
5
+ @queue = :z_low
6
+
7
+ def self.perform klass_name, ids_json, threads, mounted_column
8
+ ids = JSON.load ids_json
9
+ request = klass_name.constantize.where id: ids
10
+ Cws3chk::Checker.new(request, mounted_column, threads).check
11
+ end
12
+ end
@@ -0,0 +1,7 @@
1
+ module Cws3chk::Redis
2
+ # Mixin giving its includers a memoized `redis` accessor.
3
+ # TODO config
4
+ def redis
5
+ @redis ||= ::RedisProxy # NOTE(review): ::RedisProxy is not defined in this gem; presumably supplied by the host app
6
+ end
7
+ end
+ end
data/lib/cws3chk/s3.rb ADDED
@@ -0,0 +1,43 @@
1
+ require 'retryable_block'
2
+
3
+ class Cws3chk::S3 < Struct.new(:uploader, :version)
4
+ include RetryableBlock
5
+
6
+ # TODO put 1.kilobyte in config
7
+ def file_exists?
8
+ headers.present? && headers['content-length'].to_i > 1.kilobyte
9
+ end
10
+
11
+ def headers
12
+ @headers ||= s3_key_headers
13
+ end
14
+
15
+ private
16
+
17
+ def s3_key_headers
18
+ s3_key.tap do |key|
19
+ begin
20
+ retryable{ key.head }
21
+ print '.'
22
+ rescue => e
23
+ Rails.logger.warn "Cws3chk::S3 #{e.message}"
24
+ end
25
+ end.headers # Headers is blank if the head request has failed.
26
+ end
27
+
28
+ def s3_key
29
+ Aws::S3::Key.create bucket, s3_key_path
30
+ end
31
+
32
+ def bucket
33
+ if defined?(S3::Helper) == 'constant' && S3::Helper.class == Class
34
+ @bucket ||= S3::Helper.current_bucket
35
+ else
36
+ #TODO config
37
+ end
38
+ end
39
+
40
+ def s3_key_path
41
+ @s3_key_path ||= (version ? uploader.public_send(version) : uploader).path
42
+ end
43
+ end
@@ -0,0 +1,24 @@
1
+ require 'Cws3chk/redis'
2
+
3
+ class Cws3chk::Store < Struct.new(:resource, :mounted_column, :version)
4
+ include Cws3chk::Redis
5
+
6
+ # TODO redis or stdout > config
7
+ def store_missing_asset
8
+ redis.sadd 'Cws3chk::missing', base.to_json
9
+ end
10
+
11
+ def store_headers headers
12
+ redis.sadd 'Cws3chk::metadata',
13
+ (base + [headers['content-length']]).to_json
14
+ end
15
+
16
+ private
17
+
18
+ def base
19
+ [resource.class.name,
20
+ resource.id,
21
+ mounted_column,
22
+ version]
23
+ end
24
+ end
@@ -0,0 +1,29 @@
1
+ # 2X dynos support no more than 512 Processes or Threads
2
+ # An AWS connection needs its own thread.
3
+ desc <<-DESC
4
+ This task launchs Resque Jobs to study Carrierwave assets. The links to those
5
+ assets are comming from the database through ActiveRecord.
6
+
7
+ You need to provide the name of the ActiveRecord model, the mounted column, the
8
+ number of available workers and the number of threads per worker.
9
+
10
+ Default:
11
+ * number of workers = 100
12
+ * number of threads = 250
13
+
14
+ Remember:
15
+ * On Heroku a 2X dyno has a limit of 512 Processes or Threads.
16
+ * An AWS connection needs its own thread with the aws gem.
17
+ DESC
18
+ task :collect_missing_assets, :model, :mounted_column, :workers, :threads do
19
+ raise('No Rails environment !') unless Rake::Task[:environment]
20
+ Rake::Task[:environment].invoke
21
+ klass = args.model.camelize.constantize
22
+ ids = klass.where.not(args.mounted_column => nil).pluck(:id)
23
+ workers = args.workers.present? ? args.workers.to_i : 100
24
+ threads = args.threads.present? ? args.threads.to_i : 250
25
+ ids.each_slice(workers) do |ids_group|
26
+ Resque.enqueue Cws3chk::Job, klass.name, ids_group.to_json, threads,
27
+ mounted_column
28
+ end
29
+ end
@@ -0,0 +1,3 @@
1
+ module Cws3chk
2
+ VERSION = "0.0.1" # release version; the gemspec reads it as spec.version
3
+ end
data/lib/cws3chk.rb ADDED
@@ -0,0 +1,8 @@
1
+ require 'Cws3chk/version'
2
+ require 'Cws3chk/job'
3
+ require 'Cws3chk/checker'
4
+ require 'active_support'
5
+ require 'active_support/core_ext'
6
+
7
+ module Cws3chk
8
+ end
@@ -0,0 +1,116 @@
1
+ require 'minitest/autorun'
2
+ require 'Cws3chk'
3
+ require 'mocha/mini_test'
4
+ require 'aws'
5
+
6
+ class Cws3chkTest < Minitest::Test
7
+
8
+ class Subject
9
+ def image?
10
+ true
11
+ end
12
+
13
+ def image
14
+ end
15
+ end
16
+
17
+ class FakeRedis
18
+ attr_reader :sets
19
+ def initialize
20
+ @sets = {}
21
+ end
22
+
23
+ def sadd key, value
24
+ (@sets[key] ||= []) << value
25
+ end
26
+ end
27
+
28
+ def setup
29
+ Cws3chk::Store.any_instance.stubs(:redis).returns(fake_redis)
30
+ end
31
+
32
+ def test_check_one_present_asset
33
+ set_s3_headers_to headers_of_present_asset
34
+ Cws3chk::Checker.new(request, :image, 2).check
35
+ keys = fake_redis.sets["Cws3chk::metadata"]
36
+ assert_equal keys.size, 2
37
+ assert_equal keys.sort, [
38
+ "[\"Cws3chkTest::Subject\",1,\"image\",\"pdf\",1999]",
39
+ "[\"Cws3chkTest::Subject\",1,\"image\",null,1999]"
40
+ ]
41
+ end
42
+
43
+ def test_check_one_missing_asset
44
+ set_s3_headers_to headers_of_missing_asset
45
+ Cws3chk::Checker.new(request, :image, 2).check
46
+ keys = fake_redis.sets["Cws3chk::missing"]
47
+ assert_equal keys.size, 2
48
+ assert_equal keys.sort, [
49
+ "[\"Cws3chkTest::Subject\",1,\"image\",\"pdf\"]",
50
+ "[\"Cws3chkTest::Subject\",1,\"image\",null]"
51
+ ]
52
+ end
53
+
54
+ private
55
+
56
+ def set_s3_headers_to headers
57
+ @headers = headers
58
+ Aws::S3::Key.stubs(:create).returns(s3_key)
59
+ end
60
+
61
+ def headers_of_present_asset
62
+ {'content-length' => 1999}
63
+ end
64
+
65
+ def headers_of_missing_asset
66
+ nil
67
+ end
68
+
69
+ def s3_key
70
+ mock('s3_key').tap do |_mock|
71
+ _mock.stubs(:head).returns(true)
72
+ _mock.stubs(:headers).returns(@headers)
73
+ end
74
+ end
75
+
76
+ def request
77
+ mock().tap do |_mock|
78
+ _mock.stubs(:find_each).yields(subject)
79
+ end
80
+ end
81
+
82
+ def subject
83
+ mock().tap do |_mock|
84
+ _mock.stubs(:image).returns(uploader)
85
+ _mock.stubs(:image?).returns(true)
86
+ _mock.stubs(:class).returns(Subject)
87
+ _mock.stubs(:id).returns(1)
88
+ end
89
+ end
90
+
91
+ def uploader
92
+ mock().tap do |_mock|
93
+ _mock.stubs(:versions).returns(mock keys: [:pdf])
94
+ _mock.stubs(:pdf).returns(pdf_version)
95
+ _mock.stubs(:path).returns(path)
96
+ end
97
+ end
98
+
99
+ def path
100
+ 'path_to_original'
101
+ end
102
+
103
+ def pdf_version
104
+ mock().tap do |_mock|
105
+ _mock.stubs(:path).returns(pdf_path)
106
+ end
107
+ end
108
+
109
+ def pdf_path
110
+ 'path_to_pdf_version'
111
+ end
112
+
113
+ def fake_redis
114
+ @fake_redis ||= FakeRedis.new
115
+ end
116
+ end
metadata ADDED
@@ -0,0 +1,202 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cws3chk
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Antoine Qu'hen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-20 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rails
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 4.1.8
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: 4.1.8
27
+ - !ruby/object:Gem::Dependency
28
+ name: aws
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 2.10.2
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 2.10.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: resque
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: 1.25.2
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: 1.25.2
55
+ - !ruby/object:Gem::Dependency
56
+ name: retryable_block
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.0.1
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: 0.0.1
69
+ - !ruby/object:Gem::Dependency
70
+ name: threadify_procs
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: 0.0.5
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: 0.0.5
83
+ - !ruby/object:Gem::Dependency
84
+ name: redis
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: 3.1.0
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: 3.1.0
97
+ - !ruby/object:Gem::Dependency
98
+ name: json
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - '>='
102
+ - !ruby/object:Gem::Version
103
+ version: 1.8.2
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 1.8.2
111
+ - !ruby/object:Gem::Dependency
112
+ name: bundler
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: '1.7'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ~>
123
+ - !ruby/object:Gem::Version
124
+ version: '1.7'
125
+ - !ruby/object:Gem::Dependency
126
+ name: rake
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ~>
130
+ - !ruby/object:Gem::Version
131
+ version: '10.0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ~>
137
+ - !ruby/object:Gem::Version
138
+ version: '10.0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: mocha
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - '>='
144
+ - !ruby/object:Gem::Version
145
+ version: 1.1.0
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - '>='
151
+ - !ruby/object:Gem::Version
152
+ version: 1.1.0
153
+ description: This gem studies the existency of AR model assets, including their versions,
154
+ on S3 via Resque jobs.
155
+ email:
156
+ - antoinequhen@gmail.com
157
+ executables: []
158
+ extensions: []
159
+ extra_rdoc_files: []
160
+ files:
161
+ - .gitignore
162
+ - Gemfile
163
+ - LICENSE.txt
164
+ - README.md
165
+ - Rakefile
166
+ - cws3chk.gemspec
167
+ - lib/cws3chk.rb
168
+ - lib/cws3chk/checker.rb
169
+ - lib/cws3chk/job.rb
170
+ - lib/cws3chk/redis.rb
171
+ - lib/cws3chk/s3.rb
172
+ - lib/cws3chk/store.rb
173
+ - lib/cws3chk/tasks.rb
174
+ - lib/cws3chk/version.rb
175
+ - test/test_cws3chk.rb
176
+ homepage: https://rubygems.org/gems/cws3chk
177
+ licenses:
178
+ - MIT
179
+ metadata: {}
180
+ post_install_message:
181
+ rdoc_options: []
182
+ require_paths:
183
+ - lib
184
+ required_ruby_version: !ruby/object:Gem::Requirement
185
+ requirements:
186
+ - - '>='
187
+ - !ruby/object:Gem::Version
188
+ version: '0'
189
+ required_rubygems_version: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - '>='
192
+ - !ruby/object:Gem::Version
193
+ version: '0'
194
+ requirements: []
195
+ rubyforge_project:
196
+ rubygems_version: 2.0.14
197
+ signing_key:
198
+ specification_version: 4
199
+ summary: Check assets are on S3 as Carrierwave says
200
+ test_files:
201
+ - test/test_cws3chk.rb
202
+ has_rdoc: