derrick 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3337d0adef7d3a74367fd32cd6432e10746d9b50
4
- data.tar.gz: 23b953e5d08de9fefcc73ac06217d64a39253af1
3
+ metadata.gz: 85d156880226b9165b0f32522de5eaf60e1a63ac
4
+ data.tar.gz: f5d4ea05933b71e2cabfb72a55683bc81d79b063
5
5
  SHA512:
6
- metadata.gz: 9ffb3ec31001802893c3de556751ecbc086387957c5028895281541b127948c1b528a45c6c6c003494a03ed3500a4d847bcde49dc81e90babf75cfd18f67e04c
7
- data.tar.gz: b61b22c42e72d3b7d3dd83daeb8eed86c552d5b5628bc71131a446b53e26f5f314443ebcb3e14377fb9b03dea9d7c3672e6059fe77d873e0ceee856fbb9b6433
6
+ metadata.gz: 8f6d15213b7598333fb606230287f9ca9cd1f2276b04e5a5da1a4ba020dc9622aac58caeab1b8950f19d30f62a3717fd6e57486b6d659a68398ad693eab3e809
7
+ data.tar.gz: 72b95d56cb1b4b52f6ab680d45c76016570f9b9b44008dd0506a37ed2ac215686aa0dce61b9c6b0286235fbcf558cec4cb360870dc50635c510b759851e5fcbf
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --warnings
3
+ --require spec_helper
data/derrick.gemspec CHANGED
@@ -19,6 +19,7 @@ Gem::Specification.new do |spec|
19
19
 
20
20
  spec.add_development_dependency 'bundler', '~> 1.6'
21
21
  spec.add_development_dependency 'rake'
22
+ spec.add_development_dependency 'rspec'
22
23
 
23
24
  spec.add_dependency 'redis'
24
25
  end
data/lib/derrick/aggregator.rb ADDED
@@ -0,0 +1,51 @@
1
+ module Derrick
2
+ class Aggregator
3
+ ANY = '*'.freeze
4
+
5
+ attr_reader :patterns
6
+ def initialize(queue, context)
7
+ @queue = queue
8
+ @patterns = {}
9
+ @context = context
10
+ end
11
+
12
+ def run
13
+ fetcher_count = @context.concurrency
14
+ loop do
15
+ keys = @queue.pop
16
+ if keys == :stop
17
+ fetcher_count -= 1
18
+ break if fetcher_count == 0
19
+ else
20
+ keys.each { |k| aggregate(k) }
21
+ end
22
+ end
23
+ self
24
+ end
25
+
26
+ def aggregate(key)
27
+ pattern = pattern_from(key)
28
+ pattern.aggregate(key)
29
+
30
+ if patterns.size > @context.max_patterns
31
+ compact_uniques!
32
+ end
33
+ end
34
+
35
+ def pattern_from(key)
36
+ @patterns[Pattern.extract(key.name)] ||= Pattern.new
37
+ end
38
+
39
+ def compact_uniques!
40
+ any = @patterns.delete(ANY) || Pattern.new
41
+ @patterns.each do |key, aggregate|
42
+ if aggregate.count == 1
43
+ any.merge!(@patterns.delete(key))
44
+ end
45
+ end
46
+ @patterns[ANY] = any
47
+ nil
48
+ end
49
+
50
+ end
51
+ end
data/lib/derrick/cli.rb CHANGED
@@ -41,11 +41,12 @@ module Derrick
41
41
  end
42
42
 
43
43
  class Context
44
- attr_accessor :concurrency, :batch_size
44
+ attr_accessor :concurrency, :batch_size, :max_patterns
45
45
 
46
46
  def initialize
47
47
  @concurrency = 2
48
48
  @batch_size = 10_000
49
+ @max_patterns = 1_000
49
50
  end
50
51
  end
51
52
 
@@ -140,6 +141,9 @@ module Derrick
140
141
  opts.on('-b', '--batch-size BATCH_SIZE') do |batch_size|
141
142
  @context.batch_size = Integer(batch_size)
142
143
  end
144
+ opts.on('-C', '--max-patterns MAX_PATTERNS') do |max_patterns|
145
+ @context.max_patterns = Integer(max_patterns)
146
+ end
143
147
  end
144
148
  end
145
149
 
data/lib/derrick/collector.rb ADDED
@@ -0,0 +1,25 @@
1
+ module Derrick
2
+ class Collector
3
+ def initialize(redis, queue, progress, context)
4
+ @redis = redis
5
+ @queue = queue
6
+ @progress = progress
7
+ @context = context
8
+ end
9
+
10
+ def run
11
+ collect_keys
12
+ @context.concurrency.times { @queue.push(:stop) }
13
+ end
14
+
15
+ def collect_keys
16
+ cursor = '0'
17
+ loop do
18
+ cursor, keys = @redis.scan(cursor, count: @context.batch_size)
19
+ @queue.push(keys)
20
+ @progress.increment_collected(keys.size)
21
+ return if cursor == '0'
22
+ end
23
+ end
24
+ end
25
+ end
data/lib/derrick/fetcher.rb ADDED
@@ -0,0 +1,39 @@
1
+ module Derrick
2
+ Key = Struct.new(:name, :type, :ttl)
3
+
4
+ class Fetcher
5
+ def initialize(redis, input, output, progress)
6
+ @redis = redis
7
+ @input = input
8
+ @output = output
9
+ @progress = progress
10
+ end
11
+
12
+ def run
13
+ while (keys = @input.pop) != :stop
14
+ @output.push(stats(keys))
15
+ end
16
+ @output.push(:stop)
17
+ end
18
+
19
+ def stats(keys)
20
+ types = @redis.pipelined do
21
+ keys.each do |key|
22
+ @redis.type(key)
23
+ end
24
+ end
25
+
26
+ ttls = @redis.pipelined do
27
+ keys.each do |key|
28
+ @redis.ttl(key)
29
+ end
30
+ end
31
+
32
+ @progress.increment_fetched(keys.size)
33
+
34
+ keys.map.with_index do |key, index|
35
+ Key.new(key, types[index], ttls[index])
36
+ end
37
+ end
38
+ end
39
+ end
data/lib/derrick/inspector.rb CHANGED
@@ -3,161 +3,6 @@ require 'thread'
3
3
  Thread.abort_on_exception = true
4
4
 
5
5
  module Derrick
6
- class Collector
7
- def initialize(redis, queue, progress, context)
8
- @redis = redis
9
- @queue = queue
10
- @progress = progress
11
- @context = context
12
- end
13
-
14
- def run
15
- collect_keys
16
- @context.concurrency.times { @queue.push(:stop) }
17
- end
18
-
19
- def collect_keys
20
- cursor = '0'
21
- loop do
22
- cursor, keys = @redis.scan(cursor, count: @context.batch_size)
23
- @queue.push(keys)
24
- @progress.increment_collected(keys.size)
25
- return if cursor == '0'
26
- end
27
- end
28
- end
29
-
30
- Key = Struct.new(:name, :type, :ttl)
31
-
32
- class Fetcher
33
-
34
- def initialize(redis, input, output, progress)
35
- @redis = redis
36
- @input = input
37
- @output = output
38
- @progress = progress
39
- end
40
-
41
- def run
42
- while (keys = @input.pop) != :stop
43
- @output.push(stats(keys))
44
- end
45
- @output.push(:stop)
46
- end
47
-
48
- def stats(keys)
49
- types = @redis.pipelined do
50
- keys.each do |key|
51
- @redis.type(key)
52
- end
53
- end
54
-
55
- ttls = @redis.pipelined do
56
- keys.each do |key|
57
- @redis.ttl(key)
58
- end
59
- end
60
-
61
- @progress.increment_fetched(keys.size)
62
-
63
- keys.map.with_index do |key, index|
64
- Key.new(key, types[index], ttls[index])
65
- end
66
- end
67
-
68
- end
69
-
70
- class Pattern
71
- attr_reader :pattern, :count, :expirable_count, :persisted_count, :types_count
72
-
73
- def initialize
74
- @count = 0
75
- @expirable_count = 0
76
- @persisted_count = 0
77
- @types_count = Hash.new(0)
78
- end
79
-
80
- def expirable_ratio
81
- return 1 if count == 0
82
- expirable_count.to_f / count
83
- end
84
-
85
- def types_ratio
86
- Hash[@types_count.map do |type, sub_count|
87
- [type, sub_count.to_f / count]
88
- end]
89
- end
90
-
91
- def aggregate(key)
92
- @count += 1
93
-
94
- if key.ttl == -1
95
- @persisted_count += 1
96
- else
97
- @expirable_count += 1
98
- end
99
-
100
- @types_count[key.type] += 1
101
- end
102
-
103
- end
104
-
105
- class Progress
106
-
107
- attr_reader :total, :collected, :fetched
108
-
109
- def initialize(total)
110
- @total = total
111
- @mutex = Mutex.new
112
-
113
- @collected = 0
114
- @fetched = 0
115
- end
116
-
117
- def increment_collected(count)
118
- @mutex.synchronize { @collected += count }
119
- end
120
-
121
- def increment_fetched(count)
122
- @mutex.synchronize { @fetched += count }
123
- end
124
-
125
- end
126
-
127
- class Aggregator
128
- attr_reader :patterns
129
- def initialize(queue, context)
130
- @queue = queue
131
- @patterns = {}
132
- @context = context
133
- end
134
-
135
- def run
136
- fetcher_count = @context.concurrency
137
- loop do
138
- keys = @queue.pop
139
- if keys == :stop
140
- fetcher_count -= 1
141
- break if fetcher_count == 0
142
- else
143
- keys.each { |k| aggregate(k) }
144
- end
145
- end
146
- self
147
- end
148
-
149
- def aggregate(key)
150
- pattern = pattern_from(key)
151
- pattern.aggregate(key)
152
- end
153
-
154
- def pattern_from(key)
155
- canonical_key = key.name.inspect.gsub(/(^|:)(\d+|[0-9a-f]{32,40})($|:)/, '\1*\3')
156
- @patterns[canonical_key] ||= Pattern.new
157
- end
158
-
159
- end
160
-
161
6
  class Inspector
162
7
  attr_reader :redis, :progress
163
8
 
data/lib/derrick/pattern.rb ADDED
@@ -0,0 +1,62 @@
1
+ module Derrick
2
+ class Pattern
3
+ SEGMENT_SEPARATORS = %w(: _ /).freeze
4
+ FIRST_SEGMENT_PATTERN = /.*(#{SEGMENT_SEPARATORS.map { |s| Regexp.escape(s) }.join('|')})/
5
+ IDENTIFIER_PATTERNS = '\d+|[0-9a-f]{32,40}'
6
+ SEGMENT_PATTERNS = SEGMENT_SEPARATORS.map do |separator|
7
+ /(^|#{Regexp.escape(separator)})(#{IDENTIFIER_PATTERNS})($|#{Regexp.escape(separator)})/
8
+ end
9
+
10
+ attr_reader :pattern, :count, :expirable_count, :persisted_count, :types_count
11
+
12
+ def self.extract(key_name)
13
+ key_pattern = SEGMENT_PATTERNS.inject(key_name.inspect[1..-2]) do |key, pattern|
14
+ key.gsub(pattern, '\1*\3')
15
+ end
16
+
17
+ return "#{key_name[FIRST_SEGMENT_PATTERN]}*" if key_pattern == key_name
18
+
19
+ key_pattern
20
+ end
21
+
22
+ def initialize
23
+ @count = 0
24
+ @expirable_count = 0
25
+ @persisted_count = 0
26
+ @types_count = Hash.new(0)
27
+ end
28
+
29
+ def merge!(other)
30
+ @count += other.count
31
+ @expirable_count += other.expirable_count
32
+ @persisted_count += other.persisted_count
33
+ other.types_count.each do |type, count|
34
+ @types_count[type] += count
35
+ end
36
+ self
37
+ end
38
+
39
+ def expirable_ratio
40
+ return 1 if count == 0
41
+ expirable_count.to_f / count
42
+ end
43
+
44
+ def types_ratio
45
+ Hash[@types_count.map do |type, sub_count|
46
+ [type, sub_count.to_f / count]
47
+ end]
48
+ end
49
+
50
+ def aggregate(key)
51
+ @count += 1
52
+
53
+ if key.ttl == -1
54
+ @persisted_count += 1
55
+ else
56
+ @expirable_count += 1
57
+ end
58
+
59
+ @types_count[key.type] += 1
60
+ end
61
+ end
62
+ end
data/lib/derrick/progress.rb ADDED
@@ -0,0 +1,21 @@
1
+ module Derrick
2
+ class Progress
3
+ attr_reader :total, :collected, :fetched
4
+
5
+ def initialize(total)
6
+ @total = total
7
+ @mutex = Mutex.new
8
+
9
+ @collected = 0
10
+ @fetched = 0
11
+ end
12
+
13
+ def increment_collected(count)
14
+ @mutex.synchronize { @collected += count }
15
+ end
16
+
17
+ def increment_fetched(count)
18
+ @mutex.synchronize { @fetched += count }
19
+ end
20
+ end
21
+ end
data/lib/derrick/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Derrick
2
- VERSION = "0.0.2"
2
+ VERSION = '0.1.0'
3
3
  end
data/lib/derrick.rb CHANGED
@@ -1,5 +1,10 @@
1
1
  require 'derrick/version'
2
2
  require 'derrick/inspector'
3
+ require 'derrick/aggregator'
4
+ require 'derrick/collector'
5
+ require 'derrick/fetcher'
6
+ require 'derrick/pattern'
7
+ require 'derrick/progress'
3
8
 
4
9
  module Derrick
5
10
  end
data/spec/pattern_spec.rb ADDED
@@ -0,0 +1,49 @@
1
+ require 'spec_helper'
2
+
3
+ describe Derrick::Pattern do
4
+
5
+ describe '.extract' do
6
+
7
+ it 'canonicalizes integers' do
8
+ expect('foo:42:bar').to match_pattern('foo:*:bar')
9
+ end
10
+
11
+ it 'canonicalizes MD5s' do
12
+ expect('foo:258622b1688250cb619f3c9ccaefb7eb:bar').to match_pattern('foo:*:bar')
13
+ end
14
+
15
+ it 'canonicalizes SHA1s' do
16
+ expect('foo:792eaaec6718c335d63cde10d7281baa4132ba78:bar').to match_pattern('foo:*:bar')
17
+ end
18
+
19
+ it 'escapes strings with invalid characters' do
20
+ expect("hi \xAD").to match_pattern('hi \xAD')
21
+ end
22
+
23
+ it 'accepts `_` as segment separator' do
24
+ expect('foo_42_bar').to match_pattern('foo_*_bar')
25
+ end
26
+
27
+ it 'accepts `/` as segment separator' do
28
+ expect('foo/42/bar').to match_pattern('foo/*/bar')
29
+ end
30
+
31
+ context 'when no identifier is found' do
32
+
33
+ it 'assumes the first segment is shared' do
34
+ expect('foo:bar').to match_pattern('foo:*')
35
+ end
36
+
37
+ context 'and no segment serparator either' do
38
+
39
+ it 'falls in the `*` pattern' do
40
+ expect('foobar').to match_pattern('*')
41
+ end
42
+
43
+ end
44
+
45
+ end
46
+
47
+ end
48
+
49
+ end
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,86 @@
1
+ lib = File.expand_path('../../lib', __FILE__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+
4
+ require 'derrick'
5
+
6
+
7
+ Dir[File.join(File.expand_path('../support', __FILE__), '**/*.rb')].each { |f| require f }
8
+
9
+ # This file was generated by the `rspec --init` command. Conventionally, all
10
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
11
+ # The generated `.rspec` file contains `--require spec_helper` which will cause this
12
+ # file to always be loaded, without a need to explicitly require it in any files.
13
+ #
14
+ # Given that it is always loaded, you are encouraged to keep this file as
15
+ # light-weight as possible. Requiring heavyweight dependencies from this file
16
+ # will add to the boot time of your test suite on EVERY test run, even for an
17
+ # individual file that may not need all of that loaded. Instead, make a
18
+ # separate helper file that requires this one and then use it only in the specs
19
+ # that actually need it.
20
+ #
21
+ # The `.rspec` file also contains a few flags that are not defaults but that
22
+ # users commonly want.
23
+ #
24
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
25
+ RSpec.configure do |config|
26
+ # The settings below are suggested to provide a good initial experience
27
+ # with RSpec, but feel free to customize to your heart's content.
28
+ =begin
29
+ # These two settings work together to allow you to limit a spec run
30
+ # to individual examples or groups you care about by tagging them with
31
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
32
+ # get run.
33
+ config.filter_run :focus
34
+ config.run_all_when_everything_filtered = true
35
+
36
+ # Many RSpec users commonly either run the entire suite or an individual
37
+ # file, and it's useful to allow more verbose output when running an
38
+ # individual spec file.
39
+ if config.files_to_run.one?
40
+ # Use the documentation formatter for detailed output,
41
+ # unless a formatter has already been configured
42
+ # (e.g. via a command-line flag).
43
+ config.default_formatter = 'doc'
44
+ end
45
+
46
+ # Print the 10 slowest examples and example groups at the
47
+ # end of the spec run, to help surface which specs are running
48
+ # particularly slow.
49
+ config.profile_examples = 10
50
+
51
+ # Run specs in random order to surface order dependencies. If you find an
52
+ # order dependency and want to debug it, you can fix the order by providing
53
+ # the seed, which is printed after each run.
54
+ # --seed 1234
55
+ config.order = :random
56
+
57
+ # Seed global randomization in this process using the `--seed` CLI option.
58
+ # Setting this allows you to use `--seed` to deterministically reproduce
59
+ # test failures related to randomization by passing the same `--seed` value
60
+ # as the one that triggered the failure.
61
+ Kernel.srand config.seed
62
+
63
+ # rspec-expectations config goes here. You can use an alternate
64
+ # assertion/expectation library such as wrong or the stdlib/minitest
65
+ # assertions if you prefer.
66
+ config.expect_with :rspec do |expectations|
67
+ # Enable only the newer, non-monkey-patching expect syntax.
68
+ # For more details, see:
69
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
70
+ expectations.syntax = :expect
71
+ end
72
+
73
+ # rspec-mocks config goes here. You can use an alternate test double
74
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
75
+ config.mock_with :rspec do |mocks|
76
+ # Enable only the newer, non-monkey-patching expect syntax.
77
+ # For more details, see:
78
+ # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
79
+ mocks.syntax = :expect
80
+
81
+ # Prevents you from mocking or stubbing a method that does not exist on
82
+ # a real object. This is generally recommended.
83
+ mocks.verify_partial_doubles = true
84
+ end
85
+ =end
86
+ end
data/spec/support/pattern_matcher.rb ADDED
@@ -0,0 +1,9 @@
1
+ RSpec::Matchers.define :match_pattern do |expected_pattern|
2
+ match do |actual|
3
+ Derrick::Pattern.extract(actual) == expected_pattern
4
+ end
5
+
6
+ failure_message do |actual|
7
+ "expected `#{actual}` to be canonicalized as `#{expected_pattern}` but was `#{Derrick::Pattern.extract(actual)}`"
8
+ end
9
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: derrick
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jean Boussier
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-17 00:00:00.000000000 Z
11
+ date: 2014-09-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: redis
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -61,6 +75,7 @@ extensions: []
61
75
  extra_rdoc_files: []
62
76
  files:
63
77
  - ".gitignore"
78
+ - ".rspec"
64
79
  - Gemfile
65
80
  - LICENSE.txt
66
81
  - README.md
@@ -68,9 +83,17 @@ files:
68
83
  - bin/derrick
69
84
  - derrick.gemspec
70
85
  - lib/derrick.rb
86
+ - lib/derrick/aggregator.rb
71
87
  - lib/derrick/cli.rb
88
+ - lib/derrick/collector.rb
89
+ - lib/derrick/fetcher.rb
72
90
  - lib/derrick/inspector.rb
91
+ - lib/derrick/pattern.rb
92
+ - lib/derrick/progress.rb
73
93
  - lib/derrick/version.rb
94
+ - spec/pattern_spec.rb
95
+ - spec/spec_helper.rb
96
+ - spec/support/pattern_matcher.rb
74
97
  homepage: ''
75
98
  licenses:
76
99
  - MIT
@@ -95,4 +118,7 @@ rubygems_version: 2.2.2
95
118
  signing_key:
96
119
  specification_version: 4
97
120
  summary: Inspect Redis databases and print statistics about the keys
98
- test_files: []
121
+ test_files:
122
+ - spec/pattern_spec.rb
123
+ - spec/spec_helper.rb
124
+ - spec/support/pattern_matcher.rb