derrick 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3337d0adef7d3a74367fd32cd6432e10746d9b50
4
- data.tar.gz: 23b953e5d08de9fefcc73ac06217d64a39253af1
3
+ metadata.gz: 85d156880226b9165b0f32522de5eaf60e1a63ac
4
+ data.tar.gz: f5d4ea05933b71e2cabfb72a55683bc81d79b063
5
5
  SHA512:
6
- metadata.gz: 9ffb3ec31001802893c3de556751ecbc086387957c5028895281541b127948c1b528a45c6c6c003494a03ed3500a4d847bcde49dc81e90babf75cfd18f67e04c
7
- data.tar.gz: b61b22c42e72d3b7d3dd83daeb8eed86c552d5b5628bc71131a446b53e26f5f314443ebcb3e14377fb9b03dea9d7c3672e6059fe77d873e0ceee856fbb9b6433
6
+ metadata.gz: 8f6d15213b7598333fb606230287f9ca9cd1f2276b04e5a5da1a4ba020dc9622aac58caeab1b8950f19d30f62a3717fd6e57486b6d659a68398ad693eab3e809
7
+ data.tar.gz: 72b95d56cb1b4b52f6ab680d45c76016570f9b9b44008dd0506a37ed2ac215686aa0dce61b9c6b0286235fbcf558cec4cb360870dc50635c510b759851e5fcbf
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --warnings
3
+ --require spec_helper
data/derrick.gemspec CHANGED
@@ -19,6 +19,7 @@ Gem::Specification.new do |spec|
19
19
 
20
20
  spec.add_development_dependency 'bundler', '~> 1.6'
21
21
  spec.add_development_dependency 'rake'
22
+ spec.add_development_dependency 'rspec'
22
23
 
23
24
  spec.add_dependency 'redis'
24
25
  end
@@ -0,0 +1,51 @@
1
+ module Derrick
2
+ class Aggregator
3
+ ANY = '*'.freeze
4
+
5
+ attr_reader :patterns
6
+ def initialize(queue, context)
7
+ @queue = queue
8
+ @patterns = {}
9
+ @context = context
10
+ end
11
+
12
+ def run
13
+ fetcher_count = @context.concurrency
14
+ loop do
15
+ keys = @queue.pop
16
+ if keys == :stop
17
+ fetcher_count -= 1
18
+ break if fetcher_count == 0
19
+ else
20
+ keys.each { |k| aggregate(k) }
21
+ end
22
+ end
23
+ self
24
+ end
25
+
26
+ def aggregate(key)
27
+ pattern = pattern_from(key)
28
+ pattern.aggregate(key)
29
+
30
+ if patterns.size > @context.max_patterns
31
+ compact_uniques!
32
+ end
33
+ end
34
+
35
+ def pattern_from(key)
36
+ @patterns[Pattern.extract(key.name)] ||= Pattern.new
37
+ end
38
+
39
+ def compact_uniques!
40
+ any = @patterns.delete(ANY) || Pattern.new
41
+ @patterns.each do |key, aggregate|
42
+ if aggregate.count == 1
43
+ any.merge!(@patterns.delete(key))
44
+ end
45
+ end
46
+ @patterns[ANY] = any
47
+ nil
48
+ end
49
+
50
+ end
51
+ end
data/lib/derrick/cli.rb CHANGED
@@ -41,11 +41,12 @@ module Derrick
41
41
  end
42
42
 
43
43
  class Context
44
- attr_accessor :concurrency, :batch_size
44
+ attr_accessor :concurrency, :batch_size, :max_patterns
45
45
 
46
46
  def initialize
47
47
  @concurrency = 2
48
48
  @batch_size = 10_000
49
+ @max_patterns = 1_000
49
50
  end
50
51
  end
51
52
 
@@ -140,6 +141,9 @@ module Derrick
140
141
  opts.on('-b', '--batch-size BATCH_SIZE') do |batch_size|
141
142
  @context.batch_size = Integer(batch_size)
142
143
  end
144
+ opts.on('-C', '--max-patterns MAX_PATTERNS') do |max_patterns|
145
+ @context.max_patterns = Integer(max_patterns)
146
+ end
143
147
  end
144
148
  end
145
149
 
@@ -0,0 +1,25 @@
1
+ module Derrick
2
+ class Collector
3
+ def initialize(redis, queue, progress, context)
4
+ @redis = redis
5
+ @queue = queue
6
+ @progress = progress
7
+ @context = context
8
+ end
9
+
10
+ def run
11
+ collect_keys
12
+ @context.concurrency.times { @queue.push(:stop) }
13
+ end
14
+
15
+ def collect_keys
16
+ cursor = '0'
17
+ loop do
18
+ cursor, keys = @redis.scan(cursor, count: @context.batch_size)
19
+ @queue.push(keys)
20
+ @progress.increment_collected(keys.size)
21
+ return if cursor == '0'
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,39 @@
1
+ module Derrick
2
+ Key = Struct.new(:name, :type, :ttl)
3
+
4
+ class Fetcher
5
+ def initialize(redis, input, output, progress)
6
+ @redis = redis
7
+ @input = input
8
+ @output = output
9
+ @progress = progress
10
+ end
11
+
12
+ def run
13
+ while (keys = @input.pop) != :stop
14
+ @output.push(stats(keys))
15
+ end
16
+ @output.push(:stop)
17
+ end
18
+
19
+ def stats(keys)
20
+ types = @redis.pipelined do
21
+ keys.each do |key|
22
+ @redis.type(key)
23
+ end
24
+ end
25
+
26
+ ttls = @redis.pipelined do
27
+ keys.each do |key|
28
+ @redis.ttl(key)
29
+ end
30
+ end
31
+
32
+ @progress.increment_fetched(keys.size)
33
+
34
+ keys.map.with_index do |key, index|
35
+ Key.new(key, types[index], ttls[index])
36
+ end
37
+ end
38
+ end
39
+ end
@@ -3,161 +3,6 @@ require 'thread'
3
3
  Thread.abort_on_exception = true
4
4
 
5
5
  module Derrick
6
- class Collector
7
- def initialize(redis, queue, progress, context)
8
- @redis = redis
9
- @queue = queue
10
- @progress = progress
11
- @context = context
12
- end
13
-
14
- def run
15
- collect_keys
16
- @context.concurrency.times { @queue.push(:stop) }
17
- end
18
-
19
- def collect_keys
20
- cursor = '0'
21
- loop do
22
- cursor, keys = @redis.scan(cursor, count: @context.batch_size)
23
- @queue.push(keys)
24
- @progress.increment_collected(keys.size)
25
- return if cursor == '0'
26
- end
27
- end
28
- end
29
-
30
- Key = Struct.new(:name, :type, :ttl)
31
-
32
- class Fetcher
33
-
34
- def initialize(redis, input, output, progress)
35
- @redis = redis
36
- @input = input
37
- @output = output
38
- @progress = progress
39
- end
40
-
41
- def run
42
- while (keys = @input.pop) != :stop
43
- @output.push(stats(keys))
44
- end
45
- @output.push(:stop)
46
- end
47
-
48
- def stats(keys)
49
- types = @redis.pipelined do
50
- keys.each do |key|
51
- @redis.type(key)
52
- end
53
- end
54
-
55
- ttls = @redis.pipelined do
56
- keys.each do |key|
57
- @redis.ttl(key)
58
- end
59
- end
60
-
61
- @progress.increment_fetched(keys.size)
62
-
63
- keys.map.with_index do |key, index|
64
- Key.new(key, types[index], ttls[index])
65
- end
66
- end
67
-
68
- end
69
-
70
- class Pattern
71
- attr_reader :pattern, :count, :expirable_count, :persisted_count, :types_count
72
-
73
- def initialize
74
- @count = 0
75
- @expirable_count = 0
76
- @persisted_count = 0
77
- @types_count = Hash.new(0)
78
- end
79
-
80
- def expirable_ratio
81
- return 1 if count == 0
82
- expirable_count.to_f / count
83
- end
84
-
85
- def types_ratio
86
- Hash[@types_count.map do |type, sub_count|
87
- [type, sub_count.to_f / count]
88
- end]
89
- end
90
-
91
- def aggregate(key)
92
- @count += 1
93
-
94
- if key.ttl == -1
95
- @persisted_count += 1
96
- else
97
- @expirable_count += 1
98
- end
99
-
100
- @types_count[key.type] += 1
101
- end
102
-
103
- end
104
-
105
- class Progress
106
-
107
- attr_reader :total, :collected, :fetched
108
-
109
- def initialize(total)
110
- @total = total
111
- @mutex = Mutex.new
112
-
113
- @collected = 0
114
- @fetched = 0
115
- end
116
-
117
- def increment_collected(count)
118
- @mutex.synchronize { @collected += count }
119
- end
120
-
121
- def increment_fetched(count)
122
- @mutex.synchronize { @fetched += count }
123
- end
124
-
125
- end
126
-
127
- class Aggregator
128
- attr_reader :patterns
129
- def initialize(queue, context)
130
- @queue = queue
131
- @patterns = {}
132
- @context = context
133
- end
134
-
135
- def run
136
- fetcher_count = @context.concurrency
137
- loop do
138
- keys = @queue.pop
139
- if keys == :stop
140
- fetcher_count -= 1
141
- break if fetcher_count == 0
142
- else
143
- keys.each { |k| aggregate(k) }
144
- end
145
- end
146
- self
147
- end
148
-
149
- def aggregate(key)
150
- pattern = pattern_from(key)
151
- pattern.aggregate(key)
152
- end
153
-
154
- def pattern_from(key)
155
- canonical_key = key.name.inspect.gsub(/(^|:)(\d+|[0-9a-f]{32,40})($|:)/, '\1*\3')
156
- @patterns[canonical_key] ||= Pattern.new
157
- end
158
-
159
- end
160
-
161
6
  class Inspector
162
7
  attr_reader :redis, :progress
163
8
 
@@ -0,0 +1,62 @@
1
+ module Derrick
2
+ class Pattern
3
+ SEGMENT_SEPARATORS = %w(: _ /).freeze
4
+ FIRST_SEGMENT_PATTERN = /.*(#{SEGMENT_SEPARATORS.map { |s| Regexp.escape(s) }.join('|')})/
5
+ IDENTIFIER_PATTERNS = '\d+|[0-9a-f]{32,40}'
6
+ SEGMENT_PATTERNS = SEGMENT_SEPARATORS.map do |separator|
7
+ /(^|#{Regexp.escape(separator)})(#{IDENTIFIER_PATTERNS})($|#{Regexp.escape(separator)})/
8
+ end
9
+
10
+ attr_reader :pattern, :count, :expirable_count, :persisted_count, :types_count
11
+
12
+ def self.extract(key_name)
13
+ key_pattern = SEGMENT_PATTERNS.inject(key_name.inspect[1..-2]) do |key, pattern|
14
+ key.gsub(pattern, '\1*\3')
15
+ end
16
+
17
+ return "#{key_name[FIRST_SEGMENT_PATTERN]}*" if key_pattern == key_name
18
+
19
+ key_pattern
20
+ end
21
+
22
+ def initialize
23
+ @count = 0
24
+ @expirable_count = 0
25
+ @persisted_count = 0
26
+ @types_count = Hash.new(0)
27
+ end
28
+
29
+ def merge!(other)
30
+ @count += other.count
31
+ @expirable_count += other.expirable_count
32
+ @persisted_count += other.persisted_count
33
+ other.types_count.each do |type, count|
34
+ @types_count[type] += count
35
+ end
36
+ self
37
+ end
38
+
39
+ def expirable_ratio
40
+ return 1 if count == 0
41
+ expirable_count.to_f / count
42
+ end
43
+
44
+ def types_ratio
45
+ Hash[@types_count.map do |type, sub_count|
46
+ [type, sub_count.to_f / count]
47
+ end]
48
+ end
49
+
50
+ def aggregate(key)
51
+ @count += 1
52
+
53
+ if key.ttl == -1
54
+ @persisted_count += 1
55
+ else
56
+ @expirable_count += 1
57
+ end
58
+
59
+ @types_count[key.type] += 1
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,21 @@
1
+ module Derrick
2
+ class Progress
3
+ attr_reader :total, :collected, :fetched
4
+
5
+ def initialize(total)
6
+ @total = total
7
+ @mutex = Mutex.new
8
+
9
+ @collected = 0
10
+ @fetched = 0
11
+ end
12
+
13
+ def increment_collected(count)
14
+ @mutex.synchronize { @collected += count }
15
+ end
16
+
17
+ def increment_fetched(count)
18
+ @mutex.synchronize { @fetched += count }
19
+ end
20
+ end
21
+ end
@@ -1,3 +1,3 @@
1
1
  module Derrick
2
- VERSION = "0.0.2"
2
+ VERSION = '0.1.0'
3
3
  end
data/lib/derrick.rb CHANGED
@@ -1,5 +1,10 @@
1
1
  require 'derrick/version'
2
2
  require 'derrick/inspector'
3
+ require 'derrick/aggregator'
4
+ require 'derrick/collector'
5
+ require 'derrick/fetcher'
6
+ require 'derrick/pattern'
7
+ require 'derrick/progress'
3
8
 
4
9
  module Derrick
5
10
  end
@@ -0,0 +1,49 @@
1
+ require 'spec_helper'
2
+
3
+ describe Derrick::Pattern do
4
+
5
+ describe '.extract' do
6
+
7
+ it 'canonicalizes integers' do
8
+ expect('foo:42:bar').to match_pattern('foo:*:bar')
9
+ end
10
+
11
+ it 'canonicalizes MD5s' do
12
+ expect('foo:258622b1688250cb619f3c9ccaefb7eb:bar').to match_pattern('foo:*:bar')
13
+ end
14
+
15
+ it 'canonicalizes SHA1s' do
16
+ expect('foo:792eaaec6718c335d63cde10d7281baa4132ba78:bar').to match_pattern('foo:*:bar')
17
+ end
18
+
19
+ it 'escapes strings with invalid characters' do
20
+ expect("hi \xAD").to match_pattern('hi \xAD')
21
+ end
22
+
23
+ it 'accepts `_` as segment separator' do
24
+ expect('foo_42_bar').to match_pattern('foo_*_bar')
25
+ end
26
+
27
+ it 'accepts `/` as segment separator' do
28
+ expect('foo/42/bar').to match_pattern('foo/*/bar')
29
+ end
30
+
31
+ context 'when no identifier is found' do
32
+
33
+ it 'assumes the first segment is shared' do
34
+ expect('foo:bar').to match_pattern('foo:*')
35
+ end
36
+
37
+ context 'and no segment serparator either' do
38
+
39
+ it 'falls in the `*` pattern' do
40
+ expect('foobar').to match_pattern('*')
41
+ end
42
+
43
+ end
44
+
45
+ end
46
+
47
+ end
48
+
49
+ end
@@ -0,0 +1,86 @@
1
+ lib = File.expand_path('../../lib', __FILE__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+
4
+ require 'derrick'
5
+
6
+
7
+ Dir[File.join(File.expand_path('../support', __FILE__), '**/*.rb')].each { |f| require f }
8
+
9
+ # This file was generated by the `rspec --init` command. Conventionally, all
10
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
11
+ # The generated `.rspec` file contains `--require spec_helper` which will cause this
12
+ # file to always be loaded, without a need to explicitly require it in any files.
13
+ #
14
+ # Given that it is always loaded, you are encouraged to keep this file as
15
+ # light-weight as possible. Requiring heavyweight dependencies from this file
16
+ # will add to the boot time of your test suite on EVERY test run, even for an
17
+ # individual file that may not need all of that loaded. Instead, make a
18
+ # separate helper file that requires this one and then use it only in the specs
19
+ # that actually need it.
20
+ #
21
+ # The `.rspec` file also contains a few flags that are not defaults but that
22
+ # users commonly want.
23
+ #
24
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
25
+ RSpec.configure do |config|
26
+ # The settings below are suggested to provide a good initial experience
27
+ # with RSpec, but feel free to customize to your heart's content.
28
+ =begin
29
+ # These two settings work together to allow you to limit a spec run
30
+ # to individual examples or groups you care about by tagging them with
31
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
32
+ # get run.
33
+ config.filter_run :focus
34
+ config.run_all_when_everything_filtered = true
35
+
36
+ # Many RSpec users commonly either run the entire suite or an individual
37
+ # file, and it's useful to allow more verbose output when running an
38
+ # individual spec file.
39
+ if config.files_to_run.one?
40
+ # Use the documentation formatter for detailed output,
41
+ # unless a formatter has already been configured
42
+ # (e.g. via a command-line flag).
43
+ config.default_formatter = 'doc'
44
+ end
45
+
46
+ # Print the 10 slowest examples and example groups at the
47
+ # end of the spec run, to help surface which specs are running
48
+ # particularly slow.
49
+ config.profile_examples = 10
50
+
51
+ # Run specs in random order to surface order dependencies. If you find an
52
+ # order dependency and want to debug it, you can fix the order by providing
53
+ # the seed, which is printed after each run.
54
+ # --seed 1234
55
+ config.order = :random
56
+
57
+ # Seed global randomization in this process using the `--seed` CLI option.
58
+ # Setting this allows you to use `--seed` to deterministically reproduce
59
+ # test failures related to randomization by passing the same `--seed` value
60
+ # as the one that triggered the failure.
61
+ Kernel.srand config.seed
62
+
63
+ # rspec-expectations config goes here. You can use an alternate
64
+ # assertion/expectation library such as wrong or the stdlib/minitest
65
+ # assertions if you prefer.
66
+ config.expect_with :rspec do |expectations|
67
+ # Enable only the newer, non-monkey-patching expect syntax.
68
+ # For more details, see:
69
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
70
+ expectations.syntax = :expect
71
+ end
72
+
73
+ # rspec-mocks config goes here. You can use an alternate test double
74
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
75
+ config.mock_with :rspec do |mocks|
76
+ # Enable only the newer, non-monkey-patching expect syntax.
77
+ # For more details, see:
78
+ # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
79
+ mocks.syntax = :expect
80
+
81
+ # Prevents you from mocking or stubbing a method that does not exist on
82
+ # a real object. This is generally recommended.
83
+ mocks.verify_partial_doubles = true
84
+ end
85
+ =end
86
+ end
@@ -0,0 +1,9 @@
1
+ RSpec::Matchers.define :match_pattern do |expected_pattern|
2
+ match do |actual|
3
+ Derrick::Pattern.extract(actual) == expected_pattern
4
+ end
5
+
6
+ failure_message do |actual|
7
+ "expected `#{actual}` to be canonicalized as `#{expected_pattern}` but was `#{Derrick::Pattern.extract(actual)}`"
8
+ end
9
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: derrick
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jean Boussier
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-17 00:00:00.000000000 Z
11
+ date: 2014-09-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -38,6 +38,20 @@ dependencies:
38
38
  - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: redis
43
57
  requirement: !ruby/object:Gem::Requirement
@@ -61,6 +75,7 @@ extensions: []
61
75
  extra_rdoc_files: []
62
76
  files:
63
77
  - ".gitignore"
78
+ - ".rspec"
64
79
  - Gemfile
65
80
  - LICENSE.txt
66
81
  - README.md
@@ -68,9 +83,17 @@ files:
68
83
  - bin/derrick
69
84
  - derrick.gemspec
70
85
  - lib/derrick.rb
86
+ - lib/derrick/aggregator.rb
71
87
  - lib/derrick/cli.rb
88
+ - lib/derrick/collector.rb
89
+ - lib/derrick/fetcher.rb
72
90
  - lib/derrick/inspector.rb
91
+ - lib/derrick/pattern.rb
92
+ - lib/derrick/progress.rb
73
93
  - lib/derrick/version.rb
94
+ - spec/pattern_spec.rb
95
+ - spec/spec_helper.rb
96
+ - spec/support/pattern_matcher.rb
74
97
  homepage: ''
75
98
  licenses:
76
99
  - MIT
@@ -95,4 +118,7 @@ rubygems_version: 2.2.2
95
118
  signing_key:
96
119
  specification_version: 4
97
120
  summary: Inspect Redis databases and print statistics about the keys
98
- test_files: []
121
+ test_files:
122
+ - spec/pattern_spec.rb
123
+ - spec/spec_helper.rb
124
+ - spec/support/pattern_matcher.rb