heavy_keeper 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8de4da5b817f62a3047b5de8d8b1118d91577ef12e5389ade96f1dd69179e045
4
+ data.tar.gz: d42f28f5a1af3539010c269e14540fcaef6cafc9ad059529f04a10a55faf3422
5
+ SHA512:
6
+ metadata.gz: 29c288a59c986a75df4224030486c9afb59af3fa00710b35c0355f617e679b75559c6143881a3bb6404754a3ebc18bcb63c99ecc584072ac6910f43ee638dbfc
7
+ data.tar.gz: 8ad3f0583c3e64a59516a80c28cc162c2d64fdd4ce44c82d088db02143713425c049190b3f5cd5352daa9e830896ed9c51399ebf49a3e240d91ca806cd354bc9
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,65 @@
1
+ AllCops:
2
+ NewCops: enable
3
+ TargetRubyVersion: 2.7
4
+ Exclude:
5
+ - Gemfile.lock
6
+
7
+ Bundler/OrderedGems:
8
+ Enabled: false
9
+
10
+ Gemspec/RequiredRubyVersion:
11
+ Enabled: false
12
+
13
+ Layout/ArgumentAlignment:
14
+ EnforcedStyle: with_fixed_indentation
15
+ Layout/MultilineMethodCallIndentation:
16
+ EnforcedStyle: indented
17
+
18
+ Metrics/AbcSize:
19
+ Max: 25
20
+ Metrics/BlockLength:
21
+ Exclude:
22
+ - spec/**/*
23
+ - heavy_keeper.gemspec
24
+ Metrics/CyclomaticComplexity:
25
+ Max: 10
26
+ Metrics/MethodLength:
27
+ Max: 20
28
+ Metrics/PerceivedComplexity:
29
+ Max: 10
30
+
31
+ Naming/VariableNumber:
32
+ Enabled: false
33
+ Naming/PredicateName:
34
+ Enabled: false
35
+
36
+ Style/Alias:
37
+ EnforcedStyle: prefer_alias_method
38
+ Style/BlockDelimiters:
39
+ Exclude:
40
+ - spec/**/*_spec.rb
41
+ Style/Documentation:
42
+ Enabled: false
43
+ Style/FrozenStringLiteralComment:
44
+ Enabled: false
45
+ Style/GuardClause:
46
+ Enabled: false
47
+ Style/IfUnlessModifier:
48
+ Enabled: false
49
+ Style/ModuleFunction:
50
+ Exclude:
51
+ - 'lib/heavy_keeper.rb'
52
+ Style/PercentLiteralDelimiters:
53
+ Enabled: false
54
+ Style/SafeNavigation:
55
+ Enabled: false
56
+ Style/StringLiterals:
57
+ EnforcedStyle: single_quotes
58
+ Style/TrailingUnderscoreVariable:
59
+ Enabled: false
60
+ Style/WordArray:
61
+ EnforcedStyle: percent
62
+ Layout/LineLength:
63
+ Max: 200
64
+ Lint/AmbiguousBlockAssociation:
65
+ Enabled: false
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2022-06-29
4
+
5
+ - Initial release
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ # Specify your gem's dependencies in heavy_keeper.gemspec
6
+ gemspec
7
+
8
+ gem 'rubocop', require: false
9
+
10
+ gem 'rake', '~> 13.0'
11
+
12
+ group :test do
13
+ gem 'mock_redis'
14
+ gem 'rspec'
15
+ gem 'pry-byebug'
16
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,103 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ heavy_keeper (0.1.0)
5
+ dry-configurable (>= 0.13.0)
6
+ dry-schema (~> 1)
7
+ redis (~> 4)
8
+ xxhash (~> 0.4.0)
9
+
10
+ GEM
11
+ remote: https://rubygems.org/
12
+ specs:
13
+ ast (2.4.2)
14
+ byebug (11.1.3)
15
+ coderay (1.1.3)
16
+ concurrent-ruby (1.1.10)
17
+ diff-lcs (1.5.0)
18
+ dry-configurable (0.15.0)
19
+ concurrent-ruby (~> 1.0)
20
+ dry-core (~> 0.6)
21
+ dry-container (0.9.0)
22
+ concurrent-ruby (~> 1.0)
23
+ dry-configurable (~> 0.13, >= 0.13.0)
24
+ dry-core (0.7.1)
25
+ concurrent-ruby (~> 1.0)
26
+ dry-inflector (0.2.1)
27
+ dry-initializer (3.1.1)
28
+ dry-logic (1.2.0)
29
+ concurrent-ruby (~> 1.0)
30
+ dry-core (~> 0.5, >= 0.5)
31
+ dry-schema (1.9.1)
32
+ concurrent-ruby (~> 1.0)
33
+ dry-configurable (~> 0.13, >= 0.13.0)
34
+ dry-core (~> 0.5, >= 0.5)
35
+ dry-initializer (~> 3.0)
36
+ dry-logic (~> 1.0)
37
+ dry-types (~> 1.5)
38
+ dry-types (1.5.1)
39
+ concurrent-ruby (~> 1.0)
40
+ dry-container (~> 0.3)
41
+ dry-core (~> 0.5, >= 0.5)
42
+ dry-inflector (~> 0.1, >= 0.1.2)
43
+ dry-logic (~> 1.0, >= 1.0.2)
44
+ method_source (1.0.0)
45
+ mock_redis (0.31.0)
46
+ ruby2_keywords
47
+ parallel (1.22.1)
48
+ parser (3.1.2.0)
49
+ ast (~> 2.4.1)
50
+ pry (0.13.1)
51
+ coderay (~> 1.1)
52
+ method_source (~> 1.0)
53
+ pry-byebug (3.9.0)
54
+ byebug (~> 11.0)
55
+ pry (~> 0.13.0)
56
+ rainbow (3.1.1)
57
+ rake (13.0.6)
58
+ redis (4.7.0)
59
+ regexp_parser (2.4.0)
60
+ rexml (3.2.5)
61
+ rspec (3.11.0)
62
+ rspec-core (~> 3.11.0)
63
+ rspec-expectations (~> 3.11.0)
64
+ rspec-mocks (~> 3.11.0)
65
+ rspec-core (3.11.0)
66
+ rspec-support (~> 3.11.0)
67
+ rspec-expectations (3.11.0)
68
+ diff-lcs (>= 1.2.0, < 2.0)
69
+ rspec-support (~> 3.11.0)
70
+ rspec-mocks (3.11.1)
71
+ diff-lcs (>= 1.2.0, < 2.0)
72
+ rspec-support (~> 3.11.0)
73
+ rspec-support (3.11.0)
74
+ rubocop (1.29.1)
75
+ parallel (~> 1.10)
76
+ parser (>= 3.1.0.0)
77
+ rainbow (>= 2.2.2, < 4.0)
78
+ regexp_parser (>= 1.8, < 3.0)
79
+ rexml (>= 3.2.5, < 4.0)
80
+ rubocop-ast (>= 1.17.0, < 2.0)
81
+ ruby-progressbar (~> 1.7)
82
+ unicode-display_width (>= 1.4.0, < 3.0)
83
+ rubocop-ast (1.18.0)
84
+ parser (>= 3.1.1.0)
85
+ ruby-progressbar (1.11.0)
86
+ ruby2_keywords (0.0.5)
87
+ unicode-display_width (2.1.0)
88
+ xxhash (0.4.0)
89
+
90
+ PLATFORMS
91
+ x86_64-darwin-19
92
+ x86_64-linux
93
+
94
+ DEPENDENCIES
95
+ heavy_keeper!
96
+ mock_redis
97
+ pry-byebug
98
+ rake (~> 13.0)
99
+ rspec
100
+ rubocop
101
+
102
+ BUNDLED WITH
103
+ 2.2.32
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # HeavyKeeper
2
+ This gem implements the HeavyKeeper algorithm, based on the paper with the same
3
+ name: https://www.usenix.org/conference/atc18/presentation/gong
4
+
5
+ The interface is designed to be similar to the ReBloom TopK data structure (DS)
6
+ (https://oss.redis.com/redisbloom/TopK_Commands/).
7
+
8
+ This is a naive implementation of HeavyKeeper, probably not very optimized.
9
+ We use multiple Redis DSs:
10
+
11
+ - A hash with at most depth * width items, acting as the buckets that store the main
12
+ counters
13
+ - A sorted set with maximum K elements to act as a MinHeap
14
+ - A hash with at most K elements to store a more accurate counter for each element in the MinHeap
15
+
16
+
17
+ ## Installation
18
+
19
+ Add this line to your application's Gemfile:
20
+
21
+ ```ruby
22
+ gem 'heavy_keeper'
23
+ ```
24
+
25
+ ## Usage
26
+
27
+ You will need to add an initializer to provide some configuration:
28
+
29
+ ```ruby
30
+ HeavyKeeper::Config.configure do |config|
31
+ config.cache_prefix = 'cache_prefix'.freeze # currently used as prefix for the redis data structures.
32
+ config.storage = Redis.new # a Redis store, at least version 4.0
33
+ end
34
+ ```
35
+
36
+ In general, you will only interact with an instance of the `HeavyKeeper::TopK` class.
37
+
38
+ These are the most relevant instance methods (see code comments for more detail):
39
+
40
+ `reserve(name, top_k: size, width:, depth:, decay:)` - sets up a Top K list with specified options
41
+
42
+ `increase_by(name, *items_and_increments)` - add `[item, increment]` pairs to a list
43
+
44
+ `list(name)` - returns full list of items in Top K list
45
+
46
+ `clear(name)` - deletes list
47
+
48
+ `remove(name, item)` - reset the counter of the targeted item in the list
49
+
50
+ ## Development
51
+
52
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `bundle exec rspec` to run the tests.
53
+
54
+ To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
55
+
56
+ ## Contributing
57
+
58
+ Bug reports and pull requests are welcome on GitHub at https://github.com/Kaligo/heavy_keeper.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require 'rubocop/rake_task'
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec rubocop]
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'heavy_keeper'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require 'irb'
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/heavy_keeper/version'
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = 'heavy_keeper'
7
+ spec.version = HeavyKeeper::VERSION
8
+ spec.authors = ['Hieu Nguyen', 'Kenneth Teh']
9
+
10
+ spec.summary = 'Gem which implements HeavyKeeper algorithm'
11
+ spec.homepage = 'https://github.com/Kaligo/heavy_keeper'
12
+ spec.required_ruby_version = '>= 2.7.0'
13
+
14
+ spec.metadata['homepage_uri'] = spec.homepage
15
+ spec.metadata['source_code_uri'] = 'https://github.com/Kaligo/heavy_keeper'
16
+ spec.metadata['changelog_uri'] = 'https://github.com/Kaligo/heavy_keeper/blob/master/CHANGELOG.md'
17
+
18
+ # Specify which files should be added to the gem when it is released.
19
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
20
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
21
+ `git ls-files -z`.split("\x0").reject do |f|
22
+ (f == __FILE__) || f.match(%r{\A(?:(?:test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
23
+ end
24
+ end
25
+ spec.bindir = 'exe'
26
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
27
+ spec.require_paths = ['lib']
28
+
29
+ spec.add_dependency 'dry-configurable', '>= 0.13.0'
30
+ spec.add_dependency 'dry-schema', '~> 1'
31
+ spec.add_dependency 'redis', '~> 4'
32
+ spec.add_dependency 'xxhash', '~> 0.4.0'
33
+
34
+ spec.metadata['rubygems_mfa_required'] = 'true'
35
+ end
@@ -0,0 +1,40 @@
1
+ require 'json'
2
+
3
# Acts as a 2D array that stores the HeavyKeeper counters.
# It is implemented on top of a single Redis hash: every cell (i, j) is kept
# under the hash field "i:j" and its value is serialized as JSON.
#
# rubocop:disable Naming/MethodParameterName
module HeavyKeeper
  class Bucket
    # @param storage [Redis] client used for every hash command
    def initialize(storage = HeavyKeeper::Config.config.storage)
      @storage = storage
    end

    # Write +value+ (JSON-encoded) into cell (i, j) of the bucket named +key+.
    def set(key, i, j, value)
      payload = JSON.generate(value)
      storage.hset(hash_key(key), cell(i, j), payload)
    end

    # Read cell (i, j) of the bucket named +key+.
    #
    # @return the JSON-decoded value, or the falsy HGET result unchanged when
    #   the cell has never been written
    def get(key, i, j)
      raw = storage.hget(hash_key(key), cell(i, j))
      return raw unless raw

      JSON.parse(raw)
    end

    # Delete the whole Redis hash backing the bucket named +key+.
    def clear(key)
      storage.del(hash_key(key))
    end

    private

    attr_reader :storage

    # Hash field addressing one cell of the 2D array.
    def cell(i, j)
      "#{i}:#{j}"
    end

    # Redis key of the hash, namespaced by the configured cache prefix.
    def hash_key(key)
      "#{HeavyKeeper::Config.config.cache_prefix}_bucket:hash:#{key}"
    end
  end
end
# rubocop:enable Naming/MethodParameterName
@@ -0,0 +1,10 @@
1
+ require 'dry-configurable'
2
+
3
module HeavyKeeper
  # Class-level settings for the gem, declared with dry-configurable.
  class Config
    extend Dry::Configurable

    # Prefix interpolated into every Redis key built by TopK, MinHeap and Bucket.
    setting :cache_prefix, default: 'cache_prefix'

    # Redis client that TopK, MinHeap and Bucket fall back to when none is passed in.
    setting :storage
  end
end
@@ -0,0 +1,92 @@
1
# A min-heap implementation in Redis.
# This class is not meant to be used outside of the HeavyKeeper implementation
# for TopK. It uses the following data structures in Redis:
# - A sorted set with size K to store the min heap
# - A hash with size K to store the counter for each item in the min heap
#
module HeavyKeeper
  class MinHeap
    # @param storage [Redis] client used for all sorted-set and hash commands
    def initialize(storage = HeavyKeeper::Config.config.storage)
      @storage = storage
    end

    # Return up to +total+ items with the highest scores.
    #
    # @param key [String] name of the heap
    # @param total [Integer] maximum number of items to return
    #
    # @return [Hash] item => counter as stored in the Redis hash
    def list(key, total)
      # ZREVRANGE treats a stop index of -1 as "up to the last member", so a
      # non-positive total must not reach Redis or the whole set is returned.
      return {} unless total.positive?

      items = storage.zrevrange(sorted_set_key(key), 0, total - 1)
      return {} if items.empty?

      storage.mapped_hmget(hash_key(key), *items)
    end

    # @return [Integer] counter stored for +item+ (0 when the item is absent)
    def count(key, item)
      storage.hget(hash_key(key), item).to_i
    end

    # @return [Integer] counter of the lowest-scored item, 0 for an empty heap
    def min(key)
      item = storage.zrange(sorted_set_key(key), 0, 0).first
      # Nothing to look up when the heap is empty; avoid sending a nil field.
      return 0 if item.nil?

      count(key, item)
    end

    # @return [Boolean] whether +item+ currently has a counter in the heap
    def exist?(key, item)
      storage.hexists(hash_key(key), item)
    end

    # Insert +item+ with +value+, evicting the lowest-scored member when the
    # heap already held +top_k+ members before the insert.
    #
    # @return [Integer, nil] +value+ if the item stayed in the heap, nil if the
    #   item itself was the one evicted
    def add(key, item, value, top_k)
      size_before = storage.zcard(sorted_set_key(key))

      # Commands are issued on the yielded object so they are part of the
      # MULTI/EXEC transaction on every redis-rb 4.x release.
      storage.multi do |transaction|
        transaction.zadd(sorted_set_key(key), value, item)
        transaction.hset(hash_key(key), item, value)
      end

      return value if size_before < top_k

      dropped_item, _score = storage.zpopmin(sorted_set_key(key))
      storage.hdel(hash_key(key), dropped_item)

      value unless dropped_item == item
    end

    # Overwrite the score and counter of an item already in the heap.
    #
    # @return [Integer] the new +value+
    def update(key, item, value)
      storage.multi do |transaction|
        # ZADD replaces the score of an existing member in one command.
        transaction.zadd(sorted_set_key(key), value, item)
        transaction.hset(hash_key(key), item, value)
      end

      value
    end

    # Delete both Redis structures backing the heap named +key+.
    def clear(key)
      storage.del(sorted_set_key(key))
      storage.del(hash_key(key))
    end

    # Remove a single +item+ from the sorted set and from the counter hash.
    def delete(key, item)
      storage.zrem(sorted_set_key(key), item)
      storage.hdel(hash_key(key), item)
    end

    private

    attr_reader :storage

    def sorted_set_key(key)
      "#{key_prefix}:sorted_set:#{key}"
    end

    def hash_key(key)
      "#{key_prefix}:hash:#{key}"
    end

    def key_prefix
      "#{HeavyKeeper::Config.config.cache_prefix}_heavy_keeper"
    end
  end
end
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry-schema'
4
+ require 'securerandom'
5
+ require 'xxhash'
6
+ require_relative '../validations/message'
7
+
8
module HeavyKeeper
  # Controller for a HeavyKeeper Top-K data structure (DS) kept in Redis.
  # It combines a Bucket (the depth x width counter table), a MinHeap (the
  # current top-k items) and a metadata hash holding the reserve options.
  class TopK # rubocop:disable Metrics/ClassLength
    Validator = ::Dry::Schema.Params do
      required(:top_k).filled(:integer, gt?: 0)
      required(:width).filled(:integer, gt?: 0)
      required(:depth).filled(:integer, gt?: 0)
      required(:decay).filled(:decimal, gt?: 0, lteq?: 1)
    end

    # Initiate the controller to create/operate on top-k DS
    #
    # @param storage [Redis] A Redis client to interact with Redis
    #
    # @return [HeavyKeeper::TopK] new instance of the controller
    def initialize(storage: HeavyKeeper::Config.config.storage)
      @storage = storage
      @min_heap = MinHeap.new(storage)
      @bucket = Bucket.new(storage)
    end

    # Complexity O(1)
    # Initialize a TopK in Redis with specified parameters.
    #
    # @param key [String] a key for identifying top-k DS in Redis
    # @param options [Hash] must contain:
    #   :top_k [Integer] number of top elements we want to track
    #   :width [Integer] size of the bucket to store counter
    #   :depth [Integer] number of buckets we want to store
    #   :decay [Decimal] decay factor: smaller number means bigger
    #     distinction between mouse-flow and elephant-flow
    #
    # @return the result of storing the options hash in Redis
    # @raise [HeavyKeeper::Error] when the options fail validation
    def reserve(key, options)
      options = validate(options)

      storage.mapped_hmset(metadata_key(key), options)
    end

    # Complexity O(k + depth)
    # Add an array of items to a Top-K DS
    #
    # @param key [String] key for identifying top-k DS in Redis
    # @param items [String, String, ...] each value represents an item we want to
    #   store in Top-K
    #
    # @return [Array<Integer, nil>]
    #   nil if the item is not added to the list
    #   otherwise, return the current value of item
    def add(key, *items)
      items_and_increments = items.map { |item| [item, 1] }
      increase_by(key, *items_and_increments)
    end

    # Complexity O(k + (increment * depth))
    # Add an array of items to a Top-K DS, with custom increment for each item
    #
    # @param key [String] key for identifying top-k DS in Redis
    # @param items_and_increments [[String, Integer], ...]
    #   each value represents an item and increment that needs to be added
    #   to Top-K
    #
    # @return [Array<Integer, nil>]
    #   nil if the item is not added to the list
    #   otherwise, return the current value of item
    # @raise [HeavyKeeper::Error] when the stored metadata fails validation
    #   (for example when the key was never reserved)
    # rubocop:disable Metrics/AbcSize
    # rubocop:disable Metrics/MethodLength
    # rubocop:disable Metrics/BlockLength
    # rubocop:disable Metrics/PerceivedComplexity
    def increase_by(key, *items_and_increments)
      options = validate(storage.hgetall(metadata_key(key)))

      items_and_increments.map do |(item, increment)|
        max_count = 0
        item_fingerprint = XXhash.xxh64(item)

        exist = min_heap.exist?(key, item)
        min_value = min_heap.min(key)

        options[:depth].times do |i|
          bucket_number = bucket_index(item, i, options[:width])

          fingerprint, count = bucket.get(key, i, bucket_number)

          if count.nil? || count.zero?
            # Empty cell: the item takes it over.
            bucket.set(key, i, bucket_number, [item_fingerprint, increment])
            max_count = [increment, max_count].max
          elsif fingerprint == item_fingerprint
            # Cell already owned by this item: only grow it when the item is
            # in the heap or the cell has not outgrown the heap minimum.
            if exist || count <= min_value
              bucket.set(key, i, bucket_number, [fingerprint, count + increment])
              max_count = [count + increment, max_count].max
            end
          else
            # Cell owned by another item: decay it with probability decay**count.
            decay = options[:decay]**count

            if SecureRandom.rand < decay
              count -= increment

              if count.positive?
                bucket.set(key, i, bucket_number, [fingerprint, count])
              else
                bucket.set(key, i, bucket_number, [item_fingerprint, increment])
                max_count = [increment, max_count].max
              end
            end
          end
        end

        # NOTE(review): max_count is still 0 when no cell was written for this
        # item, and that 0 is passed to the heap as-is - confirm this is intended.
        if exist
          min_heap.update(key, item, max_count)
        else
          min_heap.add(key, item, max_count, options[:top_k])
        end
      end
    end
    # rubocop:enable Metrics/AbcSize
    # rubocop:enable Metrics/MethodLength
    # rubocop:enable Metrics/BlockLength
    # rubocop:enable Metrics/PerceivedComplexity

    # Complexity O(k)
    # Checks whether an item is one of Top-K items. Multiple items can be checked at once.
    #
    # @param key [String] a key for identifying top-k DS in Redis
    # @param items [String, String, ...] an array of items that we want to check
    #
    # @return [Array<Boolean>] true if item is in Top-K, otherwise return false
    def query(key, *items)
      items.map do |item|
        min_heap.exist?(key, item)
      end
    end

    # Complexity O(k + depth)
    # Please note this number will never be higher than the real count
    # and likely to be lower. Multiple items can be queried at once.
    #
    # @param key [String] a key for identifying top-k DS in Redis
    # @param items [String, String, ...] an array of items that we want to check
    #
    # @return [Array<Integer>] return the count of each item
    def count(key, *items)
      items.map do |item|
        min_heap.count(key, item)
      end
    end

    # Complexity O(k)
    # Return full list of items in Top K list.
    #
    # @param key [String] a key for identifying top-k DS in Redis
    #
    # @return [Hash] return a hash contains the key and the count of the top-K
    #   elements
    def list(key)
      top_k = storage.hget(metadata_key(key), :top_k).to_i
      min_heap.list(key, top_k)
    end

    # Complexity O(1)
    # Clean up all Redis data related to a key
    #
    # @param key [String] a key for identifying top-k DS in Redis
    #
    # @return the result of the Redis transaction; raises on error
    def clear(key)
      storage.multi do |transaction|
        # Every delete goes through the yielded object so that all of them
        # belong to the same MULTI/EXEC transaction.
        transaction.del(metadata_key(key))
        MinHeap.new(transaction).clear(key)
        Bucket.new(transaction).clear(key)
      end
    end

    # Complexity O(1)
    # Reset counter of an item to zero in order to decay it out
    #
    # @param key [String] a key for identifying top-k DS in Redis
    # @param item [String] item that we want to decay
    #
    # @return the result of deleting the item from the min heap
    # @raise [HeavyKeeper::Error] when the stored metadata fails validation
    def remove(key, item)
      options = validate(storage.hgetall(metadata_key(key)))
      item_fingerprint = XXhash.xxh64(item)

      options[:depth].times do |i|
        bucket_number = bucket_index(item, i, options[:width])
        fingerprint, _ = bucket.get(key, i, bucket_number)

        bucket.set(key, i, bucket_number, [fingerprint, 0]) if item_fingerprint == fingerprint
      end

      min_heap.delete(key, item)
    end

    private

    attr_reader :storage, :min_heap, :bucket

    # Column of +item+ in row +row+ of the bucket table. The row number is used
    # as the hash seed so that every row hashes the item differently.
    def bucket_index(item, row, width)
      XXhash.xxh64(item, row) % width
    end

    def metadata_key(key)
      "#{key_prefix}:#{key}:data"
    end

    def key_prefix
      "#{HeavyKeeper::Config.config.cache_prefix}_heavy_keeper"
    end

    # Run the options through Validator.
    #
    # @return [Hash] the coerced options
    # @raise [HeavyKeeper::Error] with the joined messages when validation fails
    def validate(options)
      result = Validator.call(options)

      if result.failure?
        error = ::Validations::Message.new.build(result.errors.to_h).join('. ')
        raise HeavyKeeper::Error, error
      end

      result.output
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HeavyKeeper
4
+ VERSION = '0.1.0'
5
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'heavy_keeper/version'
4
+ require_relative 'heavy_keeper/config'
5
+ require_relative 'heavy_keeper/top_k'
6
+ require_relative 'heavy_keeper/min_heap'
7
+ require_relative 'heavy_keeper/bucket'
8
+
9
+ module HeavyKeeper
10
+ class Error < StandardError; end
11
+ end
@@ -0,0 +1,35 @@
1
module Validations
  # Flattens a nested validation-error structure into human-readable sentences.
  class Message
    CLASSIFY_SEPARATOR = '_'.freeze
    TITLEIZE_SEPARATOR = ' '.freeze

    # @param errors [Hash, Array, Object] output of dry-validation after
    #   validating params; hashes map a field name to its nested errors,
    #   arrays hold the messages of one field
    # @param parent [nil, String] space-joined key path of the field that has
    #   `errors` after validating params
    #
    # @return [Array<String>] for Hash/Array input, one "Field Name message"
    #   string per error, usable to feed into Errors::InvalidParamsError;
    #   any other input is returned unchanged
    def build(errors, parent = nil)
      case errors
      when Hash
        errors.flat_map do |key, value|
          child = [parent, key].compact.join(' ')
          build(value, child)
        end
      when Array
        label = titleize(parent.to_s)

        errors.flat_map do |error|
          message = build(error)
          # Without a parent there is no label; do not emit a leading space.
          label.empty? ? message.to_s : "#{label} #{message}"
        end
      else
        errors
      end
    end

    private

    # Capitalize every word of +string+, treating underscores and whitespace
    # alike as word boundaries so nested key paths such as "user first_name"
    # become "User First Name".
    def titleize(string)
      # NOTE: this is not a robust implementation of titleize
      string.tr(CLASSIFY_SEPARATOR, TITLEIZE_SEPARATOR).split.map(&:capitalize).join(TITLEIZE_SEPARATOR)
    end
  end
end
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: heavy_keeper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Hieu Nguyen
8
+ - Kenneth Teh
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2022-06-30 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: dry-configurable
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: 0.13.0
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: 0.13.0
28
+ - !ruby/object:Gem::Dependency
29
+ name: dry-schema
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '1'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '1'
42
+ - !ruby/object:Gem::Dependency
43
+ name: redis
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '4'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '4'
56
+ - !ruby/object:Gem::Dependency
57
+ name: xxhash
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: 0.4.0
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: 0.4.0
70
+ description:
71
+ email:
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".rspec"
77
+ - ".rubocop.yml"
78
+ - CHANGELOG.md
79
+ - Gemfile
80
+ - Gemfile.lock
81
+ - README.md
82
+ - Rakefile
83
+ - bin/console
84
+ - bin/setup
85
+ - heavy_keeper.gemspec
86
+ - lib/heavy_keeper.rb
87
+ - lib/heavy_keeper/bucket.rb
88
+ - lib/heavy_keeper/config.rb
89
+ - lib/heavy_keeper/min_heap.rb
90
+ - lib/heavy_keeper/top_k.rb
91
+ - lib/heavy_keeper/version.rb
92
+ - lib/validations/message.rb
93
+ homepage: https://github.com/Kaligo/heavy_keeper
94
+ licenses: []
95
+ metadata:
96
+ homepage_uri: https://github.com/Kaligo/heavy_keeper
97
+ source_code_uri: https://github.com/Kaligo/heavy_keeper
98
+ changelog_uri: https://github.com/Kaligo/heavy_keeper/blob/master/CHANGELOG.md
99
+ rubygems_mfa_required: 'true'
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: 2.7.0
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubygems_version: 3.2.28
116
+ signing_key:
117
+ specification_version: 4
118
+ summary: Gem which implements HeavyKeeper algorithm
119
+ test_files: []