heavy_keeper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8de4da5b817f62a3047b5de8d8b1118d91577ef12e5389ade96f1dd69179e045
4
+ data.tar.gz: d42f28f5a1af3539010c269e14540fcaef6cafc9ad059529f04a10a55faf3422
5
+ SHA512:
6
+ metadata.gz: 29c288a59c986a75df4224030486c9afb59af3fa00710b35c0355f617e679b75559c6143881a3bb6404754a3ebc18bcb63c99ecc584072ac6910f43ee638dbfc
7
+ data.tar.gz: 8ad3f0583c3e64a59516a80c28cc162c2d64fdd4ce44c82d088db02143713425c049190b3f5cd5352daa9e830896ed9c51399ebf49a3e240d91ca806cd354bc9
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,65 @@
1
+ AllCops:
2
+ NewCops: enable
3
+ TargetRubyVersion: 2.7
4
+ Exclude:
5
+ - Gemfile.lock
6
+
7
+ Bundler/OrderedGems:
8
+ Enabled: false
9
+
10
+ Gemspec/RequiredRubyVersion:
11
+ Enabled: false
12
+
13
+ Layout/ArgumentAlignment:
14
+ EnforcedStyle: with_fixed_indentation
15
+ Layout/MultilineMethodCallIndentation:
16
+ EnforcedStyle: indented
17
+
18
+ Metrics/AbcSize:
19
+ Max: 25
20
+ Metrics/BlockLength:
21
+ Exclude:
22
+ - spec/**/*
23
+ - heavy_keeper.gemspec
24
+ Metrics/CyclomaticComplexity:
25
+ Max: 10
26
+ Metrics/MethodLength:
27
+ Max: 20
28
+ Metrics/PerceivedComplexity:
29
+ Max: 10
30
+
31
+ Naming/VariableNumber:
32
+ Enabled: false
33
+ Naming/PredicateName:
34
+ Enabled: false
35
+
36
+ Style/Alias:
37
+ EnforcedStyle: prefer_alias_method
38
+ Style/BlockDelimiters:
39
+ Exclude:
40
+ - spec/**/*_spec.rb
41
+ Style/Documentation:
42
+ Enabled: false
43
+ Style/FrozenStringLiteralComment:
44
+ Enabled: false
45
+ Style/GuardClause:
46
+ Enabled: false
47
+ Style/IfUnlessModifier:
48
+ Enabled: false
49
+ Style/ModuleFunction:
50
+ Exclude:
51
+ - 'lib/heavy_keeper.rb'
52
+ Style/PercentLiteralDelimiters:
53
+ Enabled: false
54
+ Style/SafeNavigation:
55
+ Enabled: false
56
+ Style/StringLiterals:
57
+ EnforcedStyle: single_quotes
58
+ Style/TrailingUnderscoreVariable:
59
+ Enabled: false
60
+ Style/WordArray:
61
+ EnforcedStyle: percent
62
+ Layout/LineLength:
63
+ Max: 200
64
+ Lint/AmbiguousBlockAssociation:
65
+ Enabled: false
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2022-06-29
4
+
5
+ - Initial release
data/Gemfile ADDED
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ # Specify your gem's dependencies in heavy_keeper.gemspec
6
+ gemspec
7
+
8
+ gem 'rubocop', require: false
9
+
10
+ gem 'rake', '~> 13.0'
11
+
12
+ group :test do
13
+ gem 'mock_redis'
14
+ gem 'rspec'
15
+ gem 'pry-byebug'
16
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,103 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ heavy_keeper (0.1.0)
5
+ dry-configurable (>= 0.13.0)
6
+ dry-schema (~> 1)
7
+ redis (~> 4)
8
+ xxhash (~> 0.4.0)
9
+
10
+ GEM
11
+ remote: https://rubygems.org/
12
+ specs:
13
+ ast (2.4.2)
14
+ byebug (11.1.3)
15
+ coderay (1.1.3)
16
+ concurrent-ruby (1.1.10)
17
+ diff-lcs (1.5.0)
18
+ dry-configurable (0.15.0)
19
+ concurrent-ruby (~> 1.0)
20
+ dry-core (~> 0.6)
21
+ dry-container (0.9.0)
22
+ concurrent-ruby (~> 1.0)
23
+ dry-configurable (~> 0.13, >= 0.13.0)
24
+ dry-core (0.7.1)
25
+ concurrent-ruby (~> 1.0)
26
+ dry-inflector (0.2.1)
27
+ dry-initializer (3.1.1)
28
+ dry-logic (1.2.0)
29
+ concurrent-ruby (~> 1.0)
30
+ dry-core (~> 0.5, >= 0.5)
31
+ dry-schema (1.9.1)
32
+ concurrent-ruby (~> 1.0)
33
+ dry-configurable (~> 0.13, >= 0.13.0)
34
+ dry-core (~> 0.5, >= 0.5)
35
+ dry-initializer (~> 3.0)
36
+ dry-logic (~> 1.0)
37
+ dry-types (~> 1.5)
38
+ dry-types (1.5.1)
39
+ concurrent-ruby (~> 1.0)
40
+ dry-container (~> 0.3)
41
+ dry-core (~> 0.5, >= 0.5)
42
+ dry-inflector (~> 0.1, >= 0.1.2)
43
+ dry-logic (~> 1.0, >= 1.0.2)
44
+ method_source (1.0.0)
45
+ mock_redis (0.31.0)
46
+ ruby2_keywords
47
+ parallel (1.22.1)
48
+ parser (3.1.2.0)
49
+ ast (~> 2.4.1)
50
+ pry (0.13.1)
51
+ coderay (~> 1.1)
52
+ method_source (~> 1.0)
53
+ pry-byebug (3.9.0)
54
+ byebug (~> 11.0)
55
+ pry (~> 0.13.0)
56
+ rainbow (3.1.1)
57
+ rake (13.0.6)
58
+ redis (4.7.0)
59
+ regexp_parser (2.4.0)
60
+ rexml (3.2.5)
61
+ rspec (3.11.0)
62
+ rspec-core (~> 3.11.0)
63
+ rspec-expectations (~> 3.11.0)
64
+ rspec-mocks (~> 3.11.0)
65
+ rspec-core (3.11.0)
66
+ rspec-support (~> 3.11.0)
67
+ rspec-expectations (3.11.0)
68
+ diff-lcs (>= 1.2.0, < 2.0)
69
+ rspec-support (~> 3.11.0)
70
+ rspec-mocks (3.11.1)
71
+ diff-lcs (>= 1.2.0, < 2.0)
72
+ rspec-support (~> 3.11.0)
73
+ rspec-support (3.11.0)
74
+ rubocop (1.29.1)
75
+ parallel (~> 1.10)
76
+ parser (>= 3.1.0.0)
77
+ rainbow (>= 2.2.2, < 4.0)
78
+ regexp_parser (>= 1.8, < 3.0)
79
+ rexml (>= 3.2.5, < 4.0)
80
+ rubocop-ast (>= 1.17.0, < 2.0)
81
+ ruby-progressbar (~> 1.7)
82
+ unicode-display_width (>= 1.4.0, < 3.0)
83
+ rubocop-ast (1.18.0)
84
+ parser (>= 3.1.1.0)
85
+ ruby-progressbar (1.11.0)
86
+ ruby2_keywords (0.0.5)
87
+ unicode-display_width (2.1.0)
88
+ xxhash (0.4.0)
89
+
90
+ PLATFORMS
91
+ x86_64-darwin-19
92
+ x86_64-linux
93
+
94
+ DEPENDENCIES
95
+ heavy_keeper!
96
+ mock_redis
97
+ pry-byebug
98
+ rake (~> 13.0)
99
+ rspec
100
+ rubocop
101
+
102
+ BUNDLED WITH
103
+ 2.2.32
data/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # HeavyKeeper
2
+ This gem implements the HeavyKeeper algorithm, based on the paper of the same
3
+ name: https://www.usenix.org/conference/atc18/presentation/gong
4
+
5
+ The interface is designed to be similar to the RedisBloom TopK data structure (DS)
6
+ (https://oss.redis.com/redisbloom/TopK_Commands/).
7
+
8
+ This is a naive implementation of HeavyKeeper, probably not very optimized.
9
+ We use multiple Redis DSs:
10
+
11
+ - A hash with at most depth * width entries that acts as the bucket array storing the main
12
+ counters
13
+ - A sorted set with maximum K elements to act as a MinHeap
14
+ - A hash with at most K entries that stores a more accurate counter for each element in the MinHeap
15
+
16
+
17
+ ## Installation
18
+
19
+ Add this line to your application's Gemfile:
20
+
21
+ ```ruby
22
+ gem 'heavy_keeper'
23
+ ```
24
+
25
+ ## Usage
26
+
27
+ You will need to add an initializer to provide some configuration:
28
+
29
+ ```ruby
30
+ HeavyKeeper::Config.configure do |config|
31
+ config.cache_prefix = 'cache_prefix'.freeze # currently used as prefix for the redis data structures.
32
+ config.storage = Redis.new # a Redis store, at least version 4.0
33
+ end
34
+ ```
35
+
36
+ In general, you will only interact with an instance of the `HeavyKeeper::TopK` class.
37
+
38
+ These are the most relevant instance methods (see code comments for more detail):
39
+
40
+ `reserve(name, top_k: size, width:, depth:, decay:)` - sets up a Top K list with specified options
41
+
42
+ `increase_by(name, *items_and_increments)` - adds `[item, increment]` pairs to a list (use `add(name, *items)` to increase each item by 1)
43
+
44
+ `list(name)` - returns full list of items in Top K list
45
+
46
+ `clear(name)` - deletes the list together with its metadata, heap and bucket data in Redis
47
+
48
+ `remove(name, item)` - resets the counters of the targeted item and removes it from the list
49
+
50
+ ## Development
51
+
52
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `bundle exec rspec` to run the tests.
53
+
54
+ To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
55
+
56
+ ## Contributing
57
+
58
+ Bug reports and pull requests are welcome on GitHub at https://github.com/Kaligo/heavy_keeper.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require 'rubocop/rake_task'
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec rubocop]
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'heavy_keeper'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require 'irb'
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/heavy_keeper/version'
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = 'heavy_keeper'
7
+ spec.version = HeavyKeeper::VERSION
8
+ spec.authors = ['Hieu Nguyen', 'Kenneth Teh']
9
+
10
+ spec.summary = 'Gem which implements HeavyKeeper algorithm'
11
+ spec.homepage = 'https://github.com/Kaligo/heavy_keeper'
12
+ spec.required_ruby_version = '>= 2.7.0'
13
+
14
+ spec.metadata['homepage_uri'] = spec.homepage
15
+ spec.metadata['source_code_uri'] = 'https://github.com/Kaligo/heavy_keeper'
16
+ spec.metadata['changelog_uri'] = 'https://github.com/Kaligo/heavy_keeper/blob/master/CHANGELOG.md'
17
+
18
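+ # Package every file tracked by git (`git ls-files -z`), except this gemspec and paths that start with
19
+ # test/, spec/, features/, .git, .travis, .circleci or appveyor. NOTE(review): heavy_keeper.gemspec is still in the 0.1.0 file list, so `f == __FILE__` apparently never matches - confirm.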
20
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
21
+ `git ls-files -z`.split("\x0").reject do |f|
22
+ (f == __FILE__) || f.match(%r{\A(?:(?:test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
23
+ end
24
+ end
25
+ spec.bindir = 'exe'
26
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
27
+ spec.require_paths = ['lib']
28
+
29
+ spec.add_dependency 'dry-configurable', '>= 0.13.0'
30
+ spec.add_dependency 'dry-schema', '~> 1'
31
+ spec.add_dependency 'redis', '~> 4'
32
+ spec.add_dependency 'xxhash', '~> 0.4.0'
33
+
34
+ spec.metadata['rubygems_mfa_required'] = 'true'
35
+ end
@@ -0,0 +1,40 @@
1
+ require 'json'
2
+
3
+ # Acts as a 2D array (row i, column j) that stores the HeavyKeeper counters.
4
+ # It is implemented as one Redis hash per key: cell (i, j) lives in the hash field "i:j"
5
+ # and every value is stored JSON-encoded (TopK stores [fingerprint, count] pairs in the cells).
6
+ # rubocop:disable Naming/MethodParameterName
7
+ module HeavyKeeper
8
+ class Bucket
9
+ def initialize(storage = HeavyKeeper::Config.config.storage)
10
+ @storage = storage
11
+ end
12
+
13
+ def set(key, i, j, value)
14
+ storage.hset(hash_key(key), "#{i}:#{j}", JSON.generate(value)) # overwrites cell (i, j) with the JSON form of value
15
+ end
16
+
17
+ def get(key, i, j)
18
+ value = storage.hget(hash_key(key), "#{i}:#{j}")
19
+
20
+ value ? JSON.parse(value) : value # a falsy reply (cell never written) is returned as-is instead of being parsed
21
+ end
22
+
23
+ def clear(key)
24
+ storage.del(hash_key(key)) # drops the whole hash, i.e. every cell stored for this key
25
+ end
26
+
27
+ private
28
+
29
+ attr_reader :storage
30
+
31
+ def hash_key(key)
32
+ "#{key_prefix}:hash:#{key}"
33
+ end
34
+
35
+ def key_prefix
36
+ "#{HeavyKeeper::Config.config.cache_prefix}_bucket" # read on every call, so a later change of cache_prefix is picked up
37
+ end
38
+ end
39
+ end
40
+ # rubocop:enable Naming/MethodParameterName
@@ -0,0 +1,10 @@
1
+ require 'dry-configurable'
2
+
3
+ module HeavyKeeper
4
+ class Config
5
+ extend Dry::Configurable
6
+
7
+ setting :cache_prefix, default: 'cache_prefix' # prefix used when building every Redis key in this gem
8
+ setting :storage # client used as the default storage by TopK, MinHeap and Bucket (no default value)
9
+ end
10
+ end
@@ -0,0 +1,92 @@
1
+ # A min-heap implementation in Redis.
2
+ # This class is not supposed to use outside of the HeavyKeeper implementation
3
+ # for TopK. It uses the following datastructure in Redis:
4
+ # - A sorted set with size K to store the min heap
5
+ # - A hash with size K to store counter for each item in min heap
6
+ #
7
+ module HeavyKeeper
8
+ class MinHeap
9
+ def initialize(storage = HeavyKeeper::Config.config.storage)
10
+ @storage = storage
11
+ end
12
+
13
+ def list(key, total)
14
+ items = storage.zrevrange(sorted_set_key(key), 0, total - 1)
15
+
16
+ if items.empty?
17
+ {}
18
+ else
19
+ storage.mapped_hmget(hash_key(key), *items)
20
+ end
21
+ end
22
+
23
+ def count(key, item)
24
+ storage.hget(hash_key(key), item).to_i
25
+ end
26
+
27
+ def min(key)
28
+ item = storage.zrangebyscore(sorted_set_key(key), - Float::INFINITY, Float::INFINITY, limit: [0, 1]).first
29
+ count(key, item)
30
+ end
31
+
32
+ def exist?(key, item)
33
+ storage.hexists(hash_key(key), item)
34
+ end
35
+
36
+ def add(key, item, value, top_k)
37
+ count = storage.zcard(sorted_set_key(key))
38
+
39
+ storage.multi do
40
+ storage.zadd(sorted_set_key(key), value, item)
41
+ storage.hset(hash_key(key), item, value)
42
+ end
43
+
44
+ if count >= top_k
45
+ dropped_item, _ = storage.zpopmin(sorted_set_key(key))
46
+ storage.hdel(hash_key(key), dropped_item)
47
+
48
+ if dropped_item != item
49
+ value
50
+ end
51
+ else
52
+ value
53
+ end
54
+ end
55
+
56
+ def update(key, item, value)
57
+ storage.multi do
58
+ storage.zrem(sorted_set_key(key), item)
59
+ storage.zincrby(sorted_set_key(key), value, item)
60
+ storage.hset(hash_key(key), item, value)
61
+ end
62
+
63
+ value
64
+ end
65
+
66
+ def clear(key)
67
+ storage.del(sorted_set_key(key))
68
+ storage.del(hash_key(key))
69
+ end
70
+
71
+ def delete(key, item)
72
+ storage.zrem(sorted_set_key(key), item)
73
+ storage.hdel(hash_key(key), item)
74
+ end
75
+
76
+ private
77
+
78
+ attr_reader :storage
79
+
80
+ def sorted_set_key(key)
81
+ "#{key_prefix}:sorted_set:#{key}"
82
+ end
83
+
84
+ def hash_key(key)
85
+ "#{key_prefix}:hash:#{key}"
86
+ end
87
+
88
+ def key_prefix
89
+ "#{HeavyKeeper::Config.config.cache_prefix}_heavy_keeper"
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry-schema'
4
+ require 'securerandom'
5
+ require 'xxhash'
6
+ require_relative '../validations/message'
7
+
8
+ module HeavyKeeper
9
+ class TopK # rubocop:disable Metrics/ClassLength
10
+ Validator = ::Dry::Schema.Params do
11
+ required(:top_k).filled(:integer, gt?: 0)
12
+ required(:width).filled(:integer, gt?: 0)
13
+ required(:depth).filled(:integer, gt?: 0)
14
+ required(:decay).filled(:decimal, gt?: 0, lteq?: 1)
15
+ end
16
+
17
+ # Initialize the controller used to create/operate on top-k DS
18
+ #
19
+ # @param storage [Redis] A Redis client to interact with Redis; it is also handed to the MinHeap and Bucket helpers
20
+ #
21
+ # @return [HeavyKeeper::TopK] new instance of the controller
22
+ def initialize(storage: HeavyKeeper::Config.config.storage)
23
+ @storage = storage
24
+ @min_heap = MinHeap.new(storage)
25
+ @bucket = Bucket.new(storage)
26
+ end
27
+
28
+ # Complexity O(1)
29
+ # Initialize a TopK in Redis with specified parameters.
30
+ #
31
+ # @param key [String] a key for identifying top-k DS in Redis
32
+ # @param options [Hash] must provide the four entries below (checked by Validator):
33
+ # top_k [Integer] number of top elements we want to track (> 0)
34
+ # width [Integer] size of each bucket row that stores counters (> 0)
35
+ # depth [Integer] number of bucket rows we want to store (> 0)
36
+ # decay [Decimal] decay factor, > 0 and <= 1: smaller number means bigger distinction between mouse flow and elephant flow
37
+ #
38
+ # @return OK on success; raises HeavyKeeper::Error when the options fail validation
39
+ def reserve(key, options)
40
+ options = validate(options)
41
+
42
+ storage.mapped_hmset(metadata_key(key), options)
43
+ end
44
+
45
+ # Complexity O(k + depth)
46
+ # Add an array of items to a Top-K DS
47
+ #
48
+ # @param key [String] key for identifying top-k DS in Redis
49
+ # @param items [String, String, ...] each value represents an item we want to
50
+ # store in Top-K; every item is added with an increment of 1
51
+ #
52
+ # @return [Array[Nil, Integer]]
53
+ # nil if the item is not added to the list
54
+ # otherwise, return the current value of item
55
+ def add(key, *items)
56
+ items_and_increments = items.map { |item| [item, 1] }
57
+ increase_by(key, *items_and_increments)
58
+ end
59
+
60
+ # Complexity O(k + (increment * depth)) - NOTE(review): the code below does one pass over depth per item, confirm the bound
61
+ # Add an array of items to a Top-K DS, with custom increment for each item
62
+ #
63
+ # @param key [String] key for identifying top-k DS in Redis
64
+ # @param items_and_increments [[String, Integer], ...]
65
+ # each value represents an item and increment that needs to be added
66
+ # to Top-K
67
+ # NOTE(review): StringIO is used below but this file never requires 'stringio' - presumably a dependency loads it; confirm
68
+ # @return [Array[Nil, Integer]]
69
+ # nil if the item is not added to the list
70
+ # otherwise, return the current value of item
71
+ # rubocop:disable Metrics/AbcSize
72
+ # rubocop:disable Metrics/MethodLength
73
+ # rubocop:disable Metrics/BlockLength
74
+ # rubocop:disable Metrics/PerceivedComplexity
75
+ def increase_by(key, *items_and_increments)
76
+ options = validate(storage.hgetall(metadata_key(key)))
77
+
78
+ items_and_increments.map do |(item, increment)|
79
+ max_count = 0
80
+ item_fingerprint = XXhash.xxh64(item)
81
+
82
+ exist = min_heap.exist?(key, item)
83
+ min_value = min_heap.min(key)
84
+
85
+ options[:depth].times do |i|
86
+ bucket_number = XXhash.xxh64_stream(StringIO.new(item), i) % options[:width] # presumably i is the per-row hash seed
87
+
88
+ fingerprint, count = bucket.get(key, i, bucket_number)
89
+
90
+ if count.nil? || count.zero? # empty or reset cell: claim it for this item
91
+ bucket.set(key, i, bucket_number, [item_fingerprint, increment])
92
+ max_count = [increment, max_count].max
93
+ elsif fingerprint == item_fingerprint # cell already belongs to this item
94
+ if exist || count <= min_value # items outside the heap only grow while not above the heap minimum
95
+ bucket.set(key, i, bucket_number, [fingerprint, count + increment])
96
+ max_count = [count + increment, max_count].max
97
+ end
98
+ else
99
+ decay = options[:decay]**count # cell belongs to another item: the chance to decay it shrinks as its count grows
100
+
101
+ if SecureRandom.rand < decay
102
+ count -= increment # NOTE(review): one random trial subtracts the whole increment - confirm this is intended
103
+
104
+ if count.positive?
105
+ bucket.set(key, i, bucket_number, [fingerprint, count])
106
+ else
107
+ bucket.set(key, i, bucket_number, [item_fingerprint, increment]) # the old owner decayed out: take the cell over
108
+ max_count = [increment, max_count].max
109
+ end
110
+ end
111
+ end
112
+ end
113
+
114
+ if exist
115
+ min_heap.update(key, item, max_count)
116
+ else
117
+ min_heap.add(key, item, max_count, options[:top_k])
118
+ end
119
+ end
120
+ end
121
+ # rubocop:enable Metrics/AbcSize
122
+ # rubocop:enable Metrics/MethodLength
123
+ # rubocop:enable Metrics/BlockLength
124
+ # rubocop:enable Metrics/PerceivedComplexity
125
+
126
+ # Complexity O(k)
127
+ # Checks whether an item is one of Top-K items. Multiple items can be checked at once.
128
+ #
129
+ # @param key [String] a key for identifying top-k DS in Redis
130
+ # @param items [String, String, ...] an array of items that we want to check
131
+ #
132
+ # @return [Array[Boolean]] true if item is in Top-K, otherwise return false
133
+ def query(key, *items)
134
+ items.map do |item|
135
+ min_heap.exist?(key, item)
136
+ end
137
+ end
138
+
139
+ # Complexity O(k + depth)
140
+ # Please note this number will never be higher than the real count
141
+ # and likely to be lower. Multiple items can be queried at once.
142
+ #
143
+ # @param key [String] a key for identifying top-k DS in Redis
144
+ # @param items [String, String, ...] an array of items that we want to check
145
+ #
146
+ # @return [Array[Integer]] return the count of each item as stored in the heap
147
+ def count(key, *items)
148
+ items.map do |item|
149
+ min_heap.count(key, item)
150
+ end
151
+ end
152
+
153
+ # Complexity O(k)
154
+ # Return full list of items in Top K list.
155
+ #
156
+ # @param key [String] a key for identifying top-k DS in Redis
157
+ #
158
+ # @return [Hash] return a hash that contains the key and the count of the top-K
159
+ # elements
160
+ def list(key)
161
+ top_k = storage.hget(metadata_key(key), :top_k).to_i
162
+ min_heap.list(key, top_k)
163
+ end
164
+
165
+ # Complexity O(1)
166
+ # Clean up all Redis data related to a key
167
+ #
168
+ # @param key [String] a key for identifying top-k DS in Redis
169
+ #
170
+ # @return the result of the MULTI block wrapping the deletions (NOTE(review): presumably not a plain OK - confirm)
171
+ def clear(key)
172
+ storage.multi do
173
+ storage.del(metadata_key(key))
174
+ min_heap.clear(key)
175
+ bucket.clear(key)
176
+ end
177
+ end
178
+
179
+ # Complexity O(depth)
180
+ # Reset counter of an item to zero in order to decay it out
181
+ #
182
+ # @param key [String] a key for identifying top-k DS in Redis
183
+ # @param item [String] item that we want to decay
184
+ #
185
+ # @return the reply of the heap's final hdel call; raises HeavyKeeper::Error when the stored options are invalid
186
+ def remove(key, item)
187
+ options = validate(storage.hgetall(metadata_key(key)))
188
+ item_fingerprint = XXhash.xxh64(item)
189
+
190
+ options[:depth].times do |i|
191
+ bucket_number = XXhash.xxh64_stream(StringIO.new(item), i) % options[:width]
192
+ fingerprint, _ = bucket.get(key, i, bucket_number)
193
+
194
+ bucket.set(key, i, bucket_number, [fingerprint, 0]) if item_fingerprint == fingerprint
195
+ end
196
+
197
+ min_heap.delete(key, item)
198
+ end
199
+
200
+ private
201
+
202
+ attr_reader :storage, :min_heap, :bucket
203
+
204
+ def metadata_key(key)
205
+ "#{key_prefix}:#{key}:data"
206
+ end
207
+
208
+ def key_prefix
209
+ "#{HeavyKeeper::Config.config.cache_prefix}_heavy_keeper"
210
+ end
211
+
212
+ def validate(options)
213
+ result = Validator.call(options)
214
+
215
+ if result.failure?
216
+ error = ::Validations::Message.new.build(result.errors.to_h).join('. ')
217
+ raise HeavyKeeper::Error, error
218
+ end
219
+
220
+ result.output # the schema's output hash; callers read it with symbol keys such as options[:depth]
221
+ end
222
+ end
223
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module HeavyKeeper
4
+ VERSION = '0.1.0' # gem version; heavy_keeper.gemspec reads it for spec.version
5
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'heavy_keeper/version'
4
+ require_relative 'heavy_keeper/config'
5
+ require_relative 'heavy_keeper/top_k'
6
+ require_relative 'heavy_keeper/min_heap'
7
+ require_relative 'heavy_keeper/bucket'
8
+ # Top-level namespace of the gem; Error is raised by TopK#validate when options fail validation.
9
+ module HeavyKeeper
10
+ class Error < StandardError; end
11
+ end
@@ -0,0 +1,35 @@
1
+ module Validations
2
+ class Message
3
+ CLASSIFY_SEPARATOR = '_'.freeze
4
+ TITLEIZE_SEPARATOR = ' '.freeze
5
+
6
+ # Flattens a (possibly nested) validation error structure into readable messages.
7
+ # @param errors [Hash, Array, Object] error tree of a schema result (TopK#validate passes `result.errors.to_h`)
8
+ # @param parent [nil, String] space-joined path of the keys that lead to `errors`
9
+ # @return [Array<String>] one "<Titleized parent> <message>" string per leaf when given a Hash or an Array;
10
+ # any other value is returned unchanged, which is how the leaf messages themselves are produced.
11
+ # TopK#validate joins the result with '. ' to build the HeavyKeeper::Error message.
12
+ def build(errors, parent = nil)
13
+ case errors
14
+ when Hash
15
+ errors.flat_map do |key, value|
16
+ child = [parent, key].compact.join(' ')
17
+ build(value, child)
18
+ end
19
+ when Array
20
+ errors.flat_map do |error|
21
+ "#{titleize(parent.to_s)} #{build(error)}"
22
+ end
23
+ else
24
+ errors
25
+ end
26
+ end
27
+
28
+ private
29
+
30
+ def titleize(string)
31
+ # NOTE: not a robust titleize - it splits on '_' only, so a space-joined nested parent such as 'a b_c' becomes 'A b C'
32
+ string.split(CLASSIFY_SEPARATOR).map(&:capitalize).join(TITLEIZE_SEPARATOR)
33
+ end
34
+ end
35
+ end
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: heavy_keeper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Hieu Nguyen
8
+ - Kenneth Teh
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2022-06-30 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: dry-configurable
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: 0.13.0
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: 0.13.0
28
+ - !ruby/object:Gem::Dependency
29
+ name: dry-schema
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: '1'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: '1'
42
+ - !ruby/object:Gem::Dependency
43
+ name: redis
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - "~>"
47
+ - !ruby/object:Gem::Version
48
+ version: '4'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - "~>"
54
+ - !ruby/object:Gem::Version
55
+ version: '4'
56
+ - !ruby/object:Gem::Dependency
57
+ name: xxhash
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: 0.4.0
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: 0.4.0
70
+ description:
71
+ email:
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".rspec"
77
+ - ".rubocop.yml"
78
+ - CHANGELOG.md
79
+ - Gemfile
80
+ - Gemfile.lock
81
+ - README.md
82
+ - Rakefile
83
+ - bin/console
84
+ - bin/setup
85
+ - heavy_keeper.gemspec
86
+ - lib/heavy_keeper.rb
87
+ - lib/heavy_keeper/bucket.rb
88
+ - lib/heavy_keeper/config.rb
89
+ - lib/heavy_keeper/min_heap.rb
90
+ - lib/heavy_keeper/top_k.rb
91
+ - lib/heavy_keeper/version.rb
92
+ - lib/validations/message.rb
93
+ homepage: https://github.com/Kaligo/heavy_keeper
94
+ licenses: []
95
+ metadata:
96
+ homepage_uri: https://github.com/Kaligo/heavy_keeper
97
+ source_code_uri: https://github.com/Kaligo/heavy_keeper
98
+ changelog_uri: https://github.com/Kaligo/heavy_keeper/blob/master/CHANGELOG.md
99
+ rubygems_mfa_required: 'true'
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: 2.7.0
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubygems_version: 3.2.28
116
+ signing_key:
117
+ specification_version: 4
118
+ summary: Gem which implements HeavyKeeper algorithm
119
+ test_files: []