heavy_keeper 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +65 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +16 -0
- data/Gemfile.lock +103 -0
- data/README.md +58 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/heavy_keeper.gemspec +35 -0
- data/lib/heavy_keeper/bucket.rb +40 -0
- data/lib/heavy_keeper/config.rb +10 -0
- data/lib/heavy_keeper/min_heap.rb +92 -0
- data/lib/heavy_keeper/top_k.rb +223 -0
- data/lib/heavy_keeper/version.rb +5 -0
- data/lib/heavy_keeper.rb +11 -0
- data/lib/validations/message.rb +35 -0
- metadata +119 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 8de4da5b817f62a3047b5de8d8b1118d91577ef12e5389ade96f1dd69179e045
|
|
4
|
+
data.tar.gz: d42f28f5a1af3539010c269e14540fcaef6cafc9ad059529f04a10a55faf3422
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 29c288a59c986a75df4224030486c9afb59af3fa00710b35c0355f617e679b75559c6143881a3bb6404754a3ebc18bcb63c99ecc584072ac6910f43ee638dbfc
|
|
7
|
+
data.tar.gz: 8ad3f0583c3e64a59516a80c28cc162c2d64fdd4ce44c82d088db02143713425c049190b3f5cd5352daa9e830896ed9c51399ebf49a3e240d91ca806cd354bc9
|
data/.rspec
ADDED
data/.rubocop.yml
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
AllCops:
|
|
2
|
+
NewCops: enable
|
|
3
|
+
TargetRubyVersion: 2.7
|
|
4
|
+
Exclude:
|
|
5
|
+
- Gemfile.lock
|
|
6
|
+
|
|
7
|
+
Bundler/OrderedGems:
|
|
8
|
+
Enabled: false
|
|
9
|
+
|
|
10
|
+
Gemspec/RequiredRubyVersion:
|
|
11
|
+
Enabled: false
|
|
12
|
+
|
|
13
|
+
Layout/ArgumentAlignment:
|
|
14
|
+
EnforcedStyle: with_fixed_indentation
|
|
15
|
+
Layout/MultilineMethodCallIndentation:
|
|
16
|
+
EnforcedStyle: indented
|
|
17
|
+
|
|
18
|
+
Metrics/AbcSize:
|
|
19
|
+
Max: 25
|
|
20
|
+
Metrics/BlockLength:
|
|
21
|
+
Exclude:
|
|
22
|
+
- spec/**/*
|
|
23
|
+
- heavy_keeper.gemspec
|
|
24
|
+
Metrics/CyclomaticComplexity:
|
|
25
|
+
Max: 10
|
|
26
|
+
Metrics/MethodLength:
|
|
27
|
+
Max: 20
|
|
28
|
+
Metrics/PerceivedComplexity:
|
|
29
|
+
Max: 10
|
|
30
|
+
|
|
31
|
+
Naming/VariableNumber:
|
|
32
|
+
Enabled: false
|
|
33
|
+
Naming/PredicateName:
|
|
34
|
+
Enabled: false
|
|
35
|
+
|
|
36
|
+
Style/Alias:
|
|
37
|
+
EnforcedStyle: prefer_alias_method
|
|
38
|
+
Style/BlockDelimiters:
|
|
39
|
+
Exclude:
|
|
40
|
+
- spec/**/*_spec.rb
|
|
41
|
+
Style/Documentation:
|
|
42
|
+
Enabled: false
|
|
43
|
+
Style/FrozenStringLiteralComment:
|
|
44
|
+
Enabled: false
|
|
45
|
+
Style/GuardClause:
|
|
46
|
+
Enabled: false
|
|
47
|
+
Style/IfUnlessModifier:
|
|
48
|
+
Enabled: false
|
|
49
|
+
Style/ModuleFunction:
|
|
50
|
+
Exclude:
|
|
51
|
+
- 'lib/heavy_keeper.rb'
|
|
52
|
+
Style/PercentLiteralDelimiters:
|
|
53
|
+
Enabled: false
|
|
54
|
+
Style/SafeNavigation:
|
|
55
|
+
Enabled: false
|
|
56
|
+
Style/StringLiterals:
|
|
57
|
+
EnforcedStyle: single_quotes
|
|
58
|
+
Style/TrailingUnderscoreVariable:
|
|
59
|
+
Enabled: false
|
|
60
|
+
Style/WordArray:
|
|
61
|
+
EnforcedStyle: percent
|
|
62
|
+
Layout/LineLength:
|
|
63
|
+
Max: 200
|
|
64
|
+
Lint/AmbiguousBlockAssociation:
|
|
65
|
+
Enabled: false
|
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
source 'https://rubygems.org'
|
|
4
|
+
|
|
5
|
+
# Specify your gem's dependencies in heavy_keeper.gemspec
|
|
6
|
+
gemspec
|
|
7
|
+
|
|
8
|
+
gem 'rubocop', require: false
|
|
9
|
+
|
|
10
|
+
gem 'rake', '~> 13.0'
|
|
11
|
+
|
|
12
|
+
group :test do
|
|
13
|
+
gem 'mock_redis'
|
|
14
|
+
gem 'rspec'
|
|
15
|
+
gem 'pry-byebug'
|
|
16
|
+
end
|
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
heavy_keeper (0.1.0)
|
|
5
|
+
dry-configurable (>= 0.13.0)
|
|
6
|
+
dry-schema (~> 1)
|
|
7
|
+
redis (~> 4)
|
|
8
|
+
xxhash (~> 0.4.0)
|
|
9
|
+
|
|
10
|
+
GEM
|
|
11
|
+
remote: https://rubygems.org/
|
|
12
|
+
specs:
|
|
13
|
+
ast (2.4.2)
|
|
14
|
+
byebug (11.1.3)
|
|
15
|
+
coderay (1.1.3)
|
|
16
|
+
concurrent-ruby (1.1.10)
|
|
17
|
+
diff-lcs (1.5.0)
|
|
18
|
+
dry-configurable (0.15.0)
|
|
19
|
+
concurrent-ruby (~> 1.0)
|
|
20
|
+
dry-core (~> 0.6)
|
|
21
|
+
dry-container (0.9.0)
|
|
22
|
+
concurrent-ruby (~> 1.0)
|
|
23
|
+
dry-configurable (~> 0.13, >= 0.13.0)
|
|
24
|
+
dry-core (0.7.1)
|
|
25
|
+
concurrent-ruby (~> 1.0)
|
|
26
|
+
dry-inflector (0.2.1)
|
|
27
|
+
dry-initializer (3.1.1)
|
|
28
|
+
dry-logic (1.2.0)
|
|
29
|
+
concurrent-ruby (~> 1.0)
|
|
30
|
+
dry-core (~> 0.5, >= 0.5)
|
|
31
|
+
dry-schema (1.9.1)
|
|
32
|
+
concurrent-ruby (~> 1.0)
|
|
33
|
+
dry-configurable (~> 0.13, >= 0.13.0)
|
|
34
|
+
dry-core (~> 0.5, >= 0.5)
|
|
35
|
+
dry-initializer (~> 3.0)
|
|
36
|
+
dry-logic (~> 1.0)
|
|
37
|
+
dry-types (~> 1.5)
|
|
38
|
+
dry-types (1.5.1)
|
|
39
|
+
concurrent-ruby (~> 1.0)
|
|
40
|
+
dry-container (~> 0.3)
|
|
41
|
+
dry-core (~> 0.5, >= 0.5)
|
|
42
|
+
dry-inflector (~> 0.1, >= 0.1.2)
|
|
43
|
+
dry-logic (~> 1.0, >= 1.0.2)
|
|
44
|
+
method_source (1.0.0)
|
|
45
|
+
mock_redis (0.31.0)
|
|
46
|
+
ruby2_keywords
|
|
47
|
+
parallel (1.22.1)
|
|
48
|
+
parser (3.1.2.0)
|
|
49
|
+
ast (~> 2.4.1)
|
|
50
|
+
pry (0.13.1)
|
|
51
|
+
coderay (~> 1.1)
|
|
52
|
+
method_source (~> 1.0)
|
|
53
|
+
pry-byebug (3.9.0)
|
|
54
|
+
byebug (~> 11.0)
|
|
55
|
+
pry (~> 0.13.0)
|
|
56
|
+
rainbow (3.1.1)
|
|
57
|
+
rake (13.0.6)
|
|
58
|
+
redis (4.7.0)
|
|
59
|
+
regexp_parser (2.4.0)
|
|
60
|
+
rexml (3.2.5)
|
|
61
|
+
rspec (3.11.0)
|
|
62
|
+
rspec-core (~> 3.11.0)
|
|
63
|
+
rspec-expectations (~> 3.11.0)
|
|
64
|
+
rspec-mocks (~> 3.11.0)
|
|
65
|
+
rspec-core (3.11.0)
|
|
66
|
+
rspec-support (~> 3.11.0)
|
|
67
|
+
rspec-expectations (3.11.0)
|
|
68
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
69
|
+
rspec-support (~> 3.11.0)
|
|
70
|
+
rspec-mocks (3.11.1)
|
|
71
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
|
72
|
+
rspec-support (~> 3.11.0)
|
|
73
|
+
rspec-support (3.11.0)
|
|
74
|
+
rubocop (1.29.1)
|
|
75
|
+
parallel (~> 1.10)
|
|
76
|
+
parser (>= 3.1.0.0)
|
|
77
|
+
rainbow (>= 2.2.2, < 4.0)
|
|
78
|
+
regexp_parser (>= 1.8, < 3.0)
|
|
79
|
+
rexml (>= 3.2.5, < 4.0)
|
|
80
|
+
rubocop-ast (>= 1.17.0, < 2.0)
|
|
81
|
+
ruby-progressbar (~> 1.7)
|
|
82
|
+
unicode-display_width (>= 1.4.0, < 3.0)
|
|
83
|
+
rubocop-ast (1.18.0)
|
|
84
|
+
parser (>= 3.1.1.0)
|
|
85
|
+
ruby-progressbar (1.11.0)
|
|
86
|
+
ruby2_keywords (0.0.5)
|
|
87
|
+
unicode-display_width (2.1.0)
|
|
88
|
+
xxhash (0.4.0)
|
|
89
|
+
|
|
90
|
+
PLATFORMS
|
|
91
|
+
x86_64-darwin-19
|
|
92
|
+
x86_64-linux
|
|
93
|
+
|
|
94
|
+
DEPENDENCIES
|
|
95
|
+
heavy_keeper!
|
|
96
|
+
mock_redis
|
|
97
|
+
pry-byebug
|
|
98
|
+
rake (~> 13.0)
|
|
99
|
+
rspec
|
|
100
|
+
rubocop
|
|
101
|
+
|
|
102
|
+
BUNDLED WITH
|
|
103
|
+
2.2.32
|
data/README.md
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# HeavyKeeper
|
|
2
|
+
This gem implements the HeavyKeeper algorithm, based on the paper of the same
|
|
3
|
+
name: https://www.usenix.org/conference/atc18/presentation/gong
|
|
4
|
+
|
|
5
|
+
The interface is designed to be similar to the ReBloom TopK data structure (DS)
|
|
6
|
+
(https://oss.redis.com/redisbloom/TopK_Commands/).
|
|
7
|
+
|
|
8
|
+
This is a naive implementation of HeavyKeeper, probably not very optimized.
|
|
9
|
+
We use multiple Redis DSs:
|
|
10
|
+
|
|
11
|
+
- A hash with maximum depth * width items to act as a bucket to store main
|
|
12
|
+
counter
|
|
13
|
+
- A sorted set with maximum K elements to act as a MinHeap
|
|
14
|
+
- A hash with at most K elements that stores a more accurate counter for each element in the MinHeap
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
Add this line to your application's Gemfile:
|
|
20
|
+
|
|
21
|
+
```ruby
|
|
22
|
+
gem 'heavy_keeper'
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Usage
|
|
26
|
+
|
|
27
|
+
You will need to add an initializer to provide some configuration:
|
|
28
|
+
|
|
29
|
+
```ruby
|
|
30
|
+
HeavyKeeper::Config.configure do |config|
|
|
31
|
+
config.cache_prefix = 'cache_prefix'.freeze # currently used as prefix for the redis data structures.
|
|
32
|
+
config.storage = Redis.new # a Redis store, at least version 4.0
|
|
33
|
+
end
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
In general, you will only interact with an instance of the `HeavyKeeper::TopK` class.
|
|
37
|
+
|
|
38
|
+
These are the most relevant instance methods (see code comments for more detail):
|
|
39
|
+
|
|
40
|
+
`reserve(name, top_k: size, width:, depth:, decay:)` - sets up a Top K list with specified options
|
|
41
|
+
|
|
42
|
+
`increase_by(name, *items_and_increments)` - adds items to a list with a custom increment; each argument is an `[item, increment]` pair
|
|
43
|
+
|
|
44
|
+
`list(name)` - returns full list of items in Top K list
|
|
45
|
+
|
|
46
|
+
`clear(name)` - deletes list
|
|
47
|
+
|
|
48
|
+
`remove(name, item)` - reset the counter of the targeted item in the list
|
|
49
|
+
|
|
50
|
+
## Development
|
|
51
|
+
|
|
52
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bundle exec rspec` to run the tests.
|
|
53
|
+
|
|
54
|
+
To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
|
55
|
+
|
|
56
|
+
## Contributing
|
|
57
|
+
|
|
58
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/Kaligo/heavy_keeper.
|
data/Rakefile
ADDED
data/bin/console
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require 'bundler/setup'
|
|
5
|
+
require 'heavy_keeper'
|
|
6
|
+
|
|
7
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
|
8
|
+
# with your gem easier. You can also use a different console, if you like.
|
|
9
|
+
|
|
10
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
|
11
|
+
# require "pry"
|
|
12
|
+
# Pry.start
|
|
13
|
+
|
|
14
|
+
require 'irb'
|
|
15
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/heavy_keeper/version'
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |spec|
|
|
6
|
+
spec.name = 'heavy_keeper'
|
|
7
|
+
spec.version = HeavyKeeper::VERSION
|
|
8
|
+
spec.authors = ['Hieu Nguyen', 'Kenneth Teh']
|
|
9
|
+
|
|
10
|
+
spec.summary = 'Gem which implements HeavyKeeper algorithm'
|
|
11
|
+
spec.homepage = 'https://github.com/Kaligo/heavy_keeper'
|
|
12
|
+
spec.required_ruby_version = '>= 2.7.0'
|
|
13
|
+
|
|
14
|
+
spec.metadata['homepage_uri'] = spec.homepage
|
|
15
|
+
spec.metadata['source_code_uri'] = 'https://github.com/Kaligo/heavy_keeper'
|
|
16
|
+
spec.metadata['changelog_uri'] = 'https://github.com/Kaligo/heavy_keeper/blob/master/CHANGELOG.md'
|
|
17
|
+
|
|
18
|
+
# Specify which files should be added to the gem when it is released.
|
|
19
|
+
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
|
20
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
|
21
|
+
`git ls-files -z`.split("\x0").reject do |f|
|
|
22
|
+
(f == __FILE__) || f.match(%r{\A(?:(?:test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
spec.bindir = 'exe'
|
|
26
|
+
spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
|
|
27
|
+
spec.require_paths = ['lib']
|
|
28
|
+
|
|
29
|
+
spec.add_dependency 'dry-configurable', '>= 0.13.0'
|
|
30
|
+
spec.add_dependency 'dry-schema', '~> 1'
|
|
31
|
+
spec.add_dependency 'redis', '~> 4'
|
|
32
|
+
spec.add_dependency 'xxhash', '~> 0.4.0'
|
|
33
|
+
|
|
34
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
|
35
|
+
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
|
|
3
|
+
# Acts as a 2D array that stores the HeavyKeeper counters.
# It is implemented on top of a single hash in the storage: the cell at
# row i, column j lives in the hash field "i:j" and holds a JSON-encoded value.
#
# rubocop:disable Naming/MethodParameterName
module HeavyKeeper
  class Bucket
    # @param storage [Object] client responding to hset/hget/del;
    #   defaults to the storage set in HeavyKeeper::Config
    def initialize(storage = HeavyKeeper::Config.config.storage)
      @storage = storage
    end

    # Writes `value`, serialised as JSON, into cell (i, j) of the bucket.
    #
    # @param key [String] name of the bucket
    # @param i [Integer] row index
    # @param j [Integer] column index
    # @param value [Object] any JSON-serialisable value
    #
    # @return whatever the storage returns from hset
    def set(key, i, j, value)
      storage.hset(hash_key(key), field(i, j), JSON.generate(value))
    end

    # Reads cell (i, j) of the bucket.
    #
    # @param key [String] name of the bucket
    # @param i [Integer] row index
    # @param j [Integer] column index
    #
    # @return the JSON-decoded value, or the storage's falsy reply (nil)
    #   when the cell has never been written
    def get(key, i, j)
      raw = storage.hget(hash_key(key), field(i, j))

      raw ? JSON.parse(raw) : raw
    end

    # Deletes the whole bucket (every cell) stored under `key`.
    #
    # @param key [String] name of the bucket
    def clear(key)
      storage.del(hash_key(key))
    end

    private

    attr_reader :storage

    # Hash field that represents cell (i, j).
    def field(i, j)
      "#{i}:#{j}"
    end

    # Storage key of the hash backing the bucket named `key`.
    def hash_key(key)
      "#{key_prefix}:hash:#{key}"
    end

    # Prefix shared by all bucket keys, derived from the configured cache prefix.
    def key_prefix
      "#{HeavyKeeper::Config.config.cache_prefix}_bucket"
    end
  end
end
# rubocop:enable Naming/MethodParameterName
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# A min-heap implementation in Redis.
# This class is not meant to be used outside of the HeavyKeeper implementation
# for TopK. It uses the following data structures in Redis:
# - A sorted set with size K to store the min heap
# - A hash with size K to store the counter for each item in the min heap
#
module HeavyKeeper
  class MinHeap
    # @param storage [Object] Redis-like client; defaults to the storage set
    #   in HeavyKeeper::Config
    def initialize(storage = HeavyKeeper::Config.config.storage)
      @storage = storage
    end

    # Returns up to `total` items with the highest scores and their counters.
    #
    # @param key [String] name of the heap
    # @param total [Integer] maximum number of items to return
    #
    # @return [Hash] item => counter as stored in the hash; empty hash when
    #   the heap has no items
    def list(key, total)
      items = storage.zrevrange(sorted_set_key(key), 0, total - 1)
      return {} if items.empty?

      storage.mapped_hmget(hash_key(key), *items)
    end

    # @param key [String] name of the heap
    # @param item [String] item to look up
    #
    # @return [Integer] counter stored for the item, 0 when it is not stored
    def count(key, item)
      storage.hget(hash_key(key), item).to_i
    end

    # @param key [String] name of the heap
    #
    # @return [Integer] counter of the lowest-scored item, 0 when the heap
    #   is empty
    def min(key)
      item = storage.zrangebyscore(sorted_set_key(key), - Float::INFINITY, Float::INFINITY, limit: [0, 1]).first
      # Empty heap: there is nothing to look up, so skip querying the hash
      # with a nil field.
      return 0 if item.nil?

      count(key, item)
    end

    # @param key [String] name of the heap
    # @param item [String] item to look up
    #
    # @return whatever the storage returns from hexists (a boolean with Redis)
    def exist?(key, item)
      storage.hexists(hash_key(key), item)
    end

    # Inserts an item; when the heap already held `top_k` items beforehand,
    # the lowest-scored item is evicted afterwards (possibly the new item).
    #
    # @param key [String] name of the heap
    # @param item [String] item to insert
    # @param value [Integer] counter of the item
    # @param top_k [Integer] maximum number of items kept in the heap
    #
    # @return [Integer, nil] `value` if the item stays in the heap, nil if
    #   it was the one evicted
    def add(key, item, value, top_k)
      size_before = storage.zcard(sorted_set_key(key))

      # Queue the commands on the object yielded by `multi` rather than on
      # the outer client.
      storage.multi do |transaction|
        transaction.zadd(sorted_set_key(key), value, item)
        transaction.hset(hash_key(key), item, value)
      end

      return value if size_before < top_k

      dropped_item, _ = storage.zpopmin(sorted_set_key(key))
      storage.hdel(hash_key(key), dropped_item)

      value unless dropped_item == item
    end

    # Overwrites the counter of an item in both the sorted set and the hash.
    #
    # @param key [String] name of the heap
    # @param item [String] item to update
    # @param value [Integer] new counter of the item
    #
    # @return [Integer] `value`
    def update(key, item, value)
      storage.multi do |transaction|
        # Removing first makes the following increment start from zero, so
        # the score ends up being exactly `value`.
        transaction.zrem(sorted_set_key(key), item)
        transaction.zincrby(sorted_set_key(key), value, item)
        transaction.hset(hash_key(key), item, value)
      end

      value
    end

    # Deletes both the sorted set and the hash of the heap.
    #
    # @param key [String] name of the heap
    def clear(key)
      storage.del(sorted_set_key(key))
      storage.del(hash_key(key))
    end

    # Removes a single item from the heap.
    #
    # @param key [String] name of the heap
    # @param item [String] item to remove
    def delete(key, item)
      storage.zrem(sorted_set_key(key), item)
      storage.hdel(hash_key(key), item)
    end

    private

    attr_reader :storage

    # Storage key of the sorted set that orders items by counter.
    def sorted_set_key(key)
      "#{key_prefix}:sorted_set:#{key}"
    end

    # Storage key of the hash that maps each item to its counter.
    def hash_key(key)
      "#{key_prefix}:hash:#{key}"
    end

    # Prefix shared by all heap keys, derived from the configured cache prefix.
    def key_prefix
      "#{HeavyKeeper::Config.config.cache_prefix}_heavy_keeper"
    end
  end
end
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'dry-schema'
require 'securerandom'
require 'stringio'
require 'xxhash'
require_relative '../validations/message'
|
|
7
|
+
|
|
8
|
+
module HeavyKeeper
  class TopK # rubocop:disable Metrics/ClassLength
    # Schema for the options of a Top-K DS. It is applied both to the options
    # given to #reserve and to the metadata read back from the storage.
    Validator = ::Dry::Schema.Params do
      required(:top_k).filled(:integer, gt?: 0)
      required(:width).filled(:integer, gt?: 0)
      required(:depth).filled(:integer, gt?: 0)
      required(:decay).filled(:decimal, gt?: 0, lteq?: 1)
    end

    # Initiate the controller to create/operate on top-k DS
    #
    # @param storage [Redis] A Redis client to interact with Redis
    #
    # @return [HeavyKeeper::TopK] new instance of the controller
    def initialize(storage: HeavyKeeper::Config.config.storage)
      @storage = storage
      @min_heap = MinHeap.new(storage)
      @bucket = Bucket.new(storage)
    end

    # Complexity O(1)
    # Initialize a TopK in Redis with specified parameters.
    #
    # @param key [String] a key for identifying top-k DS in Redis
    # @param options [Hash] parameters of the top-k DS:
    #   - top_k [Integer] number of top elements we want to track
    #   - width [Integer] size of the bucket to store counter
    #   - depth [Integer] number of buckets we want to store
    #   - decay [Decimal] decay factor: smaller number means bigger
    #     distinction between mouse-flow and elephant-flow
    #
    # @return the reply of the storage when writing the metadata
    # @raise [HeavyKeeper::Error] if the options are invalid
    def reserve(key, options)
      options = validate(options)

      storage.mapped_hmset(metadata_key(key), options)
    end

    # Complexity O(k + depth)
    # Add an array of items to a Top-K DS
    #
    # @param key [String] key for identifying top-k DS in Redis
    # @param items [String, String, ...] each value represents an item we want to
    #   store in Top-K
    #
    # @return [Array[Nil, Integer]]
    #   nil if the item is not added to the list
    #   otherwise, return the current value of item
    def add(key, *items)
      items_and_increments = items.map { |item| [item, 1] }
      increase_by(key, *items_and_increments)
    end

    # Complexity O(k + (increment * depth))
    # Add an array of items to a Top-K DS, with custom increment for each item
    #
    # @param key [String] key for identifying top-k DS in Redis
    # @param items_and_increments [[String, Integer], ...]
    #   each value represents an item and increment that needs to be added
    #   to Top-K
    #
    # @return [Array[Nil, Integer]]
    #   nil if the item is not added to the list
    #   otherwise, return the current value of item
    # @raise [HeavyKeeper::Error] if the stored metadata of the DS is invalid
    #   (e.g. the DS has not been reserved)
    # rubocop:disable Metrics/AbcSize
    # rubocop:disable Metrics/MethodLength
    # rubocop:disable Metrics/BlockLength
    # rubocop:disable Metrics/PerceivedComplexity
    def increase_by(key, *items_and_increments)
      options = validate(storage.hgetall(metadata_key(key)))

      items_and_increments.map do |(item, increment)|
        max_count = 0
        item_fingerprint = XXhash.xxh64(item)

        exist = min_heap.exist?(key, item)
        min_value = min_heap.min(key)

        options[:depth].times do |i|
          bucket_number = bucket_number_for(item, i, options[:width])

          fingerprint, count = bucket.get(key, i, bucket_number)

          if count.nil? || count.zero?
            # Empty cell: the item takes it over.
            bucket.set(key, i, bucket_number, [item_fingerprint, increment])
            max_count = [increment, max_count].max
          elsif fingerprint == item_fingerprint
            # Cell already owned by this item: only grow it when the item is
            # in the heap or the cell has not passed the heap's minimum yet.
            if exist || count <= min_value
              bucket.set(key, i, bucket_number, [fingerprint, count + increment])
              max_count = [count + increment, max_count].max
            end
          else
            # Cell owned by another item: decay it with a probability that
            # shrinks as the owner's counter grows.
            decay = options[:decay]**count

            if SecureRandom.rand < decay
              count -= increment

              if count.positive?
                bucket.set(key, i, bucket_number, [fingerprint, count])
              else
                bucket.set(key, i, bucket_number, [item_fingerprint, increment])
                max_count = [increment, max_count].max
              end
            end
          end
        end

        if exist
          min_heap.update(key, item, max_count)
        else
          min_heap.add(key, item, max_count, options[:top_k])
        end
      end
    end
    # rubocop:enable Metrics/AbcSize
    # rubocop:enable Metrics/MethodLength
    # rubocop:enable Metrics/BlockLength
    # rubocop:enable Metrics/PerceivedComplexity

    # Complexity O(k)
    # Checks whether an item is one of Top-K items. Multiple items can be checked at once.
    #
    # @param key [String] a key for identifying top-k DS in Redis
    # @param items [String, String, ...] an array of item that we want to check
    #
    # @return [Array[Boolean]] true if item is in Top-K, otherwise return false
    def query(key, *items)
      items.map do |item|
        min_heap.exist?(key, item)
      end
    end

    # Complexity O(k + depth)
    # Please note this number will never be higher than the real count
    # and likely to be lower. Multiple items can be queried at once.
    #
    # @param key [String] a key for identifying top-k DS in Redis
    # @param items [String, String, ...] an array of item that we want to check
    #
    # @return [Array[Integer]] return the count of each item
    def count(key, *items)
      items.map do |item|
        min_heap.count(key, item)
      end
    end

    # Complexity O(k)
    # Return full list of items in Top K list.
    #
    # @param key [String] a key for identifying top-k DS in Redis
    #
    # @return [Hash] return a hash contains the key and the count of the top-K
    #   elements
    def list(key)
      top_k = storage.hget(metadata_key(key), :top_k).to_i
      min_heap.list(key, top_k)
    end

    # Complexity O(1)
    # Clean up all Redis data related to a key
    #
    # @param key [String] a key for identifying top-k DS in Redis
    #
    # @return the result of the storage transaction; raises on failure
    def clear(key)
      storage.multi do
        storage.del(metadata_key(key))
        min_heap.clear(key)
        bucket.clear(key)
      end
    end

    # Complexity O(1)
    # Reset counter of an item to zero in order to decay it out
    #
    # @param key [String] a key for identifying top-k DS in Redis
    # @param item [String] item that we want to decay
    #
    # @return the result of removing the item from the min heap
    # @raise [HeavyKeeper::Error] if the stored metadata of the DS is invalid
    def remove(key, item)
      options = validate(storage.hgetall(metadata_key(key)))
      item_fingerprint = XXhash.xxh64(item)

      options[:depth].times do |i|
        bucket_number = bucket_number_for(item, i, options[:width])
        fingerprint, _ = bucket.get(key, i, bucket_number)

        bucket.set(key, i, bucket_number, [fingerprint, 0]) if item_fingerprint == fingerprint
      end

      min_heap.delete(key, item)
    end

    private

    attr_reader :storage, :min_heap, :bucket

    # Column of the cell used by `item` in the given row: the item is hashed
    # with the row index as seed and reduced modulo the bucket width.
    def bucket_number_for(item, row, width)
      XXhash.xxh64_stream(StringIO.new(item), row) % width
    end

    # Storage key of the hash that holds the options of the DS.
    def metadata_key(key)
      "#{key_prefix}:#{key}:data"
    end

    # Prefix shared by the keys of this class, derived from the configured
    # cache prefix.
    def key_prefix
      "#{HeavyKeeper::Config.config.cache_prefix}_heavy_keeper"
    end

    # Validates (and coerces) the options against Validator.
    #
    # @return [Hash] the coerced options
    # @raise [HeavyKeeper::Error] with all messages joined by '. ' on failure
    def validate(options)
      result = Validator.call(options)

      if result.failure?
        error = ::Validations::Message.new.build(result.errors.to_h).join('. ')
        raise HeavyKeeper::Error, error
      end

      result.output
    end
  end
end
|
data/lib/heavy_keeper.rb
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'heavy_keeper/version'
|
|
4
|
+
require_relative 'heavy_keeper/config'
|
|
5
|
+
require_relative 'heavy_keeper/top_k'
|
|
6
|
+
require_relative 'heavy_keeper/min_heap'
|
|
7
|
+
require_relative 'heavy_keeper/bucket'
|
|
8
|
+
|
|
9
|
+
module HeavyKeeper
  # Base error class of the gem; a StandardError subclass so that a plain
  # `rescue` catches it.
  class Error < StandardError; end
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
module Validations
  # Flattens a (possibly nested) validation error structure into a list of
  # human-readable sentences.
  class Message
    CLASSIFY_SEPARATOR = '_'.freeze
    TITLEIZE_SEPARATOR = ' '.freeze

    # Builds the error messages.
    #
    # @param errors [Hash, Array, Object] validation errors: a hash maps a
    #   field name to its errors, an array lists the errors of one field,
    #   anything else is treated as a ready-made message
    # @param parent [nil, String] name of the field that owns `errors`
    #   (nested field names are joined with a space)
    #
    # @return [Array<String>] one sentence per error, prefixed with the
    #   titleized field name, when `errors` is a Hash or an Array;
    #   otherwise `errors` itself
    def build(errors, parent = nil)
      case errors
      when Hash
        errors.flat_map do |key, value|
          child = [parent, key].compact.join(' ')
          build(value, child)
        end
      when Array
        label = titleize(parent.to_s)

        errors.flat_map do |error|
          message = build(error)
          # Without a field name there is nothing to prefix, so avoid
          # emitting a message that starts with a stray space.
          label.empty? ? message.to_s : "#{label} #{message}"
        end
      else
        errors
      end
    end

    private

    # Turns "top_k" into "Top K".
    def titleize(string)
      # NOTE: this is not a robust implementation of titleize
      string.split(CLASSIFY_SEPARATOR).map(&:capitalize).join(TITLEIZE_SEPARATOR)
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: heavy_keeper
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Hieu Nguyen
|
|
8
|
+
- Kenneth Teh
|
|
9
|
+
autorequire:
|
|
10
|
+
bindir: exe
|
|
11
|
+
cert_chain: []
|
|
12
|
+
date: 2022-06-30 00:00:00.000000000 Z
|
|
13
|
+
dependencies:
|
|
14
|
+
- !ruby/object:Gem::Dependency
|
|
15
|
+
name: dry-configurable
|
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
|
17
|
+
requirements:
|
|
18
|
+
- - ">="
|
|
19
|
+
- !ruby/object:Gem::Version
|
|
20
|
+
version: 0.13.0
|
|
21
|
+
type: :runtime
|
|
22
|
+
prerelease: false
|
|
23
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
24
|
+
requirements:
|
|
25
|
+
- - ">="
|
|
26
|
+
- !ruby/object:Gem::Version
|
|
27
|
+
version: 0.13.0
|
|
28
|
+
- !ruby/object:Gem::Dependency
|
|
29
|
+
name: dry-schema
|
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
|
31
|
+
requirements:
|
|
32
|
+
- - "~>"
|
|
33
|
+
- !ruby/object:Gem::Version
|
|
34
|
+
version: '1'
|
|
35
|
+
type: :runtime
|
|
36
|
+
prerelease: false
|
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
38
|
+
requirements:
|
|
39
|
+
- - "~>"
|
|
40
|
+
- !ruby/object:Gem::Version
|
|
41
|
+
version: '1'
|
|
42
|
+
- !ruby/object:Gem::Dependency
|
|
43
|
+
name: redis
|
|
44
|
+
requirement: !ruby/object:Gem::Requirement
|
|
45
|
+
requirements:
|
|
46
|
+
- - "~>"
|
|
47
|
+
- !ruby/object:Gem::Version
|
|
48
|
+
version: '4'
|
|
49
|
+
type: :runtime
|
|
50
|
+
prerelease: false
|
|
51
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
52
|
+
requirements:
|
|
53
|
+
- - "~>"
|
|
54
|
+
- !ruby/object:Gem::Version
|
|
55
|
+
version: '4'
|
|
56
|
+
- !ruby/object:Gem::Dependency
|
|
57
|
+
name: xxhash
|
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
|
59
|
+
requirements:
|
|
60
|
+
- - "~>"
|
|
61
|
+
- !ruby/object:Gem::Version
|
|
62
|
+
version: 0.4.0
|
|
63
|
+
type: :runtime
|
|
64
|
+
prerelease: false
|
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
66
|
+
requirements:
|
|
67
|
+
- - "~>"
|
|
68
|
+
- !ruby/object:Gem::Version
|
|
69
|
+
version: 0.4.0
|
|
70
|
+
description:
|
|
71
|
+
email:
|
|
72
|
+
executables: []
|
|
73
|
+
extensions: []
|
|
74
|
+
extra_rdoc_files: []
|
|
75
|
+
files:
|
|
76
|
+
- ".rspec"
|
|
77
|
+
- ".rubocop.yml"
|
|
78
|
+
- CHANGELOG.md
|
|
79
|
+
- Gemfile
|
|
80
|
+
- Gemfile.lock
|
|
81
|
+
- README.md
|
|
82
|
+
- Rakefile
|
|
83
|
+
- bin/console
|
|
84
|
+
- bin/setup
|
|
85
|
+
- heavy_keeper.gemspec
|
|
86
|
+
- lib/heavy_keeper.rb
|
|
87
|
+
- lib/heavy_keeper/bucket.rb
|
|
88
|
+
- lib/heavy_keeper/config.rb
|
|
89
|
+
- lib/heavy_keeper/min_heap.rb
|
|
90
|
+
- lib/heavy_keeper/top_k.rb
|
|
91
|
+
- lib/heavy_keeper/version.rb
|
|
92
|
+
- lib/validations/message.rb
|
|
93
|
+
homepage: https://github.com/Kaligo/heavy_keeper
|
|
94
|
+
licenses: []
|
|
95
|
+
metadata:
|
|
96
|
+
homepage_uri: https://github.com/Kaligo/heavy_keeper
|
|
97
|
+
source_code_uri: https://github.com/Kaligo/heavy_keeper
|
|
98
|
+
changelog_uri: https://github.com/Kaligo/heavy_keeper/blob/master/CHANGELOG.md
|
|
99
|
+
rubygems_mfa_required: 'true'
|
|
100
|
+
post_install_message:
|
|
101
|
+
rdoc_options: []
|
|
102
|
+
require_paths:
|
|
103
|
+
- lib
|
|
104
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
105
|
+
requirements:
|
|
106
|
+
- - ">="
|
|
107
|
+
- !ruby/object:Gem::Version
|
|
108
|
+
version: 2.7.0
|
|
109
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
|
+
requirements:
|
|
111
|
+
- - ">="
|
|
112
|
+
- !ruby/object:Gem::Version
|
|
113
|
+
version: '0'
|
|
114
|
+
requirements: []
|
|
115
|
+
rubygems_version: 3.2.28
|
|
116
|
+
signing_key:
|
|
117
|
+
specification_version: 4
|
|
118
|
+
summary: Gem which implements HeavyKeeper algorithm
|
|
119
|
+
test_files: []
|