hash_sample 0.8.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +86 -0
- data/Rakefile +156 -0
- data/hash_sample.gemspec +36 -0
- data/lib/hash_sample/version.rb +4 -0
- data/lib/hash_sample.rb +94 -0
- data/spec/hash_sample_spec.rb +168 -0
- data/spec/spec_helper.rb +7 -0
- metadata +84 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 96d9552ffc743c9e19465465918840036fc94d6b04097b487d0c18b2ab3a094e
|
4
|
+
data.tar.gz: ec329877c7b1978caf6dd17a36fd41b0094b64863753a1f890d1b5a5bcc7f529
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 2d026964365bd5fcafb9b349dad30553644cbd3d048cb6a38abeb9c833d3726366f1cce045ce45cfeeb202e3e7aa24a3c5d78940c541ab62bccccfe33302c322
|
7
|
+
data.tar.gz: 80c85b10012618901bf538e82341349c81608838b26beb2ac9d6237f29a53b5b7c3eda26084e1fefa91bc86b3e14964072f86e6de57d0390e768c2e08c823970
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2020 Sergey Evstegneiev
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
# hash_sample
|
2
|
+
|
3
|
+
Implements regular sampling and weighted random sampling with and without replacement for Hashes
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
gem install hash_sample
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
require 'hash_sample'
|
13
|
+
loaded_die = {'1' => 0.1, '2' => 0.1, '3' => 0.1, '4' => 0.1, '5' => 0.1, '6' => 0.5}
|
14
|
+
p loaded_die.wchoice # "6"
|
15
|
+
p loaded_die.wchoice(1) # ["6"]
|
16
|
+
p loaded_die.wchoice(10) # ["4", "6", "3", "3", "2", "2", "1", "6", "4", "6"]
|
17
|
+
p loaded_die.wsample # "6"
|
18
|
+
p loaded_die.wsample(6) # ["6", "3", "2", "4", "1", "5"]
|
19
|
+
p loaded_die.wsample(10) # ["2", "6", "1", "3", "4", "5"]
|
20
|
+
p loaded_die.sample # { '1' => 0.1 }
|
21
|
+
p loaded_die.sample(6) # {'1' => 0.1, '2' => 0.1, '3' => 0.1, '4' => 0.1, '5' => 0.1, '6' => 0.5}
|
22
|
+
```
|
23
|
+
|
24
|
+
## Hash instance methods
|
25
|
+
### hash.sample(n = 1) ⇒ Hash
|
26
|
+
Choose a random key=>value pair or n random pairs from the hash.
|
27
|
+
|
28
|
+
The key=>value pairs are chosen using random, unique indices, which ensures that no pair is included more than once.
|
29
|
+
|
30
|
+
If the hash is empty it returns an empty hash.
|
31
|
+
|
32
|
+
If the hash contains fewer than n keys, a copy of the whole hash is returned; no key is lost due to bad luck.
|
33
|
+
|
34
|
+
Returns new Hash containing sample key=>value pairs
|
35
|
+
|
36
|
+
### hash.wchoice ⇒ Object
|
37
|
+
### hash.wchoice(n) ⇒ Array of n samples.
|
38
|
+
Weighted random sampling *with* replacement.
|
39
|
+
|
40
|
+
Choose a random key or n random keys from the hash, according to weights defined in hash values.
|
41
|
+
|
42
|
+
Each drawn key stays available for the following draws, so keys **can be repeated in the result**.
|
43
|
+
|
44
|
+
If the hash is empty the first form returns nil and the second form returns an empty array.
|
45
|
+
|
46
|
+
All weights should be Numeric.
|
47
|
+
|
48
|
+
Zero or negative weights will be ignored.
|
49
|
+
|
50
|
+
{'_' => 9, 'a' => 1}.wchoice(10) # ["_", "a", "_", "_", "_", "_", "_", "_", "_", "_"]
|
51
|
+
|
52
|
+
### hash.wsample ⇒ Object
|
53
|
+
### hash.wsample(n) ⇒ Array of n samples.
|
54
|
+
Weighted random sampling *without* replacement.
|
55
|
+
|
56
|
+
Choose 1 or n *distinct* random keys from the hash, according to weights defined in hash values.
|
57
|
+
Drawn items are not replaced.
|
58
|
+
|
59
|
+
If the hash is empty the first form returns nil and the second form returns an empty array.
|
60
|
+
|
61
|
+
All weights should be Numeric.
|
62
|
+
|
63
|
+
Zero or negative weights will be ignored.
|
64
|
+
|
65
|
+
{'_' => 9, 'a' => 1}.wsample(10) # ["_", "a"]
|
66
|
+
|
67
|
+
### hash.wchoices(n = 1) ⇒ Object
|
68
|
+
alias for wchoice
|
69
|
+
|
70
|
+
### hash.wsamples(n = 1) ⇒ Object
|
71
|
+
alias for wsample
|
72
|
+
|
73
|
+
## Contributing
|
74
|
+
|
75
|
+
1. Fork it ( https://github.com/serg123e/hash_sample/fork )
|
76
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
77
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
78
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
79
|
+
5. Create a new Pull Request
|
80
|
+
|
81
|
+
## References
|
82
|
+
|
83
|
+
1. [Efraimidis and Spirakis implementation of random sampling with replacement](https://gist.github.com/O-I/3e0654509dd8057b539a)
|
84
|
+
2. [Weighted Random Sampling (2005; Efraimidis, Spirakis)](https://utopia.duth.gr/~pefraimi/research/data/2007EncOfAlg.pdf)
|
85
|
+
3. [Abandoned Ruby feature request](https://bugs.ruby-lang.org/issues/4247#change-25166)
|
86
|
+
4. [Inspiring example of using max_by for Enumerables with the same algorithm](https://ruby-doc.org/core-2.7.1/Enumerable.html#method-i-max_by)
|
data/Rakefile
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
require 'yard'
|
2
|
+
require 'rake'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
#############################################################################
|
6
|
+
#
|
7
|
+
# Helper functions
|
8
|
+
#
|
9
|
+
#############################################################################
|
10
|
+
|
11
|
+
def name
|
12
|
+
"hash_sample"
|
13
|
+
end
|
14
|
+
|
15
|
+
def version
|
16
|
+
line = File.read("lib/#{name}/version.rb")[/^\s*VERSION\s*=\s*.*/]
|
17
|
+
line.match(/.*VERSION\s*=\s*['"](.*)['"]/)[1]
|
18
|
+
end
|
19
|
+
|
20
|
+
# assumes x.y.z all digit version
|
21
|
+
def next_version
|
22
|
+
# x.y.z
|
23
|
+
v = version.split '.'
|
24
|
+
# bump z
|
25
|
+
v[-1] = v[-1].to_i + 1
|
26
|
+
v.join '.'
|
27
|
+
end
|
28
|
+
|
29
|
+
def bump_version
|
30
|
+
old_file = File.read("lib/#{name}/version.rb")
|
31
|
+
old_version_line = old_file[/^\s*VERSION\s*=\s*.*/]
|
32
|
+
new_version = next_version
|
33
|
+
# replace first match of old version with new version
|
34
|
+
old_file.sub!(old_version_line, " VERSION = '#{new_version}'")
|
35
|
+
|
36
|
+
File.write("lib/#{name}/version.rb", old_file)
|
37
|
+
|
38
|
+
new_version
|
39
|
+
end
|
40
|
+
|
41
|
+
def replace_header(head, header_name)
|
42
|
+
head.sub!(/(\.#{header_name}\s*= ').*'/) { "#{$1}#{send(header_name)}'"}
|
43
|
+
end
|
44
|
+
|
45
|
+
def gemspec_file
|
46
|
+
"#{name}.gemspec"
|
47
|
+
end
|
48
|
+
|
49
|
+
def gem_files
|
50
|
+
["#{name}-#{version}.gem"]
|
51
|
+
end
|
52
|
+
|
53
|
+
def gemspecs
|
54
|
+
["#{name}.gemspec"]
|
55
|
+
end
|
56
|
+
|
57
|
+
def date
|
58
|
+
Date.today.to_s
|
59
|
+
end
|
60
|
+
#############################################################################
|
61
|
+
#
|
62
|
+
# Custom tasks (add your own tasks here)
|
63
|
+
#
|
64
|
+
#############################################################################
|
65
|
+
|
66
|
+
YARD::Rake::YardocTask.new do |t|
|
67
|
+
end
|
68
|
+
|
69
|
+
desc "Generate RCov test coverage and open in your browser"
|
70
|
+
task :coverage do
|
71
|
+
require 'rcov'
|
72
|
+
sh "rm -fr coverage"
|
73
|
+
sh "rcov test/test_*.rb"
|
74
|
+
sh "open coverage/index.html"
|
75
|
+
end
|
76
|
+
|
77
|
+
desc "Open an irb session preloaded with this library"
|
78
|
+
task :console do
|
79
|
+
sh "irb -r rubygems -r ./lib/#{name}.rb"
|
80
|
+
end
|
81
|
+
|
82
|
+
desc "Update version number and gemspec"
|
83
|
+
task :bump do
|
84
|
+
puts "Updated version to #{bump_version}"
|
85
|
+
# Execute does not invoke dependencies.
|
86
|
+
# Manually invoke gemspec then validate.
|
87
|
+
Rake::Task[:gemspec].execute
|
88
|
+
Rake::Task[:validate].execute
|
89
|
+
end
|
90
|
+
|
91
|
+
desc 'Build gem'
# Regenerates the gemspec (prerequisite task), builds every gemspec listed by
# `gemspecs`, then moves the resulting .gem files into ./pkg.
task :build => :gemspec do
  # `-p` keeps the task re-runnable: a plain `mkdir pkg` fails (and aborts the
  # build) as soon as the directory exists from a previous build.
  sh "mkdir -p pkg"
  gemspecs.each do |gemspec|
    sh "gem build #{gemspec}"
  end
  gem_files.each do |gem_file|
    sh "mv #{gem_file} pkg"
  end
end
|
101
|
+
|
102
|
+
|
103
|
+
desc "Build and install"
|
104
|
+
task :install => :build do
|
105
|
+
sh "gem install --local --no-document pkg/#{name}-#{version}.gem"
|
106
|
+
end
|
107
|
+
|
108
|
+
desc 'Update gemspec'
|
109
|
+
task :gemspec => :validate do
|
110
|
+
# read spec file and split out manifest section
|
111
|
+
spec = File.read(gemspec_file)
|
112
|
+
head, _manifest, tail = spec.split(/\s*# = MANIFEST =\n/)
|
113
|
+
|
114
|
+
# replace name version and date
|
115
|
+
replace_header(head, :name)
|
116
|
+
replace_header(head, :version)
|
117
|
+
replace_header(head, :date)
|
118
|
+
#comment this out if your rubyforge_project has a different name
|
119
|
+
# replace_header(head, :rubyforge_project)
|
120
|
+
|
121
|
+
# determine file list from git ls-files
|
122
|
+
files = `git ls-files`.
|
123
|
+
split("\n").
|
124
|
+
sort.
|
125
|
+
reject { |file| file =~ /^\./ }.
|
126
|
+
reject { |file| file =~ /^(rdoc|pkg|test|Home\.md|\.gitattributes|Guardfile)/ }.
|
127
|
+
map { |file| " #{file}" }.
|
128
|
+
join("\n")
|
129
|
+
|
130
|
+
# piece file back together and write
|
131
|
+
manifest = " s.files = %w(\n#{files}\n )"
|
132
|
+
spec = [head, manifest, tail].join("\n # = MANIFEST =\n")
|
133
|
+
File.open(gemspec_file, 'w') { |io| io.write(spec) }
|
134
|
+
puts "Updated #{gemspec_file}"
|
135
|
+
end
|
136
|
+
|
137
|
+
desc 'Validate lib files and version file'
|
138
|
+
task :validate do
|
139
|
+
libfiles = Dir['lib/*'] - ["lib/#{name}.rb", "lib/#{name}"]
|
140
|
+
unless libfiles.empty?
|
141
|
+
puts "Directory `lib` should only contain a `#{name}.rb` file and `#{name}` dir."
|
142
|
+
exit!
|
143
|
+
end
|
144
|
+
unless Dir['VERSION*'].empty?
|
145
|
+
puts "A `VERSION` file at root level violates Gem best practices."
|
146
|
+
exit!
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
begin
|
151
|
+
require 'rspec/core/rake_task'
|
152
|
+
desc "run rspec tests"
|
153
|
+
RSpec::Core::RakeTask.new(:spec)
|
154
|
+
task :default => :spec
|
155
|
+
rescue LoadError
|
156
|
+
end
|
data/hash_sample.gemspec
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Gem specification for hash_sample.
#
# The Rakefile's `gemspec` task rewrites this file in place: it substitutes the
# single-quoted values of s.name, s.version and s.date, and regenerates the
# s.files list found between the two MANIFEST marker lines from `git ls-files`.
# Keep those assignments single-quoted and leave both marker lines untouched.
Gem::Specification.new do |s|
  s.name = 'hash_sample'
  s.platform = Gem::Platform::RUBY
  s.authors = ["Sergey Evstegneiev"]
  s.email = ["serg123e@gmail.com"]
  s.homepage = 'https://github.com/serg123e/hash_sample'
  s.summary = %q{Implements multiple sampling methods for Hash class}
  s.description = %q{Regular and weighted random sampling with and without replacement are implemented}
  # s.metadata = { 'source_code_uri' => 'https://github.com/serg123e/hash-sample' }
  s.add_development_dependency "rspec", "~> 3.5"
  s.add_development_dependency "rake", "~> 13"

  s.require_paths = ["lib"]

  s.required_ruby_version = '>= 2.4'

  s.date = '2020-05-01'
  s.version = '0.8.5'
  s.license = 'MIT'

  s.rdoc_options = ['--charset=UTF-8']
  s.extra_rdoc_files = %w(README.md LICENSE)
  # = MANIFEST =
  s.files = %w(
    LICENSE
    README.md
    Rakefile
    hash_sample.gemspec
    lib/hash_sample.rb
    lib/hash_sample/version.rb
    spec/hash_sample_spec.rb
    spec/spec_helper.rb
  )
  # = MANIFEST =
  # The test suite is RSpec and lives under spec/; the previous test/test_*.rb
  # pattern matched nothing, which left test_files empty in the built gem.
  s.test_files = s.files.select { |path| path.match?(%r{\Aspec/.*_spec\.rb\z}) }
end
|
data/lib/hash_sample.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
# Monkey patch: adds sampling methods to the core Hash class
|
2
|
+
class Hash
  ##
  # Choose a random key=>value pair or *n* random pairs from the hash.
  #
  # @param n [Integer] maximum number of pairs to return (defaults to 1)
  # @return [Hash] new Hash containing the sampled key=>value pairs
  #
  # Pairs are drawn with Array#sample, i.e. by unique random indices, so no
  # pair is included more than once.
  # If the hash is empty it returns an empty hash.
  # If the hash contains fewer than *n* keys, a copy of the whole hash is
  # returned; no key is lost.
  def sample(n = 1)
    to_a.sample(n).to_h
  end

  ###
  # alias for wchoice
  def wchoices(*args)
    wchoice(*args)
  end

  ##
  # Choose 1 or n random keys from the hash, according to weights defined in hash values
  # (weighted random sampling *with* *replacement*)
  #
  # @overload wchoice
  #   @return [Object] one sample object
  # @overload wchoice(n)
  #   @param n [Integer] number of samples to be returned
  #   @return [Array] Array of n samples
  # @raise [ArgumentError] if a weight is not Numeric, or if the hash is not
  #   empty and no weight is positive
  #
  # Every draw is independent, so keys *can* *be* *repeated* *in* *result*.
  # If the hash is empty the first form returns nil and the second form returns an empty array.
  # All weights should be Numeric.
  # Zero or negative weights will be ignored.
  #
  # ===== Example
  #
  #   {'_' => 9, 'a' => 1}.wchoice(10) # => ["_", "a", "_", "_", "_", "_", "_", "_", "_", "_"]
  #
  def wchoice(*args)
    _check_weighted_params
    n = args.first || 1
    # Drop ignored entries up front instead of scoring them 0: a score of 0
    # ties with the score of a tiny positive weight once it underflows, and
    # max_by resolves ties in favour of the first (possibly ignored) entry.
    candidates = select { |_, weight| weight.positive? }
    res = []
    n.times do
      # log(rand) / weight orders entries exactly like rand**(1.0 / weight)
      # (Efraimidis-Spirakis key) but does not underflow for small weights.
      tmp = candidates.max_by { |_, weight| Math.log(rand) / weight }
      res << tmp.first unless tmp.nil?
    end
    args.empty? ? res.first : res
  end

  # internal method to validate parameters
  #
  # @raise [ArgumentError] if any value is not Numeric, or if the hash is not
  #   empty and none of its values is positive
  # @return [nil]
  def _check_weighted_params(*_args)
    each_value do |v|
      raise ArgumentError, "All weights should be numeric unlike #{v}" unless v.is_a? Numeric
    end
    return if empty? || each_value.any?(&:positive?)

    raise ArgumentError, "At least one weight should be > 0"
  end

  ##
  # Choose 1 or n *distinct* random keys from the hash, according to weights defined in hash values
  # (weighted random sampling *without* *replacement*)
  #
  # @overload wsample
  #   @return [Object] one sample object
  # @overload wsample(n)
  #   @param n [Integer] number of samples to be returned
  #   @return [Array] Array of n or sometimes less than n samples
  # @raise [ArgumentError] if a weight is not Numeric, or if the hash is not
  #   empty and no weight is positive
  #
  # When there are not enough distinct keys with a positive weight, the result
  # contains fewer than n samples.
  # If the hash is empty the first form returns nil and the second form returns an empty array.
  # All weights should be Numeric.
  # Zero or negative weights will be ignored.
  #
  # ===== Example
  #
  #   {'_' => 9, 'a' => 1}.wsample(10) # => ["_", "a"]
  #
  def wsample(*args)
    _check_weighted_params
    n = args.first || 1
    # Ignored entries must be removed before max_by(n): otherwise they fill
    # the tail of the result whenever n exceeds the number of positive weights.
    res = select { |_, weight| weight.positive? }
          .max_by(n) { |_, weight| Math.log(rand) / weight }
          .map(&:first)
    args.empty? ? res.first : res
  end

  ###
  # alias for wsample
  def wsamples(*args)
    wsample(*args)
  end
end
|
@@ -0,0 +1,168 @@
|
|
1
|
+
# require 'lib/core_ext.rb'
|
2
|
+
|
3
|
+
#
|
4
|
+
# Specs
|
5
|
+
#
|
6
|
+
describe 'Hash#sample' do
|
7
|
+
describe 'when specified parameter n>1' do
|
8
|
+
it 'returns new Hash with specified number of unique key=>value samples' do
|
9
|
+
h = { 'a' => 'b', 'b' => 'b', 'c' => 'b' }
|
10
|
+
expect(h.sample(3)).to eq h
|
11
|
+
end
|
12
|
+
end
|
13
|
+
describe 'when specified parameter n> number of unique keys' do
|
14
|
+
it 'returns new Hash only with unique key=>value samples' do
|
15
|
+
h = { 'a' => 'b', 'b' => 'b', 'c' => 'b' }
|
16
|
+
expect(h.sample(10)).to eq h
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe 'when specified parameter n> number of unique keys' do
|
21
|
+
it 'keys can not be lost because of bad luck' do
|
22
|
+
h = { 'a' => 'b', 'b' => 'b', 'c' => 'b' }
|
23
|
+
min = h.keys.length
|
24
|
+
100.times do
|
25
|
+
min = [h.sample(4).keys.length, min].min
|
26
|
+
end
|
27
|
+
expect(min).to be 3
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
describe 'when specified parameter n==1' do
|
32
|
+
it 'returns new Hash with 1 random key=>value sample' do
|
33
|
+
h = { 'a' => 'b', 'b' => 'b', 'c' => 'b' }
|
34
|
+
expect(h.sample(1).keys.length).to eq 1
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
%w[wchoice wsample].each do |weighted_method|
|
40
|
+
describe 'plural form of method' do
|
41
|
+
weighted_methods = weighted_method + "s"
|
42
|
+
it 'can be used' do
|
43
|
+
expect({}).to respond_to(weighted_methods)
|
44
|
+
end
|
45
|
+
it 'works as expected without args' do
|
46
|
+
expect({ 'a' => 1 }.send(weighted_methods)).to eq 'a'
|
47
|
+
end
|
48
|
+
it 'works as expected with args' do
|
49
|
+
expect({ 'a' => 1 }.send(weighted_methods, 1)).to eq ['a']
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe "Hash\##{weighted_method}" do
|
54
|
+
it 'returns weighted sample key from all keys with respect of its weights' do
|
55
|
+
s = { 1 => 90, 2 => 10 }
|
56
|
+
freq = Hash.new(0)
|
57
|
+
1000.times { freq[s.send(weighted_method)] += 1 }
|
58
|
+
expect(freq[1]).to be_between(800, 999)
|
59
|
+
expect(freq[2]).to be_between(1, 200)
|
60
|
+
end
|
61
|
+
|
62
|
+
describe 'when weights are equal' do
|
63
|
+
it 'it should returns equal parts of samples' do
|
64
|
+
res = 1.upto(100_000).to_a.map { { +1 => 50, -1 => 50 }.send(weighted_method) }
|
65
|
+
expect(res.sum).to be_between(-1000, 1000) # +-1% bias
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
describe 'when Hash is empty' do
|
70
|
+
it 'returns nil' do
|
71
|
+
expect({}.send(weighted_method)).to be_nil
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
describe 'when weights are Float' do
|
76
|
+
it 'returns a value as expected' do
|
77
|
+
expect([1, 2].include?({ 1 => 0.1, 2 => 0.9 }.send(weighted_method))).to be true
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
describe 'when some weights are negative' do
|
82
|
+
it 'does not sample that key' do
|
83
|
+
100.times { expect({ 'a' => -1, 'b' => 2 }.send(weighted_method)).to eq 'b' }
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
describe 'when weight contains zero' do
|
88
|
+
it 'returns non-zero weighted element' do
|
89
|
+
10.times do
|
90
|
+
expect({ 1 => 0, 2 => 1, 3 => 0 }.send(weighted_method)).to eq 2
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
describe 'when weight is non-numeric' do
|
96
|
+
it 'raises ArgumentError' do
|
97
|
+
expect { { 1 => 'asd', 2 => 2 }.send(weighted_method) }.to raise_error(ArgumentError)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
describe 'when all weights are zero' do
|
102
|
+
it 'raises ArgumentError' do
|
103
|
+
expect { { 1 => 0, 2 => 0 }.send(weighted_method) }.to raise_error(ArgumentError)
|
104
|
+
end
|
105
|
+
# @todo do not raise error when all weights are zero
|
106
|
+
# xit 'returns empty array' do
|
107
|
+
# h = { 'a'=>0, 'b'=>0, 'c'=>0 }
|
108
|
+
# expect( h.wchoice(10) ).to eq []
|
109
|
+
# end
|
110
|
+
end
|
111
|
+
|
112
|
+
describe 'when hash is empty' do
|
113
|
+
it 'returns empty array or nil' do
|
114
|
+
expect({}.send(weighted_method, 10)).to eq []
|
115
|
+
expect({}.send(weighted_method)).to eq nil
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
describe 'when specified parameter n>1' do
|
120
|
+
it 'returns array of sample keys 2' do
|
121
|
+
100.times { expect({ 1 => 1, 2 => 0.01, 3 => 0.0000001 }.wchoice(3).length).to be 3 }
|
122
|
+
end
|
123
|
+
end
|
124
|
+
describe 'when specified parameter n==1' do
|
125
|
+
subject { { '1' => 1, '2' => 1, '3' => 1 }.wchoice(1) }
|
126
|
+
it 'returns array of one key' do
|
127
|
+
expect(subject).to be_kind_of(Array)
|
128
|
+
expect(subject.length).to be 1
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
describe 'should work with complex Objetcts as keys' do
|
133
|
+
subject { { %w[asd zxf] => 1, %w[asd bsd] => 1, %w[asd dsf] => 1 }.wchoice }
|
134
|
+
it 'returns array of one key' do
|
135
|
+
expect(subject).to be_kind_of(Array)
|
136
|
+
expect(subject.length).to be 2
|
137
|
+
expect(subject).to include 'asd'
|
138
|
+
end
|
139
|
+
end
|
140
|
+
|
141
|
+
describe 'when specified parameter n is greater than number of unique keys' do
|
142
|
+
it 'returns array with exactly n key samples, repeating some of them' do
|
143
|
+
h = { 'a' => 1, 'b' => 1, 'c' => 1 }
|
144
|
+
expect(h.wchoice(10).length).to eq 10
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
describe 'Hash#wchoice' do
|
151
|
+
describe 'when specified parameter n>1' do
|
152
|
+
it 'returns array of n sample keys' do
|
153
|
+
expect({ 'a' => 1 }.wchoice(2)).to eq %w[a a]
|
154
|
+
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
describe 'Hash#wsample' do
|
159
|
+
describe 'when specified parameter n>1' do
|
160
|
+
it 'returns array of unique keys' do
|
161
|
+
expect({ 'a' => 1 }.wsample(2)).to eq ['a']
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
it 'returned objects are not repeated' do
|
166
|
+
expect({ '_' => 9, 'a' => 1 }.wsample(10).sort).to eq %w[_ a]
|
167
|
+
end
|
168
|
+
end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: hash_sample
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.8.5
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sergey Evstegneiev
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2020-05-01 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rspec
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3.5'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '13'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '13'
|
41
|
+
description: Regular and weighted random sampling with and without replacement are
|
42
|
+
implemented
|
43
|
+
email:
|
44
|
+
- serg123e@gmail.com
|
45
|
+
executables: []
|
46
|
+
extensions: []
|
47
|
+
extra_rdoc_files:
|
48
|
+
- README.md
|
49
|
+
- LICENSE
|
50
|
+
files:
|
51
|
+
- LICENSE
|
52
|
+
- README.md
|
53
|
+
- Rakefile
|
54
|
+
- hash_sample.gemspec
|
55
|
+
- lib/hash_sample.rb
|
56
|
+
- lib/hash_sample/version.rb
|
57
|
+
- spec/hash_sample_spec.rb
|
58
|
+
- spec/spec_helper.rb
|
59
|
+
homepage: https://github.com/serg123e/hash_sample
|
60
|
+
licenses:
|
61
|
+
- MIT
|
62
|
+
metadata: {}
|
63
|
+
post_install_message:
|
64
|
+
rdoc_options:
|
65
|
+
- "--charset=UTF-8"
|
66
|
+
require_paths:
|
67
|
+
- lib
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '2.4'
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
requirements: []
|
79
|
+
rubyforge_project:
|
80
|
+
rubygems_version: 2.7.6.2
|
81
|
+
signing_key:
|
82
|
+
specification_version: 4
|
83
|
+
summary: Implements multiple sampling methods for Hash class
|
84
|
+
test_files: []
|