weighted_sampler 0.0.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.gitlab-ci.yml +23 -0
- data/.rspec +1 -1
- data/.rubocop.yml +60 -0
- data/.travis.yml +1 -1
- data/Gemfile +7 -2
- data/Gemfile.lock +53 -0
- data/README.md +86 -3
- data/Rakefile +5 -3
- data/bin/console +4 -3
- data/lib/weighted_sampler/version.rb +3 -1
- data/lib/weighted_sampler.rb +17 -9
- data/weighted_sampler.gemspec +2 -9
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d03c4329b1bc66f6c6ae1008c7f50b466f990f77bb46faa9850b8812d777dc9a
|
4
|
+
data.tar.gz: 13b517ea952b2f9c532601b57dbe6cbd8a6263b9f1f8cc29a2c6829caf88d7d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b09dc165fcd8f11ddc6aa34a894ad315d256f1b8450ee86e8c8fc7639a162622996250db77b9e6e3a32bc0c552487f21a306eb5e7155ec598e6cf9a8c5d39486
|
7
|
+
data.tar.gz: 247ad644e8d31e944aaba67bb5230c49464c7f5b0607bba4a57e566222441ca9cd8783f8bd9a3c33371062534441055ea637f52523a7f85046a3a6d58452366b
|
data/.gitignore
CHANGED
data/.gitlab-ci.yml
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
image: ruby
|
2
|
+
|
3
|
+
stages:
|
4
|
+
- test
|
5
|
+
|
6
|
+
before_script:
|
7
|
+
- bundle install
|
8
|
+
|
9
|
+
ruby2.3:
|
10
|
+
image: ruby:2.3
|
11
|
+
script: rspec
|
12
|
+
|
13
|
+
ruby2.4:
|
14
|
+
image: ruby:2.4
|
15
|
+
script: rspec
|
16
|
+
|
17
|
+
ruby2.5:
|
18
|
+
image: ruby:2.5
|
19
|
+
script: rspec
|
20
|
+
|
21
|
+
ruby2.6-rc:
|
22
|
+
image: ruby:2.6-rc
|
23
|
+
script: rspec
|
data/.rspec
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
AllCops:
|
2
|
+
Exclude:
|
3
|
+
- tmp/**/*
|
4
|
+
|
5
|
+
Rails:
|
6
|
+
Enabled: true
|
7
|
+
|
8
|
+
# We prefer having assignments inside if-s
|
9
|
+
Lint/AssignmentInCondition:
|
10
|
+
Enabled: false
|
11
|
+
|
12
|
+
# Longer lines is awesome
|
13
|
+
Metrics/LineLength:
|
14
|
+
Max: 160
|
15
|
+
|
16
|
+
# Having every class documented is too much
|
17
|
+
Style/Documentation:
|
18
|
+
Enabled: false
|
19
|
+
|
20
|
+
# We do love empty lines
|
21
|
+
Layout/EmptyLines:
|
22
|
+
Enabled: false
|
23
|
+
|
24
|
+
Layout/EmptyLinesAroundArguments:
|
25
|
+
Enabled: false
|
26
|
+
|
27
|
+
Layout/EmptyLinesAroundClassBody:
|
28
|
+
Enabled: false
|
29
|
+
|
30
|
+
Layout/EmptyLinesAroundBlockBody:
|
31
|
+
Enabled: false
|
32
|
+
|
33
|
+
Layout/EmptyLinesAroundModuleBody:
|
34
|
+
Enabled: false
|
35
|
+
|
36
|
+
# We do love complex code
|
37
|
+
Metrics/AbcSize:
|
38
|
+
Enabled: false
|
39
|
+
|
40
|
+
Metrics/CyclomaticComplexity:
|
41
|
+
Enabled: false
|
42
|
+
|
43
|
+
Metrics/ClassLength:
|
44
|
+
Enabled: false
|
45
|
+
|
46
|
+
Metrics/MethodLength:
|
47
|
+
Enabled: false
|
48
|
+
|
49
|
+
Metrics/PerceivedComplexity:
|
50
|
+
Enabled: false
|
51
|
+
|
52
|
+
Metrics/BlockLength:
|
53
|
+
Enabled: false
|
54
|
+
|
55
|
+
# This is AR-only cop
|
56
|
+
Rails/FindEach:
|
57
|
+
Enabled: false
|
58
|
+
|
59
|
+
Rails/CreateTableWithTimestamps:
|
60
|
+
Enabled: false
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
3
|
+
source 'https://rubygems.org'
|
4
|
+
|
5
|
+
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
6
|
+
|
7
|
+
gem 'pry-byebug'
|
8
|
+
gem 'simplecov'
|
4
9
|
|
5
10
|
# Specify your gem's dependencies in weighted_sampler.gemspec
|
6
11
|
gemspec
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
weighted_sampler (1.0.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
byebug (10.0.2)
|
10
|
+
coderay (1.1.2)
|
11
|
+
diff-lcs (1.3)
|
12
|
+
docile (1.3.1)
|
13
|
+
json (2.1.0)
|
14
|
+
method_source (0.9.0)
|
15
|
+
pry (0.11.3)
|
16
|
+
coderay (~> 1.1.0)
|
17
|
+
method_source (~> 0.9.0)
|
18
|
+
pry-byebug (3.6.0)
|
19
|
+
byebug (~> 10.0)
|
20
|
+
pry (~> 0.10)
|
21
|
+
rake (10.5.0)
|
22
|
+
rspec (3.7.0)
|
23
|
+
rspec-core (~> 3.7.0)
|
24
|
+
rspec-expectations (~> 3.7.0)
|
25
|
+
rspec-mocks (~> 3.7.0)
|
26
|
+
rspec-core (3.7.1)
|
27
|
+
rspec-support (~> 3.7.0)
|
28
|
+
rspec-expectations (3.7.0)
|
29
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
30
|
+
rspec-support (~> 3.7.0)
|
31
|
+
rspec-mocks (3.7.0)
|
32
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
33
|
+
rspec-support (~> 3.7.0)
|
34
|
+
rspec-support (3.7.1)
|
35
|
+
simplecov (0.16.1)
|
36
|
+
docile (~> 1.1)
|
37
|
+
json (>= 1.8, < 3)
|
38
|
+
simplecov-html (~> 0.10.0)
|
39
|
+
simplecov-html (0.10.2)
|
40
|
+
|
41
|
+
PLATFORMS
|
42
|
+
ruby
|
43
|
+
|
44
|
+
DEPENDENCIES
|
45
|
+
bundler (~> 1.16)
|
46
|
+
pry-byebug
|
47
|
+
rake (~> 10.0)
|
48
|
+
rspec (~> 3.0)
|
49
|
+
simplecov
|
50
|
+
weighted_sampler!
|
51
|
+
|
52
|
+
BUNDLED WITH
|
53
|
+
1.16.1
|
data/README.md
CHANGED
@@ -22,17 +22,100 @@ Or install it yourself as:
|
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
|
-
|
25
|
+
Module or sampler instance modes available
|
26
|
+
|
27
|
+
### Module
|
28
|
+
|
29
|
+
```ruby
|
30
|
+
>> WeightedSampler.sample([P0, P1, ...])
|
31
|
+
=> INDEX
|
32
|
+
>> WeightedSampler.sample({K0 => P0, K1 => P1, ...})
|
33
|
+
=> Ki
|
34
|
+
```
|
35
|
+
|
36
|
+
#### Input as an Array
|
37
|
+
|
38
|
+
You can provide `Array` of probabilities in a form of weights for each option.
|
39
|
+
|
40
|
+
Equal probabilities: `[50, 50]` or `[1, 1]` or `[0.5, 0.5]`
|
41
|
+
|
42
|
+
Different probabilities: `[99, 1]`, or `[0.001, 0.1]` (index 1 is 100 times more likely to be chosen than index 0)
|
43
|
+
|
44
|
+
If your input probabilities are not normalized, `WeightedSampler` will normalize them for you
|
45
|
+
|
46
|
+
`OUTPUT` will be an index of selected value, so that you can match it to your more complex data structure
|
47
|
+
|
48
|
+
#### Input as a Hash
|
49
|
+
|
50
|
+
To simplify some workflows you can provide a `Hash` structured like this:
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
{ K0 => P0, ...}
|
54
|
+
{ a: 1, b: 1, c: 2} # c has 0.5, a and b - 0.25
|
55
|
+
{ 0 => 50, 150 => 1} # key 150 is 50 times less likely to be picked than key 0
|
56
|
+
```
|
57
|
+
|
58
|
+
where `values` are probabilities with requirements similar to `Array` approach
|
59
|
+
|
60
|
+
`OUTPUT` in this case will be picked `key`
|
61
|
+
|
62
|
+
### Class (`::Base`)
|
63
|
+
|
64
|
+
Class is the *recommended* way to use the sampler because its performance is ~10x better than the Module's
|
65
|
+
|
66
|
+
You need to initialize sampler:
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
sampler = WeightedSampler::Base.new([P0, P1, ...])
|
70
|
+
# OR
|
71
|
+
sampler = WeightedSampler::Base.new({K0 => P0, K1 => P1, ...})
|
72
|
+
```
|
73
|
+
|
74
|
+
after that you can get samples via
|
75
|
+
|
76
|
+
`sampler.sample # => index (for Array) or key (for Hash)`
|
77
|
+
|
78
|
+
Input parameters for initializing an instance are the same as in the Module use case.
|
79
|
+
|
80
|
+
Plus, you can use the `seed` option for repeatable results
|
81
|
+
|
82
|
+
### Options
|
83
|
+
|
84
|
+
#### `skip_normalization`
|
85
|
+
**You do not have to normalize input probabilities**
|
86
|
+
|
87
|
+
But for some reason you may want to normalize yourself, for this
|
88
|
+
you have an option `skip_normalization`
|
89
|
+
|
90
|
+
```ruby
|
91
|
+
WeightedSampler.sample([...], skip_normalization: true)
|
92
|
+
WeightedSampler.sample({...}, skip_normalization: true)
|
93
|
+
```
|
94
|
+
|
95
|
+
If the provided probabilities do not sum to `1`, you'll get a `RuntimeError` exception with some information about this
|
96
|
+
|
97
|
+
#### `seed` (¡ Class use case only !)
|
98
|
+
|
99
|
+
If you need a repeatable sequence of samples, you can initialize the sampler with an Integer seed (similar to Ruby's [Random](https://ruby-doc.org/core/Random.html#method-c-new))
|
100
|
+
|
101
|
+
```ruby
|
102
|
+
WeightedSampler::Base.new([...], seed: SEED)
|
103
|
+
WeightedSampler::Base.new({...}, seed: SEED)
|
104
|
+
```
|
105
|
+
|
106
|
+
Please, note that if `seed` is not provided, sampler will use generic `rand` functionality without any seed initialization
|
107
|
+
|
108
|
+
### Performance
|
26
109
|
|
27
110
|
## Development
|
28
111
|
|
29
|
-
After checking out the repo, run `bin/setup` to install dependencies. Then, run `
|
112
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rspec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
113
|
|
31
114
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
115
|
|
33
116
|
## Contributing
|
34
117
|
|
35
|
-
Bug reports and pull requests are welcome on GitHub at https://
|
118
|
+
Bug reports and merge requests are welcome on GitLab at https://gitlab.com/alexey_b/weighted_sampler. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
36
119
|
|
37
120
|
## License
|
38
121
|
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require
|
4
|
-
require
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'weighted_sampler'
|
5
6
|
|
6
7
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
8
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +11,5 @@ require "weighted_sampler"
|
|
10
11
|
# require "pry"
|
11
12
|
# Pry.start
|
12
13
|
|
13
|
-
require
|
14
|
+
require 'irb'
|
14
15
|
IRB.start(__FILE__)
|
data/lib/weighted_sampler.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'weighted_sampler/version'
|
4
|
+
require 'pry'
|
3
5
|
module WeightedSampler
|
4
6
|
|
5
7
|
# sum of floats are never stable enough to guarantee exact equality to 1
|
@@ -16,16 +18,17 @@ module WeightedSampler
|
|
16
18
|
elsif enum.is_a?(Array)
|
17
19
|
@p_ranges = normalized_ranges(enum, skip_normalization)
|
18
20
|
@keys = [*0...enum.size]
|
19
|
-
else
|
20
|
-
raise ArgumentError, 'input structure must be a Hash or an Array'
|
21
21
|
end
|
22
|
+
|
23
|
+
return unless @p_ranges.nil? || @keys.nil? || @keys.empty?
|
24
|
+
raise ArgumentError, 'input structure must be a non-empty Hash or Array'
|
22
25
|
end
|
23
26
|
|
24
27
|
def sample
|
25
28
|
pick = @random ? @random.rand : rand
|
26
29
|
|
27
30
|
idx = @p_ranges.index { |range| range.include? pick }
|
28
|
-
@keys[idx]
|
31
|
+
@keys[idx] if idx
|
29
32
|
end
|
30
33
|
|
31
34
|
private
|
@@ -40,23 +43,28 @@ module WeightedSampler
|
|
40
43
|
end
|
41
44
|
|
42
45
|
def normalize_probabilities(array)
|
43
|
-
sum = array.inject(&:+)
|
46
|
+
sum = array.inject(&:+).to_f
|
44
47
|
|
45
48
|
array.map { |el| el / sum }
|
46
49
|
end
|
47
50
|
|
48
51
|
def array_to_ranges(array)
|
49
52
|
start = 0.0
|
50
|
-
ranges = array.map
|
53
|
+
ranges = array.map do |v|
|
54
|
+
p_start = start
|
55
|
+
start += v
|
56
|
+
|
57
|
+
(p_start...v + p_start)
|
58
|
+
end
|
51
59
|
|
52
|
-
raise 'normalized probabilities total is not 1' if start - 1.0 > ERROR_ALLOWANCE
|
60
|
+
raise 'normalized probabilities total is not 1' if (start - 1.0).abs > ERROR_ALLOWANCE
|
53
61
|
|
54
62
|
ranges
|
55
63
|
end
|
56
64
|
|
57
65
|
end
|
58
66
|
|
59
|
-
def self.sample(enum,
|
60
|
-
Base.new(enum,
|
67
|
+
def self.sample(enum, skip_normalization: false)
|
68
|
+
Base.new(enum, skip_normalization: skip_normalization).sample
|
61
69
|
end
|
62
70
|
end
|
data/weighted_sampler.gemspec
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
lib = File.expand_path('lib', __dir__)
|
3
5
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
6
|
require 'weighted_sampler/version'
|
@@ -18,15 +20,6 @@ Gem::Specification.new do |spec|
|
|
18
20
|
spec.homepage = 'https://gitlab.com/alexey_b/weighted_sampler'
|
19
21
|
spec.license = 'MIT'
|
20
22
|
|
21
|
-
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
22
|
-
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
23
|
-
# if spec.respond_to?(:metadata)
|
24
|
-
# spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
|
25
|
-
# else
|
26
|
-
# raise 'RubyGems 2.0 or newer is required to protect against ' \
|
27
|
-
# 'public gem pushes.'
|
28
|
-
# end
|
29
|
-
|
30
23
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
31
24
|
f.match(%r{^(test|spec|features)/})
|
32
25
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: weighted_sampler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Oleksiy Babich
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -62,10 +62,13 @@ extensions: []
|
|
62
62
|
extra_rdoc_files: []
|
63
63
|
files:
|
64
64
|
- ".gitignore"
|
65
|
+
- ".gitlab-ci.yml"
|
65
66
|
- ".rspec"
|
67
|
+
- ".rubocop.yml"
|
66
68
|
- ".travis.yml"
|
67
69
|
- CODE_OF_CONDUCT.md
|
68
70
|
- Gemfile
|
71
|
+
- Gemfile.lock
|
69
72
|
- LICENSE.txt
|
70
73
|
- README.md
|
71
74
|
- Rakefile
|