zipfian 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +13 -1
- data/lib/zipfian.rb +57 -18
- data/lib/zipfian/version.rb +1 -1
- data/test/test_zipfian.rb +53 -3
- data/test/test_zipfian_perf.rb +41 -0
- data/zipfian.gemspec +1 -0
- metadata +77 -71
data/README.md
CHANGED
@@ -34,7 +34,7 @@ puts z.sample # Integer between 1 and 1000
|
|
34
34
|
|
35
35
|
```
|
36
36
|
|
37
|
-
##
|
37
|
+
## Initialization overhead and caching
|
38
38
|
|
39
39
|
On initialization, Zipfian precalculates and stores the values of the cumulative distribution function for every integer in the range.
|
40
40
|
As the number of elements grows, initialization takes more time and memory.
|
@@ -46,6 +46,18 @@ z = Zipfian.new 1000000, 0.5
|
|
46
46
|
puts z.sample * 1000 - rand(1000)
|
47
47
|
```
|
48
48
|
|
49
|
+
To avoid repeated initialization when multiple `Zipfian` instances are used,
|
50
|
+
you can optionally enable thread-safe caching of precalculated data at the class level
|
51
|
+
by setting the third parameter of the initializer to true.
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
# Cache precalculated data
|
55
|
+
z1 = Zipfian.new 1000000, 0.5, true
|
56
|
+
|
57
|
+
# Returns immediately.
|
58
|
+
z2 = Zipfian.new 1000000, 0.5
|
59
|
+
```
|
60
|
+
|
49
61
|
## Contributing
|
50
62
|
|
51
63
|
1. Fork it
|
data/lib/zipfian.rb
CHANGED
@@ -3,7 +3,12 @@ require "zipfian/version"
|
|
3
3
|
class Zipfian
|
4
4
|
attr_reader :n, :s
|
5
5
|
|
6
|
-
|
6
|
+
@@global_mutex = Mutex.new
|
7
|
+
@@mutexes = {}
|
8
|
+
@@h = {}
|
9
|
+
@@cdf = {}
|
10
|
+
|
11
|
+
def initialize n, s, cache = false
|
7
12
|
unless n > 0 && n.is_a?(Integer)
|
8
13
|
raise ArgumentError.new("Number of elements must be a positive integer")
|
9
14
|
end
|
@@ -14,25 +19,33 @@ class Zipfian
|
|
14
19
|
@n = n
|
15
20
|
@s = s
|
16
21
|
sums = [0]
|
17
|
-
@h = (1..@n).inject(0) { |sum, i| sums[i] = sum + 1.0 / (i ** @s) }
|
18
|
-
@cdf = (0..@n).map { |i| sums[i] / @h }
|
19
22
|
|
20
|
-
|
21
|
-
|
22
|
-
l = 0
|
23
|
-
r = self.length - 2
|
23
|
+
compute_h = lambda { (1..@n).inject(0) { |sum, i| sums[i] = sum + 1.0 / (i ** @s) } }
|
24
|
+
compute_cdf = lambda { (0..@n).map { |i| sums[i] / @h } }
|
24
25
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
26
|
+
key = [n, s]
|
27
|
+
mutex = nil
|
28
|
+
if cache
|
29
|
+
@@global_mutex.synchronize do
|
30
|
+
mutex = @@mutexes[key] ||= Mutex.new
|
31
|
+
end
|
32
|
+
mutex.synchronize do
|
33
|
+
@h = @@h[key] ||= compute_h.call
|
34
|
+
@cdf = @@cdf[key] ||= compute_cdf.call
|
35
|
+
end
|
36
|
+
else
|
37
|
+
@@global_mutex.synchronize do
|
38
|
+
# Do not create mutex
|
39
|
+
mutex = @@mutexes[key]
|
40
|
+
end
|
41
|
+
if mutex
|
42
|
+
mutex.synchronize do
|
43
|
+
@h = @@h[key] || compute_h.call
|
44
|
+
@cdf = @@cdf[key] || compute_cdf.call
|
33
45
|
end
|
34
|
-
|
35
|
-
|
46
|
+
else
|
47
|
+
@h = compute_h.call
|
48
|
+
@cdf = compute_cdf.call
|
36
49
|
end
|
37
50
|
end
|
38
51
|
end
|
@@ -55,14 +68,40 @@ class Zipfian
|
|
55
68
|
end
|
56
69
|
|
57
70
|
def sample
|
58
|
-
@cdf
|
71
|
+
binary_search_index @cdf, rand
|
59
72
|
end
|
60
73
|
|
74
|
+
def self.cached
|
75
|
+
ret = []
|
76
|
+
@@global_mutex.synchronize do
|
77
|
+
@@mutexes.keys.each do |key|
|
78
|
+
ret << { :n => key.first, :s => key.last }
|
79
|
+
end
|
80
|
+
end
|
81
|
+
ret
|
82
|
+
end
|
61
83
|
private
|
62
84
|
def check_rank k
|
63
85
|
unless k.is_a?(Integer) && k >= 1 && k <= @n
|
64
86
|
raise ArgumentError.new("Rank must be a positive integer (max: #{@n})")
|
65
87
|
end
|
66
88
|
end
|
89
|
+
|
90
|
+
def binary_search_index arr, v
|
91
|
+
l = 0
|
92
|
+
r = arr.length - 2
|
93
|
+
|
94
|
+
while (c = (l + r) / 2) && l < r
|
95
|
+
if v < arr[c]
|
96
|
+
r = c - 1
|
97
|
+
elsif v > arr[c]
|
98
|
+
l = c + 1
|
99
|
+
else
|
100
|
+
return c
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
104
|
+
v < arr[c] ? c : c + 1
|
105
|
+
end
|
67
106
|
end
|
68
107
|
|
data/lib/zipfian/version.rb
CHANGED
data/test/test_zipfian.rb
CHANGED
@@ -4,7 +4,10 @@ require 'rubygems'
|
|
4
4
|
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '../lib')
|
5
5
|
require 'zipfian'
|
6
6
|
require 'test-unit'
|
7
|
+
require 'parallelize'
|
8
|
+
require 'benchmark'
|
7
9
|
|
10
|
+
# TODO: naive tests
|
8
11
|
class TestZipfian < Test::Unit::TestCase
|
9
12
|
def test_init
|
10
13
|
assert_raise(ArgumentError) { z = Zipfian.new }
|
@@ -44,10 +47,57 @@ class TestZipfian < Test::Unit::TestCase
|
|
44
47
|
end
|
45
48
|
|
46
49
|
def test_sample
|
47
|
-
|
50
|
+
# cached
|
51
|
+
3.times do
|
52
|
+
z = Zipfian.new 100000, 1, true
|
48
53
|
|
49
|
-
|
50
|
-
|
54
|
+
10000.times do |i|
|
55
|
+
assert (1..100000).include?(z.sample)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def test_cached_initialization
|
61
|
+
puts Benchmark.measure { Zipfian.new 10 ** 6, 0.99 }
|
62
|
+
p Zipfian.cached
|
63
|
+
puts Benchmark.measure { Zipfian.new 10 ** 6, 0.99, true }
|
64
|
+
p Zipfian.cached
|
65
|
+
puts Benchmark.measure { Zipfian.new 10 ** 6, 0.99 }
|
66
|
+
p Zipfian.cached
|
67
|
+
|
68
|
+
puts Benchmark.measure { Zipfian.new 10 ** 6, 0.88 }
|
69
|
+
p Zipfian.cached
|
70
|
+
puts Benchmark.measure { Zipfian.new 10 ** 6, 0.88 }
|
71
|
+
p Zipfian.cached
|
72
|
+
end
|
73
|
+
|
74
|
+
def test_multi_threaded
|
75
|
+
m = Mutex.new
|
76
|
+
msgs = []
|
77
|
+
32.times.peach(32) do |idx|
|
78
|
+
m.synchronize { msgs << :s }
|
79
|
+
z = Zipfian.new(10 ** 6, 0.12345, true)
|
80
|
+
m.synchronize { msgs << :e }
|
81
|
+
end
|
82
|
+
puts msgs.join
|
83
|
+
end
|
84
|
+
|
85
|
+
def test_dist
|
86
|
+
[0.1, 0.5, 0.75, 1].each do |s|
|
87
|
+
puts "s = #{s}"
|
88
|
+
max = 10 ** 6
|
89
|
+
cnt = 10 ** 4
|
90
|
+
hst = 20
|
91
|
+
col = 60
|
92
|
+
z = Zipfian.new max, s
|
93
|
+
histogram = Array.new(hst) { 0 }
|
94
|
+
cnt.times do
|
95
|
+
histogram[(z.sample - 1) * hst / max] += 1
|
96
|
+
end
|
97
|
+
|
98
|
+
r = col.to_f / histogram.max
|
99
|
+
puts histogram.map { |e| '*' * (e * r).to_i + " : #{e}" }
|
100
|
+
puts
|
51
101
|
end
|
52
102
|
end
|
53
103
|
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '../lib')
|
5
|
+
require 'zipfian'
|
6
|
+
require 'test-unit'
|
7
|
+
require 'benchmark'
|
8
|
+
require 'parallelize'
|
9
|
+
|
10
|
+
class TestZipfian < Test::Unit::TestCase
|
11
|
+
def test_zipfian_performance
|
12
|
+
cnt = 100000
|
13
|
+
|
14
|
+
[8, 4, 2, 1].each do |thr|
|
15
|
+
puts "# of threads: #{thr}"
|
16
|
+
zps = Array.new(thr)
|
17
|
+
(2..6).each do |pow|
|
18
|
+
max = 10 ** pow
|
19
|
+
|
20
|
+
puts "Range: 1 ~ #{max}"
|
21
|
+
print "Initialize (sec): "
|
22
|
+
puts Benchmark.measure {
|
23
|
+
thr.times.peach(thr) do |idx|
|
24
|
+
zps[idx] = Zipfian.new max, 1, true
|
25
|
+
end
|
26
|
+
}.real
|
27
|
+
|
28
|
+
print "Sample throughput (op/sec): "
|
29
|
+
puts cnt / Benchmark.measure {
|
30
|
+
thr.times.peach(thr) do |idx|
|
31
|
+
cnt.times do |i|
|
32
|
+
zps[idx].sample
|
33
|
+
end
|
34
|
+
end
|
35
|
+
}.real
|
36
|
+
puts
|
37
|
+
end
|
38
|
+
puts
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/zipfian.gemspec
CHANGED
metadata
CHANGED
@@ -1,74 +1,87 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: zipfian
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 0
|
9
|
-
- 2
|
10
|
-
version: 0.0.2
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Junegunn Choi
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-06-04 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: test-unit
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
segments:
|
30
|
-
- 0
|
31
|
-
version: "0"
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
32
22
|
type: :development
|
33
|
-
version_requirements: *id001
|
34
|
-
- !ruby/object:Gem::Dependency
|
35
|
-
name: guard
|
36
23
|
prerelease: false
|
37
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: guard
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
38
33
|
none: false
|
39
|
-
requirements:
|
40
|
-
- -
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
|
43
|
-
segments:
|
44
|
-
- 0
|
45
|
-
version: "0"
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
46
38
|
type: :development
|
47
|
-
|
48
|
-
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
49
47
|
name: guard-test
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
50
55
|
prerelease: false
|
51
|
-
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: parallelize
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
52
65
|
none: false
|
53
|
-
requirements:
|
54
|
-
- -
|
55
|
-
- !ruby/object:Gem::Version
|
56
|
-
|
57
|
-
segments:
|
58
|
-
- 0
|
59
|
-
version: "0"
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
60
70
|
type: :development
|
61
|
-
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
62
78
|
description: Zipfian distribution
|
63
|
-
email:
|
79
|
+
email:
|
64
80
|
- junegunn.c@gmail.com
|
65
81
|
executables: []
|
66
|
-
|
67
82
|
extensions: []
|
68
|
-
|
69
83
|
extra_rdoc_files: []
|
70
|
-
|
71
|
-
files:
|
84
|
+
files:
|
72
85
|
- .gitignore
|
73
86
|
- Gemfile
|
74
87
|
- Guardfile
|
@@ -78,39 +91,32 @@ files:
|
|
78
91
|
- lib/zipfian.rb
|
79
92
|
- lib/zipfian/version.rb
|
80
93
|
- test/test_zipfian.rb
|
94
|
+
- test/test_zipfian_perf.rb
|
81
95
|
- zipfian.gemspec
|
82
96
|
homepage: https://github.com/junegunn/zipfian
|
83
97
|
licenses: []
|
84
|
-
|
85
98
|
post_install_message:
|
86
99
|
rdoc_options: []
|
87
|
-
|
88
|
-
require_paths:
|
100
|
+
require_paths:
|
89
101
|
- lib
|
90
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
102
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
103
|
none: false
|
92
|
-
requirements:
|
93
|
-
- -
|
94
|
-
- !ruby/object:Gem::Version
|
95
|
-
|
96
|
-
|
97
|
-
- 0
|
98
|
-
version: "0"
|
99
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - ! '>='
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
109
|
none: false
|
101
|
-
requirements:
|
102
|
-
- -
|
103
|
-
- !ruby/object:Gem::Version
|
104
|
-
|
105
|
-
segments:
|
106
|
-
- 0
|
107
|
-
version: "0"
|
110
|
+
requirements:
|
111
|
+
- - ! '>='
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: '0'
|
108
114
|
requirements: []
|
109
|
-
|
110
115
|
rubyforge_project:
|
111
|
-
rubygems_version: 1.8.
|
116
|
+
rubygems_version: 1.8.24
|
112
117
|
signing_key:
|
113
118
|
specification_version: 3
|
114
119
|
summary: Zipfian distribution
|
115
|
-
test_files:
|
120
|
+
test_files:
|
116
121
|
- test/test_zipfian.rb
|
122
|
+
- test/test_zipfian_perf.rb
|