zipfian 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +13 -1
- data/lib/zipfian.rb +57 -18
- data/lib/zipfian/version.rb +1 -1
- data/test/test_zipfian.rb +53 -3
- data/test/test_zipfian_perf.rb +41 -0
- data/zipfian.gemspec +1 -0
- metadata +77 -71
data/README.md
CHANGED
@@ -34,7 +34,7 @@ puts z.sample # Integer between 1 and 1000
|
|
34
34
|
|
35
35
|
```
|
36
36
|
|
37
|
-
##
|
37
|
+
## Initialization overhead and caching
|
38
38
|
|
39
39
|
On initialization, Zipfian precalculates and stores the values of the cumulative distribution function for every integer in the range.
|
40
40
|
As the number gets bigger, it will take more time and memory.
|
@@ -46,6 +46,18 @@ z = Zipfian.new 1000000, 0.5
|
|
46
46
|
puts z.sample * 1000 - rand(1000)
|
47
47
|
```
|
48
48
|
|
49
|
+
To avoid repeated initialization when multiple `Zipfian` instances are used,
|
50
|
+
you can optionally enable thread-safe, class-level caching of the precalculated data
|
51
|
+
by passing `true` as the third argument to the initializer.
|
52
|
+
|
53
|
+
```ruby
|
54
|
+
# Cache precalculated data
|
55
|
+
z1 = Zipfian.new 1000000, 0.5, true
|
56
|
+
|
57
|
+
# Returns immediately: the data cached for the same (n, s) pair is reused.
|
58
|
+
z2 = Zipfian.new 1000000, 0.5
|
59
|
+
```
|
60
|
+
|
49
61
|
## Contributing
|
50
62
|
|
51
63
|
1. Fork it
|
data/lib/zipfian.rb
CHANGED
@@ -3,7 +3,12 @@ require "zipfian/version"
|
|
3
3
|
class Zipfian
|
4
4
|
attr_reader :n, :s
|
5
5
|
|
6
|
-
|
6
|
+
@@global_mutex = Mutex.new
|
7
|
+
@@mutexes = {}
|
8
|
+
@@h = {}
|
9
|
+
@@cdf = {}
|
10
|
+
|
11
|
+
def initialize n, s, cache = false
|
7
12
|
unless n > 0 && n.is_a?(Integer)
|
8
13
|
raise ArgumentError.new("Number of elements must be a positive integer")
|
9
14
|
end
|
@@ -14,25 +19,33 @@ class Zipfian
|
|
14
19
|
@n = n
|
15
20
|
@s = s
|
16
21
|
sums = [0]
|
17
|
-
@h = (1..@n).inject(0) { |sum, i| sums[i] = sum + 1.0 / (i ** @s) }
|
18
|
-
@cdf = (0..@n).map { |i| sums[i] / @h }
|
19
22
|
|
20
|
-
|
21
|
-
|
22
|
-
l = 0
|
23
|
-
r = self.length - 2
|
23
|
+
compute_h = lambda { (1..@n).inject(0) { |sum, i| sums[i] = sum + 1.0 / (i ** @s) } }
|
24
|
+
compute_cdf = lambda { (0..@n).map { |i| sums[i] / @h } }
|
24
25
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
26
|
+
key = [n, s]
|
27
|
+
mutex = nil
|
28
|
+
if cache
|
29
|
+
@@global_mutex.synchronize do
|
30
|
+
mutex = @@mutexes[key] ||= Mutex.new
|
31
|
+
end
|
32
|
+
mutex.synchronize do
|
33
|
+
@h = @@h[key] ||= compute_h.call
|
34
|
+
@cdf = @@cdf[key] ||= compute_cdf.call
|
35
|
+
end
|
36
|
+
else
|
37
|
+
@@global_mutex.synchronize do
|
38
|
+
# Do not create mutex
|
39
|
+
mutex = @@mutexes[key]
|
40
|
+
end
|
41
|
+
if mutex
|
42
|
+
mutex.synchronize do
|
43
|
+
@h = @@h[key] || compute_h.call
|
44
|
+
@cdf = @@cdf[key] || compute_cdf.call
|
33
45
|
end
|
34
|
-
|
35
|
-
|
46
|
+
else
|
47
|
+
@h = compute_h.call
|
48
|
+
@cdf = compute_cdf.call
|
36
49
|
end
|
37
50
|
end
|
38
51
|
end
|
@@ -55,14 +68,40 @@ class Zipfian
|
|
55
68
|
end
|
56
69
|
|
57
70
|
# Draws one random rank from the distribution.
#
# A uniform random number obtained from rand is looked up in the
# precomputed @cdf table with binary_search_index; the index found there
# is returned as the sample.
def sample
  uniform = rand
  binary_search_index(@cdf, uniform)
end
|
60
73
|
|
74
|
+
# Lists the parameter pairs that currently have a class-level cache entry.
#
# The keys of @@mutexes are read while holding @@global_mutex. Each key is
# an [n, s] pair and is reported as a hash with :n and :s entries.
#
# @return [Array<Hash>] one { :n => ..., :s => ... } hash per registered key
def self.cached
  @@global_mutex.synchronize do
    @@mutexes.keys.map { |pair| { :n => pair.first, :s => pair.last } }
  end
end
|
61
83
|
private
|
62
84
|
# Validates a rank argument.
#
# @param k [Object] candidate rank
# @return [nil] when k is an Integer between 1 and @n (inclusive)
# @raise [ArgumentError] for any other value
def check_rank k
  return if k.is_a?(Integer) && k.between?(1, @n)

  raise ArgumentError.new("Rank must be a positive integer (max: #{@n})")
end
|
89
|
+
|
90
|
+
# Finds the slot of an ascending table that contains +v+.
#
# Only indices 0..arr.length-2 are examined (the last entry is never
# compared). The result is the number of examined entries that are <= v,
# i.e. the index k such that arr[k-1] <= v < arr[k]. For a CDF table that
# starts at 0 and a v in [0, 1) this is a rank between 1 and arr.length-1.
#
# A value exactly equal to a table entry belongs to the slot that *starts*
# at that entry. The previous implementation returned the entry's own index
# when it hit an exact match inside the loop, which was off by one and
# produced rank 0 for v == 0.0; exact matches are now handled the same way
# as every other v >= arr[mid].
#
# @param arr [Array<Numeric>] values sorted in ascending order
# @param v [Numeric] value to locate
# @return [Integer] index of the first examined entry greater than v, or
#   arr.length-1 when none of the examined entries is greater
def binary_search_index arr, v
  lo = 0
  hi = arr.length - 2

  while lo <= hi
    mid = (lo + hi) / 2
    if v < arr[mid]
      hi = mid - 1
    else
      # v >= arr[mid]: the answer lies strictly to the right of mid.
      lo = mid + 1
    end
  end

  lo
end
|
67
106
|
end
|
68
107
|
|
data/lib/zipfian/version.rb
CHANGED
data/test/test_zipfian.rb
CHANGED
@@ -4,7 +4,10 @@ require 'rubygems'
|
|
4
4
|
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '../lib')
|
5
5
|
require 'zipfian'
|
6
6
|
require 'test-unit'
|
7
|
+
require 'parallelize'
|
8
|
+
require 'benchmark'
|
7
9
|
|
10
|
+
# TODO: naive tests
|
8
11
|
class TestZipfian < Test::Unit::TestCase
|
9
12
|
def test_init
|
10
13
|
assert_raise(ArgumentError) { z = Zipfian.new }
|
@@ -44,10 +47,57 @@ class TestZipfian < Test::Unit::TestCase
|
|
44
47
|
end
|
45
48
|
|
46
49
|
# Builds a cache-enabled Zipfian three times and checks that every one of
# 10000 samples per instance falls inside 1..100000.
def test_sample
  # cached
  valid_ranks = (1..100000)
  3.times do
    zipf = Zipfian.new 100000, 1, true
    10000.times { assert valid_ranks.include?(zipf.sample) }
  end
end
|
59
|
+
|
60
|
+
# Prints the construction time and the Zipfian.cached listing for a series
# of constructions: s = 0.99 without, with, and again without the cache
# flag, followed by s = 0.88 twice without it. Makes no assertions.
def test_cached_initialization
  extra_args = [[0.99], [0.99, true], [0.99], [0.88], [0.88]]
  extra_args.each do |args|
    puts Benchmark.measure { Zipfian.new(10 ** 6, *args) }
    p Zipfian.cached
  end
end
|
73
|
+
|
74
|
+
# Constructs the same cache-enabled Zipfian from 32 iterations dispatched
# through peach (presumably one thread each; peach comes from the
# parallelize gem), recording :s before and :e after each construction,
# then prints the recorded sequence. Makes no assertions.
def test_multi_threaded
  lock = Mutex.new
  events = []
  32.times.peach(32) do |_idx|
    lock.synchronize { events << :s }
    Zipfian.new(10 ** 6, 0.12345, true)
    lock.synchronize { events << :e }
  end
  puts events.join
end
|
84
|
+
|
85
|
+
# Prints a 20-bucket text histogram of 10^4 samples drawn from the range
# 1..10^6 for each of several exponents. Bars are scaled so that the
# fullest bucket is 60 characters wide. Makes no assertions.
def test_dist
  [0.1, 0.5, 0.75, 1].each do |s|
    puts "s = #{s}"
    max = 10 ** 6
    cnt = 10 ** 4
    hst = 20
    col = 60
    zipf = Zipfian.new max, s

    buckets = Array.new(hst, 0)
    cnt.times do
      # Map rank 1..max onto bucket 0..hst-1.
      slot = (zipf.sample - 1) * hst / max
      buckets[slot] += 1
    end

    scale = col.to_f / buckets.max
    bars = buckets.map { |e| '*' * (e * scale).to_i + " : #{e}" }
    puts bars
    puts
  end
end
|
53
103
|
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), '../lib')
|
5
|
+
require 'zipfian'
|
6
|
+
require 'test-unit'
|
7
|
+
require 'benchmark'
|
8
|
+
require 'parallelize'
|
9
|
+
|
10
|
+
class TestZipfian < Test::Unit::TestCase
  # Prints initialization time and sampling throughput for thread counts
  # 8, 4, 2, 1 and ranges 10^2 .. 10^6, using cache-enabled instances.
  # The throughput figure divides cnt (the number of samples each
  # iteration draws) by the elapsed wall-clock time. Makes no assertions.
  def test_zipfian_performance
    cnt = 100000

    [8, 4, 2, 1].each do |thr|
      puts "# of threads: #{thr}"
      zps = Array.new(thr)

      (2..6).each do |pow|
        max = 10 ** pow
        puts "Range: 1 ~ #{max}"

        print "Initialize (sec): "
        init_time = Benchmark.measure {
          thr.times.peach(thr) { |idx| zps[idx] = Zipfian.new(max, 1, true) }
        }.real
        puts init_time

        print "Sample throughput (op/sec): "
        sample_time = Benchmark.measure {
          thr.times.peach(thr) do |idx|
            cnt.times { zps[idx].sample }
          end
        }.real
        puts cnt / sample_time
        puts
      end

      puts
    end
  end
end
|
data/zipfian.gemspec
CHANGED
metadata
CHANGED
@@ -1,74 +1,87 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: zipfian
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 0
|
9
|
-
- 2
|
10
|
-
version: 0.0.2
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Junegunn Choi
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-06-04 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: test-unit
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
segments:
|
30
|
-
- 0
|
31
|
-
version: "0"
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
32
22
|
type: :development
|
33
|
-
version_requirements: *id001
|
34
|
-
- !ruby/object:Gem::Dependency
|
35
|
-
name: guard
|
36
23
|
prerelease: false
|
37
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: guard
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
38
33
|
none: false
|
39
|
-
requirements:
|
40
|
-
- -
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
|
43
|
-
segments:
|
44
|
-
- 0
|
45
|
-
version: "0"
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
46
38
|
type: :development
|
47
|
-
|
48
|
-
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
49
47
|
name: guard-test
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
50
55
|
prerelease: false
|
51
|
-
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: parallelize
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
52
65
|
none: false
|
53
|
-
requirements:
|
54
|
-
- -
|
55
|
-
- !ruby/object:Gem::Version
|
56
|
-
|
57
|
-
segments:
|
58
|
-
- 0
|
59
|
-
version: "0"
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
60
70
|
type: :development
|
61
|
-
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
62
78
|
description: Zipfian distribution
|
63
|
-
email:
|
79
|
+
email:
|
64
80
|
- junegunn.c@gmail.com
|
65
81
|
executables: []
|
66
|
-
|
67
82
|
extensions: []
|
68
|
-
|
69
83
|
extra_rdoc_files: []
|
70
|
-
|
71
|
-
files:
|
84
|
+
files:
|
72
85
|
- .gitignore
|
73
86
|
- Gemfile
|
74
87
|
- Guardfile
|
@@ -78,39 +91,32 @@ files:
|
|
78
91
|
- lib/zipfian.rb
|
79
92
|
- lib/zipfian/version.rb
|
80
93
|
- test/test_zipfian.rb
|
94
|
+
- test/test_zipfian_perf.rb
|
81
95
|
- zipfian.gemspec
|
82
96
|
homepage: https://github.com/junegunn/zipfian
|
83
97
|
licenses: []
|
84
|
-
|
85
98
|
post_install_message:
|
86
99
|
rdoc_options: []
|
87
|
-
|
88
|
-
require_paths:
|
100
|
+
require_paths:
|
89
101
|
- lib
|
90
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
102
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
91
103
|
none: false
|
92
|
-
requirements:
|
93
|
-
- -
|
94
|
-
- !ruby/object:Gem::Version
|
95
|
-
|
96
|
-
|
97
|
-
- 0
|
98
|
-
version: "0"
|
99
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
|
+
requirements:
|
105
|
+
- - ! '>='
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
109
|
none: false
|
101
|
-
requirements:
|
102
|
-
- -
|
103
|
-
- !ruby/object:Gem::Version
|
104
|
-
|
105
|
-
segments:
|
106
|
-
- 0
|
107
|
-
version: "0"
|
110
|
+
requirements:
|
111
|
+
- - ! '>='
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: '0'
|
108
114
|
requirements: []
|
109
|
-
|
110
115
|
rubyforge_project:
|
111
|
-
rubygems_version: 1.8.
|
116
|
+
rubygems_version: 1.8.24
|
112
117
|
signing_key:
|
113
118
|
specification_version: 3
|
114
119
|
summary: Zipfian distribution
|
115
|
-
test_files:
|
120
|
+
test_files:
|
116
121
|
- test/test_zipfian.rb
|
122
|
+
- test/test_zipfian_perf.rb
|