zipfian 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -34,7 +34,7 @@ puts z.sample # Integer between 1 and 1000
34
34
 
35
35
  ```
36
36
 
37
- ## Disclaimer
37
+ ## Initialization overhead and caching
38
38
 
39
39
  On initialization, Zipfian precalculates and stores the values of the cumulative distribution function for every integer in the range.
40
40
  As the range gets larger, initialization takes more time and memory.
@@ -46,6 +46,18 @@ z = Zipfian.new 1000000, 0.5
46
46
  puts z.sample * 1000 - rand(1000)
47
47
  ```
48
48
 
49
+ To avoid repeated initialization when multiple `Zipfian` instances are used,
50
+ you can optionally enable thread-safe caching of precalculated data at the class level
51
+ by setting the third parameter of the initializer to true.
52
+
53
+ ```ruby
54
+ # Cache precalculated data
55
+ z1 = Zipfian.new 1000000, 0.5, true
56
+
57
+ # Returns immediately.
58
+ z2 = Zipfian.new 1000000, 0.5
59
+ ```
60
+
49
61
  ## Contributing
50
62
 
51
63
  1. Fork it
@@ -3,7 +3,12 @@ require "zipfian/version"
3
3
  class Zipfian
4
4
  attr_reader :n, :s
5
5
 
6
- def initialize n, s
6
+ @@global_mutex = Mutex.new
7
+ @@mutexes = {}
8
+ @@h = {}
9
+ @@cdf = {}
10
+
11
+ def initialize n, s, cache = false
7
12
  unless n > 0 && n.is_a?(Integer)
8
13
  raise ArgumentError.new("Number of elements must be a positive integer")
9
14
  end
@@ -14,25 +19,33 @@ class Zipfian
14
19
  @n = n
15
20
  @s = s
16
21
  sums = [0]
17
- @h = (1..@n).inject(0) { |sum, i| sums[i] = sum + 1.0 / (i ** @s) }
18
- @cdf = (0..@n).map { |i| sums[i] / @h }
19
22
 
20
- class << @cdf
21
- def binary_search_index v
22
- l = 0
23
- r = self.length - 2
23
+ compute_h = lambda { (1..@n).inject(0) { |sum, i| sums[i] = sum + 1.0 / (i ** @s) } }
24
+ compute_cdf = lambda { (0..@n).map { |i| sums[i] / @h } }
24
25
 
25
- while (c = (l + r) / 2) && l < r
26
- if v < self[c]
27
- r = c - 1
28
- elsif v > self[c]
29
- l = c + 1
30
- else
31
- return c
32
- end
26
+ key = [n, s]
27
+ mutex = nil
28
+ if cache
29
+ @@global_mutex.synchronize do
30
+ mutex = @@mutexes[key] ||= Mutex.new
31
+ end
32
+ mutex.synchronize do
33
+ @h = @@h[key] ||= compute_h.call
34
+ @cdf = @@cdf[key] ||= compute_cdf.call
35
+ end
36
+ else
37
+ @@global_mutex.synchronize do
38
+ # Do not create mutex
39
+ mutex = @@mutexes[key]
40
+ end
41
+ if mutex
42
+ mutex.synchronize do
43
+ @h = @@h[key] || compute_h.call
44
+ @cdf = @@cdf[key] || compute_cdf.call
33
45
  end
34
-
35
- v < self[c] ? c : c + 1
46
+ else
47
+ @h = compute_h.call
48
+ @cdf = compute_cdf.call
36
49
  end
37
50
  end
38
51
  end
@@ -55,14 +68,40 @@ class Zipfian
55
68
  end
56
69
 
57
70
  def sample
58
- @cdf.binary_search_index rand
71
+ binary_search_index @cdf, rand
59
72
  end
60
73
 
74
+ def self.cached
75
+ ret = []
76
+ @@global_mutex.synchronize do
77
+ @@mutexes.keys.each do |key|
78
+ ret << { :n => key.first, :s => key.last }
79
+ end
80
+ end
81
+ ret
82
+ end
61
83
  private
62
84
  def check_rank k
63
85
  unless k.is_a?(Integer) && k >= 1 && k <= @n
64
86
  raise ArgumentError.new("Rank must be a positive integer (max: #{@n})")
65
87
  end
66
88
  end
89
+
90
+ def binary_search_index arr, v
91
+ l = 0
92
+ r = arr.length - 2
93
+
94
+ while (c = (l + r) / 2) && l < r
95
+ if v < arr[c]
96
+ r = c - 1
97
+ elsif v > arr[c]
98
+ l = c + 1
99
+ else
100
+ return c
101
+ end
102
+ end
103
+
104
+ v < arr[c] ? c : c + 1
105
+ end
67
106
  end
68
107
 
@@ -1,3 +1,3 @@
1
1
  class Zipfian
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
@@ -4,7 +4,10 @@ require 'rubygems'
4
4
  $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '../lib')
5
5
  require 'zipfian'
6
6
  require 'test-unit'
7
+ require 'parallelize'
8
+ require 'benchmark'
7
9
 
10
+ # TODO: naive tests
8
11
  class TestZipfian < Test::Unit::TestCase
9
12
  def test_init
10
13
  assert_raise(ArgumentError) { z = Zipfian.new }
@@ -44,10 +47,57 @@ class TestZipfian < Test::Unit::TestCase
44
47
  end
45
48
 
46
49
  def test_sample
47
- z = Zipfian.new 100, 1
50
+ # cached
51
+ 3.times do
52
+ z = Zipfian.new 100000, 1, true
48
53
 
49
- 10000.times do |i|
50
- assert (1..100).include?(z.sample)
54
+ 10000.times do |i|
55
+ assert (1..100000).include?(z.sample)
56
+ end
57
+ end
58
+ end
59
+
60
+ def test_cached_initialization
61
+ puts Benchmark.measure { Zipfian.new 10 ** 6, 0.99 }
62
+ p Zipfian.cached
63
+ puts Benchmark.measure { Zipfian.new 10 ** 6, 0.99, true }
64
+ p Zipfian.cached
65
+ puts Benchmark.measure { Zipfian.new 10 ** 6, 0.99 }
66
+ p Zipfian.cached
67
+
68
+ puts Benchmark.measure { Zipfian.new 10 ** 6, 0.88 }
69
+ p Zipfian.cached
70
+ puts Benchmark.measure { Zipfian.new 10 ** 6, 0.88 }
71
+ p Zipfian.cached
72
+ end
73
+
74
+ def test_multi_threaded
75
+ m = Mutex.new
76
+ msgs = []
77
+ 32.times.peach(32) do |idx|
78
+ m.synchronize { msgs << :s }
79
+ z = Zipfian.new(10 ** 6, 0.12345, true)
80
+ m.synchronize { msgs << :e }
81
+ end
82
+ puts msgs.join
83
+ end
84
+
85
+ def test_dist
86
+ [0.1, 0.5, 0.75, 1].each do |s|
87
+ puts "s = #{s}"
88
+ max = 10 ** 6
89
+ cnt = 10 ** 4
90
+ hst = 20
91
+ col = 60
92
+ z = Zipfian.new max, s
93
+ histogram = Array.new(hst) { 0 }
94
+ cnt.times do
95
+ histogram[(z.sample - 1) * hst / max] += 1
96
+ end
97
+
98
+ r = col.to_f / histogram.max
99
+ puts histogram.map { |e| '*' * (e * r).to_i + " : #{e}" }
100
+ puts
51
101
  end
52
102
  end
53
103
  end
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), '../lib')
5
+ require 'zipfian'
6
+ require 'test-unit'
7
+ require 'benchmark'
8
+ require 'parallelize'
9
+
10
+ class TestZipfian < Test::Unit::TestCase
11
+ def test_zipfian_performance
12
+ cnt = 100000
13
+
14
+ [8, 4, 2, 1].each do |thr|
15
+ puts "# of threads: #{thr}"
16
+ zps = Array.new(thr)
17
+ (2..6).each do |pow|
18
+ max = 10 ** pow
19
+
20
+ puts "Range: 1 ~ #{max}"
21
+ print "Initialize (sec): "
22
+ puts Benchmark.measure {
23
+ thr.times.peach(thr) do |idx|
24
+ zps[idx] = Zipfian.new max, 1, true
25
+ end
26
+ }.real
27
+
28
+ print "Sample throughput (op/sec): "
29
+ puts cnt / Benchmark.measure {
30
+ thr.times.peach(thr) do |idx|
31
+ cnt.times do |i|
32
+ zps[idx].sample
33
+ end
34
+ end
35
+ }.real
36
+ puts
37
+ end
38
+ puts
39
+ end
40
+ end
41
+ end
@@ -18,4 +18,5 @@ Gem::Specification.new do |gem|
18
18
  gem.add_development_dependency 'test-unit'
19
19
  gem.add_development_dependency 'guard'
20
20
  gem.add_development_dependency 'guard-test'
21
+ gem.add_development_dependency 'parallelize'
21
22
  end
metadata CHANGED
@@ -1,74 +1,87 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: zipfian
3
- version: !ruby/object:Gem::Version
4
- hash: 27
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 0
9
- - 2
10
- version: 0.0.2
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Junegunn Choi
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-05-04 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-06-04 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: test-unit
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 3
29
- segments:
30
- - 0
31
- version: "0"
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
32
22
  type: :development
33
- version_requirements: *id001
34
- - !ruby/object:Gem::Dependency
35
- name: guard
36
23
  prerelease: false
37
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: guard
32
+ requirement: !ruby/object:Gem::Requirement
38
33
  none: false
39
- requirements:
40
- - - ">="
41
- - !ruby/object:Gem::Version
42
- hash: 3
43
- segments:
44
- - 0
45
- version: "0"
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
46
38
  type: :development
47
- version_requirements: *id002
48
- - !ruby/object:Gem::Dependency
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
49
47
  name: guard-test
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
50
55
  prerelease: false
51
- requirement: &id003 !ruby/object:Gem::Requirement
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: parallelize
64
+ requirement: !ruby/object:Gem::Requirement
52
65
  none: false
53
- requirements:
54
- - - ">="
55
- - !ruby/object:Gem::Version
56
- hash: 3
57
- segments:
58
- - 0
59
- version: "0"
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
60
70
  type: :development
61
- version_requirements: *id003
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
62
78
  description: Zipfian distribution
63
- email:
79
+ email:
64
80
  - junegunn.c@gmail.com
65
81
  executables: []
66
-
67
82
  extensions: []
68
-
69
83
  extra_rdoc_files: []
70
-
71
- files:
84
+ files:
72
85
  - .gitignore
73
86
  - Gemfile
74
87
  - Guardfile
@@ -78,39 +91,32 @@ files:
78
91
  - lib/zipfian.rb
79
92
  - lib/zipfian/version.rb
80
93
  - test/test_zipfian.rb
94
+ - test/test_zipfian_perf.rb
81
95
  - zipfian.gemspec
82
96
  homepage: https://github.com/junegunn/zipfian
83
97
  licenses: []
84
-
85
98
  post_install_message:
86
99
  rdoc_options: []
87
-
88
- require_paths:
100
+ require_paths:
89
101
  - lib
90
- required_ruby_version: !ruby/object:Gem::Requirement
102
+ required_ruby_version: !ruby/object:Gem::Requirement
91
103
  none: false
92
- requirements:
93
- - - ">="
94
- - !ruby/object:Gem::Version
95
- hash: 3
96
- segments:
97
- - 0
98
- version: "0"
99
- required_rubygems_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ! '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ required_rubygems_version: !ruby/object:Gem::Requirement
100
109
  none: false
101
- requirements:
102
- - - ">="
103
- - !ruby/object:Gem::Version
104
- hash: 3
105
- segments:
106
- - 0
107
- version: "0"
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
108
114
  requirements: []
109
-
110
115
  rubyforge_project:
111
- rubygems_version: 1.8.21
116
+ rubygems_version: 1.8.24
112
117
  signing_key:
113
118
  specification_version: 3
114
119
  summary: Zipfian distribution
115
- test_files:
120
+ test_files:
116
121
  - test/test_zipfian.rb
122
+ - test/test_zipfian_perf.rb