accumulators 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +70 -13
- data/VERSION +1 -1
- data/accumulators.gemspec +3 -1
- data/lib/accumulators.rb +1 -0
- data/lib/accumulators/sum.rb +19 -0
- data/spec/lib/accumulators/sum_spec.rb +93 -0
- metadata +4 -2
data/README.md
CHANGED
@@ -2,10 +2,23 @@ accumulators
|
|
2
2
|
============
|
3
3
|
|
4
4
|
A set of statistical accumulators for Ruby. There is a range of containers for
|
5
|
-
different measures, such as count, mean, and mean-variance. You can add values to
|
6
|
-
the containers, and ask for their respective statistical measures.
|
7
|
-
|
8
|
-
|
5
|
+
different measures, such as count, sum, mean, and mean-variance. You can add values to
|
6
|
+
the containers, and ask for their respective statistical measures.
|
7
|
+
|
8
|
+
Where appropriate the accumulators use incremental algorithms to update their
|
9
|
+
measures with each addition, so you can add lots of values without needing lots of
|
10
|
+
memory. (see http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm)
|
11
|
+
|
12
|
+
The accumulators can be combined. e.g. if you have two mean-variance
|
13
|
+
accumulators, each of which has their own mean and variance, you can add them
|
14
|
+
together to get the mean and variance of the combined samples.
|
15
|
+
|
16
|
+
Always remember that floating-point arithmetic is not precise, so if you are
|
17
|
+
using these accumulators for anything important, and especially if you are
|
18
|
+
accumulating lots of samples, you should run your own tests on the accuracy of
|
19
|
+
the calculations. I am not aware of any significant issues, but I make no promises
|
20
|
+
and provide no assurances as to the accuracy or stability of these
|
21
|
+
calculations.
|
9
22
|
|
10
23
|
Installation
|
11
24
|
------------
|
@@ -16,31 +29,75 @@ Example Usage
|
|
16
29
|
|
17
30
|
$ irb
|
18
31
|
>> require 'accumulators'
|
19
|
-
>>
|
20
|
-
>>
|
21
|
-
>>
|
22
|
-
>>
|
23
|
-
>>
|
24
|
-
>>
|
32
|
+
>> meanvar = Accumulators::MeanVariance.new
|
33
|
+
>> meanvar.add 1
|
34
|
+
>> meanvar.add 2
|
35
|
+
>> meanvar.add 3
|
36
|
+
>> meanvar.add 4
|
37
|
+
>> meanvar.count
|
25
38
|
=> 4
|
26
|
-
>>
|
39
|
+
>> meanvar.mean
|
27
40
|
=> 2.5
|
41
|
+
>> meanvar.variance
|
42
|
+
=> 1.0
|
43
|
+
>> meanvar.variance(type: :sample)
|
44
|
+
=> 1.0
|
45
|
+
>> meanvar.variance(type: :population)
|
46
|
+
=> 1.25
|
47
|
+
>> meanvar.stddev
|
48
|
+
=> 1.0
|
49
|
+
>> meanvar.stddev(type: :population)
|
50
|
+
=> 1.118033988749895
|
51
|
+
|
52
|
+
Example of combining accumulators
|
53
|
+
---------------------------------
|
54
|
+
Accumulators of the same type can be added together. Here we will add three
|
55
|
+
numbers to one mean-variance accumulator, three to another, and show that their
|
56
|
+
combined mean and variance is equal to that of the single accumulator which
|
57
|
+
saw all the values, within the usual constraints of floating-point arithmetic.
|
58
|
+
|
59
|
+
>> mv1 = Accumulators::MeanVariance.new # sees the first 3 values
|
60
|
+
>> mv2 = Accumulators::MeanVariance.new # sees the second 3 values
|
61
|
+
>> mv = Accumulators::MeanVariance.new # sees all values
|
62
|
+
>>
|
63
|
+
>> [1.1, 2.2, 3.3].each {|n| mv1.add n; mv.add n}
|
64
|
+
>> [4.4, 5.5, 6.6].each {|n| mv2.add n; mv.add n}
|
65
|
+
>>
|
66
|
+
>> mv1.mean
|
67
|
+
=> 2.2
|
68
|
+
>> mv1.variance
|
69
|
+
=> 0.6049999999999998
|
70
|
+
>>
|
71
|
+
>> mv2.mean
|
72
|
+
=> 5.5
|
73
|
+
>> mv2.variance
|
74
|
+
=> 0.6049999999999998
|
75
|
+
>>
|
76
|
+
>> mv.mean
|
77
|
+
=> 3.8499999999999996
|
78
|
+
>> mv.variance
|
79
|
+
=> 3.025
|
80
|
+
>>
|
81
|
+
>> mv1.add mv2
|
82
|
+
>> mv1.mean
|
83
|
+
=> 3.8499999999999996
|
84
|
+
>> mv1.variance
|
85
|
+
=> 3.0249999999999995 # close to 3.025 - discrepancy from floating-point
|
28
86
|
|
29
87
|
Available accumulators
|
30
88
|
----------------------
|
31
89
|
|
32
90
|
* Count
|
91
|
+
* Sum
|
33
92
|
* Mean
|
34
93
|
* MeanVariance
|
35
94
|
|
36
95
|
TODO
|
37
96
|
----
|
38
97
|
|
39
|
-
* Allow choosing between biased & unbiased variance/standard deviation
|
40
98
|
* Skew?
|
41
99
|
* Weighted Means
|
42
100
|
* min, max, and min-max
|
43
|
-
* sum
|
44
101
|
|
45
102
|
Contributing to accumulators
|
46
103
|
----------------------------
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.3.0
|
1
|
+
0.4.0
|
data/accumulators.gemspec
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{accumulators}
|
8
|
-
s.version = "0.3.0"
|
8
|
+
s.version = "0.4.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Gavin Heavyside"]
|
@@ -31,9 +31,11 @@ Gem::Specification.new do |s|
|
|
31
31
|
"lib/accumulators/count.rb",
|
32
32
|
"lib/accumulators/mean.rb",
|
33
33
|
"lib/accumulators/mean_variance.rb",
|
34
|
+
"lib/accumulators/sum.rb",
|
34
35
|
"spec/lib/accumulators/count_spec.rb",
|
35
36
|
"spec/lib/accumulators/mean_spec.rb",
|
36
37
|
"spec/lib/accumulators/mean_variance_spec.rb",
|
38
|
+
"spec/lib/accumulators/sum_spec.rb",
|
37
39
|
"spec/spec_helper.rb"
|
38
40
|
]
|
39
41
|
s.homepage = %q{http://github.com/hgavin/accumulators}
|
data/lib/accumulators.rb
CHANGED
data/lib/accumulators/sum.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Accumulators
  # Running total of the values added to it.
  #
  # Accepts plain numbers as well as other accumulators of the same class,
  # so partial sums can be merged into one.
  class Sum
    # The total accumulated so far; 0 until something has been added.
    attr_reader :sum

    def initialize
      # Start from an Integer zero so that all-integer input yields an Integer.
      @sum = 0
    end

    # Adds a number, or the total held by another accumulator, to this sum.
    #
    # rhs - a Numeric, or an instance of this accumulator's own class.
    #
    # Returns the updated total.
    # Raises ArgumentError if rhs is neither of the above.
    def add(rhs)
      case rhs
      when Numeric then @sum += rhs
      when self.class then @sum += rhs.sum
      else
        raise ArgumentError, "You may not add #{rhs.class} to #{self.class}"
      end
    end
  end
end
|
data/spec/lib/accumulators/sum_spec.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Accumulators
  # Examples for the Sum accumulator: construction, which operand types it
  # accepts, and the totals it reports for integers, floats and merged sums.
  # EPSILON is referenced here but not defined in this file; presumably it
  # comes from spec_helper.
  describe Sum do
    let(:accumulator) { Sum.new }

    context "Creation" do
      it "can be created" do
        -> { Sum.new }.should_not raise_error
      end

      it "returns a sum of 0 before anything is added to it" do
        accumulator.sum.should == 0
      end
    end

    context "adding numbers or distributions" do
      it "allows integers to be added" do
        -> { accumulator.add 5 }.should_not raise_error
      end

      it "allows floats to be added" do
        -> { accumulator.add 3.4 }.should_not raise_error
      end

      it "allows other Sum distributions to be added" do
        -> { accumulator.add Sum.new }.should_not raise_error
      end

      it "raises an ArgumentError if a String is added" do
        expected_message = "You may not add String to Accumulators::Sum"
        -> { accumulator.add "1.5" }.should raise_error(ArgumentError, expected_message)
      end
    end

    context "Correctness of int additions" do
      it "should return an Integer 6 when 1,2,3 is added" do
        1.upto(3) { |n| accumulator.add n }

        accumulator.sum.should be_a(Integer)
        accumulator.sum.should == 6
      end

      it "should calculate the sum correctly for a set of 1000 random integers" do
        # Draw and add one value at a time, in the same order they are recorded.
        samples = Array.new(1000) do
          value = rand(100000)
          accumulator.add value
          value
        end

        accumulator.sum.should be_a(Integer)
        accumulator.sum.should == samples.reduce(:+)
      end
    end

    context "Correctness of float additions" do
      it "should return a Float 6.1 when 1.1, 2.8, 2.2 is added" do
        [1.1, 2.8, 2.2].each { |value| accumulator.add value }

        accumulator.sum.should be_a(Float)
        accumulator.sum.should be_within(EPSILON).of(6.1)
      end

      it "should calculate the sum correctly for a set of 1000 random floats" do
        samples = Array.new(1000) do
          value = rand * 1000000
          accumulator.add value
          value
        end

        accumulator.sum.should be_a(Float)
        accumulator.sum.should be_within(EPSILON).of(samples.reduce(:+))
      end
    end

    context "Correctness of accumulator additions" do
      it "combines two sums correctly" do
        other = Sum.new
        samples = []

        # Alternate between the two accumulators, recording every value in
        # the order it was drawn so the reference total folds them the same way.
        500.times do
          large = rand * 1000*1000
          samples << large
          accumulator.add large

          small = rand * 100*1000
          samples << small
          other.add small
        end

        accumulator.add(other)
        accumulator.sum.should be_a(Float)
        accumulator.sum.should be_within(EPSILON).of(samples.reduce(:+))
      end
    end
  end
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: accumulators
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.3.0
|
5
|
+
version: 0.4.0
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Gavin Heavyside
|
@@ -103,9 +103,11 @@ files:
|
|
103
103
|
- lib/accumulators/count.rb
|
104
104
|
- lib/accumulators/mean.rb
|
105
105
|
- lib/accumulators/mean_variance.rb
|
106
|
+
- lib/accumulators/sum.rb
|
106
107
|
- spec/lib/accumulators/count_spec.rb
|
107
108
|
- spec/lib/accumulators/mean_spec.rb
|
108
109
|
- spec/lib/accumulators/mean_variance_spec.rb
|
110
|
+
- spec/lib/accumulators/sum_spec.rb
|
109
111
|
- spec/spec_helper.rb
|
110
112
|
has_rdoc: true
|
111
113
|
homepage: http://github.com/hgavin/accumulators
|
@@ -121,7 +123,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
121
123
|
requirements:
|
122
124
|
- - ">="
|
123
125
|
- !ruby/object:Gem::Version
|
124
|
-
hash: -
|
126
|
+
hash: -3603142917674474649
|
125
127
|
segments:
|
126
128
|
- 0
|
127
129
|
version: "0"
|