ministat 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,5 @@
1
+ == 0.9.0 / 2007-08-02
2
+
3
+ * 1 major enhancement
4
+ * Birthday!
5
+
data/Manifest.txt ADDED
@@ -0,0 +1,16 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ bin/ministat
6
+ lib/ministat.rb
7
+ package/History.txt
8
+ package/Manifest.txt
9
+ package/README.txt
10
+ package/Rakefile
11
+ package/bin/package
12
+ package/lib/package.rb
13
+ package/test/test_package.rb
14
+ test/data/1.dat
15
+ test/data/2.dat
16
+ test/test_ministat.rb
data/README.txt ADDED
@@ -0,0 +1,59 @@
1
+ ministat
2
+ by Dean Hudson
3
+ ero.com
4
+
5
+ == DESCRIPTION:
6
+
7
+ This is a simple package that generates simple statistical info on
8
+ numerical data sets of a single variable. It's nothing too fancy, but
9
+ maybe just enough to coat your numbers with a thin layer of science. Or,
10
+ at least to get you thinking about what it may take to do so.
11
+
12
+ == FEATURES/PROBLEMS:
13
+
14
+ * Pure Ruby
15
+ * It's small and simple
16
+ * It's probably good enough
17
+ * I haven't profiled it against large data sets
18
+ * Naive median implementation requires a sort, but it could be done in linear time. Patches welcome.
19
+ * Missing tests for harmonic and geometric mean -- the stats package I was generating test data with didn't have them.
20
+
21
+ == SYNOPSIS:
22
+
23
+ require 'ministat'
24
+
25
+ data = [1,2,3,4,5,6,7,7,6,5,4,4]
26
+ puts MiniStat::Data.new(data).to_s
27
+
28
+ == REQUIREMENTS:
29
+
30
+ Hoe
31
+
32
+ == INSTALL:
33
+
34
+ sudo gem install ministat
35
+
36
+ == LICENSE:
37
+
38
+ (The MIT License)
39
+
40
+ Copyright (c) 2007 Dean Hudson
41
+
42
+ Permission is hereby granted, free of charge, to any person obtaining
43
+ a copy of this software and associated documentation files (the
44
+ 'Software'), to deal in the Software without restriction, including
45
+ without limitation the rights to use, copy, modify, merge, publish,
46
+ distribute, sublicense, and/or sell copies of the Software, and to
47
+ permit persons to whom the Software is furnished to do so, subject to
48
+ the following conditions:
49
+
50
+ The above copyright notice and this permission notice shall be
51
+ included in all copies or substantial portions of the Software.
52
+
53
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
54
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
55
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
56
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
57
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
58
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
59
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,17 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require './lib/ministat.rb'
6
+
7
+ Hoe.new('ministat', MiniStat::VERSION) do |p|
8
+ p.rubyforge_name = 'ministat'
9
+ p.author = 'Dean Hudson'
10
+ p.email = 'dean@ero.com'
11
+ p.summary = 'A small and simple library to generate statistical info on single-variable datasets.'
12
+ p.description = p.paragraphs_of('README.txt', 2..5).join("\n\n")
13
+ p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
14
+ p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
15
+ end
16
+
17
+ # vim: syntax=Ruby
data/bin/ministat ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby -ws
2
+
3
+ require 'ministat'
4
+
5
+ if $f and File.exists($f)
6
+ lines = []
7
+ lines = File.readlines($f).each {|l| l.chomp!}
8
+ puts MiniStat::Data.new(lines).to_s
9
+ else
10
+ puts "Usage: ministat -f datafile"
11
+ end
data/lib/ministat.rb ADDED
@@ -0,0 +1,127 @@
1
+ require 'mathn'
2
+
3
+ module MiniStat
4
+ VERSION = '1.0.0'
5
+ class Data
6
+ attr_reader :data
7
+
8
+ def initialize(data)
9
+ @data = data.collect {|data| data.to_f}.sort
10
+ @sorted = true
11
+ end
12
+
13
+ # Return the median of your dataset. Naive implementation
14
+ # -- does a sort on the data.
15
+ def median(data=@data)
16
+ unless @sorted and data == @data
17
+ data.sort!
18
+ @sort = true
19
+ end
20
+ if data.size % 2 == 0
21
+ return (data[data.size / 2 - 1] + data[(data.size / 2)]) / 2
22
+ else
23
+ split = (data.size + 1) / 2
24
+ return (data[split - 1.5] + data[split - 0.5]) / 2
25
+ end
26
+ end
27
+
28
+ def partition(pivot, data=@data)
29
+ low = []
30
+ high = []
31
+ data.each do |i|
32
+ high.push(i) if i > pivot
33
+ low.push(i) if i < pivot
34
+ end
35
+ return {:low => low, :high => high}
36
+ end
37
+
38
+ # First quartile.
39
+ def q1
40
+ @q1 ||= median(partition(median(@data), @data)[:low])
41
+ end
42
+
43
+ # Third quartile
44
+ def q3
45
+ @q3 ||= median(partition(median(@data), @data)[:high])
46
+ end
47
+
48
+ # Interquartile range, i.e. the middle 50% of the data.
49
+ def iqr
50
+ @iqr ||= q3 - q1
51
+ end
52
+
53
+ # Returns an array of outlying data points.
54
+ def outliers
55
+ @outliers ||=
56
+ @data.collect do |i|
57
+ i if (i < q1 - (1.5 * iqr) or i > q3 + (1.5 * iqr))
58
+ end.compact
59
+ end
60
+
61
+ # Computes arithmetic mean (most common average).
62
+ def mean(data=@data)
63
+ (data.inject(0) {|i,j| i += j}) / data.size
64
+ end
65
+
66
+ # Computes mode and a histogram.
67
+ def mode
68
+ @hist ||= {}
69
+ @max_freq ||= 0
70
+ @mode ||= nil
71
+ unless @mode
72
+ @data.each do |val|
73
+ @hist[val] ||= 0
74
+ @hist[val] += 1
75
+ @max_freq, @mode = @hist[val], val if @hist[val] > @max_freq
76
+ end
77
+ end
78
+ @mode
79
+ end
80
+
81
+ # Computes variance. Used to measure degree of spread
82
+ # in dataset.
83
+ def variance
84
+ @variance ||=
85
+ @data.inject(0) { |i,j| i += (j - mean(@data)) ** 2} / (@data.size)
86
+ end
87
+
88
+ # Standard deviation. Square root of variance, measure of the
89
+ # spread of the data about the mean.
90
+ def std_dev
91
+ @std_dev ||= Math.sqrt(variance)
92
+ end
93
+
94
+ # Geometric mean. Only applies to non-negative numbers, and
95
+ # relates to log-normal distribution.
96
+ def geometric_mean
97
+ @geoeteric_mean ||=
98
+ (@data.inject(1) {|i,j| i *= j})**(1.0/@data.size)
99
+ end
100
+
101
+ # Harmonic or subcontrary mean. Tends strongly toward the least
102
+ # elements of the dataset.
103
+ def harmonic_mean
104
+ @harmonic_mean ||=
105
+ @data.size.to_f / (@data.inject(0) {|i,j| i += (1.0/j)})
106
+ end
107
+
108
+ # Return a string with statistical info about a dataset.
109
+ def to_s
110
+ <<-DATA_STR
111
+ Partition:#{partition(median).inspect}
112
+ Mean:#{mean}
113
+ Geometric Mean:#{geometric_mean}
114
+ Harmonic Mean:#{harmonic_mean}
115
+ Median:#{median}
116
+ Min:#{data.min}
117
+ Q1:#{q1}
118
+ Q3:#{q3}
119
+ Max:#{data.max}
120
+ IQR:#{iqr}
121
+ Outliers:#{outliers.inspect}
122
+ Variance:#{variance}
123
+ Std Dev:#{std_dev}
124
+ DATA_STR
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,5 @@
1
+ == 1.0.0 / 2007-11-06
2
+
3
+ * 1 major enhancement
4
+ * Birthday!
5
+
@@ -0,0 +1,7 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ bin/package
6
+ lib/package.rb
7
+ test/test_package.rb
@@ -0,0 +1,48 @@
1
+ package
2
+ by FIX (your name)
3
+ FIX (url)
4
+
5
+ == DESCRIPTION:
6
+
7
+ FIX (describe your package)
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ * FIX (list of features or problems)
12
+
13
+ == SYNOPSIS:
14
+
15
+ FIX (code sample of usage)
16
+
17
+ == REQUIREMENTS:
18
+
19
+ * FIX (list of requirements)
20
+
21
+ == INSTALL:
22
+
23
+ * FIX (sudo gem install, anything else)
24
+
25
+ == LICENSE:
26
+
27
+ (The MIT License)
28
+
29
+ Copyright (c) 2007 FIX
30
+
31
+ Permission is hereby granted, free of charge, to any person obtaining
32
+ a copy of this software and associated documentation files (the
33
+ 'Software'), to deal in the Software without restriction, including
34
+ without limitation the rights to use, copy, modify, merge, publish,
35
+ distribute, sublicense, and/or sell copies of the Software, and to
36
+ permit persons to whom the Software is furnished to do so, subject to
37
+ the following conditions:
38
+
39
+ The above copyright notice and this permission notice shall be
40
+ included in all copies or substantial portions of the Software.
41
+
42
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
43
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
44
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
45
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
46
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
47
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
48
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/package/Rakefile ADDED
@@ -0,0 +1,17 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require './lib/package.rb'
6
+
7
+ Hoe.new('package', Package::VERSION) do |p|
8
+ p.rubyforge_name = 'package'
9
+ # p.author = 'FIX'
10
+ # p.email = 'FIX'
11
+ # p.summary = 'FIX'
12
+ # p.description = p.paragraphs_of('README.txt', 2..5).join("\n\n")
13
+ # p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
14
+ p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
15
+ end
16
+
17
+ # vim: syntax=Ruby
File without changes
@@ -0,0 +1,3 @@
1
+ class Package
2
+ VERSION = '1.0.0'
3
+ end
File without changes
data/test/data/1.dat ADDED
@@ -0,0 +1,28 @@
1
+ 0.2
2
+ 0.5
3
+ 0.7
4
+ 0.8
5
+ 0.9
6
+ 0.95
7
+ 1
8
+ 1.1
9
+ 1.2
10
+ 1.21
11
+ 1.32
12
+ 1.4
13
+ 1.6
14
+ 1.7
15
+ 1.77
16
+ 1.8
17
+ 2
18
+ 2.2
19
+ 2.4
20
+ 2.6
21
+ 2.88
22
+ 3.1
23
+ 3.5
24
+ 3.9
25
+ 4.3
26
+ 4.8
27
+ 5.2
28
+ 6
data/test/data/2.dat ADDED
@@ -0,0 +1,83 @@
1
+ -5.7
2
+ 1.2
3
+ 4.1
4
+ 3.2
5
+ 7.3
6
+ 7.5
7
+ 18.6
8
+ 3.7
9
+ -1.8
10
+ 2.4
11
+ -6.5
12
+ 6.7
13
+ 9.4
14
+ -2.0
15
+ -2.8
16
+ -3.4
17
+ 19.2
18
+ -4.8
19
+ 0.5
20
+ -0.6
21
+ 2.8
22
+ -0.5
23
+ -4.5
24
+ 8.7
25
+ 2.7
26
+ 4.1
27
+ -10.3
28
+ 4.8
29
+ -2.3
30
+ -3.1
31
+ -10.2
32
+ -3.7
33
+ -26.6
34
+ 7.2
35
+ -2.9
36
+ -2.3
37
+ 3.5
38
+ -4.6
39
+ 17.2
40
+ 4.2
41
+ 0.5
42
+ 8.3
43
+ -7.1
44
+ -8.4
45
+ 7.7
46
+ -9.6
47
+ 6
48
+ 6.8
49
+ 10.9
50
+ 1.6
51
+ 0.2
52
+ -2.4
53
+ -2.4
54
+ 3.9
55
+ 1.7
56
+ 9.0
57
+ 3.6
58
+ 7.6
59
+ 3.2
60
+ -3.7
61
+ 4.2
62
+ 13.2
63
+ .9
64
+ 4.2
65
+ 4
66
+ 2.8
67
+ 6.7
68
+ -10.4
69
+ 2.7
70
+ 10.3
71
+ 5.7
72
+ .6
73
+ -14.2
74
+ 1.3
75
+ 2.9
76
+ 11.8
77
+ 10.6
78
+ 5.2
79
+ 13.8
80
+ -14.7
81
+ 3.5
82
+ 11.7
83
+ 1.3
@@ -0,0 +1,79 @@
1
+ # Code Generated by ZenTest v. 3.6.1
2
+ # classname: asrt / meth = ratio%
3
+ # MiniStat: 0 / 10 = 0.00%
4
+
5
+ require 'test/unit' unless defined? $ZENTEST and $ZENTEST
6
+ require 'ministat'
7
+ require 'rubygems'
8
+ require 'ruby-debug'
9
+
10
+ class TestMiniStat < Test::Unit::TestCase
11
+ def setup
12
+ @data1 = [34, 47, 1, 15, 57, 24, 20, 11, 19, 50, 28, 37]
13
+ @data2 = [60, 56, 61, 68, 51, 53, 69, 54]
14
+ @data3 = File.open('test/data/1.dat').map {|l| l.chomp}
15
+ @data4 = File.open('test/data/2.dat').map {|l| l.chomp}
16
+ @ms1 = MiniStat::Data.new(@data1)
17
+ @ms2 = MiniStat::Data.new(@data2)
18
+ @ms3 = MiniStat::Data.new(@data3)
19
+ @ms4 = MiniStat::Data.new(@data4)
20
+ # we test to within a tolerance to slough off
21
+ # possible floating point and rounding errors
22
+ @error = 0.001
23
+ end
24
+
25
+ def test_iqr
26
+ assert(@ms1.iqr - 25 < @error)
27
+ end
28
+
29
+ def test_mean
30
+ assert(@ms2.mean - 59 < @error)
31
+ assert(@ms3.mean - 2.179 < @error)
32
+ assert(@ms4.mean - 1.878 < @error)
33
+ end
34
+
35
+ def test_median
36
+ assert(@ms1.median - 26 < @error)
37
+ assert(@ms3.median - 1.735 < @error)
38
+ assert(@ms3.median - 2.8 < @error)
39
+ end
40
+
41
+ def test_mode
42
+ ms = MiniStat::Data.new([1,1,1,2,3,4,5])
43
+ assert_equal(ms.mode, 1)
44
+ ms = MiniStat::Data.new([1,2,2,2,2,3,4,5])
45
+ assert_equal(ms.mode, 2)
46
+ end
47
+
48
+ def test_outliers
49
+ assert_equal(@ms1.outliers, [])
50
+ end
51
+
52
+ def test_q1
53
+ assert(@ms1.q1 - 17 < @error)
54
+ assert(@ms3.q1 - 1.05 < @error)
55
+ assert(@ms4.q1 - -2.6 < @error)
56
+ end
57
+
58
+ def test_q3
59
+ assert(@ms1.q3 - 42 < @error)
60
+ assert(@ms3.q3 - 2.99 < @error)
61
+ assert(@ms4.q3 - 6.75 < @error)
62
+ end
63
+
64
+ def test_std_dev
65
+ assert(@ms2.std_dev - 6.324 < @error)
66
+ assert(@ms3.std_dev - 1.515 < @error)
67
+ assert(@ms4.std_dev - 7.553 < @error)
68
+ end
69
+
70
+ def test_geo_mean
71
+ # debugger
72
+ end
73
+
74
+ def test_harm_mean
75
+ # debugger
76
+ end
77
+ end
78
+
79
+ # Number of errors detected: 11
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.4
3
+ specification_version: 1
4
+ name: ministat
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2007-11-11 00:00:00 -08:00
8
+ summary: A small and simple library to generate statistical info on single-variable datasets.
9
+ require_paths:
10
+ - lib
11
+ email: dean@ero.com
12
+ homepage: " by Dean Hudson"
13
+ rubyforge_project: ministat
14
+ description: "== FEATURES/PROBLEMS: * Pure Ruby * It's small and simple * It's probably good enough * I haven't profiled it against large data sets * Naive median implementation requires a sort, but it could be done in linear time. Patches welcome. * Missing tests for harmonic and geometric mean -- the stats package I was generating test data with didn't have them. == SYNOPSIS: require 'ministat' data = [1,2,3,4,5,6,7,7,6,5,4,4] puts MiniStat::Data.new(data).to_s"
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Dean Hudson
31
+ files:
32
+ - History.txt
33
+ - Manifest.txt
34
+ - README.txt
35
+ - Rakefile
36
+ - bin/ministat
37
+ - lib/ministat.rb
38
+ - package/History.txt
39
+ - package/Manifest.txt
40
+ - package/README.txt
41
+ - package/Rakefile
42
+ - package/bin/package
43
+ - package/lib/package.rb
44
+ - package/test/test_package.rb
45
+ - test/data/1.dat
46
+ - test/data/2.dat
47
+ - test/test_ministat.rb
48
+ test_files:
49
+ - test/test_ministat.rb
50
+ rdoc_options:
51
+ - --main
52
+ - README.txt
53
+ extra_rdoc_files:
54
+ - History.txt
55
+ - Manifest.txt
56
+ - README.txt
57
+ - package/History.txt
58
+ - package/Manifest.txt
59
+ - package/README.txt
60
+ executables:
61
+ - ministat
62
+ extensions: []
63
+
64
+ requirements: []
65
+
66
+ dependencies:
67
+ - !ruby/object:Gem::Dependency
68
+ name: hoe
69
+ version_requirement:
70
+ version_requirements: !ruby/object:Gem::Version::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 1.3.0
75
+ version: