ministat 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt ADDED
@@ -0,0 +1,5 @@
1
+ == 0.9.0 / 2007-08-02
2
+
3
+ * 1 major enhancement
4
+ * Birthday!
5
+
data/Manifest.txt ADDED
@@ -0,0 +1,16 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ bin/ministat
6
+ lib/ministat.rb
7
+ package/History.txt
8
+ package/Manifest.txt
9
+ package/README.txt
10
+ package/Rakefile
11
+ package/bin/package
12
+ package/lib/package.rb
13
+ package/test/test_package.rb
14
+ test/data/1.dat
15
+ test/data/2.dat
16
+ test/test_ministat.rb
data/README.txt ADDED
@@ -0,0 +1,59 @@
1
+ ministat
2
+ by Dean Hudson
3
+ ero.com
4
+
5
+ == DESCRIPTION:
6
+
7
+ This is a simple package that generates simple statistical info on
8
+ numerical data sets of a single variable. It's nothing too fancy, but
9
+ maybe just enough to coat your numbers with a thin layer of science. Or,
10
+ at least to get you thinking about what it may take to do so.
11
+
12
+ == FEATURES/PROBLEMS:
13
+
14
+ * Pure Ruby
15
+ * It's small and simple
16
+ * It's probably good enough
17
+ * I haven't profiled it against large data sets
18
+ * Naive median implementation requires a sort, but it could be done in linear time. Patches welcome.
19
+ * Missing tests for harmonic and geometric mean -- the stats package I was generating test data with didn't have them.
20
+
21
+ == SYNOPSIS:
22
+
23
+ require 'ministat'
24
+
25
+ data = [1,2,3,4,5,6,7,7,6,5,4,4]
26
+ puts MiniStat::Data.new(data).to_s
27
+
28
+ == REQUIREMENTS:
29
+
30
+ Hoe
31
+
32
+ == INSTALL:
33
+
34
+ sudo gem install ministat
35
+
36
+ == LICENSE:
37
+
38
+ (The MIT License)
39
+
40
+ Copyright (c) 2007 Dean Hudson
41
+
42
+ Permission is hereby granted, free of charge, to any person obtaining
43
+ a copy of this software and associated documentation files (the
44
+ 'Software'), to deal in the Software without restriction, including
45
+ without limitation the rights to use, copy, modify, merge, publish,
46
+ distribute, sublicense, and/or sell copies of the Software, and to
47
+ permit persons to whom the Software is furnished to do so, subject to
48
+ the following conditions:
49
+
50
+ The above copyright notice and this permission notice shall be
51
+ included in all copies or substantial portions of the Software.
52
+
53
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
54
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
55
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
56
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
57
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
58
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
59
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Rakefile ADDED
@@ -0,0 +1,17 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require './lib/ministat.rb'
6
+
7
+ Hoe.new('ministat', MiniStat::VERSION) do |p|
8
+ p.rubyforge_name = 'ministat'
9
+ p.author = 'Dean Hudson'
10
+ p.email = 'dean@ero.com'
11
+ p.summary = 'A small and simple library to generate statistical info on single-variable datasets.'
12
+ p.description = p.paragraphs_of('README.txt', 2..5).join("\n\n")
13
+ p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
14
+ p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
15
+ end
16
+
17
+ # vim: syntax=Ruby
data/bin/ministat ADDED
@@ -0,0 +1,11 @@
1
#!/usr/bin/env ruby -ws
#
# Command-line front end for MiniStat: reads one number per line from a
# data file and prints the summary produced by MiniStat::Data#to_s.
#
# Usage:
#   ministat -f datafile
#   ministat -f=datafile

require 'ministat'

# Ruby's -s switch stores "path" in $f for `-f=path`, but stores true for a
# bare `-f` and leaves the path as the next entry in ARGV. Support both so
# the invocation shown in the usage message actually works.
datafile = defined?($f) ? $f : nil
datafile = ARGV.shift if datafile == true

if datafile.is_a?(String) && File.exist?(datafile)
  # Drop blank lines (e.g. a trailing newline at end of file); otherwise
  # String#to_f would turn each of them into a spurious 0.0 data point.
  values = File.readlines(datafile).map { |line| line.chomp }
  values = values.reject { |line| line.strip.empty? }
  puts MiniStat::Data.new(values).to_s
else
  puts "Usage: ministat -f datafile"
end
data/lib/ministat.rb ADDED
@@ -0,0 +1,127 @@
1
+ require 'mathn'
2
+
3
module MiniStat
  VERSION = '1.0.0'.freeze

  # Summary statistics for a single-variable numeric dataset.
  #
  # Every element is converted with +to_f+ and the result is stored sorted
  # in ascending order, so numeric strings (such as lines read from a file)
  # are accepted. Statistics that are undefined for the dataset (for
  # example the mean of an empty set, or the quartiles of a single value)
  # are returned as nil rather than raising.
  class Data
    # The dataset as an ascending array of Floats.
    attr_reader :data

    # data:: any enumerable whose elements respond to +to_f+.
    def initialize(data)
      @data = data.collect { |value| value.to_f }.sort
    end

    # Returns the median of +data+ (the stored dataset by default), or nil
    # when +data+ is empty. Naive implementation: a caller-supplied array
    # is sorted into a copy, so the caller's array is never reordered.
    def median(data = @data)
      return nil if data.empty?

      # @data is kept sorted by the constructor; anything else is sorted here.
      sorted = data.equal?(@data) ? data : data.sort
      # Integer#div keeps this an Integer index even when 'mathn' is loaded.
      middle = sorted.size.div(2)
      if sorted.size % 2 == 0
        (sorted[middle - 1] + sorted[middle]) / 2.0
      else
        sorted[middle]
      end
    end

    # Splits +data+ around +pivot+. Returns a hash with the values strictly
    # below the pivot under :low and those strictly above it under :high.
    # Values equal to the pivot appear in neither list.
    def partition(pivot, data = @data)
      low = data.select { |value| value < pivot }
      high = data.select { |value| value > pivot }
      { :low => low, :high => high }
    end

    # First quartile: the median of the values below the overall median.
    # nil when no value lies below the median.
    def q1
      @q1 ||= median(partition(median)[:low])
    end

    # Third quartile: the median of the values above the overall median.
    # nil when no value lies above the median.
    def q3
      @q3 ||= median(partition(median)[:high])
    end

    # Interquartile range (q3 - q1), i.e. the spread of the middle 50% of
    # the data. nil when either quartile is undefined.
    def iqr
      @iqr ||= (q1 && q3 ? q3 - q1 : nil)
    end

    # Returns an array of outlying data points: values more than 1.5 * IQR
    # below q1 or above q3. Empty when the IQR is undefined.
    def outliers
      @outliers ||= begin
        spread = iqr
        if spread.nil?
          []
        else
          lower = q1 - (1.5 * spread)
          upper = q3 + (1.5 * spread)
          @data.select { |value| value < lower || value > upper }
        end
      end
    end

    # Computes the arithmetic mean (most common average) of +data+, or nil
    # when +data+ is empty.
    def mean(data = @data)
      return nil if data.empty?

      data.inject(0.0) { |sum, value| sum + value } / data.size
    end

    # Computes the mode (and, as a side effect, a histogram of value
    # frequencies). When several values share the highest frequency the
    # smallest one is returned. nil for an empty dataset.
    def mode
      @mode ||= compute_mode
    end

    # Computes the population variance (sum of squared deviations from the
    # mean divided by the number of values). Used to measure the degree of
    # spread in the dataset. nil for an empty dataset.
    def variance
      return nil if @data.empty?

      @variance ||= begin
        average = mean # hoisted: the mean is the same for every element
        squares = @data.inject(0.0) { |sum, value| sum + (value - average)**2 }
        squares / @data.size
      end
    end

    # Standard deviation: square root of the variance, a measure of the
    # spread of the data about the mean. nil for an empty dataset.
    def std_dev
      @std_dev ||= (variance && Math.sqrt(variance))
    end

    # Geometric mean. Only applies to non-negative numbers, and relates to
    # the log-normal distribution. nil for an empty dataset.
    def geometric_mean
      return nil if @data.empty?

      @geometric_mean ||=
        @data.inject(1) { |product, value| product * value }**(1.0 / @data.size)
    end

    # Harmonic or subcontrary mean. Tends strongly toward the least
    # elements of the dataset. nil for an empty dataset.
    def harmonic_mean
      return nil if @data.empty?

      @harmonic_mean ||=
        @data.size.to_f / @data.inject(0) { |sum, value| sum + (1.0 / value) }
    end

    # Returns a multi-line string with statistical info about the dataset,
    # one "Label:value" pair per line.
    def to_s
      [
        "Partition:#{partition(median).inspect}",
        "Mean:#{mean}",
        "Geometric Mean:#{geometric_mean}",
        "Harmonic Mean:#{harmonic_mean}",
        "Median:#{median}",
        "Min:#{data.min}",
        "Q1:#{q1}",
        "Q3:#{q3}",
        "Max:#{data.max}",
        "IQR:#{iqr}",
        "Outliers:#{outliers.inspect}",
        "Variance:#{variance}",
        "Std Dev:#{std_dev}"
      ].join("\n") + "\n"
    end

    private

    # Builds the frequency histogram and returns the first value (in
    # ascending order) to reach the highest frequency.
    def compute_mode
      @hist = Hash.new(0)
      @max_freq = 0
      winner = nil
      @data.each do |value|
        count = (@hist[value] += 1)
        @max_freq, winner = count, value if count > @max_freq
      end
      winner
    end
  end
end
@@ -0,0 +1,5 @@
1
+ == 1.0.0 / 2007-11-06
2
+
3
+ * 1 major enhancement
4
+ * Birthday!
5
+
@@ -0,0 +1,7 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ bin/package
6
+ lib/package.rb
7
+ test/test_package.rb
@@ -0,0 +1,48 @@
1
+ package
2
+ by FIX (your name)
3
+ FIX (url)
4
+
5
+ == DESCRIPTION:
6
+
7
+ FIX (describe your package)
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ * FIX (list of features or problems)
12
+
13
+ == SYNOPSIS:
14
+
15
+ FIX (code sample of usage)
16
+
17
+ == REQUIREMENTS:
18
+
19
+ * FIX (list of requirements)
20
+
21
+ == INSTALL:
22
+
23
+ * FIX (sudo gem install, anything else)
24
+
25
+ == LICENSE:
26
+
27
+ (The MIT License)
28
+
29
+ Copyright (c) 2007 FIX
30
+
31
+ Permission is hereby granted, free of charge, to any person obtaining
32
+ a copy of this software and associated documentation files (the
33
+ 'Software'), to deal in the Software without restriction, including
34
+ without limitation the rights to use, copy, modify, merge, publish,
35
+ distribute, sublicense, and/or sell copies of the Software, and to
36
+ permit persons to whom the Software is furnished to do so, subject to
37
+ the following conditions:
38
+
39
+ The above copyright notice and this permission notice shall be
40
+ included in all copies or substantial portions of the Software.
41
+
42
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
43
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
44
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
45
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
46
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
47
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
48
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/package/Rakefile ADDED
@@ -0,0 +1,17 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require './lib/package.rb'
6
+
7
+ Hoe.new('package', Package::VERSION) do |p|
8
+ p.rubyforge_name = 'package'
9
+ # p.author = 'FIX'
10
+ # p.email = 'FIX'
11
+ # p.summary = 'FIX'
12
+ # p.description = p.paragraphs_of('README.txt', 2..5).join("\n\n")
13
+ # p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
14
+ p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
15
+ end
16
+
17
+ # vim: syntax=Ruby
File without changes
@@ -0,0 +1,3 @@
1
+ class Package
2
+ VERSION = '1.0.0'
3
+ end
File without changes
data/test/data/1.dat ADDED
@@ -0,0 +1,28 @@
1
+ 0.2
2
+ 0.5
3
+ 0.7
4
+ 0.8
5
+ 0.9
6
+ 0.95
7
+ 1
8
+ 1.1
9
+ 1.2
10
+ 1.21
11
+ 1.32
12
+ 1.4
13
+ 1.6
14
+ 1.7
15
+ 1.77
16
+ 1.8
17
+ 2
18
+ 2.2
19
+ 2.4
20
+ 2.6
21
+ 2.88
22
+ 3.1
23
+ 3.5
24
+ 3.9
25
+ 4.3
26
+ 4.8
27
+ 5.2
28
+ 6
data/test/data/2.dat ADDED
@@ -0,0 +1,83 @@
1
+ -5.7
2
+ 1.2
3
+ 4.1
4
+ 3.2
5
+ 7.3
6
+ 7.5
7
+ 18.6
8
+ 3.7
9
+ -1.8
10
+ 2.4
11
+ -6.5
12
+ 6.7
13
+ 9.4
14
+ -2.0
15
+ -2.8
16
+ -3.4
17
+ 19.2
18
+ -4.8
19
+ 0.5
20
+ -0.6
21
+ 2.8
22
+ -0.5
23
+ -4.5
24
+ 8.7
25
+ 2.7
26
+ 4.1
27
+ -10.3
28
+ 4.8
29
+ -2.3
30
+ -3.1
31
+ -10.2
32
+ -3.7
33
+ -26.6
34
+ 7.2
35
+ -2.9
36
+ -2.3
37
+ 3.5
38
+ -4.6
39
+ 17.2
40
+ 4.2
41
+ 0.5
42
+ 8.3
43
+ -7.1
44
+ -8.4
45
+ 7.7
46
+ -9.6
47
+ 6
48
+ 6.8
49
+ 10.9
50
+ 1.6
51
+ 0.2
52
+ -2.4
53
+ -2.4
54
+ 3.9
55
+ 1.7
56
+ 9.0
57
+ 3.6
58
+ 7.6
59
+ 3.2
60
+ -3.7
61
+ 4.2
62
+ 13.2
63
+ .9
64
+ 4.2
65
+ 4
66
+ 2.8
67
+ 6.7
68
+ -10.4
69
+ 2.7
70
+ 10.3
71
+ 5.7
72
+ .6
73
+ -14.2
74
+ 1.3
75
+ 2.9
76
+ 11.8
77
+ 10.6
78
+ 5.2
79
+ 13.8
80
+ -14.7
81
+ 3.5
82
+ 11.7
83
+ 1.3
@@ -0,0 +1,79 @@
1
+ # Code Generated by ZenTest v. 3.6.1
2
+ # classname: asrt / meth = ratio%
3
+ # MiniStat: 0 / 10 = 0.00%
4
+
5
+ require 'test/unit' unless defined? $ZENTEST and $ZENTEST
6
+ require 'ministat'
7
+ require 'rubygems'
8
+ require 'ruby-debug'
9
+
10
+ class TestMiniStat < Test::Unit::TestCase
11
+ def setup
12
+ @data1 = [34, 47, 1, 15, 57, 24, 20, 11, 19, 50, 28, 37]
13
+ @data2 = [60, 56, 61, 68, 51, 53, 69, 54]
14
+ @data3 = File.open('test/data/1.dat').map {|l| l.chomp}
15
+ @data4 = File.open('test/data/2.dat').map {|l| l.chomp}
16
+ @ms1 = MiniStat::Data.new(@data1)
17
+ @ms2 = MiniStat::Data.new(@data2)
18
+ @ms3 = MiniStat::Data.new(@data3)
19
+ @ms4 = MiniStat::Data.new(@data4)
20
+ # we test to within a tolerance to slough off
21
+ # possible floating point and rounding errors
22
+ @error = 0.001
23
+ end
24
+
25
+ def test_iqr
26
+ assert(@ms1.iqr - 25 < @error)
27
+ end
28
+
29
+ def test_mean
30
+ assert(@ms2.mean - 59 < @error)
31
+ assert(@ms3.mean - 2.179 < @error)
32
+ assert(@ms4.mean - 1.878 < @error)
33
+ end
34
+
35
+ def test_median
36
+ assert(@ms1.median - 26 < @error)
37
+ assert(@ms3.median - 1.735 < @error)
38
+ assert(@ms3.median - 2.8 < @error)
39
+ end
40
+
41
+ def test_mode
42
+ ms = MiniStat::Data.new([1,1,1,2,3,4,5])
43
+ assert_equal(ms.mode, 1)
44
+ ms = MiniStat::Data.new([1,2,2,2,2,3,4,5])
45
+ assert_equal(ms.mode, 2)
46
+ end
47
+
48
+ def test_outliers
49
+ assert_equal(@ms1.outliers, [])
50
+ end
51
+
52
+ def test_q1
53
+ assert(@ms1.q1 - 17 < @error)
54
+ assert(@ms3.q1 - 1.05 < @error)
55
+ assert(@ms4.q1 - -2.6 < @error)
56
+ end
57
+
58
+ def test_q3
59
+ assert(@ms1.q3 - 42 < @error)
60
+ assert(@ms3.q3 - 2.99 < @error)
61
+ assert(@ms4.q3 - 6.75 < @error)
62
+ end
63
+
64
+ def test_std_dev
65
+ assert(@ms2.std_dev - 6.324 < @error)
66
+ assert(@ms3.std_dev - 1.515 < @error)
67
+ assert(@ms4.std_dev - 7.553 < @error)
68
+ end
69
+
70
+ def test_geo_mean
71
+ # debugger
72
+ end
73
+
74
+ def test_harm_mean
75
+ # debugger
76
+ end
77
+ end
78
+
79
+ # Number of errors detected: 11
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.4
3
+ specification_version: 1
4
+ name: ministat
5
+ version: !ruby/object:Gem::Version
6
+ version: 1.0.0
7
+ date: 2007-11-11 00:00:00 -08:00
8
+ summary: A small and simple library to generate statistical info on single-variable datasets.
9
+ require_paths:
10
+ - lib
11
+ email: dean@ero.com
12
+ homepage: " by Dean Hudson"
13
+ rubyforge_project: ministat
14
+ description: "== FEATURES/PROBLEMS: * Pure Ruby * It's small and simple * It's probably good enough * I haven't profiled it against large data sets * Naive median implementation requires a sort, but it could be done in linear time. Patches welcome. * Missing tests for harmonic and geometric mean -- the stats package I was generating test data with didn't have them. == SYNOPSIS: require 'ministat' data = [1,2,3,4,5,6,7,7,6,5,4,4] puts MiniStat::Data.new(data).to_s"
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Dean Hudson
31
+ files:
32
+ - History.txt
33
+ - Manifest.txt
34
+ - README.txt
35
+ - Rakefile
36
+ - bin/ministat
37
+ - lib/ministat.rb
38
+ - package/History.txt
39
+ - package/Manifest.txt
40
+ - package/README.txt
41
+ - package/Rakefile
42
+ - package/bin/package
43
+ - package/lib/package.rb
44
+ - package/test/test_package.rb
45
+ - test/data/1.dat
46
+ - test/data/2.dat
47
+ - test/test_ministat.rb
48
+ test_files:
49
+ - test/test_ministat.rb
50
+ rdoc_options:
51
+ - --main
52
+ - README.txt
53
+ extra_rdoc_files:
54
+ - History.txt
55
+ - Manifest.txt
56
+ - README.txt
57
+ - package/History.txt
58
+ - package/Manifest.txt
59
+ - package/README.txt
60
+ executables:
61
+ - ministat
62
+ extensions: []
63
+
64
+ requirements: []
65
+
66
+ dependencies:
67
+ - !ruby/object:Gem::Dependency
68
+ name: hoe
69
+ version_requirement:
70
+ version_requirements: !ruby/object:Gem::Version::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: 1.3.0
75
+ version: