array-statistics 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ == 0.1.1 2008-10-31
2
+
3
+ * Fixed badly packaged gem. Whadda ya want? It's my first gem.
4
+ == 0.1.0 2008-10-30
5
+
6
+ * 1 major enhancement:
7
+ * Initial release
@@ -0,0 +1,7 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/array-statistics.rb
6
+ test/test_array-statistics.rb
7
+
@@ -0,0 +1,64 @@
1
+ = array-statistics
2
+
3
+ http://rubyforge.org/projects/array-statistic/
4
+
5
+ == DESCRIPTION:
6
+
7
+ array-statistics adds statistical operations to the Ruby Array class. array-statistics supersedes the old gem "array_statistics".
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ array-statistics adds support for:
12
+ * Percentiles
13
+ * Quartiles
14
+ * Median
15
+ * Mean
16
+ * Sum
17
+ * Outliers
18
+
19
+ == SYNOPSIS:
20
+
21
+ Array Statistics adds statistical operations to the ruby Array class.
22
+ The operations work on arrays of numbers or, more interestingly, arrays of _things_.
23
+ Each method added to Array by this gem takes an optional block that defines the "value" on which to base the operation.
24
+
25
+ For example,
26
+ require 'rubygems'
27
+ require 'array-statistics'
28
+ [2,3,55].median #=> 3.0
29
+ [2,3,55].average #=> 20.0
30
+ [{:value => 2}, {:value => 3}].sum {|element| element[:value] } #=> 5
31
+
32
+
33
+ == REQUIREMENTS:
34
+
35
+ * rubygems
36
+
37
+ == INSTALL:
38
+
39
+ sudo gem install array-statistics
40
+
41
+ == LICENSE:
42
+
43
+ (The MIT License)
44
+
45
+ Copyright (c) 2008 Bruce Goodwin
46
+
47
+ Permission is hereby granted, free of charge, to any person obtaining
48
+ a copy of this software and associated documentation files (the
49
+ 'Software'), to deal in the Software without restriction, including
50
+ without limitation the rights to use, copy, modify, merge, publish,
51
+ distribute, sublicense, and/or sell copies of the Software, and to
52
+ permit persons to whom the Software is furnished to do so, subject to
53
+ the following conditions:
54
+
55
+ The above copyright notice and this permission notice shall be
56
+ included in all copies or substantial portions of the Software.
57
+
58
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
59
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
60
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
61
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
62
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
63
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
64
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,13 @@
1
+ # -*- ruby -*-
2
+
3
+ require 'rubygems'
4
+ require 'hoe'
5
+ require './lib/array-statistics.rb'
6
+
7
# Describe the gem to Hoe, taking the version from the library's own constant
# so the two cannot drift apart.
Hoe.new('array-statistics', ArrayStatistics::VERSION) do |p|
  # Set explicitly because the RubyForge project name ('array-statistic')
  # differs from the lowercase gem name ('array-statistics').
  p.rubyforge_name = 'array-statistic'
  p.developer('Bruce Goodwin', 'bgruby@gmail.com')
  # p.remote_rdoc_dir = '' # Release to root
end
12
+
13
+ # vim: syntax=Ruby
@@ -0,0 +1,336 @@
1
+
2
+ #:stopdoc:
3
+
4
+ # no need for these yet... maybe later.
5
+ # $:.unshift(File.dirname(__FILE__)) unless
6
+ # $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
7
+
8
+ #:startdoc:
9
+
10
# Namespace for gem-level metadata. The statistical methods themselves are
# added directly to the core Array class.
module ArrayStatistics
  # Version of the array-statistics gem. Frozen so the shared constant string
  # cannot be mutated in place by a caller.
  VERSION = '0.1.1'.freeze
end
13
# Statistical extensions for the core Array class.
#
# Most methods take an optional block that maps an element to the numeric
# "value" used for the calculation; the block can be omitted when the array
# already contains numbers.
#
# Many of these methods sort the receiver *in place* as a side effect.
class Array
  # Get percent order statistic based on "order statistic" from here:
  # http://mathworld.wolfram.com/topics/RankStatistics.html
  #
  # Given a percentage between 0 and 1 (inclusive), returns the element at
  # index <tt>floor(percent_less_than * (length - 1))</tt> of the sorted array.
  # Percentages above 1 are treated as 1. Percentages of 0 or less, and an
  # empty array, yield +nil+.
  #
  # The array is sorted in place as a side effect.
  #
  # Like most methods in this package, this method takes an optional block that defines the "value" of the objects in the array.
  # This block can be safely skipped if the array contains numbers.
  def percentile(percent_less_than, &value_block) # :yields: element
    return nil if empty?

    value_block ||= proc { |element| element }
    sort! { |x, y| value_block.call(x) <=> value_block.call(y) }

    return nil if percent_less_than <= 0

    percent_less_than = 1 if percent_less_than > 1
    self[(percent_less_than * (length - 1)).floor]
  end

  # Get percent rank based on "statistical rank" from here:
  # http://mathworld.wolfram.com/topics/RankStatistics.html
  #
  # Given some value, returns the fraction (between 0 and 1 inclusive) of the
  # values in this array that are less than or equal to it. The result is a
  # Float, except that the Integer 1 is returned when no element is greater
  # than +value+. An empty array yields 0.0.
  #
  # The array is sorted in place as a side effect.
  #
  # Like most methods in this package, this method takes an optional block that defines the "value" of the objects in the array.
  # This block can be safely skipped if the array contains numbers.
  def percentile_rank(value, &value_block) # :yields: element
    # Nothing to rank against; also avoids handing nil to the value block.
    return 0.0 if empty?

    value_block ||= proc { |element| element }
    sort! { |x, y| value_block.call(x) <=> value_block.call(y) }
    return 0.0 if value < value_block.call(self[0])

    first_greater = index { |element| value_block.call(element) > value }
    first_greater ? first_greater.to_f / length : 1
  end

  # Get the median value of this array, as the average of the one or two
  # middle values. Returns 0 for an empty array (to reduce instances of
  # calling math methods on nil).
  #
  # Pass <tt>sort_required = false</tt> when the array is already known to be
  # sorted by value; the array is then left untouched.
  #
  # Like most methods in this package, this method takes an optional block that defines the "value" of the objects in the array.
  # This block can be safely skipped if the array contains numbers.
  def median(sort_required = true, &value_block) # :yields: element
    return 0 if empty?

    value_block ||= proc { |element| element }
    middle_values = median_indices(sort_required, &value_block).map do |element_index|
      value_block.call(self[element_index])
    end
    middle_values.average
  end

  # Returns a one- or two-element array holding the index of the median
  # element (odd length) or the two indices surrounding the median (even
  # length). Returns 0 (not an array) when the array is empty.
  #
  # Unless +sort_required+ is false the array is sorted in place first.
  #
  # Like most methods in this package, this method takes an optional block that defines the "value" of the objects in the array.
  # This block can be safely skipped if the array contains numbers.
  def median_indices(sort_required = true, &value_block) # :yields: element
    return 0 if empty?

    if sort_required
      value_block ||= proc { |element| element }
      sort! { |x, y| value_block.call(x) <=> value_block.call(y) }
    end

    upper_middle = length / 2
    length.even? ? [upper_middle - 1, upper_middle] : [upper_middle]
  end

  # Returns an array with 2 values: the first and third quartile indices, each
  # following the same rules as the result of the median_indices method.
  #
  # The array is sorted in place as a side effect. Raises ArgumentError when
  # the array is empty, because quartiles are undefined in that case.
  #
  # Like most methods in this package, this method takes an optional block that defines the "value" of the objects in the array.
  # This block can be safely skipped if the array contains numbers.
  def quartile_indices(&value_block) # :yields: element
    raise ArgumentError, 'quartiles are undefined for an empty array' if empty?

    median_i = median_indices(&value_block)
    low_end = median_i.first
    high_start = median_i.last

    # Both halves include the median element(s) and are already sorted, so
    # their own medians can be located without sorting again.
    q1_indices = self[0..low_end].median_indices(false, &value_block)
    q3_indices = self[high_start..-1].median_indices(false, &value_block)

    # Indices found in the upper half are relative to that slice; shift them
    # back into this array's coordinates.
    [q1_indices, q3_indices.map { |index| index + high_start }]
  end

  # Returns an array containing all the outliers in this set: the low
  # outliers followed by the high outliers. The elements themselves are
  # returned untouched, even when they are arrays. An empty array has no
  # outliers.
  #
  # The array is sorted in place as a side effect.
  #
  # Like most methods in this package, this method takes an optional block that defines the "value" of the objects in the array.
  # This block can be safely skipped if the array contains numbers.
  def outliers(quartile_range_factor = 1.5, &value_block) # :yields: element
    return [] if empty?

    last_low, first_high = outlier_threshold_indices(quartile_range_factor, &value_block)
    found = []
    # concat (rather than << followed by flatten) keeps array-valued elements intact.
    found.concat(self[0..last_low]) unless last_low.nil?
    found.concat(self[first_high..-1]) unless first_high.nil?
    found
  end

  # Removes all the outliers from this set and returns them (low outliers
  # first, then high outliers). An empty array yields an empty result.
  #
  # The array is sorted in place as a side effect.
  #
  # Like most methods in this package, this method takes an optional block that defines the "value" of the objects in the array.
  # This block can be safely skipped if the array contains numbers.
  def remove_outliers!(quartile_range_factor = 1.5, &value_block) # :yields: element
    return [] if empty?

    last_low, first_high = outlier_threshold_indices(quartile_range_factor, &value_block)
    # Cut the high end off first so that the low-end index is still valid
    # when the low end is removed.
    high_removed = first_high.nil? ? [] : slice!(first_high..-1)
    low_removed = last_low.nil? ? [] : slice!(0..last_low)
    low_removed + high_removed
  end

  # Returns an array with two values.
  # The first value is the index of the last low outlier in this sorted array
  # (this array will be sorted as a side effect of this method) or nil if
  # there are no low-end outliers.
  # The second value is the index of the first high outlier in this sorted
  # array or nil if there are no high-end outliers.
  # An empty array yields [nil, nil].
  #
  # Like most methods in this package, this method takes an optional block that defines the "value" of the objects in the array.
  # This block can be safely skipped if the array contains numbers.
  def outlier_threshold_indices(quartile_range_factor = 1.5, &value_block) # :yields: element
    return [nil, nil] if empty?

    value_block ||= proc { |element| element }
    low_threshold, high_threshold = outlier_thresholds(quartile_range_factor, &value_block) # this sorts self!

    # Count from each end of the sorted array; take_while stops at the array
    # boundary, so the scan can never run past either end.
    low_count = take_while { |element| value_block.call(element) < low_threshold }.length
    high_count = reverse_each.take_while { |element| value_block.call(element) > high_threshold }.length

    [
      low_count.zero? ? nil : low_count - 1,
      high_count.zero? ? nil : length - high_count
    ]
  end

  # Returns an array with two values:
  # The first value is the low outlier threshold for this data set
  # (Q1 - factor * interquartile range).
  # The second value is the high outlier threshold for this data set
  # (Q3 + factor * interquartile range).
  #
  # The array is sorted in place as a side effect. Raises ArgumentError when
  # the array is empty.
  #
  # Like most methods in this package, this method takes an optional block that defines the "value" of the objects in the array.
  # This block can be safely skipped if the array contains numbers.
  def outlier_thresholds(quartile_range_factor = 1.5, &value_block) # :yields: element
    value_block ||= proc { |element| element }

    # Each quartile is the average of the value(s) at its one or two indices.
    q1, q3 = quartile_indices(&value_block).map do |indices|
      indices.map { |element_index| value_block.call(self[element_index]) }.average
    end

    margin = (q3 - q1) * quartile_range_factor
    [q1 - margin, q3 + margin]
  end

  # Returns the average of the values in this array. Integer sums are
  # converted to Float before dividing, so the result is not truncated.
  # An empty array of numbers yields NaN (0.0 / 0).
  #
  # Like most methods in this package, this method takes an optional block that defines the "value" of the objects in the array.
  # This block can be safely skipped if the array contains numbers.
  def average(&value_block) # :yields: element
    total = sum(&value_block)
    total = total.to_f if total.integer?
    total / length
  end
  alias mean average

  # Returns the sum of all the values in this array, starting from +init+.
  #
  # +init+ defaults to 0 and mirrors the optional argument of Ruby's built-in
  # Array#sum, which this method replaces, so callers written against the
  # built-in (e.g. <tt>sum(0.0)</tt> or <tt>sum([])</tt>) keep working.
  #
  # Like most methods in this package, this method takes an optional block that defines the "value" of the objects in the array.
  # This block can be safely skipped if the array contains numbers.
  def sum(init = 0, &value_block) # :yields: element
    value_block ||= proc { |element| element }
    inject(init) { |total, element| total + value_block.call(element) }
  end

  # Keep a handle on the built-in sort!. The guard matters when this file is
  # loaded twice (for instance once by relative path and once through
  # rubygems): re-aliasing would point old_sort! at the override below and
  # make sort! call itself forever.
  alias_method :old_sort!, :sort! unless method_defined?(:old_sort!)

  # Adds smarter sorting: a lot of the methods above need the array sorted and
  # they may call one another. This sort! only sorts the array if it is not
  # currently known to be sorted with the same comparison block.
  #
  # Takes the same optional two-argument comparison block as the built-in
  # sort!, and returns self.
  #
  # NOTE: elements that are mutated in place (rather than replaced through one
  # of the tracked Array methods) cannot be detected; call #dirty first in
  # that case.
  def sort!(&comparison_block) # :yields: a, b
    if dirty? || comparison_block != @as_last_comparison_block
      old_sort!(&comparison_block)
      @as_last_comparison_block = comparison_block
      clean
    end
    self
  end

  #:stopdoc:

  # Flags this array as possibly unsorted.
  def dirty
    @as_sort_dirty = true
  end

  # True when the array may need sorting. An array that has never been
  # through sort! is treated as dirty.
  def dirty?
    dirty unless instance_variable_defined?(:@as_sort_dirty)
    @as_sort_dirty
  end

  # Flags this array as sorted.
  def clean
    @as_sort_dirty = false
  end

  # BELOW: Modify the array to mark itself as dirty when the array might need resorting.

  # For our purposes "dirty" doesn't merely mean that the array has been modified. It means
  # that the array has been modified in a way that could have messed up the sorting.
  # For this reason, not all methods that could modify the array are included, e.g. anything
  # that *JUST* removes elements doesn't mess up the order of the array, but insertions might.

  # Side-note: there are a lot of array methods that don't follow the destructive-methods-end-in-! naming pattern

  # Wraps the Array method +name+ so that it marks the array dirty after
  # running; the original implementation stays reachable under +backup+.
  # Methods missing from the running Ruby are skipped, and an existing backup
  # means the method is already wrapped (file loaded twice), so it is left alone.
  as_track_mutator = lambda do |name, backup|
    if method_defined?(name) && !method_defined?(backup)
      alias_method backup, name
      define_method(name) do |*as_args, &block|
        ret = send(backup, *as_args, &block)
        dirty
        ret
      end
      # Let keyword arguments (e.g. shuffle!(random: rng)) pass through the splat.
      ruby2_keywords(name) if respond_to?(:ruby2_keywords, true)
    end
  end

  [
    [:'[]=', :as_old_ass],
    [:'<<', :as_old_app],
    [:push, :as_old_push],
    [:collect!, :as_old_collect],
    [:map!, :as_old_map!],
    [:fill, :as_old_fill],
    [:flatten!, :as_old_flatten],
    [:replace, :as_old_replace],
    [:reverse!, :as_old_reverse],
    [:unshift, :as_old_unshift],
    # Order-changing mutators that were previously untracked, which let a
    # later sort! be skipped on an array that was no longer sorted.
    [:insert, :as_old_insert],
    [:concat, :as_old_concat],
    [:append, :as_old_append],
    [:prepend, :as_old_prepend],
    [:rotate!, :as_old_rotate!],
    [:shuffle!, :as_old_shuffle!],
    [:sort_by!, :as_old_sort_by!]
  ].each { |name, backup| as_track_mutator.call(name, backup) }
  #:startdoc:
end
334
+
335
+
336
+
@@ -0,0 +1,51 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
# Unit tests for the statistical methods that array-statistics adds to Array.
class TestArrayStatistics < Test::Unit::TestCase
  # Empty arrays report 0 for both the median and its indices.
  def test_empty_array
    assert_same(0, [].median)
    assert_same(0, [].median_indices)
  end

  def test_percentile
    assert_nil([].percentile(1))
    # edge cases: out-of-range percentages behave like the nearest bound
    assert_nil([1, 2, 3, 4, 5, 6, 8].percentile(0))
    assert_equal(8, [1, 2, 3, 4, 5, 6, 8].percentile(1))
    assert_equal([1, 2, 3, 4, 5, 6, 8].percentile(135484), [1, 2, 3, 4, 5, 6, 8].percentile(1))
    assert_equal([1, 2, 3, 4, 5, 6, 8].percentile(-3), [1, 2, 3, 4, 5, 6, 8].percentile(0))
    # proper handling of midrange values
    assert_equal(1, [1, 2, 3, 4].percentile(0.0001))
    assert_equal(1, [1, 2, 3, 4].percentile(0.25))
    assert_equal(1, [1, 2, 3, 4].percentile(0.26))
    assert_equal(2, [1, 2, 3, 4].percentile(0.5))
  end

  def test_percentile_rank
    # edge cases
    assert_equal(0, [1, 2, 3, 4].percentile_rank(-1))
    assert_equal(1, [1, 2, 3, 4].percentile_rank(19849))
    # proper handling of midrange values
    assert_equal(0, [1, 2, 3, 4].percentile_rank(0.99))
    assert_equal(0.25, [1, 2, 3, 4].percentile_rank(1))
    assert_equal(0.25, [1, 2, 3, 4].percentile_rank(1.3))
  end

  def test_median
    assert_equal(0, [].median)
    assert_equal(666, [666].median)
    assert_equal(1.5, [1, 2].median)
    assert_equal(3, [2, 3, 4].median)
    assert_equal(2.5, [1, 2, 3, 4].median)
  end

  def test_outliers
    assert_equal([-60, 999, 9999], [-60, 1, 2, 3, 1, 2, 3, 4, 3, 2, 1, 1, 1, 1, 4, 4, 4, 4, 3, 999, 9999].outliers)
  end

  # The examples advertised in the README synopsis, including the block form.
  def test_readme_examples
    assert_equal(3, [2, 3, 55].median)
    assert_equal(20, [2, 3, 55].average)
    assert_equal(20, [2, 3, 55].mean)
    assert_equal(5, [{:value => 2}, {:value => 3}].sum { |element| element[:value] })
  end
end
50
+
51
+
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: array-statistics
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Bruce Goodwin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-10-31 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hoe
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.8.2
24
+ version:
25
+ description: array-statistics adds statistical operations to the ruby Array class. array-statistics supercedes the old gem "array_statistics"
26
+ email:
27
+ - bgruby@gmail.com
28
+ executables: []
29
+
30
+ extensions: []
31
+
32
+ extra_rdoc_files:
33
+ - History.txt
34
+ - Manifest.txt
35
+ - README.txt
36
+ files:
37
+ - History.txt
38
+ - Manifest.txt
39
+ - README.txt
40
+ - Rakefile
41
+ - lib/array-statistics.rb
42
+ - test/test_array-statistics.rb
43
+ has_rdoc: true
44
+ homepage: http://rubyforge.org/projects/array-statistic/
45
+ post_install_message:
46
+ rdoc_options:
47
+ - --main
48
+ - README.txt
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ version:
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: "0"
62
+ version:
63
+ requirements: []
64
+
65
+ rubyforge_project: array-statistic
66
+ rubygems_version: 1.3.1
67
+ signing_key:
68
+ specification_version: 2
69
+ summary: array-statistics adds statistical operations to the ruby Array class
70
+ test_files:
71
+ - test/test_array-statistics.rb