array-statistics 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ == 0.1.1 2008-10-31
2
+
3
+ * Fixed badly packaged gem. Whadda ya want? It's my first gem.
4
+ == 0.1.0 2008-10-30
5
+
6
+ * 1 major enhancement:
7
+ * Initial release
@@ -0,0 +1,7 @@
1
+ History.txt
2
+ Manifest.txt
3
+ README.txt
4
+ Rakefile
5
+ lib/array-statistics.rb
6
+ test/test_array-statistics.rb
7
+
@@ -0,0 +1,64 @@
1
+ = array-statistics
2
+
3
+ http://rubyforge.org/projects/array-statistic/
4
+
5
+ == DESCRIPTION:
6
+
7
+ array-statistics adds statistical operations to the ruby Array class. array-statistics supersedes the old gem "array_statistics"
8
+
9
+ == FEATURES/PROBLEMS:
10
+
11
+ array-statistics adds support for:
12
+ * Percentiles
13
+ * Quartiles
14
+ * Median
15
+ * Mean
16
+ * Sum
17
+ * Outliers
18
+
19
+ == SYNOPSIS:
20
+
21
+ Array Statistics adds statistical operations to the ruby Array class.
22
+ The operations work on arrays of numbers or, more interestingly, arrays of _things_.
23
+ Each method added to Array by this gem takes an optional block that defines the "value" on which to base the operation.
24
+
25
+ For example,
26
+ require 'rubygems'
27
+ require 'array-statistics'
28
+   [2,3,55].median #=> 3.0
29
+   [2,3,55].average #=> 20.0
30
+ [{:value => 2}, {:value => 3}].sum {|element| element[:value] } #=> 5
31
+
32
+
33
+ == REQUIREMENTS:
34
+
35
+ * rubygems
36
+
37
+ == INSTALL:
38
+
39
+ sudo gem install array-statistics
40
+
41
+ == LICENSE:
42
+
43
+ (The MIT License)
44
+
45
+ Copyright (c) 2008 Bruce Goodwin
46
+
47
+ Permission is hereby granted, free of charge, to any person obtaining
48
+ a copy of this software and associated documentation files (the
49
+ 'Software'), to deal in the Software without restriction, including
50
+ without limitation the rights to use, copy, modify, merge, publish,
51
+ distribute, sublicense, and/or sell copies of the Software, and to
52
+ permit persons to whom the Software is furnished to do so, subject to
53
+ the following conditions:
54
+
55
+ The above copyright notice and this permission notice shall be
56
+ included in all copies or substantial portions of the Software.
57
+
58
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
59
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
60
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
61
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
62
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
63
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
64
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,13 @@
1
# -*- ruby -*-

require 'rubygems'
require 'hoe'
require './lib/array-statistics.rb'

# Declare the gem through Hoe, taking the version number straight from the
# library so it is only maintained in one place.
Hoe.new('array-statistics', ArrayStatistics::VERSION) do |gem|
  # The RubyForge project is 'array-statistic' (no trailing "s"), which
  # differs from the gem name and therefore has to be given explicitly.
  gem.rubyforge_name = 'array-statistic'
  gem.developer('Bruce Goodwin', 'bgruby@gmail.com')
  #gem.remote_rdoc_dir = '' # Release to root
end

# vim: syntax=Ruby
@@ -0,0 +1,336 @@
1
+
2
+ #:stopdoc:
3
+
4
+ # no need for these yet... maybe later.
5
+ # $:.unshift(File.dirname(__FILE__)) unless
6
+ # $:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
7
+
8
+ #:startdoc:
9
+
10
# Namespace for gem-level metadata of array-statistics.
module ArrayStatistics
  # Version of the gem. Frozen so that callers cannot mutate the shared
  # constant string in place.
  VERSION = '0.1.1'.freeze
end
13
# Statistical extensions for Array (percentiles, quartiles, median, mean, sum
# and outlier detection).
#
# Most methods take an optional block that maps an element to the numeric
# "value" used for the calculation; without a block the element itself is the
# value. Methods that need ordered data sort the receiver IN PLACE as a side
# effect.
class Array

  # Get percent order statistic based on "order statistic" from here:
  # http://mathworld.wolfram.com/topics/RankStatistics.html
  #
  # +percent_less_than+ is a fraction between 0 and 1; values above 1 are
  # treated as 1. Returns the element at index
  # <tt>floor(fraction * (length - 1))</tt> of the sorted array, or +nil+ when
  # the fraction is 0 or negative, or when the array is empty.
  #
  # Sorts the receiver in place.
  def percentile(percent_less_than, &value_block) # :yields: element
    value_block ||= proc { |element| element }
    sort! { |x, y| value_block.call(x) <=> value_block.call(y) }
    return nil if empty? || percent_less_than <= 0

    fraction = percent_less_than > 1 ? 1 : percent_less_than
    self[(fraction * (length - 1)).floor]
  end

  # Get percent rank based on "statistical rank" from here:
  # http://mathworld.wolfram.com/topics/RankStatistics.html
  #
  # Returns, as a Float between 0.0 and 1.0 (inclusive), the fraction of
  # elements whose value is less than or equal to +value+. An empty array has
  # no elements at or below anything, so it yields 0.0 (previously this case
  # raised while comparing against nil).
  #
  # Sorts the receiver in place.
  def percentile_rank(value, &value_block) # :yields: element
    value_block ||= proc { |element| element }
    sort! { |x, y| value_block.call(x) <=> value_block.call(y) }
    return 0.0 if empty?

    # In sorted order, the index of the first larger element equals the number
    # of elements that are <= value.
    first_greater = index { |element| value_block.call(element) > value }
    first_greater.nil? ? 1.0 : first_greater.to_f / length
  end

  # Get the median value of this array: the middle value, or the average of the
  # two middle values when the length is even.
  #
  # Returns 0 for an empty array, to reduce instances of calling math methods
  # on nil. Pass <tt>sort_required = false</tt> when the receiver is already
  # sorted by value; the array is then left untouched.
  def median(sort_required=true, &value_block) # :yields: element
    return 0 if empty?
    value_block ||= proc { |element| element }

    # sort_required is forwarded; it used to be ignored, so the array was
    # always re-sorted.
    middle_values = median_indices(sort_required, &value_block).map do |element_index|
      value_block.call(self[element_index])
    end
    middle_values.average
  end

  # Returns a one- or two-element array holding the index of the median element
  # or the two indices that surround the median.
  #
  # For an empty array this returns the Integer 0 (not an array); that
  # historical contract is kept for existing callers. Unless +sort_required+ is
  # false, the receiver is sorted in place first.
  def median_indices(sort_required=true, &value_block) # :yields: element
    return 0 if empty?

    if sort_required
      value_block ||= proc { |element| element }
      sort! { |x, y| value_block.call(x) <=> value_block.call(y) }
    end

    middle = length / 2
    length.even? ? [middle - 1, middle] : [middle]
  end

  # Returns <tt>[q1_indices, q3_indices]</tt>: the indices of the first and
  # third quartile, each following the same rules as #median_indices. The lower
  # half runs from the start through the (lower) median index and the upper
  # half from the (upper) median index to the end.
  #
  # Returns <tt>[[], []]</tt> for an empty array. Sorts the receiver in place.
  def quartile_indices(&value_block) # :yields: element
    return [[], []] if empty?

    median_i = median_indices(&value_block)
    low_end = median_i.first
    high_start = median_i.last

    # The halves are already sorted, so only their lengths matter here.
    q1_indices = self[0..low_end].median_indices(false)
    q3_indices = self[high_start..-1].median_indices(false).map do |index|
      index + high_start # translate back to indices of the full array
    end
    [q1_indices, q3_indices]
  end

  # Returns a new array containing all the outliers in this set: low outliers
  # first, then high outliers. The receiver is sorted in place but otherwise
  # left intact.
  #
  # Elements are returned as they are; elements that are themselves arrays are
  # no longer flattened into the result.
  def outliers(quartile_range_factor=1.5, &value_block) # :yields: element
    low_index, high_index = outlier_threshold_indices(quartile_range_factor, &value_block)
    low_outliers = low_index.nil? ? [] : self[0..low_index]
    high_outliers = high_index.nil? ? [] : self[high_index..-1]
    low_outliers + high_outliers
  end

  # Removes all the outliers from this set and returns them (low outliers
  # first, then high outliers). The receiver is left sorted.
  def remove_outliers!(quartile_range_factor=1.5, &value_block) # :yields: element
    low_index, high_index = outlier_threshold_indices(quartile_range_factor, &value_block)

    # Cut the tail before the head so that high_index is still valid.
    high_outliers = high_index.nil? ? [] : slice!(high_index..-1)
    low_outliers = low_index.nil? ? [] : slice!(0..low_index)
    low_outliers + high_outliers
  end

  # Returns an array with two values.
  # The first value is the index of the last low outlier in this sorted array
  # (this array will be sorted as a side-effect of this method) or nil if there
  # are no low-end outliers.
  # The second value is the index of the first high outlier in this sorted
  # array or nil if there are no high-end outliers.
  #
  # Returns <tt>[nil, nil]</tt> for an empty array.
  def outlier_threshold_indices(quartile_range_factor=1.5, &value_block) # :yields: element
    return [nil, nil] if empty?
    value_block ||= proc { |element| element }
    low_threshold, high_threshold = outlier_thresholds(quartile_range_factor, &value_block) # this sorts self!

    # Count the outliers at each end. take_while cannot run past the ends of
    # the array, which the previous open-ended index loops could do (e.g. with
    # a negative quartile_range_factor).
    low_count = take_while { |element| value_block.call(element) < low_threshold }.length
    high_count = reverse_each.take_while { |element| value_block.call(element) > high_threshold }.length
    # Never report an element as both a low and a high outlier.
    high_count = [high_count, length - low_count].min

    low_index = low_count.zero? ? nil : low_count - 1
    high_index = high_count.zero? ? nil : length - high_count
    [low_index, high_index]
  end

  # Returns an array with two values:
  # The first value is the low outlier threshold for this data set
  # (<tt>q1 - IQR * quartile_range_factor</tt>).
  # The second value is the high outlier threshold for this data set
  # (<tt>q3 + IQR * quartile_range_factor</tt>).
  #
  # Returns <tt>[nil, nil]</tt> for an empty array. Sorts the receiver in place.
  def outlier_thresholds(quartile_range_factor=1.5, &value_block) # :yields: element
    return [nil, nil] if empty?
    value_block ||= proc { |element| element }

    # Each quartile is the average of the values at its one or two indices.
    q1, q3 = quartile_indices(&value_block).map do |indices|
      indices.map { |element_index| value_block.call(self[element_index]) }.average
    end

    margin = (q3 - q1) * quartile_range_factor
    [q1 - margin, q3 + margin]
  end

  # Returns the average of the values in this array. Integer sums are converted
  # to Float first so the division is never truncated.
  #
  # For an empty array this is 0.0 / 0, i.e. NaN.
  def average(&value_block) # :yields: element
    total = sum(&value_block)
    total = total.to_f if total.integer?
    total / length
  end
  alias mean average

  # Returns the sum of all the values in this array, starting from +init+.
  #
  # This definition replaces the built-in Array#sum on Rubies that have one, so
  # it accepts the same optional initial value; without it, existing calls such
  # as <tt>sum(0.0)</tt> would raise ArgumentError once this file is loaded.
  def sum(init=0, &value_block) # :yields: element
    value_block ||= proc { |element| element }
    inject(init) { |total, element| total + value_block.call(element) }
  end

  # Guarded so that loading this file twice does not alias the override onto
  # itself (which would recurse forever).
  alias_method :old_sort!, :sort! unless method_defined?(:old_sort!)

  # Sorts the array in place and marks it as clean. Takes the usual optional
  # two-argument comparison block and returns self.
  #
  # Earlier versions skipped the sort when the array was believed to be sorted
  # already. That belief could not be trusted: mutators such as insert, concat
  # or shuffle! never marked the array dirty, so sort! could silently do
  # nothing. The sort is therefore always performed now.
  def sort!(&comparison_block) # :yields: x, y
    old_sort!(&comparison_block)
    clean
    self
  end

  #:stopdoc:

  # Marks the array as possibly out of order.
  def dirty
    @as_sort_dirty = true
  end

  # True unless the array has been sorted by sort! since it was last marked
  # dirty. Arrays that were never sorted count as dirty. Read-only, so it is
  # safe to call on frozen arrays.
  def dirty?
    instance_variable_defined?(:@as_sort_dirty) ? @as_sort_dirty : true
  end

  # Marks the array as sorted.
  def clean
    @as_sort_dirty = false
  end

  # BELOW: Modify the array to mark itself as dirty when the array might need resorting.
  #
  # For our purposes "dirty" doesn't merely mean that the array has been
  # modified; it means that the array has been modified in a way that could
  # have messed up the sorting. For this reason, not all methods that could
  # modify the array are included. The flag is bookkeeping only: sort! no
  # longer relies on it.
  #
  # Each pair is [method to wrap, alias under which the original stays reachable].
  [
    [:[]=,      :as_old_ass],
    [:<<,       :as_old_app],
    [:push,     :as_old_push],
    [:collect!, :as_old_collect],
    [:map!,     :as_old_map!],
    [:fill,     :as_old_fill],
    [:flatten!, :as_old_flatten],
    [:replace,  :as_old_replace],
    [:reverse!, :as_old_reverse],
    [:unshift,  :as_old_unshift]
  ].each do |hooked_name, original_name|
    # Keep the first alias on reload; re-aliasing would capture the wrapper.
    alias_method original_name, hooked_name unless method_defined?(original_name)

    define_method(hooked_name) do |*as_args, &block|
      ret = __send__(original_name, *as_args, &block)
      # Calls that did not raise on a frozen array changed nothing, and a
      # frozen array cannot take the flag anyway.
      dirty unless frozen?
      ret
    end
  end
  #:startdoc:
end
334
+
335
+
336
+
@@ -0,0 +1,51 @@
1
+ require File.dirname(__FILE__) + '/test_helper.rb'
2
+
3
# Exercises the statistical methods that array-statistics adds to Array.
class TestArrayStatistics < Test::Unit::TestCase

  # Nothing to prepare: every test works on its own array literals.
  def setup
  end

  # An empty array reports the Integer 0 for both the median and its indices.
  def test_empty_array
    [:median, :median_indices].each do |statistic|
      assert_same(0, [].send(statistic))
    end
  end

  def test_percentile
    # A fresh array per call, so no assertion depends on an earlier in-place sort.
    sample = lambda { [1,2,3,4,5,6,8] }

    assert_nil([].percentile(1))
    # Boundaries: 0 gives nil, 1 gives the maximum, out-of-range requests are clamped.
    assert_nil(sample.call.percentile(0))
    assert_equal(8, sample.call.percentile(1))
    assert_equal(sample.call.percentile(135484), sample.call.percentile(1))
    assert_equal(sample.call.percentile(-3), sample.call.percentile(0))
    # Fractions inside the range pick the element at floor(fraction * (length - 1)).
    [[0.0001, 1], [0.25, 1], [0.26, 1], [0.5, 2]].each do |fraction, expected|
      assert_equal(expected, [1,2,3,4].percentile(fraction))
    end
  end

  def test_percentile_rank
    # Each pair is [expected rank, probed value]. The first two probes lie
    # outside the data; the rest fall below, on, and between elements.
    [[0, -1], [1, 19849], [0, 0.99], [0.25, 1], [0.25, 1.3]].each do |expected, probe|
      assert_equal(expected, [1,2,3,4].percentile_rank(probe))
    end
  end

  def test_median
    # Each pair is [expected median, input array], covering empty, single,
    # even-length and odd-length inputs.
    [[0, []], [666, [666]], [1.5, [1,2]], [3, [2,3,4]], [2.5, [1,2,3,4]]].each do |expected, values|
      assert_equal(expected, values.median)
    end
  end

  def test_outliers
    # One extreme low value and two extreme high values around a tight cluster.
    readings = [-60,1,2,3,1,2,3,4,3,2,1,1,1,1,4,4,4,4,3,999, 9999]
    assert_equal([-60,999,9999], readings.outliers)
  end
end
50
+
51
+
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: array-statistics
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Bruce Goodwin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-10-31 00:00:00 -04:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hoe
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.8.2
24
+ version:
25
+ description: array-statistics adds statistical operations to the ruby Array class. array-statistics supercedes the old gem "array_statistics"
26
+ email:
27
+ - bgruby@gmail.com
28
+ executables: []
29
+
30
+ extensions: []
31
+
32
+ extra_rdoc_files:
33
+ - History.txt
34
+ - Manifest.txt
35
+ - README.txt
36
+ files:
37
+ - History.txt
38
+ - Manifest.txt
39
+ - README.txt
40
+ - Rakefile
41
+ - lib/array-statistics.rb
42
+ - test/test_array-statistics.rb
43
+ has_rdoc: true
44
+ homepage: http://rubyforge.org/projects/array-statistic/
45
+ post_install_message:
46
+ rdoc_options:
47
+ - --main
48
+ - README.txt
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ version:
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: "0"
62
+ version:
63
+ requirements: []
64
+
65
+ rubyforge_project: array-statistic
66
+ rubygems_version: 1.3.1
67
+ signing_key:
68
+ specification_version: 2
69
+ summary: array-statistics adds statistical operations to the ruby Array class
70
+ test_files:
71
+ - test/test_array-statistics.rb