fastout 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. data/.gitignore +4 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +4 -0
  4. data/Gemfile.lock +38 -0
  5. data/MIT-LICENSE +20 -0
  6. data/README.md +8 -0
  7. data/Rakefile +2 -0
  8. data/doc/classes/Fastout.html +105 -0
  9. data/doc/classes/Ranker.html +468 -0
  10. data/doc/classes/Ranker.src/M000001.html +18 -0
  11. data/doc/classes/Ranker.src/M000002.html +22 -0
  12. data/doc/classes/Ranker.src/M000003.html +42 -0
  13. data/doc/classes/Ranker.src/M000004.html +21 -0
  14. data/doc/classes/Ranker.src/M000005.html +28 -0
  15. data/doc/classes/Ranker.src/M000006.html +35 -0
  16. data/doc/classes/Ranker.src/M000007.html +18 -0
  17. data/doc/classes/Ranker.src/M000008.html +18 -0
  18. data/doc/classes/Ranker.src/M000009.html +33 -0
  19. data/doc/classes/Ranker.src/M000010.html +18 -0
  20. data/doc/classes/Ranker.src/M000011.html +22 -0
  21. data/doc/classes/Ranker.src/M000012.html +24 -0
  22. data/doc/classes/Ranker.src/M000013.html +28 -0
  23. data/doc/classes/Ranker.src/M000014.html +20 -0
  24. data/doc/classes/Ranker.src/M000015.html +19 -0
  25. data/doc/classes/Ranker/Point.html +262 -0
  26. data/doc/classes/Ranker/Point.src/M000016.html +18 -0
  27. data/doc/classes/Ranker/Point.src/M000017.html +24 -0
  28. data/doc/classes/Ranker/Point.src/M000018.html +18 -0
  29. data/doc/classes/Ranker/Point.src/M000019.html +18 -0
  30. data/doc/classes/Ranker/Point.src/M000020.html +18 -0
  31. data/doc/classes/Ranker/Point.src/M000021.html +26 -0
  32. data/doc/classes/Ranker/Point.src/M000022.html +18 -0
  33. data/doc/created.rid +1 -0
  34. data/doc/files/lib/fastout/ranker_rb.html +121 -0
  35. data/doc/files/lib/fastout/version_rb.html +101 -0
  36. data/doc/files/lib/fastout_rb.html +108 -0
  37. data/doc/files/spec/fastout/ranker_spec_rb.html +109 -0
  38. data/doc/files/spec/spec_helper_rb.html +110 -0
  39. data/doc/fr_class_index.html +29 -0
  40. data/doc/fr_file_index.html +28 -0
  41. data/doc/fr_method_index.html +48 -0
  42. data/doc/index.html +24 -0
  43. data/doc/rdoc-style.css +208 -0
  44. data/fastout.gemspec +29 -0
  45. data/lib/fastout.rb +1 -0
  46. data/lib/fastout/ranker.rb +243 -0
  47. data/lib/fastout/version.rb +3 -0
  48. data/spec/fastout/ranker_spec.rb +252 -0
  49. data/spec/parkinsons.csv +1 -0
  50. data/spec/spec_helper.rb +9 -0
  51. metadata +217 -0
@@ -0,0 +1,29 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for fastout.
#
# The version number is read from lib/fastout/version.rb, so lib/ is pushed
# onto the load path before that file is required.
$:.push File.expand_path("../lib", __FILE__)
require "fastout/version"

Gem::Specification.new do |s|
  s.name        = "fastout"
  s.version     = Fastout::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Jason Dew"]
  s.email       = ["jason.dew@gmail.com"]
  s.homepage    = "http://rubygems.org/gems/fastout"
  s.summary     = %q{Detect outliers in high-dimension data sets}
  s.description = %q{Detect outliers in high-dimension data sets using the FASTOUT algorithm by Foss et al.}

  s.rubyforge_project = "fastout"

  # Only needed to run the specs; the library itself has no runtime dependencies.
  s.add_development_dependency "rspec", "~>2.0"
  s.add_development_dependency "rr"
  s.add_development_dependency "autotest"
  s.add_development_dependency "autotest-fsevent"
  s.add_development_dependency "autotest-growl"
  s.add_development_dependency "redgreen"
  s.add_development_dependency "fastercsv"

  # The packaged file lists come from git, so building the gem requires a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
@@ -0,0 +1 @@
1
+ require "fastout/ranker"
@@ -0,0 +1,243 @@
1
# Takes a data set and determines the outliers using the FASTOUT algorithm from
# Foss et al., "Class Separation through Variance: a new application of outlier detection",
# Knowledge and Information Systems, 2010.
#
# Author::    Jason Dew (mailto:jason.dew@gmail.com)
# Copyright:: Copyright (c) 2010 Jason Dew
# License::   MIT
class Ranker

  # One row of the data set, together with the bookkeeping used while scoring:
  # the bin it falls into for each attribute, the cluster it currently belongs
  # to (nil when unclustered) and its accumulated score.
  class Point

    # counter shared by all points; supplies the id of the next point created
    @@next_id = 0

    # sets the id that the next created point will receive
    def self.next_id=(id)
      @@next_id = id
    end

    attr_reader :id, :attributes, :bins
    attr_accessor :cluster, :score

    # +attributes+ are the attribute values of this data point, in column order
    def initialize(*attributes)
      @attributes = attributes
      @cluster = nil
      @score = 0
      @bins = []

      @id = @@next_id
      @@next_id += 1
    end

    # the value of the attribute at +index+
    def [](index)
      @attributes[index]
    end

    # true when the point has been assigned to a cluster
    def clustered?
      !!cluster
    end

    # removes the point from its cluster
    def uncluster!
      @cluster = nil
    end

    # true when, for every attribute in +attribute_indexes+, this point and +point+
    # are in the same or adjacent bins *and* their values differ by no more than half
    # of the matching entry of +neighborhoods+
    # * +neighborhoods+ is indexed by position within +attribute_indexes+, not by
    #   attribute index
    def in_the_neighborhood_of?(point, attribute_indexes, neighborhoods)
      # cheap rejection first: more than one bin apart on any attribute
      too_far = attribute_indexes.any? do |attribute_index|
        (bins[attribute_index] - point.bins[attribute_index]).abs > 1
      end
      return false if too_far

      attribute_indexes.each_with_index do |attribute_index, neighborhood_index|
        # the distance must be absolute, otherwise the relation is asymmetric and
        # every point with smaller values is treated as a neighbor
        distance = (attributes[attribute_index] - point.attributes[attribute_index]).abs
        return false if distance > (neighborhoods[neighborhood_index] / 2.0)
      end

      true
    end

    # true when this point is in the neighborhood of at least one of +points+
    def neighbor_of_any?(points, attribute_indexes, neighborhoods)
      points.any? { |point| in_the_neighborhood_of?(point, attribute_indexes, neighborhoods) }
    end

  end

  attr_reader :data, :points, :minimums, :maximums

  # builds a Point for every row of +data+
  def self.pointify(data)
    data.map { |attributes| Point.new(*attributes) }
  end

  # takes a 2-d array, +data+, where the rows are data points and the columns are the attributes,
  # values should all be numerical
  # * raises a RuntimeError unless +data+ has more than one row and its first row
  #   has more than one attribute
  # * also generates minimum and maximum values for each attribute for later use
  def initialize(data)
    if data.size < 2 || data.first.size < 2
      raise "data must have more than one attribute and more than one data point"
    end

    @data = data
    # reset the counter *before* creating the points so that their ids are always
    # the 0-based row indexes, no matter how many points were created earlier
    Point.next_id = 0
    @points = self.class.pointify(data)
    @minimums, @maximums = compute_minimums_and_maximums
  end

  # searches the parameter space to find the optimized values of +k+ and +q+,
  # returns the points ranked by score (highest first)
  # * +theta_target+ is the maximum acceptable value of theta, default is 1
  # * +sample+ is the number of iterations to perform in estimating the parameters
  # * +n+ is the number of points to rank
  #
  # NOTE(review): point scores are not reset between successive calls to
  # #calculate_theta, so they accumulate across the search -- confirm this is intended.
  # NOTE(review): +q+ can become negative after a step back; #compute_bin_count then
  # falls back to 2 bins -- confirm this is intended.
  def optimized_ranking(sample, n, theta_target = 1)
    k = 3
    q = 5
    max_q = n / 4
    step_q = 10
    last_theta = n
    theta, s = calculate_theta(sample, k, n, q)

    while theta > theta_target || theta < last_theta || q < max_q
      return s if theta <= theta_target

      if theta >= last_theta
        # effectiveness declining so try next k
        k += 1
        q -= step_q
        last_theta = n
      else
        # try next q
        q += step_q
        last_theta = theta
      end

      theta, s = calculate_theta(sample, k, n, q)
    end

    s
  end

  # find and rank the points by their outlier score and determine
  # theta (the number of points with an outlier score of +n+),
  # returns +[theta, ranked_points]+
  def calculate_theta(sample, k, n, q)
    s = ranked_outliers(sample, k, q)
    theta = points.count { |point| point.score == n }

    [theta, s]
  end

  # chooses +k+ random attributes with an average of +q+ data points
  # in each bin +sample_size+ times to determine outliers,
  # returns the points sorted by score, highest first
  def ranked_outliers(sample_size, k, q)
    # determine number of bins and their widths
    bin_count = compute_bin_count(q)
    bin_widths = compute_bin_widths(q, bin_count)

    # assign points to the attribute bins
    assign_points_to_bins!(bin_widths, bin_count)

    sample_size.times { score_points_from_a_random_set_of_attributes!(k, bin_widths) }

    points.sort_by(&:score).reverse
  end

  # pick a random set of attributes and cluster the points on them; every point
  # that ends up in a cluster has its score incremented, and all points are left
  # unclustered afterwards
  def score_points_from_a_random_set_of_attributes!(number_of_attributes_to_choose, all_bin_widths)
    cluster = 0
    attribute_indexes = random_attribute_indexes(number_of_attributes_to_choose)
    bin_widths = attribute_indexes.map { |index| all_bin_widths[index] }

    points.each do |point|
      next if point.clustered?

      # tentatively seed a new cluster with this point ...
      point.cluster = (cluster += 1)
      neighbors = cluster_neighbors(point, cluster, attribute_indexes, bin_widths)

      # ... and drop it again when nothing joined
      point.uncluster! if neighbors.empty?
    end

    points.each do |point|
      next unless point.clustered?
      point.uncluster!
      point.score += 1
    end
  end

  # randomly choose +number+ distinct attribute indexes (all of them when
  # +number+ exceeds the number of attributes)
  def random_attribute_indexes(number)
    (0...@data.first.size).to_a.shuffle.first(number)
  end

  # find all unclustered points that are neighbors of +point+ on
  # *all* selected attributes or neighbors in the neighborhood
  # of +point+; repeat until no additions can be made
  def cluster_neighbors(point, cluster, attribute_indexes, bin_widths)
    recursively_cluster_neighbors(point, cluster, attribute_indexes, bin_widths, [])
  end

  # worker for #cluster_neighbors: repeatedly sweeps the unclustered points, adding
  # to +cluster+ (and appending to +neighbors+) every one that is near +point+ or
  # near a point already collected; returns +neighbors+ after the first sweep that
  # adds nothing
  # * implemented as a loop so that large clusters cannot exhaust the stack
  def recursively_cluster_neighbors(point, cluster, attribute_indexes, bin_widths, neighbors)
    loop do
      fruitful = false

      unclustered_points.each do |candidate|
        next unless point.in_the_neighborhood_of?(candidate, attribute_indexes, bin_widths) ||
                    candidate.neighbor_of_any?(neighbors, attribute_indexes, bin_widths)

        fruitful = true
        candidate.cluster = cluster
        neighbors << candidate
      end

      return neighbors unless fruitful
    end
  end

  # find all of the points that don't already belong to a cluster
  def unclustered_points
    points.reject(&:clustered?)
  end

  # store, on every point, the index of the bin it falls into for each attribute,
  # based on the given +bin_widths+ (one width per attribute)
  def assign_points_to_bins!(bin_widths, bin_count)
    bin_widths.each_with_index do |bin_width, attribute_index|
      points.each do |point|
        point.bins[attribute_index] = bin_index(point, attribute_index, bin_width, bin_count)
      end
    end
  end

  # the 0-based bin of +point+ for the attribute at +attribute_index+
  # * a zero +bin_width+ (constant attribute) puts every point into bin 0
  # * when +bin_count+ is given the result is clamped to 0...bin_count, which keeps
  #   the maximum value in the last bin even with floating point rounding
  # * without +bin_count+ only a value equal to the maximum is moved down one bin
  def bin_index(point, attribute_index, bin_width, bin_count = nil)
    return 0 if bin_width.zero?

    minimum = @minimums[attribute_index]
    maximum = @maximums[attribute_index]

    value = point[attribute_index]
    index = ((value - minimum) / bin_width).floor

    if bin_count
      [[index, 0].max, bin_count - 1].min
    else
      value == maximum ? index - 1 : index
    end
  end

  # the smallest and largest value of every attribute, as +[minimums, maximums]+
  def compute_minimums_and_maximums
    minimums = @data.first.dup
    maximums = @data.first.dup

    @data.each do |attributes|
      attributes.each_with_index do |attribute, attribute_index|
        minimums[attribute_index] = attribute if attribute < minimums[attribute_index]
        maximums[attribute_index] = attribute if attribute > maximums[attribute_index]
      end
    end

    [minimums, maximums]
  end

  # determine the widths of the bins: each attribute's range divided by +bin_count+
  # * +q+ is accepted for symmetry with #compute_bin_count but is not used
  def compute_bin_widths(q, bin_count)
    @minimums.zip(@maximums).map do |minimum, maximum|
      (maximum - minimum) / bin_count.to_f
    end
  end

  # compute the number of bins for a given +q+ (never fewer than 2)
  def compute_bin_count(q)
    [(@data.size / q.to_f).ceil, 2].max
  end

end
@@ -0,0 +1,3 @@
1
# Namespace of the fastout gem.
module Fastout
  # Version of the gem, read by fastout.gemspec; frozen so it cannot be mutated by accident.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,252 @@
1
+ require "spec_helper"
2
+ require "fastercsv"
3
+
4
# Specs for Ranker and Ranker::Point. Collaborators are replaced with rr
# mocks/stubs, so many examples also pin how often an accessor is called.
module Fastout

  describe Ranker do

    describe Ranker::Point do

      before { @point = Ranker::Point.new 1.0, 4.2, -1 }

      context "#[]" do
        it("should be able to index the attributes directly") { @point[0].should == 1.0 }
      end

      context "#clustered?" do
        it "should be false when cluster is nil" do
          mock(@point).cluster { nil }
          @point.clustered?.should be_false
        end

        it "should be true when cluster is not nil" do
          mock(@point).cluster { 42 }
          @point.clustered?.should be_true
        end
      end

      context "#uncluster!" do
        it "should set cluster equal to nil" do
          @point.cluster = 42
          @point.uncluster!
          @point.cluster.should be_nil
        end
      end

      context "#in_the_neighborhood_of?" do
        before(:each) do
          stub(@point).bins { [2, 2] }
          @test_point = Ranker::Point.new
        end

        it "should be false when the test point is two bins away for an attribute" do
          mock(@test_point).bins { [0, 2] }
          @point.in_the_neighborhood_of?(@test_point, [0, 1], [1, 1]).should be_false
        end

        it "should be false when the test point is more than half a neighborhood away for an attribute" do
          mock(@test_point).bins { [1, 2] }.times(2)
          mock(@point).attributes { [2.5, 2.5] }
          mock(@test_point).attributes { [1.75, 2.5] }

          @point.in_the_neighborhood_of?(@test_point, [0, 1], [1, 1]).should be_false
        end

        it "should be true when the test point is less than half a neighborhood away for an attribute" do
          mock(@test_point).bins { [3, 2] }.times(2)
          mock(@point).attributes { [2.5, 2.5] }.times(2)
          mock(@test_point).attributes { [2.75, 2.5] }.times(2)

          @point.in_the_neighborhood_of?(@test_point, [0, 1], [1, 1]).should be_true
        end
      end

      context "#neighbor_of_any?" do

        it "should check to see if any point is a neighbor" do
          mock(@point).in_the_neighborhood_of?(:point_0, :attribute_indexes, :neighborhoods) { false }
          mock(@point).in_the_neighborhood_of?(:point_1, :attribute_indexes, :neighborhoods) { true }

          @point.neighbor_of_any?([:point_0, :point_1], :attribute_indexes, :neighborhoods).should be_true
        end

      end

    end

    it("should raise an error when given an empty array") { lambda { Ranker.new([]) }.should raise_error }
    it("should raise an error when given an array containing an empty array") { lambda { Ranker.new([[]]) }.should raise_error }
    it("should raise an error when given an array containing one non-empty array") { lambda { Ranker.new([[1, 2, 3]]) }.should raise_error }
    it("should raise an error when given an array containing only one attribute") { lambda { Ranker.new([[1], [2], [3]]) }.should raise_error }

    context "given 3 attributes and 4 data points" do
      before(:each) do
        @ranker = Ranker.new [[ 1.0,  3, -1],
                              [ 2.0, 50,  1],
                              [ 3.0,  5,  1],
                              [ 4.2,  2,  1]]
      end

      context ".pointify" do
        it "should generate a point object for each row" do
          @ranker.points.size.should == 4
        end
      end

      context "#optimized_ranking" do
        it "should find the optimal values for k and q" do
          @ranker.optimized_ranking(10, 5, 1).map(&:id).should == [3, 2, 1, 0]
        end
      end

      context "#ranked_outliers" do
        it "should compute the necessary parameters and return the points sorted by score" do
          mock(@ranker).compute_bin_count(42) { :bin_count }
          mock(@ranker).compute_bin_widths(42, :bin_count) { :bin_widths }
          mock(@ranker).assign_points_to_bins!(:bin_widths, :bin_count)
          mock(@ranker).score_points_from_a_random_set_of_attributes!(5, :bin_widths).times(100)
          mock(@ranker.points).sort_by { mock!.reverse { :answer }.subject }

          @ranker.ranked_outliers(100, 5, 42).should == :answer
        end
      end

      context "#score_points_from_a_random_set_of_attributes!" do
        it "should pick a random set of attributes and cycle through the points" do
          mock(@ranker).random_attribute_indexes(5) { [2, 0] }
          # Integer rather than Fixnum: Fixnum was removed from Ruby in 3.2
          mock(@ranker).cluster_neighbors(is_a(Ranker::Point), is_a(Integer), [2, 0], [2, 0]) { [] }.times(4)

          @ranker.score_points_from_a_random_set_of_attributes!(5, [0, 1, 2])
        end
      end

      context "#random_attribute_indexes" do
        it("should give me back the correct number of indexes") { @ranker.random_attribute_indexes(3).size.should == 3 }
      end

      context "#cluster_neighbors" do
        it "should call recursively_cluster_neighbors" do
          mock(@ranker).recursively_cluster_neighbors(:point, :cluster, :attribute_indexes, :bin_widths, [])

          @ranker.cluster_neighbors :point, :cluster, :attribute_indexes, :bin_widths
        end
      end

      context "#recursively_cluster_neighbors" do

        it "should return its neighbors when there are no more unclustered points" do
          mock(@ranker).unclustered_points { [] }
          @ranker.recursively_cluster_neighbors(:point, :cluster, :attribute_indexes, :bin_widths, :neighbors).should == :neighbors
        end

        it "should return its neighbors if it doesn't find any new neighbors" do
          unclustered_point = mock!.neighbor_of_any?(:neighbors, :attribute_indexes, :bin_widths) { false }.subject
          mock(@ranker).unclustered_points { [unclustered_point] }
          point = mock!.in_the_neighborhood_of?(unclustered_point, :attribute_indexes, :bin_widths) { false }.subject

          @ranker.recursively_cluster_neighbors(point, :cluster, :attribute_indexes, :bin_widths, :neighbors).should == :neighbors
        end

        it "should call itself if it finds a new neighbor" do
          @called = false
          unclustered_point = Ranker::Point.new
          mock(unclustered_point).cluster=(:cluster)

          # first sweep sees one unclustered point, second sweep sees none
          mock(@ranker).unclustered_points do
            if @called
              []
            else
              @called = true
              [unclustered_point]
            end
          end.times(2)

          point = mock!.in_the_neighborhood_of?(unclustered_point, :attribute_indexes, :bin_widths) { true }.subject

          @ranker.recursively_cluster_neighbors(point, :cluster, :attribute_indexes, :bin_widths, []).should == [unclustered_point]
        end
      end

      context "#unclustered_points" do

        it "should find only the points that aren't clustered" do
          mock(@ranker).points do
            [mock!.clustered? { true }.subject,
             mock!.clustered? { true }.subject,
             mock!.clustered? { false }.subject]
          end

          # exactly one of the three mocked points reports itself as unclustered
          @ranker.unclustered_points.size.should == 1
        end

      end

      context "#calculate_theta" do

        it "should call #ranked_outliers and find theta" do
          mock(@ranker).ranked_outliers(:sample, :k, :q) { :s }
          mock(@ranker).points { [mock!.score { :n }.subject,
                                  mock!.score { :not_n }.subject,
                                  mock!.score { :not_n }.subject,
                                  mock!.score { :n }.subject] }
          @ranker.calculate_theta(:sample, :k, :n, :q).should == [2, :s]
        end

      end

      context "#compute_minimums_and_maximums" do
        it "should properly compute minimums and maximums" do
          @ranker.minimums.should == [1.0, 2.0, -1.0]
          @ranker.maximums.should == [4.2, 50.0, 1.0]
        end
      end

      context "#compute_bin_count" do
        it("should be equal to 4 when Q=1") { @ranker.compute_bin_count(1).should == 4 }
        it("should be equal to 2 when Q=2") { @ranker.compute_bin_count(2).should == 2 }
        it("should be equal to 2 when Q=3") { @ranker.compute_bin_count(3).should == 2 }
        it("should be equal to 2 when Q=4") { @ranker.compute_bin_count(4).should == 2 }
        it("should be equal to 2 when Q=5") { @ranker.compute_bin_count(5).should == 2 }
      end

      context "#compute_bin_widths" do
        it("should be equal to [0.8, 12.0, 0.5] when Q=1") { @ranker.compute_bin_widths(1, 4).should == [0.8, 12.0, 0.5] }
        it("should be equal to [1.6, 24.0, 1.0] when Q=2") { @ranker.compute_bin_widths(2, 2).should == [1.6, 24.0, 1.0] }
      end

      context "#assign_points_to_bins!" do
        context "with q=2" do
          it "should work properly" do
            points = [(point_0 = Ranker::Point.new(1.0, 3, -1)),
                      (point_1 = Ranker::Point.new(2.0, 50, 1)),
                      (point_2 = Ranker::Point.new(3.0, 5, 1)),
                      (point_3 = Ranker::Point.new(4.2, 2, 1))]

            # the points are fetched once per attribute
            mock(@ranker).points { points }.times(3)

            @ranker.assign_points_to_bins! [1.6, 24.0, 1.0], 2

            point_0.bins.should == [0, 0, 0]
            point_1.bins.should == [0, 1, 1]
            point_2.bins.should == [1, 0, 1]
            point_3.bins.should == [1, 0, 1]
          end
        end
      end

    end

    context "given a somewhat non-trivial dataset" do

      it "should find the outliers" do
        # data from 'Exploiting Nonlinear Recurrence and Fractal Scaling Properties for Voice Disorder Detection', Little MA, McSharry PE, Roberts SJ, Costello DAE, Moroz IM. BioMedical Engineering OnLine 2007, 6:23 (26 June 2007)
        data = FasterCSV.read("spec/parkinsons.csv").map { |row| row.map { |datum| datum.to_f } }
        ranker = Ranker.new data

        ranker.optimized_ranking(5, 5).size.should == 195
      end
    end

  end

end