sciruby 0.1.3 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/CHANGES +3 -0
- data/CONTRIBUTING.md +46 -0
- data/Gemfile +34 -0
- data/LICENSE.txt +21 -0
- data/README.rdoc +28 -0
- data/lib/sciruby/gems.rb +27 -0
- data/lib/sciruby/version.rb +3 -0
- data/lib/sciruby.rb +2 -77
- data/sciruby.gemspec +35 -0
- metadata +51 -413
- data/.autotest +0 -23
- data/.gemtest +0 -0
- data/History.txt +0 -6
- data/Manifest.txt +0 -119
- data/Rakefile +0 -178
- data/bin/sciruby-plotter +0 -12
- data/data/r/man/AirPassengers.Rd +0 -51
- data/data/r/man/BJsales.Rd +0 -34
- data/data/r/man/BOD.Rd +0 -53
- data/data/r/man/ChickWeight.Rd +0 -68
- data/data/r/man/DNase.Rd +0 -63
- data/data/r/man/EuStockMarkets.Rd +0 -28
- data/data/r/man/Formaldehyde.Rd +0 -44
- data/data/r/man/HairEyeColor.Rd +0 -77
- data/data/r/man/Harman23.cor.Rd +0 -25
- data/data/r/man/Harman74.cor.Rd +0 -28
- data/data/r/man/Indometh.Rd +0 -57
- data/data/r/man/InsectSprays.Rd +0 -45
- data/data/r/man/JohnsonJohnson.Rd +0 -37
- data/data/r/man/LakeHuron.Rd +0 -27
- data/data/r/man/LifeCycleSavings.Rd +0 -54
- data/data/r/man/Loblolly.Rd +0 -56
- data/data/r/man/Nile.Rd +0 -78
- data/data/r/man/Orange.Rd +0 -57
- data/data/r/man/OrchardSprays.Rd +0 -62
- data/data/r/man/PlantGrowth.Rd +0 -39
- data/data/r/man/Puromycin.Rd +0 -84
- data/data/r/man/Theoph.Rd +0 -84
- data/data/r/man/Titanic.Rd +0 -73
- data/data/r/man/ToothGrowth.Rd +0 -40
- data/data/r/man/UCBAdmissions.Rd +0 -68
- data/data/r/man/UKDriverDeaths.Rd +0 -72
- data/data/r/man/UKLungDeaths.Rd +0 -40
- data/data/r/man/UKgas.Rd +0 -25
- data/data/r/man/USAccDeaths.Rd +0 -23
- data/data/r/man/USArrests.Rd +0 -45
- data/data/r/man/USJudgeRatings.Rd +0 -38
- data/data/r/man/USPersonalExpenditure.Rd +0 -33
- data/data/r/man/VADeaths.Rd +0 -51
- data/data/r/man/WWWusage.Rd +0 -41
- data/data/r/man/WorldPhones.Rd +0 -40
- data/data/r/man/ability.cov.Rd +0 -50
- data/data/r/man/airmiles.Rd +0 -29
- data/data/r/man/airquality.Rd +0 -56
- data/data/r/man/anscombe.Rd +0 -62
- data/data/r/man/attenu.Rd +0 -66
- data/data/r/man/attitude.Rd +0 -48
- data/data/r/man/austres.Rd +0 -22
- data/data/r/man/beavers.Rd +0 -73
- data/data/r/man/cars.Rd +0 -59
- data/data/r/man/chickwts.Rd +0 -47
- data/data/r/man/co2.Rd +0 -43
- data/data/r/man/crimtab.Rd +0 -129
- data/data/r/man/datasets-package.Rd +0 -24
- data/data/r/man/discoveries.Rd +0 -30
- data/data/r/man/esoph.Rd +0 -66
- data/data/r/man/euro.Rd +0 -56
- data/data/r/man/eurodist.Rd +0 -25
- data/data/r/man/faithful.Rd +0 -63
- data/data/r/man/freeny.Rd +0 -56
- data/data/r/man/infert.Rd +0 -56
- data/data/r/man/iris.Rd +0 -62
- data/data/r/man/islands.Rd +0 -29
- data/data/r/man/lh.Rd +0 -22
- data/data/r/man/longley.Rd +0 -56
- data/data/r/man/lynx.Rd +0 -33
- data/data/r/man/morley.Rd +0 -50
- data/data/r/man/mtcars.Rd +0 -44
- data/data/r/man/nhtemp.Rd +0 -30
- data/data/r/man/nottem.Rd +0 -30
- data/data/r/man/occupationalStatus.Rd +0 -44
- data/data/r/man/precip.Rd +0 -31
- data/data/r/man/presidents.Rd +0 -36
- data/data/r/man/pressure.Rd +0 -41
- data/data/r/man/quakes.Rd +0 -40
- data/data/r/man/randu.Rd +0 -46
- data/data/r/man/rivers.Rd +0 -21
- data/data/r/man/rock.Rd +0 -34
- data/data/r/man/sleep.Rd +0 -51
- data/data/r/man/stackloss.Rd +0 -77
- data/data/r/man/state.Rd +0 -80
- data/data/r/man/sunspot.month.Rd +0 -49
- data/data/r/man/sunspot.year.Rd +0 -26
- data/data/r/man/sunspots.Rd +0 -33
- data/data/r/man/swiss.Rd +0 -79
- data/data/r/man/treering.Rd +0 -38
- data/data/r/man/trees.Rd +0 -48
- data/data/r/man/uspop.Rd +0 -27
- data/data/r/man/volcano.Rd +0 -31
- data/data/r/man/warpbreaks.Rd +0 -56
- data/data/r/man/women.Rd +0 -40
- data/data/r/man/zCO2.Rd +0 -81
- data/lib/ext/csv.rb +0 -22
- data/lib/ext/shoes.rb +0 -131
- data/lib/ext/string.rb +0 -39
- data/lib/sciruby/analysis/suite.rb +0 -87
- data/lib/sciruby/analysis/suite_report_builder.rb +0 -44
- data/lib/sciruby/analysis.rb +0 -98
- data/lib/sciruby/config.rb +0 -93
- data/lib/sciruby/data/guardian.rb +0 -96
- data/lib/sciruby/data/r/base.rb +0 -110
- data/lib/sciruby/data/r/data_frame.rb +0 -24
- data/lib/sciruby/data/r/grouped_data.rb +0 -7
- data/lib/sciruby/data/r/list.rb +0 -20
- data/lib/sciruby/data/r/multi_time_series.rb +0 -24
- data/lib/sciruby/data/r/r_matrix.rb +0 -7
- data/lib/sciruby/data/r/time_series.rb +0 -19
- data/lib/sciruby/data/r/time_series_base.rb +0 -40
- data/lib/sciruby/data/r/vector.rb +0 -125
- data/lib/sciruby/data/r.rb +0 -155
- data/lib/sciruby/data.rb +0 -168
- data/lib/sciruby/editor.rb +0 -82
- data/lib/sciruby/plotter.rb +0 -128
- data/lib/sciruby/recommend.rb +0 -70
- data/lib/sciruby/validation.rb +0 -368
- data/readme.md +0 -75
- data/static/sciruby-icon.png +0 -0
- data/test/helpers_tests.rb +0 -58
- data/test/test_recommend.rb +0 -16
data/lib/sciruby/recommend.rb
DELETED
@@ -1,70 +0,0 @@
|
|
1
|
-
require 'set'
|
2
|
-
require 'distribution' # for Hypergeometric
|
3
|
-
|
4
|
-
warn "[DEPRECATION] SciRuby::Recommend is deprecated."
|
5
|
-
|
6
|
-
# Fallback for interpreters whose 'set' library does not define SortedSet
# (the original note here reads "Ruby 1.8"). The stand-in is an empty subclass
# of Set: it keeps SortedSet.new(...) callable but does not sort anything itself.
# Nothing is defined when SortedSet is already known.
SortedSet = Class.new(Set) unless defined?(SortedSet)
|
10
|
-
|
11
|
-
# Added by John O. Woods.
|
12
|
-
#
|
13
|
-
# Makes use of Distribution gem, formerly part of Statsample, by Claudio Bustos.
|
14
|
-
|
15
|
-
module SciRuby
  # Methods and classes for expert recommendation systems.
  #
  # This module is likely to go away soon.
  module Recommend
    # Set distance functions: determine distances between sets.
    #
    # These functions may be useful for k-nearest neighbors searches and expert recommendation systems.
    #
    # Sets are used for systems where vectors are binary. For example, if you have a matrix of customers and
    # products, a zero means the customer has not bought the product, and a one means the customer has. There is
    # no concept of degree.
    #
    # Pearson is probably the function most people will want to use.
    class SetDistance
      # Create a new recommendation-by-set-distance object.
      #
      # +a+, +b+:: the two sets to compare. A Set is stored as given; anything else is copied into a new Set
      #            whose iteration order is ascending (elements must therefore be mutually comparable).
      # +total+::  the number of items from which the sets are drawn.
      # +distance_function+:: optional name such as :hypergeometric or :pearson. When given,
      #            +distance_<name>+ is invoked immediately and its result is exposed through #distance.
      #            Otherwise call distance_hypergeometric, distance_pearson, ... yourself.
      def initialize(a, b, total, distance_function = nil)
        @a = coerce_to_set(a)
        @b = coerce_to_set(b)
        @total = total

        return if distance_function.nil?

        # Calculate immediately if a distance function is given.
        @distance_function = "distance_#{distance_function}".to_sym
        @distance = send(@distance_function)
      end

      attr_reader :a, :b, :total, :distance

      # Size of set +a+.
      def m; a.size; end

      # Size of set +b+.
      def n; b.size; end

      # Intersection of +a+ and +b+ (memoized).
      def ab; @ab ||= a.intersection(b); end

      # Size of the intersection; for binary vectors this equals the dot product.
      def k; ab.size; end
      alias :a_dot_b :k

      # Calculate distance as the hypergeometric probability of seeing an intersection of +k+ or greater between
      # two sets +a+ and +b+. This is the complement of cdf(+k-1+, +m+, +n+, +total+).
      #
      # Relies on Distribution::Hypergeometric from the 'distribution' gem being loaded.
      def distance_hypergeometric
        @distance_hypergeometric ||= 1.0 - Distribution::Hypergeometric.cdf(k - 1, m, n, total)
      end

      # The generalization of the Pearson correlation coefficient to binary vectors, returned as
      # 1 - |correlation|.
      #
      # When either set is empty or covers the whole universe the coefficient is undefined (the formula is
      # 0 / 0). By convention that case is reported as 1.0 (no measurable correlation) instead of NaN.
      def distance_pearson
        @distance_pearson ||= begin
          denominator = Math.sqrt((total - m) * (total - n) * m * n)
          if denominator.zero?
            1.0
          else
            1.0 - (total * a_dot_b - m * n).abs / denominator
          end
        end
      end

      private

      # Return +items+ untouched when it already is a Set; otherwise build a Set with duplicates removed and
      # elements inserted in ascending order. This avoids SortedSet, which the stdlib 'set' library no longer
      # provides on Ruby 3.0+.
      def coerce_to_set(items)
        return items if items.is_a?(Set)

        Set.new(Set.new(items).sort)
      end
    end
  end
end
|
data/lib/sciruby/validation.rb
DELETED
@@ -1,368 +0,0 @@
|
|
1
|
-
# Reopens Rubyvis::Scale::Ordinal to add a #size reader.
module Rubyvis
  module Scale
    class Ordinal
      # Returns the size of the scale's @r collection.
      # NOTE(review): @r is presumably the ordinal scale's range array inside Rubyvis — confirm against the
      # Rubyvis version in use, since this depends on a private instance variable.
      def size
        @r.size
      end
    end
  end
end
|
10
|
-
|
11
|
-
warn "[DEPRECATION] SciRuby::Validation is deprecated and will be replaced in the near future."
|
12
|
-
|
13
|
-
# Added by John O. Woods.
|
14
|
-
#
|
15
|
-
# Methods for quantifying the predictive abilities of binary classifier systems (i.e., true positives, false positives,
|
16
|
-
# etc.)
|
17
|
-
#
|
18
|
-
# This module is likely to go away soon, or to change significantly.
|
19
|
-
#
|
20
|
-
require 'ostruct' # OpenStruct is used by Binary.plot and Binary#precision_axis_and_area.

module SciRuby
  module Validation
    # Binary confusion matrix for generating Receiver Operating Characteristic (ROC) and Precision-Recall curves.
    #
    # Predictions are pushed bin by bin, from best-scored to worst-scored. After each push the running counts
    # (positives, true positives, negatives, true negatives) are appended to the corresponding axis array, so
    # index +i+ of every axis describes the state after the i-th push (index 0 is the initial state).
    class Binary
      DEFAULT_VIS_OPTIONS = {
        :width => 400,
        :height => 400,
        :left => 20,
        :bottom => 20,
        :right => 10,
        :top => 5,
        :line_color => "#66abca",
        :line_width => 2
      }

      # Create a new confusion matrix, with +total_positives+ as the number of items that are known to be correct.
      # +initial_negatives+ is the total number of items to be tested as we push each prediction/set of predictions.
      #
      # Raises ArgumentError when +initial_negatives+ is smaller than +total_positives+.
      def initialize(initial_negatives, total_positives)
        unless initial_negatives >= total_positives
          raise(ArgumentError, "total predictions should be greater or equal to total positives")
        end

        @tp = [0]
        @p = [0]
        @tn = [initial_negatives - total_positives]
        @n = [initial_negatives]
        @threshold = { 1.0 => 0 }
        @roc_area = 0.0
      end

      # Allows us to zoom right in on a specific score and find out how many positives have been hit by the time
      # we've gotten that far down the list. Maps score => bin index.
      attr_reader :threshold

      # True positives axis
      attr_reader :tp
      alias :tp_axis :tp
      alias :true_positives_axis :tp

      # Positives (true and false) axis
      attr_reader :p
      alias :p_axis :p
      alias :positives_axis :p

      # True negatives axis
      attr_reader :tn
      alias :tn_axis :tn
      alias :true_negatives_axis :tn

      # Negatives (true and false) axis
      attr_reader :n
      alias :n_axis :n
      alias :negatives_axis :n

      # Area under the Receiver-Operating Characteristic (ROC) curve.
      attr_reader :roc_area

      # Data for a visualization/plot: an array of [x, y] pairs.
      #
      # +type+ is :roc (false positive rate vs. true positive rate) or :precision_recall (recall vs. precision).
      # Any other value raises ArgumentError.
      def data(type)
        case type
        when :roc
          fpr_axis.zip(tpr_axis)
        when :precision_recall
          tpr_axis.zip(precision_axis)
        else
          raise ArgumentError, "Unrecognized plot type: #{type.to_s}"
        end
      end

      class << self

        # Generate an empty panel with both axes, tick rules and labels.
        #
        # +options+ is completed in place with DEFAULT_VIS_OPTIONS (values already present win).
        def vis(options = {})
          # Plain-Ruby equivalent of ActiveSupport's reverse_merge!, so no extra dependency is needed.
          options.merge!(DEFAULT_VIS_OPTIONS) { |_key, given, _default| given }

          x = Rubyvis::Scale.linear(0.0, 1.0).range(0, options[:width])
          y = Rubyvis::Scale.linear(0.0, 1.0).range(0, options[:height])

          v = Rubyvis::Panel.new do
            width options[:width]
            height options[:height]
            bottom options[:bottom]
            left options[:left]
            right options[:right]
            top options[:top]
          end

          # Y-axis and ticks.
          v.add(Rubyvis::Rule).
            data(y.ticks).
            bottom(y).
            stroke_style( lambda { |dd| dd != 0 ? "#eee" : "#000" } ).
            anchor("left").add(Rubyvis::Label).
            visible( lambda { |dd| dd > 0 && dd < 1 } ).
            text(y.tick_format)

          # X-axis and ticks.
          v.add(Rubyvis::Rule).
            data(x.ticks).
            left(x).
            stroke_style( lambda { |dd| dd != 0 ? "#eee" : "#000" } ).
            anchor("bottom").add(Rubyvis::Label).
            visible( lambda { |dd| dd > 0 && dd < 1 } ).
            text(x.tick_format)

          v
        end

        # Plot an array of curves, or an OpenStruct of real and control curves. Kind of cluttered.
        #
        # Raises ArgumentError for any other kind of first argument (previously this failed later with a
        # NoMethodError on nil).
        def plot(hsh_or_ary, type, options = {})
          panel =
            case hsh_or_ary
            when OpenStruct then plot_hash(hsh_or_ary, type, options)
            when Array      then plot_array(hsh_or_ary, type, options)
            else
              raise ArgumentError, "Expected an OpenStruct or an Array of curves, got #{hsh_or_ary.class}"
            end

          panel.render
          require "rsvg2" # loaded lazily: only needed when something is actually rendered
          svg = RSVG::Handle.new_from_data(panel.to_svg).tap { |s| s.close }
          SciRuby::Plotter.new svg
        end

      protected
        # Plot :real and :control arrays on the same panel. Not really very useful, as it gets too cluttered.
        def plot_hash(hsh, type, options = {})
          options[:colors] ||= :category10
          options[:line_width] ||= 2

          colors = Rubyvis::Colors.send(options[:colors])
          options[:panel] = vis(options) # set up panel and store it in the options hash

          # may not have anything but controls
          overlay_curves(hsh.real, type, options, colors) if hsh.respond_to?(:real)
          # may not have a control set up
          overlay_curves(hsh.control, type, options, colors) if hsh.respond_to?(:control)

          options[:panel]
        end

        # Plot multiple Validation::Binary objects on the same panel.
        def plot_array(ary, type, options = {})
          options[:colors] ||= :category10
          colors = Rubyvis::Colors.send(options[:colors])

          options[:panel] = vis(options) # set up panel
          overlay_curves(ary, type, options, colors)

          options[:panel]
        end

        # Draw every curve onto options[:panel], cycling through +colors+ by position.
        def overlay_curves(curves, type, options, colors)
          curves.each_with_index do |curve, i|
            options[:panel] = curve.vis(type, options.merge({ :line_color => colors[i % colors.size] }))
          end
        end
      end

      # RubyVis object for a plot of +type+ (:roc or :precision_recall).
      #
      # Draws onto options[:panel] when present, otherwise onto a fresh panel from Binary.vis. +options+ is
      # completed in place with DEFAULT_VIS_OPTIONS.
      def vis(type, options = {})
        options.merge!(DEFAULT_VIS_OPTIONS) { |_key, given, _default| given }

        d = data(type)

        x = Rubyvis::Scale.linear(0.0, 1.0).range(0, options[:width])
        y = Rubyvis::Scale.linear(0.0, 1.0).range(0, options[:height])

        # Use existing panel or create new empty one
        v = options.has_key?(:panel) ? options[:panel] : self.class.vis(options)

        v.add(Rubyvis::Panel).
          data(d).
          add(Rubyvis::Dot).
          left(lambda { |dd| x.scale(dd[0]) } ).
          bottom(lambda { |dd| y.scale(dd[1]) } ).
          stroke_style("black").
          shape_size(2).
          title(lambda { |dd| "%0.1f" % dd[1] } )

        v.add(Rubyvis::Line).
          data(d).
          line_width(options[:line_width]).
          left(lambda { |dd| x.scale(dd[0]) } ).
          bottom(lambda { |dd| y.scale(dd[1]) } ).
          stroke_style(options[:line_color]).
          anchor("bottom")

        v
      end

      # Plot on a new or existing panel. To use an existing panel, just set option :panel to be a
      # Rubyvis::Panel object.
      #
      # To use a new panel, just don't set :panel. You can provide various options as for the vis()
      # method.
      #
      # The first argument should be the type of plot, :roc or :precision_recall.
      def plot(type, options = {})
        v = vis(type, options)

        v.render

        require "rsvg2" # loaded lazily: only needed when something is actually rendered

        svg = RSVG::Handle.new_from_data(v.to_svg).tap { |s| s.close }
        SciRuby::Plotter.new svg
      end

      # Get the "actual" precision at some recall value: the precision of the first bin whose recall has
      # reached +val+. Returns 0.0 when no bin reaches that recall.
      def precision_at_fraction_recall(val)
        @p.each_index do |i|
          # The original comparison was `<`, which is already true at bin 0 (recall 0) and so always
          # reported a precision of 1.
          return actual_precision(i) if tpr(i) >= val
        end
        0.0
      end

      # Push the number of predicted and the number of correctly predicted for a given score. ROC area thus far
      # is calculated instantly and returned.
      #
      # Raises ArgumentError unless both counts are Integers and +predicted+ >= +correctly_predicted+.
      def push(predicted, correctly_predicted, score = nil)
        # Integer rather than Fixnum: Fixnum no longer exists on current Rubies.
        unless predicted.is_a?(Integer) && correctly_predicted.is_a?(Integer)
          raise(ArgumentError, "Requires two integers as arguments")
        end
        unless predicted >= correctly_predicted
          raise(ArgumentError, "First argument should be greater than or equal to second argument")
        end

        @threshold[score] = @p.size unless score.nil?

        last_i = p.size - 1
        i = p.size

        @p << @p[last_i] + predicted
        @n << @n[last_i] - predicted

        @tp << @tp[last_i] + correctly_predicted
        @tn << @tn[last_i] - predicted + correctly_predicted

        delta_tpr = tpr(i) - tpr(last_i)
        delta_fpr = fpr(i) - fpr(last_i)

        # Trapezoid between the previous point and the new one.
        @roc_area += (tpr(last_i) + 0.5 * delta_tpr) * delta_fpr
      end

      # Some methods you'd want to validate don't offer scores for each and every item. In that case, just call
      # push_remainder in order to ensure the line gets drawn all the way to the right.
      def push_remainder
        # Push all remaining negatives, none correct, score of 0. Passing the total number of positives as
        # "correct" (as this method used to) pushed tp above the number of known positives and left the
        # curve short of the right edge.
        push n.last, 0, 0.0
      end

      # Given some bin +i+, what is the true positive rate / sensitivity / recall (capped at 1)?
      def tpr(i)
        [ @tp[i].quo(@tp[i] + @n[i] - @tn[i]), 1 ].min
      end
      alias :sensitivity :tpr
      alias :recall :tpr

      # Given some bin +i+, what is the false positive rate / fallout (capped at 1)?
      #
      # On a zero denominator the axes are dumped to STDERR and ZeroDivisionError is re-raised with the
      # offending bin's values.
      def fpr(i)
        false_positives = @p[i] - @tp[i]
        [ false_positives.quo(false_positives + @tn[i]), 1 ].min
      rescue ZeroDivisionError
        dump_axes
        raise ZeroDivisionError, "i=#{i}, p[i]=#{@p[i]}, tp[i]=#{@tp[i]}, tn[i]=#{@tn[i]}"
      end
      alias :fallout :fpr

      # Calculate the actual precision at some point (capped at 1; defined as 1 while nothing has been
      # predicted yet).
      #
      # This is used because a precision-recall curve actually levels out the stair-steps, and sometimes you need
      # the true value instead.
      #
      # To get the leveled-out values, you would use precision_axis_and_area or just precision_axis.
      def actual_precision(i)
        return 1 if @p[i] == 0 # Prevents ZeroDivisionError.

        [ @tp[i].quo(@p[i]), 1 ].min
      end

      # True positive rate axis (as we walk through the list of predictions from best-scored to worst-scored)
      def tpr_axis
        @p.each_index.map { |i| tpr(i) }
      end

      # False positive rate axis
      def fpr_axis
        @p.each_index.map { |i| fpr(i) }
      end

      # Precision axis for a precision-recall plot; and the area under the precision-recall curve.
      #
      # Returns an OpenStruct with two attributes: precision_axis (an array), and area (a Float).
      def precision_axis_and_area
        prec = Array.new(@p.size)
        area = 0.0
        last = @p.size - 1
        max = prec[last] = actual_precision(last)

        # Walk from the worst-scored bin back to the best, carrying the running maximum so the stair-steps
        # are leveled out.
        (last - 1).downto(0) do |i|
          max = prec[i] = [max, actual_precision(i)].max
          area += ( tpr(i + 1).to_f - tpr(i).to_f ) * max
        end
        prec[0] = 1.0

        OpenStruct.new({:precision_axis => prec, :area => area})
      end

      # Returns the number of prediction score bins.
      def bins
        @p.size - 1
      end

      # Returns the size of the plot (for axes).
      def size
        @p.size
      end

      # Returns the total number of predictions, correct and incorrect.
      def max
        @n[0]
      end

      # Returns the total number of known values.
      def known
        @n[0] - @tn[0]
      end

      # Returns just the precision axis without the area. Note that it takes just as long to calculate; this just
      # leaves off the area if you don't need it.
      def precision_axis
        precision_axis_and_area.precision_axis
      end

      private

      # Write all four count axes to STDERR to help diagnose a ZeroDivisionError.
      def dump_axes
        STDERR.puts "TP axis: #{tp_axis.inspect}"
        STDERR.puts "P axis: #{p_axis.inspect}"
        STDERR.puts "TN axis: #{tn_axis.inspect}"
        STDERR.puts "N axis: #{n_axis.inspect}"
      end
    end
  end
end
|
data/readme.md
DELETED
@@ -1,75 +0,0 @@
|
|
1
|
-
# SciRuby
|
2
|
-
|
3
|
-
* http://sciruby.com
|
4
|
-
* http://github.com/SciRuby/sciruby
|
5
|
-
|
6
|
-
## Description
|
7
|
-
|
8
|
-
Ruby has for some time had no equivalent to the beautifully constructed numpy, scipy, and matplotlib libraries for Python. We believe that the time for a Ruby science and visualization package has come. Sometimes when a solution of sugar and water becomes super-saturated, from it precipitates a pure, delicious, and diabetes-inducing crystal of sweetness, induced by no more than the tap of a finger. So it is, we believe, with the need for numeric and visualization libraries in Ruby.
|
9
|
-
|
10
|
-
We are not the first with this idea, but we are trying to bring it to life.
|
11
|
-
|
12
|
-
[![Click here to lend your support to SciRuby and make a donation at pledgie.com!](https://www.pledgie.com/campaigns/15783.png?skin_name=chrome)](http://www.pledgie.com/campaigns/15783)
|
13
|
-
|
14
|
-
## Warning!
|
15
|
-
|
16
|
-
Please be aware that SciRuby is in ALPHA status. If you're thinking of using SciRuby to write mission critical code, such as for driving a car or flying a space shuttle, you may wish to choose other software (for now).
|
17
|
-
|
18
|
-
## Planned Features
|
19
|
-
|
20
|
-
* Numarray: [Narray](http://narray.rubyforge.org/) rewrite.
|
21
|
-
* SciRuby::Analysis - domain-specific language (DSL) for hassle-free statistical analysis (originally from [Statsample](http://github.com/clbustos/statsample))
|
22
|
-
|
23
|
-
## Current Features
|
24
|
-
* SciRuby::Plotter - visualization GUI for updating plots as scripts are modified
|
25
|
-
* SciRuby::Editor - code editor for modifying rubyvis plot scripts
|
26
|
-
* [Rubyvis](http://rubyvis.rubyforge.org) - [Protovis](http://mbostock.github.com/protovis/)-like plotting in Ruby
|
27
|
-
* [Statsample](https://github.com/clbustos/statsample) - a suite for basic and advanced statistics in Ruby
|
28
|
-
* [Minimization](https://github.com/clbustos/minimization) algorithms in pure Ruby and using GSL
|
29
|
-
* Numeric [integration](https://github.com/clbustos/integration) algorithms
|
30
|
-
|
31
|
-
## Synopsis
|
32
|
-
|
33
|
-
FIX (code sample of usage)
|
34
|
-
|
35
|
-
## Requirements
|
36
|
-
|
37
|
-
* statsample (and optionally statsample-optimization)
|
38
|
-
* distribution
|
39
|
-
* rubyvis
|
40
|
-
* narray
|
41
|
-
* minimization
|
42
|
-
* integration
|
43
|
-
* green_shoes
|
44
|
-
|
45
|
-
## Installation
|
46
|
-
|
47
|
-
gem install sciruby
|
48
|
-
|
49
|
-
You'll also want to make sure you install the headers for the GUI. If you're using Ubuntu:
|
50
|
-
|
51
|
-
sudo apt-get install libgtk2.0-dev libgtksourceview2-dev librsvg2-dev libcairo2-dev
|
52
|
-
|
53
|
-
Instructions for installing these for OSX are available through the [Green Shoes wiki](https://github.com/ashbb/green_shoes/wiki/Building-Green-Shoes-on-OSX).
|
54
|
-
|
55
|
-
You can also optionally get rb-gsl, statistics2, and other useful architecture-specific packages using
|
56
|
-
|
57
|
-
gem install statsample-optimization
|
58
|
-
|
59
|
-
## Developers
|
60
|
-
|
61
|
-
After checking out the source, run:
|
62
|
-
|
63
|
-
$ rake newb
|
64
|
-
$ bundle install
|
65
|
-
|
66
|
-
This task will install any missing dependencies, run the tests/specs,
|
67
|
-
and generate the RDoc.
|
68
|
-
|
69
|
-
## License
|
70
|
-
|
71
|
-
SciRuby is licensed under the GNU General Public License, v3.
|
72
|
-
|
73
|
-
## Donations
|
74
|
-
|
75
|
-
[![Click here to lend your support to SciRuby and make a donation at pledgie.com!](https://www.pledgie.com/campaigns/15783.png?skin_name=chrome)](http://www.pledgie.com/campaigns/15783)
|
data/static/sciruby-icon.png
DELETED
Binary file
|
data/test/helpers_tests.rb
DELETED
@@ -1,58 +0,0 @@
|
|
1
|
-
$:.unshift(File.expand_path(File.dirname(__FILE__)+'/../lib/'))
|
2
|
-
$:.unshift(File.expand_path(File.dirname(__FILE__)+'/'))
|
3
|
-
|
4
|
-
|
5
|
-
require 'minitest/unit'
|
6
|
-
require 'tempfile'
|
7
|
-
require 'tmpdir'
|
8
|
-
require 'shoulda'
|
9
|
-
|
10
|
-
require 'sciruby'
|
11
|
-
|
12
|
-
# Test-suite glue: mixes Shoulda into MiniTest::Unit::TestCase and adds a few numeric and
# Test::Unit-compatibility assertions to MiniTest::Assertions.
module MiniTest
  class Unit
    class TestCase
      include Shoulda::InstanceMethods
      extend Shoulda::ClassMethods
      include Shoulda::Assertions
    end
  end

  module Assertions
    # Assert that +obs+ has the same size as +exp+ and that every element is within +delta+ of the
    # corresponding expected one. +exp+ must respond to #data_with_nils; +obs+ is indexed with [].
    # +msg+ (default: a description of both vectors) is now forwarded to the underlying assertions;
    # it used to be built and then dropped.
    def assert_similar_vector(exp, obs, delta=1e-10, msg=nil)
      msg ||= "Different vectors #{exp} - #{obs}"
      assert_equal(exp.size, obs.size, msg)
      exp.data_with_nils.each_with_index do |v, i|
        assert_in_delta(v, obs[i], delta, msg)
      end
    end

    # Assert that two matrices have identical dimensions and that every element of +obs+ is within
    # +delta+ of the matching element of +exp+. +msg+ is appended to each failure message.
    def assert_equal_matrix(exp, obs, delta=1e-10, msg=nil)
      assert_equal(exp.row_size, obs.row_size, "Different row size.#{msg}")
      assert_equal(exp.column_size, obs.column_size, "Different column size.#{msg}")
      exp.row_size.times do |i|
        exp.column_size.times do |j|
          assert_in_delta(exp[i,j], obs[i,j], delta, "Different element #{i},#{j}\nExpected:\n#{exp}\nObserved:\n#{obs}.#{msg}")
        end
      end
    end

    # Test::Unit-style names, added only when the including framework does not already provide them.
    alias :assert_raise :assert_raises unless method_defined? :assert_raise
    alias :assert_not_equal :refute_equal unless method_defined? :assert_not_equal
    alias :assert_not_same :refute_same unless method_defined? :assert_not_same

    unless method_defined? :assert_nothing_raised
      # Fails if the block raises anything. +msg+ may contain a %s placeholder for the exception.
      def assert_nothing_raised(msg=nil)
        msg ||= "Nothing should be raised, but raised %s"
        begin
          yield
          not_raised = true
        rescue Exception => e # deliberately broad: this rescue has always caught every exception class
          not_raised = false
          msg = sprintf(msg, e)
        end
        assert(not_raised, msg)
      end
    end
  end
end
56
|
-
|
57
|
-
MiniTest::Unit.autorun
|
58
|
-
|
data/test/test_recommend.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
|
2
|
-
|
3
|
-
# Checks SciRuby::Recommend::SetDistance against known values for two 4-element sets that share
# two members, drawn from a universe of 10000 items.
class TestRecommend < MiniTest::Unit::TestCase
  context(SciRuby::Recommend::SetDistance) do
    setup do
      @a, @b, @total = [1,3,5,6], [5,6,7,9], 10000
      @hypg = SciRuby::Recommend::SetDistance.new(@a, @b, @total, :hypergeometric)
    end
    should "return correct value for distance with hypergeometric as default" do
      # The expected value is about 7.2e-07, so the tolerance has to be far smaller than that; the former
      # 0.00001 would also have accepted 0.0.
      assert_in_delta 7.19879956756486e-07, @hypg.distance, 1e-10
    end
    should "return correct value for distance_pearson" do
      assert_in_delta 0.500200080032013, @hypg.distance_pearson, 0.00001
    end
  end
end
|