rust 0.3 → 0.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,62 @@
1
+ require_relative 'core'
2
+
3
+ module Rust::Plots
4
# Base class for plots that compare the distributions of one or more data
# series. Subclasses read the collected series from @series.
class DistributionPlot < BasePlot
  # Starts with an empty list of series.
  def initialize
    super()
    @series = []
  end

  # Registers a series to be drawn.
  #
  # data    - the values of the series.
  # options - per-series keyword options, stored untouched next to the data.
  #
  # Returns the plot itself, so that calls can be chained.
  def series(data, **options)
    @series.push([data, options])

    self
  end
end
16
+
17
# Distribution plot that draws the density of every registered series.
class DensityPlot < DistributionPlot
  protected

  # Draws one density curve per series and returns the plot itself.
  def _show
    @series.each_with_index do |(data, options), position|
      # Every series is exported under the same name right before it is drawn.
      Rust["plotter.series"] = data

      # The first series is drawn with "plot", all the following ones with "lines".
      command = position.zero? ? "plot" : "lines"

      function = Rust::Function.new(command)
      function.options = self._augmented_options({"col" => options[:color]})
      function.arguments << Rust::Variable.new("density(plotter.series)")
      function.call
    end

    self
  end
end
40
+
41
# Distribution plot that draws all the registered series in a single
# "boxplot" call, one box per series.
class BoxPlot < DistributionPlot
  protected

  # Exports every series, issues the "boxplot" call and returns the plot itself.
  def _show
    boxplot = Rust::Function.new("boxplot")

    labels = @series.each_with_index.map do |(values, options), position|
      # Each series gets its own exported variable, passed as one argument.
      variable = "plotter.series#{position}"
      Rust[variable] = values
      boxplot.arguments << Rust::Variable.new(variable)

      # Series without a :name option are labelled with their 1-based position.
      options[:name] || (position + 1).to_s
    end

    boxplot.options = self._augmented_options({'names' => labels})

    boxplot.call

    self
  end
end
62
+ end
@@ -0,0 +1,4 @@
1
+ self_path = File.expand_path(__FILE__)
2
+ Dir.glob(File.dirname(self_path) + "/*.rb").each do |lib|
3
+ require_relative lib unless lib == self_path
4
+ end
@@ -1,6 +1,6 @@
1
- require_relative 'rust-core'
1
+ require_relative '../core'
2
2
 
3
- module Rust:: Correlation
3
+ module Rust::Correlation
4
4
  class Pearson
5
5
  def self.test(d1, d2)
6
6
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
@@ -10,13 +10,14 @@ module Rust:: Correlation
10
10
  Rust['correlation.a'] = d1
11
11
  Rust['correlation.b'] = d2
12
12
 
13
- Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')")
13
+ _, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')", true)
14
14
 
15
15
  result = Result.new
16
16
  result.name = "Pearson's product-moment correlation"
17
17
  result.statistics['t'] = Rust._pull('correlation.result$statistic')
18
18
  result.pvalue = Rust._pull('correlation.result$p.value')
19
19
  result.correlation = Rust._pull('correlation.result$estimate')
20
+ result.exact = !warnings.include?("Cannot compute exact p-value with ties")
20
21
 
21
22
  return result
22
23
  end
@@ -36,13 +37,14 @@ module Rust:: Correlation
36
37
  Rust['correlation.a'] = d1
37
38
  Rust['correlation.b'] = d2
38
39
 
39
- Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='s')")
40
+ _, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='s')", true)
40
41
 
41
42
  result = Result.new
42
43
  result.name = "Spearman's rank correlation rho"
43
44
  result.statistics['S'] = Rust._pull('correlation.result$statistic')
44
45
  result.pvalue = Rust._pull('correlation.result$p.value')
45
46
  result.correlation = Rust._pull('correlation.result$estimate')
47
+ result.exact = !warnings.include?("Cannot compute exact p-value with ties")
46
48
 
47
49
  return result
48
50
  end
@@ -62,13 +64,14 @@ module Rust:: Correlation
62
64
  Rust['correlation.a'] = d1
63
65
  Rust['correlation.b'] = d2
64
66
 
65
- Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')")
67
+ _, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='k')", true)
66
68
 
67
69
  result = Result.new
68
70
  result.name = "Kendall's rank correlation tau"
69
71
  result.statistics['T'] = Rust._pull('correlation.result$statistic')
70
72
  result.pvalue = Rust._pull('correlation.result$p.value')
71
73
  result.correlation = Rust._pull('correlation.result$estimate')
74
+ result.exact = !warnings.include?("Cannot compute exact p-value with ties")
72
75
 
73
76
  return result
74
77
  end
@@ -84,11 +87,13 @@ module Rust:: Correlation
84
87
  attr_accessor :statistics
85
88
  attr_accessor :pvalue
86
89
  attr_accessor :correlation
90
+ attr_accessor :exact
87
91
 
88
92
  alias :estimate :correlation
89
93
 
90
94
  def initialize
91
95
  @statistics = {}
96
+ @exact = true
92
97
  end
93
98
 
94
99
  def [](name)
@@ -101,6 +106,7 @@ module Rust:: Correlation
101
106
 
102
107
  def to_s
103
108
  return "#{name}. Correlation = #{correlation}, P-value = #{pvalue} " +
109
+ (!@exact ? "P-value is not exact. " : "") +
104
110
  "#{ statistics.map { |k, v| k.to_s + " -> " + v.to_s }.join(", ") }."
105
111
  end
106
112
  end
@@ -0,0 +1,128 @@
1
+ require_relative '../core'
2
+
3
+ module Rust::Descriptive
4
+ class << self
5
# Arithmetic mean of an Array of numerics, always returned as a Float.
# An empty Array yields NaN (0.0 / 0).
#
# Raises TypeError unless data is an Array containing only Numeric values.
def mean(data)
  numeric_array = data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless numeric_array

  total = data.sum.to_f
  total / data.size
end
10
+
11
# Standard deviation of an Array of numerics: the square root of the value
# returned by #variance for the same data.
#
# Raises TypeError unless data is an Array containing only Numeric values.
def standard_deviation(data)
  unless data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics"
  end

  spread = variance(data)
  Math.sqrt(spread)
end
alias :sd :standard_deviation
alias :stddev :standard_deviation
18
+
19
# Sample variance (divisor: size - 1) of an Array of numerics.
# Returns NaN when fewer than two values are given.
#
# Raises TypeError unless data is an Array containing only Numeric values.
def variance(data)
  unless data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics"
  end
  return Float::NAN if data.size < 2

  center = mean(data)
  squared_deviations = data.map { |value| (value - center) ** 2 }
  squared_deviations.sum.to_f / (data.size - 1)
end
alias :var :variance
27
+
28
# Median of an Array of numerics.
#
# Returns NaN for an empty Array, the middle element for an odd number of
# values, and the (Float) average of the two middle elements otherwise.
#
# Raises TypeError unless data is an Array containing only Numeric values.
def median(data)
  unless data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics"
  end

  ordered = data.sort
  count = ordered.size
  return Float::NAN if count.zero?

  middle = count / 2
  return ordered[middle] if count.odd?

  (ordered[middle - 1] + ordered[middle]) / 2.0
end
41
+
42
# Sum of an Array of numerics (0 for an empty Array).
#
# Raises TypeError unless data is an Array containing only Numeric values.
def sum(data)
  numeric_array = data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
  raise TypeError, "Expecting Array of numerics" unless numeric_array

  data.sum
end
47
+
48
# Quantiles of an Array of numerics, linearly interpolated between the two
# closest ranks of the sorted data.
#
# data        - Array of Numeric values.
# percentiles - Array of Numeric values, each within [0, 1].
#
# Returns a Hash mapping every requested percentile to its quantile. With
# empty data every percentile maps to nil.
#
# Raises TypeError when either argument is not an Array of numerics and a
# RuntimeError when a percentile lies outside [0, 1].
def quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
  unless data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics"
  end
  unless percentiles.is_a?(Array) && percentiles.all? { |e| e.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics"
  end
  raise "Percentiles outside the range: #{percentiles}" unless percentiles.all? { |e| e.between?(0, 1) }

  ordered = data.sort
  last_rank = [ordered.size - 1, 0].max

  values = percentiles.map do |percentile|
    # Fractional zero-based rank; the "1 + ... - 1" form is kept as is so the
    # floating point result does not change.
    rank = 1 + last_rank * percentile - 1
    lower_rank = rank.floor
    lower = ordered[lower_rank]
    upper = ordered[rank.ceil]
    weight = rank - lower_rank

    if rank > lower_rank && upper != lower
      # The rank falls between two different values: interpolate linearly.
      (1 - weight) * lower + weight * upper
    else
      lower
    end
  end

  percentiles.zip(values).to_h
end
79
+
80
# Outliers of data with respect to its own distribution.
# See #outliers_according_to for the meaning of k and of the options.
def outliers(data, k=1.5, **opts)
  outliers_according_to(data, data, k, **opts)
end

# Values of data lying outside the fences [Q1 - k * IQR, Q3 + k * IQR], where
# the quartiles are computed on data_distribution.
#
# data              - the values to be checked.
# data_distribution - the values the quartiles are computed on.
# k                 - multiplier of the interquartile range (default: 1.5).
# opts[:side]       - restricts the result to one side:
#                     :positive, :pos, :p, :+ -> only values above the upper fence;
#                     :negative, :neg, :n, :- -> only values below the lower fence.
#                     Any other value (or no value) returns both sides.
#
# Returns an Array with the negative outliers followed by the positive ones
# (or only the requested side). Returns an empty Array when no quartiles can
# be computed because the distribution is empty.
def outliers_according_to(data, data_distribution, k=1.5, **opts)
  quantiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
  q1 = quantiles[0.25]
  q3 = quantiles[0.75]

  # An empty distribution has no quartiles (nil): nothing can be flagged.
  return [] if q1.nil? || q3.nil?

  iqr = q3 - q1

  positive_outliers = data.select { |d| d > q3 + iqr * k }
  negative_outliers = data.select { |d| d < q1 - iqr * k }

  side = opts[:side]
  return negative_outliers + positive_outliers unless side

  # The short aliases used to be swapped (:neg/:n selected the positive side
  # and :pos/:p the negative one); each alias now matches its long form.
  case side.to_sym
  when :positive, :pos, :p, :+
    positive_outliers
  when :negative, :neg, :n, :-
    negative_outliers
  else
    negative_outliers + positive_outliers
  end
end
105
+ end
106
+ end
107
+
108
# Short helper methods that forward to the functions of Rust::Descriptive
# with the same meaning.
module Rust::RBindings
  # Forwards to Rust::Descriptive.mean.
  def mean(series)
    Rust::Descriptive.mean(series)
  end

  # Forwards to Rust::Descriptive.median.
  def median(series)
    Rust::Descriptive.median(series)
  end

  # Forwards to Rust::Descriptive.variance.
  def var(series)
    Rust::Descriptive.variance(series)
  end

  # Forwards to Rust::Descriptive.standard_deviation.
  def sd(series)
    Rust::Descriptive.standard_deviation(series)
  end

  # Forwards to Rust::Descriptive.quantile, with the same default percentiles.
  def quantile(series, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
    Rust::Descriptive.quantile(series, percentiles)
  end
end
@@ -1,8 +1,6 @@
1
- require 'code-assertions'
1
+ require_relative '../core'
2
2
 
3
- Rust.exclusive do
4
- Rust._eval("library(effsize)")
5
- end
3
+ Rust.prerequisite('effsize')
6
4
 
7
5
  module Rust::EffectSize
8
6
  class Result
@@ -16,14 +14,16 @@ module Rust::EffectSize
16
14
  return "#{name} = #{estimate} (#{magnitude}) [#{confidence_interval.min}, #{confidence_interval.max}]"
17
15
  end
18
16
  end
19
- end
20
17
 
21
- module Rust::EffectSize::CliffDelta
22
- class << self
23
- def compute(d1, d2)
18
+ class CliffDelta
19
+ def self.compute(d1, d2)
24
20
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
25
21
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
26
22
 
23
+ if d1.size <= 1 || d2.size <= 1
24
+ return Rust::EffectSize::Result.new
25
+ end
26
+
27
27
  Rust.exclusive do
28
28
  Rust['effsize.a'] = d1
29
29
  Rust['effsize.b'] = d2
@@ -32,23 +32,25 @@ module Rust::EffectSize::CliffDelta
32
32
 
33
33
  result = Rust::EffectSize::Result.new
34
34
  result.name = "Cliff's delta"
35
- result.estimate = Rust._pull("effsize.result$estimate")
36
- result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
37
- result.confidence_level = Rust._pull("effsize.result$conf.level")
38
- result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
35
+ result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
36
+ result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
37
+ result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
38
+ result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
39
39
 
40
40
  return result
41
41
  end
42
42
  end
43
43
  end
44
- end
45
-
46
- module Rust::EffectSize::CohenD
47
- class << self
48
- def compute(d1, d2)
44
+
45
+ class CohenD
46
+ def self.compute(d1, d2)
49
47
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
50
48
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
51
49
 
50
+ if d1.size <= 1 || d2.size <= 1
51
+ return Rust::EffectSize::Result.new
52
+ end
53
+
52
54
  Rust.exclusive do
53
55
  Rust['effsize.a'] = d1
54
56
  Rust['effsize.b'] = d2
@@ -57,10 +59,10 @@ module Rust::EffectSize::CohenD
57
59
 
58
60
  result = Rust::EffectSize::Result.new
59
61
  result.name = "Cohen's d"
60
- result.estimate = Rust._pull("effsize.result$estimate")
61
- result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
62
- result.confidence_level = Rust._pull("effsize.result$conf.level")
63
- result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
62
+ result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
63
+ result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
64
+ result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
65
+ result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
64
66
 
65
67
  return result
66
68
  end
@@ -0,0 +1,248 @@
1
+ require_relative '../core'
2
+
3
class Numeric
  # Distance between two numbers: the absolute value of their difference.
  #
  # Raises TypeError when other is not a Numeric.
  def distance(other)
    unless other.is_a? Numeric
      raise TypeError, "no implicit conversion of #{other.class} into Numeric"
    end

    gap = self - other
    gap.abs
  end
end
10
+
11
class Array
  # Distance between two arrays: the sum of the element-wise distances, where
  # every element is first converted with to_i. Positions missing from the
  # shorter array count as 0 (nil.to_i).
  #
  # Raises TypeError when other is not an Array.
  def distance(other)
    unless other.is_a? Array
      raise TypeError, "no implicit conversion of #{other.class} into Array"
    end

    longer, shorter = size > other.size ? [self, other] : [other, self]

    longer.each_index.inject(0) do |total, index|
      total + longer[index].to_i.distance(shorter[index].to_i)
    end
  end
end
25
+
26
class String
  # Distance between two strings: the Array#distance of their byte sequences.
  #
  # Raises TypeError when other is not a String.
  def distance(other)
    unless other.is_a? String
      raise TypeError, "no implicit conversion of #{other.class} into String"
    end

    own_bytes = self.bytes
    own_bytes.distance(other.bytes)
  end
end
33
+
34
+ module Rust
35
# A subset of the outcomes of a random variable, stored as a Hash that maps
# each outcome to its probability. Slices are obtained by filtering outcomes
# (see the comparison operators, #so_that and #between).
class RandomVariableSlice
  # values - Hash mapping outcomes to probabilities.
  #
  # Raises TypeError when values is not a Hash.
  def initialize(values)
    raise TypeError, "Expected Hash" unless values.is_a?(Hash)

    @values = values
  end

  # Without argument: total probability of the outcomes in the slice.
  # With an outcome: the stored probability of that outcome (nil if the
  # outcome is not part of the slice).
  #
  # Only nil means "no argument": false is a legitimate outcome and is looked
  # up like any other key.
  def probability(v=nil)
    return @values.values.sum if v.nil?

    @values[v]
  end

  # Most likely outcome of the slice, or nil when the slice is empty.
  def ml
    @values.max_by { |_, probability| probability }&.first
  end

  # Sum of outcome * probability over the outcomes of the slice.
  def expected
    @values.map { |k, v| k*v }.sum
  end

  # The comparison operators do not return booleans: each of them returns
  # the slice made of the outcomes satisfying the comparison with n.

  def >(n)
    so_that { |k| k > n }
  end

  def >=(n)
    so_that { |k| k >= n }
  end

  def <(n)
    so_that { |k| k < n }
  end

  def <=(n)
    so_that { |k| k <= n }
  end

  def ==(n)
    so_that { |k| k == n }
  end

  # Slice of the outcomes for which the given block returns a truthy value.
  def so_that
    RandomVariableSlice.new(@values.select { |k, _| yield(k) })
  end

  # Slice of the outcomes k such that k.between?(a, b).
  def between(a, b)
    RandomVariableSlice.new(@values.select { |k, _| k.between? a, b })
  end
end
86
+
87
# A discrete random variable: a Hash mapping every possible outcome to its
# probability, with a total probability of 1 (within EPSILON).
class RandomVariable < RandomVariableSlice
  # Tolerance accepted on the total probability; also the threshold at or
  # below which an outcome is folded into its nearest neighbour by #approx!.
  EPSILON = 1e-7

  attr_reader :values

  # values - Hash mapping outcomes to probabilities (default: always 0).
  # exact  - when true, negligible outcomes are never folded (see #approx!).
  #
  # Raises a RuntimeError when a probability is outside [0, 1] or when the
  # probabilities do not sum to 1 (within EPSILON).
  def initialize(values = {0 => 1.0}, exact = false)
    @values = values
    @exact = exact

    raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }
    raise "The cumulative probability must be exactly 1 (#{@values.values.sum} instead)" unless @values.values.sum.between? 1-EPSILON, 1+EPSILON

    approx!
  end

  # Probability of the outcome v (0.0 when v is not a possible outcome).
  def probability(v)
    @values[v].to_f
  end

  # Random variable obtained by adding each outcome of this variable to each
  # outcome of other, weighting every pair by the product of the probabilities.
  def +(other)
    combine(other) { |mine, theirs| mine + theirs }
  end

  # With an Integer: see #rep. With a RandomVariable: see #mul.
  #
  # Raises a RuntimeError for any other argument.
  def *(times)
    case times
    when Integer
      rep(times)
    when RandomVariable
      mul(times)
    else
      raise "The argument must be an Integer or a RandomVariable"
    end
  end

  # Random variable obtained by multiplying each outcome of this variable by
  # each outcome of other, weighting every pair by the product of the
  # probabilities.
  def mul(other)
    combine(other) { |mine, theirs| mine * theirs }
  end

  # Sum (see #+) of `times` copies of this variable.
  # NOTE(review): for times <= 1 the variable itself is returned unchanged.
  def rep(times)
    rv = self
    (times-1).times { rv += self }

    rv
  end

  # Marks the variable as exact, so that #approx! becomes a no-op.
  def exact!
    @exact = true
  end

  # Unless the variable is exact, removes every outcome whose probability is
  # at most EPSILON and adds its probability to the nearest remaining outcome
  # (nearest according to the outcomes' #distance).
  def approx!
    return if @exact

    negligible = @values.select { |_, probability| probability <= EPSILON }.keys

    negligible.each do |outcome|
      mass = @values.delete outcome
      nearest = @values.keys.min_by { |k| k.distance outcome }
      @values[nearest] += mass
    end
  end

  # Draws a random outcome according to the probabilities.
  def extract
    threshold = rand

    cumulative = 0
    @values.each do |key, prob|
      cumulative += prob

      return key if cumulative >= threshold
    end

    # The accumulated probability may stay slightly below the drawn number
    # (floating point rounding, or a total within EPSILON of 1). Falling out
    # of the loop used to return the whole Hash; the last outcome is the one
    # that owns the remaining probability mass.
    @values.keys.last
  end

  # Builds a random variable from an incomplete Hash, assigning the missing
  # probability (1 - sum of the given ones) to key. The Hash is modified.
  def self.complete(hash, key=0)
    hash[key] = 1 - hash.values.sum
    RandomVariable.new(hash)
  end

  private

  # Joint distribution of this variable and other: the block maps every pair
  # of outcomes to the resulting outcome, whose probability accumulates the
  # product of the probabilities of the pair.
  def combine(other)
    joint = {}

    @values.each do |my_key, my_value|
      other.values.each do |other_key, other_value|
        key = yield(my_key, other_key)

        joint[key] = joint[key].to_f + (my_value * other_value)
      end
    end

    RandomVariable.new(joint, @exact)
  end
end
188
+
189
# Random variable whose outcomes all have the same probability.
class UniformRandomVariable < RandomVariable
  # values - the outcomes; each one gets probability 1.0 / values.size.
  # exact  - forwarded to RandomVariable#initialize.
  def initialize(values, exact = false)
    distribution = values.map { |outcome| [outcome, 1.0 / values.size] }.to_h

    super(distribution, exact)
  end
end
194
+
195
# Mixin providing the mathematical notation P(...) and E(...) for slices of
# random variables.
module Probabilities
  # Probability of a slice of a random variable, e.g. P(x > 3).
  #
  # Raises a RuntimeError when v is a whole RandomVariable (its probability
  # is not meaningful) or when v is not a slice at all.
  def P(v)
    # The message used to be a copy of the one in E ("expected value").
    raise "Cannot compute the probability of a #{v.class}" unless v.is_a? RandomVariableSlice
    raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable

    v.probability
  end

  # Expected value of a random variable or of one of its slices.
  #
  # Raises a RuntimeError when v is not a RandomVariableSlice.
  def E(v)
    raise "Cannot compute the expected value of a #{v.class}" unless v.is_a? RandomVariableSlice

    v.expected
  end
end
213
+
214
# Ready-made random variables.
class RandomVariable
  # Distribution over the lowercase letters "a".."z"; the weights sum to 1.
  # Presumably relative letter frequencies in English text, as the name says.
  letter_frequencies = {
    "a" => 0.08167, "b" => 0.01492, "c" => 0.02782, "d" => 0.04253,
    "e" => 0.12703, "f" => 0.02228, "g" => 0.02015, "h" => 0.06094,
    "i" => 0.06966, "j" => 0.00153, "k" => 0.00772, "l" => 0.04025,
    "m" => 0.02406, "n" => 0.06749, "o" => 0.07507, "p" => 0.01929,
    "q" => 0.00095, "r" => 0.05987, "s" => 0.06327, "t" => 0.09056,
    "u" => 0.02758, "v" => 0.00978, "w" => 0.02360, "x" => 0.00150,
    "y" => 0.01974, "z" => 0.00074
  }
  ENGLISH_ALPHABET = RandomVariable.new(letter_frequencies)

  # Uniform distribution over the outcomes 1..6.
  DICE = UniformRandomVariable.new((1..6).to_a)

  # Uniform distribution over the outcomes "h" and "t".
  COIN = UniformRandomVariable.new(%w[h t])
end
248
+ end