rust 0.3 → 0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ require_relative 'core'
2
+
3
+ module Rust::Plots
4
# Base class for plots drawn from one or more data series.
# Each registered series is kept together with its per-series options.
class DistributionPlot < BasePlot
  def initialize
    super()
    @series = []
  end

  # Registers a series (+data+) with its +options+ and returns the plot
  # itself so that calls can be chained.
  def series(data, **options)
    @series.push([data, options])
    self
  end
end
16
+
17
# Draws the density curve of every registered series on the same figure.
class DensityPlot < DistributionPlot
  protected

  # Sends one R call per series: the first series is drawn with "plot",
  # every following one is added with "lines". Returns the plot itself.
  def _show
    @series.each_with_index do |(data, options), index|
      Rust["plotter.series"] = data

      command = index.zero? ? "plot" : "lines"

      function = Rust::Function.new(command)
      function.options = _augmented_options({"col" => options[:color]})
      function.arguments << Rust::Variable.new("density(plotter.series)")
      function.call
    end

    self
  end
end
40
+
41
# Draws one box per registered series through a single R "boxplot" call.
class BoxPlot < DistributionPlot
  protected

  # Pushes every series to R as "plotter.series<i>", passes them all as
  # arguments to boxplot and labels each box with the series' :name option
  # (or its 1-based position when no name was given). Returns the plot itself.
  def _show
    boxplot = Rust::Function.new("boxplot")

    labels = @series.each_with_index.map do |(values, options), index|
      variable = "plotter.series#{index}"
      Rust[variable] = values
      boxplot.arguments << Rust::Variable.new(variable)

      options[:name] || (index + 1).to_s
    end

    boxplot.options = _augmented_options({'names' => labels})
    boxplot.call

    self
  end
end
62
+ end
@@ -0,0 +1,4 @@
1
# Load every Ruby file that sits next to this one, skipping this file itself.
this_file = File.expand_path(__FILE__)
siblings = Dir.glob(File.dirname(this_file) + "/*.rb")
siblings.each do |path|
  next if path == this_file

  require_relative path
end
@@ -1,6 +1,6 @@
1
- require_relative 'rust-core'
1
+ require_relative '../core'
2
2
 
3
- module Rust:: Correlation
3
+ module Rust::Correlation
4
4
  class Pearson
5
5
  def self.test(d1, d2)
6
6
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
@@ -10,13 +10,14 @@ module Rust:: Correlation
10
10
  Rust['correlation.a'] = d1
11
11
  Rust['correlation.b'] = d2
12
12
 
13
- Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')")
13
+ _, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')", true)
14
14
 
15
15
  result = Result.new
16
16
  result.name = "Pearson's product-moment correlation"
17
17
  result.statistics['t'] = Rust._pull('correlation.result$statistic')
18
18
  result.pvalue = Rust._pull('correlation.result$p.value')
19
19
  result.correlation = Rust._pull('correlation.result$estimate')
20
+ result.exact = !warnings.include?("Cannot compute exact p-value with ties")
20
21
 
21
22
  return result
22
23
  end
@@ -36,13 +37,14 @@ module Rust:: Correlation
36
37
  Rust['correlation.a'] = d1
37
38
  Rust['correlation.b'] = d2
38
39
 
39
- Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='s')")
40
+ _, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='s')", true)
40
41
 
41
42
  result = Result.new
42
43
  result.name = "Spearman's rank correlation rho"
43
44
  result.statistics['S'] = Rust._pull('correlation.result$statistic')
44
45
  result.pvalue = Rust._pull('correlation.result$p.value')
45
46
  result.correlation = Rust._pull('correlation.result$estimate')
47
+ result.exact = !warnings.include?("Cannot compute exact p-value with ties")
46
48
 
47
49
  return result
48
50
  end
@@ -62,13 +64,14 @@ module Rust:: Correlation
62
64
  Rust['correlation.a'] = d1
63
65
  Rust['correlation.b'] = d2
64
66
 
65
- Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='p')")
67
+ _, warnings = Rust._eval("correlation.result <- cor.test(correlation.a, correlation.b, method='k')", true)
66
68
 
67
69
  result = Result.new
68
70
  result.name = "Kendall's rank correlation tau"
69
71
  result.statistics['T'] = Rust._pull('correlation.result$statistic')
70
72
  result.pvalue = Rust._pull('correlation.result$p.value')
71
73
  result.correlation = Rust._pull('correlation.result$estimate')
74
+ result.exact = !warnings.include?("Cannot compute exact p-value with ties")
72
75
 
73
76
  return result
74
77
  end
@@ -84,11 +87,13 @@ module Rust:: Correlation
84
87
  attr_accessor :statistics
85
88
  attr_accessor :pvalue
86
89
  attr_accessor :correlation
90
+ attr_accessor :exact
87
91
 
88
92
  alias :estimate :correlation
89
93
 
90
94
  def initialize
91
95
  @statistics = {}
96
+ @exact = true
92
97
  end
93
98
 
94
99
  def [](name)
@@ -101,6 +106,7 @@ module Rust:: Correlation
101
106
 
102
107
  def to_s
103
108
  return "#{name}. Correlation = #{correlation}, P-value = #{pvalue} " +
109
+ (!@exact ? "P-value is not exact. " : "") +
104
110
  "#{ statistics.map { |k, v| k.to_s + " -> " + v.to_s }.join(", ") }."
105
111
  end
106
112
  end
@@ -0,0 +1,128 @@
1
+ require_relative '../core'
2
+
3
module Rust
  # Descriptive statistics computed in plain Ruby on Arrays of Numeric values.
  # Every function is a singleton method, e.g. Rust::Descriptive.mean([1, 2, 3]).
  module Descriptive
    class << self
      # Arithmetic mean of +data+ as a Float (NaN for an empty Array).
      # Raises TypeError unless +data+ is an Array of Numeric.
      def mean(data)
        ensure_numeric_array!(data)

        data.sum.to_f / data.size
      end

      # Sample standard deviation: the square root of #variance.
      # Raises TypeError unless +data+ is an Array of Numeric.
      def standard_deviation(data)
        ensure_numeric_array!(data)

        Math.sqrt(variance(data))
      end
      alias :sd :standard_deviation
      alias :stddev :standard_deviation

      # Sample variance (divides by n - 1). Returns NaN when fewer than two
      # values are given.
      # Raises TypeError unless +data+ is an Array of Numeric.
      def variance(data)
        ensure_numeric_array!(data)
        return Float::NAN if data.size < 2

        center = mean(data)
        data.map { |v| (v - center) ** 2 }.sum.to_f / (data.size - 1)
      end
      alias :var :variance

      # Median of +data+: the middle element of the sorted values, or the
      # average of the two middle elements when the size is even.
      # Returns NaN for an empty Array.
      # Raises TypeError unless +data+ is an Array of Numeric.
      def median(data)
        ensure_numeric_array!(data)
        return Float::NAN if data.empty?

        sorted = data.sort
        middle = sorted.size / 2
        return sorted[middle] if sorted.size.odd?

        (sorted[middle - 1] + sorted[middle]) / 2.0
      end

      # Sum of +data+.
      # Raises TypeError unless +data+ is an Array of Numeric.
      def sum(data)
        ensure_numeric_array!(data)

        data.sum
      end

      # Quantiles of +data+ with linear interpolation between the two closest
      # sorted values. Returns a Hash mapping each requested percentile (a
      # number in [0, 1]) to its quantile; for an empty +data+ every quantile
      # is nil.
      # Raises TypeError unless both arguments are Arrays of Numeric and a
      # RuntimeError when a percentile lies outside [0, 1].
      def quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
        ensure_numeric_array!(data)
        ensure_numeric_array!(percentiles)
        raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }

        sorted = data.sort
        span = [sorted.size - 1, 0].max

        percentiles.map { |percentile|
          # Fractional 0-based position in the sorted data. The "1 + ... - 1"
          # form is kept on purpose so that rounding stays exactly as before.
          position = 1 + span * percentile - 1
          below = sorted[position.floor]
          above = sorted[position.ceil]

          value =
            if position > position.floor && above != below
              fraction = position - position.floor
              (1 - fraction) * below + fraction * above
            else
              below
            end

          [percentile, value]
        }.to_h
      end

      # Outliers of +data+ with respect to its own distribution.
      # See #outliers_according_to for +k+ and the :side option.
      def outliers(data, k=1.5, **opts)
        outliers_according_to(data, data, k, **opts)
      end

      # Elements of +data+ lying outside the fences
      # [Q1 - k * IQR, Q3 + k * IQR], where the quartiles are computed on
      # +data_distribution+.
      #
      # The :side option restricts the result:
      #   :positive, :pos, :p, :+  -> only values above the upper fence
      #   :negative, :neg, :n, :-  -> only values below the lower fence
      # Any other (or missing) value returns the negative outliers followed
      # by the positive ones.
      def outliers_according_to(data, data_distribution, k=1.5, **opts)
        quartiles = quantile(data_distribution, [0.25, 0.75])
        q1 = quartiles[0.25]
        q3 = quartiles[0.75]
        iqr = q3 - q1

        positive_outliers = data.select { |d| d > q3 + iqr * k }
        negative_outliers = data.select { |d| d < q1 - iqr * k }

        side = opts[:side]
        side = side.to_sym if side

        case side
        when :positive, :pos, :p, :+
          positive_outliers
        when :negative, :neg, :n, :-
          negative_outliers
        else
          negative_outliers + positive_outliers
        end
      end

      private

      # Raises TypeError unless +values+ is an Array containing only Numerics.
      def ensure_numeric_array!(values)
        return if values.is_a?(Array) && values.all? { |e| e.is_a?(Numeric) }

        raise TypeError, "Expecting Array of numerics"
      end
    end
  end
end
107
+
108
# Short, R-like function names that forward to Rust::Descriptive.
module Rust::RBindings
  # Forwards to Rust::Descriptive.mean.
  def mean(series)
    return Rust::Descriptive.mean(series)
  end

  # Forwards to Rust::Descriptive.median.
  def median(series)
    return Rust::Descriptive.median(series)
  end

  # Forwards to Rust::Descriptive.variance.
  def var(series)
    return Rust::Descriptive.variance(series)
  end

  # Forwards to Rust::Descriptive.standard_deviation.
  def sd(series)
    return Rust::Descriptive.standard_deviation(series)
  end

  # Forwards to Rust::Descriptive.quantile; by default asks for the
  # minimum, the three quartiles and the maximum.
  def quantile(series, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
    return Rust::Descriptive.quantile(series, percentiles)
  end
end
@@ -1,8 +1,6 @@
1
- require 'code-assertions'
1
+ require_relative '../core'
2
2
 
3
- Rust.exclusive do
4
- Rust._eval("library(effsize)")
5
- end
3
+ Rust.prerequisite('effsize')
6
4
 
7
5
  module Rust::EffectSize
8
6
  class Result
@@ -16,14 +14,16 @@ module Rust::EffectSize
16
14
  return "#{name} = #{estimate} (#{magnitude}) [#{confidence_interval.min}, #{confidence_interval.max}]"
17
15
  end
18
16
  end
19
- end
20
17
 
21
- module Rust::EffectSize::CliffDelta
22
- class << self
23
- def compute(d1, d2)
18
+ class CliffDelta
19
+ def self.compute(d1, d2)
24
20
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
25
21
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
26
22
 
23
+ if d1.size <= 1 || d2.size <= 1
24
+ return Rust::EffectSize::Result.new
25
+ end
26
+
27
27
  Rust.exclusive do
28
28
  Rust['effsize.a'] = d1
29
29
  Rust['effsize.b'] = d2
@@ -32,23 +32,25 @@ module Rust::EffectSize::CliffDelta
32
32
 
33
33
  result = Rust::EffectSize::Result.new
34
34
  result.name = "Cliff's delta"
35
- result.estimate = Rust._pull("effsize.result$estimate")
36
- result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
37
- result.confidence_level = Rust._pull("effsize.result$conf.level")
38
- result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
35
+ result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
36
+ result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
37
+ result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
38
+ result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
39
39
 
40
40
  return result
41
41
  end
42
42
  end
43
43
  end
44
- end
45
-
46
- module Rust::EffectSize::CohenD
47
- class << self
48
- def compute(d1, d2)
44
+
45
+ class CohenD
46
+ def self.compute(d1, d2)
49
47
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
50
48
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
51
49
 
50
+ if d1.size <= 1 || d2.size <= 1
51
+ return Rust::EffectSize::Result.new
52
+ end
53
+
52
54
  Rust.exclusive do
53
55
  Rust['effsize.a'] = d1
54
56
  Rust['effsize.b'] = d2
@@ -57,10 +59,10 @@ module Rust::EffectSize::CohenD
57
59
 
58
60
  result = Rust::EffectSize::Result.new
59
61
  result.name = "Cohen's d"
60
- result.estimate = Rust._pull("effsize.result$estimate")
61
- result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int"))
62
- result.confidence_level = Rust._pull("effsize.result$conf.level")
63
- result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym
62
+ result.estimate = Rust._pull("effsize.result$estimate") rescue Float::NAN
63
+ result.confidence_interval = Range.new(*Rust._pull("effsize.result$conf.int")) rescue nil
64
+ result.confidence_level = Rust._pull("effsize.result$conf.level") rescue Float::NAN
65
+ result.magnitude = Rust._pull("as.character(effsize.result$magnitude)").to_sym rescue nil
64
66
 
65
67
  return result
66
68
  end
@@ -0,0 +1,248 @@
1
+ require_relative '../core'
2
+
3
class Numeric
  # Absolute difference between this number and +other+.
  # Raises TypeError when +other+ is not a Numeric.
  def distance(other)
    unless other.is_a?(Numeric)
      raise TypeError, "no implicit conversion of #{other.class} into Numeric"
    end

    difference = self - other
    difference.abs
  end
end
10
+
11
class Array
  # Sum of the element-wise distances between this array and +other+.
  # Elements are converted with to_i before being compared, and positions
  # missing from the shorter array count as 0.
  # Raises TypeError when +other+ is not an Array.
  def distance(other)
    unless other.is_a?(Array)
      raise TypeError, "no implicit conversion of #{other.class} into Array"
    end

    long, short = size > other.size ? [self, other] : [other, self]

    long.each_with_index.sum do |element, position|
      element.to_i.distance(short[position].to_i)
    end
  end
end
25
+
26
class String
  # Distance between two strings, computed as the Array#distance of their
  # byte sequences.
  # Raises TypeError when +other+ is not a String.
  def distance(other)
    unless other.is_a?(String)
      raise TypeError, "no implicit conversion of #{other.class} into String"
    end

    own_bytes = bytes
    other_bytes = other.bytes
    own_bytes.distance(other_bytes)
  end
end
33
+
34
module Rust
  # A subset of the outcomes of a discrete random variable, stored as a Hash
  # that maps each outcome to its probability. The probabilities of a slice
  # do not need to sum to 1.
  class RandomVariableSlice
    # +values+ must be a Hash (outcome => probability); raises TypeError otherwise.
    def initialize(values)
      raise TypeError, "Expected Hash" unless values.is_a?(Hash)

      @values = values
    end

    # Without argument (or with nil/false): total probability of the slice.
    # With an outcome +v+: the probability stored for it (nil when absent).
    def probability(v=nil)
      return @values.values.sum unless v

      @values[v]
    end

    # Most likely outcome of the slice, or nil when the slice is empty.
    def ml
      likeliest = @values.max_by { |_outcome, prob| prob }
      likeliest && likeliest[0]
    end

    # Sum of outcome * probability over the slice.
    def expected
      @values.map { |k, v| k*v }.sum
    end

    # The comparison operators below do NOT return booleans: each one returns
    # the sub-slice of outcomes satisfying the comparison, so that expressions
    # such as P(dice > 4) can be written. Note that this includes ==.

    def >(n)
      self.so_that { |k| k > n}
    end

    def >=(n)
      self.so_that { |k| k >= n}
    end

    def <(n)
      self.so_that { |k| k < n}
    end

    def <=(n)
      self.so_that { |k| k <= n}
    end

    def ==(n)
      self.so_that { |k| k == n}
    end

    # Sub-slice of the outcomes for which the given block returns true.
    def so_that
      RandomVariableSlice.new(@values.select { |k, v| yield(k) })
    end

    # Sub-slice of the outcomes lying in [a, b].
    def between(a, b)
      RandomVariableSlice.new(@values.select { |k, v| k.between? a, b })
    end
  end

  # A complete discrete random variable: the probabilities must sum to 1
  # (within EPSILON).
  class RandomVariable < RandomVariableSlice
    EPSILON = 1e-7

    attr_reader :values

    # +values+ maps outcomes to probabilities. Unless +exact+ is true,
    # negligible probabilities are folded into neighbouring outcomes
    # (see #approx!), which modifies +values+ in place.
    # Raises RuntimeError when a probability is outside [0, 1] or when the
    # probabilities do not sum to 1.
    def initialize(values = {0 => 1.0}, exact = false)
      @values = values
      @exact = exact

      raise "All the probabilities should be in the range [0, 1]" unless @values.values.all? { |v| v.between? 0, 1 }
      raise "The cumulative probability must be exactly 1 (#{@values.values.sum} instead)" unless @values.values.sum.between? 1-EPSILON, 1+EPSILON

      approx!
    end

    # Probability of outcome +v+ as a Float (0.0 when +v+ is not an outcome).
    def probability(v)
      return @values[v].to_f
    end

    # Distribution of the sum of this variable and +other+, combining every
    # pair of outcomes and multiplying their probabilities.
    def +(other)
      combine(other) { |mine, theirs| mine + theirs }
    end

    # With an Integer: sum of that many copies of this variable (see #rep).
    # With a RandomVariable: distribution of the product (see #mul).
    # Raises RuntimeError for any other argument.
    def *(times)
      if times.is_a? Integer
        return rep(times)
      elsif times.is_a? RandomVariable
        return mul(times)
      else
        raise "The argument must be an Integer or a RandomVariable"
      end
    end

    # Distribution of the product of this variable and +other+.
    def mul(other)
      combine(other) { |mine, theirs| mine * theirs }
    end

    # Sum of +times+ copies of this variable.
    # NOTE(review): for times <= 1 (including 0 and negatives) the variable
    # itself is returned; confirm whether callers expect something else for 0.
    def rep(times)
      rv = self
      (times-1).times do
        rv += self
      end

      return rv
    end

    # Marks the variable as exact, which disables #approx!.
    def exact!
      @exact = true
    end

    # Removes the outcomes whose probability is at most EPSILON and adds
    # their probability to the nearest remaining outcome (nearest according
    # to the outcomes' #distance method). Does nothing on exact variables.
    def approx!
      return if @exact

      negligible = @values.select { |_outcome, prob| prob <= EPSILON }.keys

      negligible.each do |outcome|
        # Never drop the last remaining outcome: there would be nowhere to
        # move its probability to.
        break if @values.size <= 1

        prob = @values.delete outcome
        nearest = @values.keys.min_by { |k| k.distance outcome }
        @values[nearest] += prob
      end
    end

    # Draws a random outcome according to the probabilities.
    def extract
      v = rand

      cumulative = 0
      @values.each do |key, prob|
        cumulative += prob

        return key if cumulative >= v
      end

      # The probabilities may sum to slightly less than 1 (within EPSILON),
      # so the draw can exceed the final cumulative value; in that case the
      # last outcome is the right answer.
      @values.keys.last
    end

    # Builds a RandomVariable from +hash+, assigning to +key+ whatever
    # probability is needed for the total to be 1. When +key+ is already in
    # +hash+ its previous probability is replaced. +hash+ is not modified.
    def self.complete(hash, key=0)
      completed = hash.dup
      completed[key] = 0
      completed[key] = 1 - completed.values.sum
      return RandomVariable.new(completed)
    end

    private

    # Combines every pair of outcomes of self and +other+ with the given
    # block, accumulating the product of their probabilities on the result.
    def combine(other)
      merged = {}

      @values.each do |my_key, my_value|
        other.values.each do |other_key, other_value|
          key = yield(my_key, other_key)

          merged[key] = merged[key].to_f + (my_value * other_value)
        end
      end

      RandomVariable.new(merged, @exact)
    end
  end

  # Random variable in which every given outcome has the same probability.
  class UniformRandomVariable < RandomVariable
    def initialize(values, exact = false)
      super(values.map { |k| [k, 1.0 / values.size]}.to_h, exact)
    end
  end

  # Mixin providing the P(...) and E(...) notation.
  module Probabilities
    # Probability of a slice, e.g. P(dice > 4).
    # Raises RuntimeError for a whole RandomVariable or for any object that
    # is not a RandomVariableSlice.
    def P(v)
      if v.is_a? RandomVariableSlice
        raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable
        return v.probability
      else
        raise "Cannot compute the probability of a #{v.class}"
      end
    end

    # Expected value of a slice or of a whole random variable.
    # Raises RuntimeError for any other object.
    def E(v)
      if v.is_a? RandomVariableSlice
        return v.expected
      else
        raise "Cannot compute the expected value of a #{v.class}"
      end
    end
  end

  # Ready-made random variables.
  class RandomVariable
    # Letter frequencies, one entry per lowercase letter.
    ENGLISH_ALPHABET = RandomVariable.new({
      "a" => 0.08167,
      "b" => 0.01492,
      "c" => 0.02782,
      "d" => 0.04253,
      "e" => 0.12703,
      "f" => 0.02228,
      "g" => 0.02015,
      "h" => 0.06094,
      "i" => 0.06966,
      "j" => 0.00153,
      "k" => 0.00772,
      "l" => 0.04025,
      "m" => 0.02406,
      "n" => 0.06749,
      "o" => 0.07507,
      "p" => 0.01929,
      "q" => 0.00095,
      "r" => 0.05987,
      "s" => 0.06327,
      "t" => 0.09056,
      "u" => 0.02758,
      "v" => 0.00978,
      "w" => 0.02360,
      "x" => 0.00150,
      "y" => 0.01974,
      "z" => 0.00074
    })

    # Fair six-sided die.
    DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])

    # Fair coin with outcomes "h" and "t".
    COIN = UniformRandomVariable.new(["h", "t"])
  end
end