rust 0.7 → 0.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/ruby-rust +3 -0
- data/lib/{rust-csv.rb → rust/core/csv.rb} +23 -1
- data/lib/rust/core/rust.rb +221 -0
- data/lib/rust/core/types/all.rb +4 -0
- data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +159 -331
- data/lib/rust/core/types/datatype.rb +195 -0
- data/lib/rust/core/types/factor.rb +158 -0
- data/lib/rust/core/types/language.rb +199 -0
- data/lib/rust/core/types/list.rb +97 -0
- data/lib/rust/core/types/matrix.rb +155 -0
- data/lib/rust/core/types/s4class.rb +78 -0
- data/lib/rust/core/types/utils.rb +122 -0
- data/lib/rust/core.rb +7 -0
- data/lib/rust/external/robustbase.rb +44 -0
- data/lib/rust/models/all.rb +4 -0
- data/lib/rust/models/anova.rb +77 -0
- data/lib/rust/models/regression.rb +258 -0
- data/lib/rust/plots/all.rb +4 -0
- data/lib/rust/plots/basic-plots.rb +143 -0
- data/lib/{rust-plots.rb → rust/plots/core.rb} +89 -167
- data/lib/rust/plots/distribution-plots.rb +75 -0
- data/lib/rust/stats/all.rb +4 -0
- data/lib/{rust-basics.rb → rust/stats/correlation.rb} +45 -2
- data/lib/{rust-descriptive.rb → rust/stats/descriptive.rb} +52 -3
- data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +28 -13
- data/lib/{rust-probabilities.rb → rust/stats/probabilities.rb} +142 -34
- data/lib/{rust-tests.rb → rust/stats/tests.rb} +178 -92
- data/lib/rust.rb +4 -9
- metadata +32 -13
- data/lib/rust-calls.rb +0 -80
@@ -1,10 +1,15 @@
|
|
1
|
-
|
1
|
+
require_relative '../core'
|
2
2
|
|
3
|
-
Rust.
|
4
|
-
|
5
|
-
|
3
|
+
Rust.prerequisite('effsize')
|
4
|
+
|
5
|
+
##
|
6
|
+
# Module containing utilities for computing effect size statistics.
|
6
7
|
|
7
8
|
module Rust::EffectSize
|
9
|
+
|
10
|
+
##
|
11
|
+
# Effect size results.
|
12
|
+
|
8
13
|
class Result
|
9
14
|
attr_accessor :name
|
10
15
|
attr_accessor :estimate
|
@@ -16,11 +21,16 @@ module Rust::EffectSize
|
|
16
21
|
return "#{name} = #{estimate} (#{magnitude}) [#{confidence_interval.min}, #{confidence_interval.max}]"
|
17
22
|
end
|
18
23
|
end
|
19
|
-
end
|
20
24
|
|
21
|
-
|
22
|
-
|
23
|
-
|
25
|
+
##
|
26
|
+
# Cliff's delta effect size statistics.
|
27
|
+
|
28
|
+
class CliffDelta
|
29
|
+
|
30
|
+
##
|
31
|
+
# Computes and returns the effect size for +d1+ and +d2+.
|
32
|
+
|
33
|
+
def self.compute(d1, d2)
|
24
34
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
25
35
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
26
36
|
|
@@ -45,11 +55,16 @@ module Rust::EffectSize::CliffDelta
|
|
45
55
|
end
|
46
56
|
end
|
47
57
|
end
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
58
|
+
|
59
|
+
##
|
60
|
+
# Cohen's d effect size statistics.
|
61
|
+
|
62
|
+
class CohenD
|
63
|
+
|
64
|
+
##
|
65
|
+
# Computes and returns the effect size for +d1+ and +d2+.
|
66
|
+
|
67
|
+
def self.compute(d1, d2)
|
53
68
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
54
69
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
55
70
|
|
@@ -1,7 +1,11 @@
|
|
1
|
-
require_relative '
|
1
|
+
require_relative '../core'
|
2
2
|
|
3
3
|
class Numeric
|
4
|
-
|
4
|
+
|
5
|
+
##
|
6
|
+
# Computes the distance between this and another number.
|
7
|
+
|
8
|
+
def _rust_prob_distance(other)
|
5
9
|
raise TypeError, "no implicit conversion of #{other.class} into Numeric" unless other.is_a? Numeric
|
6
10
|
|
7
11
|
return (self - other).abs
|
@@ -9,14 +13,18 @@ class Numeric
|
|
9
13
|
end
|
10
14
|
|
11
15
|
class Array
|
12
|
-
|
16
|
+
|
17
|
+
##
|
18
|
+
# Computes the distance between this and another array.
|
19
|
+
|
20
|
+
def _rust_prob_distance(other)
|
13
21
|
raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array
|
14
22
|
|
15
23
|
longest, shortest = self.size > other.size ? [self, other] : [other, self]
|
16
24
|
|
17
25
|
distance = 0
|
18
26
|
for i in 0...longest.size
|
19
|
-
distance += longest[i].to_i.
|
27
|
+
distance += longest[i].to_i._rust_prob_distance(shortest[i].to_i)
|
20
28
|
end
|
21
29
|
|
22
30
|
return distance
|
@@ -24,21 +32,37 @@ class Array
|
|
24
32
|
end
|
25
33
|
|
26
34
|
class String
|
27
|
-
|
35
|
+
|
36
|
+
##
|
37
|
+
# Computes the distance between this and another string.
|
38
|
+
|
39
|
+
def _rust_prob_distance(other)
|
28
40
|
raise TypeError, "no implicit conversion of #{other.class} into String" unless other.is_a? String
|
29
41
|
|
30
|
-
return self.bytes.
|
42
|
+
return self.bytes._rust_prob_distance other.bytes
|
31
43
|
end
|
32
44
|
end
|
33
45
|
|
34
46
|
module Rust
|
47
|
+
|
48
|
+
##
|
49
|
+
# Represents a slice of a random variable, for which no check is made in terms of cumulative probability.
|
50
|
+
|
35
51
|
class RandomVariableSlice
|
52
|
+
|
53
|
+
##
|
54
|
+
# Creates a new slice of random variable. +values+ is a hash of values associated with their probabilities.
|
55
|
+
|
36
56
|
def initialize(values)
|
37
57
|
raise TypeError, "Expected Hash" unless values.is_a?(Hash)
|
38
58
|
|
39
59
|
@values = values
|
40
60
|
end
|
41
61
|
|
62
|
+
##
|
63
|
+
# Gets the probability of a value +v+. If +v+ is not specified, returns the cumulative probability of the whole
|
64
|
+
# slice.
|
65
|
+
|
42
66
|
def probability(v=nil)
|
43
67
|
unless v
|
44
68
|
return @values.values.sum
|
@@ -47,48 +71,84 @@ module Rust
|
|
47
71
|
end
|
48
72
|
end
|
49
73
|
|
74
|
+
##
|
75
|
+
# Returns the value with the maximum probability.
|
76
|
+
|
50
77
|
def ml
|
51
78
|
@values.max_by { |k, v| v }[0]
|
52
79
|
end
|
53
80
|
|
81
|
+
##
|
82
|
+
# Returns the expected value for this slice.
|
83
|
+
|
54
84
|
def expected
|
55
85
|
@values.map { |k, v| k*v }.sum
|
56
86
|
end
|
57
87
|
|
88
|
+
##
|
89
|
+
# Returns a slice with the values that are greater than +n+.
|
90
|
+
|
58
91
|
def >(n)
|
59
|
-
self.so_that { |k| k > n}
|
92
|
+
self.so_that { |k| k > n }
|
60
93
|
end
|
61
94
|
|
95
|
+
##
|
96
|
+
# Returns a slice with the values that are greater than or equal to +n+.
|
97
|
+
|
62
98
|
def >=(n)
|
63
|
-
self.so_that { |k| k >= n}
|
99
|
+
self.so_that { |k| k >= n }
|
64
100
|
end
|
65
101
|
|
102
|
+
##
|
103
|
+
# Returns a slice with the values that are lower than +n+.
|
104
|
+
|
66
105
|
def <(n)
|
67
|
-
self.so_that { |k| k < n}
|
106
|
+
self.so_that { |k| k < n }
|
68
107
|
end
|
69
108
|
|
109
|
+
##
|
110
|
+
# Returns a slice with the values that are lower than or equal to +n+.
|
111
|
+
|
70
112
|
def <=(n)
|
71
|
-
self.so_that { |k| k <= n}
|
113
|
+
self.so_that { |k| k <= n }
|
72
114
|
end
|
73
115
|
|
116
|
+
##
|
117
|
+
# Returns a slice with the value +n+.
|
118
|
+
|
74
119
|
def ==(n)
|
75
|
-
self.so_that { |k| k == n}
|
120
|
+
self.so_that { |k| k == n }
|
76
121
|
end
|
77
122
|
|
78
|
-
|
79
|
-
|
80
|
-
end
|
123
|
+
##
|
124
|
+
# Returns a slice with the values between +a+ and +b+.
|
81
125
|
|
82
126
|
def between(a, b)
|
83
|
-
|
127
|
+
self.so_that { |k| k.between(a, b) }
|
128
|
+
end
|
129
|
+
|
130
|
+
##
|
131
|
+
# Returns a slice with the values for which the given block returns true.
|
132
|
+
|
133
|
+
def so_that
|
134
|
+
RandomVariableSlice.new(@values.select { |k, v| yield(k) })
|
84
135
|
end
|
85
136
|
end
|
137
|
+
|
138
|
+
##
|
139
|
+
# Represents a random variable. The cumulative probability of the values must equal 1.
|
86
140
|
|
87
141
|
class RandomVariable < RandomVariableSlice
|
88
142
|
EPSILON = 1e-7
|
89
143
|
|
90
144
|
attr_reader :values
|
91
145
|
|
146
|
+
##
|
147
|
+
# Creates a new random variable. +values+ is a hash of values associated with their probabilities.
|
148
|
+
# +exact+ indicates whether this variable, when combined with others, must keep all the values, even
|
149
|
+
# the most unlikely ones. If this is +false+ (default), the most improbable values (lower than EPSILON) are
|
150
|
+
# removed for efficiency reasons.
|
151
|
+
|
92
152
|
def initialize(values = {0 => 1.0}, exact = false)
|
93
153
|
@values = values
|
94
154
|
@exact = exact
|
@@ -99,34 +159,46 @@ module Rust
|
|
99
159
|
approx!
|
100
160
|
end
|
101
161
|
|
162
|
+
##
|
163
|
+
# Returns the probability of value +v+.
|
164
|
+
|
102
165
|
def probability(v)
|
103
166
|
return @values[v].to_f
|
104
167
|
end
|
105
168
|
|
106
|
-
|
107
|
-
|
169
|
+
##
|
170
|
+
# Returns a new random variable which represents the sum of this and the +other+ random variable.
|
108
171
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
172
|
+
def +(other)
|
173
|
+
new_hash = {}
|
174
|
+
|
175
|
+
@values.each do |my_key, my_value|
|
176
|
+
other.values.each do |other_key, other_value|
|
177
|
+
sum_key = my_key + other_key
|
178
|
+
|
179
|
+
new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
|
180
|
+
end
|
114
181
|
end
|
182
|
+
|
183
|
+
return RandomVariable.new(new_hash, @exact)
|
115
184
|
end
|
116
185
|
|
117
|
-
|
118
|
-
|
186
|
+
##
|
187
|
+
# Based on the type of +arg+, either mul (product with another random variable) or rep (repeated sum) is called.
|
119
188
|
|
120
|
-
def *(
|
121
|
-
if
|
122
|
-
return rep(
|
123
|
-
elsif
|
124
|
-
return mul(
|
189
|
+
def *(arg)
|
190
|
+
if arg.is_a? Integer
|
191
|
+
return rep(arg)
|
192
|
+
elsif arg.is_a? RandomVariable
|
193
|
+
return mul(arg)
|
125
194
|
else
|
126
195
|
raise "The argument must be an Integer or a RandomVariable"
|
127
196
|
end
|
128
197
|
end
|
129
198
|
|
199
|
+
##
|
200
|
+
# Returns a new random variable which represents the product of this and the +other+ random variable.
|
201
|
+
|
130
202
|
def mul(other)
|
131
203
|
new_hash = {}
|
132
204
|
|
@@ -141,6 +213,9 @@ module Rust
|
|
141
213
|
return RandomVariable.new(new_hash, @exact)
|
142
214
|
end
|
143
215
|
|
216
|
+
##
|
217
|
+
# Returns a new random variable which represents the sum of this random variable with itself +times+ times.
|
218
|
+
|
144
219
|
def rep(times)
|
145
220
|
rv = self
|
146
221
|
(times-1).times do
|
@@ -150,10 +225,16 @@ module Rust
|
|
150
225
|
return rv
|
151
226
|
end
|
152
227
|
|
228
|
+
##
|
229
|
+
# Makes sure that the operations yield all the values, even the most unlikely ones.
|
230
|
+
|
153
231
|
def exact!
|
154
232
|
@exact = true
|
155
233
|
end
|
156
234
|
|
235
|
+
##
|
236
|
+
# If this variable is not exact, values with probability lower than EPSILON are merged into the nearest value.
|
237
|
+
|
157
238
|
def approx!
|
158
239
|
return if @exact
|
159
240
|
|
@@ -164,35 +245,56 @@ module Rust
|
|
164
245
|
|
165
246
|
to_delete.each do |v|
|
166
247
|
probability = @values.delete v
|
167
|
-
nearest = @values.keys.min_by { |k| k.
|
248
|
+
nearest = @values.keys.min_by { |k| k._rust_prob_distance v }
|
168
249
|
@values[nearest] += probability
|
169
250
|
end
|
170
251
|
end
|
171
252
|
|
253
|
+
##
|
254
|
+
# Returns a random value, according to the data distribution.
|
255
|
+
|
172
256
|
def extract
|
173
257
|
v = rand
|
174
258
|
|
175
259
|
cumulative = 0
|
176
|
-
@values.each do |key, prob|
|
260
|
+
@values.sort_by { |k, v| k }.each do |key, prob|
|
177
261
|
cumulative += prob
|
178
262
|
|
179
263
|
return key if cumulative >= v
|
180
264
|
end
|
181
265
|
end
|
182
266
|
|
267
|
+
##
|
268
|
+
# Creates a random variable by partially specifying the values through +hash+. The remaining probability is
|
269
|
+
# attributed to +key+ (0, by default).
|
270
|
+
|
183
271
|
def self.complete(hash, key=0)
|
184
272
|
hash[key] = 1 - hash.values.sum
|
185
273
|
return RandomVariable.new(hash)
|
186
274
|
end
|
187
275
|
end
|
188
276
|
|
277
|
+
##
|
278
|
+
# Represents a uniform random variable.
|
279
|
+
|
189
280
|
class UniformRandomVariable < RandomVariable
|
281
|
+
|
282
|
+
##
|
283
|
+
# Creates a uniform random variable in which all the +values+ have the same probability (1 / values.size).
|
284
|
+
|
190
285
|
def initialize(values, exact = false)
|
191
286
|
super(values.map { |k| [k, 1.0 / values.size]}.to_h, exact)
|
192
287
|
end
|
193
288
|
end
|
194
289
|
|
290
|
+
##
|
291
|
+
# Module that contains utilities for handling random variables.
|
292
|
+
|
195
293
|
module Probabilities
|
294
|
+
|
295
|
+
##
|
296
|
+
# Computes the probability of the random variable slice +v+ (a full RandomVariable is rejected with an error).
|
297
|
+
|
196
298
|
def P(v)
|
197
299
|
if v.is_a? RandomVariableSlice
|
198
300
|
raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable
|
@@ -202,6 +304,9 @@ module Rust
|
|
202
304
|
end
|
203
305
|
end
|
204
306
|
|
307
|
+
##
|
308
|
+
# Computes the expected value of the random variable +v+.
|
309
|
+
|
205
310
|
def E(v)
|
206
311
|
if v.is_a? RandomVariableSlice
|
207
312
|
return v.expected
|
@@ -211,7 +316,10 @@ module Rust
|
|
211
316
|
end
|
212
317
|
end
|
213
318
|
|
214
|
-
|
319
|
+
##
|
320
|
+
# Module containing examples of commonly-used random variables.
|
321
|
+
|
322
|
+
module RandomVariableExamples
|
215
323
|
ENGLISH_ALPHABET = RandomVariable.new({
|
216
324
|
"a" => 0.08167,
|
217
325
|
"b" => 0.01492,
|
@@ -240,9 +348,9 @@ module Rust
|
|
240
348
|
"y" => 0.01974,
|
241
349
|
"z" => 0.00074
|
242
350
|
})
|
243
|
-
|
351
|
+
|
244
352
|
DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])
|
245
|
-
|
353
|
+
|
246
354
|
COIN = UniformRandomVariable.new(["h", "t"])
|
247
355
|
end
|
248
356
|
end
|