rust 0.7 → 0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ruby-rust +3 -0
- data/lib/{rust-csv.rb → rust/core/csv.rb} +23 -1
- data/lib/rust/core/rust.rb +221 -0
- data/lib/rust/core/types/all.rb +4 -0
- data/lib/{rust-core.rb → rust/core/types/dataframe.rb} +159 -331
- data/lib/rust/core/types/datatype.rb +195 -0
- data/lib/rust/core/types/factor.rb +158 -0
- data/lib/rust/core/types/language.rb +199 -0
- data/lib/rust/core/types/list.rb +97 -0
- data/lib/rust/core/types/matrix.rb +155 -0
- data/lib/rust/core/types/s4class.rb +78 -0
- data/lib/rust/core/types/utils.rb +122 -0
- data/lib/rust/core.rb +7 -0
- data/lib/rust/external/robustbase.rb +44 -0
- data/lib/rust/models/all.rb +4 -0
- data/lib/rust/models/anova.rb +77 -0
- data/lib/rust/models/regression.rb +258 -0
- data/lib/rust/plots/all.rb +4 -0
- data/lib/rust/plots/basic-plots.rb +143 -0
- data/lib/{rust-plots.rb → rust/plots/core.rb} +89 -167
- data/lib/rust/plots/distribution-plots.rb +75 -0
- data/lib/rust/stats/all.rb +4 -0
- data/lib/{rust-basics.rb → rust/stats/correlation.rb} +45 -2
- data/lib/{rust-descriptive.rb → rust/stats/descriptive.rb} +52 -3
- data/lib/{rust-effsize.rb → rust/stats/effsize.rb} +28 -13
- data/lib/{rust-probabilities.rb → rust/stats/probabilities.rb} +142 -34
- data/lib/{rust-tests.rb → rust/stats/tests.rb} +178 -92
- data/lib/rust.rb +4 -9
- metadata +32 -13
- data/lib/rust-calls.rb +0 -80
@@ -1,10 +1,15 @@
|
|
1
|
-
|
1
|
+
require_relative '../core'
|
2
2
|
|
3
|
-
Rust.
|
4
|
-
|
5
|
-
|
3
|
+
Rust.prerequisite('effsize')
|
4
|
+
|
5
|
+
##
|
6
|
+
# Module containing utilities for computing effect size statistics.
|
6
7
|
|
7
8
|
module Rust::EffectSize
|
9
|
+
|
10
|
+
##
|
11
|
+
# Effect size results.
|
12
|
+
|
8
13
|
class Result
|
9
14
|
attr_accessor :name
|
10
15
|
attr_accessor :estimate
|
@@ -16,11 +21,16 @@ module Rust::EffectSize
|
|
16
21
|
return "#{name} = #{estimate} (#{magnitude}) [#{confidence_interval.min}, #{confidence_interval.max}]"
|
17
22
|
end
|
18
23
|
end
|
19
|
-
end
|
20
24
|
|
21
|
-
|
22
|
-
|
23
|
-
|
25
|
+
##
|
26
|
+
# Cliff delta effect size statistics.
|
27
|
+
|
28
|
+
class CliffDelta
|
29
|
+
|
30
|
+
##
|
31
|
+
# Computes and returns the effect size for +d1+ and +d2+.
|
32
|
+
|
33
|
+
def self.compute(d1, d2)
|
24
34
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
25
35
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
26
36
|
|
@@ -45,11 +55,16 @@ module Rust::EffectSize::CliffDelta
|
|
45
55
|
end
|
46
56
|
end
|
47
57
|
end
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
58
|
+
|
59
|
+
##
|
60
|
+
# Cohen D effect size statistics.
|
61
|
+
|
62
|
+
class CohenD
|
63
|
+
|
64
|
+
##
|
65
|
+
# Computes and returns the effect size for +d1+ and +d2+.
|
66
|
+
|
67
|
+
def self.compute(d1, d2)
|
53
68
|
raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
|
54
69
|
raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
|
55
70
|
|
@@ -1,7 +1,11 @@
|
|
1
|
-
require_relative '
|
1
|
+
require_relative '../core'
|
2
2
|
|
3
3
|
class Numeric
|
4
|
-
|
4
|
+
|
5
|
+
##
|
6
|
+
# Computes the distance between this and another number.
|
7
|
+
|
8
|
+
def _rust_prob_distance(other)
|
5
9
|
raise TypeError, "no implicit conversion of #{other.class} into Numeric" unless other.is_a? Numeric
|
6
10
|
|
7
11
|
return (self - other).abs
|
@@ -9,14 +13,18 @@ class Numeric
|
|
9
13
|
end
|
10
14
|
|
11
15
|
class Array
|
12
|
-
|
16
|
+
|
17
|
+
##
|
18
|
+
# Computes the distance between this and another array.
|
19
|
+
|
20
|
+
def _rust_prob_distance(other)
|
13
21
|
raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array
|
14
22
|
|
15
23
|
longest, shortest = self.size > other.size ? [self, other] : [other, self]
|
16
24
|
|
17
25
|
distance = 0
|
18
26
|
for i in 0...longest.size
|
19
|
-
distance += longest[i].to_i.
|
27
|
+
distance += longest[i].to_i._rust_prob_distance(shortest[i].to_i)
|
20
28
|
end
|
21
29
|
|
22
30
|
return distance
|
@@ -24,21 +32,37 @@ class Array
|
|
24
32
|
end
|
25
33
|
|
26
34
|
class String
|
27
|
-
|
35
|
+
|
36
|
+
##
|
37
|
+
# Computes the distance between this and another string.
|
38
|
+
|
39
|
+
def _rust_prob_distance(other)
|
28
40
|
raise TypeError, "no implicit conversion of #{other.class} into String" unless other.is_a? String
|
29
41
|
|
30
|
-
return self.bytes.
|
42
|
+
return self.bytes._rust_prob_distance other.bytes
|
31
43
|
end
|
32
44
|
end
|
33
45
|
|
34
46
|
module Rust
|
47
|
+
|
48
|
+
##
|
49
|
+
# Represents a slice of a random variable, for which no check is made in terms of cumulative probability.
|
50
|
+
|
35
51
|
class RandomVariableSlice
|
52
|
+
|
53
|
+
##
|
54
|
+
# Creates a new slice of random variable. +values+ is a hash of values associated with their probabilities.
|
55
|
+
|
36
56
|
def initialize(values)
|
37
57
|
raise TypeError, "Expected Hash" unless values.is_a?(Hash)
|
38
58
|
|
39
59
|
@values = values
|
40
60
|
end
|
41
61
|
|
62
|
+
##
|
63
|
+
# Gets the probability of a value +v+. If +v+ is not specified, returns the cumulative probability of the whole
|
64
|
+
# slice.
|
65
|
+
|
42
66
|
def probability(v=nil)
|
43
67
|
unless v
|
44
68
|
return @values.values.sum
|
@@ -47,48 +71,84 @@ module Rust
|
|
47
71
|
end
|
48
72
|
end
|
49
73
|
|
74
|
+
##
|
75
|
+
# Returns the value with the maximum probability.
|
76
|
+
|
50
77
|
def ml
|
51
78
|
@values.max_by { |k, v| v }[0]
|
52
79
|
end
|
53
80
|
|
81
|
+
##
|
82
|
+
# Returns the expected value for this slice.
|
83
|
+
|
54
84
|
def expected
|
55
85
|
@values.map { |k, v| k*v }.sum
|
56
86
|
end
|
57
87
|
|
88
|
+
##
|
89
|
+
# Returns a slice with the values that are greater than +n+.
|
90
|
+
|
58
91
|
def >(n)
|
59
|
-
self.so_that { |k| k > n}
|
92
|
+
self.so_that { |k| k > n }
|
60
93
|
end
|
61
94
|
|
95
|
+
##
|
96
|
+
# Returns a slice with the values that are greater than or equal to +n+.
|
97
|
+
|
62
98
|
def >=(n)
|
63
|
-
self.so_that { |k| k >= n}
|
99
|
+
self.so_that { |k| k >= n }
|
64
100
|
end
|
65
101
|
|
102
|
+
##
|
103
|
+
# Returns a slice with the values that are lower than +n+.
|
104
|
+
|
66
105
|
def <(n)
|
67
|
-
self.so_that { |k| k < n}
|
106
|
+
self.so_that { |k| k < n }
|
68
107
|
end
|
69
108
|
|
109
|
+
##
|
110
|
+
# Returns a slice with the values that are lower than or equal to +n+.
|
111
|
+
|
70
112
|
def <=(n)
|
71
|
-
self.so_that { |k| k <= n}
|
113
|
+
self.so_that { |k| k <= n }
|
72
114
|
end
|
73
115
|
|
116
|
+
##
|
117
|
+
# Returns a slice with the value +n+.
|
118
|
+
|
74
119
|
def ==(n)
|
75
|
-
self.so_that { |k| k == n}
|
120
|
+
self.so_that { |k| k == n }
|
76
121
|
end
|
77
122
|
|
78
|
-
|
79
|
-
|
80
|
-
end
|
123
|
+
##
|
124
|
+
# Returns a slice with the values between +a+ and +b+.
|
81
125
|
|
82
126
|
def between(a, b)
|
83
|
-
|
127
|
+
self.so_that { |k| k.between(a, b) }
|
128
|
+
end
|
129
|
+
|
130
|
+
##
|
131
|
+
# Returns a slice with the values for which the given block returns true.
|
132
|
+
|
133
|
+
def so_that
|
134
|
+
RandomVariableSlice.new(@values.select { |k, v| yield(k) })
|
84
135
|
end
|
85
136
|
end
|
137
|
+
|
138
|
+
##
|
139
|
+
# Represents a random variable. The cumulative probability of the values must equal 1.
|
86
140
|
|
87
141
|
class RandomVariable < RandomVariableSlice
|
88
142
|
EPSILON = 1e-7
|
89
143
|
|
90
144
|
attr_reader :values
|
91
145
|
|
146
|
+
##
|
147
|
+
# Creates a new random variable. +values+ is a hash of values associated with their probabilities.
|
148
|
+
# +exact+ indicates whether this variable, when combined with others, is forced to keep all the values, even
|
149
|
+
# the most unlikely ones. If this is +false+ (default), the most improbable values (lower than EPSILON) are
|
150
|
+
# removed for efficiency reasons.
|
151
|
+
|
92
152
|
def initialize(values = {0 => 1.0}, exact = false)
|
93
153
|
@values = values
|
94
154
|
@exact = exact
|
@@ -99,34 +159,46 @@ module Rust
|
|
99
159
|
approx!
|
100
160
|
end
|
101
161
|
|
162
|
+
##
|
163
|
+
# Returns the probability of value +v+.
|
164
|
+
|
102
165
|
def probability(v)
|
103
166
|
return @values[v].to_f
|
104
167
|
end
|
105
168
|
|
106
|
-
|
107
|
-
|
169
|
+
##
|
170
|
+
# Returns a new random variable which represents the sum of this and the +other+ random variable.
|
108
171
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
172
|
+
def +(other)
|
173
|
+
new_hash = {}
|
174
|
+
|
175
|
+
@values.each do |my_key, my_value|
|
176
|
+
other.values.each do |other_key, other_value|
|
177
|
+
sum_key = my_key + other_key
|
178
|
+
|
179
|
+
new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
|
180
|
+
end
|
114
181
|
end
|
182
|
+
|
183
|
+
return RandomVariable.new(new_hash, @exact)
|
115
184
|
end
|
116
185
|
|
117
|
-
|
118
|
-
|
186
|
+
##
|
187
|
+
# Based on the type of +arg+, either mul (product with another random variable) or rep (repeated sum) is called.
|
119
188
|
|
120
|
-
def *(
|
121
|
-
if
|
122
|
-
return rep(
|
123
|
-
elsif
|
124
|
-
return mul(
|
189
|
+
def *(arg)
|
190
|
+
if arg.is_a? Integer
|
191
|
+
return rep(arg)
|
192
|
+
elsif arg.is_a? RandomVariable
|
193
|
+
return mul(arg)
|
125
194
|
else
|
126
195
|
raise "The argument must be an Integer or a RandomVariable"
|
127
196
|
end
|
128
197
|
end
|
129
198
|
|
199
|
+
##
|
200
|
+
# Returns a new random variable which represents the product of this and the +other+ random variable.
|
201
|
+
|
130
202
|
def mul(other)
|
131
203
|
new_hash = {}
|
132
204
|
|
@@ -141,6 +213,9 @@ module Rust
|
|
141
213
|
return RandomVariable.new(new_hash, @exact)
|
142
214
|
end
|
143
215
|
|
216
|
+
##
|
217
|
+
# Returns a new random variable which represents the sum of this random variable with itself +times+ times.
|
218
|
+
|
144
219
|
def rep(times)
|
145
220
|
rv = self
|
146
221
|
(times-1).times do
|
@@ -150,10 +225,16 @@ module Rust
|
|
150
225
|
return rv
|
151
226
|
end
|
152
227
|
|
228
|
+
##
|
229
|
+
# Makes sure that the operations yield all the values, even the most unlikely ones.
|
230
|
+
|
153
231
|
def exact!
|
154
232
|
@exact = true
|
155
233
|
end
|
156
234
|
|
235
|
+
##
|
236
|
+
# If this variable is not exact, the values with probability lower than EPSILON are removed.
|
237
|
+
|
157
238
|
def approx!
|
158
239
|
return if @exact
|
159
240
|
|
@@ -164,35 +245,56 @@ module Rust
|
|
164
245
|
|
165
246
|
to_delete.each do |v|
|
166
247
|
probability = @values.delete v
|
167
|
-
nearest = @values.keys.min_by { |k| k.
|
248
|
+
nearest = @values.keys.min_by { |k| k._rust_prob_distance v }
|
168
249
|
@values[nearest] += probability
|
169
250
|
end
|
170
251
|
end
|
171
252
|
|
253
|
+
##
|
254
|
+
# Returns a random value, according to the data distribution.
|
255
|
+
|
172
256
|
def extract
|
173
257
|
v = rand
|
174
258
|
|
175
259
|
cumulative = 0
|
176
|
-
@values.each do |key, prob|
|
260
|
+
@values.sort_by { |k, v| k }.each do |key, prob|
|
177
261
|
cumulative += prob
|
178
262
|
|
179
263
|
return key if cumulative >= v
|
180
264
|
end
|
181
265
|
end
|
182
266
|
|
267
|
+
##
|
268
|
+
# Creates a random variable by partially specifying the values through +hash+. The remaining probability is
|
269
|
+
# attributed to +key+ (0, by default).
|
270
|
+
|
183
271
|
def self.complete(hash, key=0)
|
184
272
|
hash[key] = 1 - hash.values.sum
|
185
273
|
return RandomVariable.new(hash)
|
186
274
|
end
|
187
275
|
end
|
188
276
|
|
277
|
+
##
|
278
|
+
# Represents a uniform random variable.
|
279
|
+
|
189
280
|
class UniformRandomVariable < RandomVariable
|
281
|
+
|
282
|
+
##
|
283
|
+
# Creates random variables for which all the +values+ have the same probability (1 / values.size).
|
284
|
+
|
190
285
|
def initialize(values, exact = false)
|
191
286
|
super(values.map { |k| [k, 1.0 / values.size]}.to_h, exact)
|
192
287
|
end
|
193
288
|
end
|
194
289
|
|
290
|
+
##
|
291
|
+
# Module that contains utilities for handling random variables.
|
292
|
+
|
195
293
|
module Probabilities
|
294
|
+
|
295
|
+
##
|
296
|
+
# Computes the probability of the random variable +v+.
|
297
|
+
|
196
298
|
def P(v)
|
197
299
|
if v.is_a? RandomVariableSlice
|
198
300
|
raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable
|
@@ -202,6 +304,9 @@ module Rust
|
|
202
304
|
end
|
203
305
|
end
|
204
306
|
|
307
|
+
##
|
308
|
+
# Computes the expected value of the random variable +v+.
|
309
|
+
|
205
310
|
def E(v)
|
206
311
|
if v.is_a? RandomVariableSlice
|
207
312
|
return v.expected
|
@@ -211,7 +316,10 @@ module Rust
|
|
211
316
|
end
|
212
317
|
end
|
213
318
|
|
214
|
-
|
319
|
+
##
|
320
|
+
# Module containing examples of commonly-used random variables.
|
321
|
+
|
322
|
+
module RandomVariableExamples
|
215
323
|
ENGLISH_ALPHABET = RandomVariable.new({
|
216
324
|
"a" => 0.08167,
|
217
325
|
"b" => 0.01492,
|
@@ -240,9 +348,9 @@ module Rust
|
|
240
348
|
"y" => 0.01974,
|
241
349
|
"z" => 0.00074
|
242
350
|
})
|
243
|
-
|
351
|
+
|
244
352
|
DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])
|
245
|
-
|
353
|
+
|
246
354
|
COIN = UniformRandomVariable.new(["h", "t"])
|
247
355
|
end
|
248
356
|
end
|