rust 0.9 → 0.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,11 @@
1
1
  require_relative '../core'
2
2
 
3
3
  class Numeric
4
- def distance(other)
4
+
5
+ ##
6
+ # Computes the distance between this and another number.
7
+
8
+ def _rust_prob_distance(other)
5
9
  raise TypeError, "no implicit conversion of #{other.class} into Numeric" unless other.is_a? Numeric
6
10
 
7
11
  return (self - other).abs
@@ -9,14 +13,18 @@ class Numeric
9
13
  end
10
14
 
11
15
  class Array
12
- def distance(other)
16
+
17
+ ##
18
+ # Computes the distance between this and another array.
19
+
20
+ def _rust_prob_distance(other)
13
21
  raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array
14
22
 
15
23
  longest, shortest = self.size > other.size ? [self, other] : [other, self]
16
24
 
17
25
  distance = 0
18
26
  for i in 0...longest.size
19
- distance += longest[i].to_i.distance(shortest[i].to_i)
27
+ distance += longest[i].to_i._rust_prob_distance(shortest[i].to_i)
20
28
  end
21
29
 
22
30
  return distance
@@ -24,21 +32,37 @@ class Array
24
32
  end
25
33
 
26
34
  class String
27
- def distance(other)
35
+
36
+ ##
37
+ # Computes the distance between this and another string.
38
+
39
+ def _rust_prob_distance(other)
28
40
  raise TypeError, "no implicit conversion of #{other.class} into String" unless other.is_a? String
29
41
 
30
- return self.bytes.distance other.bytes
42
+ return self.bytes._rust_prob_distance other.bytes
31
43
  end
32
44
  end
33
45
 
34
46
  module Rust
47
+
48
+ ##
49
+ # Represents a slice of a random variable, for which no check is made in terms of cumulative probability.
50
+
35
51
  class RandomVariableSlice
52
+
53
+ ##
54
+ # Creates a new slice of random variable. +values+ is a hash of values associated with their probabilities.
55
+
36
56
  def initialize(values)
37
57
  raise TypeError, "Expected Hash" unless values.is_a?(Hash)
38
58
 
39
59
  @values = values
40
60
  end
41
61
 
62
+ ##
63
+ # Gets the probability of a value +v+. If +v+ is not specified, returns the cumulative probability of the whole
64
+ # slice.
65
+
42
66
  def probability(v=nil)
43
67
  unless v
44
68
  return @values.values.sum
@@ -47,48 +71,84 @@ module Rust
47
71
  end
48
72
  end
49
73
 
74
+ ##
75
+ # Returns the value with the maximum probability.
76
+
50
77
  def ml
51
78
  @values.max_by { |k, v| v }[0]
52
79
  end
53
80
 
81
+ ##
82
+ # Returns the expected value for this slice.
83
+
54
84
  def expected
55
85
  @values.map { |k, v| k*v }.sum
56
86
  end
57
87
 
88
+ ##
89
+ # Returns a slice with the values that are greater than +n+.
90
+
58
91
  def >(n)
59
- self.so_that { |k| k > n}
92
+ self.so_that { |k| k > n }
60
93
  end
61
94
 
95
+ ##
96
+ # Returns a slice with the values that are greater than or equal to +n+.
97
+
62
98
  def >=(n)
63
- self.so_that { |k| k >= n}
99
+ self.so_that { |k| k >= n }
64
100
  end
65
101
 
102
+ ##
103
+ # Returns a slice with the values that are lower than +n+.
104
+
66
105
  def <(n)
67
- self.so_that { |k| k < n}
106
+ self.so_that { |k| k < n }
68
107
  end
69
108
 
109
+ ##
110
+ # Returns a slice with the values that are lower than or equal to +n+.
111
+
70
112
  def <=(n)
71
- self.so_that { |k| k <= n}
113
+ self.so_that { |k| k <= n }
72
114
  end
73
115
 
116
+ ##
117
+ # Returns a slice with the value +n+.
118
+
74
119
  def ==(n)
75
- self.so_that { |k| k == n}
120
+ self.so_that { |k| k == n }
76
121
  end
77
122
 
78
- def so_that
79
- RandomVariableSlice.new(@values.select { |k, v| yield(k) })
80
- end
123
+ ##
124
+ # Returns a slice with the values between +a+ and +b+.
81
125
 
82
126
  def between(a, b)
83
- RandomVariableSlice.new(@values.select { |k, v| k.between? a, b })
127
+ self.so_that { |k| k.between?(a, b) }
128
+ end
129
+
130
+ ##
131
+ # Returns a slice with the values for which the given block returns true.
132
+
133
+ def so_that
134
+ RandomVariableSlice.new(@values.select { |k, v| yield(k) })
84
135
  end
85
136
  end
137
+
138
+ ##
139
+ # Represents a random variable. The cumulative probability of the values must equal 1.
86
140
 
87
141
  class RandomVariable < RandomVariableSlice
88
142
  EPSILON = 1e-7
89
143
 
90
144
  attr_reader :values
91
145
 
146
+ ##
147
+ # Creates a new random variable. +values+ is a hash of values associated with their probabilities.
148
+ # +exact+ indicates whether this variable, when combined with others, should force to keep all the values, even
149
+ # the most unlikely ones. If this is +false+ (default), the most improbable values (lower than EPSILON) are
150
+ # removed for efficiency reasons.
151
+
92
152
  def initialize(values = {0 => 1.0}, exact = false)
93
153
  @values = values
94
154
  @exact = exact
@@ -99,34 +159,46 @@ module Rust
99
159
  approx!
100
160
  end
101
161
 
162
+ ##
163
+ # Returns the probability of value +v+.
164
+
102
165
  def probability(v)
103
166
  return @values[v].to_f
104
167
  end
105
168
 
106
- def +(other)
107
- new_hash = {}
169
+ ##
170
+ # Returns a new random variable which represents the sum of this and the +other+ random variable.
108
171
 
109
- @values.each do |my_key, my_value|
110
- other.values.each do |other_key, other_value|
111
- sum_key = my_key + other_key
112
-
113
- new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
172
+ def +(other)
173
+ new_hash = {}
174
+
175
+ @values.each do |my_key, my_value|
176
+ other.values.each do |other_key, other_value|
177
+ sum_key = my_key + other_key
178
+
179
+ new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
180
+ end
114
181
  end
182
+
183
+ return RandomVariable.new(new_hash, @exact)
115
184
  end
116
185
 
117
- return RandomVariable.new(new_hash, @exact)
118
- end
186
+ ##
187
+ # Based on the type of +arg+, either mul (product with another random variable) or rep (repeated sum) is called.
119
188
 
120
- def *(times)
121
- if times.is_a? Integer
122
- return rep(times)
123
- elsif times.is_a? RandomVariable
124
- return mul(times)
189
+ def *(arg)
190
+ if arg.is_a? Integer
191
+ return rep(arg)
192
+ elsif arg.is_a? RandomVariable
193
+ return mul(arg)
125
194
  else
126
195
  raise "The argument must be an Integer or a RandomVariable"
127
196
  end
128
197
  end
129
198
 
199
+ ##
200
+ # Returns a new random variable which represents the product of this and the +other+ random variable.
201
+
130
202
  def mul(other)
131
203
  new_hash = {}
132
204
 
@@ -141,6 +213,9 @@ module Rust
141
213
  return RandomVariable.new(new_hash, @exact)
142
214
  end
143
215
 
216
+ ##
217
+ # Returns a new random variable which represents the sum of this random variable with itself +times+ times.
218
+
144
219
  def rep(times)
145
220
  rv = self
146
221
  (times-1).times do
@@ -150,10 +225,16 @@ module Rust
150
225
  return rv
151
226
  end
152
227
 
228
+ ##
229
+ # Makes sure that the operations yield all the values, even the most unlikely ones.
230
+
153
231
  def exact!
154
232
  @exact = true
155
233
  end
156
234
 
235
+ ##
236
+ # If this variable is not exact, the values with probability lower than EPSILON are removed.
237
+
157
238
  def approx!
158
239
  return if @exact
159
240
 
@@ -164,35 +245,56 @@ module Rust
164
245
 
165
246
  to_delete.each do |v|
166
247
  probability = @values.delete v
167
- nearest = @values.keys.min_by { |k| k.distance v }
248
+ nearest = @values.keys.min_by { |k| k._rust_prob_distance v }
168
249
  @values[nearest] += probability
169
250
  end
170
251
  end
171
252
 
253
+ ##
254
+ # Returns a random value, according to the data distribution.
255
+
172
256
  def extract
173
257
  v = rand
174
258
 
175
259
  cumulative = 0
176
- @values.each do |key, prob|
260
+ @values.sort_by { |k, v| k }.each do |key, prob|
177
261
  cumulative += prob
178
262
 
179
263
  return key if cumulative >= v
180
264
  end
181
265
  end
182
266
 
267
+ ##
268
+ # Creates a random variable by partially specifying the values through +hash+. The remaining probability is
269
+ # attributed to +key+ (0, by default).
270
+
183
271
  def self.complete(hash, key=0)
184
272
  hash[key] = 1 - hash.values.sum
185
273
  return RandomVariable.new(hash)
186
274
  end
187
275
  end
188
276
 
277
+ ##
278
+ # Represents a uniform random variable.
279
+
189
280
  class UniformRandomVariable < RandomVariable
281
+
282
+ ##
283
+ # Creates random variables for which all the +values+ have the same probability (1 / values.size).
284
+
190
285
  def initialize(values, exact = false)
191
286
  super(values.map { |k| [k, 1.0 / values.size]}.to_h, exact)
192
287
  end
193
288
  end
194
289
 
290
+ ##
291
+ # Module that contains utilities for handling random variables.
292
+
195
293
  module Probabilities
294
+
295
+ ##
296
+ # Computes the probability of the random variable +v+.
297
+
196
298
  def P(v)
197
299
  if v.is_a? RandomVariableSlice
198
300
  raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable
@@ -202,6 +304,9 @@ module Rust
202
304
  end
203
305
  end
204
306
 
307
+ ##
308
+ # Computes the expected value of the random variable +v+.
309
+
205
310
  def E(v)
206
311
  if v.is_a? RandomVariableSlice
207
312
  return v.expected
@@ -211,7 +316,10 @@ module Rust
211
316
  end
212
317
  end
213
318
 
214
- class RandomVariable
319
+ ##
320
+ # Module containing examples of commonly-used random variables.
321
+
322
+ module RandomVariableExamples
215
323
  ENGLISH_ALPHABET = RandomVariable.new({
216
324
  "a" => 0.08167,
217
325
  "b" => 0.01492,
@@ -240,9 +348,9 @@ module Rust
240
348
  "y" => 0.01974,
241
349
  "z" => 0.00074
242
350
  })
243
-
351
+
244
352
  DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])
245
-
353
+
246
354
  COIN = UniformRandomVariable.new(["h", "t"])
247
355
  end
248
356
  end
@@ -1,6 +1,13 @@
1
1
  require_relative '../core'
2
2
 
3
+ ##
4
+ # Module with utilities for running statistical hypothesis tests.
5
+
3
6
  module Rust::StatisticalTests
7
+
8
+ ##
9
+ # Represents the result of a statistical hypothesis test.
10
+
4
11
  class Result
5
12
  attr_accessor :name
6
13
  attr_accessor :statistics
@@ -21,16 +28,28 @@ module Rust::StatisticalTests
21
28
  @statistics[name.to_sym] = value
22
29
  end
23
30
 
31
+ ##
32
+ # If a hypothesis is available, returns the adjusted p-value with respect to all the other results obtained for
33
+ # the same hypothesis. Otherwise, simply returns the p-value for this result.
34
+ # The +method+ for adjustment can be optionally specified (Bonferroni, by default).
35
+
24
36
  def adjusted_pvalue(method='bonferroni')
25
- return 1 unless @hypothesis
37
+ return @pvalue unless @hypothesis
26
38
  @hypothesis.adjusted_pvalue_for(self, method)
27
39
  end
28
40
 
41
+ ##
42
+ # Sets the underlying hypothesis for the test. The p-values of the results belonging to the same hypothesis can
43
+ # be adjusted through the adjusted_pvalue method.
44
+
29
45
  def hypothesis=(value)
30
46
  @hypothesis = value
31
47
  @hypothesis.add(self)
32
48
  end
33
49
 
50
+ ##
51
+ # Returns true if the results are significant according to the specified alpha.
52
+
34
53
  def significant
35
54
  pvalue < alpha
36
55
  end
@@ -43,7 +62,13 @@ module Rust::StatisticalTests
43
62
  end
44
63
  end
45
64
 
46
- class Hypothesis
65
+ ##
66
+ # Represents a hypothesis behind one or more results.
67
+
68
+ class Hypothesis
69
+ ##
70
+ # Returns the hypothesis with the given +title_or_instance+ as title (if String).
71
+
47
72
  def self.find(title_or_instance)
48
73
  return Hypothesis.new(nil) if title_or_instance == nil
49
74
 
@@ -63,18 +88,28 @@ module Rust::StatisticalTests
63
88
  attr_reader :results
64
89
  attr_reader :title
65
90
 
91
+ ##
92
+ # Creates a new hypothesis with a given +title+.
93
+
66
94
  def initialize(title)
67
95
  @title = title
68
96
  @results = []
69
97
  end
70
98
 
99
+ ##
100
+ # Registers a +result+ for this hypothesis.
101
+
71
102
  def add(result)
72
103
  @results << result
73
104
  end
74
105
 
75
- def adjusted_pvalue_for(instance, method)
106
+ ##
107
+ # Returns the adjusted p-value for a specific +result+ with respect to all the other results obtained under this
108
+ # same hypothesis, using the specified +method+.
109
+
110
+ def adjusted_pvalue_for(result, method)
76
111
  p_values = @results.map { |r| r.pvalue }
77
- index = @results.index(instance)
112
+ index = @results.index(result)
78
113
 
79
114
  adjusted_pvalues = Rust::StatisticalTests::PValueAdjustment.method(method).adjust(*p_values)
80
115
 
@@ -85,9 +120,17 @@ module Rust::StatisticalTests
85
120
  end
86
121
  end
87
122
  end
123
+
124
+ ##
125
+ # Class with utilities for running the Wilcoxon Signed-Rank test and the Rank-Sum test (a.k.a. Mann-Whitney U test).
88
126
 
89
127
  class Wilcoxon
90
- def self.paired(d1, d2, alpha = 0.05, **options)
128
+
129
+ ##
130
+ # Runs a Wilcoxon Signed-Rank test for +d1+ and +d2+, with a given +alpha+ (0.05, by default).
131
+ # +options+ can be specified and directly passed to the R function.
132
+
133
+ def self.paired(d1, d2, alpha = 0.05, **options)
91
134
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
92
135
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
93
136
  raise "The two distributions have different size" if d1.size != d2.size
@@ -109,6 +152,10 @@ module Rust::StatisticalTests
109
152
  end
110
153
  end
111
154
 
155
+ ##
156
+ # Runs a Wilcoxon Rank-Sum (a.k.a. Mann-Whitney U) test for +d1+ and +d2+, with a given +alpha+ (0.05, by default).
157
+ # +options+ can be specified and directly passed to the R function.
158
+
112
159
  def self.unpaired(d1, d2, alpha = 0.05, **options)
113
160
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
114
161
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -131,7 +178,15 @@ module Rust::StatisticalTests
131
178
  end
132
179
  end
133
180
 
181
+ ##
182
+ # Class with utilities for running the T test.
183
+
134
184
  class T
185
+
186
+ ##
187
+ # Runs a paired T test for +d1+ and +d2+, with a given +alpha+ (0.05, by default).
188
+ # +options+ can be specified and directly passed to the R function.
189
+
135
190
  def self.paired(d1, d2, alpha = 0.05, **options)
136
191
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
137
192
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -154,6 +209,10 @@ module Rust::StatisticalTests
154
209
  end
155
210
  end
156
211
 
212
+ ##
213
+ # Runs an unpaired T test for +d1+ and +d2+, with a given +alpha+ (0.05, by default).
214
+ # +options+ can be specified and directly passed to the R function.
215
+
157
216
  def self.unpaired(d1, d2, alpha = 0.05, **options)
158
217
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
159
218
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -176,7 +235,15 @@ module Rust::StatisticalTests
176
235
  end
177
236
  end
178
237
 
238
+ ##
239
+ # Utilities for the Shapiro normality test.
240
+
179
241
  class Shapiro
242
+
243
+ ##
244
+ # Runs the Shapiro normality test for +vector+ and a given +alpha+ (0.05, by default).
245
+ # +options+ can be specified and directly passed to the R function.
246
+
180
247
  def self.compute(vector, alpha = 0.05, **options)
181
248
  raise TypeError, "Expecting Array of numerics" if !vector.is_a?(Array) || !vector.all? { |e| e.is_a?(Numeric) }
182
249
  Rust.exclusive do
@@ -196,7 +263,14 @@ module Rust::StatisticalTests
196
263
  end
197
264
  end
198
265
 
266
+ ##
267
+ # Module with utilities for adjusting the p-values.
268
+
199
269
  module PValueAdjustment
270
+
271
+ ##
272
+ # Returns the Ruby class given the R name of the p-value adjustment method.
273
+
200
274
  def self.method(name)
201
275
  name = name.to_s
202
276
  case name.downcase
@@ -215,6 +289,9 @@ module Rust::StatisticalTests
215
289
  end
216
290
  end
217
291
 
292
+ ##
293
+ # Bonferroni p-value adjustment method.
294
+
218
295
  class Bonferroni
219
296
  def self.adjust(*p_values)
220
297
  Rust.exclusive do
@@ -224,6 +301,9 @@ module Rust::StatisticalTests
224
301
  end
225
302
  end
226
303
 
304
+ ##
305
+ # Holm p-value adjustment method.
306
+
227
307
  class Holm
228
308
  def self.adjust(*p_values)
229
309
  Rust.exclusive do
@@ -233,6 +313,9 @@ module Rust::StatisticalTests
233
313
  end
234
314
  end
235
315
 
316
+ ##
317
+ # Hochberg p-value adjustment method.
318
+
236
319
  class Hochberg
237
320
  def self.adjust(*p_values)
238
321
  Rust.exclusive do
@@ -242,6 +325,9 @@ module Rust::StatisticalTests
242
325
  end
243
326
  end
244
327
 
328
+ ##
329
+ # Hommel p-value adjustment method.
330
+
245
331
  class Hommel
246
332
  def self.adjust(*p_values)
247
333
  Rust.exclusive do
@@ -251,6 +337,9 @@ module Rust::StatisticalTests
251
337
  end
252
338
  end
253
339
 
340
+ ##
341
+ # Benjamini-Hochberg p-value adjustment method.
342
+
254
343
  class BenjaminiHochberg
255
344
  def self.adjust(*p_values)
256
345
  Rust.exclusive do
@@ -260,6 +349,9 @@ module Rust::StatisticalTests
260
349
  end
261
350
  end
262
351
 
352
+ ##
353
+ # Benjamini-Yekutieli p-value adjustment method.
354
+
263
355
  class BenjaminiYekutieli
264
356
  def self.adjust(*p_values)
265
357
  Rust.exclusive do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.9'
4
+ version: '0.10'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-09 00:00:00.000000000 Z
11
+ date: 2022-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rinruby