rust 0.9 → 0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,11 @@
1
1
  require_relative '../core'
2
2
 
3
3
  class Numeric
4
- def distance(other)
4
+
5
+ ##
6
+ # Computes the distance between this and another number.
7
+
8
+ def _rust_prob_distance(other)
5
9
  raise TypeError, "no implicit conversion of #{other.class} into Numeric" unless other.is_a? Numeric
6
10
 
7
11
  return (self - other).abs
@@ -9,14 +13,18 @@ class Numeric
9
13
  end
10
14
 
11
15
  class Array
12
- def distance(other)
16
+
17
+ ##
18
+ # Computes the distance between this and another array.
19
+
20
+ def _rust_prob_distance(other)
13
21
  raise TypeError, "no implicit conversion of #{other.class} into Array" unless other.is_a? Array
14
22
 
15
23
  longest, shortest = self.size > other.size ? [self, other] : [other, self]
16
24
 
17
25
  distance = 0
18
26
  for i in 0...longest.size
19
- distance += longest[i].to_i.distance(shortest[i].to_i)
27
+ distance += longest[i].to_i._rust_prob_distance(shortest[i].to_i)
20
28
  end
21
29
 
22
30
  return distance
@@ -24,21 +32,37 @@ class Array
24
32
  end
25
33
 
26
34
  class String
27
- def distance(other)
35
+
36
+ ##
37
+ # Computes the distance between this and another string.
38
+
39
+ def _rust_prob_distance(other)
28
40
  raise TypeError, "no implicit conversion of #{other.class} into String" unless other.is_a? String
29
41
 
30
- return self.bytes.distance other.bytes
42
+ return self.bytes._rust_prob_distance other.bytes
31
43
  end
32
44
  end
33
45
 
34
46
  module Rust
47
+
48
+ ##
49
+ # Represents a slice of a random variable, for which no check is made in terms of cumulative probability.
50
+
35
51
  class RandomVariableSlice
52
+
53
+ ##
54
+ # Creates a new slice of random variable. +values+ is a hash of values associated with their probabilities.
55
+
36
56
  def initialize(values)
37
57
  raise TypeError, "Expected Hash" unless values.is_a?(Hash)
38
58
 
39
59
  @values = values
40
60
  end
41
61
 
62
+ ##
63
+ # Gets the probability of a value +v+. If +v+ is not specified, returns the cumulative probability of the whole
64
+ # slice.
65
+
42
66
  def probability(v=nil)
43
67
  unless v
44
68
  return @values.values.sum
@@ -47,48 +71,84 @@ module Rust
47
71
  end
48
72
  end
49
73
 
74
+ ##
75
+ # Returns the value with the maximum probability.
76
+
50
77
  def ml
51
78
  @values.max_by { |k, v| v }[0]
52
79
  end
53
80
 
81
+ ##
82
+ # Returns the expected value for this slice.
83
+
54
84
  def expected
55
85
  @values.map { |k, v| k*v }.sum
56
86
  end
57
87
 
88
+ ##
89
+ # Returns a slice with the values that are greater than +n+.
90
+
58
91
  def >(n)
59
- self.so_that { |k| k > n}
92
+ self.so_that { |k| k > n }
60
93
  end
61
94
 
95
+ ##
96
+ # Returns a slice with the values that are greater than or equal to +n+.
97
+
62
98
  def >=(n)
63
- self.so_that { |k| k >= n}
99
+ self.so_that { |k| k >= n }
64
100
  end
65
101
 
102
+ ##
103
+ # Returns a slice with the values that are lower than +n+.
104
+
66
105
  def <(n)
67
- self.so_that { |k| k < n}
106
+ self.so_that { |k| k < n }
68
107
  end
69
108
 
109
+ ##
110
+ # Returns a slice with the values that are lower than or equal to +n+.
111
+
70
112
  def <=(n)
71
- self.so_that { |k| k <= n}
113
+ self.so_that { |k| k <= n }
72
114
  end
73
115
 
116
+ ##
117
+ # Returns a slice with the value +n+.
118
+
74
119
  def ==(n)
75
- self.so_that { |k| k == n}
120
+ self.so_that { |k| k == n }
76
121
  end
77
122
 
78
- def so_that
79
- RandomVariableSlice.new(@values.select { |k, v| yield(k) })
80
- end
123
+ ##
124
+ # Returns a slice with the values between +a+ and +b+.
81
125
 
82
126
  def between(a, b)
83
- RandomVariableSlice.new(@values.select { |k, v| k.between? a, b })
127
+ self.so_that { |k| k.between(a, b) }
128
+ end
129
+
130
+ ##
131
+ # Returns a slice with the values for which the given block returns true.
132
+
133
+ def so_that
134
+ RandomVariableSlice.new(@values.select { |k, v| yield(k) })
84
135
  end
85
136
  end
137
+
138
+ ##
139
+ # Represents a random variable. The cumulative probability of the values must equal 1.
86
140
 
87
141
  class RandomVariable < RandomVariableSlice
88
142
  EPSILON = 1e-7
89
143
 
90
144
  attr_reader :values
91
145
 
146
+ ##
147
+ # Creates a new random variable. +values+ is a hash of values associated with their probabilities.
148
+ # +exact+ indicates whether this variable, when combined with others, should force to keep all the values, even
149
+ # the most unlikely ones. If this is +false+ (default), the most improbable values (lower than EPSILON) are
150
+ # removed for efficiency reasons.
151
+
92
152
  def initialize(values = {0 => 1.0}, exact = false)
93
153
  @values = values
94
154
  @exact = exact
@@ -99,34 +159,46 @@ module Rust
99
159
  approx!
100
160
  end
101
161
 
162
+ ##
163
+ # Returns the probability of value +v+.
164
+
102
165
  def probability(v)
103
166
  return @values[v].to_f
104
167
  end
105
168
 
106
- def +(other)
107
- new_hash = {}
169
+ ##
170
+ # Returns a new random variable which represents the sum of this and the +other+ random variable.
108
171
 
109
- @values.each do |my_key, my_value|
110
- other.values.each do |other_key, other_value|
111
- sum_key = my_key + other_key
112
-
113
- new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
172
+ def +(other)
173
+ new_hash = {}
174
+
175
+ @values.each do |my_key, my_value|
176
+ other.values.each do |other_key, other_value|
177
+ sum_key = my_key + other_key
178
+
179
+ new_hash[sum_key] = new_hash[sum_key].to_f + (my_value * other_value)
180
+ end
114
181
  end
182
+
183
+ return RandomVariable.new(new_hash, @exact)
115
184
  end
116
185
 
117
- return RandomVariable.new(new_hash, @exact)
118
- end
186
+ ##
187
+ # Based on the type of +arg+, either mul (product with another random variable) or rep (repeated sum) is called.
119
188
 
120
- def *(times)
121
- if times.is_a? Integer
122
- return rep(times)
123
- elsif times.is_a? RandomVariable
124
- return mul(times)
189
+ def *(arg)
190
+ if arg.is_a? Integer
191
+ return rep(arg)
192
+ elsif arg.is_a? RandomVariable
193
+ return mul(arg)
125
194
  else
126
195
  raise "The argument must be an Integer or a RandomVariable"
127
196
  end
128
197
  end
129
198
 
199
+ ##
200
+ # Returns a new random variable which represents the product of this and the +other+ random variable.
201
+
130
202
  def mul(other)
131
203
  new_hash = {}
132
204
 
@@ -141,6 +213,9 @@ module Rust
141
213
  return RandomVariable.new(new_hash, @exact)
142
214
  end
143
215
 
216
+ ##
217
+ # Returns a new random variable which represents the sum of this random variable with itself +times+ times.
218
+
144
219
  def rep(times)
145
220
  rv = self
146
221
  (times-1).times do
@@ -150,10 +225,16 @@ module Rust
150
225
  return rv
151
226
  end
152
227
 
228
+ ##
229
+ # Makes sure that the operations yield all the values, even the most unlikely ones.
230
+
153
231
  def exact!
154
232
  @exact = true
155
233
  end
156
234
 
235
+ ##
236
+ # If this variable is not exact, the values with probability lower than EPSILON are removed.
237
+
157
238
  def approx!
158
239
  return if @exact
159
240
 
@@ -164,35 +245,56 @@ module Rust
164
245
 
165
246
  to_delete.each do |v|
166
247
  probability = @values.delete v
167
- nearest = @values.keys.min_by { |k| k.distance v }
248
+ nearest = @values.keys.min_by { |k| k._rust_prob_distance v }
168
249
  @values[nearest] += probability
169
250
  end
170
251
  end
171
252
 
253
+ ##
254
+ # Returns a random value, according to the data distribution.
255
+
172
256
  def extract
173
257
  v = rand
174
258
 
175
259
  cumulative = 0
176
- @values.each do |key, prob|
260
+ @values.sort_by { |k, v| k }.each do |key, prob|
177
261
  cumulative += prob
178
262
 
179
263
  return key if cumulative >= v
180
264
  end
181
265
  end
182
266
 
267
+ ##
268
+ # Creates a random variable by partially specifying the values through +hash+. The remaining probability is
269
+ # attributed to +key+ (0, by default).
270
+
183
271
  def self.complete(hash, key=0)
184
272
  hash[key] = 1 - hash.values.sum
185
273
  return RandomVariable.new(hash)
186
274
  end
187
275
  end
188
276
 
277
+ ##
278
+ # Represents a uniform random variable.
279
+
189
280
  class UniformRandomVariable < RandomVariable
281
+
282
+ ##
283
+ # Creates random variables for which all the +values+ have the same probability (1 / values.size).
284
+
190
285
  def initialize(values, exact = false)
191
286
  super(values.map { |k| [k, 1.0 / values.size]}.to_h, exact)
192
287
  end
193
288
  end
194
289
 
290
+ ##
291
+ # Module that contains utilities for handling random variables.
292
+
195
293
  module Probabilities
294
+
295
+ ##
296
+ # Computes the probability of the random variable +v+.
297
+
196
298
  def P(v)
197
299
  if v.is_a? RandomVariableSlice
198
300
  raise "Cannot compute the probability of a random variable" if v.is_a? RandomVariable
@@ -202,6 +304,9 @@ module Rust
202
304
  end
203
305
  end
204
306
 
307
+ ##
308
+ # Computes the expected value of the random variable +v+.
309
+
205
310
  def E(v)
206
311
  if v.is_a? RandomVariableSlice
207
312
  return v.expected
@@ -211,7 +316,10 @@ module Rust
211
316
  end
212
317
  end
213
318
 
214
- class RandomVariable
319
+ ##
320
+ # Module containing examples of commonly-used random variables.
321
+
322
+ module RandomVariableExamples
215
323
  ENGLISH_ALPHABET = RandomVariable.new({
216
324
  "a" => 0.08167,
217
325
  "b" => 0.01492,
@@ -240,9 +348,9 @@ module Rust
240
348
  "y" => 0.01974,
241
349
  "z" => 0.00074
242
350
  })
243
-
351
+
244
352
  DICE = UniformRandomVariable.new([1, 2, 3, 4, 5, 6])
245
-
353
+
246
354
  COIN = UniformRandomVariable.new(["h", "t"])
247
355
  end
248
356
  end
@@ -1,6 +1,13 @@
1
1
  require_relative '../core'
2
2
 
3
+ ##
4
+ # Module with utilities for running statistical hypothesis tests.
5
+
3
6
  module Rust::StatisticalTests
7
+
8
+ ##
9
+ # Represents the result of a statistical hypothesis test.
10
+
4
11
  class Result
5
12
  attr_accessor :name
6
13
  attr_accessor :statistics
@@ -21,16 +28,28 @@ module Rust::StatisticalTests
21
28
  @statistics[name.to_sym] = value
22
29
  end
23
30
 
31
+ ##
32
+ # If a hypothesis is available, returns the adjusted p-value with respect to all the other results obtained for
33
+ # the same hypothesis. Otherwise, simply returns the p-value for this result.
34
+ # The +method+ for adjustment can be optionally specified (Bonferroni, by default).
35
+
24
36
  def adjusted_pvalue(method='bonferroni')
25
- return 1 unless @hypothesis
37
+ return @pvalue unless @hypothesis
26
38
  @hypothesis.adjusted_pvalue_for(self, method)
27
39
  end
28
40
 
41
+ ##
42
+ # Sets the underlying hypothesis for the test. The p-values of the results belonging to the same hypothesis can
43
+ # be adjusted through the adjusted_pvalue method.
44
+
29
45
  def hypothesis=(value)
30
46
  @hypothesis = value
31
47
  @hypothesis.add(self)
32
48
  end
33
49
 
50
+ ##
51
+ # Returns true if the results are significant according to the specified alpha.
52
+
34
53
  def significant
35
54
  pvalue < alpha
36
55
  end
@@ -43,7 +62,13 @@ module Rust::StatisticalTests
43
62
  end
44
63
  end
45
64
 
46
- class Hypothesis
65
+ ##
66
+ # Represents a hypothesis behind one or more results.
67
+
68
+ class Hypothesis
69
+ ##
70
+ # Returns the hypothesis with the given +title_or_instance+ as title (if String).
71
+
47
72
  def self.find(title_or_instance)
48
73
  return Hypothesis.new(nil) if title_or_instance == nil
49
74
 
@@ -63,18 +88,28 @@ module Rust::StatisticalTests
63
88
  attr_reader :results
64
89
  attr_reader :title
65
90
 
91
+ ##
92
+ # Creates a new hypothesis with a given +title+.
93
+
66
94
  def initialize(title)
67
95
  @title = title
68
96
  @results = []
69
97
  end
70
98
 
99
+ ##
100
+ # Registers a +result+ for this hypothesis.
101
+
71
102
  def add(result)
72
103
  @results << result
73
104
  end
74
105
 
75
- def adjusted_pvalue_for(instance, method)
106
+ ##
107
+ # Returns the adjusted p-value for a specific +result+ with respect to all the other results obtained under this
108
+ # same hypothesis, using the specified +method+.
109
+
110
+ def adjusted_pvalue_for(result, method)
76
111
  p_values = @results.map { |r| r.pvalue }
77
- index = @results.index(instance)
112
+ index = @results.index(result)
78
113
 
79
114
  adjusted_pvalues = Rust::StatisticalTests::PValueAdjustment.method(method).adjust(*p_values)
80
115
 
@@ -85,9 +120,17 @@ module Rust::StatisticalTests
85
120
  end
86
121
  end
87
122
  end
123
+
124
+ ##
125
+ # Class with utilities for running the Wilcoxon Signed-Rank test and Rank-Sum test (a.k.a. Mann-Whitney U test).
88
126
 
89
127
  class Wilcoxon
90
- def self.paired(d1, d2, alpha = 0.05, **options)
128
+
129
+ ##
130
+ # Runs a Wilcoxon Signed-Rank test for +d1+ and +d2+, with a given +alpha+ (0.05, by default).
131
+ # +options+ can be specified and directly passed to the R function.
132
+
133
+ def self.paired(d1, d2, alpha = 0.05, **options)
91
134
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
92
135
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
93
136
  raise "The two distributions have different size" if d1.size != d2.size
@@ -109,6 +152,10 @@ module Rust::StatisticalTests
109
152
  end
110
153
  end
111
154
 
155
+ ##
156
+ # Runs a Wilcoxon Rank-Sum (a.k.a. Mann-Whitney U) test for +d1+ and +d2+, with a given +alpha+ (0.05, by default).
157
+ # +options+ can be specified and directly passed to the R function.
158
+
112
159
  def self.unpaired(d1, d2, alpha = 0.05, **options)
113
160
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
114
161
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -131,7 +178,15 @@ module Rust::StatisticalTests
131
178
  end
132
179
  end
133
180
 
181
+ ##
182
+ # Class with utilities for running the T test.
183
+
134
184
  class T
185
+
186
+ ##
187
+ # Runs a paired T test for +d1+ and +d2+, with a given +alpha+ (0.05, by default).
188
+ # +options+ can be specified and directly passed to the R function.
189
+
135
190
  def self.paired(d1, d2, alpha = 0.05, **options)
136
191
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
137
192
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -154,6 +209,10 @@ module Rust::StatisticalTests
154
209
  end
155
210
  end
156
211
 
212
+ ##
213
+ # Runs an unpaired T test for +d1+ and +d2+, with a given +alpha+ (0.05, by default).
214
+ # +options+ can be specified and directly passed to the R function.
215
+
157
216
  def self.unpaired(d1, d2, alpha = 0.05, **options)
158
217
  raise TypeError, "Expecting Array of numerics" if !d1.is_a?(Array) || !d1.all? { |e| e.is_a?(Numeric) }
159
218
  raise TypeError, "Expecting Array of numerics" if !d2.is_a?(Array) || !d2.all? { |e| e.is_a?(Numeric) }
@@ -176,7 +235,15 @@ module Rust::StatisticalTests
176
235
  end
177
236
  end
178
237
 
238
+ ##
239
+ # Utilities for the Shapiro normality test.
240
+
179
241
  class Shapiro
242
+
243
+ ##
244
+ # Runs the Shapiro normality test for +vector+ and a given +alpha+ (0.05, by default).
245
+ # +options+ can be specified and directly passed to the R function.
246
+
180
247
  def self.compute(vector, alpha = 0.05, **options)
181
248
  raise TypeError, "Expecting Array of numerics" if !vector.is_a?(Array) || !vector.all? { |e| e.is_a?(Numeric) }
182
249
  Rust.exclusive do
@@ -196,7 +263,14 @@ module Rust::StatisticalTests
196
263
  end
197
264
  end
198
265
 
266
+ ##
267
+ # Module with utilities for adjusting the p-values.
268
+
199
269
  module PValueAdjustment
270
+
271
+ ##
272
+ # Returns the Ruby class given the R name of the p-value adjustment method.
273
+
200
274
  def self.method(name)
201
275
  name = name.to_s
202
276
  case name.downcase
@@ -215,6 +289,9 @@ module Rust::StatisticalTests
215
289
  end
216
290
  end
217
291
 
292
+ ##
293
+ # Bonferroni p-value adjustment method.
294
+
218
295
  class Bonferroni
219
296
  def self.adjust(*p_values)
220
297
  Rust.exclusive do
@@ -224,6 +301,9 @@ module Rust::StatisticalTests
224
301
  end
225
302
  end
226
303
 
304
+ ##
305
+ # Holm p-value adjustment method.
306
+
227
307
  class Holm
228
308
  def self.adjust(*p_values)
229
309
  Rust.exclusive do
@@ -233,6 +313,9 @@ module Rust::StatisticalTests
233
313
  end
234
314
  end
235
315
 
316
+ ##
317
+ # Hochberg p-value adjustment method.
318
+
236
319
  class Hochberg
237
320
  def self.adjust(*p_values)
238
321
  Rust.exclusive do
@@ -242,6 +325,9 @@ module Rust::StatisticalTests
242
325
  end
243
326
  end
244
327
 
328
+ ##
329
+ # Hommel p-value adjustment method.
330
+
245
331
  class Hommel
246
332
  def self.adjust(*p_values)
247
333
  Rust.exclusive do
@@ -251,6 +337,9 @@ module Rust::StatisticalTests
251
337
  end
252
338
  end
253
339
 
340
+ ##
341
+ # Benjamini-Hochberg p-value adjustment method.
342
+
254
343
  class BenjaminiHochberg
255
344
  def self.adjust(*p_values)
256
345
  Rust.exclusive do
@@ -260,6 +349,9 @@ module Rust::StatisticalTests
260
349
  end
261
350
  end
262
351
 
352
+ ##
353
+ # Benjamini-Yekutieli p-value adjustment method.
354
+
263
355
  class BenjaminiYekutieli
264
356
  def self.adjust(*p_values)
265
357
  Rust.exclusive do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.9'
4
+ version: '0.10'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-09 00:00:00.000000000 Z
11
+ date: 2022-08-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rinruby