rust 0.11 → 0.13

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,6 +14,11 @@ module Rust::Models::Regression
14
14
  # Generic regression model in R.
15
15
 
16
16
  class RegressionModel < Rust::RustDatatype
17
+
18
+ attr_accessor :data
19
+ attr_accessor :dependent_variable
20
+ attr_accessor :options
21
+
17
22
  def self.can_pull?(type, klass)
18
23
  # Can only pull specific sub-types
19
24
  return false
@@ -38,22 +43,30 @@ module Rust::Models::Regression
38
43
 
39
44
  formula = Rust::Formula.new(dependent_variable, independent_variables.join(" + "))
40
45
 
46
+ result = nil
41
47
  Rust.exclusive do
42
48
  Rust["#{model_type}.data"] = data
43
49
 
44
50
  Rust._eval("#{model_type}.model.result <- #{model_type}(#{formula.to_R}, data=#{model_type}.data#{mapped})")
45
51
  result = Rust["#{model_type}.model.result"]
46
- result.r_mirror_to("#{model_type}.model.result")
47
52
 
48
- return result
53
+ raise "An error occurred while building the model" unless result
54
+
55
+ result.r_mirror_to("#{model_type}.model.result")
49
56
  end
57
+
58
+ result.dependent_variable = dependent_variable
59
+ result.data = data
60
+ result.options = options
61
+
62
+ return result
50
63
  end
51
64
 
52
65
  ##
53
- # Creates a new +model+.
66
+ # Creates a new model based on +model+.
54
67
 
55
68
  def initialize(model)
56
- raise StandardError if model.is_a?(RegressionModel)
69
+ raise "Expected a R list, given a #{model.class}" if !model.is_a?(Rust::List)
57
70
  @model = model
58
71
  end
59
72
 
@@ -118,6 +131,58 @@ module Rust::Models::Regression
118
131
  a = self.summary|"coefficients"
119
132
  end
120
133
 
134
##
# Returns the variables of the model as ModelVariable instances carrying only basic data: for each row of the
# coefficients table, its name, its "Estimate" and its "Pr(>|t|)" value. The list is built on the first call
# and cached in @variables. Use the method `coefficients` to get more data.

def variables
  @variables ||= begin
    table = self.coefficients

    table.rownames.map do |row|
      estimate = table[row, "Estimate"]
      p_value = table[row, "Pr(>|t|)"]

      ModelVariable.new(row, estimate, p_value)
    end
  end
end
149
+
150
##
# Returns only the variables (see the method `variables`) for which `significant?(a)` holds, as ModelVariable
# instances. +a+ is the significance threshold (0.05 by default).

def significant_variables(a = 0.05)
  variables.select do |variable|
    variable.significant?(a)
  end
end
156
+
157
##
# Runs backward selection: the non-significant variables (intercept excluded) are tried one at a time, starting
# from the one with the highest p-value. As soon as the model generated without a candidate has an
# +r_2_adjusted+ that is not lower than the one of this model, the procedure is repeated on that new model.
# +excluded+ is the list of the variables removed in the previous steps (empty by default).
# Returns both the best model and the list of excluded variables at each step.
# Note: Not fully tested
#
# NOTE(review): the value of +r_model_name+ is passed to RegressionModel.generate in the position where the
# name of the R function ("lm", "lmer") used to be passed; verify that this holds for every subclass.

def backward_selection(excluded = [])
  regressors = self.variables.reject { |v| v.intercept? }
  # Least significant variables (highest p-value) are tried first.
  candidates = regressors.reject { |v| v.significant? }.sort_by { |v| v.pvalue }.reverse

  candidates.each do |candidate|
    new_model = RegressionModel.generate(
      self.class,
      self.class.r_model_name,
      self.dependent_variable,
      (regressors - [candidate]).map { |v| v.name },
      self.data,
      **self.options
    )

    # Keep the candidate if removing it makes the model worse.
    next unless new_model.r_2_adjusted >= self.r_2_adjusted

    # Print the name: ModelVariable defines no to_s, so interpolating the object itself would only show
    # the default "#<...ModelVariable:0x...>" string.
    puts "Excluded #{candidate.name}" if Rust.debug?
    return *new_model.backward_selection(excluded + [candidate])
  end

  return self, excluded
end
184
+
185
+
121
186
  def method_missing(name, *args)
122
187
  return model|name.to_s
123
188
  end
@@ -145,7 +210,11 @@ module Rust::Models::Regression
145
210
 
146
211
  class LinearRegressionModel < RegressionModel
147
212
  def self.can_pull?(type, klass)
148
- return type == "list" && klass == "lm"
213
+ return type == "list" && klass == self.r_model_name
214
+ end
215
+
216
+ def self.pull_priority
217
+ 1
149
218
  end
150
219
 
151
220
  def self.pull_variable(variable, type, klass)
@@ -154,6 +223,10 @@ module Rust::Models::Regression
154
223
  return LinearRegressionModel.new(model)
155
224
  end
156
225
 
226
##
# Returns the R name associated with this kind of model ("lm"). In this file it is compared with the class of a
# pulled R variable (see +can_pull?+) and passed to RegressionModel.generate (see +generate+).

def self.r_model_name
  return "lm"
end
229
+
157
230
  ##
158
231
  # Generates a linear regression model, given its +dependent_variable+ and +independent_variables+ and its +data+.
159
232
  # +options+ can be specified and directly passed to the model.
@@ -161,7 +234,7 @@ module Rust::Models::Regression
161
234
  def self.generate(dependent_variable, independent_variables, data, **options)
162
235
  RegressionModel.generate(
163
236
  LinearRegressionModel,
164
- "lm",
237
+ self.r_model_name,
165
238
  dependent_variable,
166
239
  independent_variables,
167
240
  data,
@@ -175,13 +248,17 @@ module Rust::Models::Regression
175
248
 
176
249
  class LinearMixedEffectsModel < RegressionModel
177
250
  def self.can_pull?(type, klass)
178
- return type == "S4" && klass == "lmerModLmerTest"
251
+ return type == "S4" && klass == self.r_model_name
179
252
  end
180
253
 
181
254
  def self.pull_priority
182
255
  1
183
256
  end
184
257
 
258
##
# Returns the R name associated with this kind of model ("lmerModLmerTest"). In this file it is compared with
# the class of a pulled R variable (see +can_pull?+) and passed to RegressionModel.generate (see +generate+).
#
# NOTE(review): +generate+ used to pass "lmer" to RegressionModel.generate, which calls the given name as an
# R function; "lmerModLmerTest" looks like a class name rather than a function name -- verify.

def self.r_model_name
  return "lmerModLmerTest"
end
261
+
185
262
  def self.pull_variable(variable, type, klass)
186
263
  model = Rust::RustDatatype.pull_variable(variable, Rust::S4Class)
187
264
 
@@ -213,7 +290,7 @@ module Rust::Models::Regression
213
290
 
214
291
  RegressionModel.generate(
215
292
  LinearMixedEffectsModel,
216
- "lmer",
293
+ self.r_model_name,
217
294
  dependent_variable,
218
295
  fixed_effects + random_effects,
219
296
  data,
@@ -235,18 +312,44 @@ module Rust::Models::Regression
235
312
  end
236
313
  end
237
314
  end
315
+
316
##
# Lightweight description of a variable in a model: only the variable name, its coefficient and its p-value
# are kept.

class ModelVariable
  attr_accessor :name, :coefficient, :pvalue

  ##
  # Creates a new variable, given its +name+, its +coefficient+ and its +pvalue+.

  def initialize(name, coefficient, pvalue)
    @name, @coefficient, @pvalue = name, coefficient, pvalue
  end

  ##
  # Checks whether the variable is the intercept, i.e., whether its name is "(Intercept)".

  def intercept?
    @name == "(Intercept)"
  end

  ##
  # Checks whether the variable is significant, i.e., whether its p-value is lower than or equal to a given
  # +a+ (0.05 by default).

  def significant?(a = 0.05)
    @pvalue <= a
  end
end
238
341
  end
239
342
 
240
343
  module Rust::RBindings
241
344
  def lm(formula, data, **options)
242
345
  independent = formula.right_part.split("+").map { |v| v.strip }
243
- return LinearRegressionModel.generate(formula.left_part, independent, data, **options)
346
+ return Rust::Models::Regression::LinearRegressionModel.generate(formula.left_part, independent, data, **options)
244
347
  end
245
348
 
246
349
  def lmer(formula, data, **options)
247
350
  independent = formula.right_part.split("+").map { |v| v.strip }
248
351
 
249
- RegressionModel.generate(
352
+ Rust::Models::Regression::RegressionModel.generate(
250
353
  LinearMixedEffectsModel,
251
354
  "lmer",
252
355
  formula.left_part,
@@ -85,6 +85,20 @@ module Rust
85
85
  @values.map { |k, v| k*v }.sum
86
86
  end
87
87
 
88
##
# Returns the variance for this slice: the sum of k^2 * v over the (k, v) pairs in @values, minus the square
# of +expected+.
# NOTE(review): presumably k is an outcome and v its probability -- confirm against the class definition.

def variance
  second_moment = @values.map { |outcome, weight| outcome**2 * weight }.sum
  squared_mean = self.expected**2

  second_moment - squared_mean
end
94
+
95
##
# Returns the standard deviation for this slice, i.e., the square root of +variance+.
# A negative variance is treated as zero: +variance+ is computed as a difference, so rounding can make it
# slightly negative, and Math.sqrt would raise Math::DomainError in that case.

def sd
  variance = self.variance
  return 0.0 if variance < 0

  Math.sqrt(variance)
end
101
+
88
102
  ##
89
103
  # Returns a slice with the values that are greater than +n+.
90
104
 
@@ -124,7 +138,7 @@ module Rust
124
138
  # Returns a slice with the values between +a+ and +b+.
125
139
 
126
140
  def between(a, b)
127
- self.so_that { |k| k.between(a, b) }
141
+ self.so_that { |k| k.between?(a, b) }
128
142
  end
129
143
 
130
144
  ##
@@ -133,6 +147,13 @@ module Rust
133
147
  def so_that
134
148
  RandomVariableSlice.new(@values.select { |k, v| yield(k) })
135
149
  end
150
+
151
##
# Creates a bar plot of the distribution: the entries of @values, ordered by key, are passed to
# Rust::Plots::BarPlot.

def plot
  ordered = @values.sort_by { |key, _| key }

  Rust::Plots::BarPlot.new(ordered.to_h)
end
136
157
  end
137
158
 
138
159
  ##
data/lib/rust.rb CHANGED
@@ -2,3 +2,22 @@ require_relative 'rust/core'
2
2
  require_relative 'rust/models/all'
3
3
  require_relative 'rust/plots/all'
4
4
  require_relative 'rust/stats/all'
5
+
6
module Rust
  # Cache of the datasets already pulled from R, indexed by the dataset name (as a symbol).
  @@datasets = {}

  ##
  # Returns the R variable `ToothGrowth`. It is pulled from R on the first call and cached afterwards.

  def self.toothgrowth
    load_builtin_dataset('ToothGrowth')
  end

  ##
  # Returns the R variable `cars`. It is pulled from R on the first call and cached afterwards.

  def self.cars
    load_builtin_dataset('cars')
  end

  ##
  # Returns the R variable `iris`. It is pulled from R on the first call and cached afterwards.

  def self.iris
    load_builtin_dataset('iris')
  end

  ##
  # Returns the cached value for the R variable +name+, pulling it inside a Rust.exclusive block when there is
  # no cached (truthy) value yet.

  def self.load_builtin_dataset(name)
    @@datasets[name.to_sym] ||= Rust.exclusive { Rust[name] }
  end
  private_class_method :load_builtin_dataset
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.11'
4
+ version: '0.13'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-11 00:00:00.000000000 Z
11
+ date: 2024-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rinruby
@@ -61,6 +61,7 @@ files:
61
61
  - lib/rust.rb
62
62
  - lib/rust/core.rb
63
63
  - lib/rust/core/csv.rb
64
+ - lib/rust/core/manual.rb
64
65
  - lib/rust/core/rust.rb
65
66
  - lib/rust/core/types/all.rb
66
67
  - lib/rust/core/types/dataframe.rb
@@ -71,6 +72,12 @@ files:
71
72
  - lib/rust/core/types/matrix.rb
72
73
  - lib/rust/core/types/s4class.rb
73
74
  - lib/rust/core/types/utils.rb
75
+ - lib/rust/external/ggplot2.rb
76
+ - lib/rust/external/ggplot2/core.rb
77
+ - lib/rust/external/ggplot2/geoms.rb
78
+ - lib/rust/external/ggplot2/plot_builder.rb
79
+ - lib/rust/external/ggplot2/scale.rb
80
+ - lib/rust/external/ggplot2/themes.rb
74
81
  - lib/rust/external/robustbase.rb
75
82
  - lib/rust/models/all.rb
76
83
  - lib/rust/models/anova.rb
@@ -104,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
111
  - !ruby/object:Gem::Version
105
112
  version: '0'
106
113
  requirements: []
107
- rubygems_version: 3.3.15
114
+ rubygems_version: 3.5.16
108
115
  signing_key:
109
116
  specification_version: 4
110
117
  summary: Ruby advanced statistical library