rust 0.11 → 0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,11 @@ module Rust::Models::Regression
14
14
  # Generic regression model in R.
15
15
 
16
16
  class RegressionModel < Rust::RustDatatype
17
+
18
+ attr_accessor :data
19
+ attr_accessor :dependent_variable
20
+ attr_accessor :options
21
+
17
22
  def self.can_pull?(type, klass)
18
23
  # Can only pull specific sub-types
19
24
  return false
@@ -38,22 +43,30 @@ module Rust::Models::Regression
38
43
 
39
44
  formula = Rust::Formula.new(dependent_variable, independent_variables.join(" + "))
40
45
 
46
+ result = nil
41
47
  Rust.exclusive do
42
48
  Rust["#{model_type}.data"] = data
43
49
 
44
50
  Rust._eval("#{model_type}.model.result <- #{model_type}(#{formula.to_R}, data=#{model_type}.data#{mapped})")
45
51
  result = Rust["#{model_type}.model.result"]
46
- result.r_mirror_to("#{model_type}.model.result")
47
52
 
48
- return result
53
+ raise "An error occurred while building the model" unless result
54
+
55
+ result.r_mirror_to("#{model_type}.model.result")
49
56
  end
57
+
58
+ result.dependent_variable = dependent_variable
59
+ result.data = data
60
+ result.options = options
61
+
62
+ return result
50
63
  end
51
64
 
52
65
  ##
53
- # Creates a new +model+.
66
+ # Creates a new model based on +model+.
54
67
 
55
68
  def initialize(model)
56
- raise StandardError if model.is_a?(RegressionModel)
69
+ raise "Expected a R list, given a #{model.class}" if !model.is_a?(Rust::List)
57
70
  @model = model
58
71
  end
59
72
 
@@ -118,6 +131,58 @@ module Rust::Models::Regression
118
131
  a = self.summary|"coefficients"
119
132
  end
120
133
 
134
+ ##
135
+ # Returns the variables of the model as ModelVariable objects with basic data (coefficient and p-value). Use the method `coefficients`
136
+ # to get more data.
137
+
138
+ def variables
139
+ unless @variables
140
+ coefficients = self.coefficients
141
+
142
+ @variables = coefficients.rownames.map do |name|
143
+ ModelVariable.new(name, coefficients[name, "Estimate"], coefficients[name, "Pr(>|t|)"])
144
+ end
145
+ end
146
+
147
+ return @variables
148
+ end
149
+
150
+ ##
151
+ # Returns only the significant variables as ModelVariable instances. See the method `variables`.
152
+
153
+ def significant_variables(a = 0.05)
154
+ self.variables.select { |v| v.significant?(a) }
155
+ end
156
+
157
+ ##
158
+ # Runs backward selection (recursively removes a variable until the best model is found).
159
+ # Returns both the best model and the list of variables excluded at each step.
160
+ # Note: Not fully tested
161
+
162
+ def backward_selection(excluded = [])
163
+ candidates = self.variables.select { |v| !v.intercept? && !v.significant? }.sort_by { |v| v.pvalue }.reverse
164
+ all = self.variables.select { |v| !v.intercept? }
165
+
166
+ candidates.each do |candidate|
167
+ new_model = RegressionModel.generate(
168
+ self.class,
169
+ self.class.r_model_name,
170
+ self.dependent_variable,
171
+ (all - [candidate]).map { |v| v.name },
172
+ self.data,
173
+ **self.options
174
+ )
175
+
176
+ if new_model.r_2_adjusted >= self.r_2_adjusted
177
+ puts "Excluded #{candidate}" if Rust.debug?
178
+ return *new_model.backward_selection(excluded + [candidate])
179
+ end
180
+ end
181
+
182
+ return self, excluded
183
+ end
184
+
185
+
121
186
  def method_missing(name, *args)
122
187
  return model|name.to_s
123
188
  end
@@ -145,7 +210,11 @@ module Rust::Models::Regression
145
210
 
146
211
  class LinearRegressionModel < RegressionModel
147
212
  def self.can_pull?(type, klass)
148
- return type == "list" && klass == "lm"
213
+ return type == "list" && klass == self.r_model_name
214
+ end
215
+
216
+ def self.pull_priority
217
+ 1
149
218
  end
150
219
 
151
220
  def self.pull_variable(variable, type, klass)
@@ -154,6 +223,10 @@ module Rust::Models::Regression
154
223
  return LinearRegressionModel.new(model)
155
224
  end
156
225
 
226
+ def self.r_model_name
227
+ "lm"
228
+ end
229
+
157
230
  ##
158
231
  # Generates a linear regression model, given its +dependent_variable+ and +independent_variables+ and its +data+.
159
232
  # +options+ can be specified and directly passed to the model.
@@ -161,7 +234,7 @@ module Rust::Models::Regression
161
234
  def self.generate(dependent_variable, independent_variables, data, **options)
162
235
  RegressionModel.generate(
163
236
  LinearRegressionModel,
164
- "lm",
237
+ self.r_model_name,
165
238
  dependent_variable,
166
239
  independent_variables,
167
240
  data,
@@ -175,13 +248,17 @@ module Rust::Models::Regression
175
248
 
176
249
  class LinearMixedEffectsModel < RegressionModel
177
250
  def self.can_pull?(type, klass)
178
- return type == "S4" && klass == "lmerModLmerTest"
251
+ return type == "S4" && klass == self.r_model_name
179
252
  end
180
253
 
181
254
  def self.pull_priority
182
255
  1
183
256
  end
184
257
 
258
+ def self.r_model_name
259
+ "lmerModLmerTest"
260
+ end
261
+
185
262
  def self.pull_variable(variable, type, klass)
186
263
  model = Rust::RustDatatype.pull_variable(variable, Rust::S4Class)
187
264
 
@@ -213,7 +290,7 @@ module Rust::Models::Regression
213
290
 
214
291
  RegressionModel.generate(
215
292
  LinearMixedEffectsModel,
216
- "lmer",
293
+ self.r_model_name,
217
294
  dependent_variable,
218
295
  fixed_effects + random_effects,
219
296
  data,
@@ -235,18 +312,44 @@ module Rust::Models::Regression
235
312
  end
236
313
  end
237
314
  end
315
+
316
+ ##
317
+ # Slim representation for a variable in a model, with just the variable name, its coefficient and its p-value.
318
+
319
+ class ModelVariable
320
+ attr_accessor :name
321
+ attr_accessor :coefficient
322
+ attr_accessor :pvalue
323
+
324
+ def initialize(name, coefficient, pvalue)
325
+ @name = name
326
+ @coefficient = coefficient
327
+ @pvalue = pvalue
328
+ end
329
+
330
+ def intercept?
331
+ @name == "(Intercept)"
332
+ end
333
+
334
+ ##
335
+ # Checks whether the variable is significant at level +a+ (0.05 by default), i.e., whether its p-value is at most +a+.
336
+
337
+ def significant?(a = 0.05)
338
+ @pvalue <= a
339
+ end
340
+ end
238
341
  end
239
342
 
240
343
  module Rust::RBindings
241
344
  def lm(formula, data, **options)
242
345
  independent = formula.right_part.split("+").map { |v| v.strip }
243
- return LinearRegressionModel.generate(formula.left_part, independent, data, **options)
346
+ return Rust::Models::Regression::LinearRegressionModel.generate(formula.left_part, independent, data, **options)
244
347
  end
245
348
 
246
349
  def lmer(formula, data, **options)
247
350
  independent = formula.right_part.split("+").map { |v| v.strip }
248
351
 
249
- RegressionModel.generate(
352
+ Rust::Models::Regression::RegressionModel.generate(
250
353
  LinearMixedEffectsModel,
251
354
  "lmer",
252
355
  formula.left_part,
@@ -85,6 +85,20 @@ module Rust
85
85
  @values.map { |k, v| k*v }.sum
86
86
  end
87
87
 
88
+ ##
89
+ # Returns the variance for this slice.
90
+
91
+ def variance
92
+ @values.map { |k, v| k**2 * v }.sum - (self.expected ** 2)
93
+ end
94
+
95
+ ##
96
+ # Returns the standard deviation for this slice.
97
+
98
+ def sd
99
+ Math.sqrt(self.variance)
100
+ end
101
+
88
102
  ##
89
103
  # Returns a slice with the values that are greater than +n+.
90
104
 
@@ -124,7 +138,7 @@ module Rust
124
138
  # Returns a slice with the values between +a+ and +b+.
125
139
 
126
140
  def between(a, b)
127
- self.so_that { |k| k.between(a, b) }
141
+ self.so_that { |k| k.between?(a, b) }
128
142
  end
129
143
 
130
144
  ##
@@ -133,6 +147,13 @@ module Rust
133
147
  def so_that
134
148
  RandomVariableSlice.new(@values.select { |k, v| yield(k) })
135
149
  end
150
+
151
+ ##
152
+ # Creates a bar plot of the distribution
153
+
154
+ def plot
155
+ Rust::Plots::BarPlot.new(@values.sort_by { |k, v| k }.to_h)
156
+ end
136
157
  end
137
158
 
138
159
  ##
data/lib/rust.rb CHANGED
@@ -2,3 +2,22 @@ require_relative 'rust/core'
2
2
  require_relative 'rust/models/all'
3
3
  require_relative 'rust/plots/all'
4
4
  require_relative 'rust/stats/all'
5
+
6
+ module Rust
7
+ @@datasets = {}
8
+
9
+ def self.toothgrowth
10
+ @@datasets[:ToothGrowth] = Rust.exclusive { Rust['ToothGrowth'] } unless @@datasets[:ToothGrowth]
11
+ return @@datasets[:ToothGrowth]
12
+ end
13
+
14
+ def self.cars
15
+ @@datasets[:cars] = Rust.exclusive { Rust['cars'] } unless @@datasets[:cars]
16
+ return @@datasets[:cars]
17
+ end
18
+
19
+ def self.iris
20
+ @@datasets[:iris] = Rust.exclusive { Rust['iris'] } unless @@datasets[:iris]
21
+ return @@datasets[:iris]
22
+ end
23
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.11'
4
+ version: '0.13'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-11 00:00:00.000000000 Z
11
+ date: 2024-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rinruby
@@ -61,6 +61,7 @@ files:
61
61
  - lib/rust.rb
62
62
  - lib/rust/core.rb
63
63
  - lib/rust/core/csv.rb
64
+ - lib/rust/core/manual.rb
64
65
  - lib/rust/core/rust.rb
65
66
  - lib/rust/core/types/all.rb
66
67
  - lib/rust/core/types/dataframe.rb
@@ -71,6 +72,12 @@ files:
71
72
  - lib/rust/core/types/matrix.rb
72
73
  - lib/rust/core/types/s4class.rb
73
74
  - lib/rust/core/types/utils.rb
75
+ - lib/rust/external/ggplot2.rb
76
+ - lib/rust/external/ggplot2/core.rb
77
+ - lib/rust/external/ggplot2/geoms.rb
78
+ - lib/rust/external/ggplot2/plot_builder.rb
79
+ - lib/rust/external/ggplot2/scale.rb
80
+ - lib/rust/external/ggplot2/themes.rb
74
81
  - lib/rust/external/robustbase.rb
75
82
  - lib/rust/models/all.rb
76
83
  - lib/rust/models/anova.rb
@@ -104,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
111
  - !ruby/object:Gem::Version
105
112
  version: '0'
106
113
  requirements: []
107
- rubygems_version: 3.3.15
114
+ rubygems_version: 3.5.16
108
115
  signing_key:
109
116
  specification_version: 4
110
117
  summary: Ruby advanced statistical library