rust 0.11 → 0.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rust/core/csv.rb +4 -4
- data/lib/rust/core/manual.rb +89 -0
- data/lib/rust/core/rust.rb +28 -5
- data/lib/rust/core/types/dataframe.rb +69 -2
- data/lib/rust/core/types/datatype.rb +2 -2
- data/lib/rust/core/types/factor.rb +4 -0
- data/lib/rust/core/types/language.rb +33 -1
- data/lib/rust/core/types/list.rb +2 -0
- data/lib/rust/core/types/matrix.rb +8 -0
- data/lib/rust/core.rb +50 -0
- data/lib/rust/external/ggplot2/core.rb +171 -0
- data/lib/rust/external/ggplot2/geoms.rb +83 -0
- data/lib/rust/external/ggplot2/plot_builder.rb +292 -0
- data/lib/rust/external/ggplot2/scale.rb +12 -0
- data/lib/rust/external/ggplot2/themes.rb +458 -0
- data/lib/rust/external/ggplot2.rb +116 -0
- data/lib/rust/models/regression.rb +113 -10
- data/lib/rust/stats/probabilities.rb +22 -1
- data/lib/rust.rb +19 -0
- metadata +10 -3
@@ -14,6 +14,11 @@ module Rust::Models::Regression
|
|
14
14
|
# Generic regression model in R.
|
15
15
|
|
16
16
|
class RegressionModel < Rust::RustDatatype
|
17
|
+
|
18
|
+
attr_accessor :data
|
19
|
+
attr_accessor :dependent_variable
|
20
|
+
attr_accessor :options
|
21
|
+
|
17
22
|
def self.can_pull?(type, klass)
|
18
23
|
# Can only pull specific sub-types
|
19
24
|
return false
|
@@ -38,22 +43,30 @@ module Rust::Models::Regression
|
|
38
43
|
|
39
44
|
formula = Rust::Formula.new(dependent_variable, independent_variables.join(" + "))
|
40
45
|
|
46
|
+
result = nil
|
41
47
|
Rust.exclusive do
|
42
48
|
Rust["#{model_type}.data"] = data
|
43
49
|
|
44
50
|
Rust._eval("#{model_type}.model.result <- #{model_type}(#{formula.to_R}, data=#{model_type}.data#{mapped})")
|
45
51
|
result = Rust["#{model_type}.model.result"]
|
46
|
-
result.r_mirror_to("#{model_type}.model.result")
|
47
52
|
|
48
|
-
|
53
|
+
raise "An error occurred while building the model" unless result
|
54
|
+
|
55
|
+
result.r_mirror_to("#{model_type}.model.result")
|
49
56
|
end
|
57
|
+
|
58
|
+
result.dependent_variable = dependent_variable
|
59
|
+
result.data = data
|
60
|
+
result.options = options
|
61
|
+
|
62
|
+
return result
|
50
63
|
end
|
51
64
|
|
52
65
|
##
|
53
|
-
# Creates a new +model+.
|
66
|
+
# Creates a new model based on +model+.
|
54
67
|
|
55
68
|
def initialize(model)
|
56
|
-
raise
|
69
|
+
raise "Expected a R list, given a #{model.class}" if !model.is_a?(Rust::List)
|
57
70
|
@model = model
|
58
71
|
end
|
59
72
|
|
@@ -118,6 +131,58 @@ module Rust::Models::Regression
|
|
118
131
|
a = self.summary|"coefficients"
|
119
132
|
end
|
120
133
|
|
134
|
+
##
|
135
|
+
# Returns object variables for the model with basic data (coefficients and p-values). Use the method `coefficients`
|
136
|
+
# to get more data.
|
137
|
+
|
138
|
+
def variables
|
139
|
+
unless @variables
|
140
|
+
coefficients = self.coefficients
|
141
|
+
|
142
|
+
@variables = coefficients.rownames.map do |name|
|
143
|
+
ModelVariable.new(name, coefficients[name, "Estimate"], coefficients[name, "Pr(>|t|)"])
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
return @variables
|
148
|
+
end
|
149
|
+
|
150
|
+
##
|
151
|
+
# Returns only the significant variables as ModelVariable instances. See the method `variables`.
|
152
|
+
|
153
|
+
def significant_variables(a = 0.05)
|
154
|
+
self.variables.select { |v| v.significant?(a) }
|
155
|
+
end
|
156
|
+
|
157
|
+
##
|
158
|
+
# Runs backward selection (recursively removes a variable until the best model is found).
|
159
|
+
# Returns both the best model and the list of excluded variables at each step
|
160
|
+
# Note: Not fully tested
|
161
|
+
|
162
|
+
def backward_selection(excluded = [])
|
163
|
+
candidates = self.variables.select { |v| !v.intercept? && !v.significant? }.sort_by { |v| v.pvalue }.reverse
|
164
|
+
all = self.variables.select { |v| !v.intercept? }
|
165
|
+
|
166
|
+
candidates.each do |candidate|
|
167
|
+
new_model = RegressionModel.generate(
|
168
|
+
self.class,
|
169
|
+
self.class.r_model_name,
|
170
|
+
self.dependent_variable,
|
171
|
+
(all - [candidate]).map { |v| v.name },
|
172
|
+
self.data,
|
173
|
+
**self.options
|
174
|
+
)
|
175
|
+
|
176
|
+
if new_model.r_2_adjusted >= self.r_2_adjusted
|
177
|
+
puts "Excluded #{candidate}" if Rust.debug?
|
178
|
+
return *new_model.backward_selection(excluded + [candidate])
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
return self, excluded
|
183
|
+
end
|
184
|
+
|
185
|
+
|
121
186
|
def method_missing(name, *args)
|
122
187
|
return model|name.to_s
|
123
188
|
end
|
@@ -145,7 +210,11 @@ module Rust::Models::Regression
|
|
145
210
|
|
146
211
|
class LinearRegressionModel < RegressionModel
|
147
212
|
def self.can_pull?(type, klass)
|
148
|
-
return type == "list" && klass == "lm"
|
213
|
+
return type == "list" && klass == self.r_model_name
|
214
|
+
end
|
215
|
+
|
216
|
+
def self.pull_priority
|
217
|
+
1
|
149
218
|
end
|
150
219
|
|
151
220
|
def self.pull_variable(variable, type, klass)
|
@@ -154,6 +223,10 @@ module Rust::Models::Regression
|
|
154
223
|
return LinearRegressionModel.new(model)
|
155
224
|
end
|
156
225
|
|
226
|
+
def self.r_model_name
|
227
|
+
"lm"
|
228
|
+
end
|
229
|
+
|
157
230
|
##
|
158
231
|
# Generates a linear regression model, given its +dependent_variable+ and +independent_variables+ and its +data+.
|
159
232
|
# +options+ can be specified and directly passed to the model.
|
@@ -161,7 +234,7 @@ module Rust::Models::Regression
|
|
161
234
|
def self.generate(dependent_variable, independent_variables, data, **options)
|
162
235
|
RegressionModel.generate(
|
163
236
|
LinearRegressionModel,
|
164
|
-
|
237
|
+
self.r_model_name,
|
165
238
|
dependent_variable,
|
166
239
|
independent_variables,
|
167
240
|
data,
|
@@ -175,13 +248,17 @@ module Rust::Models::Regression
|
|
175
248
|
|
176
249
|
class LinearMixedEffectsModel < RegressionModel
|
177
250
|
def self.can_pull?(type, klass)
|
178
|
-
return type == "S4" && klass == "lmerModLmerTest"
|
251
|
+
return type == "S4" && klass == self.r_model_name
|
179
252
|
end
|
180
253
|
|
181
254
|
def self.pull_priority
|
182
255
|
1
|
183
256
|
end
|
184
257
|
|
258
|
+
def self.r_model_name
|
259
|
+
"lmerModLmerTest"
|
260
|
+
end
|
261
|
+
|
185
262
|
def self.pull_variable(variable, type, klass)
|
186
263
|
model = Rust::RustDatatype.pull_variable(variable, Rust::S4Class)
|
187
264
|
|
@@ -213,7 +290,7 @@ module Rust::Models::Regression
|
|
213
290
|
|
214
291
|
RegressionModel.generate(
|
215
292
|
LinearMixedEffectsModel,
|
216
|
-
|
293
|
+
self.r_model_name,
|
217
294
|
dependent_variable,
|
218
295
|
fixed_effects + random_effects,
|
219
296
|
data,
|
@@ -235,18 +312,44 @@ module Rust::Models::Regression
|
|
235
312
|
end
|
236
313
|
end
|
237
314
|
end
|
315
|
+
|
316
|
+
##
|
317
|
+
# Slim representation for a variable in a model, with just the variable name, its coefficient and its p-value.
|
318
|
+
|
319
|
+
class ModelVariable
|
320
|
+
attr_accessor :name
|
321
|
+
attr_accessor :coefficient
|
322
|
+
attr_accessor :pvalue
|
323
|
+
|
324
|
+
def initialize(name, coefficient, pvalue)
|
325
|
+
@name = name
|
326
|
+
@coefficient = coefficient
|
327
|
+
@pvalue = pvalue
|
328
|
+
end
|
329
|
+
|
330
|
+
def intercept?
|
331
|
+
@name == "(Intercept)"
|
332
|
+
end
|
333
|
+
|
334
|
+
##
|
335
|
+
# Checks whether the variable is significant w.r.t. a given +a+ (0.05 by default)
|
336
|
+
|
337
|
+
def significant?(a = 0.05)
|
338
|
+
@pvalue <= a
|
339
|
+
end
|
340
|
+
end
|
238
341
|
end
|
239
342
|
|
240
343
|
module Rust::RBindings
|
241
344
|
def lm(formula, data, **options)
|
242
345
|
independent = formula.right_part.split("+").map { |v| v.strip }
|
243
|
-
return LinearRegressionModel.generate(formula.left_part, independent, data, **options)
|
346
|
+
return Rust::Models::Regression::LinearRegressionModel.generate(formula.left_part, independent, data, **options)
|
244
347
|
end
|
245
348
|
|
246
349
|
def lmer(formula, data, **options)
|
247
350
|
independent = formula.right_part.split("+").map { |v| v.strip }
|
248
351
|
|
249
|
-
RegressionModel.generate(
|
352
|
+
Rust::Models::Regression::RegressionModel.generate(
|
250
353
|
LinearMixedEffectsModel,
|
251
354
|
"lmer",
|
252
355
|
formula.left_part,
|
@@ -85,6 +85,20 @@ module Rust
|
|
85
85
|
@values.map { |k, v| k*v }.sum
|
86
86
|
end
|
87
87
|
|
88
|
+
##
|
89
|
+
# Returns the variance for this slice.
|
90
|
+
|
91
|
+
def variance
|
92
|
+
@values.map { |k, v| k**2 * v }.sum - (self.expected ** 2)
|
93
|
+
end
|
94
|
+
|
95
|
+
##
|
96
|
+
# Returns the standard deviation for this slice.
|
97
|
+
|
98
|
+
def sd
|
99
|
+
Math.sqrt(self.variance)
|
100
|
+
end
|
101
|
+
|
88
102
|
##
|
89
103
|
# Returns a slice with the values that are greater than +n+.
|
90
104
|
|
@@ -124,7 +138,7 @@ module Rust
|
|
124
138
|
# Returns a slice with the values between +a+ and +b+.
|
125
139
|
|
126
140
|
def between(a, b)
|
127
|
-
self.so_that { |k| k.between(a, b) }
|
141
|
+
self.so_that { |k| k.between?(a, b) }
|
128
142
|
end
|
129
143
|
|
130
144
|
##
|
@@ -133,6 +147,13 @@ module Rust
|
|
133
147
|
def so_that
|
134
148
|
RandomVariableSlice.new(@values.select { |k, v| yield(k) })
|
135
149
|
end
|
150
|
+
|
151
|
+
##
|
152
|
+
# Creates a bar plot of the distribution
|
153
|
+
|
154
|
+
def plot
|
155
|
+
Rust::Plots::BarPlot.new(@values.sort_by { |k, v| k }.to_h)
|
156
|
+
end
|
136
157
|
end
|
137
158
|
|
138
159
|
##
|
data/lib/rust.rb
CHANGED
@@ -2,3 +2,22 @@ require_relative 'rust/core'
|
|
2
2
|
require_relative 'rust/models/all'
|
3
3
|
require_relative 'rust/plots/all'
|
4
4
|
require_relative 'rust/stats/all'
|
5
|
+
|
6
|
+
module Rust
|
7
|
+
@@datasets = {}
|
8
|
+
|
9
|
+
def self.toothgrowth
|
10
|
+
@@datasets[:ToothGrowth] = Rust.exclusive { Rust['ToothGrowth'] } unless @@datasets[:ToothGrowth]
|
11
|
+
return @@datasets[:ToothGrowth]
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.cars
|
15
|
+
@@datasets[:cars] = Rust.exclusive { Rust['cars'] } unless @@datasets[:cars]
|
16
|
+
return @@datasets[:cars]
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.iris
|
20
|
+
@@datasets[:iris] = Rust.exclusive { Rust['iris'] } unless @@datasets[:iris]
|
21
|
+
return @@datasets[:iris]
|
22
|
+
end
|
23
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rust
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.11'
|
4
|
+
version: '0.13'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-11-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rinruby
|
@@ -61,6 +61,7 @@ files:
|
|
61
61
|
- lib/rust.rb
|
62
62
|
- lib/rust/core.rb
|
63
63
|
- lib/rust/core/csv.rb
|
64
|
+
- lib/rust/core/manual.rb
|
64
65
|
- lib/rust/core/rust.rb
|
65
66
|
- lib/rust/core/types/all.rb
|
66
67
|
- lib/rust/core/types/dataframe.rb
|
@@ -71,6 +72,12 @@ files:
|
|
71
72
|
- lib/rust/core/types/matrix.rb
|
72
73
|
- lib/rust/core/types/s4class.rb
|
73
74
|
- lib/rust/core/types/utils.rb
|
75
|
+
- lib/rust/external/ggplot2.rb
|
76
|
+
- lib/rust/external/ggplot2/core.rb
|
77
|
+
- lib/rust/external/ggplot2/geoms.rb
|
78
|
+
- lib/rust/external/ggplot2/plot_builder.rb
|
79
|
+
- lib/rust/external/ggplot2/scale.rb
|
80
|
+
- lib/rust/external/ggplot2/themes.rb
|
74
81
|
- lib/rust/external/robustbase.rb
|
75
82
|
- lib/rust/models/all.rb
|
76
83
|
- lib/rust/models/anova.rb
|
@@ -104,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
111
|
- !ruby/object:Gem::Version
|
105
112
|
version: '0'
|
106
113
|
requirements: []
|
107
|
-
rubygems_version: 3.
|
114
|
+
rubygems_version: 3.5.16
|
108
115
|
signing_key:
|
109
116
|
specification_version: 4
|
110
117
|
summary: Ruby advanced statistical library
|