rust 0.11 → 0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rust/core/csv.rb +4 -4
- data/lib/rust/core/manual.rb +89 -0
- data/lib/rust/core/rust.rb +28 -5
- data/lib/rust/core/types/dataframe.rb +69 -2
- data/lib/rust/core/types/datatype.rb +2 -2
- data/lib/rust/core/types/factor.rb +4 -0
- data/lib/rust/core/types/language.rb +33 -1
- data/lib/rust/core/types/list.rb +2 -0
- data/lib/rust/core/types/matrix.rb +8 -0
- data/lib/rust/core.rb +50 -0
- data/lib/rust/external/ggplot2/core.rb +171 -0
- data/lib/rust/external/ggplot2/geoms.rb +83 -0
- data/lib/rust/external/ggplot2/plot_builder.rb +292 -0
- data/lib/rust/external/ggplot2/scale.rb +12 -0
- data/lib/rust/external/ggplot2/themes.rb +458 -0
- data/lib/rust/external/ggplot2.rb +116 -0
- data/lib/rust/models/regression.rb +113 -10
- data/lib/rust/stats/probabilities.rb +22 -1
- data/lib/rust.rb +19 -0
- metadata +10 -3
@@ -14,6 +14,11 @@ module Rust::Models::Regression
|
|
14
14
|
# Generic regression model in R.
|
15
15
|
|
16
16
|
class RegressionModel < Rust::RustDatatype
|
17
|
+
|
18
|
+
attr_accessor :data
|
19
|
+
attr_accessor :dependent_variable
|
20
|
+
attr_accessor :options
|
21
|
+
|
17
22
|
def self.can_pull?(type, klass)
|
18
23
|
# Can only pull specific sub-types
|
19
24
|
return false
|
@@ -38,22 +43,30 @@ module Rust::Models::Regression
|
|
38
43
|
|
39
44
|
formula = Rust::Formula.new(dependent_variable, independent_variables.join(" + "))
|
40
45
|
|
46
|
+
result = nil
|
41
47
|
Rust.exclusive do
|
42
48
|
Rust["#{model_type}.data"] = data
|
43
49
|
|
44
50
|
Rust._eval("#{model_type}.model.result <- #{model_type}(#{formula.to_R}, data=#{model_type}.data#{mapped})")
|
45
51
|
result = Rust["#{model_type}.model.result"]
|
46
|
-
result.r_mirror_to("#{model_type}.model.result")
|
47
52
|
|
48
|
-
|
53
|
+
raise "An error occurred while building the model" unless result
|
54
|
+
|
55
|
+
result.r_mirror_to("#{model_type}.model.result")
|
49
56
|
end
|
57
|
+
|
58
|
+
result.dependent_variable = dependent_variable
|
59
|
+
result.data = data
|
60
|
+
result.options = options
|
61
|
+
|
62
|
+
return result
|
50
63
|
end
|
51
64
|
|
52
65
|
##
|
53
|
-
# Creates a new +model+.
|
66
|
+
# Creates a new model based on +model+.
|
54
67
|
|
55
68
|
def initialize(model)
|
56
|
-
raise
|
69
|
+
raise "Expected a R list, given a #{model.class}" if !model.is_a?(Rust::List)
|
57
70
|
@model = model
|
58
71
|
end
|
59
72
|
|
@@ -118,6 +131,58 @@ module Rust::Models::Regression
|
|
118
131
|
a = self.summary|"coefficients"
|
119
132
|
end
|
120
133
|
|
134
|
+
##
|
135
|
+
# Returns object variables for the model with basic data (coefficients and p-values). Use the method `coefficients`
|
136
|
+
# to get more data.
|
137
|
+
|
138
|
+
def variables
|
139
|
+
unless @variables
|
140
|
+
coefficients = self.coefficients
|
141
|
+
|
142
|
+
@variables = coefficients.rownames.map do |name|
|
143
|
+
ModelVariable.new(name, coefficients[name, "Estimate"], coefficients[name, "Pr(>|t|)"])
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
return @variables
|
148
|
+
end
|
149
|
+
|
150
|
+
##
|
151
|
+
# Returns only the significant variables as ModelVariable instances. See the method `variables`.
|
152
|
+
|
153
|
+
def significant_variables(a = 0.05)
|
154
|
+
self.variables.select { |v| v.significant?(a) }
|
155
|
+
end
|
156
|
+
|
157
|
+
##
|
158
|
+
# Runs backward selection (recursively removes a variable until the best model is found).
|
159
|
+
# Returns both the best model and the list of excluded variable at each step
|
160
|
+
# Note: Not fully tested
|
161
|
+
|
162
|
+
def backward_selection(excluded = [])
|
163
|
+
candidates = self.variables.select { |v| !v.intercept? && !v.significant? }.sort_by { |v| v.pvalue }.reverse
|
164
|
+
all = self.variables.select { |v| !v.intercept? }
|
165
|
+
|
166
|
+
candidates.each do |candidate|
|
167
|
+
new_model = RegressionModel.generate(
|
168
|
+
self.class,
|
169
|
+
self.class.r_model_name,
|
170
|
+
self.dependent_variable,
|
171
|
+
(all - [candidate]).map { |v| v.name },
|
172
|
+
self.data,
|
173
|
+
**self.options
|
174
|
+
)
|
175
|
+
|
176
|
+
if new_model.r_2_adjusted >= self.r_2_adjusted
|
177
|
+
puts "Excluded #{candidate}" if Rust.debug?
|
178
|
+
return *new_model.backward_selection(excluded + [candidate])
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
182
|
+
return self, excluded
|
183
|
+
end
|
184
|
+
|
185
|
+
|
121
186
|
def method_missing(name, *args)
|
122
187
|
return model|name.to_s
|
123
188
|
end
|
@@ -145,7 +210,11 @@ module Rust::Models::Regression
|
|
145
210
|
|
146
211
|
class LinearRegressionModel < RegressionModel
|
147
212
|
def self.can_pull?(type, klass)
|
148
|
-
return type == "list" && klass ==
|
213
|
+
return type == "list" && klass == self.r_model_name
|
214
|
+
end
|
215
|
+
|
216
|
+
def self.pull_priority
|
217
|
+
1
|
149
218
|
end
|
150
219
|
|
151
220
|
def self.pull_variable(variable, type, klass)
|
@@ -154,6 +223,10 @@ module Rust::Models::Regression
|
|
154
223
|
return LinearRegressionModel.new(model)
|
155
224
|
end
|
156
225
|
|
226
|
+
def self.r_model_name
|
227
|
+
"lm"
|
228
|
+
end
|
229
|
+
|
157
230
|
##
|
158
231
|
# Generates a linear regression model, given its +dependent_variable+ and +independent_variables+ and its +data+.
|
159
232
|
# +options+ can be specified and directly passed to the model.
|
@@ -161,7 +234,7 @@ module Rust::Models::Regression
|
|
161
234
|
def self.generate(dependent_variable, independent_variables, data, **options)
|
162
235
|
RegressionModel.generate(
|
163
236
|
LinearRegressionModel,
|
164
|
-
|
237
|
+
self.r_model_name,
|
165
238
|
dependent_variable,
|
166
239
|
independent_variables,
|
167
240
|
data,
|
@@ -175,13 +248,17 @@ module Rust::Models::Regression
|
|
175
248
|
|
176
249
|
class LinearMixedEffectsModel < RegressionModel
|
177
250
|
def self.can_pull?(type, klass)
|
178
|
-
return type == "S4" && klass ==
|
251
|
+
return type == "S4" && klass == self.r_model_name
|
179
252
|
end
|
180
253
|
|
181
254
|
def self.pull_priority
|
182
255
|
1
|
183
256
|
end
|
184
257
|
|
258
|
+
def self.r_model_name
|
259
|
+
"lmerModLmerTest"
|
260
|
+
end
|
261
|
+
|
185
262
|
def self.pull_variable(variable, type, klass)
|
186
263
|
model = Rust::RustDatatype.pull_variable(variable, Rust::S4Class)
|
187
264
|
|
@@ -213,7 +290,7 @@ module Rust::Models::Regression
|
|
213
290
|
|
214
291
|
RegressionModel.generate(
|
215
292
|
LinearMixedEffectsModel,
|
216
|
-
|
293
|
+
self.r_model_name,
|
217
294
|
dependent_variable,
|
218
295
|
fixed_effects + random_effects,
|
219
296
|
data,
|
@@ -235,18 +312,44 @@ module Rust::Models::Regression
|
|
235
312
|
end
|
236
313
|
end
|
237
314
|
end
|
315
|
+
|
316
|
+
##
|
317
|
+
# Slim representation for a variable in a model, with just the variable name, its coefficient and its p-value.
|
318
|
+
|
319
|
+
class ModelVariable
|
320
|
+
attr_accessor :name
|
321
|
+
attr_accessor :coefficient
|
322
|
+
attr_accessor :pvalue
|
323
|
+
|
324
|
+
def initialize(name, coefficient, pvalue)
|
325
|
+
@name = name
|
326
|
+
@coefficient = coefficient
|
327
|
+
@pvalue = pvalue
|
328
|
+
end
|
329
|
+
|
330
|
+
def intercept?
|
331
|
+
@name == "(Intercept)"
|
332
|
+
end
|
333
|
+
|
334
|
+
##
|
335
|
+
# Checks whether the variable is significant w.r.t. a given +a+ (0.05 by default)
|
336
|
+
|
337
|
+
def significant?(a = 0.05)
|
338
|
+
@pvalue <= a
|
339
|
+
end
|
340
|
+
end
|
238
341
|
end
|
239
342
|
|
240
343
|
module Rust::RBindings
|
241
344
|
def lm(formula, data, **options)
|
242
345
|
independent = formula.right_part.split("+").map { |v| v.strip }
|
243
|
-
return LinearRegressionModel.generate(formula.left_part, independent, data, **options)
|
346
|
+
return Rust::Models::Regression::LinearRegressionModel.generate(formula.left_part, independent, data, **options)
|
244
347
|
end
|
245
348
|
|
246
349
|
def lmer(formula, data, **options)
|
247
350
|
independent = formula.right_part.split("+").map { |v| v.strip }
|
248
351
|
|
249
|
-
RegressionModel.generate(
|
352
|
+
Rust::Models::Regression::RegressionModel.generate(
|
250
353
|
LinearMixedEffectsModel,
|
251
354
|
"lmer",
|
252
355
|
formula.left_part,
|
@@ -85,6 +85,20 @@ module Rust
|
|
85
85
|
@values.map { |k, v| k*v }.sum
|
86
86
|
end
|
87
87
|
|
88
|
+
##
|
89
|
+
# Returns the variance for this slice.
|
90
|
+
|
91
|
+
def variance
|
92
|
+
@values.map { |k, v| k**2 * v }.sum - (self.expected ** 2)
|
93
|
+
end
|
94
|
+
|
95
|
+
##
|
96
|
+
# Returns the standard deviation for this slice.
|
97
|
+
|
98
|
+
def sd
|
99
|
+
Math.sqrt(self.variance)
|
100
|
+
end
|
101
|
+
|
88
102
|
##
|
89
103
|
# Returns a slice with the values that are greater than +n+.
|
90
104
|
|
@@ -124,7 +138,7 @@ module Rust
|
|
124
138
|
# Returns a slice with the values between +a+ and +b+.
|
125
139
|
|
126
140
|
def between(a, b)
|
127
|
-
self.so_that { |k| k.between(a, b) }
|
141
|
+
self.so_that { |k| k.between?(a, b) }
|
128
142
|
end
|
129
143
|
|
130
144
|
##
|
@@ -133,6 +147,13 @@ module Rust
|
|
133
147
|
def so_that
|
134
148
|
RandomVariableSlice.new(@values.select { |k, v| yield(k) })
|
135
149
|
end
|
150
|
+
|
151
|
+
##
|
152
|
+
# Creates a bar plot of the distribution
|
153
|
+
|
154
|
+
def plot
|
155
|
+
Rust::Plots::BarPlot.new(@values.sort_by { |k, v| k }.to_h)
|
156
|
+
end
|
136
157
|
end
|
137
158
|
|
138
159
|
##
|
data/lib/rust.rb
CHANGED
@@ -2,3 +2,22 @@ require_relative 'rust/core'
|
|
2
2
|
require_relative 'rust/models/all'
|
3
3
|
require_relative 'rust/plots/all'
|
4
4
|
require_relative 'rust/stats/all'
|
5
|
+
|
6
|
+
module Rust
|
7
|
+
@@datasets = {}
|
8
|
+
|
9
|
+
def self.toothgrowth
|
10
|
+
@@datasets[:ToothGrowth] = Rust.exclusive { Rust['ToothGrowth'] } unless @@datasets[:ToothGrowth]
|
11
|
+
return @@datasets[:ToothGrowth]
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.cars
|
15
|
+
@@datasets[:cars] = Rust.exclusive { Rust['cars'] } unless @@datasets[:cars]
|
16
|
+
return @@datasets[:cars]
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.iris
|
20
|
+
@@datasets[:iris] = Rust.exclusive { Rust['iris'] } unless @@datasets[:iris]
|
21
|
+
return @@datasets[:iris]
|
22
|
+
end
|
23
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rust
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.13'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simone Scalabrino
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-11-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rinruby
|
@@ -61,6 +61,7 @@ files:
|
|
61
61
|
- lib/rust.rb
|
62
62
|
- lib/rust/core.rb
|
63
63
|
- lib/rust/core/csv.rb
|
64
|
+
- lib/rust/core/manual.rb
|
64
65
|
- lib/rust/core/rust.rb
|
65
66
|
- lib/rust/core/types/all.rb
|
66
67
|
- lib/rust/core/types/dataframe.rb
|
@@ -71,6 +72,12 @@ files:
|
|
71
72
|
- lib/rust/core/types/matrix.rb
|
72
73
|
- lib/rust/core/types/s4class.rb
|
73
74
|
- lib/rust/core/types/utils.rb
|
75
|
+
- lib/rust/external/ggplot2.rb
|
76
|
+
- lib/rust/external/ggplot2/core.rb
|
77
|
+
- lib/rust/external/ggplot2/geoms.rb
|
78
|
+
- lib/rust/external/ggplot2/plot_builder.rb
|
79
|
+
- lib/rust/external/ggplot2/scale.rb
|
80
|
+
- lib/rust/external/ggplot2/themes.rb
|
74
81
|
- lib/rust/external/robustbase.rb
|
75
82
|
- lib/rust/models/all.rb
|
76
83
|
- lib/rust/models/anova.rb
|
@@ -104,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
104
111
|
- !ruby/object:Gem::Version
|
105
112
|
version: '0'
|
106
113
|
requirements: []
|
107
|
-
rubygems_version: 3.
|
114
|
+
rubygems_version: 3.5.16
|
108
115
|
signing_key:
|
109
116
|
specification_version: 4
|
110
117
|
summary: Ruby advanced statistical library
|