rust 0.12 → 0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,6 +14,11 @@ module Rust::Models::Regression
14
14
  # Generic regression model in R.
15
15
 
16
16
  class RegressionModel < Rust::RustDatatype
17
+
18
+ attr_accessor :data
19
+ attr_accessor :dependent_variable
20
+ attr_accessor :options
21
+
17
22
  def self.can_pull?(type, klass)
18
23
  # Can only pull specific sub-types
19
24
  return false
@@ -38,22 +43,30 @@ module Rust::Models::Regression
38
43
 
39
44
  formula = Rust::Formula.new(dependent_variable, independent_variables.join(" + "))
40
45
 
46
+ result = nil
41
47
  Rust.exclusive do
42
48
  Rust["#{model_type}.data"] = data
43
49
 
44
50
  Rust._eval("#{model_type}.model.result <- #{model_type}(#{formula.to_R}, data=#{model_type}.data#{mapped})")
45
51
  result = Rust["#{model_type}.model.result"]
46
- result.r_mirror_to("#{model_type}.model.result")
47
52
 
48
- return result
53
+ raise "An error occurred while building the model" unless result
54
+
55
+ result.r_mirror_to("#{model_type}.model.result")
49
56
  end
57
+
58
+ result.dependent_variable = dependent_variable
59
+ result.data = data
60
+ result.options = options
61
+
62
+ return result
50
63
  end
51
64
 
52
65
  ##
53
- # Creates a new +model+.
66
+ # Creates a new model based on +model+.
54
67
 
55
68
  def initialize(model)
56
- raise StandardError if model.is_a?(RegressionModel)
69
+ raise "Expected a R list, given a #{model.class}" if !model.is_a?(Rust::List)
57
70
  @model = model
58
71
  end
59
72
 
@@ -118,10 +131,72 @@ module Rust::Models::Regression
118
131
  a = self.summary|"coefficients"
119
132
  end
120
133
 
134
+ ##
135
+ # Returns object variables for the model with basic data (coefficients and p-values). Use the method `coefficients`
136
+ # to get more data.
137
+
138
+ def variables
139
+ unless @variables
140
+ coefficients = self.coefficients
141
+
142
+ @variables = coefficients.rownames.map do |name|
143
+ ModelVariable.new(name, coefficients[name, "Estimate"], coefficients[name, "Pr(>|t|)"])
144
+ end
145
+ end
146
+
147
+ return @variables
148
+ end
149
+
150
+ ##
151
+ # Returns only the significant variables as ModelVariable instances. See the method `variables`.
152
+
153
+ def significant_variables(a = 0.05)
154
+ self.variables.select { |v| v.significant?(a) }
155
+ end
156
+
157
+ ##
158
+ # Runs backward selection (recursively removes a variable until the best model is found).
159
+ # Returns both the best model and the list of excluded variable at each step
160
+ # Note: Not fully tested
161
+
162
+ def backward_selection(excluded = [])
163
+ candidates = self.variables.select { |v| !v.intercept? && !v.significant? }.sort_by { |v| v.pvalue }.reverse
164
+ all = self.variables.select { |v| !v.intercept? }
165
+
166
+ candidates.each do |candidate|
167
+ new_model = RegressionModel.generate(
168
+ self.class,
169
+ self.class.r_model_name,
170
+ self.dependent_variable,
171
+ (all - [candidate]).map { |v| v.name },
172
+ self.data,
173
+ **self.options
174
+ )
175
+
176
+ if new_model.r_2_adjusted >= self.r_2_adjusted
177
+ puts "Excluded #{candidate}" if Rust.debug?
178
+ return *new_model.backward_selection(excluded + [candidate])
179
+ end
180
+ end
181
+
182
+ return self, excluded
183
+ end
184
+
185
+ ##
186
+ # Predicts the dependent variable from a new observation of the independent ones.
187
+ # Note: Not fully tested
188
+
189
+ def predict(line)
190
+ Rust.exclusive do
191
+ Rust['tmp.model.newline'] = line
192
+ return Rust["predict(#{self.r_mirror}, tmp.model.newline)"]
193
+ end
194
+ end
195
+
121
196
  def method_missing(name, *args)
122
197
  return model|name.to_s
123
198
  end
124
-
199
+
125
200
  ##
126
201
  # Returns a summary for the model using the summary function in R.
127
202
 
@@ -145,7 +220,11 @@ module Rust::Models::Regression
145
220
 
146
221
  class LinearRegressionModel < RegressionModel
147
222
  def self.can_pull?(type, klass)
148
- return type == "list" && klass == "lm"
223
+ return type == "list" && klass == self.r_model_name
224
+ end
225
+
226
+ def self.pull_priority
227
+ 1
149
228
  end
150
229
 
151
230
  def self.pull_variable(variable, type, klass)
@@ -154,6 +233,10 @@ module Rust::Models::Regression
154
233
  return LinearRegressionModel.new(model)
155
234
  end
156
235
 
236
+ def self.r_model_name
237
+ "lm"
238
+ end
239
+
157
240
  ##
158
241
  # Generates a linear regression model, given its +dependent_variable+ and +independent_variables+ and its +data+.
159
242
  # +options+ can be specified and directly passed to the model.
@@ -161,13 +244,64 @@ module Rust::Models::Regression
161
244
  def self.generate(dependent_variable, independent_variables, data, **options)
162
245
  RegressionModel.generate(
163
246
  LinearRegressionModel,
164
- "lm",
247
+ self.r_model_name,
165
248
  dependent_variable,
166
249
  independent_variables,
167
250
  data,
168
251
  **options
169
252
  )
170
253
  end
254
+
255
+ ##
256
+ # Returns the model as a proc that can be used to predict values
257
+
258
+ def to_proc
259
+ proc do |unnormalized_data|
260
+ data = Rust::DataFrame.new(["__TOREM__"])
261
+ unnormalized_data.rows.times do
262
+ data << [0]
263
+ end
264
+ unnormalized_data.colnames.each do |col|
265
+ if (unnormalized_data|col)[0].is_a?(Numeric)
266
+ newcol = Rust::DataFrame.new([col])
267
+ (unnormalized_data|col).each do |v|
268
+ newcol << [v]
269
+ end
270
+ data.cbind!(newcol)
271
+ else
272
+ (unnormalized_data|col).uniq.each do |val|
273
+ newcol = Rust::DataFrame.new([col + val])
274
+ (unnormalized_data|col).each do |v|
275
+ if v == val
276
+ newcol << [1]
277
+ else
278
+ newcol << [0]
279
+ end
280
+ end
281
+ data.cbind!(newcol)
282
+ end
283
+ end
284
+ end
285
+ data.delete_column("__TOREM__")
286
+ value = 0
287
+ @variables.each do |var|
288
+ p var
289
+ if var.name == "(Intercept)"
290
+ value += var.coefficient
291
+ else
292
+ if data.colnames.include?(var.name)
293
+ value += (data|var.name)[0] * var.coefficient
294
+ end
295
+ end
296
+ end
297
+
298
+ value
299
+ end
300
+ end
301
+
302
+ def predict(line)
303
+ self.to_proc.call(line)
304
+ end
171
305
  end
172
306
 
173
307
  ##
@@ -175,13 +309,17 @@ module Rust::Models::Regression
175
309
 
176
310
  class LinearMixedEffectsModel < RegressionModel
177
311
  def self.can_pull?(type, klass)
178
- return type == "S4" && klass == "lmerModLmerTest"
312
+ return type == "S4" && klass == self.r_model_name
179
313
  end
180
314
 
181
315
  def self.pull_priority
182
316
  1
183
317
  end
184
318
 
319
+ def self.r_model_name
320
+ "lmerModLmerTest"
321
+ end
322
+
185
323
  def self.pull_variable(variable, type, klass)
186
324
  model = Rust::RustDatatype.pull_variable(variable, Rust::S4Class)
187
325
 
@@ -213,7 +351,7 @@ module Rust::Models::Regression
213
351
 
214
352
  RegressionModel.generate(
215
353
  LinearMixedEffectsModel,
216
- "lmer",
354
+ self.r_model_name,
217
355
  dependent_variable,
218
356
  fixed_effects + random_effects,
219
357
  data,
@@ -235,18 +373,44 @@ module Rust::Models::Regression
235
373
  end
236
374
  end
237
375
  end
376
+
377
+ ##
378
+ # Slim representation for a variable in a model, with just the variable name, its coefficient and its p-value.
379
+
380
+ class ModelVariable
381
+ attr_accessor :name
382
+ attr_accessor :coefficient
383
+ attr_accessor :pvalue
384
+
385
+ def initialize(name, coefficient, pvalue)
386
+ @name = name
387
+ @coefficient = coefficient
388
+ @pvalue = pvalue
389
+ end
390
+
391
+ def intercept?
392
+ @name == "(Intercept)"
393
+ end
394
+
395
+ ##
396
+ # Checks whether the variable is significant w.r.t. a given +a+ (0.05 by default)
397
+
398
+ def significant?(a = 0.05)
399
+ @pvalue <= a
400
+ end
401
+ end
238
402
  end
239
403
 
240
404
  module Rust::RBindings
241
405
  def lm(formula, data, **options)
242
406
  independent = formula.right_part.split("+").map { |v| v.strip }
243
- return LinearRegressionModel.generate(formula.left_part, independent, data, **options)
407
+ return Rust::Models::Regression::LinearRegressionModel.generate(formula.left_part, independent, data, **options)
244
408
  end
245
409
 
246
410
  def lmer(formula, data, **options)
247
411
  independent = formula.right_part.split("+").map { |v| v.strip }
248
412
 
249
- RegressionModel.generate(
413
+ Rust::Models::Regression::RegressionModel.generate(
250
414
  LinearMixedEffectsModel,
251
415
  "lmer",
252
416
  formula.left_part,
@@ -85,6 +85,20 @@ module Rust
85
85
  @values.map { |k, v| k*v }.sum
86
86
  end
87
87
 
88
+ ##
89
+ # Returns the variance for this slice.
90
+
91
+ def variance
92
+ @values.map { |k, v| k**2 * v }.sum - (self.expected ** 2)
93
+ end
94
+
95
+ ##
96
+ # Returns the standard deviation for this slice.
97
+
98
+ def sd
99
+ Math.sqrt(self.variance)
100
+ end
101
+
88
102
  ##
89
103
  # Returns a slice with the values that are greater than +n+.
90
104
 
@@ -124,7 +138,7 @@ module Rust
124
138
  # Returns a slice with the values between +a+ and +b+.
125
139
 
126
140
  def between(a, b)
127
- self.so_that { |k| k.between(a, b) }
141
+ self.so_that { |k| k.between?(a, b) }
128
142
  end
129
143
 
130
144
  ##
@@ -133,6 +147,13 @@ module Rust
133
147
  def so_that
134
148
  RandomVariableSlice.new(@values.select { |k, v| yield(k) })
135
149
  end
150
+
151
+ ##
152
+ # Creates a bar plot of the distribution
153
+
154
+ def plot
155
+ Rust::Plots::BarPlot.new(@values.sort_by { |k, v| k }.to_h)
156
+ end
136
157
  end
137
158
 
138
159
  ##
data/lib/rust.rb CHANGED
@@ -2,6 +2,7 @@ require_relative 'rust/core'
2
2
  require_relative 'rust/models/all'
3
3
  require_relative 'rust/plots/all'
4
4
  require_relative 'rust/stats/all'
5
+ require_relative 'rust/forms/all'
5
6
 
6
7
  module Rust
7
8
  @@datasets = {}
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.12'
4
+ version: '0.15'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2022-08-17 00:00:00.000000000 Z
10
+ date: 2026-06-25 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: rinruby
@@ -61,6 +60,7 @@ files:
61
60
  - lib/rust.rb
62
61
  - lib/rust/core.rb
63
62
  - lib/rust/core/csv.rb
63
+ - lib/rust/core/manual.rb
64
64
  - lib/rust/core/rust.rb
65
65
  - lib/rust/core/types/all.rb
66
66
  - lib/rust/core/types/dataframe.rb
@@ -74,10 +74,14 @@ files:
74
74
  - lib/rust/external/ggplot2.rb
75
75
  - lib/rust/external/ggplot2/core.rb
76
76
  - lib/rust/external/ggplot2/geoms.rb
77
- - lib/rust/external/ggplot2/helper.rb
78
77
  - lib/rust/external/ggplot2/plot_builder.rb
78
+ - lib/rust/external/ggplot2/scale.rb
79
79
  - lib/rust/external/ggplot2/themes.rb
80
80
  - lib/rust/external/robustbase.rb
81
+ - lib/rust/forms/all.rb
82
+ - lib/rust/forms/google_forms.rb
83
+ - lib/rust/jobs/all.rb
84
+ - lib/rust/jobs/jobs.rb
81
85
  - lib/rust/models/all.rb
82
86
  - lib/rust/models/anova.rb
83
87
  - lib/rust/models/regression.rb
@@ -95,7 +99,6 @@ homepage: https://github.com/intersimone999/ruby-rust
95
99
  licenses:
96
100
  - GPL-3.0-only
97
101
  metadata: {}
98
- post_install_message:
99
102
  rdoc_options: []
100
103
  require_paths:
101
104
  - lib
@@ -110,8 +113,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
110
113
  - !ruby/object:Gem::Version
111
114
  version: '0'
112
115
  requirements: []
113
- rubygems_version: 3.3.15
114
- signing_key:
116
+ rubygems_version: 3.6.9
115
117
  specification_version: 4
116
118
  summary: Ruby advanced statistical library
117
119
  test_files: []
@@ -1,122 +0,0 @@
1
- require_relative 'core'
2
-
3
- GGPLOT_EXAMPLES = {}
4
-
5
- GGPLOT_EXAMPLES[["Quick introduction", /intro/]] = <<-EOS
6
- bind_ggplot! # Avoid using long module names to reach Rust::Plots::GGPlot (simply includes this module)
7
-
8
- # Best with a dataframe, but not necessary. If you have it...
9
- df = Rust.toothgrowth
10
- plot = PlotBuilder.for_dataframe(df). # Use a dataframe (symbols will be variable names)
11
- labeled("Example plot"). # "labeled" sets the label to the last set aesthetic item (x, y, or title, in this case)
12
- with_x(:len).labeled("X data from df"). # Set all the aesthetics (x, y, ...)
13
- with_y(:dose).labeled("Y data from df").
14
- draw_points. # Set the geometries to plot (based on the plot type)
15
- build # Returns the plot ready to use
16
- plot.show # Show the plot in a window
17
- plot.save("output.pdf", width: 5, height: 4) # Save the plot, width, height etc. are optional
18
-
19
- # If you don't have a dataframe...
20
- plot2 = PlotBuilder.new.
21
- with_x([1,2,3]).labeled("X data from df").
22
- with_y([3,4,5]).labeled("Y data from df").
23
- draw_points.
24
- build
25
- plot2.show
26
- EOS
27
-
28
- GGPLOT_EXAMPLES[["Scatter plots", /scatter/]] = <<-EOS
29
- bind_ggplot!
30
- df = Rust.toothgrowth
31
- plot = PlotBuilder.for_dataframe(df).
32
- with_x(:len).labeled("X data").
33
- with_y(:dose).labeled("Y data").
34
- draw_points. # To draw points
35
- draw_lines. # To draw lines (keep both to draw both)
36
- build
37
- plot.show
38
- EOS
39
-
40
- GGPLOT_EXAMPLES[["Bar plots", /bar/]] = <<-EOS
41
- bind_ggplot!
42
- df = Rust.toothgrowth
43
- plot = PlotBuilder.for_dataframe(df).
44
- with_x(:len).labeled("X data").
45
- with_fill(:supp).labeled("Legend"). # Use with_fill or with_color for stacked plots
46
- draw_bars. # To draw bars
47
- build
48
- plot.show
49
- EOS
50
-
51
- GGPLOT_EXAMPLES[["Box plots", /box/]] = <<-EOS
52
- bind_ggplot!
53
- df = Rust.toothgrowth
54
- plot = PlotBuilder.for_dataframe(df).
55
- with_y(:len).labeled("Data to boxplot").
56
- with_group(:supp).labeled("Groups"). # Groups to plot
57
- draw_boxplot.
58
- build
59
- plot.show
60
- EOS
61
-
62
- GGPLOT_EXAMPLES[["Histograms", /hist/]] = <<-EOS
63
- bind_ggplot!
64
- df = Rust.toothgrowth
65
- plot = PlotBuilder.for_dataframe(df).
66
- with_x(:len).labeled("Data to plot").
67
- with_fill(:supp).labeled("Color"). # Use with_fill or with_color for multiple plots
68
- draw_histogram.
69
- build
70
- plot.show
71
- EOS
72
-
73
- GGPLOT_EXAMPLES[["Themes", /them/]] = <<-EOS
74
- bind_ggplot!
75
- df = Rust.toothgrowth
76
- # The method with_theme allows to change theme options. The method can be called
77
- # several times, each time the argument does not overwrite the previous options,
78
- # unless they are specified again (in that case, the last specified ones win).
79
- plot = PlotBuilder.for_dataframe(df).
80
- with_x(:len).labeled("X data").
81
- with_y(:dose).labeled("Y data").
82
- draw_points.
83
- with_theme(
84
- ThemeBuilder.new('bw').
85
- title(face: 'bold', size: 12). # Each method sets the property for the related element
86
- legend do |legend| # Legend and other parts can be set like this
87
- legend.position(:left) # Puts the legend on the left
88
- end.
89
- axis do |axis| # Modifies the axes
90
- axis.line(Theme::BlankElement.new) # Hides the lines for the axes
91
- axis.text_x(size: 3) # X axis labels
92
- end.
93
- panel do |panel|
94
- panel.grid_major(colour: 'grey70', size: 0.2) # Sets the major ticks grid
95
- panel.grid_minor(Theme::BlankElement.new) # Hides the minor ticks grid
96
- end.
97
- build
98
- ).build
99
- plot.show
100
- EOS
101
-
102
- module Rust::Plots::GGPlot
103
- def self.help!(topic = nil)
104
- unless topic
105
- puts "Topics:"
106
- GGPLOT_EXAMPLES.keys.each do |key, matcher|
107
- puts "- #{key}"
108
- end
109
- puts "Call again specifying the topic of interest."
110
- else
111
- GGPLOT_EXAMPLES.each do |key, value|
112
- if topic.match(key[1])
113
- puts "*** #{key[0]} ***"
114
- puts value
115
- return
116
- end
117
- end
118
-
119
- puts "Topic not found"
120
- end
121
- end
122
- end