rust 0.12 → 0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8eb6e3759ef38070603a941ef348ac46e4b6c08b4638c493edb2169acf16c793
4
- data.tar.gz: f855ca774695688ee64d7513ac94c8b98d3bae154bfae56b803ed1da567cb282
3
+ metadata.gz: 56e795fb0a8893df45abd976e2ed91344156f3c3dd4a68e17afd1a0fb317ece3
4
+ data.tar.gz: 406416738f1ab84fca06edd5cb59efdc623b12cefe03cd51b1a2cd840e218647
5
5
  SHA512:
6
- metadata.gz: c281de698d8b4750832d77971dac857dbee2c31252b7950abf91777d37763bbb79577098ab48efeee6f8a5ef2a28949ef72816d33703eaad14887d588b4aac32
7
- data.tar.gz: d44c0532c19ff8eb2f505d4da62e82b4b4f32dada4b39a05cddf57679a3a725dd38ceecf63aaf2d551a8ae691841196bad7cc0694d3613b928a84a9b6291ef6a
6
+ metadata.gz: 56854c3ff1bbd64ca8ff9d1201bc16fd37f4d3d465527217ab5c49d5cee0d6f4f34998bdf8a3ebdedf7ea8379909ca0db75d05da1bd46460c3e7066ee882ba7b
7
+ data.tar.gz: 6b21ba70c7d144384d1647dfa76f8894457bf4d65b74ce32d4e775cc3ecdc660f4596f61edd81e91b08c9d2b3def3491cebe10b7923b9f2988d072f7c5d25674
data/lib/rust/core/csv.rb CHANGED
@@ -90,9 +90,9 @@ module Rust
90
90
  dataframe.column_names.each do |column_name|
91
91
  values = dataframe.column(column_name)
92
92
 
93
- if values.all? { |s| !!Integer(s) rescue false }
93
+ if values.all? { |s| s == nil || !!Integer(s) rescue false }
94
94
  integer_columns << column_name
95
- elsif values.all? { |s| !!Float(s) rescue false }
95
+ elsif values.all? { |s| s == nil || !!Float(s) rescue false }
96
96
  float_columns << column_name
97
97
  end
98
98
  end
@@ -103,11 +103,11 @@ module Rust
103
103
  end
104
104
 
105
105
  integer_columns.each do |numeric_column|
106
- dataframe.transform_column!(numeric_column) { |v| v.to_i }
106
+ dataframe.transform_column!(numeric_column) { |v| v != nil ? v.to_i : v }
107
107
  end
108
108
 
109
109
  float_columns.each do |numeric_column|
110
- dataframe.transform_column!(numeric_column) { |v| v.to_f }
110
+ dataframe.transform_column!(numeric_column) { |v| v != nil ? v.to_f : v }
111
111
  end
112
112
 
113
113
  return dataframe
@@ -0,0 +1,89 @@
1
+ require_relative 'rust'
2
+
3
module Rust

    ##
    # In-library help system. Manuals are registered per category (a Symbol) and each manual
    # holds a set of "voices": a title, a matcher used to find it, and the text to print.

    class Manual
        @@manuals = {}

        ##
        # Prints the list of the registered manuals (name, category, and description). Returns nil.

        def self.about
            puts "Manuals available:"
            @@manuals.each do |category, manual|
                puts "\t- #{manual.name} (:#{category}) → #{manual.description}"
            end

            return nil
        end

        ##
        # Returns the manual registered for +category+ (String or Symbol).
        # Raises a RuntimeError if no manual is registered for that category.

        def self.for(category)
            category = category.to_sym
            raise "No manual found for '#{category}'." unless @@manuals[category]

            return @@manuals[category]
        end

        ##
        # Registers a new, empty manual with the given +name+ and +description+ for +category+.
        # A manual previously registered for the same category is replaced. Returns nil.

        def self.register(category, name, description)
            category = category.to_sym

            @@manuals[category] = Manual.new(name, description)

            return nil
        end

        attr_reader :name
        attr_reader :description

        ##
        # Creates a manual with the given +name+ and +description+ and no voices.

        def initialize(name, description)
            @name = name
            @description = description
            @voices = {}
        end

        ##
        # Prints the first voice (in registration order) that corresponds to +query+, or
        # "Voice not found" if there is none. A voice corresponds to the query if the query
        # matches the voice matcher or if the query equals the voice title (ignoring case), so
        # that the titles listed by +about+ can be used directly. Returns nil.

        def lookup(query)
            wanted_title = query.to_s.downcase

            @voices.each do |(voice, matcher), text|
                # The title comparison is needed because titles do not necessarily match their
                # own matcher (e.g., the title "CSVs" is not matched by /csv/).
                next unless query.match(matcher) || voice.to_s.downcase == wanted_title

                puts "*** #{voice} ***"
                puts text
                return nil
            end

            puts "Voice not found"

            return nil
        end

        ##
        # Returns the number of voices registered in this manual.

        def n_voices
            @voices.size
        end

        ##
        # Prints the name and the description of this manual, followed by the titles of its voices.
        # Returns nil.

        def about
            puts "****** Manual for #@name ******"
            puts @description
            puts "Voices in manual #@name:"
            @voices.each_key do |voice, _matcher|
                puts "\t- #{voice}"
            end

            return nil
        end

        ##
        # Adds a voice to this manual: +voice+ is its title, +matcher+ is what a query is matched
        # against in +lookup+, and +description+ is the text printed when the voice is found.

        def register(voice, matcher, description)
            @voices[[voice, matcher]] = description
        end

        def inspect
            return "Manual for #@name with #{self.n_voices} voices"
        end
    end
end
78
+
79
module Rust::RBindings

    ##
    # Entry point of the help system for the R bindings.
    # Without arguments, lists the available manuals; with only +category+, describes the manual
    # registered for that category; with both +category+ and +query+, looks up +query+ in it.

    def rust_help(category = nil, query = nil)
        return Rust::Manual.about unless category

        manual = Rust::Manual.for(category)
        return manual.about unless query

        return manual.lookup(query)
    end
end
@@ -102,7 +102,10 @@ module Rust
102
102
  end
103
103
 
104
104
  def self._rexec(r_command, return_warnings = false)
105
- puts "Calling _rexec with command: #{r_command}" if @@debugging
105
+ if @@debugging
106
+ puts "Calling _rexec with command: #{r_command}"
107
+ puts "\t" + Kernel.caller.select { |v| !v.include?("irb") }.last(3).map { |v| v.sub(/^.*gems\//, "")}.join("\n\t")
108
+ end
106
109
  R_MUTEX.synchronize do
107
110
  assert("This command must be executed in an exclusive block") { @@in_client_mutex }
108
111
 
@@ -154,10 +157,17 @@ module Rust
154
157
 
155
158
  ##
156
159
  # Installs the given +name+ library and its dependencies.
160
+ # +github+ indicates whether the package is in GitHub.
157
161
 
158
- def self.install_library(name)
162
+ def self.install_library(name, github = false)
163
+ self.prerequisite("remotes") if github
164
+
159
165
  self.exclusive do
160
- self._eval("install.packages(\"#{name}\", dependencies = TRUE)")
166
+ if github
167
+ self._eval("remotes::install_github(\"#{name}\", dependencies=TRUE)")
168
+ else
169
+ self._eval("install.packages(\"#{name}\", dependencies = TRUE)")
170
+ end
161
171
  end
162
172
 
163
173
  return nil
@@ -165,9 +175,15 @@ module Rust
165
175
 
166
176
  ##
167
177
  # Installs the +library+ library if it is not available and loads it.
178
+ # +github+ indicates whether the package appears in GitHub.
168
179
 
169
- def self.prerequisite(library)
170
- self.install_library(library) unless self.check_library(library)
180
+ def self.prerequisite(library, github = false)
181
+ full_library = library
182
+ library = library.split("/").last if github
183
+
184
+ unless self.check_library(library)
185
+ self.install_library(full_library, github)
186
+ end
171
187
  self.load_library(library)
172
188
  end
173
189
 
@@ -218,4 +234,11 @@ def bind_r!
218
234
  include Rust::RBindings
219
235
  end
220
236
 
237
+ ##
238
+ # Shortcut for requiring rust external libraries
239
+
240
+ def require_rust(name)
241
+ require "rust/external/#{name}"
242
+ end
243
+
221
244
  bind_r! if ENV['RUBY_RUST_BINDING'] == '1'
@@ -1,4 +1,5 @@
1
1
  require_relative 'datatype'
2
+ require 'tempfile'
2
3
 
3
4
  module Rust
4
5
 
@@ -355,8 +356,25 @@ module Rust
355
356
  end
356
357
 
357
358
def load_in_r_as(variable_name)
    # Loads this data frame in R as +variable_name+ by writing it to a temporary CSV file and
    # reading it back with read.csv. Returns true.
    tempfile = Tempfile.new('rust.dfport')
    tempfile.close

    begin
        Rust::CSV.write(tempfile.path, self)
        Rust._eval("#{variable_name} <- read.csv(\"#{tempfile.path}\", header=T)")

        if Rust.debug?
            # Keep a copy of the port file, since the original is removed right after.
            debug_path = tempfile.path + ".debug.csv"
            FileUtils.cp(tempfile.path, debug_path)
            puts "Debug CSV port file available at: #{debug_path}"
        end
    ensure
        # Remove the temporary file even when writing or evaluating fails.
        tempfile.unlink
    end

    return true
end
374
+
375
+ def directly_load_in_r_as(variable_name)
376
+ command = []
377
+
360
378
  command << "#{variable_name} <- data.frame()"
361
379
  row_index = 1
362
380
  self.each do |row|
@@ -374,6 +392,10 @@ module Rust
374
392
  end
375
393
 
376
394
  Rust._eval_big(command)
395
+
396
+ tempfile.unlink
397
+
398
+ return true
377
399
  end
378
400
 
379
401
  def inspect
@@ -408,16 +430,39 @@ module Rust
408
430
  return result
409
431
  end
410
432
 
433
+ ##
434
+ # Merges this data-frame with +other+ in terms of the +by+ column(s) (Array or String). Keeps all the rows in this data frame.
435
+ # +first_alias+ and +second_alias+ allow to specify the prefix that should be used for the columns not in +by+
436
+ # for this and the +other+ data-frame, respectively.
437
+
438
def left_merge(other, by, first_alias, second_alias, **options)
    # A left merge is performed as a right merge with the two data frames swapped.
    options[:keep_right] = true
    options[:keep_left] = false

    # Since the data frames are swapped, the aliases must be swapped as well: +first_alias+ has to
    # remain the prefix for the columns of this data frame and +second_alias+ the one for +other+.
    return other.merge(self, by, second_alias, first_alias, **options)
end
443
+
444
+ ##
445
+ # Merges this data-frame with +other+ in terms of the +by+ column(s) (Array or String). Keeps all the rows in the other data frame.
446
+ # +first_alias+ and +second_alias+ allow to specify the prefix that should be used for the columns not in +by+
447
+ # for this and the +other+ data-frame, respectively.
448
+
449
def right_merge(other, by, first_alias, second_alias, **options)
    # Whatever the caller passed, a right merge always keeps the rows of +other+ only.
    forced_options = options.merge(keep_right: true, keep_left: false)

    return merge(other, by, first_alias, second_alias, **forced_options)
end
454
+
411
455
  ##
412
456
  # Merges this data-frame with +other+ in terms of the +by+ column(s) (Array or String).
413
457
  # +first_alias+ and +second_alias+ allow to specify the prefix that should be used for the columns not in +by+
414
458
  # for this and the +other+ data-frame, respectively.
415
459
 
416
- def merge(other, by, first_alias = "x", second_alias = "y")
460
+ def merge(other, by, first_alias = "x", second_alias = "y", **options)
417
461
  raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
418
462
  raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
419
463
  raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
420
464
  raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size
465
+ raise "Either keep_right or keep_left should be provided as options, not both" if options[:keep_right] && options[:keep_left]
421
466
 
422
467
  if first_alias == second_alias
423
468
  if first_alias == ""
@@ -473,6 +518,28 @@ module Rust
473
518
  end
474
519
 
475
520
  result << to_add
521
+
522
+ elsif options[:keep_right]
523
+ to_add = {}
524
+
525
+ by.each do |colname|
526
+ to_add[colname] = other_row[colname]
527
+ end
528
+
529
+ merged_column_self.each do |colname|
530
+ to_add["#{first_alias}#{colname}"] = nil
531
+ end
532
+
533
+ merged_column_other.each do |colname|
534
+ to_add["#{second_alias}#{colname}"] = other_row[colname]
535
+ end
536
+
537
+ result << to_add
538
+
539
+ elsif options[:keep_left]
540
+ options[:keep_left] = false
541
+ options[:keep_right] = true
542
+ return other.merge(self, by, first_alias, second_alias, **options)
476
543
  end
477
544
  end
478
545
 
@@ -36,7 +36,7 @@ module Rust
36
36
  if candidates.size > 0
37
37
  type = candidates.max_by { |c| c.pull_priority }
38
38
 
39
- puts "Using #{type} to pull #{variable}" if Rust.debug?
39
+ puts "Using #{type} to pull #{variable} (candidates: #{candidates.map { |c| c.to_s + "=>" + c.pull_priority.to_s}.join(", ")})" if Rust.debug?
40
40
  return type.pull_variable(variable, r_type, r_class)
41
41
  else
42
42
  if Rust._pull("length(#{variable})") == 0
@@ -80,7 +80,7 @@ module Rust
80
80
  def r_mirror
81
81
  varname = self.mirrored_R_variable_name
82
82
 
83
- if !Rust._pull("exists(\"#{varname}\")") || Rust._pull("#{varname}.hash") != self.r_hash
83
+ if !Rust._pull("exists(\"#{varname}\")") || Rust["#{varname}.hash"] != self.r_hash
84
84
  puts "Loading #{varname}" if Rust.debug?
85
85
  Rust[varname] = self
86
86
  Rust["#{varname}.hash"] = self.r_hash
@@ -125,6 +125,10 @@ module Rust
125
125
  @level
126
126
  end
127
127
 
128
+ def to_str
129
+ @level.to_s
130
+ end
131
+
128
132
  def to_R
129
133
  self.to_i
130
134
  end
@@ -171,6 +171,22 @@ module Rust
171
171
  end
172
172
  end
173
173
 
174
+ ##
175
+ # Represents a verbatim R expression
176
+
177
+ class Verbatim
178
+ ##
179
+ #Creates a verbatim R expression
180
+
181
+ def initialize(expression)
182
+ @expression = expression
183
+ end
184
+
185
+ def to_R
186
+ @expression
187
+ end
188
+ end
189
+
174
190
  ##
175
191
  # Represents the arguments of a function in R. Works as an Array of objects.
176
192
 
@@ -196,4 +212,20 @@ module Rust
196
212
  return options
197
213
  end
198
214
  end
215
+
216
+ def self.verbatim(expression)
217
+ Verbatim.new(expression)
218
+ end
219
+
220
+ def self.variable(variable)
221
+ Variable.new(variable)
222
+ end
223
+
224
+ def self.function(name)
225
+ Function.new(name)
226
+ end
227
+
228
+ def self.formula(left_part, right_part)
229
+ Formula.new(left_part, right_part)
230
+ end
199
231
  end
@@ -29,6 +29,8 @@ module Rust
29
29
  @data.each do |key, value|
30
30
  Rust["#{variable_name}[[#{key + 1}]]"] = value
31
31
  end
32
+
33
+ Rust._eval("names(#{variable_name}) <- #{self.names.to_R}")
32
34
  end
33
35
 
34
36
  ##
@@ -81,6 +81,14 @@ module Rust
81
81
  @data.size
82
82
  end
83
83
 
84
+ def rownames
85
+ @row_names
86
+ end
87
+
88
+ def colnames
89
+ @column_names
90
+ end
91
+
84
92
  ##
85
93
  # Returns the number of columns.
86
94
 
data/lib/rust/core.rb CHANGED
@@ -1,7 +1,57 @@
1
1
  require_relative 'core/rust'
2
2
  require_relative 'core/csv'
3
+ require_relative 'core/manual'
3
4
 
4
5
  self_path = File.expand_path(__FILE__)
5
6
  Dir.glob(File.join(File.dirname(self_path), "core/types/*.rb")).each do |lib|
6
7
  require_relative lib
7
8
  end
9
+
10
+ Rust::Manual.register(:base, "Quick intro", "Core philosophy behind Rust.")
11
+ Rust::Manual.for(:base).register('Introduction', /intro/,
12
+ <<-EOS
13
+ Rust is a statistical library. Rust wraps R and its libraries to achieve this goal.
14
+ Rust aims at:
15
+ - Making easier for Ruby developers make all the kinds of operations that are straightforward in R;
16
+ - Providing an object-oriented interface, more familiar than the one in R.
17
+
18
+ Rust can be used in two ways:
19
+ - By using the object-oriented interface (advised if you are writing a script);
20
+ - By using the R bindings, that allow to use Ruby pretty much like R (handful if you are using it from IRB).
21
+
22
+ Rust provides wrappers for many elements, including types (e.g., data frames), statistical hypothesis tests, plots, and so on.
23
+ Under the hood, Rust creates an R environment (through rinruby), through which Rust can perform the most advanced operations,
24
+ for which a re-implementation would be impractical.
25
+ EOS
26
+ )
27
+
28
+ Rust::Manual.for(:base).register('Types', /type/,
29
+ <<-EOS
30
+ Rust provides wrappers for the most commonly-found types in R. Specifically, the following types are available:
31
+ - Data frames → Rust::DataFrame
32
+ - Factors → Rust::Factor
33
+ - Matrices → Rust::Matrix
34
+ - Lists → Rust::List
35
+ - S4 classes → Rust::S4Class
36
+ - Formulas → Rust::Formula
37
+
38
+ Note that some of them (e.g., data frames and matrices) are not just wrappers, but complete re-implementations of the R
39
+ types (for performance reasons).
40
+ EOS
41
+ )
42
+
43
+ Rust::Manual.for(:base).register('CSVs', /csv/,
44
+ <<-EOS
45
+ Rust allows to read and write CSV files, mostly like in R.
46
+ To read a CSV file, you can use:
47
+ Rust::CSV.read(filename)
48
+
49
+ It returns a data frame. You can also specify the option "headers" to tell if the first row in the CSV contains the headers
50
+ (column names for the data frame). Other options get directly passed to the R function "read.csv".
51
+
52
+ To write a CSV file, you can use:
53
+ Rust::CSV.write(filename, data_frame)
54
+
55
+ It writes the given data frame on the file at filename.
56
+ EOS
57
+ )
@@ -64,14 +64,6 @@ module Rust::Plots::GGPlot
64
64
  return self
65
65
  end
66
66
 
67
- def labeled(value)
68
- raise "No context for assigning a label" unless @current_context
69
- @label_options[@current_context] = value
70
- @current_context = nil
71
-
72
- return self
73
- end
74
-
75
67
  def with_x_label(value)
76
68
  @label_options[:x] = value
77
69
 
@@ -90,6 +82,76 @@ module Rust::Plots::GGPlot
90
82
  return self
91
83
  end
92
84
 
85
+ def scale_x_continuous(**options)
86
+ raise "No context for assigning a label" unless @current_context
87
+ @layers << AxisScaler.new(:x, :continuous, **options)
88
+
89
+ return self
90
+ end
91
+
92
+ def scale_y_continuous(**options)
93
+ raise "No context for assigning a label" unless @current_context
94
+ @layers << AxisScaler.new(:y, :continuous, **options)
95
+
96
+ return self
97
+ end
98
+
99
+ def scale_x_discrete(**options)
100
+ raise "No context for assigning a label" unless @current_context
101
+ @layers << AxisScaler.new(:x, :discrete, **options)
102
+
103
+ return self
104
+ end
105
+
106
+ def scale_y_discrete(**options)
107
+ raise "No context for assigning a label" unless @current_context
108
+ @layers << AxisScaler.new(:y, :discrete, **options)
109
+
110
+ return self
111
+ end
112
+
113
+ def scale_x_log10(**options)
114
+ raise "No context for assigning a label" unless @current_context
115
+ @layers << AxisScaler.new(:x, :log10, **options)
116
+
117
+ return self
118
+ end
119
+
120
+ def scale_y_log10(**options)
121
+ raise "No context for assigning a label" unless @current_context
122
+ @layers << AxisScaler.new(:y, :log10, **options)
123
+
124
+ return self
125
+ end
126
+
127
+ def scale_x_reverse(**options)
128
+ raise "No context for assigning a label" unless @current_context
129
+ @layers << AxisScaler.new(:x, :reverse, **options)
130
+
131
+ return self
132
+ end
133
+
134
+ def scale_y_reverse(**options)
135
+ raise "No context for assigning a label" unless @current_context
136
+ @layers << AxisScaler.new(:y, :reverse, **options)
137
+
138
+ return self
139
+ end
140
+
141
+ def scale_x_sqrt(**options)
142
+ raise "No context for assigning a label" unless @current_context
143
+ @layers << AxisScaler.new(:x, :sqrt, **options)
144
+
145
+ return self
146
+ end
147
+
148
+ def scale_y_sqrt(**options)
149
+ raise "No context for assigning a label" unless @current_context
150
+ @layers << AxisScaler.new(:y, :sqrt, **options)
151
+
152
+ return self
153
+ end
154
+
93
155
  def with_title(value)
94
156
  @label_options[:title] = value
95
157
 
@@ -160,6 +222,48 @@ module Rust::Plots::GGPlot
160
222
  return self
161
223
  end
162
224
 
225
+ def labeled(value)
226
+ raise "No context for assigning a label" unless @current_context
227
+ @label_options[@current_context] = value
228
+
229
+ return self
230
+ end
231
+
232
+ def scale_continuous(**options)
233
+ raise "No context for assigning a label" unless @current_context
234
+ @layers << AxisScaler.new(@current_context, :continuous, **options)
235
+
236
+ return self
237
+ end
238
+
239
+ def scale_discrete(**options)
240
+ raise "No context for assigning a label" unless @current_context
241
+ @layers << AxisScaler.new(@current_context, :discrete, **options)
242
+
243
+ return self
244
+ end
245
+
246
+ def scale_log10(**options)
247
+ raise "No context for assigning a label" unless @current_context
248
+ @layers << AxisScaler.new(@current_context, :log10, **options)
249
+
250
+ return self
251
+ end
252
+
253
+ def scale_reverse(**options)
254
+ raise "No context for assigning a label" unless @current_context
255
+ @layers << AxisScaler.new(@current_context, :reverse, **options)
256
+
257
+ return self
258
+ end
259
+
260
+ def scale_sqrt(**options)
261
+ raise "No context for assigning a label" unless @current_context
262
+ @layers << AxisScaler.new(@current_context, :sqrt, **options)
263
+
264
+ return self
265
+ end
266
+
163
267
  def flip_coordinates
164
268
  @layers << FlipCoordinates.new
165
269
 
@@ -0,0 +1,12 @@
1
+ require_relative 'core'
2
+
3
module Rust::Plots::GGPlot

    ##
    # Layer for scaling an axis: the layer name is built as "scale_<axis>_<type>" and
    # the +options+ are forwarded as they are to the Layer constructor.

    class AxisScaler < Layer
        def initialize(axis, type = :continuous, **options)
            @axis, @type = axis, type

            layer_name = "scale_#{axis}_#{type}"
            super(layer_name, **options)
        end
    end
end
@@ -53,7 +53,7 @@ module Rust::Plots::GGPlot
53
53
  def to_h
54
54
  options = @options.clone
55
55
 
56
- options['_starting'] = @starting.sub("theme_", "")
56
+ options['_starting'] = @starting.sub("theme_", "") if @starting
57
57
  options = options.map do |key, value|
58
58
  [key, value.is_a?(Theme::Element) ? value.to_h : value]
59
59
  end.to_h
@@ -78,6 +78,9 @@ module Rust::Plots::GGPlot
78
78
  end
79
79
  end
80
80
 
81
+ class ExistingTheme < Layer
82
+ end
83
+
81
84
  class Theme::Element
82
85
  attr_reader :options
83
86
 
@@ -152,6 +155,8 @@ module Rust::Plots::GGPlot
152
155
  return value
153
156
  elsif value.is_a?(Hash)
154
157
  return Theme::LineElement.new(**value)
158
+ elsif !value
159
+ return Theme::BlankElement.new
155
160
  else
156
161
  raise "Expected line or hash"
157
162
  end
@@ -162,6 +167,8 @@ module Rust::Plots::GGPlot
162
167
  return value
163
168
  elsif value.is_a?(Hash)
164
169
  return Theme::RectElement.new(**value)
170
+ elsif !value
171
+ return Theme::BlankElement.new
165
172
  else
166
173
  raise "Expected rect or hash"
167
174
  end
@@ -172,6 +179,8 @@ module Rust::Plots::GGPlot
172
179
  return value
173
180
  elsif value.is_a?(Hash)
174
181
  return Theme::TextElement.new(**value)
182
+ elsif !value
183
+ return Theme::BlankElement.new
175
184
  else
176
185
  raise "Expected text or hash"
177
186
  end
@@ -225,7 +234,7 @@ module Rust::Plots::GGPlot
225
234
  end
226
235
 
227
236
  class ThemeBuilder < ThemeComponentBuilder
228
- def initialize(starting = 'bw')
237
+ def initialize(starting = nil)
229
238
  super("plot")
230
239
  @starting = starting
231
240
  end
@@ -417,7 +426,21 @@ module Rust::Plots::GGPlot
417
426
  end
418
427
  end
419
428
 
420
- self.default_theme = ThemeBuilder.new.
429
+ class ThemeCollection
430
+ def self.ggtech(name = "google")
431
+ Rust.prerequisite("ricardo-bion/ggtech", true)
432
+
433
+ return ExistingTheme.new("theme_tech", theme: name)
434
+ end
435
+
436
+ def self.ggdark(style = "classic")
437
+ Rust.prerequisite("ggdark")
438
+
439
+ return ExistingTheme.new("dark_theme_#{style}")
440
+ end
441
+ end
442
+
443
+ self.default_theme = ThemeBuilder.new("bw").
421
444
  title(face: 'bold', size: 12).
422
445
  legend do |legend|
423
446
  legend.background(fill: 'white', size: 4, colour: 'white')
@@ -2,4 +2,115 @@ require_relative 'ggplot2/core'
2
2
  require_relative 'ggplot2/geoms'
3
3
  require_relative 'ggplot2/themes'
4
4
  require_relative 'ggplot2/plot_builder'
5
- require_relative 'ggplot2/helper'
5
+ require_relative 'ggplot2/scale'
6
+
7
+ Rust::Manual.register(:ggplot2, "ggplot2", "Informations on the wrapper of the popular ggplot2 plotting library for R.")
8
+
9
+ Rust::Manual.for(:ggplot2).register("Introduction", /intro/,
10
+ <<-EOS
11
+ bind_ggplot! # Avoid using long module names to reach Rust::Plots::GGPlot (simply includes this module)
12
+
13
+ # Best with a dataframe, but not necessary. If you have it...
14
+ df = Rust.toothgrowth
15
+ plot = PlotBuilder.for_dataframe(df). # Use a dataframe (symbols will be variable names)
16
+ labeled("Example plot"). # "labeled" sets the label to the last set aesthetic item (x, y, or title, in this case)
17
+ with_x(:len).labeled("X data from df"). # Set all the aesthetics (x, y, ...)
18
+ with_y(:dose).labeled("Y data from df").
19
+ draw_points. # Set the geometries to plot (based on the plot type)
20
+ build # Returns the plot ready to use
21
+ plot.show # Show the plot in a window
22
+ plot.save("output.pdf", width: 5, height: 4) # Save the plot, width, height etc. are optional
23
+
24
+ # If you don't have a dataframe...
25
+ plot2 = PlotBuilder.new.
26
+ with_x([1,2,3]).labeled("X data from df").
27
+ with_y([3,4,5]).labeled("Y data from df").
28
+ draw_points.
29
+ build
30
+ plot2.show
31
+ EOS
32
+ )
33
+
34
+ Rust::Manual.for(:ggplot2).register("Scatter plots", /scatter/,
35
+ <<-EOS
36
+ bind_ggplot!
37
+ df = Rust.toothgrowth
38
+ plot = PlotBuilder.for_dataframe(df).
39
+ with_x(:len).labeled("X data").
40
+ with_y(:dose).labeled("Y data").
41
+ draw_points. # To draw points
42
+ draw_lines. # To draw lines (keep both to draw both)
43
+ build
44
+ plot.show
45
+ EOS
46
+ )
47
+
48
+ Rust::Manual.for(:ggplot2).register("Bar plots", /bar/,
49
+ <<-EOS
50
+ bind_ggplot!
51
+ df = Rust.toothgrowth
52
+ plot = PlotBuilder.for_dataframe(df).
53
+ with_x(:len).labeled("X data").
54
+ with_fill(:supp).labeled("Legend"). # Use with_fill or with_color for stacked plots
55
+ draw_bars. # To draw bars
56
+ build
57
+ plot.show
58
+ EOS
59
+ )
60
+
61
+ Rust::Manual.for(:ggplot2).register("Box plots", /box/,
62
+ <<-EOS
63
+ bind_ggplot!
64
+ df = Rust.toothgrowth
65
+ plot = PlotBuilder.for_dataframe(df).
66
+ with_y(:len).labeled("Data to boxplot").
67
+ with_group(:supp).labeled("Groups"). # Groups to plot
68
+ draw_boxplot.
69
+ build
70
+ plot.show
71
+ EOS
72
+ )
73
+
74
+ Rust::Manual.for(:ggplot2).register("Histograms", /hist/,
75
+ <<-EOS
76
+ bind_ggplot!
77
+ df = Rust.toothgrowth
78
+ plot = PlotBuilder.for_dataframe(df).
79
+ with_x(:len).labeled("Data to plot").
80
+ with_fill(:supp).labeled("Color"). # Use with_fill or with_color for multiple plots
81
+ draw_histogram.
82
+ build
83
+ plot.show
84
+ EOS
85
+ )
86
+
87
+ Rust::Manual.for(:ggplot2).register("Themes", /them/,
88
+ <<-EOS
89
+ bind_ggplot!
90
+ df = Rust.toothgrowth
91
+ # The method with_theme allows to change theme options. The method can be called
92
+ # several times, each time the argument does not overwrite the previous options,
93
+ # unless they are specified again (in that case, the last specified ones win).
94
+ plot = PlotBuilder.for_dataframe(df).
95
+ with_x(:len).labeled("X data").
96
+ with_y(:dose).labeled("Y data").
97
+ draw_points.
98
+ with_theme(
99
+ ThemeBuilder.new('bw').
100
+ title(face: 'bold', size: 12). # Each method sets the property for the related element
101
+ legend do |legend| # Legend and other parts can be set like this
102
+ legend.position(:left) # Puts the legend on the left
103
+ end.
104
+ axis do |axis| # Modifies the axes
105
+ axis.line(Theme::BlankElement.new) # Hides the lines for the axes
106
+ axis.text_x(size: 3) # X axis labels
107
+ end.
108
+ panel do |panel|
109
+ panel.grid_major(colour: 'grey70', size: 0.2) # Sets the major ticks grid
110
+ panel.grid_minor(Theme::BlankElement.new) # Hides the minor ticks grid
111
+ end.
112
+ build
113
+ ).build
114
+ plot.show
115
+ EOS
116
+ )
@@ -14,6 +14,11 @@ module Rust::Models::Regression
14
14
  # Generic regression model in R.
15
15
 
16
16
  class RegressionModel < Rust::RustDatatype
17
+
18
+ attr_accessor :data
19
+ attr_accessor :dependent_variable
20
+ attr_accessor :options
21
+
17
22
  def self.can_pull?(type, klass)
18
23
  # Can only pull specific sub-types
19
24
  return false
@@ -38,22 +43,30 @@ module Rust::Models::Regression
38
43
 
39
44
  formula = Rust::Formula.new(dependent_variable, independent_variables.join(" + "))
40
45
 
46
+ result = nil
41
47
  Rust.exclusive do
42
48
  Rust["#{model_type}.data"] = data
43
49
 
44
50
  Rust._eval("#{model_type}.model.result <- #{model_type}(#{formula.to_R}, data=#{model_type}.data#{mapped})")
45
51
  result = Rust["#{model_type}.model.result"]
46
- result.r_mirror_to("#{model_type}.model.result")
47
52
 
48
- return result
53
+ raise "An error occurred while building the model" unless result
54
+
55
+ result.r_mirror_to("#{model_type}.model.result")
49
56
  end
57
+
58
+ result.dependent_variable = dependent_variable
59
+ result.data = data
60
+ result.options = options
61
+
62
+ return result
50
63
  end
51
64
 
52
65
  ##
53
- # Creates a new +model+.
66
+ # Creates a new model based on +model+.
54
67
 
55
68
  def initialize(model)
56
- raise StandardError if model.is_a?(RegressionModel)
69
+ raise "Expected a R list, given a #{model.class}" if !model.is_a?(Rust::List)
57
70
  @model = model
58
71
  end
59
72
 
@@ -118,6 +131,58 @@ module Rust::Models::Regression
118
131
  a = self.summary|"coefficients"
119
132
  end
120
133
 
134
+ ##
135
+ # Returns object variables for the model with basic data (coefficients and p-values). Use the method `coefficients`
136
+ # to get more data.
137
+
138
+ def variables
139
+ unless @variables
140
+ coefficients = self.coefficients
141
+
142
+ @variables = coefficients.rownames.map do |name|
143
+ ModelVariable.new(name, coefficients[name, "Estimate"], coefficients[name, "Pr(>|t|)"])
144
+ end
145
+ end
146
+
147
+ return @variables
148
+ end
149
+
150
+ ##
151
+ # Returns only the significant variables as ModelVariable instances. See the method `variables`.
152
+
153
+ def significant_variables(a = 0.05)
154
+ self.variables.select { |v| v.significant?(a) }
155
+ end
156
+
157
+ ##
158
+ # Runs backward selection (recursively removes a variable until the best model is found).
159
+ # Returns both the best model and the list of excluded variable at each step
160
+ # Note: Not fully tested
161
+
162
+ def backward_selection(excluded = [])
163
+ candidates = self.variables.select { |v| !v.intercept? && !v.significant? }.sort_by { |v| v.pvalue }.reverse
164
+ all = self.variables.select { |v| !v.intercept? }
165
+
166
+ candidates.each do |candidate|
167
+ new_model = RegressionModel.generate(
168
+ self.class,
169
+ self.class.r_model_name,
170
+ self.dependent_variable,
171
+ (all - [candidate]).map { |v| v.name },
172
+ self.data,
173
+ **self.options
174
+ )
175
+
176
+ if new_model.r_2_adjusted >= self.r_2_adjusted
177
+ puts "Excluded #{candidate}" if Rust.debug?
178
+ return *new_model.backward_selection(excluded + [candidate])
179
+ end
180
+ end
181
+
182
+ return self, excluded
183
+ end
184
+
185
+
121
186
  def method_missing(name, *args)
122
187
  return model|name.to_s
123
188
  end
@@ -145,7 +210,11 @@ module Rust::Models::Regression
145
210
 
146
211
  class LinearRegressionModel < RegressionModel
147
212
  def self.can_pull?(type, klass)
148
- return type == "list" && klass == "lm"
213
+ return type == "list" && klass == self.r_model_name
214
+ end
215
+
216
+ def self.pull_priority
217
+ 1
149
218
  end
150
219
 
151
220
  def self.pull_variable(variable, type, klass)
@@ -154,6 +223,10 @@ module Rust::Models::Regression
154
223
  return LinearRegressionModel.new(model)
155
224
  end
156
225
 
226
+ def self.r_model_name
227
+ "lm"
228
+ end
229
+
157
230
  ##
158
231
  # Generates a linear regression model, given its +dependent_variable+ and +independent_variables+ and its +data+.
159
232
  # +options+ can be specified and directly passed to the model.
@@ -161,7 +234,7 @@ module Rust::Models::Regression
161
234
  def self.generate(dependent_variable, independent_variables, data, **options)
162
235
  RegressionModel.generate(
163
236
  LinearRegressionModel,
164
- "lm",
237
+ self.r_model_name,
165
238
  dependent_variable,
166
239
  independent_variables,
167
240
  data,
@@ -175,13 +248,17 @@ module Rust::Models::Regression
175
248
 
176
249
  class LinearMixedEffectsModel < RegressionModel
177
250
  def self.can_pull?(type, klass)
178
- return type == "S4" && klass == "lmerModLmerTest"
251
+ return type == "S4" && klass == self.r_model_name
179
252
  end
180
253
 
181
254
  def self.pull_priority
182
255
  1
183
256
  end
184
257
 
258
+ def self.r_model_name
259
+ "lmerModLmerTest"
260
+ end
261
+
185
262
  def self.pull_variable(variable, type, klass)
186
263
  model = Rust::RustDatatype.pull_variable(variable, Rust::S4Class)
187
264
 
@@ -213,7 +290,7 @@ module Rust::Models::Regression
213
290
 
214
291
  RegressionModel.generate(
215
292
  LinearMixedEffectsModel,
216
- "lmer",
293
+ self.r_model_name,
217
294
  dependent_variable,
218
295
  fixed_effects + random_effects,
219
296
  data,
@@ -235,18 +312,44 @@ module Rust::Models::Regression
235
312
  end
236
313
  end
237
314
  end
315
+
316
+ ##
317
+ # Slim representation for a variable in a model, with just the variable name, its coefficient and its p-value.
318
+
319
class ModelVariable
    attr_accessor :name, :coefficient, :pvalue

    ##
    # Creates a variable with the given +name+, +coefficient+, and +pvalue+.

    def initialize(name, coefficient, pvalue)
        @name, @coefficient, @pvalue = name, coefficient, pvalue
    end

    ##
    # Tells whether this variable is the intercept of the model (i.e., it is named "(Intercept)").

    def intercept?
        return @name == "(Intercept)"
    end

    ##
    # Tells whether the p-value of this variable is at most +a+ (0.05 by default).

    def significant?(a = 0.05)
        return @pvalue <= a
    end
end
238
341
  end
239
342
 
240
343
  module Rust::RBindings
241
344
  def lm(formula, data, **options)
242
345
  independent = formula.right_part.split("+").map { |v| v.strip }
243
- return LinearRegressionModel.generate(formula.left_part, independent, data, **options)
346
+ return Rust::Models::Regression::LinearRegressionModel.generate(formula.left_part, independent, data, **options)
244
347
  end
245
348
 
246
349
  def lmer(formula, data, **options)
247
350
  independent = formula.right_part.split("+").map { |v| v.strip }
248
351
 
249
- RegressionModel.generate(
352
+ Rust::Models::Regression::RegressionModel.generate(
250
353
  LinearMixedEffectsModel,
251
354
  "lmer",
252
355
  formula.left_part,
@@ -85,6 +85,20 @@ module Rust
85
85
  @values.map { |k, v| k*v }.sum
86
86
  end
87
87
 
88
+ ##
89
+ # Returns the variance for this slice.
90
+
91
+ def variance
92
+ @values.map { |k, v| k**2 * v }.sum - (self.expected ** 2)
93
+ end
94
+
95
+ ##
96
+ # Returns the standard deviation for this slice.
97
+
98
+ def sd
99
+ Math.sqrt(self.variance)
100
+ end
101
+
88
102
  ##
89
103
  # Returns a slice with the values that are greater than +n+.
90
104
 
@@ -124,7 +138,7 @@ module Rust
124
138
  # Returns a slice with the values between +a+ and +b+.
125
139
 
126
140
  def between(a, b)
127
- self.so_that { |k| k.between(a, b) }
141
+ self.so_that { |k| k.between?(a, b) }
128
142
  end
129
143
 
130
144
  ##
@@ -133,6 +147,13 @@ module Rust
133
147
  def so_that
134
148
  RandomVariableSlice.new(@values.select { |k, v| yield(k) })
135
149
  end
150
+
151
+ ##
152
+ # Creates a bar plot of the distribution
153
+
154
+ def plot
155
+ Rust::Plots::BarPlot.new(@values.sort_by { |k, v| k }.to_h)
156
+ end
136
157
  end
137
158
 
138
159
  ##
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.12'
4
+ version: '0.13'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-17 00:00:00.000000000 Z
11
+ date: 2024-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rinruby
@@ -61,6 +61,7 @@ files:
61
61
  - lib/rust.rb
62
62
  - lib/rust/core.rb
63
63
  - lib/rust/core/csv.rb
64
+ - lib/rust/core/manual.rb
64
65
  - lib/rust/core/rust.rb
65
66
  - lib/rust/core/types/all.rb
66
67
  - lib/rust/core/types/dataframe.rb
@@ -74,8 +75,8 @@ files:
74
75
  - lib/rust/external/ggplot2.rb
75
76
  - lib/rust/external/ggplot2/core.rb
76
77
  - lib/rust/external/ggplot2/geoms.rb
77
- - lib/rust/external/ggplot2/helper.rb
78
78
  - lib/rust/external/ggplot2/plot_builder.rb
79
+ - lib/rust/external/ggplot2/scale.rb
79
80
  - lib/rust/external/ggplot2/themes.rb
80
81
  - lib/rust/external/robustbase.rb
81
82
  - lib/rust/models/all.rb
@@ -110,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
110
111
  - !ruby/object:Gem::Version
111
112
  version: '0'
112
113
  requirements: []
113
- rubygems_version: 3.3.15
114
+ rubygems_version: 3.5.16
114
115
  signing_key:
115
116
  specification_version: 4
116
117
  summary: Ruby advanced statistical library
@@ -1,122 +0,0 @@
1
- require_relative 'core'
2
-
3
- GGPLOT_EXAMPLES = {}
4
-
5
- GGPLOT_EXAMPLES[["Quick introduction", /intro/]] = <<-EOS
6
- bind_ggplot! # Avoid using long module names to reach Rust::Plots::GGPlot (simply includes this module)
7
-
8
- # Best with a dataframe, but not necessary. If you have it...
9
- df = Rust.toothgrowth
10
- plot = PlotBuilder.for_dataframe(df). # Use a dataframe (symbols will be variable names)
11
- labeled("Example plot"). # "labeled" sets the label to the last set aesthetic item (x, y, or title, in this case)
12
- with_x(:len).labeled("X data from df"). # Set all the aesthetics (x, y, ...)
13
- with_y(:dose).labeled("Y data from df").
14
- draw_points. # Set the geometries to plot (based on the plot type)
15
- build # Returns the plot ready to use
16
- plot.show # Show the plot in a window
17
- plot.save("output.pdf", width: 5, height: 4) # Save the plot, width, height etc. are optional
18
-
19
- # If you don't have a dataframe...
20
- plot2 = PlotBuilder.new.
21
- with_x([1,2,3]).labeled("X data from df").
22
- with_y([3,4,5]).labeled("Y data from df").
23
- draw_points.
24
- build
25
- plot2.show
26
- EOS
27
-
28
- GGPLOT_EXAMPLES[["Scatter plots", /scatter/]] = <<-EOS
29
- bind_ggplot!
30
- df = Rust.toothgrowth
31
- plot = PlotBuilder.for_dataframe(df).
32
- with_x(:len).labeled("X data").
33
- with_y(:dose).labeled("Y data").
34
- draw_points. # To draw points
35
- draw_lines. # To draw lines (keep both to draw both)
36
- build
37
- plot.show
38
- EOS
39
-
40
- GGPLOT_EXAMPLES[["Bar plots", /bar/]] = <<-EOS
41
- bind_ggplot!
42
- df = Rust.toothgrowth
43
- plot = PlotBuilder.for_dataframe(df).
44
- with_x(:len).labeled("X data").
45
- with_fill(:supp).labeled("Legend"). # Use with_fill or with_color for stacked plots
46
- draw_bars. # To draw bars
47
- build
48
- plot.show
49
- EOS
50
-
51
- GGPLOT_EXAMPLES[["Box plots", /box/]] = <<-EOS
52
- bind_ggplot!
53
- df = Rust.toothgrowth
54
- plot = PlotBuilder.for_dataframe(df).
55
- with_y(:len).labeled("Data to boxplot").
56
- with_group(:supp).labeled("Groups"). # Groups to plot
57
- draw_boxplot.
58
- build
59
- plot.show
60
- EOS
61
-
62
- GGPLOT_EXAMPLES[["Histograms", /hist/]] = <<-EOS
63
- bind_ggplot!
64
- df = Rust.toothgrowth
65
- plot = PlotBuilder.for_dataframe(df).
66
- with_x(:len).labeled("Data to plot").
67
- with_fill(:supp).labeled("Color"). # Use with_fill or with_color for multiple plots
68
- draw_histogram.
69
- build
70
- plot.show
71
- EOS
72
-
73
- GGPLOT_EXAMPLES[["Themes", /them/]] = <<-EOS
74
- bind_ggplot!
75
- df = Rust.toothgrowth
76
- # The method with_theme allows to change theme options. The method can be called
77
- # several times, each time the argument does not overwrite the previous options,
78
- # unless they are specified again (in that case, the last specified ones win).
79
- plot = PlotBuilder.for_dataframe(df).
80
- with_x(:len).labeled("X data").
81
- with_y(:dose).labeled("Y data").
82
- draw_points.
83
- with_theme(
84
- ThemeBuilder.new('bw').
85
- title(face: 'bold', size: 12). # Each method sets the property for the related element
86
- legend do |legend| # Legend and other parts can be set like this
87
- legend.position(:left) # Puts the legend on the left
88
- end.
89
- axis do |axis| # Modifies the axes
90
- axis.line(Theme::BlankElement.new) # Hides the lines for the axes
91
- axis.text_x(size: 3) # X axis labels
92
- end.
93
- panel do |panel|
94
- panel.grid_major(colour: 'grey70', size: 0.2) # Sets the major ticks grid
95
- panel.grid_minor(Theme::BlankElement.new) # Hides the minor ticks grid
96
- end.
97
- build
98
- ).build
99
- plot.show
100
- EOS
101
-
102
- module Rust::Plots::GGPlot
103
- def self.help!(topic = nil)
104
- unless topic
105
- puts "Topics:"
106
- GGPLOT_EXAMPLES.keys.each do |key, matcher|
107
- puts "- #{key}"
108
- end
109
- puts "Call again specifying the topic of interest."
110
- else
111
- GGPLOT_EXAMPLES.each do |key, value|
112
- if topic.match(key[1])
113
- puts "*** #{key[0]} ***"
114
- puts value
115
- return
116
- end
117
- end
118
-
119
- puts "Topic not found"
120
- end
121
- end
122
- end