rust 0.12 → 0.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8eb6e3759ef38070603a941ef348ac46e4b6c08b4638c493edb2169acf16c793
4
- data.tar.gz: f855ca774695688ee64d7513ac94c8b98d3bae154bfae56b803ed1da567cb282
3
+ metadata.gz: 56e795fb0a8893df45abd976e2ed91344156f3c3dd4a68e17afd1a0fb317ece3
4
+ data.tar.gz: 406416738f1ab84fca06edd5cb59efdc623b12cefe03cd51b1a2cd840e218647
5
5
  SHA512:
6
- metadata.gz: c281de698d8b4750832d77971dac857dbee2c31252b7950abf91777d37763bbb79577098ab48efeee6f8a5ef2a28949ef72816d33703eaad14887d588b4aac32
7
- data.tar.gz: d44c0532c19ff8eb2f505d4da62e82b4b4f32dada4b39a05cddf57679a3a725dd38ceecf63aaf2d551a8ae691841196bad7cc0694d3613b928a84a9b6291ef6a
6
+ metadata.gz: 56854c3ff1bbd64ca8ff9d1201bc16fd37f4d3d465527217ab5c49d5cee0d6f4f34998bdf8a3ebdedf7ea8379909ca0db75d05da1bd46460c3e7066ee882ba7b
7
+ data.tar.gz: 6b21ba70c7d144384d1647dfa76f8894457bf4d65b74ce32d4e775cc3ecdc660f4596f61edd81e91b08c9d2b3def3491cebe10b7923b9f2988d072f7c5d25674
data/lib/rust/core/csv.rb CHANGED
@@ -90,9 +90,9 @@ module Rust
90
90
  dataframe.column_names.each do |column_name|
91
91
  values = dataframe.column(column_name)
92
92
 
93
- if values.all? { |s| !!Integer(s) rescue false }
93
+ if values.all? { |s| s == nil || !!Integer(s) rescue false }
94
94
  integer_columns << column_name
95
- elsif values.all? { |s| !!Float(s) rescue false }
95
+ elsif values.all? { |s| s == nil || !!Float(s) rescue false }
96
96
  float_columns << column_name
97
97
  end
98
98
  end
@@ -103,11 +103,11 @@ module Rust
103
103
  end
104
104
 
105
105
  integer_columns.each do |numeric_column|
106
- dataframe.transform_column!(numeric_column) { |v| v.to_i }
106
+ dataframe.transform_column!(numeric_column) { |v| v != nil ? v.to_i : v }
107
107
  end
108
108
 
109
109
  float_columns.each do |numeric_column|
110
- dataframe.transform_column!(numeric_column) { |v| v.to_f }
110
+ dataframe.transform_column!(numeric_column) { |v| v != nil ? v.to_f : v }
111
111
  end
112
112
 
113
113
  return dataframe
@@ -0,0 +1,89 @@
1
+ require_relative 'rust'
2
+
3
+ module Rust
4
+ class Manual
5
+ @@manuals = {}
6
+
7
+ def self.about
8
+ puts "Manuals available:"
9
+ @@manuals.each do |category, manual|
10
+ puts "\t- #{manual.name} (:#{category}) → #{manual.description}"
11
+ end
12
+
13
+ return nil
14
+ end
15
+
16
+ def self.for(category)
17
+ category = category.to_sym
18
+ raise "No manual found for '#{category}'." unless @@manuals[category]
19
+
20
+ return @@manuals[category]
21
+ end
22
+
23
+ def self.register(category, name, description)
24
+ category = category.to_sym
25
+
26
+ @@manuals[category] = Manual.new(name, description)
27
+
28
+ return nil
29
+ end
30
+
31
+ attr_reader :name
32
+ attr_reader :description
33
+
34
+ def initialize(name, description)
35
+ @name = name
36
+ @description = description
37
+ @voices = {}
38
+ end
39
+
40
+ def lookup(query)
41
+ @voices.each do |key, value|
42
+ if query.match(key[1])
43
+ puts "*** #{key[0]} ***"
44
+ puts value
45
+ return
46
+ end
47
+ end
48
+
49
+ puts "Voice not found"
50
+
51
+ return nil
52
+ end
53
+
54
+ def n_voices
55
+ @voices.size
56
+ end
57
+
58
+ def about
59
+ puts "****** Manual for #@name ******"
60
+ puts @description
61
+ puts "Voices in manual #@name:"
62
+ @voices.keys.each do |key, matcher|
63
+ puts "\t- #{key}"
64
+ end
65
+
66
+ return nil
67
+ end
68
+
69
+ def register(voice, matcher, description)
70
+ @voices[[voice, matcher]] = description
71
+ end
72
+
73
+ def inspect
74
+ return "Manual for #@name with #{self.n_voices} voices"
75
+ end
76
+ end
77
+ end
78
+
79
+ module Rust::RBindings
80
+ def rust_help(category = nil, query = nil)
81
+ if !category
82
+ return Rust::Manual.about
83
+ elsif !query
84
+ return Rust::Manual.for(category).about
85
+ else
86
+ return Rust::Manual.for(category).lookup(query)
87
+ end
88
+ end
89
+ end
@@ -102,7 +102,10 @@ module Rust
102
102
  end
103
103
 
104
104
  def self._rexec(r_command, return_warnings = false)
105
- puts "Calling _rexec with command: #{r_command}" if @@debugging
105
+ if @@debugging
106
+ puts "Calling _rexec with command: #{r_command}"
107
+ puts "\t" + Kernel.caller.select { |v| !v.include?("irb") }.last(3).map { |v| v.sub(/^.*gems\//, "")}.join("\n\t")
108
+ end
106
109
  R_MUTEX.synchronize do
107
110
  assert("This command must be executed in an exclusive block") { @@in_client_mutex }
108
111
 
@@ -154,10 +157,17 @@ module Rust
154
157
 
155
158
  ##
156
159
  # Installs the given +name+ library and its dependencies.
160
+ # +github+ indicates whether the package must be installed from GitHub (through remotes::install_github) rather than through install.packages.
157
161
 
158
- def self.install_library(name)
162
+ def self.install_library(name, github = false)
163
+ self.prerequisite("remotes") if github
164
+
159
165
  self.exclusive do
160
- self._eval("install.packages(\"#{name}\", dependencies = TRUE)")
166
+ if github
167
+ self._eval("remotes::install_github(\"#{name}\", dependencies=TRUE)")
168
+ else
169
+ self._eval("install.packages(\"#{name}\", dependencies = TRUE)")
170
+ end
161
171
  end
162
172
 
163
173
  return nil
@@ -165,9 +175,15 @@ module Rust
165
175
 
166
176
  ##
167
177
  # Installs the +library+ library if it is not available and loads it.
178
+ # +github+ indicates whether +library+ is hosted on GitHub; in that case, only the part after the last "/" is used as the library name to check and load.
168
179
 
169
- def self.prerequisite(library)
170
- self.install_library(library) unless self.check_library(library)
180
+ def self.prerequisite(library, github = false)
181
+ full_library = library
182
+ library = library.split("/").last if github
183
+
184
+ unless self.check_library(library)
185
+ self.install_library(full_library, github)
186
+ end
171
187
  self.load_library(library)
172
188
  end
173
189
 
@@ -218,4 +234,11 @@ def bind_r!
218
234
  include Rust::RBindings
219
235
  end
220
236
 
237
+ ##
238
+ # Shortcut for requiring rust external libraries
239
+
240
+ def require_rust(name)
241
+ require "rust/external/#{name}"
242
+ end
243
+
221
244
  bind_r! if ENV['RUBY_RUST_BINDING'] == '1'
@@ -1,4 +1,5 @@
1
1
  require_relative 'datatype'
2
+ require 'tempfile'
2
3
 
3
4
  module Rust
4
5
 
@@ -355,8 +356,25 @@ module Rust
355
356
  end
356
357
 
357
358
  def load_in_r_as(variable_name)
358
- command = []
359
+ tempfile = Tempfile.new('rust.dfport')
360
+ tempfile.close
361
+
362
+ Rust::CSV.write(tempfile.path, self)
363
+ Rust._eval("#{variable_name} <- read.csv(\"#{tempfile.path}\", header=T)")
364
+
365
+ if Rust.debug?
366
+ FileUtils.cp(tempfile.path, tempfile.path + ".debug.csv")
367
+ puts "Debug CSV port file available at: #{tempfile.path + ".debug.csv"}"
368
+ end
369
+
370
+ tempfile.unlink
359
371
 
372
+ return true
373
+ end
374
+
375
+ def directly_load_in_r_as(variable_name)
376
+ command = []
377
+
360
378
  command << "#{variable_name} <- data.frame()"
361
379
  row_index = 1
362
380
  self.each do |row|
@@ -374,6 +392,10 @@ module Rust
374
392
  end
375
393
 
376
394
  Rust._eval_big(command)
395
+
396
+ tempfile.unlink
397
+
398
+ return true
377
399
  end
378
400
 
379
401
  def inspect
@@ -408,16 +430,39 @@ module Rust
408
430
  return result
409
431
  end
410
432
 
433
+ ##
434
+ # Merges this data-frame with +other+ in terms of the +by+ column(s) (Array or String). Keeps all the rows in this data frame.
435
+ # +first_alias+ and +second_alias+ allow specifying the prefix that should be used for the columns not in +by+
436
+ # for this and the +other+ data-frame, respectively.
437
+
438
+ def left_merge(other, by, first_alias, second_alias, **options)
439
+ options[:keep_right] = true
440
+ options[:keep_left] = false
441
+ return other.merge(self, by, first_alias, second_alias, **options)
442
+ end
443
+
444
+ ##
445
+ # Merges this data-frame with +other+ in terms of the +by+ column(s) (Array or String). Keeps all the rows in the other data frame.
446
+ # +first_alias+ and +second_alias+ allow specifying the prefix that should be used for the columns not in +by+
447
+ # for this and the +other+ data-frame, respectively.
448
+
449
+ def right_merge(other, by, first_alias, second_alias, **options)
450
+ options[:keep_right] = true
451
+ options[:keep_left] = false
452
+ return self.merge(other, by, first_alias, second_alias, **options)
453
+ end
454
+
411
455
  ##
412
456
  # Merges this data-frame with +other+ in terms of the +by+ column(s) (Array or String).
413
457
  # +first_alias+ and +second_alias+ allow specifying the prefix that should be used for the columns not in +by+
414
458
  # for this and the +other+ data-frame, respectively.
415
459
 
416
- def merge(other, by, first_alias = "x", second_alias = "y")
460
+ def merge(other, by, first_alias = "x", second_alias = "y", **options)
417
461
  raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
418
462
  raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
419
463
  raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
420
464
  raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size
465
+ raise "Either keep_right or keep_left should be provided as options, not both" if options[:keep_right] && options[:keep_left]
421
466
 
422
467
  if first_alias == second_alias
423
468
  if first_alias == ""
@@ -473,6 +518,28 @@ module Rust
473
518
  end
474
519
 
475
520
  result << to_add
521
+
522
+ elsif options[:keep_right]
523
+ to_add = {}
524
+
525
+ by.each do |colname|
526
+ to_add[colname] = other_row[colname]
527
+ end
528
+
529
+ merged_column_self.each do |colname|
530
+ to_add["#{first_alias}#{colname}"] = nil
531
+ end
532
+
533
+ merged_column_other.each do |colname|
534
+ to_add["#{second_alias}#{colname}"] = other_row[colname]
535
+ end
536
+
537
+ result << to_add
538
+
539
+ elsif options[:keep_left]
540
+ options[:keep_left] = false
541
+ options[:keep_right] = true
542
+ return other.merge(self, by, first_alias, second_alias, **options)
476
543
  end
477
544
  end
478
545
 
@@ -36,7 +36,7 @@ module Rust
36
36
  if candidates.size > 0
37
37
  type = candidates.max_by { |c| c.pull_priority }
38
38
 
39
- puts "Using #{type} to pull #{variable}" if Rust.debug?
39
+ puts "Using #{type} to pull #{variable} (candidates: #{candidates.map { |c| c.to_s + "=>" + c.pull_priority.to_s}.join(", ")})" if Rust.debug?
40
40
  return type.pull_variable(variable, r_type, r_class)
41
41
  else
42
42
  if Rust._pull("length(#{variable})") == 0
@@ -80,7 +80,7 @@ module Rust
80
80
  def r_mirror
81
81
  varname = self.mirrored_R_variable_name
82
82
 
83
- if !Rust._pull("exists(\"#{varname}\")") || Rust._pull("#{varname}.hash") != self.r_hash
83
+ if !Rust._pull("exists(\"#{varname}\")") || Rust["#{varname}.hash"] != self.r_hash
84
84
  puts "Loading #{varname}" if Rust.debug?
85
85
  Rust[varname] = self
86
86
  Rust["#{varname}.hash"] = self.r_hash
@@ -125,6 +125,10 @@ module Rust
125
125
  @level
126
126
  end
127
127
 
128
+ def to_str
129
+ @level.to_s
130
+ end
131
+
128
132
  def to_R
129
133
  self.to_i
130
134
  end
@@ -171,6 +171,22 @@ module Rust
171
171
  end
172
172
  end
173
173
 
174
+ ##
175
+ # Represents a verbatim R expression
176
+
177
+ class Verbatim
178
+ ##
179
+ # Creates a verbatim R expression.
180
+
181
+ def initialize(expression)
182
+ @expression = expression
183
+ end
184
+
185
+ def to_R
186
+ @expression
187
+ end
188
+ end
189
+
174
190
  ##
175
191
  # Represents the arguments of a function in R. Works as an Array of objects.
176
192
 
@@ -196,4 +212,20 @@ module Rust
196
212
  return options
197
213
  end
198
214
  end
215
+
216
+ def self.verbatim(expression)
217
+ Verbatim.new(expression)
218
+ end
219
+
220
+ def self.variable(variable)
221
+ Variable.new(variable)
222
+ end
223
+
224
+ def self.function(name)
225
+ Function.new(name)
226
+ end
227
+
228
+ def self.formula(left_part, right_part)
229
+ Formula.new(left_part, right_part)
230
+ end
199
231
  end
@@ -29,6 +29,8 @@ module Rust
29
29
  @data.each do |key, value|
30
30
  Rust["#{variable_name}[[#{key + 1}]]"] = value
31
31
  end
32
+
33
+ Rust._eval("names(#{variable_name}) <- #{self.names.to_R}")
32
34
  end
33
35
 
34
36
  ##
@@ -81,6 +81,14 @@ module Rust
81
81
  @data.size
82
82
  end
83
83
 
84
+ def rownames
85
+ @row_names
86
+ end
87
+
88
+ def colnames
89
+ @column_names
90
+ end
91
+
84
92
  ##
85
93
  # Returns the number of columns.
86
94
 
data/lib/rust/core.rb CHANGED
@@ -1,7 +1,57 @@
1
1
  require_relative 'core/rust'
2
2
  require_relative 'core/csv'
3
+ require_relative 'core/manual'
3
4
 
4
5
  self_path = File.expand_path(__FILE__)
5
6
  Dir.glob(File.join(File.dirname(self_path), "core/types/*.rb")).each do |lib|
6
7
  require_relative lib
7
8
  end
9
+
10
+ Rust::Manual.register(:base, "Quick intro", "Core philosophy behind Rust.")
11
+ Rust::Manual.for(:base).register('Introduction', /intro/,
12
+ <<-EOS
13
+ Rust is a statistical library. Rust wraps R and its libraries to achieve this goal.
14
+ Rust aims at:
15
+ - Making easier for Ruby developers make all the kinds of operations that are straightforward in R;
16
+ - Providing an object-oriented interface, more familiar than the one in R.
17
+
18
+ Rust can be used in two ways:
19
+ - By using the object-oriented interface (advised if you are writing a script);
20
+ - By using the R bindings, that allow to use Ruby pretty much like R (handful if you are using it from IRB).
21
+
22
+ Rust provides wrappers for many elements, including types (e.g., data frames), statistical hypothesis tests, plots, and so on.
23
+ Under the hood, Rust creates an R environment (through rinruby), through which Rust can perform the most advanced operations,
24
+ for which a re-implementation would be impractical.
25
+ EOS
26
+ )
27
+
28
+ Rust::Manual.for(:base).register('Types', /type/,
29
+ <<-EOS
30
+ Rust provides wrappers for the most commonly-found types in R. Specifically, the following types are available:
31
+ - Data frames → Rust::DataFrame
32
+ - Factors → Rust::Factor
33
+ - Matrices → Rust::Matrix
34
+ - Lists → Rust::List
35
+ - S4 classes → Rust::S4Class
36
+ - Formulas → Rust::Formula
37
+
38
+ Note that some of them (e.g., data frames and matrices) are not just wrappers, but complete re-implementations of the R
39
+ types (for performance reasons).
40
+ EOS
41
+ )
42
+
43
+ Rust::Manual.for(:base).register('CSVs', /csv/,
44
+ <<-EOS
45
+ Rust allows to read and write CSV files, mostly like in R.
46
+ To read a CSV file, you can use:
47
+ Rust::CSV.read(filename)
48
+
49
+ It returns a data frame. You can also specify the option "headers" to tell if the first row in the CSV contains the headers
50
+ (column names for the data frame). Other options get directly passed to the R function "read.csv".
51
+
52
+ To write a CSV file, you can use:
53
+ Rust::CSV.write(filename, data_frame)
54
+
55
+ It writes the given data frame on the file at filename.
56
+ EOS
57
+ )
@@ -64,14 +64,6 @@ module Rust::Plots::GGPlot
64
64
  return self
65
65
  end
66
66
 
67
- def labeled(value)
68
- raise "No context for assigning a label" unless @current_context
69
- @label_options[@current_context] = value
70
- @current_context = nil
71
-
72
- return self
73
- end
74
-
75
67
  def with_x_label(value)
76
68
  @label_options[:x] = value
77
69
 
@@ -90,6 +82,76 @@ module Rust::Plots::GGPlot
90
82
  return self
91
83
  end
92
84
 
85
+ def scale_x_continuous(**options)
86
+ raise "No context for assigning a label" unless @current_context
87
+ @layers << AxisScaler.new(:x, :continuous, **options)
88
+
89
+ return self
90
+ end
91
+
92
+ def scale_y_continuous(**options)
93
+ raise "No context for assigning a label" unless @current_context
94
+ @layers << AxisScaler.new(:y, :continuous, **options)
95
+
96
+ return self
97
+ end
98
+
99
+ def scale_x_discrete(**options)
100
+ raise "No context for assigning a label" unless @current_context
101
+ @layers << AxisScaler.new(:x, :discrete, **options)
102
+
103
+ return self
104
+ end
105
+
106
+ def scale_y_discrete(**options)
107
+ raise "No context for assigning a label" unless @current_context
108
+ @layers << AxisScaler.new(:y, :discrete, **options)
109
+
110
+ return self
111
+ end
112
+
113
+ def scale_x_log10(**options)
114
+ raise "No context for assigning a label" unless @current_context
115
+ @layers << AxisScaler.new(:x, :log10, **options)
116
+
117
+ return self
118
+ end
119
+
120
+ def scale_y_log10(**options)
121
+ raise "No context for assigning a label" unless @current_context
122
+ @layers << AxisScaler.new(:y, :log10, **options)
123
+
124
+ return self
125
+ end
126
+
127
+ def scale_x_reverse(**options)
128
+ raise "No context for assigning a label" unless @current_context
129
+ @layers << AxisScaler.new(:x, :reverse, **options)
130
+
131
+ return self
132
+ end
133
+
134
+ def scale_y_reverse(**options)
135
+ raise "No context for assigning a label" unless @current_context
136
+ @layers << AxisScaler.new(:y, :reverse, **options)
137
+
138
+ return self
139
+ end
140
+
141
+ def scale_x_sqrt(**options)
142
+ raise "No context for assigning a label" unless @current_context
143
+ @layers << AxisScaler.new(:x, :sqrt, **options)
144
+
145
+ return self
146
+ end
147
+
148
+ def scale_y_sqrt(**options)
149
+ raise "No context for assigning a label" unless @current_context
150
+ @layers << AxisScaler.new(:y, :sqrt, **options)
151
+
152
+ return self
153
+ end
154
+
93
155
  def with_title(value)
94
156
  @label_options[:title] = value
95
157
 
@@ -160,6 +222,48 @@ module Rust::Plots::GGPlot
160
222
  return self
161
223
  end
162
224
 
225
+ def labeled(value)
226
+ raise "No context for assigning a label" unless @current_context
227
+ @label_options[@current_context] = value
228
+
229
+ return self
230
+ end
231
+
232
+ def scale_continuous(**options)
233
+ raise "No context for assigning a label" unless @current_context
234
+ @layers << AxisScaler.new(@current_context, :continuous, **options)
235
+
236
+ return self
237
+ end
238
+
239
+ def scale_discrete(**options)
240
+ raise "No context for assigning a label" unless @current_context
241
+ @layers << AxisScaler.new(@current_context, :discrete, **options)
242
+
243
+ return self
244
+ end
245
+
246
+ def scale_log10(**options)
247
+ raise "No context for assigning a label" unless @current_context
248
+ @layers << AxisScaler.new(@current_context, :log10, **options)
249
+
250
+ return self
251
+ end
252
+
253
+ def scale_reverse(**options)
254
+ raise "No context for assigning a label" unless @current_context
255
+ @layers << AxisScaler.new(@current_context, :reverse, **options)
256
+
257
+ return self
258
+ end
259
+
260
+ def scale_sqrt(**options)
261
+ raise "No context for assigning a label" unless @current_context
262
+ @layers << AxisScaler.new(@current_context, :sqrt, **options)
263
+
264
+ return self
265
+ end
266
+
163
267
  def flip_coordinates
164
268
  @layers << FlipCoordinates.new
165
269
 
@@ -0,0 +1,12 @@
1
+ require_relative 'core'
2
+
3
+ module Rust::Plots::GGPlot
4
+ class AxisScaler < Layer
5
+ def initialize(axis, type = :continuous, **options)
6
+ @axis = axis
7
+ @type = type
8
+
9
+ super("scale_#{@axis}_#{@type}", **options)
10
+ end
11
+ end
12
+ end
@@ -53,7 +53,7 @@ module Rust::Plots::GGPlot
53
53
  def to_h
54
54
  options = @options.clone
55
55
 
56
- options['_starting'] = @starting.sub("theme_", "")
56
+ options['_starting'] = @starting.sub("theme_", "") if @starting
57
57
  options = options.map do |key, value|
58
58
  [key, value.is_a?(Theme::Element) ? value.to_h : value]
59
59
  end.to_h
@@ -78,6 +78,9 @@ module Rust::Plots::GGPlot
78
78
  end
79
79
  end
80
80
 
81
+ class ExistingTheme < Layer
82
+ end
83
+
81
84
  class Theme::Element
82
85
  attr_reader :options
83
86
 
@@ -152,6 +155,8 @@ module Rust::Plots::GGPlot
152
155
  return value
153
156
  elsif value.is_a?(Hash)
154
157
  return Theme::LineElement.new(**value)
158
+ elsif !value
159
+ return Theme::BlankElement.new
155
160
  else
156
161
  raise "Expected line or hash"
157
162
  end
@@ -162,6 +167,8 @@ module Rust::Plots::GGPlot
162
167
  return value
163
168
  elsif value.is_a?(Hash)
164
169
  return Theme::RectElement.new(**value)
170
+ elsif !value
171
+ return Theme::BlankElement.new
165
172
  else
166
173
  raise "Expected rect or hash"
167
174
  end
@@ -172,6 +179,8 @@ module Rust::Plots::GGPlot
172
179
  return value
173
180
  elsif value.is_a?(Hash)
174
181
  return Theme::TextElement.new(**value)
182
+ elsif !value
183
+ return Theme::BlankElement.new
175
184
  else
176
185
  raise "Expected text or hash"
177
186
  end
@@ -225,7 +234,7 @@ module Rust::Plots::GGPlot
225
234
  end
226
235
 
227
236
  class ThemeBuilder < ThemeComponentBuilder
228
- def initialize(starting = 'bw')
237
+ def initialize(starting = nil)
229
238
  super("plot")
230
239
  @starting = starting
231
240
  end
@@ -417,7 +426,21 @@ module Rust::Plots::GGPlot
417
426
  end
418
427
  end
419
428
 
420
- self.default_theme = ThemeBuilder.new.
429
+ class ThemeCollection
430
+ def self.ggtech(name = "google")
431
+ Rust.prerequisite("ricardo-bion/ggtech", true)
432
+
433
+ return ExistingTheme.new("theme_tech", theme: name)
434
+ end
435
+
436
+ def self.ggdark(style = "classic")
437
+ Rust.prerequisite("ggdark")
438
+
439
+ return ExistingTheme.new("dark_theme_#{style}")
440
+ end
441
+ end
442
+
443
+ self.default_theme = ThemeBuilder.new("bw").
421
444
  title(face: 'bold', size: 12).
422
445
  legend do |legend|
423
446
  legend.background(fill: 'white', size: 4, colour: 'white')
@@ -2,4 +2,115 @@ require_relative 'ggplot2/core'
2
2
  require_relative 'ggplot2/geoms'
3
3
  require_relative 'ggplot2/themes'
4
4
  require_relative 'ggplot2/plot_builder'
5
- require_relative 'ggplot2/helper'
5
+ require_relative 'ggplot2/scale'
6
+
7
+ Rust::Manual.register(:ggplot2, "ggplot2", "Informations on the wrapper of the popular ggplot2 plotting library for R.")
8
+
9
+ Rust::Manual.for(:ggplot2).register("Introduction", /intro/,
10
+ <<-EOS
11
+ bind_ggplot! # Avoid using long module names to reach Rust::Plots::GGPlot (simply includes this module)
12
+
13
+ # Best with a dataframe, but not necessary. If you have it...
14
+ df = Rust.toothgrowth
15
+ plot = PlotBuilder.for_dataframe(df). # Use a dataframe (symbols will be variable names)
16
+ labeled("Example plot"). # "labeled" sets the label to the last set aesthetic item (x, y, or title, in this case)
17
+ with_x(:len).labeled("X data from df"). # Set all the aesthetics (x, y, ...)
18
+ with_y(:dose).labeled("Y data from df").
19
+ draw_points. # Set the geometries to plot (based on the plot type)
20
+ build # Returns the plot ready to use
21
+ plot.show # Show the plot in a window
22
+ plot.save("output.pdf", width: 5, height: 4) # Save the plot, width, height etc. are optional
23
+
24
+ # If you don't have a dataframe...
25
+ plot2 = PlotBuilder.new.
26
+ with_x([1,2,3]).labeled("X data from df").
27
+ with_y([3,4,5]).labeled("Y data from df").
28
+ draw_points.
29
+ build
30
+ plot2.show
31
+ EOS
32
+ )
33
+
34
+ Rust::Manual.for(:ggplot2).register("Scatter plots", /scatter/,
35
+ <<-EOS
36
+ bind_ggplot!
37
+ df = Rust.toothgrowth
38
+ plot = PlotBuilder.for_dataframe(df).
39
+ with_x(:len).labeled("X data").
40
+ with_y(:dose).labeled("Y data").
41
+ draw_points. # To draw points
42
+ draw_lines. # To draw lines (keep both to draw both)
43
+ build
44
+ plot.show
45
+ EOS
46
+ )
47
+
48
+ Rust::Manual.for(:ggplot2).register("Bar plots", /bar/,
49
+ <<-EOS
50
+ bind_ggplot!
51
+ df = Rust.toothgrowth
52
+ plot = PlotBuilder.for_dataframe(df).
53
+ with_x(:len).labeled("X data").
54
+ with_fill(:supp).labeled("Legend"). # Use with_fill or with_color for stacked plots
55
+ draw_bars. # To draw bars
56
+ build
57
+ plot.show
58
+ EOS
59
+ )
60
+
61
+ Rust::Manual.for(:ggplot2).register("Box plots", /box/,
62
+ <<-EOS
63
+ bind_ggplot!
64
+ df = Rust.toothgrowth
65
+ plot = PlotBuilder.for_dataframe(df).
66
+ with_y(:len).labeled("Data to boxplot").
67
+ with_group(:supp).labeled("Groups"). # Groups to plot
68
+ draw_boxplot.
69
+ build
70
+ plot.show
71
+ EOS
72
+ )
73
+
74
+ Rust::Manual.for(:ggplot2).register("Histograms", /hist/,
75
+ <<-EOS
76
+ bind_ggplot!
77
+ df = Rust.toothgrowth
78
+ plot = PlotBuilder.for_dataframe(df).
79
+ with_x(:len).labeled("Data to plot").
80
+ with_fill(:supp).labeled("Color"). # Use with_fill or with_color for multiple plots
81
+ draw_histogram.
82
+ build
83
+ plot.show
84
+ EOS
85
+ )
86
+
87
+ Rust::Manual.for(:ggplot2).register("Themes", /them/,
88
+ <<-EOS
89
+ bind_ggplot!
90
+ df = Rust.toothgrowth
91
+ # The method with_theme allows to change theme options. The method can be called
92
+ # several times, each time the argument does not overwrite the previous options,
93
+ # unless they are specified again (in that case, the last specified ones win).
94
+ plot = PlotBuilder.for_dataframe(df).
95
+ with_x(:len).labeled("X data").
96
+ with_y(:dose).labeled("Y data").
97
+ draw_points.
98
+ with_theme(
99
+ ThemeBuilder.new('bw').
100
+ title(face: 'bold', size: 12). # Each method sets the property for the related element
101
+ legend do |legend| # Legend and other parts can be set like this
102
+ legend.position(:left) # Puts the legend on the left
103
+ end.
104
+ axis do |axis| # Modifies the axes
105
+ axis.line(Theme::BlankElement.new) # Hides the lines for the axes
106
+ axis.text_x(size: 3) # X axis labels
107
+ end.
108
+ panel do |panel|
109
+ panel.grid_major(colour: 'grey70', size: 0.2) # Sets the major ticks grid
110
+ panel.grid_minor(Theme::BlankElement.new) # Hides the minor ticks grid
111
+ end.
112
+ build
113
+ ).build
114
+ plot.show
115
+ EOS
116
+ )
@@ -14,6 +14,11 @@ module Rust::Models::Regression
14
14
  # Generic regression model in R.
15
15
 
16
16
  class RegressionModel < Rust::RustDatatype
17
+
18
+ attr_accessor :data
19
+ attr_accessor :dependent_variable
20
+ attr_accessor :options
21
+
17
22
  def self.can_pull?(type, klass)
18
23
  # Can only pull specific sub-types
19
24
  return false
@@ -38,22 +43,30 @@ module Rust::Models::Regression
38
43
 
39
44
  formula = Rust::Formula.new(dependent_variable, independent_variables.join(" + "))
40
45
 
46
+ result = nil
41
47
  Rust.exclusive do
42
48
  Rust["#{model_type}.data"] = data
43
49
 
44
50
  Rust._eval("#{model_type}.model.result <- #{model_type}(#{formula.to_R}, data=#{model_type}.data#{mapped})")
45
51
  result = Rust["#{model_type}.model.result"]
46
- result.r_mirror_to("#{model_type}.model.result")
47
52
 
48
- return result
53
+ raise "An error occurred while building the model" unless result
54
+
55
+ result.r_mirror_to("#{model_type}.model.result")
49
56
  end
57
+
58
+ result.dependent_variable = dependent_variable
59
+ result.data = data
60
+ result.options = options
61
+
62
+ return result
50
63
  end
51
64
 
52
65
  ##
53
- # Creates a new +model+.
66
+ # Creates a new model based on +model+.
54
67
 
55
68
  def initialize(model)
56
- raise StandardError if model.is_a?(RegressionModel)
69
+ raise "Expected a R list, given a #{model.class}" if !model.is_a?(Rust::List)
57
70
  @model = model
58
71
  end
59
72
 
@@ -118,6 +131,58 @@ module Rust::Models::Regression
118
131
  a = self.summary|"coefficients"
119
132
  end
120
133
 
134
+ ##
135
+ # Returns object variables for the model with basic data (coefficients and p-values). Use the method `coefficients`
136
+ # to get more data.
137
+
138
+ def variables
139
+ unless @variables
140
+ coefficients = self.coefficients
141
+
142
+ @variables = coefficients.rownames.map do |name|
143
+ ModelVariable.new(name, coefficients[name, "Estimate"], coefficients[name, "Pr(>|t|)"])
144
+ end
145
+ end
146
+
147
+ return @variables
148
+ end
149
+
150
+ ##
151
+ # Returns only the significant variables as ModelVariable instances. See the method `variables`.
152
+
153
+ def significant_variables(a = 0.05)
154
+ self.variables.select { |v| v.significant?(a) }
155
+ end
156
+
157
+ ##
158
+ # Runs backward selection (recursively removes a variable until the best model is found).
159
+ # Returns both the best model and the list of variables excluded at each step.
160
+ # Note: Not fully tested
161
+
162
+ def backward_selection(excluded = [])
163
+ candidates = self.variables.select { |v| !v.intercept? && !v.significant? }.sort_by { |v| v.pvalue }.reverse
164
+ all = self.variables.select { |v| !v.intercept? }
165
+
166
+ candidates.each do |candidate|
167
+ new_model = RegressionModel.generate(
168
+ self.class,
169
+ self.class.r_model_name,
170
+ self.dependent_variable,
171
+ (all - [candidate]).map { |v| v.name },
172
+ self.data,
173
+ **self.options
174
+ )
175
+
176
+ if new_model.r_2_adjusted >= self.r_2_adjusted
177
+ puts "Excluded #{candidate}" if Rust.debug?
178
+ return *new_model.backward_selection(excluded + [candidate])
179
+ end
180
+ end
181
+
182
+ return self, excluded
183
+ end
184
+
185
+
121
186
  def method_missing(name, *args)
122
187
  return model|name.to_s
123
188
  end
@@ -145,7 +210,11 @@ module Rust::Models::Regression
145
210
 
146
211
  class LinearRegressionModel < RegressionModel
147
212
  def self.can_pull?(type, klass)
148
- return type == "list" && klass == "lm"
213
+ return type == "list" && klass == self.r_model_name
214
+ end
215
+
216
+ def self.pull_priority
217
+ 1
149
218
  end
150
219
 
151
220
  def self.pull_variable(variable, type, klass)
@@ -154,6 +223,10 @@ module Rust::Models::Regression
154
223
  return LinearRegressionModel.new(model)
155
224
  end
156
225
 
226
+ def self.r_model_name
227
+ "lm"
228
+ end
229
+
157
230
  ##
158
231
  # Generates a linear regression model, given its +dependent_variable+ and +independent_variables+ and its +data+.
159
232
  # +options+ can be specified and directly passed to the model.
@@ -161,7 +234,7 @@ module Rust::Models::Regression
161
234
  def self.generate(dependent_variable, independent_variables, data, **options)
162
235
  RegressionModel.generate(
163
236
  LinearRegressionModel,
164
- "lm",
237
+ self.r_model_name,
165
238
  dependent_variable,
166
239
  independent_variables,
167
240
  data,
@@ -175,13 +248,17 @@ module Rust::Models::Regression
175
248
 
176
249
  class LinearMixedEffectsModel < RegressionModel
177
250
  def self.can_pull?(type, klass)
178
- return type == "S4" && klass == "lmerModLmerTest"
251
+ return type == "S4" && klass == self.r_model_name
179
252
  end
180
253
 
181
254
  def self.pull_priority
182
255
  1
183
256
  end
184
257
 
258
+ def self.r_model_name
259
+ "lmerModLmerTest"
260
+ end
261
+
185
262
  def self.pull_variable(variable, type, klass)
186
263
  model = Rust::RustDatatype.pull_variable(variable, Rust::S4Class)
187
264
 
@@ -213,7 +290,7 @@ module Rust::Models::Regression
213
290
 
214
291
  RegressionModel.generate(
215
292
  LinearMixedEffectsModel,
216
- "lmer",
293
+ self.r_model_name,
217
294
  dependent_variable,
218
295
  fixed_effects + random_effects,
219
296
  data,
@@ -235,18 +312,44 @@ module Rust::Models::Regression
235
312
  end
236
313
  end
237
314
  end
315
+
316
+ ##
317
+ # Slim representation for a variable in a model, with just the variable name, its coefficient and its p-value.
318
+
319
+ class ModelVariable
320
+ attr_accessor :name
321
+ attr_accessor :coefficient
322
+ attr_accessor :pvalue
323
+
324
+ def initialize(name, coefficient, pvalue)
325
+ @name = name
326
+ @coefficient = coefficient
327
+ @pvalue = pvalue
328
+ end
329
+
330
+ def intercept?
331
+ @name == "(Intercept)"
332
+ end
333
+
334
+ ##
335
+ # Checks whether the variable is significant, i.e., whether its p-value is at most the significance level +a+ (0.05 by default).
336
+
337
+ def significant?(a = 0.05)
338
+ @pvalue <= a
339
+ end
340
+ end
238
341
  end
239
342
 
240
343
  module Rust::RBindings
241
344
  def lm(formula, data, **options)
242
345
  independent = formula.right_part.split("+").map { |v| v.strip }
243
- return LinearRegressionModel.generate(formula.left_part, independent, data, **options)
346
+ return Rust::Models::Regression::LinearRegressionModel.generate(formula.left_part, independent, data, **options)
244
347
  end
245
348
 
246
349
  def lmer(formula, data, **options)
247
350
  independent = formula.right_part.split("+").map { |v| v.strip }
248
351
 
249
- RegressionModel.generate(
352
+ Rust::Models::Regression::RegressionModel.generate(
250
353
  LinearMixedEffectsModel,
251
354
  "lmer",
252
355
  formula.left_part,
@@ -85,6 +85,20 @@ module Rust
85
85
  @values.map { |k, v| k*v }.sum
86
86
  end
87
87
 
88
+ ##
89
+ # Returns the variance for this slice.
90
+
91
+ def variance
92
+ @values.map { |k, v| k**2 * v }.sum - (self.expected ** 2)
93
+ end
94
+
95
+ ##
96
+ # Returns the standard deviation for this slice.
97
+
98
+ def sd
99
+ Math.sqrt(self.variance)
100
+ end
101
+
88
102
  ##
89
103
  # Returns a slice with the values that are greater than +n+.
90
104
 
@@ -124,7 +138,7 @@ module Rust
124
138
  # Returns a slice with the values between +a+ and +b+.
125
139
 
126
140
  def between(a, b)
127
- self.so_that { |k| k.between(a, b) }
141
+ self.so_that { |k| k.between?(a, b) }
128
142
  end
129
143
 
130
144
  ##
@@ -133,6 +147,13 @@ module Rust
133
147
  def so_that
134
148
  RandomVariableSlice.new(@values.select { |k, v| yield(k) })
135
149
  end
150
+
151
+ ##
152
+ # Creates a bar plot of the distribution
153
+
154
+ def plot
155
+ Rust::Plots::BarPlot.new(@values.sort_by { |k, v| k }.to_h)
156
+ end
136
157
  end
137
158
 
138
159
  ##
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.12'
4
+ version: '0.13'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-17 00:00:00.000000000 Z
11
+ date: 2024-11-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rinruby
@@ -61,6 +61,7 @@ files:
61
61
  - lib/rust.rb
62
62
  - lib/rust/core.rb
63
63
  - lib/rust/core/csv.rb
64
+ - lib/rust/core/manual.rb
64
65
  - lib/rust/core/rust.rb
65
66
  - lib/rust/core/types/all.rb
66
67
  - lib/rust/core/types/dataframe.rb
@@ -74,8 +75,8 @@ files:
74
75
  - lib/rust/external/ggplot2.rb
75
76
  - lib/rust/external/ggplot2/core.rb
76
77
  - lib/rust/external/ggplot2/geoms.rb
77
- - lib/rust/external/ggplot2/helper.rb
78
78
  - lib/rust/external/ggplot2/plot_builder.rb
79
+ - lib/rust/external/ggplot2/scale.rb
79
80
  - lib/rust/external/ggplot2/themes.rb
80
81
  - lib/rust/external/robustbase.rb
81
82
  - lib/rust/models/all.rb
@@ -110,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
110
111
  - !ruby/object:Gem::Version
111
112
  version: '0'
112
113
  requirements: []
113
- rubygems_version: 3.3.15
114
+ rubygems_version: 3.5.16
114
115
  signing_key:
115
116
  specification_version: 4
116
117
  summary: Ruby advanced statistical library
@@ -1,122 +0,0 @@
1
- require_relative 'core'
2
-
3
- GGPLOT_EXAMPLES = {}
4
-
5
- GGPLOT_EXAMPLES[["Quick introduction", /intro/]] = <<-EOS
6
- bind_ggplot! # Avoid using long module names to reach Rust::Plots::GGPlot (simply includes this module)
7
-
8
- # Best with a dataframe, but not necessary. If you have it...
9
- df = Rust.toothgrowth
10
- plot = PlotBuilder.for_dataframe(df). # Use a dataframe (symbols will be variable names)
11
- labeled("Example plot"). # "labeled" sets the label to the last set aesthetic item (x, y, or title, in this case)
12
- with_x(:len).labeled("X data from df"). # Set all the aesthetics (x, y, ...)
13
- with_y(:dose).labeled("Y data from df").
14
- draw_points. # Set the geometries to plot (based on the plot type)
15
- build # Returns the plot ready to use
16
- plot.show # Show the plot in a window
17
- plot.save("output.pdf", width: 5, height: 4) # Save the plot, width, height etc. are optional
18
-
19
- # If you don't have a dataframe...
20
- plot2 = PlotBuilder.new.
21
- with_x([1,2,3]).labeled("X data from df").
22
- with_y([3,4,5]).labeled("Y data from df").
23
- draw_points.
24
- build
25
- plot2.show
26
- EOS
27
-
28
- GGPLOT_EXAMPLES[["Scatter plots", /scatter/]] = <<-EOS
29
- bind_ggplot!
30
- df = Rust.toothgrowth
31
- plot = PlotBuilder.for_dataframe(df).
32
- with_x(:len).labeled("X data").
33
- with_y(:dose).labeled("Y data").
34
- draw_points. # To draw points
35
- draw_lines. # To draw lines (keep both to draw both)
36
- build
37
- plot.show
38
- EOS
39
-
40
- GGPLOT_EXAMPLES[["Bar plots", /bar/]] = <<-EOS
41
- bind_ggplot!
42
- df = Rust.toothgrowth
43
- plot = PlotBuilder.for_dataframe(df).
44
- with_x(:len).labeled("X data").
45
- with_fill(:supp).labeled("Legend"). # Use with_fill or with_color for stacked plots
46
- draw_bars. # To draw bars
47
- build
48
- plot.show
49
- EOS
50
-
51
- GGPLOT_EXAMPLES[["Box plots", /box/]] = <<-EOS
52
- bind_ggplot!
53
- df = Rust.toothgrowth
54
- plot = PlotBuilder.for_dataframe(df).
55
- with_y(:len).labeled("Data to boxplot").
56
- with_group(:supp).labeled("Groups"). # Groups to plot
57
- draw_boxplot.
58
- build
59
- plot.show
60
- EOS
61
-
62
- GGPLOT_EXAMPLES[["Histograms", /hist/]] = <<-EOS
63
- bind_ggplot!
64
- df = Rust.toothgrowth
65
- plot = PlotBuilder.for_dataframe(df).
66
- with_x(:len).labeled("Data to plot").
67
- with_fill(:supp).labeled("Color"). # Use with_fill or with_color for multiple plots
68
- draw_histogram.
69
- build
70
- plot.show
71
- EOS
72
-
73
- GGPLOT_EXAMPLES[["Themes", /them/]] = <<-EOS
74
- bind_ggplot!
75
- df = Rust.toothgrowth
76
- # The method with_theme allows to change theme options. The method can be called
77
- # several times, each time the argument does not overwrite the previous options,
78
- # unless they are specified again (in that case, the last specified ones win).
79
- plot = PlotBuilder.for_dataframe(df).
80
- with_x(:len).labeled("X data").
81
- with_y(:dose).labeled("Y data").
82
- draw_points.
83
- with_theme(
84
- ThemeBuilder.new('bw').
85
- title(face: 'bold', size: 12). # Each method sets the property for the related element
86
- legend do |legend| # Legend and other parts can be set like this
87
- legend.position(:left) # Puts the legend on the left
88
- end.
89
- axis do |axis| # Modifies the axes
90
- axis.line(Theme::BlankElement.new) # Hides the lines for the axes
91
- axis.text_x(size: 3) # X axis labels
92
- end.
93
- panel do |panel|
94
- panel.grid_major(colour: 'grey70', size: 0.2) # Sets the major ticks grid
95
- panel.grid_minor(Theme::BlankElement.new) # Hides the minor ticks grid
96
- end.
97
- build
98
- ).build
99
- plot.show
100
- EOS
101
-
102
- module Rust::Plots::GGPlot
103
- def self.help!(topic = nil)
104
- unless topic
105
- puts "Topics:"
106
- GGPLOT_EXAMPLES.keys.each do |key, matcher|
107
- puts "- #{key}"
108
- end
109
- puts "Call again specifying the topic of interest."
110
- else
111
- GGPLOT_EXAMPLES.each do |key, value|
112
- if topic.match(key[1])
113
- puts "*** #{key[0]} ***"
114
- puts value
115
- return
116
- end
117
- end
118
-
119
- puts "Topic not found"
120
- end
121
- end
122
- end