rust 0.12 → 0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8eb6e3759ef38070603a941ef348ac46e4b6c08b4638c493edb2169acf16c793
4
- data.tar.gz: f855ca774695688ee64d7513ac94c8b98d3bae154bfae56b803ed1da567cb282
3
+ metadata.gz: aef18d4ed8bce09d5931fa0e9fde630ff36e1436897a448bbbc1dd2b451a28fc
4
+ data.tar.gz: 9313ac648a27c9f1cb6b369ba43852e0defb4bd4ab0a089b1c58ad4e43322de5
5
5
  SHA512:
6
- metadata.gz: c281de698d8b4750832d77971dac857dbee2c31252b7950abf91777d37763bbb79577098ab48efeee6f8a5ef2a28949ef72816d33703eaad14887d588b4aac32
7
- data.tar.gz: d44c0532c19ff8eb2f505d4da62e82b4b4f32dada4b39a05cddf57679a3a725dd38ceecf63aaf2d551a8ae691841196bad7cc0694d3613b928a84a9b6291ef6a
6
+ metadata.gz: 9908bd416aa81bbd07ad8b5f4960481c31e9ed8da2b106e3e129b4165d835343029b43ee0cd1b74b1fd56451039a5558abda553037c4887ebf6f9b5865dc5202
7
+ data.tar.gz: db41177a2182dc57516459bbfff5ee6d361c275ccd1d78b5a5b2767da433a5cde366303b598d4e9bd00d692c14c682044bbc4976392973415099573185fcee15
data/lib/rust/core/csv.rb CHANGED
@@ -90,9 +90,9 @@ module Rust
90
90
  dataframe.column_names.each do |column_name|
91
91
  values = dataframe.column(column_name)
92
92
 
93
- if values.all? { |s| !!Integer(s) rescue false }
93
+ if values.all? { |s| s == nil || !!Integer(s) rescue false }
94
94
  integer_columns << column_name
95
- elsif values.all? { |s| !!Float(s) rescue false }
95
+ elsif values.all? { |s| s == nil || !!Float(s) rescue false }
96
96
  float_columns << column_name
97
97
  end
98
98
  end
@@ -103,11 +103,11 @@ module Rust
103
103
  end
104
104
 
105
105
  integer_columns.each do |numeric_column|
106
- dataframe.transform_column!(numeric_column) { |v| v.to_i }
106
+ dataframe.transform_column!(numeric_column) { |v| v != nil ? v.to_i : v }
107
107
  end
108
108
 
109
109
  float_columns.each do |numeric_column|
110
- dataframe.transform_column!(numeric_column) { |v| v.to_f }
110
+ dataframe.transform_column!(numeric_column) { |v| v != nil ? v.to_f : v }
111
111
  end
112
112
 
113
113
  return dataframe
@@ -0,0 +1,89 @@
1
+ require_relative 'rust'
2
+
3
module Rust

  ##
  # Registry of the manuals available in this process. A manual is registered under a category
  # (stored as a Symbol) and contains "voices": texts identified by a title and by a matcher
  # that is used to look them up from a query.

  class Manual
    @@manuals = {}

    ##
    # Prints the list of all the registered manuals (name, category, and description).
    # Always returns nil.

    def self.about
      puts "Manuals available:"
      @@manuals.each do |category, manual|
        puts "\t- #{manual.name} (:#{category}) → #{manual.description}"
      end

      return nil
    end

    ##
    # Returns the manual registered for +category+ (anything responding to to_sym).
    # Raises a RuntimeError if no manual has been registered for that category.

    def self.for(category)
      category = category.to_sym
      manual = @@manuals[category]
      raise "No manual found for '#{category}'." unless manual

      return manual
    end

    ##
    # Registers a new, empty manual for +category+ with the given +name+ and +description+.
    # A manual previously registered for the same category is replaced. Always returns nil.

    def self.register(category, name, description)
      @@manuals[category.to_sym] = Manual.new(name, description)

      return nil
    end

    attr_reader :name
    attr_reader :description

    ##
    # Creates a manual with the given +name+ and +description+ and no voices.

    def initialize(name, description)
      @name = name
      @description = description
      @voices = {}
    end

    ##
    # Prints the first registered voice whose matcher matches +query+ (a String or a Symbol),
    # or "Voice not found" if none does. Always returns nil.

    def lookup(query)
      # to_s lets callers pass symbols (or anything printable) without relying on #match
      # being defined on the query object.
      text = query.to_s

      @voices.each do |(voice, matcher), content|
        next unless text.match?(matcher)

        puts "*** #{voice} ***"
        puts content
        return nil
      end

      puts "Voice not found"

      return nil
    end

    ##
    # Returns the number of voices registered in this manual.

    def n_voices
      @voices.size
    end

    ##
    # Prints the name and description of this manual and the titles of its voices.
    # Always returns nil.

    def about
      puts "****** Manual for #@name ******"
      puts @description
      puts "Voices in manual #@name:"
      @voices.each_key do |voice, _matcher|
        puts "\t- #{voice}"
      end

      return nil
    end

    ##
    # Adds a voice titled +voice+ to this manual. +matcher+ is what queries are matched
    # against in #lookup; +description+ is the text printed when the voice is found.

    def register(voice, matcher, description)
      @voices[[voice, matcher]] = description
    end

    def inspect
      return "Manual for #@name with #{self.n_voices} voices"
    end
  end
end
78
+
79
module Rust
  module RBindings

    ##
    # Entry point to the manuals.
    # - With no arguments, prints the list of available manuals;
    # - With only +category+, prints the summary of the manual for that category;
    # - With +category+ and +query+, prints the voice of that manual matching +query+.
    # Returns whatever the called Rust::Manual method returns.

    def rust_help(category = nil, query = nil)
      return Rust::Manual.about unless category

      manual = Rust::Manual.for(category)
      return manual.about unless query

      return manual.lookup(query)
    end
  end
end
@@ -102,7 +102,10 @@ module Rust
102
102
  end
103
103
 
104
104
  def self._rexec(r_command, return_warnings = false)
105
- puts "Calling _rexec with command: #{r_command}" if @@debugging
105
+ if @@debugging
106
+ puts "Calling _rexec with command: #{r_command}"
107
+ puts "\t" + Kernel.caller.select { |v| !v.include?("irb") }.last(3).map { |v| v.sub(/^.*gems\//, "")}.join("\n\t")
108
+ end
106
109
  R_MUTEX.synchronize do
107
110
  assert("This command must be executed in an exclusive block") { @@in_client_mutex }
108
111
 
@@ -154,10 +157,17 @@ module Rust
154
157
 
155
158
  ##
156
159
  # Installs the given +name+ library and its dependencies.
160
+ # +github+ indicates whether the package is in GitHub.
157
161
 
158
- def self.install_library(name)
162
+ def self.install_library(name, github = false)
163
+ self.prerequisite("remotes") if github
164
+
159
165
  self.exclusive do
160
- self._eval("install.packages(\"#{name}\", dependencies = TRUE)")
166
+ if github
167
+ self._eval("remotes::install_github(\"#{name}\", dependencies=TRUE)")
168
+ else
169
+ self._eval("install.packages(\"#{name}\", dependencies = TRUE)")
170
+ end
161
171
  end
162
172
 
163
173
  return nil
@@ -165,9 +175,15 @@ module Rust
165
175
 
166
176
  ##
167
177
  # Installs the +library+ library if it is not available and loads it.
178
+ # +github+ indicates whether the package appears in GitHub.
168
179
 
169
def self.prerequisite(library, github = false)
  # When +github+ is set, +library+ is split on "/" and only the last segment is used to
  # check and load the package; the full string is still what gets passed to install_library.
  package = github ? library.split("/").last : library

  self.install_library(library, github) unless self.check_library(package)

  self.load_library(package)
end
173
189
 
@@ -218,4 +234,11 @@ def bind_r!
218
234
  include Rust::RBindings
219
235
  end
220
236
 
237
##
# Shortcut for requiring rust external libraries: requires "rust/external/<name>".
# Returns the value returned by require; raises LoadError if the file cannot be found.

def require_rust(name)
  require "rust/external/#{name}"
end
243
+
221
244
  bind_r! if ENV['RUBY_RUST_BINDING'] == '1'
@@ -1,4 +1,5 @@
1
1
  require_relative 'datatype'
2
+ require 'tempfile'
2
3
 
3
4
  module Rust
4
5
 
@@ -355,8 +356,25 @@ module Rust
355
356
  end
356
357
 
357
358
  def load_in_r_as(variable_name)
358
- command = []
359
+ tempfile = Tempfile.new('rust.dfport')
360
+ tempfile.close
361
+
362
+ Rust::CSV.write(tempfile.path, self)
363
+ Rust._eval("#{variable_name} <- read.csv(\"#{tempfile.path}\", header=T)")
364
+
365
+ if Rust.debug?
366
+ FileUtils.cp(tempfile.path, tempfile.path + ".debug.csv")
367
+ puts "Debug CSV port file available at: #{tempfile.path + ".debug.csv"}"
368
+ end
369
+
370
+ tempfile.unlink
359
371
 
372
+ return true
373
+ end
374
+
375
+ def directly_load_in_r_as(variable_name)
376
+ command = []
377
+
360
378
  command << "#{variable_name} <- data.frame()"
361
379
  row_index = 1
362
380
  self.each do |row|
@@ -374,6 +392,10 @@ module Rust
374
392
  end
375
393
 
376
394
  Rust._eval_big(command)
395
+
396
+ tempfile.unlink
397
+
398
+ return true
377
399
  end
378
400
 
379
401
  def inspect
@@ -408,16 +430,39 @@ module Rust
408
430
  return result
409
431
  end
410
432
 
433
##
# Merges this data-frame with +other+ in terms of the +by+ column(s) (Array or String). Keeps all the rows in this data frame.
# +first_alias+ and +second_alias+ allow to specify the prefix that should be used for the columns not in +by+
# for this and the +other+ data-frame, respectively.

def left_merge(other, by, first_alias = "x", second_alias = "y", **options)
  options[:keep_right] = true
  options[:keep_left] = false

  # The merge is delegated to +other+ with the roles of the two data-frames swapped, so the
  # aliases are swapped as well: this way +first_alias+ still refers to this data-frame.
  return other.merge(self, by, second_alias, first_alias, **options)
end
443
+
444
##
# Merges this data-frame with +other+ in terms of the +by+ column(s) (Array or String). Keeps all the rows in the other data frame.
# +first_alias+ and +second_alias+ allow to specify the prefix that should be used for the columns not in +by+
# for this and the +other+ data-frame, respectively.

def right_merge(other, by, first_alias = "x", second_alias = "y", **options)
  options[:keep_right] = true
  options[:keep_left] = false

  return merge(other, by, first_alias, second_alias, **options)
end
454
+
411
455
  ##
412
456
  # Merges this data-frame with +other+ in terms of the +by+ column(s) (Array or String).
413
457
  # +first_alias+ and +second_alias+ allow to specify the prefix that should be used for the columns not in +by+
414
458
  # for this and the +other+ data-frame, respectively.
415
459
 
416
- def merge(other, by, first_alias = "x", second_alias = "y")
460
+ def merge(other, by, first_alias = "x", second_alias = "y", **options)
417
461
  raise TypeError, "Expected Rust::DataFrame" unless other.is_a?(DataFrame)
418
462
  raise TypeError, "Expected list of strings" if !by.is_a?(Array) || !by.all? { |e| e.is_a?(String) }
419
463
  raise "This dataset should have all the columns in #{by}" unless (by & self.column_names).size == by.size
420
464
  raise "The passed dataset should have all the columns in #{by}" unless (by & other.column_names).size == by.size
465
+ raise "Either keep_right or keep_left should be provided as options, not both" if options[:keep_right] && options[:keep_left]
421
466
 
422
467
  if first_alias == second_alias
423
468
  if first_alias == ""
@@ -473,6 +518,28 @@ module Rust
473
518
  end
474
519
 
475
520
  result << to_add
521
+
522
+ elsif options[:keep_right]
523
+ to_add = {}
524
+
525
+ by.each do |colname|
526
+ to_add[colname] = other_row[colname]
527
+ end
528
+
529
+ merged_column_self.each do |colname|
530
+ to_add["#{first_alias}#{colname}"] = nil
531
+ end
532
+
533
+ merged_column_other.each do |colname|
534
+ to_add["#{second_alias}#{colname}"] = other_row[colname]
535
+ end
536
+
537
+ result << to_add
538
+
539
+ elsif options[:keep_left]
540
+ options[:keep_left] = false
541
+ options[:keep_right] = true
542
+ return other.merge(self, by, first_alias, second_alias, **options)
476
543
  end
477
544
  end
478
545
 
@@ -540,6 +607,8 @@ module Rust
540
607
  # Sorts the rows of this data-frame by the values of the +by+ column.
541
608
 
542
609
  def sort_by!(by)
610
+ raise TypeError, "String expected" unless by.is_a?(String)
611
+ raise "'#{by}' is not a valid column name (#{self.colnames.to_s})" unless self.colnames.include?(by)
543
612
  copy = @data[by].clone
544
613
  copy.sort!
545
614
 
@@ -36,7 +36,7 @@ module Rust
36
36
  if candidates.size > 0
37
37
  type = candidates.max_by { |c| c.pull_priority }
38
38
 
39
- puts "Using #{type} to pull #{variable}" if Rust.debug?
39
+ puts "Using #{type} to pull #{variable} (candidates: #{candidates.map { |c| c.to_s + "=>" + c.pull_priority.to_s}.join(", ")})" if Rust.debug?
40
40
  return type.pull_variable(variable, r_type, r_class)
41
41
  else
42
42
  if Rust._pull("length(#{variable})") == 0
@@ -80,7 +80,7 @@ module Rust
80
80
  def r_mirror
81
81
  varname = self.mirrored_R_variable_name
82
82
 
83
- if !Rust._pull("exists(\"#{varname}\")") || Rust._pull("#{varname}.hash") != self.r_hash
83
+ if !Rust._pull("exists(\"#{varname}\")") || Rust["#{varname}.hash"] != self.r_hash
84
84
  puts "Loading #{varname}" if Rust.debug?
85
85
  Rust[varname] = self
86
86
  Rust["#{varname}.hash"] = self.r_hash
@@ -109,13 +109,23 @@ module Rust
109
109
 
110
110
  class Null < RustDatatype
111
111
  def self.can_pull?(type, klass)
112
- return type == "NULL" && klass == "NULL"
112
+ return (type == "NULL" && klass == "NULL")
113
113
  end
114
114
 
115
115
  def self.pull_variable(variable, type, klass)
116
116
  return nil
117
117
  end
118
118
  end
119
+
120
##
# Datatype that pulls R variables whose class is "omit": the variable is converted in R
# with as.<type>(...) and the result of the conversion is pulled instead.
# NOTE(review): presumably this targets the values produced by R's na.omit; confirm.

class Omit < RustDatatype
  def self.can_pull?(type, klass)
    return (klass == "omit")
  end

  def self.pull_variable(variable, type, klass)
    return Rust["as.#{type}(#{variable})"]
  end
end
119
129
  end
120
130
 
121
131
  class TrueClass
@@ -125,6 +125,10 @@ module Rust
125
125
  @level
126
126
  end
127
127
 
128
##
# Returns the level of this value as a String (implicit String conversion).

def to_str
  @level.to_s
end
131
+
128
132
  def to_R
129
133
  self.to_i
130
134
  end
@@ -171,6 +171,22 @@ module Rust
171
171
  end
172
172
  end
173
173
 
174
##
# Represents a verbatim R expression

class Verbatim

  ##
  # Creates a verbatim R expression from +expression+, which is stored as it is.

  def initialize(expression)
    @expression = expression
  end

  ##
  # Returns the expression exactly as it was given to the constructor.

  def to_R
    @expression
  end
end
189
+
174
190
  ##
175
191
  # Represents the arguments of a function in R. Works as an Array of objects.
176
192
 
@@ -196,4 +212,20 @@ module Rust
196
212
  return options
197
213
  end
198
214
  end
215
+
216
##
# Shortcut for Verbatim.new(expression).

def self.verbatim(expression)
  Verbatim.new(expression)
end
219
+
220
##
# Shortcut for Variable.new(variable).

def self.variable(variable)
  Variable.new(variable)
end
223
+
224
##
# Shortcut for Function.new(name).

def self.function(name)
  Function.new(name)
end
227
+
228
##
# Shortcut for Formula.new(left_part, right_part).

def self.formula(left_part, right_part)
  Formula.new(left_part, right_part)
end
199
231
  end
@@ -29,6 +29,8 @@ module Rust
29
29
  @data.each do |key, value|
30
30
  Rust["#{variable_name}[[#{key + 1}]]"] = value
31
31
  end
32
+
33
+ Rust._eval("names(#{variable_name}) <- #{self.names.to_R}")
32
34
  end
33
35
 
34
36
  ##
@@ -81,6 +81,14 @@ module Rust
81
81
  @data.size
82
82
  end
83
83
 
84
##
# Returns the row names (the value of @row_names).

def rownames
  @row_names
end
87
+
88
##
# Returns the column names (the value of @column_names).

def colnames
  @column_names
end
91
+
84
92
  ##
85
93
  # Returns the number of columns.
86
94
 
data/lib/rust/core.rb CHANGED
@@ -1,7 +1,57 @@
1
1
  require_relative 'core/rust'
2
2
  require_relative 'core/csv'
3
+ require_relative 'core/manual'
3
4
 
4
5
  self_path = File.expand_path(__FILE__)
5
6
  Dir.glob(File.join(File.dirname(self_path), "core/types/*.rb")).each do |lib|
6
7
  require_relative lib
7
8
  end
9
+
10
+ Rust::Manual.register(:base, "Quick intro", "Core philosophy behind Rust.")
11
+ Rust::Manual.for(:base).register('Introduction', /intro/,
12
+ <<-EOS
13
+ Rust is a statistical library. Rust wraps R and its libraries to achieve this goal.
14
+ Rust aims at:
15
+ - Making easier for Ruby developers make all the kinds of operations that are straightforward in R;
16
+ - Providing an object-oriented interface, more familiar than the one in R.
17
+
18
+ Rust can be used in two ways:
19
+ - By using the object-oriented interface (advised if you are writing a script);
20
+ - By using the R bindings, that allow to use Ruby pretty much like R (handful if you are using it from IRB).
21
+
22
+ Rust provides wrappers for many elements, including types (e.g., data frames), statistical hypothesis tests, plots, and so on.
23
+ Under the hood, Rust creates an R environment (through rinruby), through which Rust can perform the most advanced operations,
24
+ for which a re-implementation would be impractical.
25
+ EOS
26
+ )
27
+
28
+ Rust::Manual.for(:base).register('Types', /type/,
29
+ <<-EOS
30
+ Rust provides wrappers for the most commonly-found types in R. Specifically, the following types are available:
31
+ - Data frames → Rust::DataFrame
32
+ - Factors → Rust::Factor
33
+ - Matrices → Rust::Matrix
34
+ - Lists → Rust::List
35
+ - S4 classes → Rust::S4Class
36
+ - Formulas → Rust::Formula
37
+
38
+ Note that some of them (e.g., data frames and matrices) are not just wrappers, but complete re-implementations of the R
39
+ types (for performance reasons).
40
+ EOS
41
+ )
42
+
43
+ Rust::Manual.for(:base).register('CSVs', /csv/,
44
+ <<-EOS
45
+ Rust allows to read and write CSV files, mostly like in R.
46
+ To read a CSV file, you can use:
47
+ Rust::CSV.read(filename)
48
+
49
+ It returns a data frame. You can also specify the option "headers" to tell if the first row in the CSV contains the headers
50
+ (column names for the data frame). Other options get directly passed to the R function "read.csv".
51
+
52
+ To write a CSV file, you can use:
53
+ Rust::CSV.write(filename, data_frame)
54
+
55
+ It writes the given data frame on the file at filename.
56
+ EOS
57
+ )
@@ -64,14 +64,6 @@ module Rust::Plots::GGPlot
64
64
  return self
65
65
  end
66
66
 
67
- def labeled(value)
68
- raise "No context for assigning a label" unless @current_context
69
- @label_options[@current_context] = value
70
- @current_context = nil
71
-
72
- return self
73
- end
74
-
75
67
  def with_x_label(value)
76
68
  @label_options[:x] = value
77
69
 
@@ -90,6 +82,76 @@ module Rust::Plots::GGPlot
90
82
  return self
91
83
  end
92
84
 
85
##
# Adds a continuous scale layer for the x axis. +options+ are passed to the AxisScaler layer.
# Returns self, so that calls can be chained.
#
# NOTE(review): this method and the other scale_x_* / scale_y_* methods below require a
# current context even though the axis is given explicitly, and the error message talks about
# labels. The guard looks copied from #labeled; confirm whether it is intended.

def scale_x_continuous(**options)
  raise "No context for assigning a label" unless @current_context
  @layers << AxisScaler.new(:x, :continuous, **options)

  return self
end

##
# Adds a continuous scale layer for the y axis. See #scale_x_continuous.

def scale_y_continuous(**options)
  raise "No context for assigning a label" unless @current_context
  @layers << AxisScaler.new(:y, :continuous, **options)

  return self
end

##
# Adds a discrete scale layer for the x axis. See #scale_x_continuous.

def scale_x_discrete(**options)
  raise "No context for assigning a label" unless @current_context
  @layers << AxisScaler.new(:x, :discrete, **options)

  return self
end

##
# Adds a discrete scale layer for the y axis. See #scale_x_continuous.

def scale_y_discrete(**options)
  raise "No context for assigning a label" unless @current_context
  @layers << AxisScaler.new(:y, :discrete, **options)

  return self
end

##
# Adds a log10 scale layer for the x axis. See #scale_x_continuous.

def scale_x_log10(**options)
  raise "No context for assigning a label" unless @current_context
  @layers << AxisScaler.new(:x, :log10, **options)

  return self
end

##
# Adds a log10 scale layer for the y axis. See #scale_x_continuous.

def scale_y_log10(**options)
  raise "No context for assigning a label" unless @current_context
  @layers << AxisScaler.new(:y, :log10, **options)

  return self
end

##
# Adds a reverse scale layer for the x axis. See #scale_x_continuous.

def scale_x_reverse(**options)
  raise "No context for assigning a label" unless @current_context
  @layers << AxisScaler.new(:x, :reverse, **options)

  return self
end

##
# Adds a reverse scale layer for the y axis. See #scale_x_continuous.

def scale_y_reverse(**options)
  raise "No context for assigning a label" unless @current_context
  @layers << AxisScaler.new(:y, :reverse, **options)

  return self
end

##
# Adds a sqrt scale layer for the x axis. See #scale_x_continuous.

def scale_x_sqrt(**options)
  raise "No context for assigning a label" unless @current_context
  @layers << AxisScaler.new(:x, :sqrt, **options)

  return self
end

##
# Adds a sqrt scale layer for the y axis. See #scale_x_continuous.

def scale_y_sqrt(**options)
  raise "No context for assigning a label" unless @current_context
  @layers << AxisScaler.new(:y, :sqrt, **options)

  return self
end
154
+
93
155
  def with_title(value)
94
156
  @label_options[:title] = value
95
157
 
@@ -160,6 +222,48 @@ module Rust::Plots::GGPlot
160
222
  return self
161
223
  end
162
224
 
225
##
# Assigns +value+ as the label for the current context (@current_context).
# Raises a RuntimeError if there is no current context. The current context is left
# unchanged. Returns self, so that calls can be chained.

def labeled(value)
  raise "No context for assigning a label" unless @current_context
  @label_options[@current_context] = value

  return self
end
231
+
232
##
# Adds a continuous scale layer for the current context (@current_context is passed to
# AxisScaler as the axis). +options+ are passed to the AxisScaler layer.
# Raises a RuntimeError if there is no current context. Returns self.

def scale_continuous(**options)
  raise "No context for assigning a scale" unless @current_context
  @layers << AxisScaler.new(@current_context, :continuous, **options)

  return self
end

##
# Adds a discrete scale layer for the current context. See #scale_continuous.

def scale_discrete(**options)
  raise "No context for assigning a scale" unless @current_context
  @layers << AxisScaler.new(@current_context, :discrete, **options)

  return self
end

##
# Adds a log10 scale layer for the current context. See #scale_continuous.

def scale_log10(**options)
  raise "No context for assigning a scale" unless @current_context
  @layers << AxisScaler.new(@current_context, :log10, **options)

  return self
end

##
# Adds a reverse scale layer for the current context. See #scale_continuous.

def scale_reverse(**options)
  raise "No context for assigning a scale" unless @current_context
  @layers << AxisScaler.new(@current_context, :reverse, **options)

  return self
end

##
# Adds a sqrt scale layer for the current context. See #scale_continuous.

def scale_sqrt(**options)
  raise "No context for assigning a scale" unless @current_context
  @layers << AxisScaler.new(@current_context, :sqrt, **options)

  return self
end
266
+
163
267
  def flip_coordinates
164
268
  @layers << FlipCoordinates.new
165
269
 
@@ -0,0 +1,12 @@
1
+ require_relative 'core'
2
+
3
module Rust::Plots::GGPlot

  ##
  # Layer that sets the scale of an axis. The name of the layer passed to Layer is
  # "scale_<axis>_<type>" (e.g., "scale_x_log10").

  class AxisScaler < Layer

    ##
    # Creates a scale layer for +axis+ of the given +type+ (:continuous by default).
    # +options+ are forwarded to the Layer constructor.

    def initialize(axis, type = :continuous, **options)
      @axis = axis
      @type = type

      super("scale_#{@axis}_#{@type}", **options)
    end
  end
end