rust 0.4 → 0.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,155 @@
1
+ require_relative 'datatype'
2
+
3
+ module Rust
4
+
5
##
# Mirror of the matrix type in R. Only non-empty, rectangular, numeric matrices are accepted.
# Elements are addressed by 0-based row/column position or, when names were given, by name.

class Matrix < RustDatatype
    ##
    # Returns true when the R class list +klass+ is an Array that contains "matrix". +type+ is not used.

    def self.can_pull?(type, klass)
        return klass.is_a?(Array) && klass.include?("matrix")
    end

    ##
    # Pulls the R variable +variable+ and wraps it, together with its rownames and colnames, in a new
    # Matrix. +type+ and +klass+ are not used.

    def self.pull_variable(variable, type, klass)
        if Rust._pull("length(#{variable})") == 1
            # A one-element matrix is pulled through its only cell and wrapped in a 1x1 Ruby Matrix.
            core = ::Matrix[[Rust._pull("#{variable}[1]")]]
        else
            core = Rust._pull(variable)
        end
        row_names = [Rust["rownames(#{variable})"]].flatten
        column_names = [Rust["colnames(#{variable})"]].flatten

        # A list made only of nils is treated as "no names".
        row_names = nil if row_names.all? { |v| v.nil? }
        column_names = nil if column_names.all? { |v| v.nil? }

        Matrix.new(core, row_names, column_names)
    end

    ##
    # Assigns the values to +variable_name+ in R as a Ruby Matrix.
    # NOTE(review): only the values are handed over here; row and column names are not sent by this method.

    def load_in_r_as(variable_name)
        matrix = ::Matrix[*@data]

        Rust[variable_name] = matrix
    end

    ##
    # Creates a new matrix with the given +data+ (Ruby Matrix or Array of row Arrays). Optionally,
    # +row_names+ and +column_names+ can be specified. The rows are copied, so later writes through #[]=
    # do not touch the arrays passed by the caller.
    #
    # Raises TypeError when +data+ is not an array of arrays or holds non-numeric values, ArgumentError
    # when the names do not match the matrix size, RuntimeError when the matrix is empty or ragged.

    def initialize(data, row_names = nil, column_names = nil)
        @row_names = row_names
        @column_names = column_names

        if data.is_a?(::Matrix)
            @data = data.row_vectors.map { |v| v.to_a }
        else
            # Both conditions must hold: a flat array such as [1, 2, 3] is not a matrix.
            raise TypeError, "Expected array of array" unless data.is_a?(Array) && data.all? { |row| row.is_a?(Array) }
            @data = data.map { |row| row.dup }
        end

        raise "Empty matrices are not allowed" if @data.flatten.empty?
        raise TypeError, "Only numeric matrices are supported" unless @data.all? { |row| row.all? { |e| e.is_a?(Numeric) } }
        raise "All the rows must have the same size" unless @data.map { |row| row.size }.uniq.size == 1

        # The raw rows are shown in the messages: #inspect needs names that already match the matrix size.
        raise ArgumentError, "Expected row names #@row_names to match the number of rows in #{@data.inspect}" if @row_names && @row_names.size != self.rows
        raise ArgumentError, "Expected column names #@column_names to match the number of columns in #{@data.inspect}" if @column_names && @column_names.size != self.cols
    end

    ##
    # Returns the matrix element at row +i+ and column +j+ (0-based integers or names).

    def [](i, j)
        i, j = indices(i, j)

        return @data[i][j]
    end

    ##
    # Sets the matrix element at row +i+ and column +j+ (0-based integers or names) with +value+.

    def []=(i, j, value)
        i, j = indices(i, j)

        @data[i][j] = value
    end

    ##
    # Returns the number of rows.

    def rows
        @data.size
    end

    ##
    # Returns the number of columns.

    def cols
        @data[0].size
    end

    ##
    # Returns a flattened version of the matrix (Array), row after row.

    def flatten
        return @data.flatten
    end

    ##
    # Returns a text table of the matrix: a header with the column names (or 0-based positions), one line
    # per row introduced by its name (or position) in square brackets, all framed by dashed rules.

    def inspect
        row_labels = (@row_names || (0...self.rows).to_a).map { |v| v.to_s }
        column_labels = (@column_names || (0...self.cols).to_a).map { |v| v.to_s }
        separator = " | "

        cells = @data.map { |row| row.map { |e| e.inspect } }

        # Widths are tracked by position, so repeated column names do not collapse into one entry.
        widths = column_labels.each_with_index.map do |label, c|
            ([label.length] + cells.map { |row| row[c].to_s.length }).max
        end
        label_width = row_labels.map { |label| label.length }.max + 3

        rule = "-" * (label_width + widths.sum + (widths.size * separator.length))

        lines = [rule]
        lines << (" " * label_width) + column_labels.zip(widths).map { |label, w| label.rjust(w) }.join(separator)
        lines << rule
        cells.each_with_index do |row, r|
            lines << "[" + row_labels[r].rjust(label_width - 3) + "] " + row.zip(widths).map { |cell, w| cell.rjust(w) }.join(separator)
        end
        lines << rule

        return lines.join("\n")
    end

    private

    # Translates +i+ and +j+ (integers or names) into a checked pair of 0-based positions.
    def indices(i, j)
        i = named_index(@row_names, i, "row") if i.is_a?(String)
        j = named_index(@column_names, j, "column") if j.is_a?(String)

        raise ArgumentError, "Expected i and j to be both integers or strings" unless i.is_a?(Integer) && j.is_a?(Integer)
        raise "Wrong i" unless i.between?(0, @data.size - 1)
        raise "Wrong j" unless j.between?(0, @data[0].size - 1)

        return [i, j]
    end

    # Returns the position of +name+ in +names+; raises ArgumentError when it is missing or no names exist.
    def named_index(names, name, kind)
        position = names && names.index(name)
        raise ArgumentError, "Can not find #{kind} #{name}" unless position

        return position
    end
end
155
+ end
@@ -0,0 +1,78 @@
1
+ require_relative 'datatype'
2
+
3
+ module Rust
4
+
5
##
# Mirror for the S4 class in R. The object stays in R: slots are read and written through the R
# expression returned by r_mirror.

class S4Class < RustDatatype
    attr_reader :slots

    ##
    # Returns true when the R type is "S4". +klass+ is not used.

    def self.can_pull?(type, klass)
        type == "S4"
    end

    ##
    # Builds the mirror of +variable+, asking R for the slot names of the class +klass+. +type+ is not used.

    def self.pull_variable(variable, type, klass)
        slot_names = [Rust._pull("names(getSlots(\"#{klass}\"))")].flatten

        S4Class.new(variable, klass, slot_names)
    end

    ##
    # Assigns the mirrored R object to +variable_name+ in R.

    def load_in_r_as(variable_name)
        Rust._eval("#{variable_name} <- #{self.r_mirror}")
    end

    def r_hash
        "immutable"
    end

    ##
    # Creates a new S4 element, given its +variable_name+, class name (+klass+), and +slots+.

    def initialize(variable_name, klass, slots)
        @slots = slots
        @klass = klass

        self.r_mirror_to(variable_name)
    end

    ##
    # Returns the value of the slot +key+. Raises ArgumentError when +key+ is not one of the slots.
    # Also available as the | operator.

    def [](key)
        unless @slots.include?(key)
            raise ArgumentError, "Unknown slot `#{key}` for class `#@klass`"
        end

        Rust.exclusive do
            return Rust["#{self.r_mirror}@#{key}"]
        end
    end
    alias :| :[]

    ##
    # Sets the slot +key+ with +value+. Raises ArgumentError when +key+ is not one of the slots.

    def []=(key, value)
        unless @slots.include?(key)
            raise ArgumentError, "Unknown slot `#{key}` for class `#@klass`"
        end

        Rust.exclusive do
            return Rust["#{self.r_mirror}@#{key}"] = value
        end
    end

    ##
    # Returns the class name.

    def class_name
        @klass
    end

    def inspect
        "<S4 instance of #@klass, with slots #@slots>"
    end
end
78
+ end
@@ -0,0 +1,122 @@
1
+ require_relative 'datatype'
2
+
3
+ module Rust
4
+
5
##
# Represents a sequence of values in R (through a call to the seq function).

class Sequence < RustDatatype
    attr_reader :min
    attr_reader :max

    ##
    # Sequences are never pulled from R: always returns false.

    def self.can_pull?(type, klass)
        return false
    end

    ##
    # Creates a new sequence from +min+ to +max+ with a given +step+ (default = 1).

    def initialize(min, max, step=1)
        @min = min
        @max = max
        @step = step
    end

    ##
    # Sets the step to +step+ and returns the sequence itself. Through the +step+ alias the call can be
    # chained, e.g. Sequence.new(1, 10).step(2).

    def step=(step)
        @step = step

        return self
    end
    alias :step :step=

    ##
    # Yields each value from +min+ to +max+, advancing by the step. Without a block, returns an
    # Enumerator over the same values.

    def each
        return to_enum(:each) unless block_given?

        (@min..@max).step(@step) do |v|
            yield v
        end
    end

    ##
    # Returns the values of the sequence as an Array.

    def to_a
        return self.each.to_a
    end

    ##
    # Returns the R expression (a call to seq) that builds this sequence.

    def to_R
        "seq(from=#@min, to=#@max, by=#@step)"
    end

    ##
    # Assigns the sequence to +variable_name+ in R.

    def load_in_r_as(variable_name)
        Rust._eval("#{variable_name} <- #{self.to_R}")
    end
end
57
+
58
##
# Array whose arithmetic operators work element by element. The other operand of -, *, + and / is
# either an array of the same size (paired position by position) or a number (applied to every
# element); ** only accepts a number. Every operator returns a new array of the receiver's class and
# leaves the receiver untouched, so it also works on frozen arrays.

class MathArray < Array
    ##
    # Element-wise subtraction. Raises ArgumentError on a wrong operand type or size.

    def -(other)
        elementwise(other) { |mine, theirs| mine - theirs }
    end

    ##
    # Element-wise multiplication. Raises ArgumentError on a wrong operand type or size.

    def *(other)
        elementwise(other) { |mine, theirs| mine * theirs }
    end

    ##
    # Element-wise sum. Raises ArgumentError on a wrong operand type or size.

    def +(other)
        elementwise(other) { |mine, theirs| mine + theirs }
    end

    ##
    # Element-wise division (integer operands follow Ruby's integer division). Raises ArgumentError on
    # a wrong operand type or size.

    def /(other) #/# <- this comment is just to recover the syntax highlighting bug in Kate
        elementwise(other) { |mine, theirs| mine / theirs }
    end

    ##
    # Raises every element to the power +other+. Raises ArgumentError unless +other+ is numeric.

    def **(other)
        raise ArgumentError, "Expected numeric" unless other.is_a?(Numeric)

        result = self.dup
        result.each_index { |i| result[i] = result[i] ** other }

        return result
    end

    private

    # Validates +other+, then returns a copy of the receiver in which each element is replaced by the
    # block's result for (element, matching operand).
    def elementwise(other)
        raise ArgumentError, "Expected array or numeric" if !other.is_a?(::Array) && !other.is_a?(Numeric)
        raise ArgumentError, "The two arrays must have the same size" if other.is_a?(::Array) && self.size != other.size

        operands = other.is_a?(Numeric) ? [other] * self.size : other

        # dup (unlike clone) drops the frozen flag, so the copy can always be filled in.
        result = self.dup
        result.each_index { |i| result[i] = yield(result[i], operands[i]) }

        return result
    end
end
122
+ end
data/lib/rust/core.rb ADDED
@@ -0,0 +1,7 @@
1
+ require_relative 'core/rust'
2
+ require_relative 'core/csv'
3
+
4
# Load every Ruby file found in the core/types directory that sits next to this file.
types_pattern = File.join(File.dirname(File.expand_path(__FILE__)), "core/types/*.rb")
Dir.glob(types_pattern).each { |type_file| require_relative type_file }
@@ -0,0 +1,4 @@
1
# Load every Ruby file that lives in the same directory as this file, skipping this file itself.
this_file = File.expand_path(__FILE__)
sibling_files = Dir.glob(File.dirname(this_file) + "/*.rb").reject { |candidate| candidate == this_file }
sibling_files.each { |candidate| require_relative candidate }
@@ -0,0 +1,77 @@
1
+ require_relative '../core'
2
+
3
+ module Rust
4
+
5
##
# Mirror for an ANOVA model type in R. To create a new ANOVA model (aov), call the #generate method.

class ANOVAModel < RustDatatype
    ##
    # Returns the wrapped model.

    attr_reader :model

    ##
    # Returns true for R lists whose class (or one of whose classes) is "aov".

    def self.can_pull?(type, klass)
        type == "list" && [klass].flatten.include?("aov")
    end

    ##
    # Pulls +variable+ as a Rust::List and wraps it in an ANOVAModel. +type+ and +klass+ are not used.

    def self.pull_variable(variable, type, klass)
        pulled = RustDatatype.pull_variable(variable, Rust::List)

        ANOVAModel.new(pulled)
    end

    ##
    # Loads the wrapped model in R as +variable_name+.

    def load_in_r_as(variable_name)
        @model.load_in_r_as(variable_name)
    end

    ##
    # Generates a new ANOVA model with a given +formula+, +data+. +options+ can be specified and directly passed
    # to the aov function in R: each one is appended to the call as name=value, exactly as given.
    # NOTE(review): the value read back through Rust[] is wrapped in a new ANOVAModel; whether that value
    # is already an ANOVAModel depends on how Rust[] resolves it, which is not visible here.

    def self.generate(formula, data, **options)
        extra_arguments = options.map { |name, value| ", #{name}=#{value}" }.join

        Rust.exclusive do
            Rust["aov.data"] = data
            Rust._eval("aov.model.result <- aov(#{formula.to_R}, data=aov.data#{extra_arguments})")

            generated = ANOVAModel.new(Rust["aov.model.result"])
            generated.r_mirror_to("aov.model.result")
            return generated
        end
    end

    ##
    # Creates a new +model+.

    def initialize(model)
        @model = model
    end

    ##
    # Returns a summary of the ANOVA model through the summary function in R. The summary is computed
    # on the first call and reused afterwards.

    def summary
        return @summary if @summary

        Rust.exclusive do
            Rust._eval("aov.smr <- summary(#{self.r_mirror})")
            @summary = Rust['aov.smr']
        end

        @summary
    end
end
71
+ end
72
+
73
module Rust
    module RBindings
        ##
        # R-like shortcut for Rust::ANOVAModel.generate: builds an ANOVA model from +formula+ and +data+,
        # forwarding +options+ untouched.
        #
        # The model class is named in full because this module is usually mixed into other scopes, where
        # the bare name ANOVAModel is not guaranteed to resolve.

        def aov(formula, data, **options)
            return Rust::ANOVAModel.generate(formula, data, **options)
        end
    end
end
@@ -0,0 +1,258 @@
1
+ require_relative '../core'
2
+ require_relative '../stats/descriptive'
3
+ require_relative '../stats/correlation'
4
+
5
##
# Namespace for the model classes; Rust::Models::Regression is defined inside it.

module Rust::Models; end
7
+
8
+ ##
9
+ # Contains classes that allow running regression models.
10
+
11
+ module Rust::Models::Regression
12
+
13
##
# Generic regression model in R. It wraps the pulled model object and answers questions about it by
# evaluating R expressions on the R-side mirror of the model.

class RegressionModel < Rust::RustDatatype
    ##
    # Returns the wrapped model.

    attr_reader :model

    def self.can_pull?(type, klass)
        # Can only pull specific sub-types
        return false
    end

    ##
    # Loads the wrapped model in R as +variable_name+.

    def load_in_r_as(variable_name)
        @model.load_in_r_as(variable_name)
    end

    ##
    # Generates a new regression model. +object_type+ is the Ruby class of the model object; +model_type+ represents
    # the type of model at hand and is used as the name of the R function to call; +dependent_variable+ and
    # +independent_variables+ are directly used as part of the model formula (the latter joined with " + ").
    # +data+ represents the dataset to be used. +options+ can be specified and directly passed to the
    # model: each one is appended to the call as name=value, exactly as given.
    #
    # NOTE(review): +object_type+ is not used in the body; the class of the returned object is whatever
    # Rust[] yields for the fitted model.

    def self.generate(object_type, model_type, dependent_variable, independent_variables, data, **options)
        mapped = options.map { |k, v| ", #{k}=#{v}" }.join

        formula = Rust::Formula.new(dependent_variable, independent_variables.join(" + "))

        Rust.exclusive do
            Rust["#{model_type}.data"] = data

            Rust._eval("#{model_type}.model.result <- #{model_type}(#{formula.to_R}, data=#{model_type}.data#{mapped})")
            result = Rust["#{model_type}.model.result"]
            result.r_mirror_to("#{model_type}.model.result")

            return result
        end
    end

    ##
    # Creates a new +model+. Raises StandardError when +model+ is itself a RegressionModel.

    def initialize(model)
        raise StandardError, "A regression model can not wrap another regression model" if model.is_a?(RegressionModel)
        @model = model
    end

    ##
    # Returns the residuals of the model. They are read from R on the first call and reused afterwards.

    def residuals
        unless @residuals
            Rust.exclusive do
                @residuals = Rust["residuals(#{self.r_mirror})"]
            end
        end

        return @residuals
    end

    ##
    # Returns the fitted values of the model. They are read from R on the first call and reused afterwards.

    def fitted
        unless @fitted
            Rust.exclusive do
                @fitted = Rust["fitted(#{self.r_mirror})"]
            end
        end

        return @fitted
    end

    ##
    # Returns the actual values in the dataset, rebuilt as fitted value + residual for each observation.

    def actuals
        return self.fitted.zip(self.residuals).map { |couple| couple.sum }
    end

    ##
    # Returns the r-squared of the model ("r.squared" entry of the summary).

    def r_2
        return self.summary | "r.squared"
    end

    ##
    # Returns the adjusted r-squared of the model ("adj.r.squared" entry of the summary).

    def r_2_adjusted
        return self.summary | "adj.r.squared"
    end

    ##
    # Returns the mean squared error of the model.
    # NOTE(review): this is computed as Rust::Descriptive.variance of the residuals; confirm that this
    # matches the intended definition of MSE (denominator and centering).

    def mse
        Rust::Descriptive.variance(self.residuals)
    end

    ##
    # Returns the coefficients of the model ("coefficients" entry of the summary).

    def coefficients
        self.summary | "coefficients"
    end

    ##
    # Any message not understood by this class is looked up, by name, in the wrapped model through
    # its | operator. The arguments are ignored.
    # NOTE(review): there is no matching respond_to_missing?, so respond_to? does not report these names.

    def method_missing(name, *args)
        return model | name.to_s
    end

    ##
    # Returns a summary for the model using the summary function in R. The summary is computed on the
    # first call and reused afterwards.

    def summary
        unless @summary
            Rust.exclusive do
                @summary = Rust["summary(#{self.r_mirror})"]
            end
        end

        return @summary
    end

    def r_hash
        @model.r_hash
    end
end
142
+
143
##
# Represents a linear regression model in R.

class LinearRegressionModel < RegressionModel
    ##
    # Returns true for R lists whose class is exactly "lm".

    def self.can_pull?(type, klass)
        type == "list" && klass == "lm"
    end

    ##
    # Pulls +variable+ as a Rust::List and wraps it in a LinearRegressionModel. +type+ and +klass+ are not used.

    def self.pull_variable(variable, type, klass)
        pulled = Rust::RustDatatype.pull_variable(variable, Rust::List)

        LinearRegressionModel.new(pulled)
    end

    ##
    # Generates a linear regression model, given its +dependent_variable+ and +independent_variables+ and its +data+.
    # +options+ can be specified and directly passed to the model. The work is delegated to
    # RegressionModel.generate with "lm" as the model type.

    def self.generate(dependent_variable, independent_variables, data, **options)
        RegressionModel.generate(LinearRegressionModel, "lm", dependent_variable, independent_variables, data, **options)
    end
end
172
+
173
##
# Represents a linear mixed effects model in R.

class LinearMixedEffectsModel < RegressionModel
    ##
    # Returns true for S4 objects whose class is exactly "lmerModLmerTest".

    def self.can_pull?(type, klass)
        type == "S4" && klass == "lmerModLmerTest"
    end

    def self.pull_priority
        1
    end

    ##
    # Pulls +variable+ as a Rust::S4Class and wraps it in a LinearMixedEffectsModel. +type+ and +klass+
    # are not used.

    def self.pull_variable(variable, type, klass)
        pulled = Rust::RustDatatype.pull_variable(variable, Rust::S4Class)

        LinearMixedEffectsModel.new(pulled)
    end

    ##
    # Returns a summary for the model using the summary function in R; computed on the first call and
    # reused afterwards. Before being pulled, the summary is adjusted in R: its objClass entry gets
    # mode "list" and its logLik entry is replaced by its attributes (presumably so that every entry
    # can be pulled -- confirm).

    def summary
        return @summary if @summary

        Rust.exclusive do
            Rust._eval("tmp.summary <- summary(#{self.r_mirror})")
            Rust._eval("mode(tmp.summary$objClass) <- \"list\"")
            Rust._eval("tmp.summary$logLik <- attributes(tmp.summary$logLik)")
            @summary = Rust["tmp.summary"]
        end

        @summary
    end

    ##
    # Generates a linear mixed effects model, given its +dependent_variable+, its +fixed_effects+, its
    # +random_effects+ and its +data+. Each random effect +e+ enters the formula as the term (1|e).
    # +options+ can be specified and directly passed to the model. The R packages lmerTest and rsq are
    # declared as prerequisites first.

    def self.generate(dependent_variable, fixed_effects, random_effects, data, **options)
        ["lmerTest", "rsq"].each { |package| Rust.prerequisite(package) }

        random_terms = random_effects.map { |effect| "(1|#{effect})" }

        RegressionModel.generate(LinearMixedEffectsModel, "lmer", dependent_variable, fixed_effects + random_terms, data, **options)
    end

    ##
    # Returns the r-squared of the model, computed in R through rsq with adj=F.

    def r_2
        Rust.exclusive do
            Rust._eval("tmp.rsq <- rsq(#{self.r_mirror}, adj=F)")
            return Rust['tmp.rsq']
        end
    end

    ##
    # Returns the adjusted r-squared of the model, computed in R through rsq with adj=T.

    def r_2_adjusted
        Rust.exclusive do
            Rust._eval("tmp.rsq <- rsq(#{self.r_mirror}, adj=T)")
            return Rust['tmp.rsq']
        end
    end
end
238
+ end
239
+
240
module Rust
    module RBindings
        ##
        # R-like shortcut that fits a linear regression model. The right part of +formula+ is split on
        # "+" into the independent variables; +options+ are forwarded untouched.
        #
        # The model classes are named in full because they live in Rust::Models::Regression, which is
        # not part of the lookup scope of this module.

        def lm(formula, data, **options)
            independent = formula.right_part.split("+").map { |v| v.strip }
            return Rust::Models::Regression::LinearRegressionModel.generate(formula.left_part, independent, data, **options)
        end

        ##
        # R-like shortcut that fits a linear mixed effects model. The right part of +formula+ is split
        # on "+" and its terms are used as they are (random effects must already be written as R terms);
        # +options+ are forwarded untouched. The same R packages required by
        # LinearMixedEffectsModel.generate (lmerTest and rsq) are declared as prerequisites first.

        def lmer(formula, data, **options)
            Rust.prerequisite("lmerTest")
            Rust.prerequisite("rsq")

            independent = formula.right_part.split("+").map { |v| v.strip }

            Rust::Models::Regression::RegressionModel.generate(
                Rust::Models::Regression::LinearMixedEffectsModel,
                "lmer",
                formula.left_part,
                independent,
                data,
                **options
            )
        end
    end
end