galaaz 0.4.2 → 0.4.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +25 -0
- data/Rakefile +8 -0
- data/bin/gknit +9 -5
- data/bin/gstudio +4 -2
- data/bin/gstudio.rb +32 -2
- data/blogs/dev/dev.html +219 -34
- data/blogs/dev/dev.md +26 -26
- data/blogs/dev/dev_files/figure-html/bubble-1.png +0 -0
- data/blogs/dev/dev_files/figure-html/diverging_bar.png +0 -0
- data/blogs/dplyr/dplyr.rb +63 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +38 -26
- data/blogs/galaaz_ggplot/galaaz_ggplot.aux +16 -17
- data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot.tex +65 -31
- data/blogs/oh_my/not_so.rb +2342 -0
- data/blogs/oh_my/oh_my.Rmd +493 -0
- data/blogs/oh_my/oh_my.html +680 -0
- data/blogs/oh_my/oh_my.md +597 -0
- data/blogs/oh_my/old.Rmd +2100 -0
- data/blogs/ruby_plot/figures/facets_with_decorations.png +0 -0
- data/blogs/ruby_plot/figures/facets_with_jitter.png +0 -0
- data/blogs/ruby_plot/figures/final_box_plot.png +0 -0
- data/blogs/ruby_plot/figures/final_violin_plot.png +0 -0
- data/blogs/ruby_plot/figures/violin_with_jitter.png +0 -0
- data/blogs/ruby_plot/ruby_plot.Rmd +147 -122
- data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +662 -0
- data/blogs/ruby_plot/ruby_plot.html +49 -54
- data/blogs/ruby_plot/ruby_plot.md +147 -122
- data/blogs/ruby_plot/ruby_plot.pdf +0 -0
- data/blogs/ruby_plot/ruby_plot.tex +776 -157
- data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +57 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +106 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +110 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +174 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +236 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +296 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +236 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +218 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +128 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +150 -0
- data/examples/islr/ch2.spec.rb +21 -18
- data/examples/islr/ch3_boston.rb +14 -5
- data/examples/islr/ch3_multiple_regression.rb +2 -3
- data/examples/islr/ch6.spec.rb +1 -1
- data/examples/islr/x_y_rnorm.jpg +0 -0
- data/lib/R_interface/r.rb +14 -10
- data/lib/R_interface/r_libs.R +9 -0
- data/lib/R_interface/r_methods.rb +77 -6
- data/lib/R_interface/{expression.rb → r_module_s.rb} +13 -14
- data/lib/R_interface/rbinary_operators.rb +58 -71
- data/lib/R_interface/rdata_frame.rb +2 -1
- data/lib/R_interface/rdevices.R +4 -0
- data/lib/R_interface/rdevices.rb +1 -1
- data/lib/R_interface/renvironment.rb +34 -1
- data/lib/R_interface/rexpression.rb +108 -2
- data/lib/R_interface/rindexed_object.rb +3 -1
- data/lib/R_interface/rlanguage.rb +18 -2
- data/lib/R_interface/rmatrix.rb +14 -0
- data/lib/R_interface/rmd_indexed_object.rb +5 -1
- data/lib/R_interface/robject.rb +61 -23
- data/lib/R_interface/rsupport.rb +111 -53
- data/lib/R_interface/rsymbol.rb +6 -5
- data/lib/R_interface/ruby_extensions.rb +130 -4
- data/lib/R_interface/runary_operators.rb +35 -3
- data/lib/R_interface/rvector.rb +1 -0
- data/lib/galaaz.rb +0 -2
- data/lib/gknit/knitr_engine.rb +58 -4
- data/lib/gknit/ruby_engine.rb +5 -6
- data/lib/util/exec_ruby.rb +55 -9
- data/specs/all.rb +13 -3
- data/specs/figures/dose_len.png +0 -0
- data/specs/r_dataframe.spec.rb +49 -26
- data/specs/r_environment.spec.rb +140 -0
- data/specs/r_eval.spec.rb +0 -15
- data/specs/r_formula.spec.rb +232 -0
- data/specs/r_function.spec.rb +7 -8
- data/specs/r_list.spec.rb +4 -0
- data/specs/r_list_apply.spec.rb +11 -11
- data/specs/r_matrix.spec.rb +3 -3
- data/specs/{r_plots.spec.rb~ → r_nse.spec.rb} +29 -6
- data/specs/r_vector_creation.spec.rb +6 -0
- data/specs/r_vector_object.spec.rb +2 -2
- data/specs/r_vector_operators.spec.rb +3 -3
- data/specs/r_vector_subsetting.spec.rb +4 -4
- data/specs/ruby_expression.spec.rb +324 -0
- data/specs/tmp.rb +12 -524
- data/sty/galaaz.sty +71 -0
- data/version.rb +1 -1
- metadata +31 -41
- data/bin/gknit2~ +0 -6
- data/bin/ogk~ +0 -4
- data/bin/prepareR.rb~ +0 -1
- data/blogs/dev/dev.Rmd~ +0 -104
- data/blogs/galaaz_ggplot/galaaz_ggplot.dvi +0 -0
- data/blogs/galaaz_ggplot/midwest_external_png~ +0 -1
- data/blogs/gknit/gknit.Rmd~ +0 -184
- data/blogs/gknit/gknit.Rnd~ +0 -17
- data/blogs/gknit/model.rb~ +0 -46
- data/blogs/ruby_plot/ruby_plot.Rmd~ +0 -215
- data/examples/islr/Figure.jpg +0 -0
- data/examples/misc/moneyball.rb~ +0 -16
- data/examples/misc/subsetting.rb~ +0 -372
- data/lib/R/eng_ruby.R~ +0 -63
- data/lib/R_interface/capture_plot.rb~ +0 -23
- data/lib/R_interface/r.rb~ +0 -121
- data/lib/R_interface/rdevices.rb~ +0 -27
- data/lib/gknit.rb~ +0 -26
- data/lib/gknit/knitr_engine.rb~ +0 -102
- data/lib/gknit/ruby_engine.rb~ +0 -72
- data/lib/util/inline_file.rb~ +0 -23
- data/r_requires/knitr.rb~ +0 -4
- data/specs/r_language.spec.rb +0 -157
data/blogs/oh_my/old.Rmd
ADDED
@@ -0,0 +1,2100 @@
|
|
1
|
+
---
|
2
|
+
title: "Extending R with classes, modules, procs, lambdas, oh my!"
|
3
|
+
author:
|
4
|
+
- "Rodrigo Botafogo"
|
5
|
+
- "Daniel Mossé - University of Pittsburgh"
|
6
|
+
tags: [Tech, Data Science, Ruby, R, GraalVM]
|
7
|
+
date: "November 19th, 2018"
|
8
|
+
output:
|
9
|
+
html_document:
|
10
|
+
self_contained: true
|
11
|
+
keep_md: true
|
12
|
+
pdf_document:
|
13
|
+
includes:
|
14
|
+
in_header: "../../sty/galaaz.sty"
|
15
|
+
keep_tex: yes
|
16
|
+
number_sections: yes
|
17
|
+
toc: true
|
18
|
+
toc_depth: 2
|
19
|
+
fontsize: 11pt
|
20
|
+
---
|
21
|
+
|
22
|
+
```{r setup, echo=FALSE}
|
23
|
+
|
24
|
+
```
|
25
|
+
|
26
|
+
# Introduction
|
27
|
+
|
28
|
+
This paper introduces and compares Galaaz with R's S4. It is a shameless rip off of
|
29
|
+
["A '(not so)' Short Introduction to S4"](https://cran.r-project.org/doc/contrib/Genolini-S4tutorialV0-5en.pdf) by Christophe Genolini and follows the same structure and examples presented there.
|
30
|
+
|
31
|
+
Galaaz is a Ruby Gem (library) that allows very tight integration between Ruby and R.
|
32
|
+
Its integration is much tighter and more transparent than what one can get with RinRuby
|
33
|
+
or similar solutions in Python
|
34
|
+
such as [PypeR](https://pypi.python.org/pypi/PypeR/1.1.0), [rpy2](http://rpy2.bitbucket.org/)
|
35
|
+
and other similar solutions. Galaaz targets the Java Virtual Machine and it
|
36
|
+
integrates with Renjin (http://www.renjin.org/), an R interpreter for Java.
|
37
|
+
|
38
|
+
From the Renjin page we can get the following description of Renjin and its objectives:
|
39
|
+
|
40
|
+
The goal of Renjin
|
41
|
+
is to eventually be compatible with GNU R such that most existing R language programs will
|
42
|
+
run in Renjin without the need to make any changes to the code. Needless to say, Renjin is
|
43
|
+
currently not 100% compatible with GNU R so your mileage may vary.
|
44
|
+
|
45
|
+
The biggest advantage of Renjin is that the R interpreter itself is a Java module which can be
|
46
|
+
seamlessly integrated into any Java application. This dispenses with the need to load dynamic
|
47
|
+
libraries or to provide some form of communication between separate processes. These types of
|
48
|
+
interfaces are often the source of much agony because they place very specific demands on the
|
49
|
+
environment in which they run.
|
50
|
+
|
51
|
+
We frequently see on the web people asking: "which is better for data analysis: R or Python?" In
|
52
|
+
this article we also have the objective to try to answer this question. As you will see, our
|
53
|
+
point is: "when in doubt about R or Python, use Galaaz!"
|
54
|
+
|
55
|
+
# Bases of Object Programming
|
56
|
+
|
57
|
+
In this paper, we will start our discussion from Part II of "The (not so) Short Introduction
|
58
|
+
to S4", which from now on we will reference as SS4 for "short S4". Interested readers are directed
|
59
|
+
to this paper to understand the motivation and examples in that paper. In this paper we will
|
60
|
+
present the S4 code from SS4 and then the same code in Ruby/Galaaz. We will not comment on the
|
61
|
+
S4 code, as all the comments can be found in SS4, we will only focus on the Ruby/Galaaz
|
62
|
+
description.
|
63
|
+
|
64
|
+
S4 defines classes by using the setClass function:
|
65
|
+
|
66
|
+
# Classes Declaration
|
67
|
+
|
68
|
+
```
|
69
|
+
# > setClass(
|
70
|
+
# + Class="Trajectories",
|
71
|
+
# + representation=representation(
|
72
|
+
# + times = "numeric",
|
73
|
+
# + traj = "matrix"
|
74
|
+
# + )
|
75
|
+
# + )
|
76
|
+
```
|
77
|
+
|
78
|
+
# Instance Variables
|
79
|
+
|
80
|
+
In Ruby a class is defined by the keyword 'class'. Every class should start with a capital
|
81
|
+
letter. S4 'slots' are called 'instance variables' in Ruby. Differently from R's S4,
|
82
|
+
instance variables in Ruby do not have type information. It should be clear though, that S4
|
83
|
+
type information is also not a "compile" time type, since R is not compiled. The type is
|
84
|
+
checked at runtime. The same checking can be done in Ruby and we will do it later in this
|
85
|
+
document.
|
86
|
+
|
87
|
+
In the example below, we create
|
88
|
+
class Trajectories with two instance variables, 'times' and 'matrix'. We will not go over
|
89
|
+
the details of instance variables in Ruby, but here we created those variables with the
|
90
|
+
keyword 'attr_reader' and a colon before the variable's name:
|
91
|
+
|
92
|
+
|
93
|
+
```
|
94
|
+
class Trajectories
|
95
|
+
|
96
|
+
attr_reader :times
|
97
|
+
attr_reader :matrix
|
98
|
+
|
99
|
+
end
|
100
|
+
```
|
101
|
+
|
102
|
+
|
103
|
+
In order to create a new instance of object Trajectories we call method new on the class and
|
104
|
+
we can store the result in a variable (not an instance variable) as below:
|
105
|
+
|
106
|
+
```
|
107
|
+
traj = Trajectories.new
|
108
|
+
```
|
109
|
+
|
110
|
+
We now have in variable 'traj' a Trajectories object. In Ruby, printing variable 'traj' will
|
111
|
+
only print the class name of the object and not its contents as in R.
|
112
|
+
|
113
|
+
```
|
114
|
+
puts traj
|
115
|
+
```
|
116
|
+
|
117
|
+
To see the contents of an object, one needs to access its components using the '.' operator:
|
118
|
+
|
119
|
+
```
|
120
|
+
puts traj.times
|
121
|
+
```
|
122
|
+
|
123
|
+
# Constructor
|
124
|
+
|
125
|
+
Since there is no content stored in 'times' nor 'matrix', nil is returned. In order to add
|
126
|
+
a value in the variables, we need to add a constructor to the class Trajectories. In R, a
|
127
|
+
constructor is built by default; in Ruby, this has to be created by adding a method called
|
128
|
+
'initialize'. In the example below, we will create the initializer that accepts two values,
|
129
|
+
a 'times' value and a 'matrix' value and they are used to initialize the value of the
|
130
|
+
instance variables:
|
131
|
+
EOT
|
132
|
+
|
133
|
+
```
|
134
|
+
class Trajectories
|
135
|
+
|
136
|
+
attr_reader :times
|
137
|
+
attr_reader :matrix
|
138
|
+
|
139
|
+
def initialize(times: nil, matrix: nil)
|
140
|
+
@times = times
|
141
|
+
@matrix = matrix
|
142
|
+
end
|
143
|
+
|
144
|
+
end
|
145
|
+
```
|
146
|
+
|
147
|
+
Up to this point, everything described is pure Ruby code and has absolutely no relationship with R.
|
148
|
+
We now want to create a Trajectories with a 'times' vector. Ruby has a vector class and we could
|
149
|
+
use this class to create a vector and add it to the 'times' instance variable; however, in order
|
150
|
+
to make use of R's functions, we want to create a R vector to add to 'times'. In Galaaz,
|
151
|
+
creating R objects is done using the corresponding R functions by just preceding them with 'R.',
|
152
|
+
i.e., R functions are all defined in Galaaz in the R namespace.
|
153
|
+
|
154
|
+
Since Galaaz is Ruby and not R, some syntax adjustments are sometimes necessary. For instance,
|
155
|
+
in R, a range is represented as '(1:4)', in Ruby, the same range is represented as '(1..4)'.
|
156
|
+
When passing arguments to an R function in R one uses the '=' sign after the slot name; in Ruby,
|
157
|
+
one uses the ':' operator after the parameter's name as we can see below:
|
158
|
+
EOT
|
159
|
+
|
160
|
+
```
|
161
|
+
# Create a Trajectories with the times vector [1, 2, 3, 4] and no matrix
|
162
|
+
traj = Trajectories.new(times: R.c(1, 2, 3, 4))
|
163
|
+
|
164
|
+
# Create a Trajectories with times and matrix
|
165
|
+
traj2 = Trajectories.new(times: R.c(1, 3), matrix: R.matrix((1..4), ncol: 2))
|
166
|
+
```
|
167
|
+
|
168
|
+
# Access to Instance Variables (to reach a slot)
|
169
|
+
|
170
|
+
In order to access data in an instance variable the operator '.' is used. In R, a similar
|
171
|
+
result is obtained by use of the '@' operator, but SS4 does not recommend its use. In Galaaz,
|
172
|
+
the '.' operator is the recommended way of accessing an instance variable.
|
173
|
+
|
174
|
+
Now that we have created two trajectories, let's try to print their instance variables to see
|
175
|
+
that everything is fine:
|
176
|
+
|
177
|
+
|
178
|
+
```
|
179
|
+
puts traj.times
|
180
|
+
```
|
181
|
+
|
182
|
+
Well this wasn't really what we had expected... as explained before, printing a variable, will
|
183
|
+
actually only show the class name and vector 'times' in Galaaz is actually a Renjin::Vector.
|
184
|
+
In order to print the content of a Galaaz object we use method 'pp' as follows:
|
185
|
+
|
186
|
+
```{ruby}
|
187
|
+
puts @traj.times
|
188
|
+
```
|
189
|
+
|
190
|
+
We now have the expected value. Note that the 'times' vector is printed exactly as it would be
|
191
|
+
if we were using GNU R. Let's now take a look at variable 'traj2':
|
192
|
+
|
193
|
+
```{ruby}
|
194
|
+
puts @traj2.times
|
195
|
+
```
|
196
|
+
|
197
|
+
```{ruby}
|
198
|
+
puts @traj2.matrix
|
199
|
+
```
|
200
|
+
|
201
|
+
Let's now build the same examples as in SS4: Three hospitals take part in a
|
202
|
+
study. The Pitié Salpêtriere (which has not yet returned its data file, shame on them!),
|
203
|
+
Cochin and Saint-Anne. We first show the code in R and the corresponding Galaaz:
|
204
|
+
|
205
|
+
```
|
206
|
+
> trajPitie <- new(Class="Trajectories")
|
207
|
+
> trajCochin <- new(
|
208
|
+
+ Class= "Trajectories",
|
209
|
+
+ times=c(1,3,4,5),
|
210
|
+
+ traj=rbind (
|
211
|
+
+ c(15,15.1, 15.2, 15.2),
|
212
|
+
+ c(16,15.9, 16,16.4),
|
213
|
+
+ c(15.2, NA, 15.3, 15.3),
|
214
|
+
+ c(15.7, 15.6, 15.8, 16)
|
215
|
+
+ )
|
216
|
+
+ )
|
217
|
+
> trajStAnne <- new(
|
218
|
+
+ Class= "Trajectories",
|
219
|
+
+ times=c(1: 10, (6: 16) *2),
|
220
|
+
+ traj=rbind(
|
221
|
+
+ matrix (seq (16,19, length=21), ncol=21, nrow=50, byrow=TRUE),
|
222
|
+
+ matrix (seq (15.8, 18, length=21), ncol=21, nrow=30, byrow=TRUE)
|
223
|
+
+ )+rnorm (21*80,0,0.2)
|
224
|
+
+ )
|
225
|
+
```
|
226
|
+
|
227
|
+
This same code in Galaaz becomes:
|
228
|
+
|
229
|
+
```{ruby}
|
230
|
+
@trajPitie = Trajectories.new
|
231
|
+
```
|
232
|
+
|
233
|
+
```{ruby}
|
234
|
+
@trajCochin = Trajectories.new(times: R.c(1,3,4,5),
|
235
|
+
matrix: R.rbind(
|
236
|
+
R.c(15,15.1, 15.2, 15.2),
|
237
|
+
R.c(16,15.9, 16,16.4),
|
238
|
+
R.c(15.2, NA, 15.3, 15.3),
|
239
|
+
R.c(15.7, 15.6, 15.8, 16)))
|
240
|
+
```
|
241
|
+
|
242
|
+
```{ruby}
|
243
|
+
@trajStAnne =
|
244
|
+
Trajectories.new(times: R.c((1..10), R.c(6..16) * 2),
|
245
|
+
matrix: (R.rbind(
|
246
|
+
R.matrix(R.seq(16, 19, length: 21), ncol: 21,
|
247
|
+
nrow: 50, byrow: true),
|
248
|
+
R.matrix(R.seq(15.8, 18, length: 21), ncol: 21,
|
249
|
+
nrow: 30, byrow: true)) + R.rnorm(21*80, 0, 0.2)))
|
250
|
+
|
251
|
+
```
|
252
|
+
|
253
|
+
Let's check that the 'times' and 'matrix' instance variables were correctly set:
|
254
|
+
|
255
|
+
```{ruby}
|
256
|
+
puts @trajCochin.times
|
257
|
+
```
|
258
|
+
|
259
|
+
```{ruby}
|
260
|
+
puts @trajCochin.matrix
|
261
|
+
```
|
262
|
+
|
263
|
+
```{ruby}
|
264
|
+
puts @trajStAnne.times
|
265
|
+
```
|
266
|
+
|
267
|
+
We will not at this time print trajStAnne.matrix, since this is a huge matrix and the result
|
268
|
+
would just take too much space. Later we will print just a partial view of the matrix.
|
269
|
+
|
270
|
+
# Default Values
|
271
|
+
|
272
|
+
Default values are very useful and quite often used in Ruby programs. Although SS4 does not
|
273
|
+
recommend its use, there are many cases in which default values are useful and make code simpler.
|
274
|
+
We have already seen default values in this document, with the default being 'nil'. This was
|
275
|
+
necessary in order to be able to create our constructor and pass it the proper values.
|
276
|
+
|
277
|
+
In the example below, a class TrajectoriesBis is created with default value 1 for times and a
|
278
|
+
matrix with no elements in matrix.
|
279
|
+
|
280
|
+
```{ruby}
|
281
|
+
class TrajectoriesBis
|
282
|
+
|
283
|
+
attr_reader :times
|
284
|
+
attr_reader :matrix
|
285
|
+
|
286
|
+
def initialize(times: 1, matrix: R.matrix(0))
|
287
|
+
@times = times
|
288
|
+
@matrix = matrix
|
289
|
+
end
|
290
|
+
|
291
|
+
end
|
292
|
+
|
293
|
+
traj_bis = TrajectoriesBis.new
|
294
|
+
```
|
295
|
+
|
296
|
+
|
297
|
+
Let's take a look at our new class:
|
298
|
+
|
299
|
+
|
300
|
+
```{ruby}
|
301
|
+
puts @traj_bis.times
|
302
|
+
```
|
303
|
+
|
304
|
+
Well, not exactly what we had in mind. We got an error saying that .pp is undefined for
|
305
|
+
Fixnum. In R, numbers are automatically converted to vectors, but this is not the case
|
306
|
+
in Ruby and Galaaz. In Ruby, numbers are numbers and vectors are vectors. In the
|
307
|
+
initialize method above, we stored 1 in variable @times and 1 is a number. Method .pp is
|
308
|
+
only available for R objects.
|
309
|
+
|
310
|
+
In order to fix this, we need to fix our initializer to convert number 1 to a vector with
|
311
|
+
one element of value 1. Galaaz provides the method R.i to do this conversion.
|
312
|
+
|
313
|
+
When calling an R function that expects a number as argument, this conversion is
|
314
|
+
automatically done by Galaaz; however, in the initialize method, there is no indication
|
315
|
+
to Galaaz that variable @times is actually a Galaaz variable, since there is no type
|
316
|
+
information. In this case, we need to be explicit and use R.i:
|
317
|
+
|
318
|
+
```{ruby}
|
319
|
+
class TrajectoriesBis
|
320
|
+
|
321
|
+
attr_reader :times
|
322
|
+
attr_reader :matrix
|
323
|
+
|
324
|
+
# Use R.i to convert number 1 to a vector
|
325
|
+
def initialize(times: R.i(1), matrix: R.matrix(0))
|
326
|
+
@times = times
|
327
|
+
@matrix = matrix
|
328
|
+
end
|
329
|
+
|
330
|
+
end
|
331
|
+
|
332
|
+
@traj_bis = TrajectoriesBis.new
|
333
|
+
```
|
334
|
+
|
335
|
+
```{ruby}
|
336
|
+
puts @traj_bis.times
|
337
|
+
```
|
338
|
+
|
339
|
+
```{ruby}
|
340
|
+
puts @traj_bis.matrix
|
341
|
+
```
|
342
|
+
|
343
|
+
# To Remove an Object
|
344
|
+
|
345
|
+
As far as I know, there isn't a good way of removing a defined class, but there might be
|
346
|
+
one and the interested user is directed to google it! In principle, there should not be
|
347
|
+
any real need to remove a defined class. Both in R and Galaaz, large programs are usually
|
348
|
+
written in a file and the file loaded. If one writes a wrong class, the better solution is
|
349
|
+
to correct it in the file and then load it again. If the class is written directly on the console,
|
350
|
+
then leaving it there will not have any serious impact.
|
351
|
+
|
352
|
+
# The Empty Object
|
353
|
+
|
354
|
+
When a Trajectories is created with new, and no argument is given, all its instance variables
|
355
|
+
will have the default nil value. Since Ruby has no type information, then there is only one
|
356
|
+
type (or actually no type) of nil. To check if a variable is empty, we check it against the nil
|
357
|
+
value.
|
358
|
+
|
359
|
+
# To See an Object
|
360
|
+
|
361
|
+
Ruby has very strong meta-programming features, in particular, one can use introspection to
|
362
|
+
see methods and instance variables from a given class. Method 'instance_variables' shows all
|
363
|
+
the instance variables of an object:
|
364
|
+
|
365
|
+
```
|
366
|
+
puts @traj.instance_variables
|
367
|
+
```
|
368
|
+
|
369
|
+
The description of all meta-programming features of Ruby is well beyond the scope of this
|
370
|
+
document, but it is a very frequently used and powerful feature of Ruby that makes programming in
|
371
|
+
Ruby a different experience than programming in other languages.
|
372
|
+
|
373
|
+
# Methods
|
374
|
+
|
375
|
+
Methods are a fundamental feature of object oriented programming. We will now extend our class
|
376
|
+
Trajectories to add methods to it. In SS4, a method 'plot' is added to Trajectories. At this
|
377
|
+
point, Renjin and Galaaz do not yet have plotting capabilities, so we will have to skip this
|
378
|
+
method and go directly to the implementation of the 'print' method.
|
379
|
+
|
380
|
+
Below is the R code for method print:
|
381
|
+
|
382
|
+
```
|
383
|
+
> setMethod ("print","Trajectories",
|
384
|
+
+ function(x,...){
|
385
|
+
+ cat("*** Class Trajectories, method Print *** \\n")
|
386
|
+
+ cat("* Times ="); print (x@times)
|
387
|
+
+ cat("* Traj = \\n"); print (x@traj)
|
388
|
+
+ cat("******* End Print (trajectories) ******* \\n")
|
389
|
+
+ }
|
390
|
+
+ )
|
391
|
+
```
|
392
|
+
|
393
|
+
Now the same code for class Trajectories in Galaaz. In general methods are defined in a class
|
394
|
+
together with all the class definition. We will first use this approach. Later, we will show
|
395
|
+
how to 'reopen' a class to add new methods to it.
|
396
|
+
|
397
|
+
In this example, we are defining a method named 'print'. We have been using method 'puts' to
|
398
|
+
output data. There is a Ruby method that is more flexible than puts and that we need to use to
|
399
|
+
implement our function: 'print'. However, trying to use Ruby print inside the definition of
|
400
|
+
Trajectories's print will not work, as Ruby will understand that as a recursive call to print.
|
401
|
+
Ruby's print is defined inside the Kernel module, so, in order to call Ruby's print inside the
|
402
|
+
definition of Trajectories's print we need to write 'Kernel.print'.
|
403
|
+
|
404
|
+
```{ruby}
|
405
|
+
class Trajectories
|
406
|
+
|
407
|
+
attr_reader :times
|
408
|
+
attr_reader :matrix
|
409
|
+
|
410
|
+
#
|
411
|
+
#
|
412
|
+
#
|
413
|
+
def initialize(times: nil, matrix: nil)
|
414
|
+
@times = times
|
415
|
+
@matrix = matrix
|
416
|
+
end
|
417
|
+
|
418
|
+
def print
|
419
|
+
puts("*** Class Trajectories, method Print *** ")
|
420
|
+
Kernel.print("times = ")
|
421
|
+
@times.pp
|
422
|
+
puts("traj =")
|
423
|
+
@matrix.pp
|
424
|
+
puts("******* End Print (trajectories) ******* ")
|
425
|
+
end
|
426
|
+
|
427
|
+
end
|
428
|
+
```
|
429
|
+
|
430
|
+
```{ruby}
|
431
|
+
puts @trajCochin
|
432
|
+
```
|
433
|
+
|
434
|
+
For Cochin, the result is correct. For Saint-Anne, print will display too much
|
435
|
+
information. So we need a second method.
|
436
|
+
|
437
|
+
Show is the default R method used to show an object when its name is written in the
|
438
|
+
console. We thus define 'show' by taking into account the size of the object: if there are too
|
439
|
+
many trajectories, 'show' posts only part of them.
|
440
|
+
|
441
|
+
Here is the R code for method 'show':
|
442
|
+
|
443
|
+
```
|
444
|
+
> setMethod("show","Trajectories",
|
445
|
+
+ function(object){
|
446
|
+
+ cat("*** Class Trajectories, method Show *** \\n")
|
447
|
+
+ cat("* Times ="); print(object@times)
|
448
|
+
+ nrowShow <- min(10,nrow(object@traj))
|
449
|
+
+ ncolShow <- min(10,ncol(object@traj))
|
450
|
+
+ cat("* Traj (limited to a matrix 10x10) = \\n")
|
451
|
+
+ print(formatC(object@traj[1:nrowShow,1:ncolShow]),quote=FALSE)
|
452
|
+
+ cat("******* End Show (trajectories) ******* \\n")
|
453
|
+
+ }
|
454
|
+
+ )
|
455
|
+
```
|
456
|
+
|
457
|
+
Now, let's write it with Galaaz. This time though, we will not rewrite the whole Trajectories
|
458
|
+
class, but just reopen it to add this specific method. The next example has many interesting
|
459
|
+
features of Galaaz, some we have already seen, others will be described now:
|
460
|
+
|
461
|
+
* As we have already seen, to call an R function one uses the R.<function> notation. There
|
462
|
+
is however another way: when the first argument to the R function is an R object such as a
|
463
|
+
matrix, a list, a vector, etc. we can use '.' notation to call the function. This makes the
|
464
|
+
function look like a method of the object. For instance, R.nrow(@matrix), can be called by
|
465
|
+
doing @matrix.nrow;
|
466
|
+
|
467
|
+
* In R, every number is converted to a vector and this can be done with method R.i. Converting
|
468
|
+
a vector with only one number back to a number can be done with method '.gz'. So if @num is
|
469
|
+
an R vector that holds a number, then @num.gz is a number that can be used normally with Ruby
|
470
|
+
methods;
|
471
|
+
|
472
|
+
* R functions and Ruby methods can be used freely in Galaaz. We show below two different ways
|
473
|
+
of getting the minimum of a number, either by calling R.min or by getting the minimum of an
|
474
|
+
array, with the min method;
|
475
|
+
|
476
|
+
* Galaaz allows for method 'chaining'. Method chaining, also known as named parameter idiom, is
|
477
|
+
a common syntax for invoking multiple method calls in object-oriented programming languages.
|
478
|
+
Each method returns an object, allowing the calls to be chained together in a single statement
|
479
|
+
without requiring variables to store the intermediate results. For instance @matrix.nrow.gz,
|
480
|
+
which returns the number of rows of the matrix as a number;
|
481
|
+
|
482
|
+
* Ranges in Ruby are represented by (x..y), where x is the beginning of the range and y its end.
|
483
|
+
An R matrix can be indexed by range, object@traj[1:nrowShow,1:ncolShow], the same result is
|
484
|
+
obtained in Galaaz by indexing @matrix[(1..nrow_show), (1..ncol_show)]. Observe that this
|
485
|
+
statement is then chained with the format function and with the pp method to print the matrix.
|
486
|
+
|
487
|
+
|
488
|
+
```{ruby}
|
489
|
+
class Trajectories
|
490
|
+
|
491
|
+
def show
|
492
|
+
puts("*** Class Trajectories, method Show *** ")
|
493
|
+
Kernel.print("times = ")
|
494
|
+
@times.pp
|
495
|
+
nrow_show = [10, @matrix.nrow.gz].min
|
496
|
+
ncol_show = R.min(10, @matrix.ncol).gz
|
497
|
+
puts("* Traj (limited to a matrix 10x10) = ")
|
498
|
+
@matrix[(1..nrow_show), (1..ncol_show)].format(digits: 2, nsmall: 2).pp
|
499
|
+
puts("******* End Show (trajectories) ******* ")
|
500
|
+
end
|
501
|
+
|
502
|
+
end
|
503
|
+
```
|
504
|
+
|
505
|
+
```{ruby}
|
506
|
+
@trajStAnne.show
|
507
|
+
```
|
508
|
+
|
509
|
+
Our show method has the same problem as SS4, i.e., if an empty trajectories object is created and
|
510
|
+
we try to 'show' it, it will generate an error. Let's see it:
|
511
|
+
|
512
|
+
```{ruby}
|
513
|
+
@empty_traj = Trajectories.new
|
514
|
+
```
|
515
|
+
|
516
|
+
```{ruby}
|
517
|
+
@empty_traj.show
|
518
|
+
```
|
519
|
+
|
520
|
+
```
|
521
|
+
NoMethodError: undefined method `pp' for nil:NilClass
|
522
|
+
show at :6
|
523
|
+
<eval> at :1
|
524
|
+
eval at org/jruby/RubyKernel.java:976
|
525
|
+
console at T:/Rodrigo/Desenv/Galaaz/examples/rbmarkdown.rb:61
|
526
|
+
<top> at T:\Rodrigo\Desenv\Galaaz\examples\not_so.rb:533
|
527
|
+
```
|
528
|
+
|
529
|
+
In this example, we try to call method .pp on a nil (empty) object and this method is not
|
530
|
+
defined. In order to fix this, we can either prevent an empty trajectories class from being created,
|
531
|
+
or make sure that method show will not choke on the empty object. We will take the second
|
532
|
+
alternative, to follow SS4 and will check if either @times or @matrix are empty. If either one
|
533
|
+
of them is nil, then we will print a message saying so.
|
534
|
+
|
535
|
+
Although the first alternative, i.e., not allowing empty objects, is a possibility in Ruby,
|
536
|
+
it seems that this is not the case for S4.
|
537
|
+
|
538
|
+
```{ruby}
|
539
|
+
class Trajectories
|
540
|
+
|
541
|
+
def show
|
542
|
+
if (@times.nil? || @matrix.nil?)
|
543
|
+
puts("*** Class Trajectories is empty!! *** ")
|
544
|
+
return
|
545
|
+
end
|
546
|
+
puts("*** Class Trajectories, method Show *** ")
|
547
|
+
Kernel.print("times = ")
|
548
|
+
@times.pp
|
549
|
+
nrow_show = [10, @matrix.nrow.gz].min
|
550
|
+
ncol_show = R.min(10, @matrix.ncol).gz
|
551
|
+
puts("* Traj (limited to a matrix 10x10) = ")
|
552
|
+
@matrix[(1..nrow_show), (1..ncol_show)].format(digits: 2, nsmall: 2).pp
|
553
|
+
puts("******* End Show (trajectories) ******* ")
|
554
|
+
end
|
555
|
+
|
556
|
+
end
|
557
|
+
```
|
558
|
+
|
559
|
+
```{ruby}
|
560
|
+
empty_traj.show
```
|
561
|
+
|
562
|
+
# Method count_missing
|
563
|
+
|
564
|
+
In R, methods 'print' and 'show' are methods that already exist. SS4 wants to add a method
|
565
|
+
called 'countMissing' which does not exist in R, and thus requires some special preparation. In
|
566
|
+
Ruby, every method we've created is a new method that exists inside the class. The fact that
|
567
|
+
'print' happens to be also a method for class Kernel and 'show' is not, is not of special interest.
|
568
|
+
Actually we've seen that in order to call method print from the Kernel class we had to call
|
569
|
+
Kernel.print.
|
570
|
+
|
571
|
+
To create method 'count_missing' we just need to reopen the Trajectories class and add the
|
572
|
+
method the same way we've done with method 'show'. Again, let's first look at R's 'countMissing'
|
573
|
+
and then at Ruby's:
|
574
|
+
|
575
|
+
|
576
|
+
```
|
577
|
+
> setMethod(
|
578
|
+
+ f= "countMissing",
|
579
|
+
+ signature= "Trajectories",
|
580
|
+
+ definition=function(object){
|
581
|
+
+ return(sum(is.na(object@traj)))
|
582
|
+
+ }
|
583
|
+
+ )
|
584
|
+
```
|
585
|
+
|
586
|
+
Here we introduce another particular case of Galaaz. R has many methods that have a '.' in
|
587
|
+
their names, such as 'is.na'. In Ruby, the dot '.' has a special meaning as it is the way
|
588
|
+
we call a method on an object. Doing 'R.is.na' will not work. So, in Galaaz, R functions that
|
589
|
+
have a dot in them will have the dot substituted by '__'. So, method is.na in Galaaz, becomes
|
590
|
+
R.is__na. In method count_missing we use method chaining and convert the final count to a number.
|
591
|
+
|
592
|
+
```{ruby}
|
593
|
+
class Trajectories
|
594
|
+
|
595
|
+
def count_missing
|
596
|
+
return @matrix.is__na.sum.gz
|
597
|
+
end
|
598
|
+
|
599
|
+
end
|
600
|
+
```
|
601
|
+
|
602
|
+
```{ruby}
|
603
|
+
puts @trajCochin.count_missing
|
604
|
+
```
|
605
|
+
|
606
|
+
# To See the Methods
|
607
|
+
|
608
|
+
In order to see the methods we have defined so far, we call on class Trajectories the method
|
609
|
+
'instance_methods' passing it one argument, 'false', as follows:
|
610
|
+
|
611
|
+
```{ruby}
|
612
|
+
puts @Trajectories.instance_methods(false)
|
613
|
+
```
|
614
|
+
|
615
|
+
It is interesting to observe that we see our three methods 'count_missing', 'print' and 'show', but
|
616
|
+
we also see two other methods 'times' and 'matrix', but those last two as far as we know are
|
617
|
+
just instance variables and not methods, right? More on that when we talk about Accessors.
|
618
|
+
|
619
|
+
Galaaz and Ruby do not by default provide a way to see a method's code. However, if the user uses
|
620
|
+
a Ruby console such as Pry, then seeing methods and debugging is possible. Pry is beyond the
|
621
|
+
scope of this document.
|
622
|
+
|
623
|
+
# Construction
|
624
|
+
|
625
|
+
Every class in Ruby has a constructor, if not explicitly defined, at least implicitly. Method
|
626
|
+
initialize is the constructor method and the one that coordinates the whole construction process.
|
627
|
+
|
628
|
+
# Inspector
|
629
|
+
|
630
|
+
There is no default 'inspector' in Ruby as in R, although there is nothing that prevents the
|
631
|
+
developer from inspecting and validating the input. For example, in the object Trajectories, one may
|
632
|
+
want to check that the number of elements in 'times' is equal to the number of columns in 'matrix'
|
633
|
+
and if they are not, issue an error. In order to understand why this is a restriction, the user is
|
634
|
+
again directed to SS4.
|
635
|
+
|
636
|
+
Here we show the R code for this validation:
|
637
|
+
|
638
|
+
```
|
639
|
+
> setClass(
|
640
|
+
+ Class="Trajectories",
|
641
|
+
+ representation(times="numeric",traj="matrix"),
|
642
|
+
+ validity=function(object){
|
643
|
+
+ cat("~~~ Trajectories: inspector ~~~ \\n")
|
644
|
+
+ if(length(object@times)!=ncol(object@traj)){
|
645
|
+
+ stop ("[Trajectories: validation] the number of temporal measurements does not correspond
|
646
|
+
+ }else{}
|
647
|
+
+ return(TRUE)
|
648
|
+
+ }
|
649
|
+
+ )
|
650
|
+
```
|
651
|
+
|
652
|
+
In order to implement this validation we will coordinate it in the initialize method.
|
653
|
+
|
654
|
+
```{ruby}
|
655
|
+
class Trajectories
|
656
|
+
|
657
|
+
def initialize(times: nil, matrix: nil)
|
658
|
+
@times = times
|
659
|
+
@matrix = matrix
|
660
|
+
|
661
|
+
# validate the input, to make sure that size of @times and the number of columns in
|
662
|
+
# @matrix are the same
|
663
|
+
puts ("~~~ Trajectories: inspector ~~~ ")
|
664
|
+
raise "[Trajectories: validation] the number of temporal measurements does not correspond with the number of columns in the matrix" if (@times.length.gz != @matrix.ncol.gz)
|
665
|
+
|
666
|
+
# show the object just created
|
667
|
+
show
|
668
|
+
|
669
|
+
end
|
670
|
+
|
671
|
+
end
|
672
|
+
```
|
673
|
+
|
674
|
+
Let's first create a Trajectories that validates fine, i.e., the number of elements in @times is
|
675
|
+
equal to the number of columns of the matrix. In this case, we will show a message saying that
|
676
|
+
validation was done and then print the object.
|
677
|
+
|
678
|
+
```{ruby}
|
679
|
+
ok = Trajectories.new(times: R.c(1..2), matrix: R.matrix((1..2), ncol: 2))
|
680
|
+
```
|
681
|
+
|
682
|
+
Now, if we try to create a Trajectories that does not pass the validation criteria, our code
|
683
|
+
will raise an exception. Exceptions are a standard way to deal with errors in Ruby code and
|
684
|
+
many other object oriented languages. The interested reader should look for further documentation
|
685
|
+
on exceptions on the web.
|
686
|
+
|
687
|
+
|
688
|
+
```{ruby}
|
689
|
+
error = Trajectories.new(times: R.c(1..3), matrix: R.matrix((1..2), ncol: 2))
|
690
|
+
```
|
691
|
+
|
692
|
+
The validation above does not consider the case when an empty object is created. Here we will
|
693
|
+
check to see if either times or matrix are nil, if either one of them is nil, then we will raise
|
694
|
+
an exception and interrupt the creation of the object. We also create a method validate that is
|
695
|
+
called from our initialize method.
|
696
|
+
|
697
|
+
Method validate has some interesting features about the integration of Galaaz and R. First,
|
698
|
+
observe that instead of using @times.length.gz and @matrix.ncol.gz to get the length and number of
|
699
|
+
columns of variables 'times' and 'matrix' we actually compared (@times.length != @matrix.ncol).
|
700
|
+
In this case, the actual R operator '!=' is being used. This operator works on vectors and
|
701
|
+
matrices and returns a logical vector with TRUE or FALSE. In order to convert the logical vector,
|
702
|
+
with one element, to a logical value in Ruby we use method 'gt' (get truth).
|
703
|
+
|
704
|
+
|
705
|
+
```{ruby}
|
706
|
+
class Trajectories
|
707
|
+
|
708
|
+
def initialize(times: nil, matrix: nil)
|
709
|
+
@times = times
|
710
|
+
@matrix = matrix
|
711
|
+
|
712
|
+
# call method validate to validate our imput
|
713
|
+
validate
|
714
|
+
|
715
|
+
# show the object just created
|
716
|
+
show
|
717
|
+
|
718
|
+
end
|
719
|
+
|
720
|
+
def validate
|
721
|
+
|
722
|
+
# Let's first check that we do not have an empty object
|
723
|
+
raise "Neither times nor matrix can be an empty object" if (@times.nil? || @matrix.nil?)
|
724
|
+
|
725
|
+
# validate the input, to make sure that size of @times and the number of columns in
|
726
|
+
# @matrix are the same
|
727
|
+
puts ("~~~ Trajectories: inspector ~~~ ")
|
728
|
+
raise "[Trajectories: validation] the number of temporal measurements does not correspond with the number of columns in the matrix" if (@times.length != @matrix.ncol).gt
|
729
|
+
|
730
|
+
end
|
731
|
+
|
732
|
+
end
|
733
|
+
```
|
734
|
+
|
735
|
+
Let's try then creating an empty object:
|
736
|
+
|
737
|
+
|
738
|
+
```{ruby}
|
739
|
+
error = Trajectories.new
|
740
|
+
```
|
741
|
+
|
742
|
+
Another example:
|
743
|
+
|
744
|
+
```{ruby}
|
745
|
+
error = Trajectories.new(times: 1)
|
746
|
+
```
|
747
|
+
|
748
|
+
Let's see now that the implementation is correct and that it does not raise an error on valid
|
749
|
+
input:
|
750
|
+
|
751
|
+
```{ruby}
|
752
|
+
ok = Trajectories.new(times: R.c(1, 2), matrix: R.matrix((1..2), ncol: 2))
|
753
|
+
```
|
754
|
+
|
755
|
+
The 'initialize' method is called ONLY during the initial creation of the object. If any instance
|
756
|
+
variable is later modified, no control is done. At this moment though, there is no way to change
|
757
|
+
the value of any of our instance variables.
|
758
|
+
|
759
|
+
```
|
760
|
+
error.times = R.c(1, 2, 3)
|
761
|
+
```
|
762
|
+
|
763
|
+
The Trajectories class works for R objects and not for Ruby objects and thus expects as input R
|
764
|
+
objects. Passing R objects in all examples has been the obligation of the programmer. Galaaz,
|
765
|
+
however, can translate Ruby objects to R objects and does so for parameter passing. Here we do
|
766
|
+
an explicit conversion of Ruby object to R in class Trajectories by calling R.convert for our
|
767
|
+
input parameters
|
768
|
+
|
769
|
+
```{ruby}
|
770
|
+
class Trajectories
|
771
|
+
|
772
|
+
def initialize(times: nil, matrix: nil)
|
773
|
+
@times = R.convert(times)
|
774
|
+
@matrix = R.convert(matrix)
|
775
|
+
|
776
|
+
# call method validate to validate our imput
|
777
|
+
validate
|
778
|
+
|
779
|
+
# show the object just created
|
780
|
+
show
|
781
|
+
|
782
|
+
end
|
783
|
+
|
784
|
+
def validate
|
785
|
+
|
786
|
+
# Let's first check that we do not have an empty object
|
787
|
+
raise "Neither times nor matrix can be an empty object" if (@times.nil? || @matrix.nil?)
|
788
|
+
|
789
|
+
# validate the input, to make sure that size of @times and the number of columns in
|
790
|
+
# @matrix are the same
|
791
|
+
puts ("~~~ Trajectories: inspector ~~~ ")
|
792
|
+
raise "[Trajectories: validation] the number of temporal measurements \#{@times.length.gz} \
|
793
|
+
does not correspond with the number of columns in the matrix \#{@matrix.ncol.gz}" if (@times.length.gz != @matrix.ncol.gz)
|
794
|
+
|
795
|
+
end
|
796
|
+
|
797
|
+
end
|
798
|
+
|
799
|
+
|
800
|
+
class Trajectories
|
801
|
+
|
802
|
+
def initialize(times: nil, matrix: nil)
|
803
|
+
@times = R.convert(times)
|
804
|
+
@matrix = R.convert(matrix)
|
805
|
+
|
806
|
+
# call method validate to validate our imput
|
807
|
+
validate
|
808
|
+
|
809
|
+
# show the object just created
|
810
|
+
show
|
811
|
+
|
812
|
+
end
|
813
|
+
|
814
|
+
def validate
|
815
|
+
|
816
|
+
# Let's first check that we do not have an empty object
|
817
|
+
raise "Neither times nor matrix can be an empty object" if (@times.nil? || @matrix.nil?)
|
818
|
+
|
819
|
+
# validate the input, to make sure that size of @times and the number of columns in
|
820
|
+
# @matrix are the same
|
821
|
+
puts ("~~~ Trajectories: inspector ~~~ ")
|
822
|
+
raise "[Trajectories: validation] the number of temporal measurements #{@times.length.gz} \
|
823
|
+
does not correspond with the number of columns in the matrix #{@matrix.ncol.gz}" if (@times.length.gz != @matrix.ncol.gz)
|
824
|
+
|
825
|
+
end
|
826
|
+
|
827
|
+
end
|
828
|
+
```
|
829
|
+
|
830
|
+
And now let's create a new Trajectories, but we will now pass a Ruby range for times:
|
831
|
+
|
832
|
+
```{ruby}
|
833
|
+
ok = Trajectories.new(times: (1..2), matrix: R.matrix((1..2), ncol: 2))
|
834
|
+
```
|
835
|
+
|
836
|
+
Perfect! This works fine. Let's do another example... Galaaz integrates with another Ruby
|
837
|
+
Gem called MDArray. MDArray provides multi-dimensional arrays for Ruby similar to what is
|
838
|
+
found in NumPy. It is beyond the scope of this paper to explain MDArray and the interested
|
839
|
+
reader is directed to MDArray wiki pages: https://github.com/rbotafogo/mdarray/wiki.
|
840
|
+
|
841
|
+
```{ruby}
|
842
|
+
ok = Trajectories.new(times: (1..2), matrix: MDArray.double([2, 2], [1, 2, 3, 4]))
|
843
|
+
```
|
844
|
+
|
845
|
+
We will now create a multi-dimensional array with the help of MDArray. We could think of this
|
846
|
+
multi-dimensional array as having BMI data for multiple patients. In this example, we have then
|
847
|
+
data for two patients:
|
848
|
+
|
849
|
+
```{ruby}
|
850
|
+
multi_array = MDArray.fromfunction("double", [2, 3, 4]) { |x, y, z| x + y + z }
|
851
|
+
```
|
852
|
+
|
853
|
+
```
|
854
|
+
multi_array.print
|
855
|
+
```
|
856
|
+
|
857
|
+
But for our Trajectories class, we need data for only one patient at a time, so we cannot
|
858
|
+
give this MDArray to Trajectories. MDArray allows us to get data slices efficiently, that is,
|
859
|
+
it will not do a data copy, just manipulate indexes so that only a 'view' of the data is made
|
860
|
+
available. So, let's make a Trajectories with data from our first patient:
|
861
|
+
|
862
|
+
console(<<-EOT)
|
863
|
+
ok1 = Trajectories.new(times: (1..4), matrix: multi_array.slice(0, 0))
|
864
|
+
EOT
|
865
|
+
|
866
|
+
body(<<-EOT)
|
867
|
+
And now let's create a Trajectories for our second patient:
|
868
|
+
EOT
|
869
|
+
|
870
|
+
console(<<-EOT)
|
871
|
+
ok2 = Trajectories.new(times: (1..4), matrix: multi_array.slice(0, 1))
|
872
|
+
EOT
|
873
|
+
|
874
|
+
# The Initializator
|
875
|
+
|
876
|
+
As we have seen, method 'initialize' is the main object creator orchestrator. This method can be
|
877
|
+
as complex as needed. So, let's get on with some improvements to our Trajectories class.
|
878
|
+
|
879
|
+
It would be rather pleasant that the columns of the matrix of the trajectories have names, the
|
880
|
+
names of measurements times. In the same way, the lines could be subscripted by a number of
|
881
|
+
individual.
|
882
|
+
|
883
|
+
To do this in R, one also uses method initialize:
|
884
|
+
|
885
|
+
|
886
|
+
```
|
887
|
+
> setMethod(
|
888
|
+
+ f="initialize",
|
889
|
+
+ signature="Trajectories",
|
890
|
+
+ definition=function(.Object,times,traj){
|
891
|
+
+ cat("~~~ Trajectories: initializator ~~~ \\n")
|
892
|
+
+ colnames(traj) <- paste("T",times,sep="")
|
893
|
+
+ rownames(traj) <- paste("I",1:nrow(traj),sep= "")
|
894
|
+
+ .Object@traj <- traj # Assignment of the slots
|
895
|
+
+ .Object@times <- times
|
896
|
+
+ return(.Object) # return of the object
|
897
|
+
+ }
|
898
|
+
+ )
|
899
|
+
```
|
900
|
+
|
901
|
+
Let's do this change to our 'initialize' method; however, before that, we need to introduce
|
902
|
+
a new characteristic of Galaaz. In R, it is possible to assign a value to the result of a
|
903
|
+
function. For example, 'rownames(x) <- c("v1", "v2", "v3")'. Assigning to functions that way
|
904
|
+
is not possible in Ruby. In order to do this assignment we need to introduce method 'fassign'.
|
905
|
+
The above assignment is then written in Galaaz as 'x.fassign(:rownames, R.c("v1", "v2", "v3"))',
|
906
|
+
where the first argument to function fassign is the function name preceded by ':'.
|
907
|
+
|
908
|
+
```{ruby}
|
909
|
+
class Trajectories
|
910
|
+
|
911
|
+
def initialize(times: nil, matrix: nil)
|
912
|
+
@times = times
|
913
|
+
@matrix = matrix
|
914
|
+
|
915
|
+
# call method validate to validate our imput
|
916
|
+
validate
|
917
|
+
|
918
|
+
# Add row names
|
919
|
+
puts ("~~~ Trajectories: initializator ~~~ ")
|
920
|
+
@matrix.fassign(:colnames, R.paste("T", @times, sep: ""))
|
921
|
+
@matrix.fassign(:rownames, R.paste("I", (1..@matrix.nrow.gz), sep: ""))
|
922
|
+
|
923
|
+
# show the object just created
|
924
|
+
show
|
925
|
+
|
926
|
+
end
|
927
|
+
|
928
|
+
end
|
929
|
+
```
|
930
|
+
|
931
|
+
```{ruby}
|
932
|
+
@traj = Trajectories.new(times: R.c(1,2,4,8), matrix: R.matrix((1..8),nrow: 2))
|
933
|
+
```
|
934
|
+
|
935
|
+
Another example:
|
936
|
+
|
937
|
+
```{ruby}
|
938
|
+
error = Trajectories.new(times: R.c(1,2,4,8), matrix: R.matrix((1..8), nrow: 2))
|
939
|
+
```
|
940
|
+
|
941
|
+
Note that we still call our 'validate' method and it is still an error to create an empty
|
942
|
+
Trajectories or one in which the sizes are wrong:
|
943
|
+
|
944
|
+
```{ruby}
|
945
|
+
error = Trajectories.new(times: R.c(1, 2, 48), matrix: R.matrix((1..8), nrow: 2))
|
946
|
+
```
|
947
|
+
|
948
|
+
A constructor does not necessarily take the instance variable of the object as argument. For
|
949
|
+
example, if we know (that is not the case in reality, but let us imagine so) that the
|
950
|
+
BMI increases by 0.1 every week, we could build trajectories by providing the number
|
951
|
+
of weeks and the initial weights.
|
952
|
+
|
953
|
+
First the code in R, we skip the definition of class TrajectoriesBis:
|
954
|
+
|
955
|
+
|
956
|
+
```
|
957
|
+
> setMethod ("initialize",
|
958
|
+
+ "TrajectoriesBis",
|
959
|
+
+ function(.Object,nbWeek,BMIinit){
|
960
|
+
+ traj <- outer(BMIinit,1:nbWeek,function(init,week){return(init+0.1*week)})
|
961
|
+
+ colnames(traj) <- paste("T",1:nbWeek,sep="")
|
962
|
+
+ rownames(traj) <- paste("I",1:nrow(traj),sep="")
|
963
|
+
+ .Object@times <- 1:nbWeek
|
964
|
+
+ .Object@traj <- traj
|
965
|
+
+ return(.Object)
|
966
|
+
+ }
|
967
|
+
+ )
|
968
|
+
```
|
969
|
+
|
970
|
+
Now, let's make a TrajectoriesBis in Galaaz. Here again, we should point out some characteristics
|
971
|
+
of our code:
|
972
|
+
|
973
|
+
* We made initialize with two positional arguments, instead of named arguments, i.e.,
|
974
|
+
the first argument is the number of weeks and the second bmi_init. In this case,
|
975
|
+
when making a new object the position of the arguments is important and there is no
|
976
|
+
way to pass the argument by name;
|
977
|
+
|
978
|
+
* R function outer was called as if a method from bmi_init using dot notation, although
|
979
|
+
one could use R.outer without problem;
|
980
|
+
|
981
|
+
* Function 'outer' expects an R function as its 3rd argument. In order to build an R
|
982
|
+
function from Galaaz, we need to pass the function definition as a string to R.eval.
|
983
|
+
|
984
|
+
```{ruby}
|
985
|
+
class TrajectoriesBis
|
986
|
+
|
987
|
+
attr_reader :times
|
988
|
+
attr_reader :matrix
|
989
|
+
|
990
|
+
def initialize(number_weeks, bmi_init)
|
991
|
+
@matrix = bmi_init.outer((1..number_weeks),
|
992
|
+
R.eval("function(init, week) {return(init + 0.1 * week)}"))
|
993
|
+
@times = number_weeks
|
994
|
+
end
|
995
|
+
|
996
|
+
end
|
997
|
+
|
998
|
+
@traj_bis = TrajectoriesBis.new(4, R.c(16,17,15.6))
|
999
|
+
```
|
1000
|
+
|
1001
|
+
```{ruby}
|
1002
|
+
puts @traj_bis.matrix
|
1003
|
+
```
|
1004
|
+
|
1005
|
+
It is always possible to pass a Ruby variable to any string, by interpolating it into the string.
|
1006
|
+
To interpolate a variable into a string we put the variable inside #{}. As an example, let's
|
1007
|
+
assume that we will also require the BMI increase as a parameter for the constructor:
|
1008
|
+
|
1009
|
+
```{ruby}
|
1010
|
+
class TrajectoriesBis
|
1011
|
+
|
1012
|
+
def initialize(number_weeks, bmi_init, increment)
|
1013
|
+
@matrix = bmi_init.outer((1..number_weeks),
|
1014
|
+
R.eval("function(init, week) {return(init + \#{increment} * week)}"))
|
1015
|
+
@times = number_weeks
|
1016
|
+
end
|
1017
|
+
|
1018
|
+
end
|
1019
|
+
|
1020
|
+
traj_bis = TrajectoriesBis.new(4, R.c(16,17,15.6), 0.3)
|
1021
|
+
```
|
1022
|
+
|
1023
|
+
```{ruby}
|
1024
|
+
class TrajectoriesBis
|
1025
|
+
|
1026
|
+
def initialize(number_weeks, bmi_init, increment)
|
1027
|
+
@matrix = bmi_init.outer((1..number_weeks),
|
1028
|
+
R.eval("function(init, week) {return(init + #{increment} * week)}"))
|
1029
|
+
@times = number_weeks
|
1030
|
+
end
|
1031
|
+
|
1032
|
+
end
|
1033
|
+
|
1034
|
+
@traj_bis = TrajectoriesBis.new(4, R.c(16,17,15.6), 0.3)
|
1035
|
+
```
|
1036
|
+
|
1037
|
+
```{ruby}
|
1038
|
+
puts @traj_bis.matrix
|
1039
|
+
```
|
1040
|
+
|
1041
|
+
# Constructors for Users
|
1042
|
+
|
1043
|
+
Many times, it is interesting to have different ways of constructing an object depending on
|
1044
|
+
what information our users have or want to provide to the constructor. Although we have only one
|
1045
|
+
initialize method, we can create multiple methods, that do some preprocessing and then call the
|
1046
|
+
initialize method to carry out the object building.
|
1047
|
+
|
1048
|
+
In order to do that, we use what are called class methods, instead of instance methods. All the
|
1049
|
+
methods we've created so far are instance methods, class methods are defined by prepending the
|
1050
|
+
self keyword to the method's name. Still using the assumption that the BMI will grow by 0.1 per
|
1051
|
+
week, let's define a regular trajectory without having to define a TrajectoriesBis as above:
|
1052
|
+
|
1053
|
+
|
1054
|
+
```
|
1055
|
+
> regularTrajectories <- function(nbWeek,BMIinit) {
|
1056
|
+
+ traj <- outer(BMIinit,1:nbWeek,function(init,week){return(init+0.1*week)})
|
1057
|
+
+ times <- 1: nbWeek
|
1058
|
+
+ return(new(Class="Trajectories",times=times,traj=traj))
|
1059
|
+
+ }
|
1060
|
+
> regularTrajectories(nbWeek=3,BMIinit=c(14,15,16))
|
1061
|
+
```
|
1062
|
+
|
1063
|
+
Notice how method 'regular' is defined as 'self.regular', making it a class method. The last
|
1064
|
+
statement of the method definition is actually a call to the Trajectories constructor 'new' passing
|
1065
|
+
the calculated values for times and matrix.
|
1066
|
+
|
1067
|
+
Notice also how method regular is called, similar to the way new is called by adding it after class
|
1068
|
+
Trajectories name: 'Trajectories.regular'.
|
1069
|
+
|
1070
|
+
```{ruby}
|
1071
|
+
class Trajectories
|
1072
|
+
|
1073
|
+
def self.regular(number_weeks: nil, bmi_init: nil)
|
1074
|
+
matrix = bmi_init.outer((1..number_weeks),
|
1075
|
+
R.eval("function(init, week) {return(init + 0.1 * week)}"))
|
1076
|
+
times = R.c((1..number_weeks))
|
1077
|
+
Trajectories.new(times: times, matrix: matrix)
|
1078
|
+
end
|
1079
|
+
|
1080
|
+
end
|
1081
|
+
```
|
1082
|
+
|
1083
|
+
```{ruby}
|
1084
|
+
@regular = Trajectories.regular(bmi_init: R.c(14, 15, 16), number_weeks: 3)
|
1085
|
+
```
|
1086
|
+
|
1087
|
+
We have already seen that constructors can be as complex as needed, calling other methods and doing
|
1088
|
+
calculations on the received parameters. On this last example, we will check if the times
|
1089
|
+
variable was provided. If it is not provided, then we will use matrix columns to define the times:
|
1090
|
+
|
1091
|
+
```{ruby}
|
1092
|
+
class Trajectories
|
1093
|
+
|
1094
|
+
def self.init(times: nil, matrix: nil)
|
1095
|
+
times = R.c((1..matrix.ncol.gz)) if times.nil?
|
1096
|
+
Trajectories.new(times: times, matrix: matrix)
|
1097
|
+
end
|
1098
|
+
|
1099
|
+
end
|
1100
|
+
```
|
1101
|
+
|
1102
|
+
```
|
1103
|
+
@traj = Trajectories.init(matrix: R.matrix((1..8), ncol: 4))
|
1104
|
+
```
|
1105
|
+
|
1106
|
+
# Accessors
|
1107
|
+
|
1108
|
+
Accessors are methods for getting and setting the value of instance variables.
|
1109
|
+
|
1110
|
+
# Get
|
1111
|
+
|
1112
|
+
Getters are methods for getting the value of an instance variable. We have been using getters
|
1113
|
+
since the beginning of this document, without explicitly saying so. When defining attr_reader
|
1114
|
+
:times and attr_reader :matrix, we have actually defined two getter methods for reading the values
|
1115
|
+
of variables times and matrix respectively. We can however define getters explicitly:
|
1116
|
+
|
1117
|
+
```{ruby}
|
1118
|
+
class TrajectoriesBis
|
1119
|
+
|
1120
|
+
def initialize(times: times, matrix: matrix)
|
1121
|
+
@times = times
|
1122
|
+
@matrix = matrix
|
1123
|
+
end
|
1124
|
+
|
1125
|
+
def times
|
1126
|
+
@times
|
1127
|
+
end
|
1128
|
+
|
1129
|
+
def matrix
|
1130
|
+
@matrix
|
1131
|
+
end
|
1132
|
+
|
1133
|
+
end
|
1134
|
+
|
1135
|
+
@traj = TrajectoriesBis.new(times: 1, matrix: 2)
|
1136
|
+
```
|
1137
|
+
|
1138
|
+
```{ruby}
|
1139
|
+
puts @traj.times
|
1140
|
+
```
|
1141
|
+
|
1142
|
+
```{ruby}
|
1143
|
+
puts @traj.matrix
|
1144
|
+
```
|
1145
|
+
|
1146
|
+
It is also possible to define more sophisticated getters. For example one can
|
1147
|
+
regularly need the BMI at inclusion. In R, one would index a matrix as matrix[,1]. In Ruby,
|
1148
|
+
it is a syntax error to have a ',' just after the '['. In this case we need to add 'nil' as
|
1149
|
+
in matrix[nil, 1]:
|
1150
|
+
|
1151
|
+
```{ruby}
|
1152
|
+
class Trajectories
|
1153
|
+
|
1154
|
+
def get_traj_inclusion
|
1155
|
+
@matrix[nil, 1]
|
1156
|
+
end
|
1157
|
+
|
1158
|
+
end
|
1159
|
+
```
|
1160
|
+
|
1161
|
+
```{ruby}
|
1162
|
+
puts @trajCochin.get_traj_inclusion
|
1163
|
+
```
|
1164
|
+
|
1165
|
+
# Set
|
1166
|
+
|
1167
|
+
A setter is a method that assigns a value to a variable. As with getters, Ruby also provides an
|
1168
|
+
easy way to write setters and allow you to also write them explicitly. Let's first use the
|
1169
|
+
simple way:
|
1170
|
+
|
1171
|
+
```{ruby}
|
1172
|
+
class TrajectoriesBis
|
1173
|
+
|
1174
|
+
attr_writer :times
|
1175
|
+
attr_writer :matrix
|
1176
|
+
|
1177
|
+
end
|
1178
|
+
|
1179
|
+
traj = TrajectoriesBis.new
|
1180
|
+
traj.times = R.c(1, 2)
|
1181
|
+
traj.matrix = R.matrix((1..2), ncol: 2)
|
1182
|
+
```
|
1183
|
+
|
1184
|
+
```
|
1185
|
+
puts @traj.matrix
|
1186
|
+
```
|
1187
|
+
|
1188
|
+
Note that now we can use '=' to assign a value to both variables times and matrix. Without
|
1189
|
+
setters, changing the value of variables times and matrix was not possible. Our class, up
|
1190
|
+
to this point was protected from any changes to those variables. If we need to allow changes
|
1191
|
+
to those variables, then setters are needed. In this case, the simple setter as shown above is
|
1192
|
+
not ideal, since it would allow changes that break the restriction that variable times has to
|
1193
|
+
have the same length as the number of columns of matrix. In order to do the verification we
|
1194
|
+
need to implement a more sophisticated setter. In the example below, we add the 'times=' setter
|
1195
|
+
that receives as input one argument. First we convert the given argument to an R object, then
|
1196
|
+
check to see that the length of times is the same as the number of columns and if everything is
|
1197
|
+
fine, then we set the value of instance variable times:
|
1198
|
+
EOT
|
1199
|
+
|
1200
|
+
#
|
1201
|
+
# We need to put the times= definition inside the comment_code block because it accesses a variable
|
1202
|
+
# from inside the HereDoc. If we do not comment this access we will get an error saying that
|
1203
|
+
# @matrix is not a global variable, which is really the case.
|
1204
|
+
#
|
1205
|
+
```{ruby}
|
1206
|
+
class Trajectories
|
1207
|
+
|
1208
|
+
def times=(times)
|
1209
|
+
times = R.convert(times)
|
1210
|
+
raise "[Trajectories: validation] the number of temporal measurements \#{times.length.gz} \
|
1211
|
+
does not correspond with the number of columns in the matrix \#{@matrix.ncol.gz}" if (times.length.gz != @matrix.ncol.gz)
|
1212
|
+
@times = times
|
1213
|
+
end
|
1214
|
+
|
1215
|
+
end
|
1216
|
+
```
|
1217
|
+
|
1218
|
+
class Trajectories
|
1219
|
+
|
1220
|
+
def times=(times)
|
1221
|
+
times = R.convert(times)
|
1222
|
+
raise "[Trajectories: validation] the number of temporal measurements #{times.length.gz} \
|
1223
|
+
does not correspond with the number of columns in the matrix #{@matrix.ncol.gz}" if (times.length.gz != @matrix.ncol.gz)
|
1224
|
+
@times = times
|
1225
|
+
end
|
1226
|
+
|
1227
|
+
end
|
1228
|
+
|
1229
|
+
```{ruby}
|
1230
|
+
trajCochin.times = (1..5)
|
1231
|
+
```
|
1232
|
+
|
1233
|
+
We now set the value appropriately and will not get any errors:
|
1234
|
+
|
1235
|
+
```{ruby}
|
1236
|
+
trajCochin.times = R.c(1, 5, 6, 8)
|
1237
|
+
```
|
1238
|
+
|
1239
|
+
# The Operator '['
|
1240
|
+
|
1241
|
+
It is also possible to define getters by using the operator '['. This operator is not usually
|
1242
|
+
used for returning instance variables and it is preferable to use the methods we've used above;
|
1243
|
+
however, for completeness with SS4 we are showing how to define this here. Operator '[' is
|
1244
|
+
better left to be used for array/matrix indices.
|
1245
|
+
|
1246
|
+
```{ruby}
|
1247
|
+
class Trajectories
|
1248
|
+
|
1249
|
+
def [](var_name)
|
1250
|
+
|
1251
|
+
case var_name
|
1252
|
+
when "times"
|
1253
|
+
@times
|
1254
|
+
when "matrix"
|
1255
|
+
@matrix
|
1256
|
+
else
|
1257
|
+
raise "Unknown instance variable"
|
1258
|
+
end
|
1259
|
+
|
1260
|
+
end
|
1261
|
+
|
1262
|
+
end
|
1263
|
+
```
|
1264
|
+
|
1265
|
+
```{ruby}
|
1266
|
+
puts @trajCochin["times"]
|
1267
|
+
```
|
1268
|
+
|
1269
|
+
Similarly, we could use operator '[]=' to assign a value to times and matrix. We will not do this
|
1270
|
+
here as we think that the other options are better and the interested user can easily find help,
|
1271
|
+
if needed, to implement such a method.
|
1272
|
+
|
1273
|
+
# To Go Further
|
1274
|
+
|
1275
|
+
This section will introduce advanced features of Object Oriented programming such as Inheritance
|
1276
|
+
and Modules and will also show some aspects of S4 that do not apply to Ruby.
|
1277
|
+
|
1278
|
+
# Methods Using Several Arguments
|
1279
|
+
|
1280
|
+
In Ruby, methods can have as many arguments as needed and those methods are defined the way we
|
1281
|
+
have already seen in many of the examples above. The example in SS4 presents a method that prints
|
1282
|
+
different output if its input is numeric, character or both. Let's write a class in Ruby that
|
1283
|
+
does the same for Numeric and String. In Ruby we do not define global functions, we always define
|
1284
|
+
methods inside classes or modules (as we will see later). Also, Ruby is not typed, so methods are
|
1285
|
+
not called depending on their types as in SS4 examples. Below, method test will be called with
|
1286
|
+
one parameter. At the time of calling we do not know the type of the argument, the method can
|
1287
|
+
then check if the received argument is a Numeric or a String and at this time, decide what should
|
1288
|
+
be printed.
|
1289
|
+
|
1290
|
+
```{ruby}
|
1291
|
+
class Test
|
1292
|
+
|
1293
|
+
def test(input)
|
1294
|
+
|
1295
|
+
case input
|
1296
|
+
when Numeric
|
1297
|
+
puts "The input is numeric: #{input}"
|
1298
|
+
when String
|
1299
|
+
puts "The input is a string: #{input}"
|
1300
|
+
else
|
1301
|
+
puts "The input is neither a number nor a string"
|
1302
|
+
end
|
1303
|
+
|
1304
|
+
end
|
1305
|
+
|
1306
|
+
end
|
1307
|
+
|
1308
|
+
t = Test.new
|
1309
|
+
```
|
1310
|
+
|
1311
|
+
```{ruby}
|
1312
|
+
puts @t.test(5)
|
1313
|
+
```
|
1314
|
+
|
1315
|
+
```{ruby}
|
1316
|
+
puts @t.test("Hello")
|
1317
|
+
```
|
1318
|
+
|
1319
|
+
Ruby has ways of dealing with multiple arguments, missing arguments, undefined number of arguments,
|
1320
|
+
named arguments, unnamed arguments, etc. This is beyond the scope of this document and we
|
1321
|
+
suggest the interested reader to go to the many resources about Ruby that can easily be found
|
1322
|
+
on the web.
|
1323
|
+
|
1324
|
+
We will now create a new class 'Partition' that we will use later in this document. This class will
|
1325
|
+
have only the basic methods needed for the examples to work.
|
1326
|
+
|
1327
|
+
```{ruby}
|
1328
|
+
class Partition
|
1329
|
+
|
1330
|
+
attr_reader :nb_groups
|
1331
|
+
attr_reader :part
|
1332
|
+
|
1333
|
+
def initialize(nb_groups, part)
|
1334
|
+
@nb_groups = nb_groups
|
1335
|
+
@part = part
|
1336
|
+
end
|
1337
|
+
|
1338
|
+
end
|
1339
|
+
|
1340
|
+
@partCochin = Partition.new(2, R.c("A","B","A","B").factor)
|
1341
|
+
@partStAnne = Partition.new(2, R.c("A","B").rep(R.c(50,30)).factor)
|
1342
|
+
```
|
1343
|
+
|
1344
|
+
```
|
1345
|
+
puts @partCochin.part
|
1346
|
+
```
|
1347
|
+
|
1348
|
+
```{ruby}
|
1349
|
+
puts @partStAnne.part
|
1350
|
+
```
|
1351
|
+
|
1352
|
+
We will suppose that part is always composed of capital letters going from A to
|
1353
|
+
LETTERS[nb_groups].
|
1354
|
+
|
1355
|
+
# Inheritance
|
1356
|
+
|
1357
|
+
Ruby being a powerful Object Oriented language has the concept of Inheritance, but it does not
|
1358
|
+
allow for multiple inheritance. Multiple inheritance has many drawbacks and Ruby just does not
|
1359
|
+
support it. However, Ruby has other concepts that make up for the lack of multiple inheritance as
|
1360
|
+
we will see in the following examples.
|
1361
|
+
|
1362
|
+
So, let's go back to SS4 examples. We want now to define a class called TrajPartitioned that
|
1363
|
+
inherits from class Trajectories. When a class has a parent, all methods available for the
|
1364
|
+
parent are also available to the child.
|
1365
|
+
|
1366
|
+
|
1367
|
+
```{ruby}
|
1368
|
+
class TrajPartitioned < Trajectories
|
1369
|
+
|
1370
|
+
attr_reader :list_partitions
|
1371
|
+
|
1372
|
+
end
|
1373
|
+
```
|
1374
|
+
|
1375
|
+
That's all there is to it! We've just created a class TrajPartitioned that inherits all methods
|
1376
|
+
from class Trajectories and at this point does nothing different from Trajectories, but adds a
|
1377
|
+
new instance variable: list_partitions.
|
1378
|
+
|
1379
|
+
Creating TrajPartitioned without arguments will generate an error, since a Trajectories requires
|
1380
|
+
both times and matrix to be non null.
|
1381
|
+
|
1382
|
+
|
1383
|
+
```{ruby}
|
1384
|
+
@tdPitie = TrajPartitioned.new
|
1385
|
+
```
|
1386
|
+
|
1387
|
+
Let's try to create a TrajPartitioned, but passing to it two partitions. For that, let's first
|
1388
|
+
create a new Partition:
|
1389
|
+
|
1390
|
+
```{ruby}
|
1391
|
+
@partCochin2 = Partition.new(3, R.c("A", "C", "C", "B").factor)
|
1392
|
+
```
|
1393
|
+
|
1394
|
+
And now let's create the TrajPartitioned:
|
1395
|
+
|
1396
|
+
```{ruby}
|
1397
|
+
@tdCochin = TrajPartitioned.new(times: R.c(1,3,4,5), matrix: trajCochin.matrix,
|
1398
|
+
list_partitions: R.list(partCochin,partCochin2))
|
1399
|
+
```
|
1400
|
+
|
1401
|
+
This didn't work giving us an error saying that <Partition...> is an unknown parameter for R. Hummm??
|
1402
|
+
R function 'list' expects R objects, and in this case, partCochin and partCochin2 are Ruby classes,
|
1403
|
+
so trying to apply function list to them does not work. Clearly, we will have to work in the realm
|
1404
|
+
of Ruby to keep the list of partitions. This is not a problem as Ruby has data structures to
|
1405
|
+
maintain a list of objects, the Array. Let's then try another solution:
|
1406
|
+
|
1407
|
+
```{ruby}
|
1408
|
+
tdCochin = TrajPartitioned.new(times: R.c(1,3,4,5), matrix: trajCochin.matrix,
|
1409
|
+
list_partitions: [partCochin, partCochin2])
|
1410
|
+
```
|
1411
|
+
|
1412
|
+
We now get a second error: 'unknown keyword: list_partitions'. Class TrajPartitioned inherits
|
1413
|
+
from class Trajectories and class Trajectories has an initialize function that requires two
|
1414
|
+
parameters, times and matrix; list_partitions is not a parameter for initialize and is thus
|
1415
|
+
unknown. In order to fix this problem we need to create an initialize method for class
|
1416
|
+
TrajPartitioned.
|
1417
|
+
|
1418
|
+
|
1419
|
+
# The 'super' Keyword
|
1420
|
+
|
1421
|
+
R has a method called 'callNextMethod' for control flow between inherited classes. In Ruby, we
|
1422
|
+
have a model that is a bit different. When a method is called on a subclass, if this method is
|
1423
|
+
not found it will be searched in the parent class and it will go up the hierarchy of classes until
|
1424
|
+
it is found or an error is issued. If we want the parent method to be called we can call 'super':
|
1425
|
+
|
1426
|
+
|
1427
|
+
```{ruby}
|
1428
|
+
class TrajPartitioned
|
1429
|
+
|
1430
|
+
def initialize(times: times, matrix: matrix, list_partitions: list_partitions)
|
1431
|
+
super(times: times, matrix: matrix)
|
1432
|
+
@list_partitions = list_partitions
|
1433
|
+
end
|
1434
|
+
|
1435
|
+
end
|
1436
|
+
```
|
1437
|
+
|
1438
|
+
Let's try our example again:
|
1439
|
+
|
1440
|
+
```{ruby}
|
1441
|
+
@tdCochin = TrajPartitioned.new(times: R.c(1,3,4,5), matrix: trajCochin.matrix,
|
1442
|
+
list_partitions: [partCochin, partCochin2])
|
1443
|
+
```
|
1444
|
+
|
1445
|
+
Now tdCochin is created correctly; however, the 'show' method only shows information about
|
1446
|
+
times and matrix, there is nothing about our new list_partitions variable. This is so, since
|
1447
|
+
there is no method 'show' in TrajPartitioned, so method 'show' from Trajectories is executed.
|
1448
|
+
|
1449
|
+
So, let's start by writing a 'print' method, that will print all the information we have in
|
1450
|
+
TrajPartitioned. The flow of control for this method is: Ruby sees a call to 'print', so it checks
|
1451
|
+
to see if 'print' is a method for TrajPartitioned. Since we have just defined this method, Ruby
|
1452
|
+
finds it and executes it. The first command in print is a call to 'super', which will call the
|
1453
|
+
parent 'print' method, that prints information for 'times' and 'matrix'. When the parent 'print'
|
1454
|
+
finishes control continues after the 'super' call, printing the number of available partitions.
|
1455
|
+
|
1456
|
+
```{ruby}
|
1457
|
+
class TrajPartitioned
|
1458
|
+
|
1459
|
+
def print
|
1460
|
+
super
|
1461
|
+
puts ("the object also contains \#{@list_partitions.length} partition")
|
1462
|
+
puts ("***** Fine of print (TrajPartitioned) *****")
|
1463
|
+
end
|
1464
|
+
|
1465
|
+
end
|
1466
|
+
```
|
1467
|
+
|
1468
|
+
```{ruby}
|
1469
|
+
puts @tdCochin
|
1470
|
+
```
|
1471
|
+
|
1472
|
+
Notice that this model is much cleaner than 'callNextMethod' and is not subject to any of the
|
1473
|
+
difficulties presented in SS4 and there is no need for the keywords “is”, “as” and “as<-”, although
|
1474
|
+
Ruby provides methods to check the class of an object, its hierarchy, etc. when needed.
|
1475
|
+
|
1476
|
+
In Ruby there is no method similar to "setIs" and it is not possible to convert one class into
|
1477
|
+
another, but there are other ways of getting the necessary results. Let's then implement a
|
1478
|
+
method that returns the partition with the least number of groups. First, as usual, the R code
|
1479
|
+
with 'setIs':
|
1480
|
+
|
1481
|
+
```
|
1482
|
+
> setIs(
|
1483
|
+
+ class1="TrajPartitioned",
|
1484
|
+
+ class2="Partition",
|
1485
|
+
+ coerce=function(from,to){
|
1486
|
+
+ numberGroups <- sapply(tdCochin@listPartitions,getNbGroups)
|
1487
|
+
+ Smallest <- which.min(-numberGroups)
|
1488
|
+
+ to<-new("Partition")
|
1489
|
+
+ to@nbGroups <- getNbGroups(from@listPartitions[[Smallest]])
|
1490
|
+
+ to@part <- getPart(from@listPartitions[[Smallest]])
|
1491
|
+
+ return(to)
|
1492
|
+
+ }
|
1493
|
+
+ )
|
1494
|
+
```
|
1495
|
+
|
1496
|
+
And now the Ruby code. Here we are getting deeper into Ruby and it is becoming harder for a
|
1497
|
+
pure R developer to understand the code. We will describe it in more detail:
|
1498
|
+
|
1499
|
+
* We define a method called 'to_part' that has one argument 'which'. By default 'which'
|
1500
|
+
is ':min', the name of the minimum method. This means that if no argument is given to
|
1501
|
+
to_part it will assume the which = :min;
|
1502
|
+
|
1503
|
+
* @list_partitions is a Ruby array. Method map is similar to method sapply in R, it
|
1504
|
+
applies a 'block' to every element of the array, returning an array. Describing
|
1505
|
+
blocks is beyond the scope of this document, but we can think of it as if it were a
|
1506
|
+
function. The block is in '{}' and has one argument named 'part'. Thus, map goes
|
1507
|
+
through all elements of the array, and gets the nb_groups of the element and returns
|
1508
|
+
them into the number_groups array.
|
1509
|
+
|
1510
|
+
* number_groups is an array and doing number_groups.min returns the minimum value in
|
1511
|
+
number_groups and number_groups.max the maximum. We can call a method on an object
|
1512
|
+
by 'sending' the method name to the object, so, number_groups.send(:min) is equivalent to
|
1513
|
+
number_groups.min;
|
1514
|
+
|
1515
|
+
* Method 'index' for array, returns the index of a given element. So, number_groups.index(3)
|
1516
|
+
would return the index of the element '3'. Then number_groups.index(number_groups.min)
|
1517
|
+
returns the index of the minimum element in the array. This is the equivalent of R
|
1518
|
+
which.min(number_groups);
|
1519
|
+
|
1520
|
+
* Finally, number_groups.index(number_groups.send(which)), will return the index of the
|
1521
|
+
element we ask for, be it :min or :max. Note that if we pass another value, this would
|
1522
|
+
be an error.
|
1523
|
+
|
1524
|
+
```{ruby}
|
1525
|
+
class TrajPartitioned
|
1526
|
+
|
1527
|
+
def to_part(which = :min)
|
1528
|
+
number_groups = @list_partitions.map { |part| part.nb_groups }
|
1529
|
+
selected = number_groups.index(number_groups.send(which))
|
1530
|
+
return @list_partitions[selected]
|
1531
|
+
end
|
1532
|
+
|
1533
|
+
end
|
1534
|
+
```
|
1535
|
+
|
1536
|
+
To get the partition with the minimum number of groups:
|
1537
|
+
|
1538
|
+
```{ruby}
|
1539
|
+
puts @tdCochin.to_part.part
|
1540
|
+
```
|
1541
|
+
|
1542
|
+
To get the partition with the maximum number of groups:
|
1543
|
+
|
1544
|
+
```{ruby}
|
1545
|
+
tdCochin.to_part(:max).part.pp
|
1546
|
+
```
|
1547
|
+
|
1548
|
+
In this example we did not follow exactly the R code from SS4. The reason for that is that
|
1549
|
+
'list_partitions' is a list of Ruby classes and we cannot run sapply on this list. If we
|
1550
|
+
try to call a 'getNbGroups' or in the Ruby case nb_groups, the code will crash. Let's try
|
1551
|
+
it:
|
1552
|
+
|
1553
|
+
# Virtual Classes
|
1554
|
+
|
1555
|
+
In Ruby there are no "Virtual Classes", but it is possible to implement derived classes from
|
1556
|
+
a parent class with methods that behave properly according to the object's class. Following
|
1557
|
+
SS4 we will implement two classes: PartitionSimple and PartitionEval which are subclasses
|
1558
|
+
of class PartitionFather. PartitionFather will just be a regular class. Methods defined in
|
1559
|
+
PartitionFather will be available to be used in the subclasses.
|
1560
|
+
|
1561
|
+
Here is the R code of those classes and the implementation of a method in PartitionFather
|
1562
|
+
that multiplies the number of groups by 2:
|
1563
|
+
|
1564
|
+
|
1565
|
+
```
|
1566
|
+
> setClass(
|
1567
|
+
+ Class="PartitionFather",
|
1568
|
+
+ representation=representation(nbGroups="numeric","VIRTUAL")
|
1569
|
+
+ )
|
1570
|
+
|
1571
|
+
> setClass(
|
1572
|
+
+ Class="PartitionSimple",
|
1573
|
+
+ representation=representation(part="factor"),
|
1574
|
+
+ contains="PartitionFather"
|
1575
|
+
+ )
|
1576
|
+
|
1577
|
+
> setClass(
|
1578
|
+
+ Class="PartitionEval",
|
1579
|
+
+ representation=representation(part="ordered"),
|
1580
|
+
+ contains="PartitionFather"
|
1581
|
+
+ )
|
1582
|
+
|
1583
|
+
> setGeneric("nbMultTwo",function(object){standardGeneric("nbMultTwo")})
|
1584
|
+
|
1585
|
+
> setMethod("nbMultTwo","PartitionFather",
|
1586
|
+
+ function(object){
|
1587
|
+
+ object@nbGroups <- object@nbGroups*2
|
1588
|
+
+ return (object)
|
1589
|
+
+ }
|
1590
|
+
+ )
|
1591
|
+
```
|
1592
|
+
|
1593
|
+
Since Ruby has no type definition, there is no real need for a parent class and subclasses.
|
1594
|
+
However, we will implement those classes in order to show Ruby's inheritance:
|
1595
|
+
|
1596
|
+
```{ruby}
|
1597
|
+
# Parent class. Differently from SS4, both 'nb_groups' and 'part' are defined in the
|
1598
|
+
# parent class.
|
1599
|
+
class PartitionFather
|
1600
|
+
|
1601
|
+
attr_reader :nb_groups
|
1602
|
+
attr_reader :part
|
1603
|
+
|
1604
|
+
# initialize class PartitionFather with the number of groups and parts. Note that we
|
1605
|
+
# use R.i for nb_groups in order to convert the number of groups into an R vector.
|
1606
|
+
def initialize(nb_groups: 0, part: nil)
|
1607
|
+
@nb_groups = R.i(nb_groups)
|
1608
|
+
@part = part
|
1609
|
+
end
|
1610
|
+
|
1611
|
+
# method nb_mult_two can be called from all subclasses
|
1612
|
+
def nb_mult_two
|
1613
|
+
@nb_groups * 2
|
1614
|
+
end
|
1615
|
+
|
1616
|
+
# method 'to_s' is called whenever we try to print a Ruby object. This method emulates
|
1617
|
+
# R 'print' method that prints all the slots.
|
1618
|
+
def to_s
|
1619
|
+
puts ("Variable 'nb_groups':")
|
1620
|
+
@nb_groups.pp
|
1621
|
+
puts
|
1622
|
+
puts ("Variable 'part':")
|
1623
|
+
@part.pp
|
1624
|
+
puts
|
1625
|
+
end
|
1626
|
+
|
1627
|
+
end
|
1628
|
+
|
1629
|
+
# Class PartitionSimple is a subclass of PartitionFather. To make a subclass of a
|
1630
|
+
# class we use the operator '<'. Since the whole logic is in the parent class
|
1631
|
+
# PartitionSimple is just an empty class
|
1632
|
+
class PartitionSimple < PartitionFather
|
1633
|
+
|
1634
|
+
end
|
1635
|
+
|
1636
|
+
# PartitionEval is also only an empty class
|
1637
|
+
class PartitionEval < PartitionFather
|
1638
|
+
|
1639
|
+
end
|
1640
|
+
```
|
1641
|
+
|
1642
|
+
```{ruby}
|
1643
|
+
@a = PartitionSimple.new(nb_groups: 3, part: (R.LETTERS[R.c(1, 2, 3, 2, 2, 1)].factor))
|
1644
|
+
puts @a
|
1645
|
+
```
|
1646
|
+
|
1647
|
+
```{ruby}
|
1648
|
+
puts @a.nb_mult_two
|
1649
|
+
```
|
1650
|
+
|
1651
|
+
```{ruby}
|
1652
|
+
@b = PartitionEval.new(nb_groups: 5, part: R.LETTERS[R.c(1, 5, 3, 4, 2, 4)].ordered)
|
1653
|
+
puts @b
|
1654
|
+
```
|
1655
|
+
|
1656
|
+
```{ruby}
|
1657
|
+
puts @b.nb_mult_two
|
1658
|
+
```
|
1659
|
+
|
1660
|
+
The example above, although it replicates SS4 is not actually very useful from the point of
|
1661
|
+
view of class hierarchy in Ruby. We will then write a new function to_s in class
|
1662
|
+
PartitionSimple that will print the name of the class:
|
1663
|
+
|
1664
|
+
```{ruby}
|
1665
|
+
class PartitionSimple
|
1666
|
+
|
1667
|
+
def to_s
|
1668
|
+
puts("Class PartitionSimple")
|
1669
|
+
super
|
1670
|
+
end
|
1671
|
+
|
1672
|
+
end
|
1673
|
+
```
|
1674
|
+
|
1675
|
+
```{ruby}
|
1676
|
+
puts @a
|
1677
|
+
```
|
1678
|
+
|
1679
|
+
As can be seen, 'puts a' now calls method 'to_s' defined in class PartitionSimple. This
|
1680
|
+
method prints 'Class PartitionSimple' and then calls the super method, i.e., method 'to_s'
|
1681
|
+
from class PartitionFather.
|
1682
|
+
|
1683
|
+
Note though that 'puts b' still prints the same output, since it has no particular 'to_s'
|
1684
|
+
method.
|
1685
|
+
|
1686
|
+
```{ruby}
|
1687
|
+
puts @b
|
1688
|
+
```
|
1689
|
+
|
1690
|
+
# Internal Modification of an Object
|
1691
|
+
|
1692
|
+
|
1693
|
+
## Method to Modify a Field
|
1694
|
+
|
1695
|
+
Let us return to our trajectories example and define a third method that imputes data for
|
1696
|
+
missing values. To simplify, we will impute by replacing by the mean values. This is the R
|
1697
|
+
code to do this:
|
1698
|
+
|
1699
|
+
```
|
1700
|
+
> meanWithoutNa <- function (x){mean(x,na.rm=TRUE)}
|
1701
|
+
> setGeneric("impute",function (.Object){standardGeneric("impute")})
|
1702
|
+
> setMethod(
|
1703
|
+
+ f="impute",
|
1704
|
+
+ signature="Trajectories",
|
1705
|
+
+ def=function(.Object){
|
1706
|
+
+ average <- apply(.Object@traj,2,meanWithoutNa)
|
1707
|
+
+ for (iCol in 1:ncol(.Object@traj)){
|
1708
|
+
+ .Object@traj[is.na(.Object@traj[,iCol]),iCol] <- average[iCol]
|
1709
|
+
+ }
|
1710
|
+
+ return(.Object)
|
1711
|
+
+ }
|
1712
|
+
+ )
|
1713
|
+
```
|
1714
|
+
|
1715
|
+
The code above, as explained in SS4 creates a new object and does not change the original one.
|
1716
|
+
So, calling impute(trajCochin) will work correctly by creating a new object but will not
|
1717
|
+
change trajCochin. This works fine, but can be memory expensive if the matrix is a large
|
1718
|
+
one.
|
1719
|
+
|
1720
|
+
Let's now implement the same method in Galaaz. We will use for that Ruby's 'each' method.
|
1721
|
+
In Ruby, the 'each' method goes through all elements of a vector or list in order. The
|
1722
|
+
'each' method is available for an R matrix in Galaaz. Actually, when calling 'each' for an
|
1723
|
+
R matrix, the matrix is converted to a Ruby MDArray and the 'each' method is applied to this
|
1724
|
+
MDArray. So, we can do @matrix.each and cycle through every element in this matrix.
|
1725
|
+
The 'each_with_index' method does the same as 'each' but also passes the index of the element
|
1726
|
+
to the Ruby block (please, google Ruby block to get further information on blocks in Ruby).
|
1727
|
+
|
1728
|
+
One key aspect to remember is that Ruby indexes start with 0 while R indexes start with 1, so
|
1729
|
+
an element with index i in Ruby will be indexed i+1 in R. With that, let's see the Galaaz
|
1730
|
+
code for method impute:
|
1731
|
+
|
1732
|
+
```{ruby}
|
1733
|
+
class Trajectories
|
1734
|
+
|
1735
|
+
def mean_without_na
|
1736
|
+
@matrix.mean(na__rm: TRUE)
|
1737
|
+
end
|
1738
|
+
|
1739
|
+
def impute
|
1740
|
+
@matrix.each_with_index do |elmt, i|
|
1741
|
+
@matrix[i+1] = mean_without_na if elmt.nan?
|
1742
|
+
end
|
1743
|
+
end
|
1744
|
+
|
1745
|
+
end
|
1746
|
+
```
|
1747
|
+
|
1748
|
+
```{ruby}
|
1749
|
+
@trajCochin.impute
|
1750
|
+
puts @trajCochin.matrix
|
1751
|
+
```
|
1752
|
+
|
1753
|
+
It works! and note that actually trajCochin matrix was changed. However, as with the R
|
1754
|
+
solution, Renjin does make a copy of the data in the background. Let's investigate this a
|
1755
|
+
little further getting inside Galaaz's internals. Method 'as__mdarray' explicitly converts
|
1756
|
+
an R matrix to an MDArray:
|
1757
|
+
|
1758
|
+
```
|
1759
|
+
cochin_internal = trajCochin.matrix.as__mdarray
|
1760
|
+
cochin_internal.print
|
1761
|
+
```
|
1762
|
+
|
1763
|
+
|
1764
|
+
Now let's assign a value to trajCochin matrix and compare it to the variable cochin_internal:
|
1765
|
+
|
1766
|
+
```
|
1767
|
+
trajCochin.matrix[1] = 1
|
1768
|
+
trajCochin.matrix.pp
|
1769
|
+
puts
|
1770
|
+
puts cochin_internal
|
1771
|
+
```
|
1772
|
+
|
1773
|
+
As we can now see, trajCochin and cochin_internal have different content, while cochin_internal
|
1774
|
+
still has the same value in index 0, i.e. 15.0, trajCochin matrix has value 1 in index 1. This
|
1775
|
+
shows that Renjin when assigning to trajCochin.matrix[1] makes a copy of the original data.
|
1776
|
+
|
1777
|
+
Below, we use method 'get' which is a synonym of method 'as__mdarray' to again get the content
|
1778
|
+
of trajCochin.matrix. This variable has as first element the value 1, as set previously.
|
1779
|
+
|
1780
|
+
```
|
1781
|
+
internal2 = trajCochin.matrix.get
|
1782
|
+
internal2.print
|
1783
|
+
```
|
1784
|
+
|
1785
|
+
We will now set the value of the second element of internal2 to 1000. Note that internal2 is
|
1786
|
+
an MDArray and that the second element of this array is indexed with 1:
|
1787
|
+
|
1788
|
+
console(<<-EOT)
|
1789
|
+
internal2[1] = 1000
|
1790
|
+
internal2.print
|
1791
|
+
EOT
|
1792
|
+
|
1793
|
+
body(<<-EOT)
|
1794
|
+
And now, if we print the value of trajCochin.matrix, we note that the second element of this
|
1795
|
+
matrix (R matrix) is also 1000. This shows that the MDArray obtained from calling 'as__mdarray'
|
1796
|
+
and the R matrix have the same backing store.
|
1797
|
+
EOT
|
1798
|
+
|
1799
|
+
console(<<-EOT)
|
1800
|
+
trajCochin.matrix.pp
|
1801
|
+
EOT
|
1802
|
+
|
1803
|
+
body(<<-EOT)
|
1804
|
+
Remember, changing the internals of an R matrix like that can be quite dangerous. Renjin expects
|
1805
|
+
its data to be immutable, and using MDArray allows the user to change this data violating
|
1806
|
+
Renjin principles. If weird bugs start creeping on your code, this should be one of the first
|
1807
|
+
things to be investigated.
|
1808
|
+
EOT
|
1809
|
+
|
1810
|
+
# Conclusions I
|
1811
|
+
|
1812
|
+
This ends SS4 paper. We believe we have shown that R S4 can be substituted by Galaaz and
|
1813
|
+
Ruby classes and that Galaaz makes an easy transition for R developers to Ruby. Ruby is
|
1814
|
+
a very flexible and powerful language and has many interesting libraries, where Rails is
|
1815
|
+
maybe one of the best known, but there are thousands of others. For those interested in
|
1816
|
+
getting deeper into Ruby's libraries, we suggest they look at:
|
1817
|
+
|
1818
|
+
* https://github.com/markets/awesome-ruby
|
1819
|
+
* http://bestgems.org/
|
1820
|
+
|
1821
|
+
For those interested in Ruby and science, we recommend:
|
1822
|
+
|
1823
|
+
* http://sciruby.com/
|
1824
|
+
|
1825
|
+
|
1826
|
+
|
1827
|
+
# ET Phone Home
|
1828
|
+
|
1829
|
+
In this paper we have focused on accessing R functions from Ruby and have shown how to
|
1830
|
+
integrate Ruby with R from the point of view of a Ruby developer, i.e., we have developed
|
1831
|
+
in Ruby and have made calls to R functions very transparently. Although this is quite
|
1832
|
+
powerful, sometimes this still lacks some power. In this section we will see how we can
|
1833
|
+
integrate R with Ruby (through Galaaz) from the point of view of the R developer, i.e.,
|
1834
|
+
we will allow R scripts to have access to Ruby classes and methods.
|
1835
|
+
|
1836
|
+
We did not explicitly show and did not call upon the reader's attention, but whenever
|
1837
|
+
an R function was called we either passed to it basic type objects (numeric, string,
|
1838
|
+
boolean), Ruby arrays and MDArrays. Let's try now to pass a Ruby class to R:
|
1839
|
+
|
1840
|
+
```
|
1841
|
+
R.part = Partition.new(3, R.c("A", "C", "C", "B").factor)
|
1842
|
+
```
|
1843
|
+
|
1844
|
+
```
|
1845
|
+
R.part.pp
|
1846
|
+
```
|
1847
|
+
|
1848
|
+
Calling method 'pp' on this object does not print anything, as this is a completely strange
|
1849
|
+
object in the R planet. So, let's try to see what type of object this is:
|
1850
|
+
|
1851
|
+
```
|
1852
|
+
R.part.typeof.pp
|
1853
|
+
```
|
1854
|
+
|
1855
|
+
We get 'externalptr' as type. So we can send the Ruby class to the R planet, but there is
|
1856
|
+
nothing we can do with it there. It is just an 'externalptr'. But we have learned elsewhere
|
1857
|
+
that if we want to send an astronaut from a planet to another, a good way of doing it is by
|
1858
|
+
creating an 'avatar'! An 'avatar' is remotely controlled by its owner, but it acts almost as
|
1859
|
+
if it were a native being of the other planet.
|
1860
|
+
|
1861
|
+
Galaaz provides a way of creating an 'avatar' from any Ruby class and sending it to R land. We
|
1862
|
+
will now show how this is done and how our 'avatar' calls home to get things done. Method
|
1863
|
+
'rpack' creates the avatar. We will start with a simple example, creating an 'avatar' from
|
1864
|
+
a Ruby array:
|
1865
|
+
|
1866
|
+
```
|
1867
|
+
# create an array of data in Ruby
|
1868
|
+
array = [1, 2, 3]
|
1869
|
+
|
1870
|
+
# Pack the array and assign it to an R variable. Remember that ruby__array, becomes
|
1871
|
+
# ruby.array inside the R script
|
1872
|
+
R.ruby__array = R.rpack(array)
|
1873
|
+
```
|
1874
|
+
|
1875
|
+
```
|
1876
|
+
Now, we have in 'ruby.array' an 'avatar' of array. In order for our 'avatar' to call
|
1877
|
+
back home, it uses method 'run':
|
1878
|
+
```
|
1879
|
+
|
1880
|
+
```
|
1881
|
+
# note that this calls Ruby method 'length' on the array and not R length function.
|
1882
|
+
R.eval("val <- ruby.array$run('length')")
|
1883
|
+
```
|
1884
|
+
console(<<-EOT)
|
1885
|
+
R.eval("print(val)")
|
1886
|
+
EOT
|
1887
|
+
|
1888
|
+
body(<<-EOT)
|
1889
|
+
Let's use a more interesting array method '<<'. This method adds elements to the
|
1890
|
+
end of the array. This method takes one argument, the element to be added at the end of
|
1891
|
+
the array. Thus we call function run passing two arguments, the '<<' method as first
|
1892
|
+
argument and the element to add as second argument.
|
1893
|
+
EOT
|
1894
|
+
|
1895
|
+
code(<<-EOC)
|
1896
|
+
R.eval(<<-EOT)
|
1897
|
+
ruby.array$run('<<', 4)
|
1898
|
+
ruby.array$run('<<', 5)
|
1899
|
+
EOT
|
1900
|
+
EOC
|
1901
|
+
|
1902
|
+
body(<<-EOT)
|
1903
|
+
Let's now print the content of the array. For that, we use another Ruby method: 'to_s'. This
|
1904
|
+
method generates a string with a representation of an object. In the case of an array, it
|
1905
|
+
will show the array's content:
|
1906
|
+
EOT
|
1907
|
+
|
1908
|
+
console(<<-EOT)
|
1909
|
+
R.eval("print(ruby.array$run('to_s'))")
|
1910
|
+
EOT
|
1911
|
+
|
1912
|
+
body(<<-EOT)
|
1913
|
+
One important aspect of interfacing R and Ruby is that both worlds interact with the same data.
|
1914
|
+
There is no data copying between the two worlds, so, effectively whatever happens to the
|
1915
|
+
'avatar' will also happen to the 'real' object. Let's take a look at that. First, we will
|
1916
|
+
go back to the Ruby world and see our array:
|
1917
|
+
EOT
|
1918
|
+
|
1919
|
+
console(<<-EOT)
|
1920
|
+
puts array
|
1921
|
+
EOT
|
1922
|
+
|
1923
|
+
body(<<-EOT)
|
1924
|
+
Now, let's change a value of our array in Ruby:
|
1925
|
+
EOT
|
1926
|
+
|
1927
|
+
code(<<-EOT)
|
1928
|
+
array[0] = "new element"
|
1929
|
+
EOT
|
1930
|
+
|
1931
|
+
body(<<-EOT)
|
1932
|
+
And let's take a look at our 'ruby.array' in R:
|
1933
|
+
EOT
|
1934
|
+
|
1935
|
+
console(<<-EOT)
|
1936
|
+
R.eval("print(ruby.array$run('to_s'))")
|
1937
|
+
EOT
|
1938
|
+
|
1939
|
+
body(<<-EOT)
|
1940
|
+
As you can see, 'ruby.array' is still the same Ruby object.
|
1941
|
+
|
1942
|
+
Avatars maintain some properties of their original world. Although the concept of method
|
1943
|
+
chaining is foreign to R, chaining can be used with imported objects from Ruby. Method
|
1944
|
+
chaining occurs when applying a method on an object returns an object (usually
|
1945
|
+
the same object) on which another method can be applied. In the example below, method '<<'
|
1946
|
+
will be applied multiple times for ruby.array
|
1947
|
+
EOT
|
1948
|
+
|
1949
|
+
code(<<-EOC)
|
1950
|
+
R.eval(<<-EOT)
|
1951
|
+
ruby.array$run('<<', 6)$run('<<', 7)$run('<<', 8)$run('<<', 9)
|
1952
|
+
EOT
|
1953
|
+
EOC
|
1954
|
+
|
1955
|
+
console(<<-EOT)
|
1956
|
+
R.eval("print(ruby.array$run('to_s'))")
|
1957
|
+
EOT
|
1958
|
+
|
1959
|
+
body(<<-EOT)
|
1960
|
+
We can also access any array element inside the R script, but note that we have
|
1961
|
+
to use Ruby indexing, i.e., the first element of the array is index 0:
|
1962
|
+
EOT
|
1963
|
+
|
1964
|
+
console(<<-EOT)
|
1965
|
+
R.eval("print(ruby.array$run('[]', 2))")
|
1966
|
+
EOT
|
1967
|
+
|
1968
|
+
console(<<-EOT)
|
1969
|
+
R.eval("print(ruby.array$run('[]', 5))")
|
1970
|
+
EOT
|
1971
|
+
|
1972
|
+
body(<<-EOT)
|
1973
|
+
Now that we have seen how to "call back" home and integrate Ruby classes with R, let's go
|
1974
|
+
back to our TrajPartitioned method to_part, and create a to_part2 method that will use
|
1975
|
+
R 'sapply' function:
|
1976
|
+
EOT
|
1977
|
+
|
1978
|
+
code(<<-EOT)
|
1979
|
+
class TrajPartitioned
|
1980
|
+
|
1981
|
+
def to_part2
|
1982
|
+
R.pack = R.rpack(@list_partitions, scope: :internal)
|
1983
|
+
number_groups = R.eval("sapply(pack, function(x) x$run('nb_groups'))")
|
1984
|
+
@list_partitions[number_groups.which__min.gz]
|
1985
|
+
end
|
1986
|
+
|
1987
|
+
end
|
1988
|
+
EOT
|
1989
|
+
|
1990
|
+
console(<<-EOT)
|
1991
|
+
tdCochin.to_part2.part.pp
|
1992
|
+
EOT
|
1993
|
+
|
1994
|
+
subsection("Creating Ruby Objects from R Scripts")
|
1995
|
+
|
1996
|
+
body(<<-EOT)
|
1997
|
+
In all the examples given so far on sending Ruby objects to R, the object was created in
|
1998
|
+
Ruby and sent to R. In the following examples, all the work will be done inside R
|
1999
|
+
scripts without the need to create anything in Ruby. For the R developer, this might be
|
2000
|
+
the easiest way to begin trying Galaaz and start migrating from R to Ruby.
|
2001
|
+
|
2002
|
+
In this first example we will create a Ruby String object inside an R script. In order
|
2003
|
+
to create Ruby objects in R, we need to use the Ruby.Object class and use the 'build'
|
2004
|
+
function. The 'build' function is the equivalent of the 'new' function in Ruby and
|
2005
|
+
receives as first argument the name of the class to be built and as other arguments the
|
2006
|
+
same arguments from Ruby 'new':
|
2007
|
+
|
2008
|
+
In the following example, we create a String object initialized with "this is a new string":
|
2009
|
+
EOT
|
2010
|
+
|
2011
|
+
code(<<-EOC)
|
2012
|
+
R.eval(<<-EOT)
|
2013
|
+
# This is an actuall R script, which allows the creation and use of Ruby classes
|
2014
|
+
# and methods.
|
2015
|
+
# Create a string, from class String in Ruby. Use function build to intanciate a
|
2016
|
+
# new object
|
2017
|
+
string <- Ruby.Object$build("String", "this is a new string")
|
2018
|
+
EOT
|
2019
|
+
EOC
|
2020
|
+
|
2021
|
+
console(<<-EOT)
|
2022
|
+
R.eval("print(string)")
|
2023
|
+
EOT
|
2024
|
+
|
2025
|
+
body(<<-EOT)
|
2026
|
+
In Ruby, many methods are known as 'class methods'. Class methods are methods that exist on
|
2027
|
+
the class and not on an instance of the class. In the example above, we create an instance
|
2028
|
+
(object) of type String. In the following example, we will access class Marshal: The marshaling
|
2029
|
+
library converts collections of Ruby objects into a byte stream, allowing them to be stored
|
2030
|
+
outside the currently active script. This data may subsequently be read and the original
|
2031
|
+
objects reconstituted.
|
2032
|
+
EOT
|
2033
|
+
|
2034
|
+
code(<<-EOC)
|
2035
|
+
# Use function get_class to get a Ruby class
|
2036
|
+
R.eval(<<-EOT)
|
2037
|
+
Marshal <- Ruby.Object$get_class("Marshal")
|
2038
|
+
|
2039
|
+
# Method 'dump' is a Marshal class method as is 'load'
|
2040
|
+
str <- Marshal$run("dump", string)
|
2041
|
+
restored <- Marshal$run("load", str)
|
2042
|
+
EOT
|
2043
|
+
EOC
|
2044
|
+
|
2045
|
+
console(<<-EOT)
|
2046
|
+
R.eval("print(restored)")
|
2047
|
+
EOT
|
2048
|
+
|
2049
|
+
subsection("Interfacing Java with Renjin")
|
2050
|
+
|
2051
|
+
body(<<-EOT)
|
2052
|
+
Renjin allows for easy integration of Java into R scripts, giving the user access to all of
|
2053
|
+
Java's libraries and functions. Although this paper is mainly about interfacing R and Ruby,
|
2054
|
+
we believe that it is also important to see how to interface with Java from an R script.
|
2055
|
+
JRuby, the platform on which Galaaz depends, also allows easy integration of Java and Ruby;
|
2056
|
+
however we will not show it here, since this is well documented elsewhere.
|
2057
|
+
EOT
|
2058
|
+
|
2059
|
+
code(<<-EOC)
|
2060
|
+
R.eval(<<-EOT)
|
2061
|
+
import(java.util.HashMap)
|
2062
|
+
|
2063
|
+
# create a new instance of the HashMap class:
|
2064
|
+
ageMap <- HashMap$new()
|
2065
|
+
|
2066
|
+
# call methods on the new instance:
|
2067
|
+
ageMap$put("Bob", 33)
|
2068
|
+
ageMap$put("Carol", 41)
|
2069
|
+
|
2070
|
+
age <- ageMap$get("Carol")
|
2071
|
+
|
2072
|
+
# Java primitives and their boxed types
|
2073
|
+
# are automatically converted to R vectors:
|
2074
|
+
typeof(age)
|
2075
|
+
EOT
|
2076
|
+
EOC
|
2077
|
+
|
2078
|
+
console(<<-EOT)
|
2079
|
+
R.eval("print(ageMap$size())")
|
2080
|
+
EOT
|
2081
|
+
|
2082
|
+
console(<<-EOC)
|
2083
|
+
R.eval(<<-EOT)
|
2084
|
+
cat("Carol is ", age, " years old.\\n", sep = "")
|
2085
|
+
EOT
|
2086
|
+
EOC
|
2087
|
+
|
2088
|
+
|
2089
|
+
section("Conclusions II")
|
2090
|
+
|
2091
|
+
body(<<-EOT)
|
2092
|
+
The Java Virtual Machine (JVM) is an amazing environment allowing for multiple languages to cohabit
|
2093
|
+
and integrate in a very transparent way. Galaaz interfaces R, Ruby and Java and gives the
|
2094
|
+
developer access to a gigantic set of libraries from those three worlds. In
|
2095
|
+
development circles people usually say: "choose the right tool for the job at hand", with JVM/
|
2096
|
+
Java/R/Renjin/Ruby/Galaaz the right tool for the job might just be at hand all the time.
|
2097
|
+
|
2098
|
+
We often see questions on the web about which language to choose between R and Python. Between
|
2099
|
+
R and Python, choose Galaaz!
|
2100
|
+
EOT
|