galaaz 0.4.2 → 0.4.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +25 -0
  3. data/Rakefile +8 -0
  4. data/bin/gknit +9 -5
  5. data/bin/gstudio +4 -2
  6. data/bin/gstudio.rb +32 -2
  7. data/blogs/dev/dev.html +219 -34
  8. data/blogs/dev/dev.md +26 -26
  9. data/blogs/dev/dev_files/figure-html/bubble-1.png +0 -0
  10. data/blogs/dev/dev_files/figure-html/diverging_bar.png +0 -0
  11. data/blogs/dplyr/dplyr.rb +63 -0
  12. data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +38 -26
  13. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +16 -17
  14. data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
  15. data/blogs/galaaz_ggplot/galaaz_ggplot.tex +65 -31
  16. data/blogs/oh_my/not_so.rb +2342 -0
  17. data/blogs/oh_my/oh_my.Rmd +493 -0
  18. data/blogs/oh_my/oh_my.html +680 -0
  19. data/blogs/oh_my/oh_my.md +597 -0
  20. data/blogs/oh_my/old.Rmd +2100 -0
  21. data/blogs/ruby_plot/figures/facets_with_decorations.png +0 -0
  22. data/blogs/ruby_plot/figures/facets_with_jitter.png +0 -0
  23. data/blogs/ruby_plot/figures/final_box_plot.png +0 -0
  24. data/blogs/ruby_plot/figures/final_violin_plot.png +0 -0
  25. data/blogs/ruby_plot/figures/violin_with_jitter.png +0 -0
  26. data/blogs/ruby_plot/ruby_plot.Rmd +147 -122
  27. data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +662 -0
  28. data/blogs/ruby_plot/ruby_plot.html +49 -54
  29. data/blogs/ruby_plot/ruby_plot.md +147 -122
  30. data/blogs/ruby_plot/ruby_plot.pdf +0 -0
  31. data/blogs/ruby_plot/ruby_plot.tex +776 -157
  32. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +57 -0
  33. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +106 -0
  34. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +110 -0
  35. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +174 -0
  36. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +236 -0
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +296 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +236 -0
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +218 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +128 -0
  42. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +150 -0
  43. data/examples/islr/ch2.spec.rb +21 -18
  44. data/examples/islr/ch3_boston.rb +14 -5
  45. data/examples/islr/ch3_multiple_regression.rb +2 -3
  46. data/examples/islr/ch6.spec.rb +1 -1
  47. data/examples/islr/x_y_rnorm.jpg +0 -0
  48. data/lib/R_interface/r.rb +14 -10
  49. data/lib/R_interface/r_libs.R +9 -0
  50. data/lib/R_interface/r_methods.rb +77 -6
  51. data/lib/R_interface/{expression.rb → r_module_s.rb} +13 -14
  52. data/lib/R_interface/rbinary_operators.rb +58 -71
  53. data/lib/R_interface/rdata_frame.rb +2 -1
  54. data/lib/R_interface/rdevices.R +4 -0
  55. data/lib/R_interface/rdevices.rb +1 -1
  56. data/lib/R_interface/renvironment.rb +34 -1
  57. data/lib/R_interface/rexpression.rb +108 -2
  58. data/lib/R_interface/rindexed_object.rb +3 -1
  59. data/lib/R_interface/rlanguage.rb +18 -2
  60. data/lib/R_interface/rmatrix.rb +14 -0
  61. data/lib/R_interface/rmd_indexed_object.rb +5 -1
  62. data/lib/R_interface/robject.rb +61 -23
  63. data/lib/R_interface/rsupport.rb +111 -53
  64. data/lib/R_interface/rsymbol.rb +6 -5
  65. data/lib/R_interface/ruby_extensions.rb +130 -4
  66. data/lib/R_interface/runary_operators.rb +35 -3
  67. data/lib/R_interface/rvector.rb +1 -0
  68. data/lib/galaaz.rb +0 -2
  69. data/lib/gknit/knitr_engine.rb +58 -4
  70. data/lib/gknit/ruby_engine.rb +5 -6
  71. data/lib/util/exec_ruby.rb +55 -9
  72. data/specs/all.rb +13 -3
  73. data/specs/figures/dose_len.png +0 -0
  74. data/specs/r_dataframe.spec.rb +49 -26
  75. data/specs/r_environment.spec.rb +140 -0
  76. data/specs/r_eval.spec.rb +0 -15
  77. data/specs/r_formula.spec.rb +232 -0
  78. data/specs/r_function.spec.rb +7 -8
  79. data/specs/r_list.spec.rb +4 -0
  80. data/specs/r_list_apply.spec.rb +11 -11
  81. data/specs/r_matrix.spec.rb +3 -3
  82. data/specs/{r_plots.spec.rb~ → r_nse.spec.rb} +29 -6
  83. data/specs/r_vector_creation.spec.rb +6 -0
  84. data/specs/r_vector_object.spec.rb +2 -2
  85. data/specs/r_vector_operators.spec.rb +3 -3
  86. data/specs/r_vector_subsetting.spec.rb +4 -4
  87. data/specs/ruby_expression.spec.rb +324 -0
  88. data/specs/tmp.rb +12 -524
  89. data/sty/galaaz.sty +71 -0
  90. data/version.rb +1 -1
  91. metadata +31 -41
  92. data/bin/gknit2~ +0 -6
  93. data/bin/ogk~ +0 -4
  94. data/bin/prepareR.rb~ +0 -1
  95. data/blogs/dev/dev.Rmd~ +0 -104
  96. data/blogs/galaaz_ggplot/galaaz_ggplot.dvi +0 -0
  97. data/blogs/galaaz_ggplot/midwest_external_png~ +0 -1
  98. data/blogs/gknit/gknit.Rmd~ +0 -184
  99. data/blogs/gknit/gknit.Rnd~ +0 -17
  100. data/blogs/gknit/model.rb~ +0 -46
  101. data/blogs/ruby_plot/ruby_plot.Rmd~ +0 -215
  102. data/examples/islr/Figure.jpg +0 -0
  103. data/examples/misc/moneyball.rb~ +0 -16
  104. data/examples/misc/subsetting.rb~ +0 -372
  105. data/lib/R/eng_ruby.R~ +0 -63
  106. data/lib/R_interface/capture_plot.rb~ +0 -23
  107. data/lib/R_interface/r.rb~ +0 -121
  108. data/lib/R_interface/rdevices.rb~ +0 -27
  109. data/lib/gknit.rb~ +0 -26
  110. data/lib/gknit/knitr_engine.rb~ +0 -102
  111. data/lib/gknit/ruby_engine.rb~ +0 -72
  112. data/lib/util/inline_file.rb~ +0 -23
  113. data/r_requires/knitr.rb~ +0 -4
  114. data/specs/r_language.spec.rb +0 -157
@@ -0,0 +1,2100 @@
1
+ ---
2
+ title: "Extending R with classes, modules, procs, lambdas, oh my!"
3
+ author:
4
+ - "Rodrigo Botafogo"
5
+ - "Daniel Mossé - University of Pittsburgh"
6
+ tags: [Tech, Data Science, Ruby, R, GraalVM]
7
+ date: "November 19th, 2018"
8
+ output:
9
+ html_document:
10
+ self_contained: true
11
+ keep_md: true
12
+ pdf_document:
13
+ includes:
14
+ in_header: "../../sty/galaaz.sty"
15
+ keep_tex: yes
16
+ number_sections: yes
17
+ toc: true
18
+ toc_depth: 2
19
+ fontsize: 11pt
20
+ ---
21
+
22
+ ```{r setup, echo=FALSE}
23
+
24
+ ```
25
+
26
+ # Introduction
27
+
28
+ This paper introduces and compares Galaaz with R's S4. It is a shameless rip off of
29
+ ["A '(not so)' Short Introduction to S4"](https://cran.r-project.org/doc/contrib/Genolini-S4tutorialV0-5en.pdf) by Christophe Genolini and follows the same structure and examples presented there.
30
+
31
+ Galaaz is a Ruby Gem (library) that allows very tight integration between Ruby and R.
32
+ Its integration is much tighter and more transparent than what one can get with RinRuby
33
+ or similar solutions in Python
34
+ such as [PypeR](https://pypi.python.org/pypi/PypeR/1.1.0), [rpy2](http://rpy2.bitbucket.org/)
35
+ and other similar solutions. Galaaz targets the Java Virtual Machine and it
36
+ integrates with Renjin (http://www.renjin.org/), an R interpreter for Java.
37
+
38
+ From the Renjin page we can get the following description of Renjin and its objectives:
39
+
40
+ The goal of Renjin
41
+ is to eventually be compatible with GNU R such that most existing R language programs will
42
+ run in Renjin without the need to make any changes to the code. Needless to say, Renjin is
43
+ currently not 100% compatible with GNU R so your mileage may vary.
44
+
45
+ The biggest advantage of Renjin is that the R interpreter itself is a Java module which can be
46
+ seamlessly integrated into any Java application. This dispenses with the need to load dynamic
47
+ libraries or to provide some form of communication between separate processes. These types of
48
+ interfaces are often the source of much agony because they place very specific demands on the
49
+ environment in which they run.
50
+
51
+ We frequently see on the web people asking: "which is better for data analysis: R or Python?" In
52
+ this article we also have the objective of trying to answer this question. As you will see, our
53
+ point is: "when in doubt about R or Python, use Galaaz!"
54
+
55
+ # Bases of Object Programming
56
+
57
+ In this paper, we will start our discussion from Part II of "The (not so) Short Introduction
58
+ to S4", which from now on we will reference as SS4 for "short S4". Interested readers are directed
59
+ to this paper to understand the motivation and examples in that paper. In this paper we will
60
+ present the S4 code from SS4 and then the same code in Ruby/Galaaz. We will not comment on the
61
+ S4 code, as all the comments can be found in SS4, we will only focus on the Ruby/Galaaz
62
+ description.
63
+
64
+ S4 defines classes by using the setClass function:
65
+
66
+ # Classes Declaration
67
+
68
+ ```
69
+ # > setClass(
70
+ # + Class="Trajectories",
71
+ # + representation=representation(
72
+ # + times = "numeric",
73
+ # + traj = "matrix"
74
+ # + )
75
+ # + )
76
+ ```
77
+
78
+ # Instance Variables
79
+
80
+ In Ruby a class is defined by the keyword 'class'. Every class should start with a capital
81
+ letter. S4 'slots' are called 'instance variables' in Ruby. Differently from R's S4,
82
+ instance variables in Ruby do not have type information. It should be clear though, that S4
83
+ type information is also not a "compile" time type, since R is not compiled. The type is
84
+ checked at runtime. The same checking can be done in Ruby and we will do it later in this
85
+ document.
86
+
87
+ In the example below, we create
88
+ class Trajectories with two instance variables, 'times' and 'matrix'. We will not go over
89
+ the details of instance variables in Ruby, but here we created those variables with the
90
+ keyword 'attr_reader' and a colon before the variable's name:
91
+
92
+
93
+ ```
94
+ class Trajectories
95
+
96
+ attr_reader :times
97
+ attr_reader :matrix
98
+
99
+ end
100
+ ```
101
+
102
+
103
+ In order to create a new instance of object Trajectories we call method new on the class and
104
+ we can store the result in a variable (not an instance variable) as below:
105
+
106
+ ```
107
+ traj = Trajectories.new
108
+ ```
109
+
110
+ We now have in variable 'traj' a Trajectories object. In Ruby, printing variable 'traj' will
111
+ only print the class name of the object and not its contents as in R.
112
+
113
+ ```
114
+ puts traj
115
+ ```
116
+
117
+ To see the contents of an object, one needs to access its components using the '.' operator:
118
+
119
+ ```
120
+ puts traj.times
121
+ ```
122
+
123
+ # Constructor
124
+
125
+ Since there is no content stored in 'times' nor 'matrix', nil is returned. In order to add
126
+ a value in the variables, we need to add a constructor to the class Trajectories. In R, a
127
+ constructor is built by default; in Ruby, this has to be created by adding a method called
128
+ 'initialize'. In the example below, we will create the initializer that accepts two values,
129
+ a 'times' value and a 'matrix' value and they are used to initialize the value of the
130
+ instance variables:
131
+
132
+
133
+ ```
134
+ class Trajectories
135
+
136
+ attr_reader :times
137
+ attr_reader :matrix
138
+
139
+ def initialize(times: nil, matrix: nil)
140
+ @times = times
141
+ @matrix = matrix
142
+ end
143
+
144
+ end
145
+ ```
146
+
147
+ Up to this point, everything described is pure Ruby code and has absolutely no relationship to R.
148
+ We now want to create a Trajectories with a 'times' vector. Ruby has a vector class and we could
149
+ use this class to create a vector and add it to the 'times' instance variable; however, in order
150
+ to make use of R's functions, we want to create a R vector to add to 'times'. In Galaaz,
151
+ creating R objects is done using the corresponding R functions by just preceding them with 'R.',
152
+ i.e., R functions are all defined in Galaaz in the R namespace.
153
+
154
+ Since Galaaz is Ruby and not R, some syntax adjustments are sometimes necessary. For instance,
155
+ in R, a range is represented as '(1:4)', in Ruby, the same range is represented as '(1..4)'.
156
+ When passing arguments to an R function in R one uses the '=' sign after the slot name; in Ruby,
157
+ one uses the ':' operator after the parameter's name as we can see below:
158
+
159
+
160
+ ```
161
+ # Create a Trajectories with the times vector [1, 2, 3, 4] and no matrix
162
+ traj = Trajectories.new(times: R.c(1, 2, 3, 4))
163
+
164
+ # Create a Trajectories with times and matrix
165
+ traj2 = Trajectories.new(times: R.c(1, 3), matrix: R.matrix((1..4), ncol: 2))
166
+ ```
167
+
168
+ # Access to Instance Variables (to reach a slot)
169
+
170
+ In order to access data in an instance variable the operator '.' is used. In R, a similar
171
+ result is obtained by use of the '@' operator, but SS4 does not recommend its use. In Galaaz,
172
+ the '.' operator is the recommended way of accessing an instance variable.
173
+
174
+ Now that we have created two trajectories, let's try to print its instance variables to see
175
+ that everything is fine:
176
+
177
+
178
+ ```
179
+ puts traj.times
180
+ ```
181
+
182
+ Well this wasn't really what we had expected... as explained before, printing a variable, will
183
+ actually only show the class name and vector 'times' in Galaaz is actually a Renjin::Vector.
184
+ In order to print the content of a Galaaz object we use method 'pp' as follows:
185
+
186
+ ```{ruby}
187
+ puts @traj.times
188
+ ```
189
+
190
+ We now have the expected value. Note that the 'times' vector is printed exactly as it would
191
+ if we were using GNU R. Let's now take a look at variable 'traj2':
192
+
193
+ ```{ruby}
194
+ puts @traj2.times
195
+ ```
196
+
197
+ ```{ruby}
198
+ puts @traj2.matrix
199
+ ```
200
+
201
+ Let's now build the same examples as in SS4: Three hospitals take part in a
202
+ study. The Pitié Salpêtriere (which has not yet returned its data file, shame on them!),
203
+ Cochin and Saint-Anne. We first show the code in R and the corresponding Galaaz:
204
+
205
+ ```
206
+ > trajPitie <- new(Class="Trajectories")
207
+ > trajCochin <- new(
208
+ + Class= "Trajectories",
209
+ + times=c(1,3,4,5),
210
+ + traj=rbind (
211
+ + c(15,15.1, 15.2, 15.2),
212
+ + c(16,15.9, 16,16.4),
213
+ + c(15.2, NA, 15.3, 15.3),
214
+ + c(15.7, 15.6, 15.8, 16)
215
+ + )
216
+ + )
217
+ > trajStAnne <- new(
218
+ + Class= "Trajectories",
219
+ + times=c(1: 10, (6: 16) *2),
220
+ + traj=rbind(
221
+ + matrix (seq (16,19, length=21), ncol=21, nrow=50, byrow=TRUE),
222
+ + matrix (seq (15.8, 18, length=21), ncol=21, nrow=30, byrow=TRUE)
223
+ + )+rnorm (21*80,0,0.2)
224
+ + )
225
+ ```
226
+
227
+ This same code in Galaaz becomes:
228
+
229
+ ```{ruby}
230
+ @trajPitie = Trajectories.new
231
+ ```
232
+
233
+ ```{ruby}
234
+ @trajCochin = Trajectories.new(times: R.c(1,3,4,5),
235
+ matrix: R.rbind(
236
+ R.c(15,15.1, 15.2, 15.2),
237
+ R.c(16,15.9, 16,16.4),
238
+ R.c(15.2, NA, 15.3, 15.3),
239
+ R.c(15.7, 15.6, 15.8, 16)))
240
+ ```
241
+
242
+ ```{ruby}
243
+ @trajStAnne =
244
+ Trajectories.new(times: R.c((1..10), R.c(6..16) * 2),
245
+ matrix: (R.rbind(
246
+ R.matrix(R.seq(16, 19, length: 21), ncol: 21,
247
+ nrow: 50, byrow: true),
248
+ R.matrix(R.seq(15.8, 18, length: 21), ncol: 21,
249
+ nrow: 30, byrow: true)) + R.rnorm(21*80, 0, 0.2)))
250
+
251
+ ```
252
+
253
+ Let's check that the 'times' and 'matrix' instance variables were correctly set:
254
+
255
+ ```{ruby}
256
+ puts @trajCochin.times
257
+ ```
258
+
259
+ ```{ruby}
260
+ puts @trajCochin.matrix
261
+ ```
262
+
263
+ ```{ruby}
264
+ puts @trajStAnne.times
265
+ ```
266
+
267
+ We will not at this time print trajStAnne.matrix, since this is a huge matrix and the result
268
+ would just take too much space. Later we will print just a partial view of the matrix.
269
+
270
+ # Default Values
271
+
272
+ Default values are very useful and quite often used in Ruby programs. Although SS4 does not
273
+ recommend its use, there are many cases in which default values are useful and make code simpler.
274
+ We have already seen default values in this document, with the default being 'nil'. This was
275
+ necessary in order to be able to create our constructor and passing it the proper values.
276
+
277
+ In the example below, a class TrajectoriesBis is created with default value 1 for times and a
278
+ matrix with no elements in matrix.
279
+
280
+ ```{ruby}
281
+ class TrajectoriesBis
282
+
283
+ attr_reader :times
284
+ attr_reader :matrix
285
+
286
+ def initialize(times: 1, matrix: R.matrix(0))
287
+ @times = times
288
+ @matrix = matrix
289
+ end
290
+
291
+ end
292
+
293
+ traj_bis = TrajectoriesBis.new
294
+ ```
295
+
296
+
297
+ Let's take a look at our new class:
298
+
299
+
300
+ ```{ruby}
301
+ puts @traj_bis.times
302
+ ```
303
+
304
+ Well, not exactly what we had in mind. We got an error saying that .pp is undefined for
305
+ Fixnum. In R, numbers are automatically converted to vectors, but this is not the case
306
+ in Ruby and Galaaz. In Ruby, numbers are numbers and vectors are vectors. In the
307
+ initialize method above, we stored 1 in variable @times and 1 is a number. Method .pp is
308
+ only available for R objects.
309
+
310
+ In order to fix this, we need to fix our initializer to convert number 1 to a vector with
311
+ one element of value 1. Galaaz provides the method R.i to do this conversion.
312
+
313
+ When calling an R function that expects a number as argument, this conversion is
314
+ automatically done by Galaaz; however, in the initialize method, there is no indication
315
+ to Galaaz that variable @times is actually a Galaaz variable, since there is no type
316
+ information. In this case, we need to be explicit and use R.i:
317
+
318
+ ```{ruby}
319
+ class TrajectoriesBis
320
+
321
+ attr_reader :times
322
+ attr_reader :matrix
323
+
324
+ # Use R.i to convert number 1 to a vector
325
+ def initialize(times: R.i(1), matrix: R.matrix(0))
326
+ @times = times
327
+ @matrix = matrix
328
+ end
329
+
330
+ end
331
+
332
+ @traj_bis = TrajectoriesBis.new
333
+ ```
334
+
335
+ ```{ruby}
336
+ puts @traj_bis.times
337
+ ```
338
+
339
+ ```{ruby}
340
+ puts @traj_bis.matrix
341
+ ```
342
+
343
+ # To Remove an Object
344
+
345
+ As far as I know, there isn't a good way of removing a defined class, but there might be
346
+ one and the interested user is directed to google it! In principle, there should not be
347
+ any real need to remove a defined class. Both in R and Galaaz, large programs are usually
348
+ written in a file and the file loaded. If one writes a wrong class, the better solution is
349
+ to correct it and then load it again. If the class is written directly on the console,
350
+ then leaving it there will not have any serious impact.
351
+
352
+ # The Empty Object
353
+
354
+ When a Trajectories is created with new, and no argument is given, all its instance variables
355
+ will have the default nil value. Since Ruby has no type information, then there is only one
356
+ type (or actually no type) of nil. To check if a variable is empty, we check it against the nil
357
+ value.
358
+
359
+ # To See an Object
360
+
361
+ Ruby has very strong meta-programming features, in particular, one can use introspection to
362
+ see methods and instance variables from a given class. Method 'instance_variables' shows all
363
+ the instance variables of an object:
364
+
365
+ ```
366
+ puts @traj.instance_variables
367
+ ```
368
+
369
+ The description of all meta-programming features of Ruby is well beyond the scope of this
370
+ document, but it is a frequently used and powerful feature of Ruby that makes programming in
371
+ Ruby a different experience than programming in other languages.
372
+
373
+ # Methods
374
+
375
+ Methods are a fundamental feature of object oriented programming. We will now extend our class
376
+ Trajectories to add methods to it. In SS4, a method 'plot' is added to Trajectories. At this
377
+ point, Renjin and Galaaz do not yet have plotting capabilities, so we will have to skip this
378
+ method and go directly to the implementation of the 'print' method.
379
+
380
+ Below is the R code for method print:
381
+
382
+ ```
383
+ > setMethod ("print","Trajectories",
384
+ + function(x,...){
385
+ + cat("*** Class Trajectories, method Print *** \\n")
386
+ + cat("* Times ="); print (x@times)
387
+ + cat("* Traj = \\n"); print (x@traj)
388
+ + cat("******* End Print (trajectories) ******* \\n")
389
+ + }
390
+ + )
391
+ ```
392
+
393
+ Now the same code for class Trajectories in Galaaz. In general methods are defined in a class
394
+ together with all the class definition. We will first use this approach. Later, we will show
395
+ how to 'reopen' a class to add new methods to it.
396
+
397
+ In this example, we are defining a method named 'print'. We have been using method 'puts' to
398
+ output data. There is a Ruby method that is more flexible than puts and that we need to use to
399
+ implement our function: 'print'. However, trying to use Ruby print inside the definition of
400
+ Trajectories's print will not work, as Ruby will understand that as a recursive call to print.
401
+ Ruby's print is defined inside the Kernel module, so, in order to call Ruby's print inside the
402
+ definition of Trajectories's print we need to write 'Kernel.print'.
403
+
404
+ ```{ruby}
405
+ class Trajectories
406
+
407
+ attr_reader :times
408
+ attr_reader :matrix
409
+
410
+ #
411
+ #
412
+ #
413
+ def initialize(times: nil, matrix: nil)
414
+ @times = times
415
+ @matrix = matrix
416
+ end
417
+
418
+ def print
419
+ puts("*** Class Trajectories, method Print *** ")
420
+ Kernel.print("times = ")
421
+ @times.pp
422
+ puts("traj =")
423
+ @matrix.pp
424
+ puts("******* End Print (trajectories) ******* ")
425
+ end
426
+
427
+ end
428
+ ```
429
+
430
+ ```{ruby}
431
+ puts @trajCochin
432
+ ```
433
+
434
+ For Cochin, the result is correct. For Saint-Anne, print will display too much
435
+ information. So we need a second method.
436
+
437
+ Show is the default R method used to show an object when its name is written in the
438
+ console. We thus define 'show' by taking into account the size of the object: if there are too
439
+ many trajectories, 'show' posts only part of them.
440
+
441
+ Here is the R code for method 'show':
442
+
443
+ ```
444
+ > setMethod("show","Trajectories",
445
+ + function(object){
446
+ + cat("*** Class Trajectories, method Show *** \\n")
447
+ + cat("* Times ="); print(object@times)
448
+ + nrowShow <- min(10,nrow(object@traj))
449
+ + ncolShow <- min(10,ncol(object@traj))
450
+ + cat("* Traj (limited to a matrix 10x10) = \\n")
451
+ + print(formatC(object@traj[1:nrowShow,1:ncolShow]),quote=FALSE)
452
+ + cat("******* End Show (trajectories) ******* \\n")
453
+ + }
454
+ + )
455
+ ```
456
+
457
+ Now, let's write it with Galaaz. This time though, we will not rewrite the whole Trajectories
458
+ class, but just reopen it to add this specific method. The next example has many interesting
459
+ features of Galaaz, some we have already seen, others will be described now:
460
+
461
+ * As we have already seen, to call an R function one uses the R.<function> notation. There
462
+ is however another way: when the first argument to the R function is an R object such as a
463
+ matrix, a list, a vector, etc. we can use '.' notation to call the function. This makes the
464
+ function look like a method of the object. For instance, R.nrow(@matrix), can be called by
465
+ doing @matrix.nrow;
466
+
467
+ * In R, every number is converted to a vector and this can be done with method R.i. Converting
468
+ a vector with only one number back to a number can be done with method '.gz'. So if @num is
469
+ an R vector that holds a number, then @num.gz is a number that can be used normally with Ruby
470
+ methods;
471
+
472
+ * R functions and Ruby methods can be used freely in Galaaz. We show below two different ways
473
+ of getting the minimum of a number, either by calling R.min or by getting the minimum of an
474
+ array, with the min method;
475
+
476
+ * Galaaz allows for method 'chaining'. Method chaining, also known as named parameter idiom, is
477
+ a common syntax for invoking multiple method calls in object-oriented programming languages.
478
+ Each method returns an object, allowing the calls to be chained together in a single statement
479
+ without requiring variables to store the intermediate results. For instance @matrix.nrow.gz,
480
+ which returns the number of rows of the matrix as a number;
481
+
482
+ * Ranges in Ruby are represented by (x..y), where x is the beginning of the range and y its end.
483
+ An R matrix can be indexed by range, object@traj[1:nrowShow,1:ncolShow], the same result is
484
+ obtained in Galaaz by indexing @matrix[(1..nrow_show), (1..ncol_show)]. Observe that this
485
+ statement is then chained with the format function and with the pp method to print the matrix.
486
+
487
+
488
+ ```{ruby}
489
+ class Trajectories
490
+
491
+ def show
492
+ puts("*** Class Trajectories, method Show *** ")
493
+ Kernel.print("times = ")
494
+ @times.pp
495
+ nrow_show = [10, @matrix.nrow.gz].min
496
+ ncol_show = R.min(10, @matrix.ncol).gz
497
+ puts("* Traj (limited to a matrix 10x10) = ")
498
+ @matrix[(1..nrow_show), (1..ncol_show)].format(digits: 2, nsmall: 2).pp
499
+ puts("******* End Show (trajectories) ******* ")
500
+ end
501
+
502
+ end
503
+ ```
504
+
505
+ ```{ruby}
506
+ @trajStAnne.show
507
+ ```
508
+
509
+ Our show method has the same problem as SS4, i.e., if an empty trajectories object is created and
510
+ we try to 'show' it, it will generate an error. Let's see it:
511
+
512
+ ```{ruby}
513
+ @empty_traj = Trajectories.new
514
+ ```
515
+
516
+ ```{ruby}
517
+ @empty_traj.show
518
+ ```
519
+
520
+ ```
521
+ NoMethodError: undefined method `pp' for nil:NilClass
522
+ show at :6
523
+ <eval> at :1
524
+ eval at org/jruby/RubyKernel.java:976
525
+ console at T:/Rodrigo/Desenv/Galaaz/examples/rbmarkdown.rb:61
526
+ <top> at T:\Rodrigo\Desenv\Galaaz\examples\not_so.rb:533
527
+ ```
528
+
529
+ In this example, we try to call method .pp on a nil (empty) object and this method is not
530
+ defined. In order to fix this, we can either prevent an empty trajectories class from being created,
531
+ or make sure that method show will not choke on the empty object. We will take the second
532
+ alternative, to follow SS4 and will check if either @times or @matrix are empty. If either one
533
+ of them is nil, then we will print a message saying so.
534
+
535
+ Although the first alternative, i.e., not allow for empty objects is a possibility in Ruby,
536
+ it seems that this is not the case for S4.
537
+
538
+ ```{ruby}
539
+ class Trajectories
540
+
541
+ def show
542
+ if (@times.nil? || @matrix.nil?)
543
+ puts("*** Class Trajectories is empty!! *** ")
544
+ return
545
+ end
546
+ puts("*** Class Trajectories, method Show *** ")
547
+ Kernel.print("times = ")
548
+ @times.pp
549
+ nrow_show = [10, @matrix.nrow.gz].min
550
+ ncol_show = R.min(10, @matrix.ncol).gz
551
+ puts("* Traj (limited to a matrix 10x10) = ")
552
+ @matrix[(1..nrow_show), (1..ncol_show)].format(digits: 2, nsmall: 2).pp
553
+ puts("******* End Show (trajectories) ******* ")
554
+ end
555
+
556
+ end
557
+ ```
558
+
559
+ ```{ruby}
560
+ empty_traj.show
561
+ ```
+
562
+ # Method count_missing
563
+
564
+ In R, methods 'print' and 'show' are methods that already exist. SS4 wants to add a method
565
+ called 'countMissing' which does not exist in R, and thus requires some special preparation. In
566
+ Ruby, every method we've created is a new method that exists inside the class. The fact that
567
+ 'print' happens to be also a method for class Kernel and 'show' is not, is not of special interest.
568
+ Actually we've seen that in order to call method print from the Kernel class we had to call
569
+ Kernel.print.
570
+
571
+ To create method 'count_missing' we just need to reopen the Trajectories class and add the
572
+ method the same way we've done with method 'show'. Again, let's first look at R's 'countMissing'
573
+ and then at Ruby's:
574
+
575
+
576
+ ```
577
+ > setMethod(
578
+ + f= "countMissing",
579
+ + signature= "Trajectories",
580
+ + definition=function(object){
581
+ + return(sum(is.na(object@traj)))
582
+ + }
583
+ + )
584
+ ```
585
+
586
+ Here we introduce another particular case of Galaaz. R has many methods that have a '.' in
587
+ their names, such as 'is.na'. In Ruby, the dot '.' has a special meaning as it is the way
588
+ we call a method on an object. Doing 'R.is.na' will not work. So, in Galaaz, R functions that
589
+ have a dot in them will have the dot substituted by '__'. So, method is.na in Galaaz, becomes
590
+ R.is__na. In method count_missing we use method chaining and convert the final count to a number.
591
+
592
+ ```{ruby}
593
+ class Trajectories
594
+
595
+ def count_missing
596
+ return @matrix.is__na.sum.gz
597
+ end
598
+
599
+ end
600
+ ```
601
+
602
+ ```{ruby}
603
+ puts @trajCochin.count_missing
604
+ ```
605
+
606
+ # To See the Methods
607
+
608
+ In order to see the methods we have defined so far, we call on class Trajectories the method
609
+ 'instance_methods' passing it one argument, 'false', as follows:
610
+
611
+ ```{ruby}
612
+ puts @Trajectories.instance_methods(false)
613
+ ```
614
+
615
+ It is interesting to observe that we see our three methods 'count_missing', 'print' and 'show', but
616
+ we also see two other methods 'times' and 'matrix', but those last two as far as we know are
617
+ just instance variables and not methods, right? More on that when we talk about Accessors.
618
+
619
+ Galaaz and Ruby, do not by default provide a way to see a method's code. However, if the user uses
620
+ a Ruby console such as Pry, then seeing methods and debugging is possible. Pry, is beyond the
621
+ scope of this document.
622
+
623
+ # Construction
624
+
625
+ Every class in Ruby has a constructor, if not explicitly defined, at least implicitly. Method
626
+ initialize is the constructor method and the one that coordinates the whole construction process.
627
+
628
+ # Inspector
629
+
630
+ There is no default 'inspector' in Ruby as in R, although there is nothing that prevents the
631
+ developer from inspecting and validating the input. For example, in the object Trajectories, one may
632
+ want to check that the number of elements in 'times' is equal to the number of columns in 'matrix'
633
+ and if they are not, issue an error. In order to understand why this restriction exists, the user is
634
+ again directed to SS4.
635
+
636
+ Here we show the R code for this validation:
637
+
638
+ ```
639
+ > setClass(
640
+ + Class="Trajectories",
641
+ + representation(times="numeric",traj="matrix"),
642
+ + validity=function(object){
643
+ + cat("~~~ Trajectories: inspector ~~~ \\n")
644
+ + if(length(object@times)!=ncol(object@traj)){
645
+ + stop ("[Trajectories: validation] the number of temporal measurements does not correspond
646
+ + }else{}
647
+ + return(TRUE)
648
+ + }
649
+ + )
650
+ ```
651
+
652
+ In order to implement this validation we will coordinate it in the initialize method.
653
+
654
+ ```{ruby}
655
+ class Trajectories
656
+
657
+ def initialize(times: nil, matrix: nil)
658
+ @times = times
659
+ @matrix = matrix
660
+
661
+ # validate the input, to make sure that size of @times and the number of columns in
662
+ # @matrix are the same
663
+ puts ("~~~ Trajectories: inspector ~~~ ")
664
+ raise "[Trajectories: validation] the number of temporal measurements does not correspond with the number of columns in the matrix" if (@times.length.gz != @matrix.ncol.gz)
665
+
666
+ # show the object just created
667
+ show
668
+
669
+ end
670
+
671
+ end
672
+ ```
673
+
674
+ Let's first create a Trajectories that validates fine, i.e., the number of elements in @times is
675
+ equal to the number of columns of the matrix. In this case, we will show a message saying that
676
+ validation was done and then print the object.
677
+
678
+ ```{ruby}
679
+ ok = Trajectories.new(times: R.c(1..2), matrix: R.matrix((1..2), ncol: 2))
680
+ ```
681
+
682
+ Now, if we try to create a Trajectories that does not pass the validation criteria, our code
683
+ will raise an exception. Exceptions are a standard way to deal with errors in Ruby code and
684
+ many other object oriented languages. The interested reader should look for further documentation
685
+ on exceptions on the web.
686
+
687
+
688
+ ```{ruby}
689
+ error = Trajectories.new(times: R.c(1..3), matrix: R.matrix((1..2), ncol: 2))
690
+ ```
691
+
692
+ The validation above does not consider the case when an empty object is created. Here we will
693
+ check to see if either times or matrix are nil, if either one of them is nil, then we will raise
694
+ an exception and interrupt the creation of the object. We also create a method validate that is
695
+ called from our initialize method.
696
+
697
+ Method validate has some interesting features about the integration of Galaaz and R. First,
698
+ observe that instead of using @times.length.gz and @matrix.ncol.gz to get the length and number of
699
+ columns of variables 'times' and 'matrix' we actually compared (@times.length != @matrix.ncol).
700
+ In this case, the actual R operator '!=' is being used. This operator works on vectors and
701
+ matrices and returns a logical vector with TRUE or FALSE. In order to convert the logical vector,
702
+ with one element, to a logical value in Ruby we use method 'gt' (get truth).
703
+
704
+
705
+ ```{ruby}
706
+ class Trajectories
707
+
708
+ def initialize(times: nil, matrix: nil)
709
+ @times = times
710
+ @matrix = matrix
711
+
712
+ # call method validate to validate our imput
713
+ validate
714
+
715
+ # show the object just created
716
+ show
717
+
718
+ end
719
+
720
+ def validate
721
+
722
+ # Let's first check that we do not have an empty object
723
+ raise "Neither times nor matrix can be an empty object" if (@times.nil? || @matrix.nil?)
724
+
725
+ # validate the input, to make sure that size of @times and the number of columns in
726
+ # @matrix are the same
727
+ puts ("~~~ Trajectories: inspector ~~~ ")
728
+ raise "[Trajectories: validation] the number of temporal measurements does not correspond with the number of columns in the matrix" if (@times.length != @matrix.ncol).gt
729
+
730
+ end
731
+
732
+ end
733
+ ```
734
+
735
+ Let's try then creating an empty object:
736
+
737
+
738
+ ```{ruby}
739
+ error = Trajectories.new
740
+ ```
741
+
742
+ Another example:
743
+
744
+ ```{ruby}
745
+ error = Trajectories.new(times: 1)
746
+ ```
747
+
748
+ Let's see now that the implementation is correct and that it does not raise an error on valid
749
+ input:
750
+
751
+ ```{ruby}
752
+ ok = Trajectories.new(times: R.c(1, 2), matrix: R.matrix((1..2), ncol: 2))
753
+ ```
754
+
755
+ The 'initialize' method is called ONLY during the initial creation of the object. If any instance
756
+ variable is later modified, no control is done. At this moment though, there is no way to change
757
+ the value of any of our instance variables.
758
+
759
+ ```
760
+ error.times = R.c(1, 2, 3)
761
+ ```
762
+
763
+ The Trajectories class works for R objects and not for Ruby objects and thus expects as input R
764
+ objects. Passing R objects in all examples has been the obligation of the programmer. Galaaz,
765
+ however, can translate Ruby objects to R objects and does so for parameter passing. Here we do
766
+ an explicit conversion of Ruby object to R in class Trajectories by calling R.convert for our
767
+ input parameters:
768
+
769
+ ```{ruby}
770
+ class Trajectories
771
+
772
+ def initialize(times: nil, matrix: nil)
773
+ @times = R.convert(times)
774
+ @matrix = R.convert(matrix)
775
+
776
+ # call method validate to validate our imput
777
+ validate
778
+
779
+ # show the object just created
780
+ show
781
+
782
+ end
783
+
784
+ def validate
785
+
786
+ # Let's first check that we do not have an empty object
787
+ raise "Neither times nor matrix can be an empty object" if (@times.nil? || @matrix.nil?)
788
+
789
+ # validate the input, to make sure that size of @times and the number of columns in
790
+ # @matrix are the same
791
+ puts ("~~~ Trajectories: inspector ~~~ ")
792
+ raise "[Trajectories: validation] the number of temporal measurements \#{@times.length.gz} \
793
+ does not correspond with the number of columns in the matrix \#{@matrix.ncol.gz}" if (@times.length.gz != @matrix.ncol.gz)
794
+
795
+ end
796
+
797
+ end
798
+
799
+
800
+ class Trajectories
801
+
802
+ def initialize(times: nil, matrix: nil)
803
+ @times = R.convert(times)
804
+ @matrix = R.convert(matrix)
805
+
806
+ # call method validate to validate our imput
807
+ validate
808
+
809
+ # show the object just created
810
+ show
811
+
812
+ end
813
+
814
+ def validate
815
+
816
+ # Let's first check that we do not have an empty object
817
+ raise "Neither times nor matrix can be an empty object" if (@times.nil? || @matrix.nil?)
818
+
819
+ # validate the input, to make sure that size of @times and the number of columns in
820
+ # @matrix are the same
821
+ puts ("~~~ Trajectories: inspector ~~~ ")
822
+ raise "[Trajectories: validation] the number of temporal measurements #{@times.length.gz} \
823
+ does not correspond with the number of columns in the matrix #{@matrix.ncol.gz}" if (@times.length.gz != @matrix.ncol.gz)
824
+
825
+ end
826
+
827
+ end
828
+ ```
829
+
830
+ And now let's create a new Trajectories, but we will now pass a Ruby range for times:
831
+
832
+ ```{ruby}
833
+ ok = Trajectories.new(times: (1..2), matrix: R.matrix((1..2), ncol: 2))
834
+ ```
835
+
836
+ Perfect! This works fine. Let's do another example... Galaaz integrates with another Ruby
837
+ Gem called MDArray. MDArray provides multi-dimensional arrays for Ruby similar to what is
838
+ found in NumPy. It is beyond the scope of this paper to explain MDArray and the interested
839
+ reader is directed to MDArray wiki pages: https://github.com/rbotafogo/mdarray/wiki.
840
+
841
+ ```{ruby}
842
+ ok = Trajectories.new(times: (1..2), matrix: MDArray.double([2, 2], [1, 2, 3, 4]))
843
+ ```
844
+
845
+ We will now create a multi-dimensional array with the help of MDArray. We could think of this
846
+ multi-dimensional array as having BMI data for multiple patients. In this example, we have then
847
+ data for two patients:
848
+
849
+ ```{ruby}
850
+ multi_array = MDArray.fromfunction("double", [2, 3, 4]) { |x, y, z| x + y + z }
851
+ ```
852
+
853
+ ```
854
+ multi_array.print
855
+ ```
856
+
857
+ But for our Trajectories class, we need data for only one patient at a time, so we cannot
858
+ give this MDArray to Trajectories. MDArray allows us to get data slices efficiently, that is,
859
+ it will not do a data copy, just manipulate indexes so that only a 'view' of the data is made
860
+ available. So, let's make a Trajectories with data from our first patient:
861
+
862
+ console(<<-EOT)
863
+ ok1 = Trajectories.new(times: (1..4), matrix: multi_array.slice(0, 0))
864
+ EOT
865
+
866
+ body(<<-EOT)
867
+ And now let's create a Trajectories for our second patient:
868
+ EOT
869
+
870
+ console(<<-EOT)
871
+ ok2 = Trajectories.new(times: (1..4), matrix: multi_array.slice(0, 1))
872
+ EOT
873
+
874
+ # The Initializator
875
+
876
+ As we have seen, method 'initialize' is the main object creator orchestrator. This method can be
877
+ as complex as needed. So, let's get on with some improvements to our Trajectories class.
878
+
879
+ It would be rather pleasant that the columns of the matrix of the trajectories have names, the
880
+ names of measurements times. In the same way, the lines could be subscripted by a number of
881
+ individual.
882
+
883
+ To do this in R, one also uses method initialize:
884
+
885
+
886
+ ```
887
+ > setMethod(
888
+ + f="initialize",
889
+ + signature="Trajectories",
890
+ + definition=function(.Object,times,traj){
891
+ + cat("~~~ Trajectories: initializator ~~~ \\n")
892
+ + colnames(traj) <- paste("T",times,sep="")
893
+ + rownames(traj) <- paste("I",1:nrow(traj),sep= "")
894
+ + .Object@traj <- traj # Assignment of the slots
895
+ + .Object@times <- times
896
+ + return(.Object) # return of the object
897
+ + }
898
+ + )
899
+ ```
900
+
901
+ Let's do this change to our 'initialize' method; however, before that, we need to introduce
902
+ a new characteristic of Galaaz. In R, it is possible to assign a value to the result of a
903
+ function. For example, 'rownames(x) <- c("v1", "v2", "v3")'. Assigning to functions that way
904
+ is not possible in Ruby. In order to do this assignment we need to introduce method 'fassign'.
905
+ The above assignment is then written in Galaaz as 'x.fassign(:rownames, R.c("v1", "v2", "v3"))',
906
+ where the first argument to function fassign is the function name preceded by ':'.
907
+
908
+ ```{ruby}
909
+ class Trajectories
910
+
911
+ def initialize(times: nil, matrix: nil)
912
+ @times = times
913
+ @matrix = matrix
914
+
915
+ # call method validate to validate our imput
916
+ validate
917
+
918
+ # Add row names
919
+ puts ("~~~ Trajectories: initializator ~~~ ")
920
+ @matrix.fassign(:colnames, R.paste("T", @times, sep: ""))
921
+ @matrix.fassign(:rownames, R.paste("I", (1..@matrix.nrow.gz), sep: ""))
922
+
923
+ # show the object just created
924
+ show
925
+
926
+ end
927
+
928
+ end
929
+ ```
930
+
931
+ ```{ruby}
932
+ @traj = Trajectories.new(times: R.c(1,2,4,8), matrix: R.matrix((1..8),nrow: 2))
933
+ ```
934
+
935
+ Another example:
936
+
937
+ ```{ruby}
938
+ error = Trajectories.new(times: R.c(1,2,4,8), matrix: R.matrix((1..8), nrow: 2))
939
+ ```
940
+
941
+ Note that we still call our 'validate' method and it is still an error to create an empty
942
+ Trajectories or one in which the sizes are wrong:
943
+
944
+ ```{ruby}
945
+ error = Trajectories.new(times: R.c(1, 2, 48), matrix: R.matrix((1..8), nrow: 2))
946
+ ```
947
+
948
+ A constructor does not necessarily take the instance variable of the object as argument. For
949
+ example, if we know (that is not the case in reality, but let us imagine so) that the
950
+ BMI increases by 0.1 every week, we could build trajectories by providing the number
951
+ of weeks and the initial weights.
952
+
953
+ First the code in R, we skip the definition of class TrajectoriesBis:
954
+
955
+
956
+ ```
957
+ > setMethod ("initialize",
958
+ + "TrajectoriesBis",
959
+ + function(.Object,nbWeek,BMIinit){
960
+ + traj <- outer(BMIinit,1:nbWeek,function(init,week){return(init+0.1*week)})
961
+ + colnames(traj) <- paste("T",1:nbWeek,sep="")
962
+ + rownames(traj) <- paste("I",1:nrow(traj),sep="")
963
+ + .Object@times <- 1:nbWeek
964
+ + .Object@traj <- traj
965
+ + return(.Object)
966
+ + }
967
+ + )
968
+ ```
969
+
970
+ Now, let's make a TrajectoriesBis in Galaaz. Here again, we should point out some characteristics
971
+ of our code:
972
+
973
+ * We made initialize with two positional arguments, instead of named arguments, i.e.,
974
+ the first argument is the number of weeks and the second bmi_init. In this case,
975
+ when making a new object the position of the arguments is important and there is no
976
+ way to pass the argument by name;
977
+
978
+ * R function outer was called as if a method from bmi_init using dot notation, although
979
+ one could use R.outer without problem;
980
+
981
+ * Function 'outer' expects an R function as its 3rd argument. In order to build an R
982
+ function from Galaaz, we need to pass the function definition as a string to R.eval.
983
+
984
+ ```{ruby}
985
+ class TrajectoriesBis
986
+
987
+ attr_reader :times
988
+ attr_reader :matrix
989
+
990
+ def initialize(number_weeks, bmi_init)
991
+ @matrix = bmi_init.outer((1..number_weeks),
992
+ R.eval("function(init, week) {return(init + 0.1 * week)}"))
993
+ @times = number_weeks
994
+ end
995
+
996
+ end
997
+
998
+ @traj_bis = TrajectoriesBis.new(4, R.c(16,17,15.6))
999
+ ```
1000
+
1001
+ ```{ruby}
1002
+ puts @traj_bis.matrix
1003
+ ```
1004
+
1005
+ It is always possible to pass a Ruby variable to any string, by interpolating it into the string.
1006
+ To interpolate a variable into a string we put the variable inside #{}. As an example, let's
1007
+ assume that we will also require the BMI increase as a parameter for the constructor:
1008
+
1009
+ ```{ruby}
1010
+ class TrajectoriesBis
1011
+
1012
+ def initialize(number_weeks, bmi_init, increment)
1013
+ @matrix = bmi_init.outer((1..number_weeks),
1014
+ R.eval("function(init, week) {return(init + \#{increment} * week)}"))
1015
+ @times = number_weeks
1016
+ end
1017
+
1018
+ end
1019
+
1020
+ traj_bis = TrajectoriesBis.new(4, R.c(16,17,15.6), 0.3)
1021
+ ```
1022
+
1023
+ ```{ruby}
1024
+ class TrajectoriesBis
1025
+
1026
+ def initialize(number_weeks, bmi_init, increment)
1027
+ @matrix = bmi_init.outer((1..number_weeks),
1028
+ R.eval("function(init, week) {return(init + #{increment} * week)}"))
1029
+ @times = number_weeks
1030
+ end
1031
+
1032
+ end
1033
+
1034
+ @traj_bis = TrajectoriesBis.new(4, R.c(16,17,15.6), 0.3)
1035
+ ```
1036
+
1037
+ ```{ruby}
1038
+ puts @traj_bis.matrix
1039
+ ```
1040
+
1041
+ # Constructors for Users
1042
+
1043
+ Many times, it is interesting to have different ways of constructing an object depending on
1044
+ what information our users have or want to provide to the constructor. Although we have only one
1045
+ initialize method, we can create multiple methods, that do some preprocessing and then call the
1046
+ initialize method to carry out the object building.
1047
+
1048
+ In order to do that, we use what are called class methods, instead of instance methods. All the
1049
+ methods we've created so far are instance methods, class methods are defined by prepending the
1050
+ self keyword to the method's name. Still using the assumption that the BMI will grow by 0.1 per
1051
+ week, let's define a regular trajectory without having to define a TrajectoriesBis as above:
1052
+
1053
+
1054
+ ```
1055
+ > regularTrajectories <- function(nbWeek,BMIinit) {
1056
+ + traj <- outer(BMIinit,1:nbWeek,function(init,week){return(init+0.1*week)})
1057
+ + times <- 1: nbWeek
1058
+ + return(new(Class="Trajectories",times=times,traj=traj))
1059
+ + }
1060
+ > regularTrajectories(nbWeek=3,BMIinit=c(14,15,16))
1061
+ ```
1062
+
1063
+ Notice how method 'regular' is defined as 'self.regular', making it a class method. The last
1064
+ statement of the method definition is actually a call to the Trajectories constructor 'new' passing
1065
+ the calculated values for times and matrix.
1066
+
1067
+ Notice also how method regular is called, similar to the way new is called by adding it after class
1068
+ Trajectories name: 'Trajectories.regular'.
1069
+
1070
+ ```{ruby}
1071
+ class Trajectories
1072
+
1073
+ def self.regular(number_weeks: nil, bmi_init: nil)
1074
+ matrix = bmi_init.outer((1..number_weeks),
1075
+ R.eval("function(init, week) {return(init + 0.1 * week)}"))
1076
+ times = R.c((1..number_weeks))
1077
+ Trajectories.new(times: times, matrix: matrix)
1078
+ end
1079
+
1080
+ end
1081
+ ```
1082
+
1083
+ ```{ruby}
1084
+ @regular = Trajectories.regular(bmi_init: R.c(14, 15, 16), number_weeks: 3)
1085
+ ```
1086
+
1087
+ We have already seen that constructors can be as complex as needed, calling other methods and doing
1088
+ calculations on the received parameters. In this last example, we will check if the times
1089
+ variable was provided. If it is not provided, then we will use matrix columns to define the times:
1090
+
1091
+ ```{ruby}
1092
+ class Trajectories
1093
+
1094
+ def self.init(times: nil, matrix: nil)
1095
+ times = R.c((1..matrix.ncol.gz)) if times.nil?
1096
+ Trajectories.new(times: times, matrix: matrix)
1097
+ end
1098
+
1099
+ end
1100
+ ```
1101
+
1102
+ ```
1103
+ @traj = Trajectories.init(matrix: R.matrix((1..8), ncol: 4))
1104
+ ```
1105
+
1106
+ # Accessors
1107
+
1108
+ Accessors are methods for getting and setting the value of instance variables.
1109
+
1110
+ # Get
1111
+
1112
+ Getters are methods for getting the value of an instance variable. We have been using getters
1113
+ since the beginning of this document, without explicitly saying so. When defining attr_reader
1114
+ :times and attr_reader :matrix, we have actually defined two getter methods for reading the values
1115
+ of variables times and matrix respectively. We can however define getters explicitly:
1116
+
1117
+ ```{ruby}
1118
+ class TrajectoriesBis
1119
+
1120
+ def initialize(times: times, matrix: matrix)
1121
+ @times = times
1122
+ @matrix = matrix
1123
+ end
1124
+
1125
+ def times
1126
+ @times
1127
+ end
1128
+
1129
+ def matrix
1130
+ @matrix
1131
+ end
1132
+
1133
+ end
1134
+
1135
+ @traj = TrajectoriesBis.new(times: 1, matrix: 2)
1136
+ ```
1137
+
1138
+ ```{ruby}
1139
+ puts @traj.times
1140
+ ```
1141
+
1142
+ ```{ruby}
1143
+ puts @traj.matrix
1144
+ ```
1145
+
1146
+ It is also possible to define more sophisticated getters. For example one can
1147
+ regularly need the BMI at inclusion. In R, one would index a matrix as matrix[,1]. In Ruby,
1148
+ it is a syntax error to have a ',' just after the '['. In this case we need to add 'nil' as
1149
+ in matrix[nil, 1]:
1150
+
1151
+ ```{ruby}
1152
+ class Trajectories
1153
+
1154
+ def get_traj_inclusion
1155
+ @matrix[nil, 1]
1156
+ end
1157
+
1158
+ end
1159
+ ```
1160
+
1161
+ ```{ruby}
1162
+ puts @trajCochin.get_traj_inclusion
1163
+ ```
1164
+
1165
+ # Set
1166
+
1167
+ A setter is a method that assigns a value to a variable. As with getters, Ruby also provides an
1168
+ easy way to write setters and allows you to also write them explicitly. Let's first use the
1169
+ simple way:
1170
+
1171
+ ```{ruby}
1172
+ class TrajectoriesBis
1173
+
1174
+ attr_writer :times
1175
+ attr_writer :matrix
1176
+
1177
+ end
1178
+
1179
+ traj = TrajectoriesBis.new
1180
+ traj.times = R.c(1, 2)
1181
+ traj.matrix = R.matrix((1..2), ncol: 2)
1182
+ ```
1183
+
1184
+ ```
1185
+ puts @traj.matrix
1186
+ ```
1187
+
1188
+ Note that now we can use '=' to assign a value to both variables times and matrix. Without
1189
+ setters, changing the value of variables times and matrix was not possible. Our class, up
1190
+ to this point was protected from any changes to those variables. If we need to allow changes
1191
+ to those variables, then setters are needed. In this case, the simple setter as shown above is
1192
+ not ideal, since it would allow changes that break the restriction that variable times has to
1193
+ have the same length as the number of columns of matrix. In order to do the verification we
1194
+ need to implement a more sophisticated setter. In the example below, we add the 'times=' setter
1195
+ that receives as input one argument. First we convert the given argument to an R object, then
1196
+ check to see that the length of times is the same as the number of columns and if everything is
1197
+ fine, then we set the value of instance variable times:
1198
+ EOT
1199
+
1200
+ #
1201
+ # We need to put the times= definition inside the comment_code block because it accesses a variable
1202
+ # from inside the HereDoc. If we do not comment this access we will get an error saying that
1203
+ # @matrix is not a global variable, which is really the case.
1204
+ #
1205
+ ```{ruby}
1206
+ class Trajectories
1207
+
1208
+ def times=(times)
1209
+ times = R.convert(times)
1210
+ raise "[Trajectories: validation] the number of temporal measurements \#{times.length.gz} \
1211
+ does not correspond with the number of columns in the matrix \#{@matrix.ncol.gz}" if (times.length.gz != @matrix.ncol.gz)
1212
+ @times = times
1213
+ end
1214
+
1215
+ end
1216
+ ```
1217
+
1218
+ class Trajectories
1219
+
1220
+ def times=(times)
1221
+ times = R.convert(times)
1222
+ raise "[Trajectories: validation] the number of temporal measurements #{times.length.gz} \
1223
+ does not correspond with the number of columns in the matrix #{@matrix.ncol.gz}" if (times.length.gz != @matrix.ncol.gz)
1224
+ @times = times
1225
+ end
1226
+
1227
+ end
1228
+
1229
+ ```{ruby}
1230
+ trajCochin.times = (1..5)
1231
+ ```
1232
+
1233
+ We now set the value appropriately and will not get any errors:
1234
+
1235
+ ```{ruby}
1236
+ trajCochin.times = R.c(1, 5, 6, 8)
1237
+ ```
1238
+
1239
+ # The Operator '['
1240
+
1241
+ It is also possible to define getters by using the operator '['. This operator is not usually
1242
+ used for returning instance variables and it is preferable to use the methods we've used above;
1243
+ however, for completeness with SS4 we are showing how to define this here. Operator '[' is
1244
+ better left to be used for array/matrix indices.
1245
+
1246
+ ```{ruby}
1247
+ class Trajectories
1248
+
1249
+ def [](var_name)
1250
+
1251
+ case var_name
1252
+ when "times"
1253
+ @times
1254
+ when "matrix"
1255
+ @matrix
1256
+ else
1257
+ raise "Unknown instance variable"
1258
+ end
1259
+
1260
+ end
1261
+
1262
+ end
1263
+ ```
1264
+
1265
+ ```{ruby}
1266
+ puts @trajCochin["times"]
1267
+ ```
1268
+
1269
+ Similarly, we could use operator '[]=' to assign a value to times and matrix. We will not do this
1270
+ here as we think that the other options are better and the interested user can easily find help,
1271
+ if needed, to implement such a method.
1272
+
1273
+ # To Go Further
1274
+
1275
+ This section will introduce advanced features of Object Oriented programming such as Inheritance
1276
+ and Modules and will also show some aspects of S4 that do not apply to Ruby.
1277
+
1278
+ # Methods Using Several Arguments
1279
+
1280
+ In Ruby, methods can have as many arguments as needed and those methods are defined the way we
1281
+ have already seen in many of the examples above. The example in SS4 presents a method that prints
1282
+ different output if its input is numeric, character, or both. Let's write a class in Ruby that
1283
+ does the same for Numeric and String. In Ruby we do not define global functions, we always define
1284
+ methods inside classes or modules (as we will see later). Also, Ruby is not typed, so methods are
1285
+ not called depending on their types as in SS4 examples. Below, method test will be called with
1286
+ one parameter. At the time of calling we do not know the type of the argument, the method can
1287
+ then check if the received argument is a Numeric or a String and at this time, decide what should
1288
+ be printed.
1289
+
1290
+ ```{ruby}
1291
+ class Test
1292
+
1293
+ def test(input)
1294
+
1295
+ case input
1296
+ when Numeric
1297
+ puts "The input is numeric: #{input}"
1298
+ when String
1299
+ puts "The input is a string: #{input}"
1300
+ else
1301
+ puts "The input is neither a number nor a string"
1302
+ end
1303
+
1304
+ end
1305
+
1306
+ end
1307
+
1308
+ t = Test.new
1309
+ ```
1310
+
1311
+ ```{ruby}
1312
+ puts @t.test(5)
1313
+ ```
1314
+
1315
+ ```{ruby}
1316
+ puts @t.test("Hello")
1317
+ ```
1318
+
1319
+ Ruby has ways of dealing with multiple arguments, missing arguments, undefined number of arguments,
1320
+ named arguments, unnamed arguments, etc. This is beyond the scope of this document and we
1321
+ suggest the interested reader to go to the many resources about Ruby that can easily be found
1322
+ on the web.
1323
+
1324
+ We will now create a new class 'Partition' that we will use later in this document. This class will
1325
+ have only the basic methods needed for the examples to work.
1326
+
1327
+ ```{ruby}
1328
+ class Partition
1329
+
1330
+ attr_reader :nb_groups
1331
+ attr_reader :part
1332
+
1333
+ def initialize(nb_groups, part)
1334
+ @nb_groups = nb_groups
1335
+ @part = part
1336
+ end
1337
+
1338
+ end
1339
+
1340
+ @partCochin = Partition.new(2, R.c("A","B","A","B").factor)
1341
+ @partStAnne = Partition.new(2, R.c("A","B").rep(R.c(50,30)).factor)
1342
+ ```
1343
+
1344
+ ```
1345
+ puts @partCochin.part
1346
+ ```
1347
+
1348
+ ```{ruby}
1349
+ puts @partStAnne.part
1350
+ ```
1351
+
1352
+ We will suppose that part is always composed of capital letters going from A to
1353
+ LETTERS[nb_groups].
1354
+
1355
+ # Inheritance
1356
+
1357
+ Ruby being a powerful Object Oriented language has the concept of Inheritance, but it does not
1358
+ allow for multiple inheritance. Multiple inheritance has many drawbacks and Ruby just does not
1359
+ support it. However, Ruby has other concepts that make up for the lack of multiple inheritance as
1360
+ we will see in the following examples.
1361
+
1362
+ So, let's go back to SS4 examples. We want now to define a class called TrajPartitioned that
1363
+ inherits from class Trajectories. When a class has a parent, all methods available for the
1364
+ parent are also available to the child.
1365
+
1366
+
1367
+ ```{ruby}
1368
+ class TrajPartitioned < Trajectories
1369
+
1370
+ attr_reader :list_partitions
1371
+
1372
+ end
1373
+ ```
1374
+
1375
+ That's all there is to it! We've just created a class TrajPartitioned that inherits all methods
1376
+ from class Trajectories and at this point does nothing different from Trajectories, but adds a
1377
+ new instance variable: list_partitions.
1378
+
1379
+ Creating TrajPartitioned without arguments will generate an error, since a Trajectories requires
1380
+ both times and matrix to be non null.
1381
+
1382
+
1383
+ ```{ruby}
1384
+ @tdPitie = TrajPartitioned.new
1385
+ ```
1386
+
1387
+ Let's try to create a TrajPartitioned, but passing to it two partitions. For that, let's first
1388
+ create a new Partition:
1389
+
1390
+ ```{ruby}
1391
+ @partCochin2 = Partition.new(3, R.c("A", "C", "C", "B").factor)
1392
+ ```
1393
+
1394
+ And now let's create the TrajPartitioned:
1395
+
1396
+ ```{ruby}
1397
+ @tdCochin = TrajPartitioned.new(times: R.c(1,3,4,5), matrix: trajCochin.matrix,
1398
+ list_partitions: R.list(partCochin,partCochin2))
1399
+ ```
1400
+
1401
+ This didn't work giving us an error saying that <Partition...> is an unknown parameter for R. Hummm??
1402
+ R function 'list' expects R objects, and in this case, partCochin and partCochin2 are Ruby classes,
1403
+ so trying to apply function list to them does not work. Clearly, we will have to work in the realm
1404
+ of Ruby to keep the list of partitions. This is not a problem as Ruby has data structures to
1405
+ maintain a list of objects, the Array. Let's then try another solution:
1406
+
1407
+ ```{ruby}
1408
+ tdCochin = TrajPartitioned.new(times: R.c(1,3,4,5), matrix: trajCochin.matrix,
1409
+ list_partitions: [partCochin, partCochin2])
1410
+ ```
1411
+
1412
+ We now get a second error: 'unknown keyword: list_partitions'. Class TrajPartitioned inherits
1413
+ from class Trajectories and class Trajectories has an initialize function that requires two
1414
+ parameters, times and matrix; list_partitions is not a parameter for initialize and is thus
1415
+ unknown. In order to fix this problem we need to create an initialize method for class
1416
+ TrajPartitioned.
1417
+
1418
+
1419
+ # The 'super' Keyword
1420
+
1421
+ R has a method called 'callNextMethod' for control flow between inherited classes. In Ruby, we
1422
+ have a model that is a bit different. When a method is called on a subclass, if this method is
1423
+ not found it will be searched in the parent class and it will go up the hierarchy of classes until
1424
+ it is found or an error is issued. If we want the parent method to be called we can call 'super':
1425
+
1426
+
1427
+ ```{ruby}
1428
+ class TrajPartitioned
1429
+
1430
+ def initialize(times: times, matrix: matrix, list_partitions: list_partitions)
1431
+ super(times: times, matrix: matrix)
1432
+ @list_partitions = list_partitions
1433
+ end
1434
+
1435
+ end
1436
+ ```
1437
+
1438
+ Let's try our example again:
1439
+
1440
+ ```{ruby}
1441
+ @tdCochin = TrajPartitioned.new(times: R.c(1,3,4,5), matrix: trajCochin.matrix,
1442
+ list_partitions: [partCochin, partCochin2])
1443
+ ```
1444
+
1445
+ Now tdCochin is created correctly; however, the 'show' method only shows information about
1446
+ times and matrix, there is nothing about our new list_partitions variable. This is so, since
1447
+ there is no method 'show' in TrajPartitioned, so method 'show' from Trajectories is executed.
1448
+
1449
+ So, let's start by writing a 'print' method, that will print all the information we have in
1450
+ TrajPartitioned. The flow of control for this method is: Ruby sees a call to 'print', so it checks
1451
+ to see if 'print' is a method for TrajPartitioned. Since we have just defined this method, Ruby
1452
+ finds it and executes it. The first command in print is a call to 'super', which will call the
1453
+ parent 'print' method, that prints information for 'times' and 'matrix'. When the parent 'print'
1454
+ finishes control continues after the 'super' call, printing the number of available partitions.
1455
+
1456
+ ```{ruby}
1457
+ class TrajPartitioned
1458
+
1459
+ def print
1460
+ super
1461
+ puts ("the object also contains \#{@list_partitions.length} partition")
1462
+ puts ("***** Fine of print (TrajPartitioned) *****")
1463
+ end
1464
+
1465
+ end
1466
+ ```
1467
+
1468
+ ```{ruby}
1469
+ puts @tdCochin
1470
+ ```
1471
+
1472
+ Notice that this model is much cleaner than 'callNextMethod' and is not subject to any of the
1473
+ difficulties presented in SS4 and there is no need for the keywords “is”, “as” and “as<-”, although
1474
+ Ruby provides methods to check the class of an object, its hierarchy, etc., when needed.
1475
+
1476
+ In Ruby there is no similar method as "setIs" and it is not possible to convert one class into
1477
+ another, but there are other ways of getting the necessary results. Let's then implement a
1478
+ method that returns the partition with the least number of groups. First, as usual, the R code
1479
+ with 'setIs':
1480
+
1481
+ ```
1482
+ > setIs(
1483
+ + class1="TrajPartitioned",
1484
+ + class2="Partition",
1485
+ + coerce=function(from,to){
1486
+ + numberGroups <- sapply(tdCochin@listPartitions,getNbGroups)
1487
+ + Smallest <- which.min(-numberGroups)
1488
+ + to<-new("Partition")
1489
+ + to@nbGroups <- getNbGroups(from@listPartitions[[Smallest]])
1490
+ + to@part <- getPart(from@listPartitions[[Smallest]])
1491
+ + return(to)
1492
+ + }
1493
+ + )
1494
+ ```
1495
+
1496
+ And now the Ruby code. Here we are getting deeper into Ruby and it is becoming harder for a
1497
+ pure R developer to understand the code. We will describe it in more detail:
1498
+
1499
+ * We define a method called 'to_part' that has one argument 'which'. By default 'which'
1500
+ is ':min', the name of the minimum method. This means that if no argument is given to
1501
+ to_part it will assume the which = :min;
1502
+
1503
+ * @list_partitions is a Ruby array. Method map is similar to method sapply in R, it
1504
+ applies a 'block' to every element of the array, returning an array. Describing
1505
+ blocks is beyond the scope of this document, but we can think of it as if it were a
1506
+ function. The block is in '{}' and has one argument named 'part'. Thus, map goes
1507
+ through all elements of the array, and gets the nb_groups of the element and returns
1508
+ them into the number_groups array.
1509
+
1510
+ * number_groups is an array and doing number_groups.min returns the minimum value in
1511
+ number_groups and number_groups.max the maximum. We can call a method on an object
1512
+ by 'sending' the method name to the object, so, number_groups.send(:min) is equivalent to
1513
+ number_groups.min;
1514
+
1515
+ * Method 'index' for arrays returns the index of a given element. So, number_groups.index(3)
1516
+ would return the index of the element '3'. Then number_groups.index(number_groups.min)
1517
+ returns the index of the minimum element in the array. This is the equivalent of R
1518
+ which.min(number_groups);
1519
+
1520
+ * Finally, number_groups.index(number_groups.send(which)), will return the index of the
1521
+ element we ask for, be it :min or :max. Note that if we pass another value, this would
1522
+ be an error.
1523
+
1524
+ ```{ruby}
1525
+ class TrajPartitioned
1526
+
1527
+ def to_part(which = :min)
1528
+ number_groups = @list_partitions.map { |part| part.nb_groups }
1529
+ selected = number_groups.index(number_groups.send(which))
1530
+ return @list_partitions[selected]
1531
+ end
1532
+
1533
+ end
1534
+ ```
1535
+
1536
+ To get the partition with the minimum number of elements:
1537
+
1538
+ ```{ruby}
1539
+ puts @tdCochin.to_part.part
1540
+ ```
1541
+
1542
+ To get the partition with the maximum number of elements:
1543
+
1544
+ ```{ruby}
1545
+ @tdCochin.to_part(:max).part.pp
1546
+ ```
1547
+
1548
+ In this example we did not follow exactly the R code from SS4. The reason for that is that
1549
+ 'list_partitions' is a list of Ruby classes and we cannot run sapply on this list. If we
1550
+ try to call a 'getNbGroups' or in the Ruby case nb_groups, the code will crash. Let's try
1551
+ it:
1552
+
1553
+ # Virtual Classes
1554
+
1555
+ In Ruby there are no "Virtual Classes", but it is possible to implement derived classes from
1556
+ a parent class with methods that behave properly according to the object's class. Following
1557
+ SS4 we will implement two classes: PartitionSimple and PartitionEval which are subclasses
1558
+ of class PartitionFather. PartitionFather will just be a regular class. Methods defined in
1559
+ PartitionFather will be available to be used in the subclasses.
1560
+
1561
+ Here is the R code of those classes and the implementation of a method in PartitionFather
1562
+ that multiplies the number of groups by 2:
1563
+
1564
+
1565
+ ```
1566
+ > setClass(
1567
+ + Class="PartitionFather",
1568
+ + representation=representation(nbGroups="numeric","VIRTUAL")
1569
+ + )
1570
+
1571
+ > setClass(
1572
+ + Class="PartitionSimple",
1573
+ + representation=representation(part="factor"),
1574
+ + contains="PartitionFather"
1575
+ + )
1576
+
1577
+ > setClass(
1578
+ + Class="PartitionEval",
1579
+ + representation=representation(part="ordered"),
1580
+ + contains="PartitionFather"
1581
+ + )
1582
+
1583
+ > setGeneric("nbMultTwo",function(object){standardGeneric("nbMultTwo")})
1584
+
1585
+ > setMethod("nbMultTwo","PartitionFather",
1586
+ + function(object){
1587
+ + object@nbGroups <- object@nbGroups*2
1588
+ + return (object)
1589
+ + }
1590
+ + )
1591
+ ```
1592
+
1593
+ Since Ruby has no type definition, there is no real need for a parent class and subclasses.
1594
+ However, we will implement those classes in order to show Ruby's inheritance:
1595
+
1596
+ ```{ruby}
1597
+ # Parent class. Differently from SS4, both 'nb_groups' and 'part' are defined in the
1598
+ # parent class.
1599
+ class PartitionFather
1600
+
1601
+ attr_reader :nb_groups
1602
+ attr_reader :part
1603
+
1604
+ # initialize class PartitionFather with the number of groups and parts. Note that we
1605
+ # use R.i for nb_groups in order to convert the number of groups into an R vector.
1606
+ def initialize(nb_groups: 0, part: nil)
1607
+ @nb_groups = R.i(nb_groups)
1608
+ @part = part
1609
+ end
1610
+
1611
+ # method nb_mult_two can be called from all subclasses
1612
+ def nb_mult_two
1613
+ @nb_groups * 2
1614
+ end
1615
+
1616
+ # method 'to_s' is called whenever we try to print a Ruby object. This method emulates
1617
+ # R 'print' method that prints all the slots.
1618
+ def to_s
1619
+ puts ("Variable 'nb_groups':")
1620
+ @nb_groups.pp
1621
+ puts
1622
+ puts ("Variable 'part':")
1623
+ @part.pp
1624
+ puts
1625
+ end
1626
+
1627
+ end
1628
+
1629
+ # Class PartitionSimple is a subclass of PartitionFather. To make a subclass of a
1630
+ # class we use the operator '<'. Since the whole logic is in the parent class
1631
+ # PartitionSimple is just an empty class
1632
+ class PartitionSimple < PartitionFather
1633
+
1634
+ end
1635
+
1636
+ # PartitionEval is also only an empty class
1637
+ class PartitionEval < PartitionFather
1638
+
1639
+ end
1640
+ ```
1641
+
1642
+ ```{ruby}
1643
+ @a = PartitionSimple.new(nb_groups: 3, part: (R.LETTERS[R.c(1, 2, 3, 2, 2, 1)].factor))
1644
+ puts @a
1645
+ ```
1646
+
1647
+ ```{ruby}
1648
+ puts @a.nb_mult_two
1649
+ ```
1650
+
1651
+ ```{ruby}
1652
+ @b = PartitionEval.new(nb_groups: 5, part: R.LETTERS[R.c(1, 5, 3, 4, 2, 4)].ordered)
1653
+ puts @b
1654
+ ```
1655
+
1656
+ ```{ruby}
1657
+ puts @b.nb_mult_two
1658
+ ```
1659
+
1660
+ The example above, although it replicates SS4 is not actually very useful from the point of
1661
+ view of class hierarchy in Ruby. We will then write a new function to_s in class
1662
+ PartitionSimple that will print the name of the class:
1663
+
1664
+ ```{ruby}
1665
+ class PartitionSimple
1666
+
1667
+ def to_s
1668
+ puts("Class PartitionSimple")
1669
+ super
1670
+ end
1671
+
1672
+ end
1673
+ ```
1674
+
1675
+ ```{ruby}
1676
+ puts @a
1677
+ ```
1678
+
1679
+ As can be seen, 'puts a' now calls method 'to_s' defined in class PartitionSimple. This
1680
+ method prints 'Class PartitionSimple' and then calls the super method, i.e., method 'to_s'
1681
+ from class PartitionFather.
1682
+
1683
+ Note though that 'puts b' still prints the same output, since it has no particular 'to_s'
1684
+ method.
1685
+
1686
+ ```{ruby}
1687
+ puts @b
1688
+ ```
1689
+
1690
+ # Internal Modification of an Object
1691
+
1692
+
1693
+ ## Method to Modify a Field
1694
+
1695
+ Let us return to our trajectories example and define a third method that imputes data for
1696
+ missing values. To simplify, we will impute by replacing by the mean values. This is the R
1697
+ code to do this:
1698
+
1699
+ ```
1700
+ > meanWithoutNa <- function (x){mean(x,na.rm=TRUE)}
1701
+ > setGeneric("impute",function (.Object){standardGeneric("impute")})
1702
+ > setMethod(
1703
+ + f="impute",
1704
+ + signature="Trajectories",
1705
+ + def=function(.Object){
1706
+ + average <- apply(.Object@traj,2,meanWithoutNa)
1707
+ + for (iCol in 1:ncol(.Object@traj)){
1708
+ + .Object@traj[is.na(.Object@traj[,iCol]),iCol] <- average[iCol]
1709
+ + }
1710
+ + return(.Object)
1711
+ + }
1712
+ + )
1713
+ ```
1714
+
1715
+ The code above, as explained in SS4 creates a new object and does not change the original one.
1716
+ So, calling impute(trajCochin) will work correctly by creating a new object but will not
1717
+ change trajCochin. This works fine, but can be memory expensive if the matrix is a large
1718
+ one.
1719
+
1720
+ Let's now implement the same method in Galaaz. We will use for that Ruby's 'each' method.
1721
+ In Ruby, the 'each' method goes through all elements of a vector or list in order. The
1722
+ 'each' method is available for an R matrix in Galaaz. Actually, when calling 'each' for an
1723
+ R matrix, the matrix is converted to a Ruby MDArray and the 'each' method is applied to this
1724
+ MDArray. So, we can do @matrix.each and cycle through every element in this matrix.
1725
+ The 'each_with_index' method does the same as 'each' but also passes the index of the element
1726
+ to the Ruby block (please, google Ruby block to get further information on blocks in Ruby).
1727
+
1728
+ One key aspect to remember is that Ruby indexes start with 0 while R indexes start with 1, so
1729
+ an element with index i in Ruby will be indexed i+1 in R. With that, let's see the Galaaz
1730
+ code for method impute:
1731
+
1732
+ ```{ruby}
1733
+ class Trajectories
1734
+
1735
+ def mean_without_na
1736
+ @matrix.mean(na__rm: TRUE)
1737
+ end
1738
+
1739
+ def impute
1740
+ @matrix.each_with_index do |elmt, i|
1741
+ @matrix[i+1] = mean_without_na if elmt.nan?
1742
+ end
1743
+ end
1744
+
1745
+ end
1746
+ ```
1747
+
1748
+ ```{ruby}
1749
+ @trajCochin.impute
1750
+ puts @trajCochin.matrix
1751
+ ```
1752
+
1753
+ It works! and note that actually trajCochin matrix was changed. However, as with the R
1754
+ solution, Renjin does make a copy of the data in the background. Let's investigate this a
1755
+ little further by getting inside Galaaz's internals. Method 'as__mdarray' explicitly converts
1756
+ an R matrix to an MDArray:
1757
+
1758
+ ```
1759
+ cochin_internal = trajCochin.matrix.as__mdarray
1760
+ cochin_internal.print
1761
+ ```
1762
+
1763
+
1764
+ Now let's assign a value to trajCochin matrix and compare it to the variable cochin_internal:
1765
+
1766
+ ```
1767
+ trajCochin.matrix[1] = 1
1768
+ trajCochin.matrix.pp
1769
+ puts
1770
+ puts cochin_internal
1771
+ ```
1772
+
1773
+ As we can now see, trajCochin and cochin_internal have different content, while cochin_internal
1774
+ still has the same value in index 0, i.e. 15.0, trajCochin matrix has value 1 in index 1. This
1775
+ shows that Renjin when assigning to trajCochin.matrix[1] makes a copy of the original data.
1776
+
1777
+ Below, we use method 'get' which is a synonym of method 'as__mdarray' to again get the content
1778
+ of trajCochin.matrix. This variable has as first element the value 1, as set previously.
1779
+
1780
+ ```
1781
+ internal2 = trajCochin.matrix.get
1782
+ internal2.print
1783
+ ```
1784
+
1785
+ We will now set the value of the second element of internal2 to 1000. Note that internal2 is
1786
+ an MDArray and that the second element of this array is indexed with 1:
1787
+
1788
+ console(<<-EOT)
1789
+ internal2[1] = 1000
1790
+ internal2.print
1791
+ EOT
1792
+
1793
+ body(<<-EOT)
1794
+ And now, if we print the value of trajCochin.matrix, we note that the second element of this
1795
+ matrix (R matrix) is also 1000. This shows that the MDArray obtained from calling 'as__mdarray'
1796
+ and the R matrix have the same backing store.
1797
+ EOT
1798
+
1799
+ console(<<-EOT)
1800
+ trajCochin.matrix.pp
1801
+ EOT
1802
+
1803
+ body(<<-EOT)
1804
+ Remember, changing the internals of an R matrix like that can be quite dangerous. Renjin expects
1805
+ its data to be immutable, and using MDArray allows the user to change this data violating
1806
+ Renjin principles. If weird bugs start creeping on your code, this should be one of the first
1807
+ things to be investigated.
1808
+ EOT
1809
+
1810
+ # Conclusions I
1811
+
1812
+ This ends SS4 paper. We believe we have shown that R S4 can be substituted by Galaaz and
1813
+ Ruby classes and that Galaaz makes an easy transition for R developers to Ruby. Ruby is
1814
+ a very flexible and powerful language and has many interesting libraries, where Rails is
1815
+ maybe one of the best known, but there are thousands of others. For those interested in
1816
+ getting deeper into Ruby's libraries, we suggest they look at:
1817
+
1818
+ * https://github.com/markets/awesome-ruby
1819
+ * http://bestgems.org/
1820
+
1821
+ For those interested in Ruby and science, we recommend:
1822
+
1823
+ * http://sciruby.com/
1824
+
1825
+
1826
+
1827
+ # ET Phone Home
1828
+
1829
+ In this paper we have focused on accessing R functions from Ruby and have shown how to
1830
+ integrate Ruby with R from the point of view of a Ruby developer, i.e, we have developed
1831
+ in Ruby and have made calls to R functions very transparently. Although this is quite
1832
+ powerful, sometimes this still lacks some power. In this section we will see how we can
1833
+ integrate R with Ruby (through Galaaz) from the point of view of the R developer, i.e.,
1834
+ we will allow R scripts to have access to Ruby classes and methods.
1835
+
1836
+ We did not explicitly show and did not call upon the reader's attention, but whenever
1837
+ an R function was called we either passed to it basic type objects (numeric, string,
1838
+ boolean), Ruby arrays and MDArrays. Let's try now to pass a Ruby class to R:
1839
+
1840
+ ```
1841
+ R.part = Partition.new(3, R.c("A", "C", "C", "B").factor)
1842
+ ```
1843
+
1844
+ ```
1845
+ R.part.pp
1846
+ ```
1847
+
1848
+ Calling method 'pp' on this object does not print anything, as this is a completely strange
1849
+ object in the R planet. So, let's try to see what type of object this is:
1850
+
1851
+ ```
1852
+ R.part.typeof.pp
1853
+ ```
1854
+
1855
+ We get 'externalptr' as type. So we can send the Ruby class to the R planet, but there is
1856
+ nothing we can do with it there. It is just an 'externalptr'. But we have learned elsewhere
1857
+ that if we want to send an astronaut from a planet to another, a good way of doing it is by
1858
+ creating an 'avatar'! An 'avatar' is remotely controlled by its owner, but it acts almost as
1859
+ if it were a native being of the other planet.
1860
+
1861
+ Galaaz provides a way of creating an 'avatar' from any Ruby class and send it to R land. We
1862
+ will now show how this is done and how our 'avatar' calls home to get things done. Method
1863
+ 'rpack' creates the avatar. We will start with a simple example, creating an 'avatar' from
1864
+ a Ruby array:
1865
+
1866
+ ```
1867
+ # create an array of data in Ruby
1868
+ array = [1, 2, 3]
1869
+
1870
+ # Pack the array and assign it to an R variable. Remember that ruby__array, becomes
1871
+ # ruby.array inside the R script
1872
+ R.ruby__array = R.rpack(array)
1873
+ ```
1874
+
1875
+
1876
+ Now, we have in 'ruby.array' an 'avatar' of array. In order for our 'avatar' to call
1877
+ back home, it uses method 'run':
1878
+
1879
+
1880
+ ```
1881
+ # note that this calls Ruby method 'length' on the array and not R length function.
1882
+ R.eval("val <- ruby.array$run('length')")
1883
+ ```
1884
+ console(<<-EOT)
1885
+ R.eval("print(val)")
1886
+ EOT
1887
+
1888
+ body(<<-EOT)
1889
+ Let's use a more interesting array method '<<'. This method adds elements to the
1890
+ end of the array. This method takes one argument, the element to be added at the end of
1891
+ the array. Thus we call function run passing two arguments, the '<<' method as first
1892
+ argument and the element to add as second argument.
1893
+ EOT
1894
+
1895
+ code(<<-EOC)
1896
+ R.eval(<<-EOT)
1897
+ ruby.array$run('<<', 4)
1898
+ ruby.array$run('<<', 5)
1899
+ EOT
1900
+ EOC
1901
+
1902
+ body(<<-EOT)
1903
+ Let's now print the content of the array. For that, we use another Ruby method: 'to_s'. This
1904
+ method generates a string with a representation of an object. In the case of an array, it
1905
+ will show the array's content:
1906
+ EOT
1907
+
1908
+ console(<<-EOT)
1909
+ R.eval("print(ruby.array$run('to_s'))")
1910
+ EOT
1911
+
1912
+ body(<<-EOT)
1913
+ One important aspect of interfacing R and Ruby is that both worlds interact with the same data.
1914
+ There is no data copying between the two worlds, so, effectively whatever happens to the
1915
+ 'avatar' will also happen to the 'real' object. Let's take a look at that. First, we will
1916
+ go back to the Ruby world and see our array:
1917
+ EOT
1918
+
1919
+ console(<<-EOT)
1920
+ puts array
1921
+ EOT
1922
+
1923
+ body(<<-EOT)
1924
+ Now, let's change a value of our array in Ruby:
1925
+ EOT
1926
+
1927
+ code(<<-EOT)
1928
+ array[0] = "new element"
1929
+ EOT
1930
+
1931
+ body(<<-EOT)
1932
+ And let's take a look at our 'ruby.array' in R:
1933
+ EOT
1934
+
1935
+ console(<<-EOT)
1936
+ R.eval("print(ruby.array$run('to_s'))")
1937
+ EOT
1938
+
1939
+ body(<<-EOT)
1940
+ As you can see, 'ruby.array' is still the same Ruby object.
1941
+
1942
+ Avatars maintain some properties of their original world. Although the concept of method
1943
+ chaining is foreign to R, chaining can be used with imported objects from Ruby. Method
1944
+ chaining occurs when the result of applying a method on an object returns an object (usually
1945
+ the same object) in which another method can be applied. In the example below, method '<<'
1946
+ will be applied multiple times for ruby.array
1947
+ EOT
1948
+
1949
+ code(<<-EOC)
1950
+ R.eval(<<-EOT)
1951
+ ruby.array$run('<<', 6)$run('<<', 7)$run('<<', 8)$run('<<', 9)
1952
+ EOT
1953
+ EOC
1954
+
1955
+ console(<<-EOT)
1956
+ R.eval("print(ruby.array$run('to_s'))")
1957
+ EOT
1958
+
1959
+ body(<<-EOT)
1960
+ We can also access any array element inside the R script, but note that we have
1961
+ to use Ruby indexing, i.e., the first element of the array is index 0:
1962
+ EOT
1963
+
1964
+ console(<<-EOT)
1965
+ R.eval("print(ruby.array$run('[]', 2))")
1966
+ EOT
1967
+
1968
+ console(<<-EOT)
1969
+ R.eval("print(ruby.array$run('[]', 5))")
1970
+ EOT
1971
+
1972
+ body(<<-EOT)
1973
+ Now that we have seen how to "call back" home and integrate Ruby classes with R, let's go
1974
+ back to our TrajPartitioned method to_part, and create a to_part2 method that will use
1975
+ R 'sapply' function:
1976
+ EOT
1977
+
1978
+ code(<<-EOT)
1979
+ class TrajPartitioned
1980
+
1981
+ def to_part2
1982
+ R.pack = R.rpack(@list_partitions, scope: :internal)
1983
+ number_groups = R.eval("sapply(pack, function(x) x$run('nb_groups'))")
1984
+ @list_partitions[number_groups.which__min.gz]
1985
+ end
1986
+
1987
+ end
1988
+ EOT
1989
+
1990
+ console(<<-EOT)
1991
+ tdCochin.to_part2.part.pp
1992
+ EOT
1993
+
1994
+ subsection("Creating Ruby Objects from R Scripts")
1995
+
1996
+ body(<<-EOT)
1997
+ In all the examples given so far on sending Ruby objects to R, the object was created in
1998
+ Ruby and sent to R. In the following examples, all the work will be done inside R
1999
+ scripts without the need to create anything in Ruby. For the R developer, this might be
2000
+ the easiest way to begin trying Galaaz and start migrating from R to Ruby.
2001
+
2002
+ In this first example we will create a Ruby String object inside an R script. In order
2003
+ to create Ruby objects in R, we need to use the Ruby.Object class and use the 'build'
2004
+ function. The 'build' function is the equivalent of the 'new' function in Ruby and
2005
+ receives as first argument the name of the class to be built and as other arguments the
2006
+ same arguments from Ruby 'new':
2007
+
2008
+ In the following example, we create a String object initialized with "this is a new string":
2009
+ EOT
2010
+
2011
+ code(<<-EOC)
2012
+ R.eval(<<-EOT)
2013
+ # This is an actuall R script, which allows the creation and use of Ruby classes
2014
+ # and methods.
2015
+ # Create a string, from class String in Ruby. Use function build to intanciate a
2016
+ # new object
2017
+ string <- Ruby.Object$build("String", "this is a new string")
2018
+ EOT
2019
+ EOC
2020
+
2021
+ console(<<-EOT)
2022
+ R.eval("print(string)")
2023
+ EOT
2024
+
2025
+ body(<<-EOT)
2026
+ In Ruby, many methods are known as 'class methods'. Class methods are methods that exist on
2027
+ the class and not on an instance of the class. In the example above, we create an instance
2028
+ (object) of type String. In the following example, we will access class Marshal: The marshaling
2029
+ library converts collections of Ruby objects into a byte stream, allowing them to be stored
2030
+ outside the currently active script. This data may subsequently be read and the original
2031
+ objects reconstituted.
2032
+ EOT
2033
+
2034
+ code(<<-EOC)
2035
+ # Use function get_class to get a Ruby class
2036
+ R.eval(<<-EOT)
2037
+ Marshal <- Ruby.Object$get_class("Marshal")
2038
+
2039
+ # Method 'dump' is a Marshal class method as is 'load'
2040
+ str <- Marshal$run("dump", string)
2041
+ restored <- Marshal$run("load", str)
2042
+ EOT
2043
+ EOC
2044
+
2045
+ console(<<-EOT)
2046
+ R.eval("print(restored)")
2047
+ EOT
2048
+
2049
+ subsection("Interfacing Java with Renjin")
2050
+
2051
+ body(<<-EOT)
2052
+ Renjin allows for easy integration of Java into R scripts, giving the user access to all of
2053
+ Java's libraries and functions. Although this paper is mainly about interfacing R and Ruby,
2054
+ we believe that it is also important to see how to interface with Java from an R script.
2055
+ JRuby, the platform on which Galaaz depends, also allows easy integration of Java and Ruby;
2056
+ however we will not show it here, since this is well documented elsewhere.
2057
+ EOT
2058
+
2059
+ code(<<-EOC)
2060
+ R.eval(<<-EOT)
2061
+ import(java.util.HashMap)
2062
+
2063
+ # create a new instance of the HashMap class:
2064
+ ageMap <- HashMap$new()
2065
+
2066
+ # call methods on the new instance:
2067
+ ageMap$put("Bob", 33)
2068
+ ageMap$put("Carol", 41)
2069
+
2070
+ age <- ageMap$get("Carol")
2071
+
2072
+ # Java primitives and their boxed types
2073
+ # are automatically converted to R vectors:
2074
+ typeof(age)
2075
+ EOT
2076
+ EOC
2077
+
2078
+ console(<<-EOT)
2079
+ R.eval("print(ageMap$size())")
2080
+ EOT
2081
+
2082
+ console(<<-EOC)
2083
+ R.eval(<<-EOT)
2084
+ cat("Carol is ", age, " years old.\\n", sep = "")
2085
+ EOT
2086
+ EOC
2087
+
2088
+
2089
+ section("Conclusions II")
2090
+
2091
+ body(<<-EOT)
2092
+ The Java Virtual Machine (JVM) is an amazing environment allowing for multiple languages to cohabit
2093
+ and integrate in a very transparent way. Galaaz interfaces R, Ruby and Java and gives the
2094
+ developer access to a gigantic set of libraries from those three worlds. In
2095
+ development circles people usually say: "choose the right tool for the job at hand", with JVM/
2096
+ Java/R/Renjin/Ruby/Galaaz the right tool for the job might just be at hand all the time.
2097
+
2098
+ We often see questions on the web about which language to choose between R and Python. Between
2099
+ R and Python, choose Galaaz!
2100
+ EOT