galaaz 0.4.2 → 0.4.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +25 -0
  3. data/Rakefile +8 -0
  4. data/bin/gknit +9 -5
  5. data/bin/gstudio +4 -2
  6. data/bin/gstudio.rb +32 -2
  7. data/blogs/dev/dev.html +219 -34
  8. data/blogs/dev/dev.md +26 -26
  9. data/blogs/dev/dev_files/figure-html/bubble-1.png +0 -0
  10. data/blogs/dev/dev_files/figure-html/diverging_bar.png +0 -0
  11. data/blogs/dplyr/dplyr.rb +63 -0
  12. data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +38 -26
  13. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +16 -17
  14. data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
  15. data/blogs/galaaz_ggplot/galaaz_ggplot.tex +65 -31
  16. data/blogs/oh_my/not_so.rb +2342 -0
  17. data/blogs/oh_my/oh_my.Rmd +493 -0
  18. data/blogs/oh_my/oh_my.html +680 -0
  19. data/blogs/oh_my/oh_my.md +597 -0
  20. data/blogs/oh_my/old.Rmd +2100 -0
  21. data/blogs/ruby_plot/figures/facets_with_decorations.png +0 -0
  22. data/blogs/ruby_plot/figures/facets_with_jitter.png +0 -0
  23. data/blogs/ruby_plot/figures/final_box_plot.png +0 -0
  24. data/blogs/ruby_plot/figures/final_violin_plot.png +0 -0
  25. data/blogs/ruby_plot/figures/violin_with_jitter.png +0 -0
  26. data/blogs/ruby_plot/ruby_plot.Rmd +147 -122
  27. data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +662 -0
  28. data/blogs/ruby_plot/ruby_plot.html +49 -54
  29. data/blogs/ruby_plot/ruby_plot.md +147 -122
  30. data/blogs/ruby_plot/ruby_plot.pdf +0 -0
  31. data/blogs/ruby_plot/ruby_plot.tex +776 -157
  32. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +57 -0
  33. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +106 -0
  34. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +110 -0
  35. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +174 -0
  36. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +236 -0
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +296 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +236 -0
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +218 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +128 -0
  42. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +150 -0
  43. data/examples/islr/ch2.spec.rb +21 -18
  44. data/examples/islr/ch3_boston.rb +14 -5
  45. data/examples/islr/ch3_multiple_regression.rb +2 -3
  46. data/examples/islr/ch6.spec.rb +1 -1
  47. data/examples/islr/x_y_rnorm.jpg +0 -0
  48. data/lib/R_interface/r.rb +14 -10
  49. data/lib/R_interface/r_libs.R +9 -0
  50. data/lib/R_interface/r_methods.rb +77 -6
  51. data/lib/R_interface/{expression.rb → r_module_s.rb} +13 -14
  52. data/lib/R_interface/rbinary_operators.rb +58 -71
  53. data/lib/R_interface/rdata_frame.rb +2 -1
  54. data/lib/R_interface/rdevices.R +4 -0
  55. data/lib/R_interface/rdevices.rb +1 -1
  56. data/lib/R_interface/renvironment.rb +34 -1
  57. data/lib/R_interface/rexpression.rb +108 -2
  58. data/lib/R_interface/rindexed_object.rb +3 -1
  59. data/lib/R_interface/rlanguage.rb +18 -2
  60. data/lib/R_interface/rmatrix.rb +14 -0
  61. data/lib/R_interface/rmd_indexed_object.rb +5 -1
  62. data/lib/R_interface/robject.rb +61 -23
  63. data/lib/R_interface/rsupport.rb +111 -53
  64. data/lib/R_interface/rsymbol.rb +6 -5
  65. data/lib/R_interface/ruby_extensions.rb +130 -4
  66. data/lib/R_interface/runary_operators.rb +35 -3
  67. data/lib/R_interface/rvector.rb +1 -0
  68. data/lib/galaaz.rb +0 -2
  69. data/lib/gknit/knitr_engine.rb +58 -4
  70. data/lib/gknit/ruby_engine.rb +5 -6
  71. data/lib/util/exec_ruby.rb +55 -9
  72. data/specs/all.rb +13 -3
  73. data/specs/figures/dose_len.png +0 -0
  74. data/specs/r_dataframe.spec.rb +49 -26
  75. data/specs/r_environment.spec.rb +140 -0
  76. data/specs/r_eval.spec.rb +0 -15
  77. data/specs/r_formula.spec.rb +232 -0
  78. data/specs/r_function.spec.rb +7 -8
  79. data/specs/r_list.spec.rb +4 -0
  80. data/specs/r_list_apply.spec.rb +11 -11
  81. data/specs/r_matrix.spec.rb +3 -3
  82. data/specs/{r_plots.spec.rb~ → r_nse.spec.rb} +29 -6
  83. data/specs/r_vector_creation.spec.rb +6 -0
  84. data/specs/r_vector_object.spec.rb +2 -2
  85. data/specs/r_vector_operators.spec.rb +3 -3
  86. data/specs/r_vector_subsetting.spec.rb +4 -4
  87. data/specs/ruby_expression.spec.rb +324 -0
  88. data/specs/tmp.rb +12 -524
  89. data/sty/galaaz.sty +71 -0
  90. data/version.rb +1 -1
  91. metadata +31 -41
  92. data/bin/gknit2~ +0 -6
  93. data/bin/ogk~ +0 -4
  94. data/bin/prepareR.rb~ +0 -1
  95. data/blogs/dev/dev.Rmd~ +0 -104
  96. data/blogs/galaaz_ggplot/galaaz_ggplot.dvi +0 -0
  97. data/blogs/galaaz_ggplot/midwest_external_png~ +0 -1
  98. data/blogs/gknit/gknit.Rmd~ +0 -184
  99. data/blogs/gknit/gknit.Rnd~ +0 -17
  100. data/blogs/gknit/model.rb~ +0 -46
  101. data/blogs/ruby_plot/ruby_plot.Rmd~ +0 -215
  102. data/examples/islr/Figure.jpg +0 -0
  103. data/examples/misc/moneyball.rb~ +0 -16
  104. data/examples/misc/subsetting.rb~ +0 -372
  105. data/lib/R/eng_ruby.R~ +0 -63
  106. data/lib/R_interface/capture_plot.rb~ +0 -23
  107. data/lib/R_interface/r.rb~ +0 -121
  108. data/lib/R_interface/rdevices.rb~ +0 -27
  109. data/lib/gknit.rb~ +0 -26
  110. data/lib/gknit/knitr_engine.rb~ +0 -102
  111. data/lib/gknit/ruby_engine.rb~ +0 -72
  112. data/lib/util/inline_file.rb~ +0 -23
  113. data/r_requires/knitr.rb~ +0 -4
  114. data/specs/r_language.spec.rb +0 -157
@@ -0,0 +1,63 @@
1
+ R.library('dplyr')
2
+ R.library('nycflights13')
3
+
4
+ puts R.filter(:flights, (:month.eq 1), (:day.eq 1)) # exec time: 0.15 sec
5
+
6
+ flights = ~:flights
7
+
8
+ puts flights
9
+
10
+ # In this call, 'flights' will be inlined in the expression and
11
+ # passed to method 'filter'. If 'filter' wants to print its
12
+ # first argument, it will print the dataframe.
13
+ puts flights.filter((:month.eq 1), (:day.eq 1)) # exec time: 0.146 sec
14
+
15
+ # In this call, the symbol 'flights' is the first argument to method
16
+ # filter. If 'filter' wants to print its arguments, it will print
17
+ # 'flights'.
18
+ puts :flights.filter((:month.eq 1), (:day.eq 1))
19
+
20
+ puts flights.filter((:month.eq 12), (:day.eq 25))
21
+ puts flights.filter(:month._ :in, R.c(11, 12))
22
+ puts flights.filter(!((:arr_delay > 120) | (:dep_delay > 120)))
23
+ puts flights.arrange(:year, :month, :day)
24
+ puts flights.arrange(E.desc(:arr_delay))
25
+ puts flights.select(:year, :month, :day)
26
+ puts flights.select(R::S.columns(:year, :day))
27
+ puts flights.select(R::S.columns(:year, :day, remove: true))
28
+ puts flights.rename(tail_num: :tailnum)
29
+ puts flights.select(:time_hour, :air_time, E.everything)
30
+
31
+ #=begin
32
+ flights_sml =
33
+ flights.
34
+ select(R::S.columns(:year, :day),
35
+ E.ends_with("delay"),
36
+ :distance,
37
+ :air_time)
38
+
39
+ puts flights_sml
40
+
41
+ puts flights_sml.mutate(gain: (:arr_delay - :dep_delay),
42
+ speed: (:distance / :air_time * 60))
43
+
44
+ puts flights_sml.mutate(gain: :arr_delay - :dep_delay,
45
+ hours: (:air_time / 60),
46
+ gain_per_hour: (:gain / :hours))
47
+
48
+ #=end
49
+
50
+ #=begin
51
+ expr = E.transmute(:flights, :dep_time,
52
+ hour: (:dep_time.int_div 100.0),
53
+ minute: :dep_time % 100.0)
54
+ puts expr
55
+ puts expr.ast
56
+ puts expr.eval
57
+
58
+
59
+ lst = R.list(x: 10, y: 20)
60
+ e = E.eval(:sum, :x, :y, lst)
61
+ e.eval
62
+
63
+ #=end
@@ -7,11 +7,15 @@ date: "16 October 2018"
7
7
  output:
8
8
  pdf_document:
9
9
  includes:
10
- in_header: ["../../sty/galaaz.sty"]
10
+ in_header: "../../sty/galaaz.sty"
11
+ keep_tex: yes
11
12
  number_sections: yes
13
+ toc: true
14
+ toc_depth: 2
12
15
  html_document:
13
16
  md_document:
14
17
  variant: markdown_github
18
+ fontsize: 11pt
15
19
  ---
16
20
 
17
21
  ```{r setup, echo=FALSE}
@@ -32,28 +36,32 @@ for very tight coupling between the two languages to the point that the Ruby dev
32
36
  not need to know that there is an R engine running. For this to happen we use new
33
37
  technologies provided by Oracle: GraalVM, TruffleRuby and FastR:
34
38
 
35
- GraalVM is a universal virtual machine for running applications written in JavaScript,
36
- Python 3, Ruby, R, JVM-based languages like Java, Scala, Kotlin, and LLVM-based languages
37
- such as C and C++.
39
+ GraalVM is a universal virtual machine for running applications
40
+ written in JavaScript, Python 3, Ruby, R, JVM-based languages like Java,
41
+ Scala, Kotlin, and LLVM-based languages such as C and C++.
38
42
 
39
- GraalVM removes the isolation between programming languages and enables interoperability in a
40
- shared runtime. It can run either standalone or in the context of OpenJDK, Node.js,
41
- Oracle Database, or MySQL.
43
+ GraalVM removes the isolation between programming languages and enables
44
+ interoperability in a shared runtime. It can run either standalone or in
45
+ the context of OpenJDK, Node.js, Oracle Database, or MySQL.
42
46
 
43
- GraalVM allows you to write polyglot applications with a seamless way to pass values from one
44
- language to another. With GraalVM there is no copying or marshaling necessary as it is with
45
- other polyglot systems. This lets you achieve high performance when language boundaries are
46
- crossed. Most of the time there is no additional cost for crossing a language boundary at all.
47
+ GraalVM allows you to write polyglot applications with a seamless way to
48
+ pass values from one language to another. With GraalVM there is no copying
49
+ or marshaling necessary as it is with other polyglot systems. This lets
50
+ you achieve high performance when language boundaries are crossed. Most
51
+ of the time there is no additional cost for crossing a language boundary
52
+ at all.
47
53
 
48
- Often developers have to make uncomfortable compromises that require them to rewrite
49
- their software in other languages. For example:
54
+ Often developers have to make uncomfortable compromises that require them
55
+ to rewrite their software in other languages. For example:
50
56
 
51
57
  * “That library is not available in my language. I need to rewrite it.”
52
- * “That language would be the perfect fit for my problem, but we cannot run it in our environment.”
53
- * “That problem is already solved in my language, but the language is too slow.”
58
+ * “That language would be the perfect fit for my problem, but we cannot
59
+ run it in our environment.”
60
+ * “That problem is already solved in my language, but the language is
61
+ too slow.”
54
62
 
55
- With GraalVM we aim to allow developers to freely choose the right language for the task at
56
- hand without making compromises.
63
+ With GraalVM we aim to allow developers to freely choose the right language
64
+ for the task at hand without making compromises.
57
65
 
58
66
  Interested readers should also check out the following sites:
59
67
 
@@ -67,13 +75,15 @@ Interested readers should also check out the following sites:
67
75
  Galaaz is the Portuguese name for "Galahad". From Wikipedia:
68
76
 
69
77
  Sir Galahad (sometimes referred to as Galeas or Galath),
70
- in Arthurian legend, is a knight of King Arthur's Round Table and one of the three
71
- achievers of the Holy Grail. He is the illegitimate son of Sir Lancelot and Elaine of
72
- Corbenic, and is renowned for his gallantry and purity as the most perfect of all knights.
73
- Emerging quite late in the medieval Arthurian tradition, Sir Galahad first appears in the
74
- Lancelot–Grail cycle, and his story is taken up in later works such as the Post-Vulgate
75
- Cycle and Sir Thomas Malory's Le Morte d'Arthur. His name should not be mistaken with
76
- Galehaut, a different knight from Arthurian legend.
78
+ in Arthurian legend, is a knight of King Arthur's Round Table and one
79
+ of the three achievers of the Holy Grail. He is the illegitimate son
80
+ of Sir Lancelot and Elaine of Corbenic, and is renowned for his
81
+ gallantry and purity as the most perfect of all knights. Emerging quite
82
+ late in the medieval Arthurian tradition, Sir Galahad first appears in the
83
+ Lancelot–Grail cycle, and his story is taken up in later works such as
84
+ the Post-Vulgate Cycle and Sir Thomas Malory's Le Morte d'Arthur.
85
+ His name should not be mistaken with Galehaut, a different knight from
86
+ Arthurian legend.
77
87
 
78
88
  # Galaaz Demo
79
89
 
@@ -114,8 +124,10 @@ Doing on the console
114
124
 
115
125
  > galaaz -T
116
126
 
117
- will show a list with all available demos. To run any of the demos in the list, substitute the call to
118
- 'rake' to 'galaaz'. For instance, one of the examples in the list is 'rake sthda:bar'. In order to run
127
+ will show a list with all available demos. To run any of the demos in the list,
128
+ substitute the call to
129
+ 'rake' with 'galaaz'. For instance, one of the examples in the list is 'rake sthda:bar'.
130
+ In order to run
119
131
  this example just do 'galaaz sthda:bar'. Doing 'galaaz sthda:all' will run all demos in the sthda
120
132
  category. Some of the examples require 'rspec' to be available. To install 'rspec' just do
121
133
  'gem install rspec'.
@@ -16,29 +16,28 @@
16
16
  \gdef\HyperFirstAtBeginDocument#1{#1}
17
17
  \providecommand\HyField@AuxAddToFields[1]{}
18
18
  \providecommand\HyField@AuxAddToCoFields[2]{}
19
- \providecommand*\new@tpo@label[2]{}
20
19
  \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}}
21
20
  \newlabel{introduction}{{1}{1}{Introduction}{section.1}{}}
22
- \@writefile{toc}{\contentsline {subsection}{\numberline {1.1}What does Galaaz mean}{1}{subsection.1.1}}
23
- \newlabel{what-does-galaaz-mean}{{1.1}{1}{What does Galaaz mean}{subsection.1.1}{}}
21
+ \@writefile{toc}{\contentsline {subsection}{\numberline {1.1}What does Galaaz mean}{2}{subsection.1.1}}
22
+ \newlabel{what-does-galaaz-mean}{{1.1}{2}{What does Galaaz mean}{subsection.1.1}{}}
24
23
  \@writefile{toc}{\contentsline {section}{\numberline {2}Galaaz Demo}{2}{section.2}}
25
24
  \newlabel{galaaz-demo}{{2}{2}{Galaaz Demo}{section.2}{}}
26
25
  \@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Prerequisites}{2}{subsection.2.1}}
27
26
  \newlabel{prerequisites}{{2.1}{2}{Prerequisites}{subsection.2.1}{}}
28
- \@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Preparation}{2}{subsection.2.2}}
29
- \newlabel{preparation}{{2.2}{2}{Preparation}{subsection.2.2}{}}
30
- \@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Running the demo}{2}{subsection.2.3}}
31
- \newlabel{running-the-demo}{{2.3}{2}{Running the demo}{subsection.2.3}{}}
32
- \@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Running other demos}{2}{subsection.2.4}}
33
- \newlabel{running-other-demos}{{2.4}{2}{Running other demos}{subsection.2.4}{}}
27
+ \@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Preparation}{3}{subsection.2.2}}
28
+ \newlabel{preparation}{{2.2}{3}{Preparation}{subsection.2.2}{}}
29
+ \@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Running the demo}{3}{subsection.2.3}}
30
+ \newlabel{running-the-demo}{{2.3}{3}{Running the demo}{subsection.2.3}{}}
31
+ \@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Running other demos}{3}{subsection.2.4}}
32
+ \newlabel{running-other-demos}{{2.4}{3}{Running other demos}{subsection.2.4}{}}
34
33
  \@writefile{toc}{\contentsline {section}{\numberline {3}The demo code}{3}{section.3}}
35
34
  \newlabel{the-demo-code}{{3}{3}{The demo code}{section.3}{}}
36
35
  \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Midwest Plot}}{4}{figure.1}}
37
- \@writefile{toc}{\contentsline {section}{\numberline {4}An extension to the example}{4}{section.4}}
38
- \newlabel{an-extension-to-the-example}{{4}{4}{An extension to the example}{section.4}{}}
39
- \@writefile{toc}{\contentsline {section}{\numberline {5}Conclusion}{7}{section.5}}
40
- \newlabel{conclusion}{{5}{7}{Conclusion}{section.5}{}}
41
- \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Midwest Plot with `glm' function and modified theme}}{8}{figure.2}}
42
- \newlabel{LastPage}{{}{8}{}{page.8}{}}
43
- \xdef\lastpage@lastpage{8}
44
- \xdef\lastpage@lastpageHy{8}
36
+ \@writefile{toc}{\contentsline {section}{\numberline {4}An extension to the example}{5}{section.4}}
37
+ \newlabel{an-extension-to-the-example}{{4}{5}{An extension to the example}{section.4}{}}
38
+ \@writefile{toc}{\contentsline {section}{\numberline {5}Conclusion}{8}{section.5}}
39
+ \newlabel{conclusion}{{5}{8}{Conclusion}{section.5}{}}
40
+ \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Midwest Plot with `glm' function and modified theme}}{9}{figure.2}}
41
+ \newlabel{LastPage}{{}{9}{}{page.9}{}}
42
+ \xdef\lastpage@lastpage{9}
43
+ \xdef\lastpage@lastpageHy{9}
@@ -1,4 +1,4 @@
1
- \documentclass[]{article}
1
+ \documentclass[11pt,]{article}
2
2
  \usepackage{lmodern}
3
3
  \usepackage{amssymb,amsmath}
4
4
  \usepackage{ifxetex,ifluatex}
@@ -132,7 +132,7 @@
132
132
  \usepackage{geometry}
133
133
  \geometry{a4paper, top=1in}
134
134
 
135
- % necessários para uso com kableExtra
135
+ % needed for kableExtra
136
136
  \usepackage{longtable}
137
137
  \usepackage{multirow}
138
138
  \usepackage[table]{xcolor}
@@ -149,17 +149,38 @@
149
149
  \usepackage{expex}
150
150
 
151
151
  \usepackage{graphicx}
152
+
152
153
  \usepackage{fancyhdr}
154
+ % set the header and foot style
155
+ % style 'fancy' adds the section name on the header
156
+ % and the page number on the footer
153
157
  \pagestyle{fancy}
154
- \fancyhf{}
155
158
 
156
- \usepackage{lipsum}
159
+ % style 'fancyhf' leaves header and footer empty
160
+ %\fancyhf{}
161
+
162
+ % sets the left head element to \rightmark, which contains the
163
+ % current section (\leftmark is the current chapter)
164
+ %\fancyhead[L]{\rightmark} .
165
+
166
+ % sets the right head element to the page number.
167
+ % \fancyhead[R]{\thepage}
168
+
169
+ % lets the head rule disappear.
170
+ % \renewcommand{\headrulewidth}{0pt}
171
+ % Possible selectors for the optional argument of \fancyhead/\fancyfoot
172
+ % are L (left), C (center) or R (right) for the position of the element
173
+ % and E (even) or O (odd) to distinguish even and odd pages. If you omit
174
+ % E/O the element is set for all pages.
157
175
 
158
- % disponibilizar o comando lastpage
176
+ % \usepackage{lipsum}
177
+
178
+ % make available command lastpage
159
179
  \usepackage{lastpage}
160
180
 
161
- % tamanho do font padrão 11pt
162
- \usepackage[fontsize=10pt]{scrextend}
181
+ % default fontsize 11pt better to add
182
+ % fontsize on the yaml header
183
+ % \usepackage[fontsize=11pt]{scrextend}
163
184
 
164
185
  % comandos para formatar uma tabela
165
186
  \usepackage{array}
@@ -167,15 +188,22 @@
167
188
  \newcolumntype{C}[1]{>{\centering\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
168
189
  \newcolumntype{R}[1]{>{\raggedleft\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
169
190
 
170
- % necessário para importar outros arquivos latex
191
+ % required if we need to import other latex documents
171
192
  \usepackage{import}
172
193
 
194
+ % Command to import an R variable to latex
173
195
  \newcommand{\RtoLatex}[2]{\newcommand{#1}{#2}}
196
+
197
+ %
174
198
  %\newcommand{\atraso}[1]{\color{red} \textbf {Tempo desde a Assinatura do Contrato: #1 dias}}
175
199
 
176
200
  \begin{document}
177
201
  \maketitle
178
202
 
203
+ {
204
+ \setcounter{tocdepth}{2}
205
+ \tableofcontents
206
+ }
179
207
  \section{Introduction}\label{introduction}
180
208
 
181
209
  Galaaz is a system for tightly coupling Ruby and R. Ruby is a powerful
@@ -193,28 +221,32 @@ that there is an R engine running. For this to happen we use new
193
221
  technologies provided by Oracle: GraalVM, TruffleRuby and FastR:
194
222
 
195
223
  \begin{verbatim}
196
- GraalVM is a universal virtual machine for running applications written in JavaScript,
197
- Python 3, Ruby, R, JVM-based languages like Java, Scala, Kotlin, and LLVM-based languages
198
- such as C and C++.
224
+ GraalVM is a universal virtual machine for running applications
225
+ written in JavaScript, Python 3, Ruby, R, JVM-based languages like Java,
226
+ Scala, Kotlin, and LLVM-based languages such as C and C++.
199
227
 
200
- GraalVM removes the isolation between programming languages and enables interoperability in a
201
- shared runtime. It can run either standalone or in the context of OpenJDK, Node.js,
202
- Oracle Database, or MySQL.
228
+ GraalVM removes the isolation between programming languages and enables
229
+ interoperability in a shared runtime. It can run either standalone or in
230
+ the context of OpenJDK, Node.js, Oracle Database, or MySQL.
203
231
 
204
- GraalVM allows you to write polyglot applications with a seamless way to pass values from one
205
- language to another. With GraalVM there is no copying or marshaling necessary as it is with
206
- other polyglot systems. This lets you achieve high performance when language boundaries are
207
- crossed. Most of the time there is no additional cost for crossing a language boundary at all.
232
+ GraalVM allows you to write polyglot applications with a seamless way to
233
+ pass values from one language to another. With GraalVM there is no copying
234
+ or marshaling necessary as it is with other polyglot systems. This lets
235
+ you achieve high performance when language boundaries are crossed. Most
236
+ of the time there is no additional cost for crossing a language boundary
237
+ at all.
208
238
 
209
- Often developers have to make uncomfortable compromises that require them to rewrite
210
- their software in other languages. For example:
239
+ Often developers have to make uncomfortable compromises that require them
240
+ to rewrite their software in other languages. For example:
211
241
 
212
242
  * “That library is not available in my language. I need to rewrite it.”
213
- * “That language would be the perfect fit for my problem, but we cannot run it in our environment.”
214
- * “That problem is already solved in my language, but the language is too slow.”
243
+ * “That language would be the perfect fit for my problem, but we cannot
244
+ run it in our environment.”
245
+ * “That problem is already solved in my language, but the language is
246
+ too slow.”
215
247
 
216
- With GraalVM we aim to allow developers to freely choose the right language for the task at
217
- hand without making compromises.
248
+ With GraalVM we aim to allow developers to freely choose the right language
249
+ for the task at hand without making compromises.
218
250
  \end{verbatim}
219
251
 
220
252
  Interested readers should also check out the following sites:
@@ -238,13 +270,15 @@ Galaaz is the Portuguese name for ``Galahad''. From Wikipedia:
238
270
 
239
271
  \begin{verbatim}
240
272
  Sir Galahad (sometimes referred to as Galeas or Galath),
241
- in Arthurian legend, is a knight of King Arthur's Round Table and one of the three
242
- achievers of the Holy Grail. He is the illegitimate son of Sir Lancelot and Elaine of
243
- Corbenic, and is renowned for his gallantry and purity as the most perfect of all knights.
244
- Emerging quite late in the medieval Arthurian tradition, Sir Galahad first appears in the
245
- Lancelot–Grail cycle, and his story is taken up in later works such as the Post-Vulgate
246
- Cycle and Sir Thomas Malory's Le Morte d'Arthur. His name should not be mistaken with
247
- Galehaut, a different knight from Arthurian legend.
273
+ in Arthurian legend, is a knight of King Arthur's Round Table and one
274
+ of the three achievers of the Holy Grail. He is the illegitimate son
275
+ of Sir Lancelot and Elaine of Corbenic, and is renowned for his
276
+ gallantry and purity as the most perfect of all knights. Emerging quite
277
+ late in the medieval Arthurian tradition, Sir Galahad first appears in the
278
+ Lancelot–Grail cycle, and his story is taken up in later works such as
279
+ the Post-Vulgate Cycle and Sir Thomas Malory's Le Morte d'Arthur.
280
+ His name should not be mistaken with Galehaut, a different knight from
281
+ Arthurian legend.
248
282
  \end{verbatim}
249
283
 
250
284
  \section{Galaaz Demo}\label{galaaz-demo}
@@ -0,0 +1,2342 @@
1
+ # coding: utf-8
2
+
3
+ author("Rodrigo Botafogo")
4
+
5
+ body(<<-EOT)
6
+ This paper introduces and compares SciCom with R's S4. It is a shameless rip off of
7
+ #{ref("A '(not so)' Short Introduction to S4",
8
+ "https://cran.r-project.org/doc/contrib/Genolini-S4tutorialV0-5en.pdf")} by Christophe Genolini
9
+ and follows the same structure and examples presented there.
10
+
11
+ SciCom is a Ruby Gem (library) that allows very tight integration between Ruby and R.
12
+ Its integration is
13
+ much tighter and more transparent than what one can get with RinRuby or similar solutions in Python
14
+ such as PypeR (https://pypi.python.org/pypi/PypeR/1.1.0), rpy2 (http://rpy2.bitbucket.org/) and
15
+ other similar solutions. SciCom targets the Java Virtual Machine and it
16
+ integrates with Renjin (http://www.renjin.org/), an R interpreter for Java.
17
+
18
+ From the Renjin page we can get the following description of Renjin and its objectives:
19
+
20
+ The goal of Renjin
21
+ is to eventually be compatible with GNU R such that most existing R language programs will
22
+ run in Renjin without the need to make any changes to the code. Needless to say, Renjin is
23
+ currently not 100% compatible with GNU R so your mileage may vary.
24
+
25
+ The biggest advantage of Renjin is that the R interpreter itself is a Java module which can be
26
+ seamlessly integrated into any Java application. This dispenses with the need to load dynamic
27
+ libraries or to provide some form of communication between separate processes. These types of
28
+ interfaces are often the source of much agony because they place very specific demands on the
29
+ environment in which they run.
30
+
31
+ We frequently see on the web people asking: "which is better for data analysis: R or Python?" In
32
+ this article we also have the objective to try to answer this question. As you will see, our
33
+ point is: "when in doubt about R or Python, use SciCom!"
34
+
35
+ EOT
36
+
37
+ subsubsection("Limitations")
38
+
39
+ body(<<-EOT)
40
+ Unfortunately, SciCom has three main limitations, and although we think that "use SciCom!" is a
41
+ good catch phrase, at this point we don't see SciCom as being able to substitute R. The three
42
+ limitations are:
43
+ EOT
44
+
45
+ list(<<-EOT)
46
+ Renjin has implemented all of base R (maybe still some bugs, I don't know!), but there are still
47
+ many packages that do not yet work with it. Renjin is making huge steps forward, but for the
48
+ standard R user, chances are that her preferred package does not yet run in Renjin;
49
+
50
+ Renjin does not implement any of the graph functionality such as plot or ggplot and has intention
51
+ to do so. Ruby has some graphing libraries, but they are still not "au par" with ggplot nor
52
+ matplotlib;
53
+
54
+ SciCom does not have a large user community. Actually it does not even have a small user
55
+ community. Without a user community, no free software can survive. I hope this paper will help
56
+ attract some people to this new community.
57
+
58
+ EOT
59
+
60
+ subsubsection("A Note of Advice")
61
+
62
+ body(<<-EOT)
63
+ Renjin's internal representations of vectors are immutable. This can be very important for
64
+ large datasets allowing for lazy operations and delaying them to as late as possible. SciCom's
65
+ goal is to make integration of Ruby and R as seamless as possible, and as will be seen in this
66
+ document, R functions look like class methods on a Ruby R class. The frontier between Ruby
67
+ and R is made as thin and transparent as possible and in some cases data in a Ruby object is
68
+ shared with data in an R object. In this case, we do not try to make Ruby data immutable and
69
+ since Renjin expects it to be immutable, it is possible for weird problems to creep into the code.
70
+ For instance, MDArray data is shared between Ruby and R and changing an element in MDArray will
71
+ change the R dataframe without copying. We believe that this can be very helpful in some
72
+ circumstances, but dangerous in others. If, by any chance, your code starts showing strange
73
+ behavior and you are sharing data between R and Ruby, make sure that you know what you are
74
+ doing.
75
+ EOT
76
+
77
+ chapter("Bases of Object Programming")
78
+
79
+ body(<<-EOT)
80
+ In this paper, we will start our discussion from Part II of "The (not so) Short Introduction
81
+ to S4", which from now on we will reference as SS4 for "short S4". Interested readers are directed
82
+ to this paper to understand the motivation and examples in that paper. In this paper we will
83
+ present the S4 code from SS4 and then the same code in Ruby/SciCom. We will not comment on the
84
+ S4 code, as all the comments can be found in SS4, we will only focus on the Ruby/SciCom
85
+ description.
86
+
87
+ S4 defines classes by using the setClass function:
88
+ EOT
89
+
90
+ section("Classes Declaration")
91
+
92
+ comment_code(<<-EOT)
93
+ # > setClass(
94
+ # + Class="Trajectories",
95
+ # + representation=representation(
96
+ # + times = "numeric",
97
+ # + traj = "matrix"
98
+ # + )
99
+ # + )
100
+ EOT
101
+
102
+ subsection("Instance Variables")
103
+
104
+ body(<<-EOT)
105
+ In Ruby a class is defined by the keyword 'class'. Every class should start with a capital
106
+ letter. S4 'slots' are called 'instance variables' in Ruby. Differently from R's S4,
107
+ instance variables in Ruby do not have type information. It should be clear though, that S4
108
+ type information is also not a "compile" time type, since R is not compiled. The type is
109
+ checked at runtime. The same checking can be done in Ruby and we will do it later in this
110
+ document.
111
+
112
+ In the example below, we create
113
+ class Trajectories with two instance variables, 'times' and 'matrix'. We will not go over
114
+ the details of instance variables in Ruby, but here we created those variables with the
115
+ keyword 'attr_reader' and a colon before the variable's name:
116
+ EOT
117
+
118
+ code(<<-EOT)
119
+ class Trajectories
120
+
121
+ attr_reader :times
122
+ attr_reader :matrix
123
+
124
+ end
125
+
126
+ EOT
127
+
128
+ body(<<-EOT)
129
+ In order to create a new instance of object Trajectories we call method new on the class and
130
+ we can store the result in a variable (not an instance variable) as below:
131
+ EOT
132
+
133
+ console(<<-EOT)
134
+ traj = Trajectories.new
135
+ EOT
136
+
137
+ body(<<-EOT)
138
+ We now have in variable 'traj' a Trajectories object. In Ruby, printing variable 'traj' will
139
+ only print the class name of the object and not its contents as in R.
140
+ EOT
141
+
142
+ console(<<-EOT)
143
+ puts traj
144
+ EOT
145
+
146
+ body(<<-EOT)
147
+ To see the contents of an object, one needs to access its components using the '.' operator:
148
+ EOT
149
+
150
+ console(<<-EOT)
151
+ puts traj.times
152
+ EOT
153
+
154
+ subsection("Constructor")
155
+
156
+ body(<<-EOT)
157
+ Since there is no content stored in 'times' nor 'matrix', nil is returned. In order to add
158
+ a value in the variables, we need to add a constructor to the class Trajectories. In R, a
159
+ constructor is built by default; in Ruby, this has to be created by adding a method called
160
+ 'initialize'. In the example below, we will create the initializer that accepts two values,
161
+ a 'times' value and a 'matrix' value and they are used to initialize the value of the
162
+ instance variables:
163
+ EOT
164
+
165
+ code(<<-EOT)
166
+
167
+ class Trajectories
168
+
169
+ attr_reader :times
170
+ attr_reader :matrix
171
+
172
+ def initialize(times: nil, matrix: nil)
173
+ @times = times
174
+ @matrix = matrix
175
+ end
176
+
177
+ end
178
+
179
+ EOT
180
+
181
+ body(<<-EOT)
182
+ Up to this point, everything described is pure Ruby code and has absolutely no relationship with R.
183
+ We now want to create a Trajectories with a 'times' vector. Ruby has a vector class and we could
184
+ use this class to create a vector and add it to the 'times' instance variable; however, in order
185
+ to make use of R's functions, we want to create a R vector to add to 'times'. In SciCom,
186
+ creating R objects is done using the corresponding R functions by just preceding them with 'R.',
187
+ i.e., R functions are all defined in SciCom in the R namespace.
188
+
189
+ Since SciCom is Ruby and not R, some syntax adjustments are sometimes necessary. For instance,
190
+ in R, a range is represented as '(1:4)', in Ruby, the same range is represented as '(1..4)'.
191
+ When passing arguments to an R function in R one uses the '=' sign after the slot name; in Ruby,
192
+ one uses the ':' operator after the parameter's name as we can see below:
193
+ EOT
194
+
195
+ code(<<-EOT)
196
+ # Create a Trajectories with the times vector [1, 2, 3, 4] and not matrix
197
+ traj = Trajectories.new(times: R.c(1, 2, 3, 4))
198
+
199
+ # Create a Trajectories with times and matrix
200
+ traj2 = Trajectories.new(times: R.c(1, 3), matrix: R.matrix((1..4), ncol: 2))
201
+ EOT
202
+
203
+ subsection("Access to Instance Variables (to reach a slot)")
204
+
205
+ body(<<-EOT)
206
+ In order to access data in an instance variable the operator '.' is used. In R, a similar
207
+ result is obtained by use of the '@' operator, but SS4 does not recommend its use. In SciCom,
208
+ the '.' operator is the recommended way of accessing an instance variable.
209
+
210
+ Now that we have created two trajectories, let's try to print its instance variables to see
211
+ that everything is fine:
212
+ EOT
213
+
214
+ console(<<-EOT)
215
+ puts traj.times
216
+ EOT
217
+
218
+ body(<<-EOT)
219
+ Well this wasn't really what we had expected... as explained before, printing a variable, will
220
+ actually only show the class name and vector 'times' in SciCom is actually a Renjin::Vector.
221
+ In order to print the content of a SciCom object we use method 'pp' as follows:
222
+ EOT
223
+
224
+ console(<<-EOT)
225
+ traj.times.pp
226
+ EOT
227
+
228
+ body(<<-EOT)
229
+ We now have the expected value. Note that the 'times' vector is printed exactly as it would
230
+ if we were using GNU R. Let's now take a look at variable 'traj2':
231
+ EOT
232
+
233
+ console(<<-EOT)
234
+ traj2.times.pp
235
+ EOT
236
+
237
+ console(<<-EOT)
238
+ traj2.matrix.pp
239
+ EOT
240
+
241
+ body(<<-EOT)
242
+ Let's now build the same examples as in SS4: Three hospitals take part in a
243
+ study. The Pitié Salpêtriere (which has not yet returned its data file, shame on them!),
244
+ Cochin and Saint-Anne. We first show the code in R and the corresponding SciCom:
245
+ EOT
246
+
247
+ comment_code(<<-EOT)
248
+ > trajPitie <- new(Class="Trajectories")
249
+ > trajCochin <- new(
250
+ + Class= "Trajectories",
251
+ + times=c(1,3,4,5),
252
+ + traj=rbind (
253
+ + c(15,15.1, 15.2, 15.2),
254
+ + c(16,15.9, 16,16.4),
255
+ + c(15.2, NA, 15.3, 15.3),
256
+ + c(15.7, 15.6, 15.8, 16)
257
+ + )
258
+ + )
259
+ > trajStAnne <- new(
260
+ + Class= "Trajectories",
261
+ + times=c(1: 10, (6: 16) *2),
262
+ + traj=rbind(
263
+ + matrix (seq (16,19, length=21), ncol=21, nrow=50, byrow=TRUE),
264
+ + matrix (seq (15.8, 18, length=21), ncol=21, nrow=30, byrow=TRUE)
265
+ + )+rnorm (21*80,0,0.2)
266
+ + )
267
+ EOT
268
+
269
+ body(<<-EOT)
270
+ This same code in SciCom becomes:
271
+ EOT
272
+
273
+ code(<<-EOT)
274
+ trajPitie = Trajectories.new
275
+ EOT
276
+
277
+ code(<<-EOT)
278
+ trajCochin = Trajectories.new(times: R.c(1,3,4,5),
279
+ matrix: R.rbind(
280
+ R.c(15,15.1, 15.2, 15.2),
281
+ R.c(16,15.9, 16,16.4),
282
+ R.c(15.2, NA, 15.3, 15.3),
283
+ R.c(15.7, 15.6, 15.8, 16)))
284
+ EOT
285
+
286
+ code(<<-EOT)
287
+ trajStAnne =
288
+ Trajectories.new(times: R.c((1..10), R.c(6..16) * 2),
289
+ matrix: (R.rbind(
290
+ R.matrix(R.seq(16, 19, length: 21), ncol: 21,
291
+ nrow: 50, byrow: true),
292
+ R.matrix(R.seq(15.8, 18, length: 21), ncol: 21,
293
+ nrow: 30, byrow: true)) + R.rnorm(21*80, 0, 0.2)))
294
+
295
+ EOT
296
+
297
+ body(<<-EOT)
298
+ Let's check that the 'times' and 'matrix' instance variables were correctly set:
299
+ EOT
300
+
301
+ console(<<-EOT)
302
+ trajCochin.times.pp
303
+ EOT
304
+
305
+ console(<<-EOT)
306
+ trajCochin.matrix.pp
307
+ EOT
308
+
309
+ console(<<-EOT)
310
+ trajStAnne.times.pp
311
+ EOT
312
+
313
+ body(<<-EOT)
314
+ We will not at this time print trajStAnne.matrix, since this is a huge matrix and the result
315
+ would just take too much space. Later we will print just a partial view of the matrix.
316
+ EOT
317
+
318
+ subsection("Default Values")
319
+
320
+ body(<<-EOT)
321
+ Default values are very useful and quite often used in Ruby programs. Although SS4 does not
322
+ recommend its use, there are many cases in which default values are useful and make code simpler.
323
+ We have already seen default values in this document, with the default being 'nil'. This was
324
+ necessary in order to be able to create our constructor and passing it the proper values.
325
+
326
+ In the example below, a class TrajectoriesBis is created with default value 1 for times and a
327
+ matrix with no elements in matrix.
328
+ EOT
329
+
330
+ code(<<-EOT)
331
+ class TrajectoriesBis
332
+
333
+ attr_reader :times
334
+ attr_reader :matrix
335
+
336
+ def initialize(times: 1, matrix: R.matrix(0))
337
+ @times = times
338
+ @matrix = matrix
339
+ end
340
+
341
+ end
342
+
343
+ traj_bis = TrajectoriesBis.new
344
+ EOT
345
+
346
+ body(<<-EOT)
347
+ Let's take a look at our new class:
348
+ EOT
349
+
350
+ console(<<-EOT)
351
+ traj_bis.times.pp
352
+ EOT
353
+
354
+ body(<<-EOT)
355
+ Well, not exactly what we had in mind. We got an error saying that .pp is undefined for
356
+ Fixnum. In R, numbers are automatically converted to vectors, but this is not the case
357
+ in Ruby and SciCom. In Ruby, numbers are numbers and vectors are vectors. In the
358
+ initialize method above, we stored 1 in variable @times and 1 is a number. Method .pp is
359
+ only available for R objects.
360
+
361
+ In order to fix this, we need to fix our initializer to convert number 1 to a vector with
362
+ one element of value 1. SciCom provides the method R.i to do this conversion.
363
+
364
+ When calling an R function that expects a number as argument, this conversion is
365
+ automatically done by SciCom; however, in the initialize method, there is no indication
366
+ to SciCom that variable @times is actually a SciCom variable, since there is no type
367
+ information. In this case, we need to be explicit and use R.i:
368
+ EOT
369
+
370
+ code(<<-EOT)
371
+ class TrajectoriesBis
372
+
373
+ attr_reader :times
374
+ attr_reader :matrix
375
+
376
+ # Use R.i to convert number 1 to a vector
377
+ def initialize(times: R.i(1), matrix: R.matrix(0))
378
+ @times = times
379
+ @matrix = matrix
380
+ end
381
+
382
+ end
383
+
384
+ traj_bis = TrajectoriesBis.new
385
+ EOT
386
+
387
+ console(<<-EOT)
388
+ traj_bis.times.pp
389
+ EOT
390
+
391
+ console(<<-EOT)
392
+ traj_bis.matrix.pp
393
+ EOT
394
+
395
+ subsection("To Remove an Object")
396
+
397
+ body(<<-EOT)
398
+ As far as I know, there isn't a good way of removing a defined class, but there might be
399
+ one and the interested user is directed to google it! In principle, there should not be
400
+ any real need to remove a defined class. Both in R and SciCom, large programs are usually
401
+ written in a file and the file loaded. If one writes a wrong class, the better solution is
402
+ to correct it and then load it again. If the class is written directly on the console,
403
+ then leaving it there will not have any serious impact.
404
+ EOT
405
+
406
+ subsection ("The Empty Object")
407
+
408
+ body(<<-EOT)
409
+ When a Trajectories is created with new, and no argument is given, all its instance variables
410
+ will have the default nil value. Since Ruby has no type information, then there is only one
411
+ type (or actually no type) of nil. To check if a variable is empty, we check it against the nil
412
+ value.
413
+ EOT
414
+
415
+ subsection ("To See an Object")
416
+
417
+ body(<<-EOT)
418
+ Ruby has very strong meta-programming features, in particular, one can use introspection to
419
+ see methods and instance variables from a given class. Method 'instance_variables' shows all
420
+ the instance variables of an object:
421
+ EOT
422
+
423
+ console(<<-EOT)
424
+ puts traj.instance_variables
425
+ EOT
426
+
427
+ body(<<-EOT)
428
+ The description of all meta-programming features of Ruby is well beyond the scope of this
429
+ document, but it is a very frequently used and powerful feature of Ruby, that makes programming in
430
+ Ruby a different experience than programming in other languages.
431
+ EOT
432
+
433
+ section ("Methods")
434
+
435
+ body(<<-EOT)
436
+ Methods are a fundamental feature of object oriented programming. We will now extend our class
437
+ Trajectories to add methods to it. In SS4, a method 'plot' is added to Trajectories. At this
438
+ point, Renjin and SciCom do not yet have plotting capabilities, so we will have to skip this
439
+ method and go directly to the implementation of the 'print' method.
440
+
441
+ Below is the R code for method print:
442
+ EOT
443
+
444
+ comment_code(<<-EOT)
445
+ > setMethod ("print","Trajectories",
446
+ + function(x,...){
447
+ + cat("*** Class Trajectories, method Print *** \\n")
448
+ + cat("* Times ="); print (x@times)
449
+ + cat("* Traj = \\n"); print (x@traj)
450
+ + cat("******* End Print (trajectories) ******* \\n")
451
+ + }
452
+ + )
453
+ EOT
454
+
455
+ body(<<-EOT)
456
+ Now the same code for class Trajectories in SciCom. In general methods are defined in a class
457
+ together with all the class definition. We will first use this approach. Later, we will show
458
+ how to 'reopen' a class to add new methods to it.
459
+
460
+ In this example, we are defining a method named 'print'. We have been using method 'puts' to
461
+ output data. There is a Ruby method that is more flexible than puts and that we need to use to
462
+ implement our function: 'print'. However, trying to use Ruby print inside the definition of
463
+ Trajectories's print will not work, as Ruby will understand that as a recursive call to print.
464
+ Ruby's print is defined inside the Kernel class, so, in order to call Ruby's print inside the
465
+ definition of Trajectories's print we need to write 'Kernel.print'.
466
+ EOT
467
+
468
+ code(<<-EOT)
469
+ class Trajectories
470
+
471
+ attr_reader :times
472
+ attr_reader :matrix
473
+
474
+ #
475
+ #
476
+ #
477
+ def initialize(times: nil, matrix: nil)
478
+ @times = times
479
+ @matrix = matrix
480
+ end
481
+
482
+ def print
483
+ puts("*** Class Trajectories, method Print *** ")
484
+ Kernel.print("times = ")
485
+ @times.pp
486
+ puts("traj =")
487
+ @matrix.pp
488
+ puts("******* End Print (trajectories) ******* ")
489
+ end
490
+
491
+ end
492
+ EOT
493
+
494
+ console(<<-EOT)
495
+ trajCochin.print
496
+ EOT
497
+
498
+ body(<<-EOT)
499
+ For Cochin, the result is correct. For Saint-Anne, print will display too much
500
+ information. So we need a second method.
501
+
502
+ Show is the default R method used to show an object when its name is written in the
503
+ console. We thus define 'show' by taking into account the size of the object: if there are too
504
+ many trajectories, 'show' posts only part of them.
505
+
506
+ Here is the R code for method 'show':
507
+ EOT
508
+
509
+ comment_code(<<-EOT)
510
+ > setMethod("show","Trajectories",
511
+ + function(object){
512
+ + cat("*** Class Trajectories, method Show *** \\n")
513
+ + cat("* Times ="); print(object@times)
514
+ + nrowShow <- min(10,nrow(object@traj))
515
+ + ncolShow <- min(10,ncol(object@traj))
516
+ + cat("* Traj (limited to a matrix 10x10) = \\n")
517
+ + print(formatC(object@traj[1:nrowShow,1:ncolShow]),quote=FALSE)
518
+ + cat("******* End Show (trajectories) ******* \\n")
519
+ + }
520
+ + )
521
+ EOT
522
+
523
+ body(<<-EOT)
524
+ Now, let's write it with SciCom. This time though, we will not rewrite the whole Trajectories
525
+ class, but just reopen it to add this specific method. The next example has many interesting
526
+ features of SciCom, some we have already seen, others will be described now:
527
+ EOT
528
+
529
+ list(<<-EOT)
530
+ As we have already seen, to call an R function one uses the R.<function> notation. There
531
+ is however another way: when the first argument to the R function is an R object such as a
532
+ matrix, a list, a vector, etc. we can use '.' notation to call the function. This makes the
533
+ function look like a method of the object. For instance, R.nrow(@matrix), can be called by
534
+ doing @matrix.nrow;
535
+
536
+ In R, every number is converted to a vector and this can be done with method R.i. Converting
537
+ a vector with only one number back to a number can be done with method '.gz'. So if @num is
538
+ an R vector that holds a number, then @num.gz is a number that can be used normally with Ruby
539
+ methods;
540
+
541
+ R functions and Ruby methods can be used freely in SciCom. We show below two different ways
542
+ of getting the minimum of a number, either by calling R.min or by getting the minimum of an
543
+ array, with the min method;
544
+
545
+ SciCom allows for method 'chaining'. Method chaining, also known as named parameter idiom, is
546
+ a common syntax for invoking multiple method calls in object-oriented programming languages.
547
+ Each method returns an object, allowing the calls to be chained together in a single statement
548
+ without requiring variables to store the intermediate results. For instance @matrix.nrow.gz,
549
+ which returns the number of rows of the matrix as a number;
550
+
551
+ Ranges in Ruby are represented by (x..y), where x is the beginning of the range and y its end.
552
+ An R matrix can be indexed by range, object@traj[1:nrowShow,1:ncolShow], the same result is
553
+ obtained in SciCom by indexing @matrix[(1..nrow_show), (1..ncol_show)]. Observe that this
554
+ statement is then chained with the format function and with the pp method to print the matrix.
555
+ EOT
556
+
557
+
558
+ code(<<-EOT)
559
+ class Trajectories
560
+
561
+ def show
562
+ puts("*** Class Trajectories, method Show *** ")
563
+ Kernel.print("times = ")
564
+ @times.pp
565
+ nrow_show = [10, @matrix.nrow.gz].min
566
+ ncol_show = R.min(10, @matrix.ncol).gz
567
+ puts("* Traj (limited to a matrix 10x10) = ")
568
+ @matrix[(1..nrow_show), (1..ncol_show)].format(digits: 2, nsmall: 2).pp
569
+ puts("******* End Show (trajectories) ******* ")
570
+ end
571
+
572
+ end
573
+ EOT
574
+
575
+ console(<<-EOT)
576
+ trajStAnne.show
577
+ EOT
578
+
579
+ body(<<-EOT)
580
+ Our show method has the same problem as SS4, i.e., if an empty trajectories object is created and
581
+ we try to 'show' it, it will generate an error. Let's see it:
582
+ EOT
583
+
584
+ code(<<-EOT)
585
+ empty_traj = Trajectories.new
586
+ EOT
587
+
588
+ console(<<-EOT)
589
+ empty_traj.show
590
+ EOT
591
+
592
+ comment_code(<<-EOT)
593
+ NoMethodError: undefined method `pp' for nil:NilClass
594
+ show at :6
595
+ <eval> at :1
596
+ eval at org/jruby/RubyKernel.java:976
597
+ console at T:/Rodrigo/Desenv/SciCom/examples/rbmarkdown.rb:61
598
+ <top> at T:\Rodrigo\Desenv\SciCom\examples\not_so.rb:533
599
+ EOT
600
+
601
+ body(<<-EOT)
602
+ In this example, we try to call method .pp on a nil (empty) object and this method is not
603
+ defined. In order to fix this, we can either prevent an empty trajectories class to be created,
604
+ or make sure that method show will not choke on the empty object. We will take the second
605
+ alternative, to follow SS4 and will check if either @times or @matrix are empty. If either one
606
+ of them is nil, then we will print a message saying so.
607
+
608
+ Although the first alternative, i.e., not allow for empty objects is a possibility in Ruby,
609
+ it seems that this is not the case for S4.
610
+ EOT
611
+
612
+ code(<<-EOT)
613
+ class Trajectories
614
+
615
+ def show
616
+ if (@times.nil? || @matrix.nil?)
617
+ puts("*** Class Trajectories is empty!! *** ")
618
+ return
619
+ end
620
+ puts("*** Class Trajectories, method Show *** ")
621
+ Kernel.print("times = ")
622
+ @times.pp
623
+ nrow_show = [10, @matrix.nrow.gz].min
624
+ ncol_show = R.min(10, @matrix.ncol).gz
625
+ puts("* Traj (limited to a matrix 10x10) = ")
626
+ @matrix[(1..nrow_show), (1..ncol_show)].format(digits: 2, nsmall: 2).pp
627
+ puts("******* End Show (trajectories) ******* ")
628
+ end
629
+
630
+ end
631
+ EOT
632
+
633
+ console(<<-EOT)
634
+ empty_traj.show
635
+ EOT
636
+
637
+ subsection("Method count_missing")
638
+
639
+ body(<<-EOT)
640
+ In R, methods 'print' and 'show' are methods that already exist. SS4 wants to add a method
641
+ called 'countMissing' which does not exist in R, and thus requires some special preparation. In
642
+ Ruby, every method we've created is a new method that exists inside the class. The fact that
643
+ 'print' happens to be also a method for class Kernel and 'show' is not, is not of special interest.
644
+ Actually we've seen that in order to call method print from the Kernel class we had to call
645
+ Kernel.print.
646
+
647
+ To create method 'count_missing' we just need to reopen the Trajectories class and add the
648
+ method the same way we've done with method 'show'. Again, let's first look at R's 'countMissing'
649
+ and then at Ruby's:
650
+ EOT
651
+
652
+ comment_code(<<-EOT)
653
+ > setMethod(
654
+ + f= "countMissing",
655
+ + signature= "Trajectories",
656
+ + definition=function(object){
657
+ + return(sum(is.na(object@traj)))
658
+ + }
659
+ + )
660
+ EOT
661
+
662
+ body(<<-EOT)
663
+ Here we introduce another particular case of SciCom. R has many methods that have a '.' in
664
+ their names, such as 'is.na'. In Ruby, the dot '.' has a special meaning as it is the way
665
+ we call a method on an object. Doing 'R.is.na' will not work. So, in SciCom, R functions that
666
+ have a dot in them will have the dot substituted by '__'. So, method is.na in SciCom, becomes
667
+ R.is__na. In method count_missing we use method chaining and convert the final count to a number.
668
+ EOT
669
+
670
+ code(<<-EOT)
671
+ class Trajectories
672
+
673
+ def count_missing
674
+ return @matrix.is__na.sum.gz
675
+ end
676
+
677
+ end
678
+ EOT
679
+
680
+ console(<<-EOT)
681
+ puts trajCochin.count_missing
682
+ EOT
683
+
684
+ subsection("To See the Methods")
685
+
686
+ body(<<-EOT)
687
+ In order to see the methods we have defined so far, we call on class Trajectories the method
688
+ 'instance_methods' passing it one argument, 'false', as follows:
689
+ EOT
690
+
691
+ console(<<-EOT)
692
+ puts Trajectories.instance_methods(false)
693
+ EOT
694
+
695
+ body(<<-EOT)
696
+ It is interesting to observe that we see our three methods 'count_missing', 'print' and 'show', but
697
+ we also see two other methods 'times' and 'matrix', but those last two as far as we know are
698
+ just instance variables and not methods, right? More on that when we talk about Accessors.
699
+
700
+ SciCom and Ruby, do not by default provide a way to see a method's code. However, if the user uses
701
+ a Ruby console such as Pry, then seeing methods and debugging is possible. Pry, is beyond the
702
+ scope of this document.
703
+ EOT
704
+
705
+ section("Construction")
706
+
707
+ body(<<-EOT)
708
+ Every class in Ruby has a constructor, if not explicitly defined, at least implicitly. Method
709
+ initialize is the constructor method and the one that coordinates the whole construction process.
710
+ EOT
711
+
712
+ subsection("Inspector")
713
+
714
+ body(<<-EOT)
715
+ There is no default 'inspector' in Ruby as in R, although there is nothing that prevents the
716
+ developer from inspecting and validating the input. For example, in the object Trajectories, one may
717
+ want to check that the number of elements in 'times' is equal to the number of columns in 'matrix'
718
+ and if they are not, issue an error. In order to understand why this restriction exists, the user is
719
+ again directed to SS4.
720
+
721
+ Here we show the R code for this validation:
722
+ EOT
723
+
724
+ comment_code(<<-EOT)
725
+ > setClass(
726
+ + Class="Trajectories",
727
+ + representation(times="numeric",traj="matrix"),
728
+ + validity=function(object){
729
+ + cat("~~~ Trajectories: inspector ~~~ \\n")
730
+ + if(length(object@times)!=ncol(object@traj)){
731
+ + stop ("[Trajectories: validation] the number of temporal measurements does not correspond
732
+ + }else{}
733
+ + return(TRUE)
734
+ + }
735
+ + )
736
+ EOT
737
+
738
+ body(<<-EOT)
739
+ In order to implement this validation we will coordinate it in the initialize method.
740
+ EOT
741
+
742
+ code(<<-EOT)
743
+ class Trajectories
744
+
745
+ def initialize(times: nil, matrix: nil)
746
+ @times = times
747
+ @matrix = matrix
748
+
749
+ # validate the input, to make sure that size of @times and the number of columns in
750
+ # @matrix are the same
751
+ puts ("~~~ Trajectories: inspector ~~~ ")
752
+ raise "[Trajectories: validation] the number of temporal measurements does not correspond with the number of columns in the matrix" if (@times.length.gz != @matrix.ncol.gz)
753
+
754
+ # show the object just created
755
+ show
756
+
757
+ end
758
+
759
+ end
760
+
761
+ EOT
762
+
763
+ body(<<-EOT)
764
+ Let's first create a Trajectories that validates fine, i.e., the number of elements in @times is
765
+ equal to the number of columns of the matrix. In this case, we will show a message saying that
766
+ validation was done and then print the object.
767
+ EOT
768
+
769
+ console(<<-EOT)
770
+ ok = Trajectories.new(times: R.c(1..2), matrix: R.matrix((1..2), ncol: 2))
771
+ EOT
772
+
773
+ body(<<-EOT)
774
+ Now, if we try to create a Trajectories that does not pass the validation criteria, our code
775
+ will raise an exception. Exceptions are a standard way to deal with errors in Ruby code and
776
+ many other object oriented languages. The interested reader should look for further documentation
777
+ on exceptions on the web.
778
+ EOT
779
+
780
+ console(<<-EOT)
781
+ error = Trajectories.new(times: R.c(1..3), matrix: R.matrix((1..2), ncol: 2))
782
+ EOT
783
+
784
+ body(<<-EOT)
785
+ The validation above does not consider the case when an empty object is created. Here we will
786
+ check to see if either times or matrix are nil, if either one of them is nil, then we will raise
787
+ an exception and interrupt the creation of the object. We also create a method validate that is
788
+ called from our initialize method.
789
+
790
+ Method validate has some interesting features about the integration of SciCom and R. First,
791
+ observe that instead of using @times.length.gz and @matrix.ncol.gz to get the length and number of
792
+ columns of variables 'times' and 'matrix' we actually compared (@times.length != @matrix.ncol).
793
+ In this case, the actual R operator '!=' is being used. This operator works on vectors and
794
+ matrices and returns a logical vector with TRUE or FALSE. In order to convert the logical vector,
795
+ with one element, to a logical value in Ruby we use method 'gt' (get truth).
796
+
797
+ EOT
798
+
799
+ code(<<-EOT)
800
+ class Trajectories
801
+
802
+ def initialize(times: nil, matrix: nil)
803
+ @times = times
804
+ @matrix = matrix
805
+
806
+ # call method validate to validate our imput
807
+ validate
808
+
809
+ # show the object just created
810
+ show
811
+
812
+ end
813
+
814
+ def validate
815
+
816
+ # Let's first check that we do not have an empty object
817
+ raise "Neither times nor matrix can be an empty object" if (@times.nil? || @matrix.nil?)
818
+
819
+ # validate the input, to make sure that size of @times and the number of columns in
820
+ # @matrix are the same
821
+ puts ("~~~ Trajectories: inspector ~~~ ")
822
+ raise "[Trajectories: validation] the number of temporal measurements does not correspond with the number of columns in the matrix" if (@times.length != @matrix.ncol).gt
823
+
824
+ end
825
+
826
+ end
827
+ EOT
828
+
829
+ body(<<-EOT)
830
+ Let's try then creating an empty object:
831
+ EOT
832
+
833
+ console(<<-EOT)
834
+ error = Trajectories.new
835
+ EOT
836
+
837
+ body(<<-EOT)
838
+ Another example:
839
+ EOT
840
+
841
+ console(<<-EOT)
842
+ error = Trajectories.new(times: 1)
843
+ EOT
844
+
845
+ body(<<-EOT)
846
+ Let's see now that the implementation is correct and that it does not raise an error on valid
847
+ input:
848
+ EOT
849
+
850
+ console(<<-EOT)
851
+ ok = Trajectories.new(times: R.c(1, 2), matrix: R.matrix((1..2), ncol: 2))
852
+ EOT
853
+
854
+ body(<<-EOT)
855
+ The 'initialize' method is called ONLY during the initial creation of the object. If any instance
856
+ variable is later modified, no control is done. At this moment though, there is no way to change
857
+ the value of any of our instance variables.
858
+ EOT
859
+
860
+ console(<<-EOT)
861
+ error.times = R.c(1, 2, 3)
862
+ EOT
863
+
864
+ body(<<-EOT)
865
+ The Trajectories class works for R objects and not for Ruby objects and thus expects as input R
866
+ objects. Passing R objects in all examples has been the obligation of the programmer. SciCom,
867
+ however, can translate Ruby objects to R objects and does so for parameter passing. Here we do
868
+ an explicit conversion of Ruby object to R in class Trajectories by calling R.convert for our
869
+ input parameters
870
+ EOT
871
+
872
+ comment_code(<<-EOT)
873
+ class Trajectories
874
+
875
+ def initialize(times: nil, matrix: nil)
876
+ @times = R.convert(times)
877
+ @matrix = R.convert(matrix)
878
+
879
+ # call method validate to validate our input
880
+ validate
881
+
882
+ # show the object just created
883
+ show
884
+
885
+ end
886
+
887
+ def validate
888
+
889
+ # Let's first check that we do not have an empty object
890
+ raise "Neither times nor matrix can be an empty object" if (@times.nil? || @matrix.nil?)
891
+
892
+ # validate the input, to make sure that size of @times and the number of columns in
893
+ # @matrix are the same
894
+ puts ("~~~ Trajectories: inspector ~~~ ")
895
+ raise "[Trajectories: validation] the number of temporal measurements \#{@times.length.gz} \
896
+ does not correspond with the number of columns in the matrix \#{@matrix.ncol.gz}" if (@times.length.gz != @matrix.ncol.gz)
897
+
898
+ end
899
+
900
+ end
901
+ EOT
902
+
903
+ class Trajectories
904
+
905
+ def initialize(times: nil, matrix: nil)
906
+ @times = R.convert(times)
907
+ @matrix = R.convert(matrix)
908
+
909
+ # call method validate to validate our input
910
+ validate
911
+
912
+ # show the object just created
913
+ show
914
+
915
+ end
916
+
917
+ def validate
918
+
919
+ # Let's first check that we do not have an empty object
920
+ raise "Neither times nor matrix can be an empty object" if (@times.nil? || @matrix.nil?)
921
+
922
+ # validate the input, to make sure that size of @times and the number of columns in
923
+ # @matrix are the same
924
+ puts ("~~~ Trajectories: inspector ~~~ ")
925
+ raise "[Trajectories: validation] the number of temporal measurements #{@times.length.gz} \
926
+ does not correspond with the number of columns in the matrix #{@matrix.ncol.gz}" if (@times.length.gz != @matrix.ncol.gz)
927
+
928
+ end
929
+
930
+ end
931
+
932
+ body(<<-EOT)
933
+ And now let's create a new Trajectories, but we will now pass a Ruby range for times:
934
+ EOT
935
+
936
+ console(<<-EOT)
937
+ ok = Trajectories.new(times: (1..2), matrix: R.matrix((1..2), ncol: 2))
938
+ EOT
939
+
940
+ body(<<-EOT)
941
+ Perfect! This works fine. Let's do another example... SciCom integrates with another Ruby
942
+ Gem called MDArray. MDArray provides multi-dimensional arrays for Ruby similar to what is
943
+ found in NumPy. It is beyond the scope of this paper to explain MDArray and the interested
944
+ reader is directed to MDArray wiki pages: https://github.com/rbotafogo/mdarray/wiki.
945
+ EOT
946
+
947
+ console(<<-EOT)
948
+ ok = Trajectories.new(times: (1..2), matrix: MDArray.double([2, 2], [1, 2, 3, 4]))
949
+ EOT
950
+
951
+ body(<<-EOT)
952
+ We will now create a multi-dimensional array with the help of MDArray. We could think of this
953
+ multi-dimensional array as having BMI data for multiple patients. In this example, we have then
954
+ data for two patients:
955
+ EOT
956
+
957
+ code(<<-EOT)
958
+ multi_array = MDArray.fromfunction("double", [2, 3, 4]) { |x, y, z| x + y + z }
959
+ EOT
960
+
961
+ console(<<-EOT)
962
+ multi_array.print
963
+ EOT
964
+
965
+ body(<<-EOT)
966
+ But for our Trajectories class, we need data for only one patient at a time, so we cannot
967
+ give this MDArray to Trajectories. MDArray allows us to get data slices efficiently, that is,
968
+ it will not do a data copy, just manipulate indexes so that only a 'view' of the data is made
969
+ available. So, let's make a Trajectories with data from our first patient:
970
+ EOT
971
+
972
+ console(<<-EOT)
973
+ ok1 = Trajectories.new(times: (1..4), matrix: multi_array.slice(0, 0))
974
+ EOT
975
+
976
+ body(<<-EOT)
977
+ And now let's create a Trajectories for our second patient:
978
+ EOT
979
+
980
+ console(<<-EOT)
981
+ ok2 = Trajectories.new(times: (1..4), matrix: multi_array.slice(0, 1))
982
+ EOT
983
+
984
+ subsection("The Initializator")
985
+
986
+ body(<<-EOT)
987
+ As we have seen, method 'initialize' is the main object creator orchestrator. This method can be
988
+ as complex as needed. So, let's get on with some improvements to our Trajectories class.
989
+
990
+ It would be rather pleasant that the columns of the matrix of the trajectories have names, the
991
+ names of measurements times. In the same way, the lines could be subscripted by a number of
992
+ individual.
993
+
994
+ To do this in R, one also uses method initialize:
995
+ EOT
996
+
997
+ comment_code(<<-EOT)
998
+ > setMethod(
999
+ + f="initialize",
1000
+ + signature="Trajectories",
1001
+ + definition=function(.Object,times,traj){
1002
+ + cat("~~~ Trajectories: initializator ~~~ \\n")
1003
+ + colnames(traj) <- paste("T",times,sep="")
1004
+ + rownames(traj) <- paste("I",1:nrow(traj),sep= "")
1005
+ + .Object@traj <- traj # Assignment of the slots
1006
+ + .Object@times <- times
1007
+ + return(.Object) # return of the object
1008
+ + }
1009
+ + )
1010
+ EOT
1011
+
1012
+ body(<<-EOT)
1013
+ Let's do this change to our 'initialize' method; however, before that, we need to introduce
1014
+ a new characteristic of SciCom. In R, it is possible to assign a value to the result of a
1015
+ function. For example, 'rownames(x) <- c("v1", "v2", "v3")'. Assigning to functions that way
1016
+ is not possible in Ruby. In order to do this assignment we need to introduce method 'fassign'.
1017
+ The above assignment is then written in SciCom as 'x.fassign(:rownames, R.c("v1", "v2", "v3"))',
1018
+ where the first argument to function fassign is the function name preceded by ':'.
1019
+ EOT
1020
+
1021
+ code(<<-EOT)
1022
+ class Trajectories
1023
+
1024
+ def initialize(times: nil, matrix: nil)
1025
+ @times = times
1026
+ @matrix = matrix
1027
+
1028
+ # call method validate to validate our input
1029
+ validate
1030
+
1031
+ # Add row names
1032
+ puts ("~~~ Trajectories: initializator ~~~ ")
1033
+ @matrix.fassign(:colnames, R.paste("T", @times, sep: ""))
1034
+ @matrix.fassign(:rownames, R.paste("I", (1..@matrix.nrow.gz), sep: ""))
1035
+
1036
+ # show the object just created
1037
+ show
1038
+
1039
+ end
1040
+
1041
+ end
1042
+ EOT
1043
+
1044
+ console(<<-EOT)
1045
+ traj = Trajectories.new(times: R.c(1,2,4,8), matrix: R.matrix((1..8),nrow: 2))
1046
+ EOT
1047
+
1048
+ body(<<-EOT)
1049
+ Another example:
1050
+ EOT
1051
+
1052
+ console(<<-EOT)
1053
+ error = Trajectories.new(times: R.c(1,2,4,8), matrix: R.matrix((1..8), nrow: 2))
1054
+ EOT
1055
+
1056
+ body(<<-EOT)
1057
+ Note that we still call our 'validate' method and it is still an error to create an empty
1058
+ Trajectories or one in which the sizes are wrong:
1059
+ EOT
1060
+
1061
+ console(<<-EOT)
1062
+ error = Trajectories.new(times: R.c(1, 2, 48), matrix: R.matrix((1..8), nrow: 2))
1063
+ EOT
1064
+
1065
+ body(<<-EOT)
1066
+ A constructor does not necessarily take the instance variable of the object as argument. For
1067
+ example, if we know (that is not the case in reality, but let us imagine so) that the
1068
+ BMI increases by 0.1 every week, we could build trajectories by providing the number
1069
+ of weeks and the initial weights.
1070
+
1071
+ First the code in R, we skip the definition of class TrajectoriesBis:
1072
+ EOT
1073
+
1074
+ comment_code(<<-EOT)
1075
+ > setMethod ("initialize",
1076
+ + "TrajectoriesBis",
1077
+ + function(.Object,nbWeek,BMIinit){
1078
+ + traj <- outer(BMIinit,1:nbWeek,function(init,week){return(init+0.1*week)})
1079
+ + colnames(traj) <- paste("T",1:nbWeek,sep="")
1080
+ + rownames(traj) <- paste("I",1:nrow(traj),sep="")
1081
+ + .Object@times <- 1:nbWeek
1082
+ + .Object@traj <- traj
1083
+ + return(.Object)
1084
+ + }
1085
+ + )
1086
+ EOT
1087
+
1088
+ body(<<-EOT)
1089
+ Now, let's make a TrajectoriesBis in SciCom. Here again, we should point out some characteristics
1090
+ of our code:
1091
+ EOT
1092
+
1093
+ list(<<-EOT)
1094
+ We made initialize with two positional arguments, instead of named arguments, i.e., the first
1095
+ argument is the number of weeks and the second bmi_init. In this case, when making a new object the
1096
+ position of the arguments is important and there is no way to pass the argument by name;
1097
+
1098
+ R function outer was called as if a method from bmi_init using dot notation, although one could
1099
+ use R.outer without problem;
1100
+
1101
+ Function 'outer' expects an R function as its 3rd argument. In order to build an R function from
1102
+ SciCom, we need to pass the function definition as a string to R.eval.
1103
+ EOT
1104
+
1105
+ code(<<-EOT)
1106
+ class TrajectoriesBis
1107
+
1108
+ attr_reader :times
1109
+ attr_reader :matrix
1110
+
1111
+ def initialize(number_weeks, bmi_init)
1112
+ @matrix = bmi_init.outer((1..number_weeks),
1113
+ R.eval("function(init, week) {return(init + 0.1 * week)}"))
1114
+ @times = number_weeks
1115
+ end
1116
+
1117
+ end
1118
+
1119
+ traj_bis = TrajectoriesBis.new(4, R.c(16,17,15.6))
1120
+ EOT
1121
+
1122
+ console(<<-EOT)
1123
+ traj_bis.matrix.pp
1124
+ EOT
1125
+
1126
+ body(<<-EOT)
1127
+ It is always possible to pass a Ruby variable to any string, by interpolating it into the string.
1128
+ To interpolate a variable into a string we put the variable inside #{}. As an example, let's
1129
+ assume that we will also require the BMI increase as a parameter for the constructor:
1130
+ EOT
1131
+
1132
+ comment_code(<<-EOT)
1133
+ class TrajectoriesBis
1134
+
1135
+ def initialize(number_weeks, bmi_init, increment)
1136
+ @matrix = bmi_init.outer((1..number_weeks),
1137
+ R.eval("function(init, week) {return(init + \#{increment} * week)}"))
1138
+ @times = number_weeks
1139
+ end
1140
+
1141
+ end
1142
+
1143
+ traj_bis = TrajectoriesBis.new(4, R.c(16,17,15.6), 0.3)
1144
+ EOT
1145
+
1146
+ #code(<<-EOT)
1147
+ class TrajectoriesBis
1148
+
1149
+ def initialize(number_weeks, bmi_init, increment)
1150
+ @matrix = bmi_init.outer((1..number_weeks),
1151
+ R.eval("function(init, week) {return(init + #{increment} * week)}"))
1152
+ @times = number_weeks
1153
+ end
1154
+
1155
+ end
1156
+
1157
+ traj_bis = TrajectoriesBis.new(4, R.c(16,17,15.6), 0.3)
1158
+ #EOT
1159
+
1160
+ console(<<-EOT)
1161
+ traj_bis.matrix.pp
1162
+ EOT
1163
+
1164
+ subsection("Constructors for Users")
1165
+
1166
+ body(<<-EOT)
1167
+ Many times, it is interesting to have different ways of constructing an object depending on
1168
+ what information our users have or want to provide to the constructor. Although we have only one
1169
+ initialize method, we can create multiple methods, that do some preprocessing and then call the
1170
+ initialize method to carry out the object building.
1171
+
1172
+ In order to do that, we use what are called class methods, instead of instance methods. All the
1173
+ methods we've created so far are instance methods, class methods are defined by prepending the
1174
+ self keyword to the method's name. Still using the assumption that the BMI will grow by 0.1 per
1175
+ week, let's define a regular trajectory without having to define a TrajectoriesBis as above:
1176
+ EOT
1177
+
1178
+ comment_code(<<-EOT)
1179
+ > regularTrajectories <- function(nbWeek,BMIinit) {
1180
+ + traj <- outer(BMIinit,1:nbWeek,function(init,week){return(init+0.1*week)})
1181
+ + times <- 1: nbWeek
1182
+ + return(new(Class="Trajectories",times=times,traj=traj))
1183
+ + }
1184
+ > regularTrajectories(nbWeek=3,BMIinit=c(14,15,16))
1185
+ EOT
1186
+
1187
+ body(<<-EOT)
1188
+ Notice how method 'regular' is defined as 'self.regular', making it a class method. The last
1189
+ statement of the method definition is actually a call to the Trajectories constructor 'new' passing
1190
+ the calculated values for times and matrix.
1191
+
1192
+ Notice also how method regular is called, similar to the way new is called by adding it after class
1193
+ Trajectories name: 'Trajectories.regular'.
1194
+ EOT
1195
+
1196
+ code(<<-EOT)
1197
+
1198
+ class Trajectories
1199
+
1200
+ def self.regular(number_weeks: nil, bmi_init: nil)
1201
+ matrix = bmi_init.outer((1..number_weeks),
1202
+ R.eval("function(init, week) {return(init + 0.1 * week)}"))
1203
+ times = R.c((1..number_weeks))
1204
+ Trajectories.new(times: times, matrix: matrix)
1205
+ end
1206
+
1207
+ end
1208
+
1209
+ EOT
1210
+
1211
+ console(<<-EOT)
1212
+ regular = Trajectories.regular(bmi_init: R.c(14, 15, 16), number_weeks: 3)
1213
+ EOT
1214
+
1215
+ body(<<-EOT)
1216
+ We have already seen that constructors can be as complex as needed, calling other methods and doing
1217
+ calculations on the received parameters. On this last example, we will check if the times
1218
+ variable was provided. If it is not provided, then we will use matrix columns to define the times:
1219
+ EOT
1220
+
1221
+ code(<<-EOT)
1222
+
1223
+ class Trajectories
1224
+
1225
+ def self.init(times: nil, matrix: nil)
1226
+ times = R.c((1..matrix.ncol.gz)) if times.nil?
1227
+ Trajectories.new(times: times, matrix: matrix)
1228
+ end
1229
+
1230
+ end
1231
+
1232
+ EOT
1233
+
1234
+ console(<<-EOT)
1235
+ traj = Trajectories.init(matrix: R.matrix((1..8), ncol: 4))
1236
+ EOT
1237
+
1238
+ section("Accessors")
1239
+
1240
+ body(<<-EOT)
1241
+ Accessors are methods for getting and setting the value of instance variables.
1242
+ EOT
1243
+
1244
+ subsection("Get")
1245
+
1246
+ body(<<-EOT)
1247
+ Getters are methods for getting the value of an instance variable. We have been using getters
1248
+ since the beginning of this document, without explicitly saying so. When defining attr_reader
1249
+ :times and attr_reader :matrix, we have actually defined two getter methods for reading the values
1250
+ of variables times and matrix respectively. We can however define getters explicitly:
1251
+ EOT
1252
+
1253
+ code(<<-EOT)
1254
+
1255
+ class TrajectoriesBis
1256
+
1257
+ def initialize(times: times, matrix: matrix)
1258
+ @times = times
1259
+ @matrix = matrix
1260
+ end
1261
+
1262
+ def times
1263
+ @times
1264
+ end
1265
+
1266
+ def matrix
1267
+ @matrix
1268
+ end
1269
+
1270
+ end
1271
+
1272
+ traj = TrajectoriesBis.new(times: 1, matrix: 2)
1273
+
1274
+ EOT
1275
+
1276
+ console(<<-EOT)
1277
+ puts traj.times
1278
+ EOT
1279
+
1280
+ console(<<-EOT)
1281
+ puts traj.matrix
1282
+ EOT
1283
+
1284
+ body(<<-EOT)
1285
+ It is also possible to define more sophisticated getters. For example one can
1286
+ regularly need the BMI at inclusion. In R, one would index a matrix as matrix[,1]. In Ruby,
1287
+ it is a syntax error to have a ',' just after the '['. In this case we need to add 'nil' as
1288
+ in matrix[nil, 1]:
1289
+ EOT
1290
+
1291
+ code(<<-EOT)
1292
+ class Trajectories
1293
+
1294
+ def get_traj_inclusion
1295
+ @matrix[nil, 1]
1296
+ end
1297
+
1298
+ end
1299
+ EOT
1300
+
1301
+ console(<<-EOT)
1302
+ trajCochin.get_traj_inclusion.pp
1303
+ EOT
1304
+
1305
+ subsection("Set")
1306
+
1307
+ body(<<-EOT)
1308
+ A setter is a method that assigns a value to a variable. As with getters, Ruby also provides an
1309
+ easy way to write setters and allow you to also write them explicitly. Let's first use the
1310
+ simple way:
1311
+ EOT
1312
+
1313
+ code(<<-EOT)
1314
+ class TrajectoriesBis
1315
+
1316
+ attr_writer :times
1317
+ attr_writer :matrix
1318
+
1319
+ end
1320
+
1321
+ traj = TrajectoriesBis.new
1322
+ traj.times = R.c(1, 2)
1323
+ traj.matrix = R.matrix((1..2), ncol: 2)
1324
+ EOT
1325
+
1326
+ console(<<-EOT)
1327
+ traj.matrix.pp
1328
+ EOT
1329
+
1330
+ body(<<-EOT)
1331
+ Note that now we can use '=' to assign a value to both variables times and matrix. Without
1332
+ setters, changing the value of variables times and matrix was not possible. Our class, up
1333
+ to this point was protected from any changes to those variables. If we need to allow changes
1334
+ to those variables, then setters are needed. In this case, the simple setter as shown above is
1335
+ not ideal, since it would allow changes that break the restriction that variable times has to
1336
+ have the same length as the number of columns of matrix. In order to do the verification we
1337
+ need to implement a more sophisticated setter. In the example below, we add the 'times=' setter
1338
+ that receives as input one argument. First we convert the given argument to an R object, then
1339
+ check to see that the length of times is the same as the number of columns and if everything is
1340
+ fine, then we set the value of instance variable times:
1341
+ EOT
1342
+
1343
+ #
1344
+ # We need to put the times= definition inside the comment_code block because it accesses a variable
1345
+ # from inside the HereDoc. If we do not comment this access we will get an error saying that
1346
+ # @matrix is not a global variable, which is really the case.
1347
+ #
1348
+ comment_code(<<-EOT)
1349
+ class Trajectories
1350
+
1351
+ def times=(times)
1352
+ times = R.convert(times)
1353
+ raise "[Trajectories: validation] the number of temporal measurements \#{times.length.gz} \
1354
+ does not correspond with the number of columns in the matrix \#{@matrix.ncol.gz}" if (times.length.gz != @matrix.ncol.gz)
1355
+ @times = times
1356
+ end
1357
+
1358
+ end
1359
+ EOT
1360
+
1361
+ class Trajectories
1362
+
1363
+ def times=(times)
1364
+ times = R.convert(times)
1365
+ raise "[Trajectories: validation] the number of temporal measurements #{times.length.gz} \
1366
+ does not correspond with the number of columns in the matrix #{@matrix.ncol.gz}" if (times.length.gz != @matrix.ncol.gz)
1367
+ @times = times
1368
+ end
1369
+
1370
+ end
1371
+
1372
+ console(<<-EOT)
1373
+ trajCochin.times = (1..5)
1374
+ EOT
1375
+
1376
+ body(<<-EOT)
1377
+ We now set the value appropriately and will not get any errors:
1378
+ EOT
1379
+
1380
+ console(<<-EOT)
1381
+ trajCochin.times = R.c(1, 5, 6, 8)
1382
+ EOT
1383
+
1384
+ subsection("The Operator '['")
1385
+
1386
+ body(<<-EOT)
1387
+ It is also possible to define getters by using the operator '['. This operator is not usually
1388
+ used for returning instance variables and it is preferable to use the methods we've used above;
1389
+ however, for completeness with SS4 we are showing how to define this here. Operator '[' is
1390
+ better left to be used for array/matrix indices.
1391
+ EOT
1392
+
1393
+ code(<<-EOT)
1394
+
1395
+ class Trajectories
1396
+
1397
+ def [](var_name)
1398
+
1399
+ case var_name
1400
+ when "times"
1401
+ @times
1402
+ when "matrix"
1403
+ @matrix
1404
+ else
1405
+ raise "Unknown instance variable"
1406
+ end
1407
+
1408
+ end
1409
+
1410
+ end
1411
+
1412
+ EOT
1413
+
1414
+ console(<<-EOT)
1415
+ trajCochin["times"].pp
1416
+ EOT
1417
+
1418
+ body(<<-EOT)
1419
+ Similarly, we could use operator '[]=' to assign a value to times and matrix. We will not do this
1420
+ here as we think that the other options are better and the interested user can easily find help,
1421
+ if needed to implement such method.
1422
+ EOT
1423
+
1424
+ section("To Go Further")
1425
+
1426
+ body(<<-EOT)
1427
+ This section will introduce advanced features of Object Oriented programming such as Inheritance
1428
+ and Modules and will also show some aspects of S4 that do not apply to Ruby.
1429
+ EOT
1430
+
1431
+ subsection("Methods Using Several Arguments")
1432
+
1433
+ body(<<-EOT)
1434
+ In Ruby, methods can have as many arguments as needed and those methods are defined the way we
1435
+ have already seen in many of the examples above. The example in SS4 presents a method that prints
1436
+ different output if its input is numeric, character, or both. Let's write a class in Ruby that
1437
+ does the same for Numeric and String. In Ruby we do not define global functions, we always define
1438
+ methods inside classes or modules (as we will see later). Also, Ruby is not typed, so methods are
1439
+ not called depending on their types as in SS4 examples. Below, method test will be called with
1440
+ one parameter. At the time of calling we do not know the type of the argument, the method can
1441
+ then check if the received argument is a Numeric or a String and at this time, decide what should
1442
+ be printed.
1443
+ EOT
1444
+
1445
+ class Test
1446
+
1447
+ def test(input)
1448
+
1449
+ case input
1450
+ when Numeric
1451
+ puts "The input is numeric: #{input}"
1452
+ when String
1453
+ puts "The input is a string: #{input}"
1454
+ else
1455
+ puts "The input is neither a number nor a string"
1456
+ end
1457
+
1458
+ end
1459
+
1460
+ end
1461
+
1462
+ t = Test.new
1463
+
1464
+ comment_code(<<-EOT)
1465
+ class Test
1466
+
1467
+ def test(input)
1468
+
1469
+ case input
1470
+ when Numeric
1471
+ puts "The input is numeric: \#{input}"
1472
+ when String
1473
+ puts "The input is a string: \#{input}"
1474
+ else
1475
+ puts "The input is neither a number nor a string"
1476
+ end
1477
+
1478
+ end
1479
+
1480
+ end
1481
+
1482
+ t = Test.new
1483
+
1484
+ EOT
1485
+
1486
+
1487
+ console(<<-EOT)
1488
+ puts t.test(5)
1489
+ EOT
1490
+
1491
+ console(<<-EOT)
1492
+ puts t.test("Hello")
1493
+ EOT
1494
+
1495
+ body(<<-EOT)
1496
+ Ruby has ways of dealing with multiple arguments, missing arguments, undefined number of arguments,
1497
+ named arguments, unnamed arguments, etc. This is beyond the scope of this document and we
1498
+ suggest the interested reader to go to the many resources about Ruby that can easily be found
1499
+ on the web.
1500
+
1501
+ We will now create a new class 'Partition' that we will use later in this document. This class will
1502
+ have only the basic methods needed for the examples to work.
1503
+ EOT
1504
+
1505
+ code(<<-EOT)
1506
+ class Partition
1507
+
1508
+ attr_reader :nb_groups
1509
+ attr_reader :part
1510
+
1511
+ def initialize(nb_groups, part)
1512
+ @nb_groups = nb_groups
1513
+ @part = part
1514
+ end
1515
+
1516
+ end
1517
+
1518
+ partCochin = Partition.new(2, R.c("A","B","A","B").factor)
1519
+ partStAnne = Partition.new(2, R.c("A","B").rep(R.c(50,30)).factor)
1520
+
1521
+ EOT
1522
+
1523
+ console(<<-EOT)
1524
+ partCochin.part.pp
1525
+ EOT
1526
+
1527
+ console(<<-EOT)
1528
+ partStAnne.part.pp
1529
+ EOT
1530
+
1531
+ body(<<-EOT)
1532
+ We will suppose that part is always composed of capital letters going from A to
1533
+ LETTERS[nb_groups].
1534
+ EOT
1535
+
1536
+ subsection("Inheritance")
1537
+
1538
+ body(<<-EOT)
1539
+ Ruby being a powerful Object Oriented language has the concept of Inheritance, but it does not
1540
+ allow for multiple inheritance. Multiple inheritance has many drawbacks and Ruby just does not
1541
+ support it. However, Ruby has other concepts that make up for the lack of multiple inheritance as
1542
+ we will see in the following examples.
1543
+
1544
+ So, let's go back to SS4 examples. We want now to define a class called TrajPartitioned that
1545
+ inherits from class Trajectories. When a class has a parent, all methods available for the
1546
+ parent are also available to the child.
1547
+ EOT
1548
+
1549
+ code(<<-EOT)
1550
+ class TrajPartitioned < Trajectories
1551
+
1552
+ attr_reader :list_partitions
1553
+
1554
+ end
1555
+ EOT
1556
+
1557
+ body(<<-EOT)
1558
+ That's all there is to it! We've just created a class TrajPartitioned that inherits all methods
1559
+ from class Trajectories and at this point does nothing different from Trajectories, but adds a
1560
+ new instance variable: list_partitions.
1561
+
1562
+ Creating TrajPartitioned without arguments will generate an error, since a Trajectories requires
1563
+ both times and matrix to be non null.
1564
+ EOT
1565
+
1566
+ console(<<-EOT)
1567
+ tdPitie = TrajPartitioned.new
1568
+ EOT
1569
+
1570
+ body(<<-EOT)
1571
+ Let's try to create a TrajPartitioned, but passing to it two partitions. For that, let's first
1572
+ create a new Partition:
1573
+ EOT
1574
+
1575
+ code(<<-EOT)
1576
+ partCochin2 = Partition.new(3, R.c("A", "C", "C", "B").factor)
1577
+ EOT
1578
+
1579
+ body(<<-EOT)
1580
+ And now let's create the TrajPartitioned:
1581
+ EOT
1582
+
1583
+ console(<<-EOT)
1584
+ tdCochin = TrajPartitioned.new(times: R.c(1,3,4,5), matrix: trajCochin.matrix,
1585
+ list_partitions: R.list(partCochin,partCochin2))
1586
+ EOT
1587
+
1588
+ body(<<-EOT)
1589
+ This didn't work giving us an error saying that <Partition...> is an unknown parameter for R. Hummm??
1590
+ R function 'list' expects R objects, and in this case, partCochin and partCochin2 are Ruby classes,
1591
+ so trying to apply function list to them does not work. Clearly, we will have to work in the realm
1592
+ of Ruby to keep the list of partitions. This is not a problem as Ruby has data structures to
1593
+ maintain a list of objects, the Array. Let's then try another solution:
1594
+ EOT
1595
+
1596
+ console(<<-EOT)
1597
+ tdCochin = TrajPartitioned.new(times: R.c(1,3,4,5), matrix: trajCochin.matrix,
1598
+ list_partitions: [partCochin, partCochin2])
1599
+ EOT
1600
+
1601
+ body(<<-EOT)
1602
+ We now get a second error: 'unknown keyword: list_partitions'. Class TrajPartitioned inherits
1603
+ from class Trajectories and class Trajectories has an initialize function that requires two
1604
+ parameters, times and matrix; list_partitions is not a parameter for initialize and is thus
1605
+ unknown. In order to fix this problem we need to create an initialize method for class
1606
+ TrajPartitioned.
1607
+ EOT
1608
+
1609
+ subsection("The 'super' Keyword")
1610
+
1611
+ body(<<-EOT)
1612
+ R has a method called 'callNextMethod' for control flow between inherited classes. In Ruby, we
1613
+ have a model that is a bit different. When a method is called on a subclass, if this method is
1614
+ not found it will be searched in the parent class and it will go up the hierarchy of classes until
1615
+ it is found or an error is issued. If we want the parent method to be called we can call 'super':
1616
+ EOT
1617
+
1618
+ code(<<-EOT)
1619
+ class TrajPartitioned
1620
+
1621
+ def initialize(times: times, matrix: matrix, list_partitions: list_partitions)
1622
+ super(times: times, matrix: matrix)
1623
+ @list_partitions = list_partitions
1624
+ end
1625
+
1626
+ end
1627
+ EOT
1628
+
1629
+ body(<<-EOT)
1630
+ Let's try our example again:
1631
+ EOT
1632
+
1633
+ console(<<-EOT)
1634
+ tdCochin = TrajPartitioned.new(times: R.c(1,3,4,5), matrix: trajCochin.matrix,
1635
+ list_partitions: [partCochin, partCochin2])
1636
+ EOT
1637
+
1638
+ body(<<-EOT)
1639
+ Now tdCochin is created correctly; however, the 'show' method only shows information about
1640
+ times and matrix, there is nothing about our new list_partitions variable. This is so, since
1641
+ there is no method 'show' in TrajPartitioned, so method 'show' from Trajectories is executed.
1642
+
1643
+ So, let's start by writing a 'print' method, that will print all the information we have in
1644
+ TrajPartitioned. The flow of control for this method is: Ruby sees a call to 'print', so it checks
1645
+ to see if 'print' is a method for TrajPartitioned. Since we have just defined this method, Ruby
1646
+ finds it and executes it. The first command in print is a call to 'super', which will call the
1647
+ parent 'print' method, which prints information for 'times' and 'matrix'. When the parent 'print'
1648
+ finishes control continues after the 'super' call, printing the number of available partitions.
1649
+ EOT
1650
+
1651
+ class TrajPartitioned
1652
+
1653
+ def print
1654
+ super
1655
+ puts ("the object also contains #{@list_partitions.length} partition")
1656
+ puts ("***** Fine of print (TrajPartitioned) *****")
1657
+ end
1658
+
1659
+ end
1660
+
1661
+ comment_code(<<-EOT)
1662
+ class TrajPartitioned
1663
+
1664
+ def print
1665
+ super
1666
+ puts ("the object also contains \#{@list_partitions.length} partition")
1667
+ puts ("***** Fine of print (TrajPartitioned) *****")
1668
+ end
1669
+
1670
+ end
1671
+ EOT
1672
+
1673
+ console(<<-EOT)
1674
+ tdCochin.print
1675
+ EOT
1676
+
1677
+ body(<<-EOT)
1678
+ Notice that this model is much cleaner than 'callNextMethod' and is not subject to any of the
1679
+ difficulties presented in SS4 and there is no need for the keywords “is”, “as” and “as<-”, although
1680
+ Ruby provides methods to check the class of an object, its hierarchy, etc., when needed.
1681
+
1682
+ In Ruby there is no similar method as "setIs" and it is not possible to convert one class into
1683
+ another, but there are other ways of getting the necessary results. Let's then implement a
1684
+ method that returns the partition with the least number of groups. First, as usual, the R code
1685
+ with 'setIs':
1686
+ EOT
1687
+
1688
+ comment_code(<<-EOT)
1689
+ > setIs(
1690
+ + class1="TrajPartitioned",
1691
+ + class2="Partition",
1692
+ + coerce=function(from,to){
1693
+ + numberGroups <- sapply(tdCochin@listPartitions,getNbGroups)
1694
+ + Smallest <- which.min(-numberGroups)
1695
+ + to<-new("Partition")
1696
+ + to@nbGroups <- getNbGroups(from@listPartitions[[Smallest]])
1697
+ + to@part <- getPart(from@listPartitions[[Smallest]])
1698
+ + return(to)
1699
+ + }
1700
+ + )
1701
+ EOT
1702
+
1703
+
1704
+ body(<<-EOT)
1705
+ And now the Ruby code. Here we are getting deeper into Ruby and it is becoming harder for a
1706
+ pure R developer to understand the code. We will describe it in more detail:
1707
+ EOT
1708
+
1709
+ list(<<-EOT)
1710
+ We define a method called 'to_part' that has one argument 'which'. By default 'which' is
1711
+ ':min', the name of the minimum method. This means that if no argument is given to to_part it
1712
+ will assume the which = :min;
1713
+
1714
+ @list_partitions is a Ruby array. Method map is similar to method sapply in R, it applies a
1715
+ 'block' to every element of the array, returning an array. Describing blocks is beyond the
1716
+ scope of this document, but we can think of it as if it were a function.
1717
+ The block is in '{}' and has one argument named 'part'. Thus, map goes through all elements
1718
+ of the array, and gets the nb_groups of the element and returns them into the number_groups array.
1719
+
1720
+ number_groups is an array and doing number_groups.min returns
1721
+ the minimum value in number_groups and number_groups.max the maximum. We can call a method on an
1722
+ object by 'sending' the method name to the object, so, number_groups.send(:min) is equivalent to
1723
+ number_groups.min;
1724
+
1725
+ Method 'index' for array, returns the index of a given element. So, number_groups.index(3) would return
1726
+ the index of the element '3'. Then number_groups.index(number_groups.min) returns the index of
1727
+ the minimum element in the array. This is the equivalent of R which.min(number_groups);
1728
+
1729
+ Finally, number_groups.index(number_groups.send(which)), will return the index of the element we
1730
+ ask for, be it :min or :max. Note that if we pass another value, this would be an error.
1731
+ EOT
1732
+
1733
+ code(<<-EOT)
1734
+ class TrajPartitioned
1735
+
1736
+ def to_part(which = :min)
1737
+ number_groups = @list_partitions.map { |part| part.nb_groups }
1738
+ selected = number_groups.index(number_groups.send(which))
1739
+ return @list_partitions[selected]
1740
+ end
1741
+
1742
+ end
1743
+ EOT
1744
+
1745
+ body(<<-EOT)
1746
+ To get the partition with the minimum number of elements:
1747
+ EOT
1748
+
1749
+ console(<<-EOT)
1750
+ tdCochin.to_part.part.pp
1751
+ EOT
1752
+
1753
+ body(<<-EOT)
1754
+ To get the partition with the maximum number of elements:
1755
+ EOT
1756
+
1757
+ console(<<-EOT)
1758
+ tdCochin.to_part(:max).part.pp
1759
+ EOT
1760
+
1761
+ body(<<-EOT)
1762
+ In this example we did not follow exactly the R code from SS4. The reason for that is that
1763
+ 'list_partitions' is a list of Ruby classes and we cannot run sapply on this list. If we
1764
+ try to call a 'getNbGroups' or in the Ruby case nb_groups, the code will crash. Let's try
1765
+ it:
1766
+ EOT
1767
+
1768
+ section("Virtual Classes")
1769
+
1770
+ body(<<-EOT)
1771
+ In Ruby there are no "Virtual Classes", but it is possible to implement derived classes from
1772
+ a parent class with methods that behave properly according to the object's class. Following
1773
+ SS4 we will implement two classes: PartitionSimple and PartitionEval which are subclasses
1774
+ of class PartitionFather. PartitionFather will just be a regular class. Methods defined in
1775
+ PartitionFather will be available to be used in the subclasses.
1776
+
1777
+ Here is the R code of those classes and the implementation of a method in PartitionFather
1778
+ that multiplies the number of groups by 2:
1779
+ EOT
1780
+
1781
+ comment_code(<<-EOT)
1782
+ > setClass(
1783
+ + Class="PartitionFather",
1784
+ + representation=representation(nbGroups="numeric","VIRTUAL")
1785
+ + )
1786
+
1787
+ > setClass(
1788
+ + Class="PartitionSimple",
1789
+ + representation=representation(part="factor"),
1790
+ + contains="PartitionFather"
1791
+ + )
1792
+
1793
+ > setClass(
1794
+ + Class="PartitionEval",
1795
+ + representation=representation(part="ordered"),
1796
+ + contains="PartitionFather"
1797
+ + )
1798
+
1799
+ > setGeneric("nbMultTwo",function(object){standardGeneric("nbMultTwo")})
1800
+
1801
+ > setMethod("nbMultTwo","PartitionFather",
1802
+ + function(object){
1803
+ + object@nbGroups <- object@nbGroups*2
1804
+ + return (object)
1805
+ + }
1806
+ + )
1807
+ EOT
1808
+
1809
+ body(<<-EOT)
1810
+ Since Ruby has no type definition, there is no real need for a parent class and subclasses.
1811
+ However, we will implement those classes in order to show Ruby's inheritance:
1812
+ EOT
1813
+
1814
+ code(<<-EOT)
1815
+ # Parent class. Differently from SS4, both 'nb_groups' and 'part' are defined in the
1816
+ # parent class.
1817
+ class PartitionFather
1818
+
1819
+ attr_reader :nb_groups
1820
+ attr_reader :part
1821
+
1822
+ # initialize class PartitionFather with the number of groups and parts. Note that we
1823
+ # use R.i for nb_groups in order to convert the number of groups into an R vector.
1824
+ def initialize(nb_groups: 0, part: nil)
1825
+ @nb_groups = R.i(nb_groups)
1826
+ @part = part
1827
+ end
1828
+
1829
+ # method nb_mult_two can be called from all subclasses
1830
+ def nb_mult_two
1831
+ @nb_groups * 2
1832
+ end
1833
+
1834
+ # method 'to_s' is called whenever we try to print a Ruby object. This method emulates
1835
+ # R 'print' method that prints all the slots.
1836
+ def to_s
1837
+ puts ("Variable 'nb_groups':")
1838
+ @nb_groups.pp
1839
+ puts
1840
+ puts ("Variable 'part':")
1841
+ @part.pp
1842
+ puts
1843
+ end
1844
+
1845
+ end
1846
+
1847
+ # Class PartitionSimple is a subclass of PartitionFather. To make a subclass of a
1848
+ # class we use the operator '<'. Since the whole logic is in the parent class
1849
+ # PartitionSimple is just an empty class
1850
+ class PartitionSimple < PartitionFather
1851
+
1852
+ end
1853
+
1854
+ # PartitionEval is also only an empty class
1855
+ class PartitionEval < PartitionFather
1856
+
1857
+ end
1858
+ EOT
1859
+
1860
+ console(<<-EOT)
1861
+ a = PartitionSimple.new(nb_groups: 3, part: (R.LETTERS[R.c(1, 2, 3, 2, 2, 1)].factor))
1862
+ puts a
1863
+ EOT
1864
+
1865
+ console(<<-EOT)
1866
+ a.nb_mult_two.pp
1867
+ EOT
1868
+
1869
+ console(<<-EOT)
1870
+ b = PartitionEval.new(nb_groups: 5, part: R.LETTERS[R.c(1, 5, 3, 4, 2, 4)].ordered)
1871
+ puts b
1872
+ EOT
1873
+
1874
+ console(<<-EOT)
1875
+ b.nb_mult_two.pp
1876
+ EOT
1877
+
1878
+ body(<<-EOT)
1879
+ The example above, although it replicates SS4 is not actually very useful from the point of
1880
+ view of class hierarchy in Ruby. We will then write a new function to_s in class
1881
+ PartitionSimple that will print the name of the class:
1882
+ EOT
1883
+
1884
+ code(<<-EOT)
1885
+ class PartitionSimple
1886
+
1887
+ def to_s
1888
+ puts("Class PartitionSimple")
1889
+ super
1890
+ end
1891
+
1892
+ end
1893
+ EOT
1894
+
1895
+ console(<<-EOT)
1896
+ puts a
1897
+ EOT
1898
+
1899
+ body(<<-EOT)
1900
+ As can be seen, 'puts a' now calls method 'to_s' defined in class PartitionSimple. This
1901
+ method prints 'Class PartitionSimple' and then calls the super method, i.e., method 'to_s'
1902
+ from class PartitionFather.
1903
+
1904
+ Note though that 'puts b' still prints the same output, since it has no particular 'to_s'
1905
+ method.
1906
+ EOT
1907
+
1908
+ console(<<-EOT)
1909
+ puts b
1910
+ EOT
1911
+
1912
+ section("Internal Modification of an Object")
1913
+
1914
+
1915
+ subsection("Method to Modify a Field")
1916
+
1917
+ body(<<-EOT)
1918
+ Let us return to our trajectories example and define a third method that imputes data for
1919
+ missing values. To simplify, we will impute by replacing by the mean values. This is the R
1920
+ code to do this:
1921
+ EOT
1922
+
1923
+ comment_code(<<-EOT)
1924
+ > meanWithoutNa <- function (x){mean(x,na.rm=TRUE)}
1925
+ > setGeneric("impute",function (.Object){standardGeneric("impute")})
1926
+ > setMethod(
1927
+ + f="impute",
1928
+ + signature="Trajectories",
1929
+ + def=function(.Object){
1930
+ + average <- apply(.Object@traj,2,meanWithoutNa)
1931
+ + for (iCol in 1:ncol(.Object@traj)){
1932
+ + .Object@traj[is.na(.Object@traj[,iCol]),iCol] <- average[iCol]
1933
+ + }
1934
+ + return(.Object)
1935
+ + }
1936
+ + )
1937
+ EOT
1938
+
1939
+ body(<<-EOT)
1940
+ The code above, as explained in SS4 creates a new object and does not change the original one.
1941
+ So, calling impute(trajCochin) will work correctly by creating a new object but will not
1942
+ change trajCochin. This works fine, but can be memory expensive if the matrix is a large
1943
+ one.
1944
+
1945
+ Let's now implement the same method in SciCom. We will use for that Ruby's 'each' method.
1946
+ In Ruby, the 'each' method goes through all elements of a vector or list in order. The
1947
+ 'each' method is available for an R matrix in SciCom. Actually, when calling 'each' for an
1948
+ R matrix, the matrix is converted to a Ruby MDArray and the 'each' method is applied to this
1949
+ MDArray. So, we can do @matrix.each and cycle through every element in this matrix.
1950
+ The 'each_with_index' method does the same as 'each' but also passes the index of the element
1951
+ to the Ruby block (please, google Ruby block to get further information on blocks in Ruby).
1952
+
1953
+ One key aspect to remember is that Ruby indexes start with 0 while R indexes start with 1, so
1954
+ an element with index i in Ruby will be indexed i+1 in R. With that, let's see the SciCom
1955
+ code for method impute:
1956
+ EOT
1957
+
1958
+ code(<<-EOT)
1959
+ class Trajectories
1960
+
1961
+ def mean_without_na
1962
+ @matrix.mean(na__rm: TRUE)
1963
+ end
1964
+
1965
+ def impute
1966
+ @matrix.each_with_index do |elmt, i|
1967
+ @matrix[i+1] = mean_without_na if elmt.nan?
1968
+ end
1969
+ end
1970
+
1971
+ end
1972
+ EOT
1973
+
1974
+ console(<<-EOT)
1975
+ trajCochin.impute
1976
+ trajCochin.matrix.pp
1977
+ EOT
1978
+
1979
+ body(<<-EOT)
1980
+ It works! and note that actually trajCochin matrix was changed. However, as with the R
1981
+ solution, Renjin does make a copy of the data in the background. Let's investigate this a
1982
+ little further getting inside SciCom's internal. Method 'as__mdarray' explicitly converts
1983
+ an R matrix to an MDArray:
1984
+ EOT
1985
+
1986
+ console(<<-EOT)
1987
+ cochin_internal = trajCochin.matrix.as__mdarray
1988
+ cochin_internal.print
1989
+ EOT
1990
+
1991
+ body(<<-EOT)
1992
+ Now let's assign a value to trajCochin matrix and compare it to the variable cochin_internal:
1993
+ EOT
1994
+
1995
+ console(<<-EOT)
1996
+ trajCochin.matrix[1] = 1
1997
+ trajCochin.matrix.pp
1998
+ puts
1999
+ puts cochin_internal
2000
+ EOT
2001
+
2002
+ body(<<-EOT)
2003
+ As we can now see, trajCochin and cochin_internal have different content, while cochin_internal
2004
+ still has the same value in index 0, i.e. 15.0, trajCochin matrix has value 1 in index 1. This
2005
+ shows that Renjin when assigning to trajCochin.matrix[1] makes a copy of the original data.
2006
+
2007
+ Below, we use method 'get' which is a synonym of method 'as__mdarray' to again get the content
2008
+ of trajCochin.matrix. This variable has as first element the value 1, as set previously.
2009
+ EOT
2010
+
2011
+ console(<<-EOT)
2012
+ internal2 = trajCochin.matrix.get
2013
+ internal2.print
2014
+ EOT
2015
+
2016
+ body(<<-EOT)
2017
+ We will now set the value of the second element of internal2 to 1000. Note that internal2 is
2018
+ an MDArray and that the second element of this array is indexed with 1:
2019
+ EOT
2020
+
2021
+ console(<<-EOT)
2022
+ internal2[1] = 1000
2023
+ internal2.print
2024
+ EOT
2025
+
2026
+ body(<<-EOT)
2027
+ And now, if we print the value of trajCochin.matrix, we note that the second element of this
2028
+ matrix (R matrix) is also 1000. This shows that the MDArray obtained from calling 'as__mdarray'
2029
+ and the R matrix have the same backing store.
2030
+ EOT
2031
+
2032
+ console(<<-EOT)
2033
+ trajCochin.matrix.pp
2034
+ EOT
2035
+
2036
+ body(<<-EOT)
2037
+ Remember, changing the internals of an R matrix like that can be quite dangerous. Renjin expects
2038
+ its data to be immutable, and using MDArray allows the user to change this data, violating
2039
+ Renjin principles. If weird bugs start creeping on your code, this should be one of the first
2040
+ things to be investigated.
2041
+ EOT
2042
+
2043
+ section("Conclusions I")
2044
+
2045
+ body(<<-EOT)
2046
+ This ends SS4 paper. We believe we have shown that R S4 can be substituted by SciCom and
2047
+ Ruby classes and that SciCom makes the transition to Ruby easy for R developers. Ruby is
2048
+ a very flexible and powerful language and has many interesting libraries, where Rails is
2049
+ maybe one of the best known, but there are thousands of others. For those interested in
2050
+ getting deeper into Ruby's libraries, we suggest they look at:
2051
+
2052
+ * https://github.com/markets/awesome-ruby
2053
+ * http://bestgems.org/
2054
+
2055
+ For those interested in Ruby and science, we recommend:
2056
+
2057
+ * http://sciruby.com/
2058
+
2059
+ EOT
2060
+
2061
+
2062
+ section("ET Phone Home")
2063
+
2064
+ body(<<-EOT)
2065
+ In this paper we have focused on accessing R functions from Ruby and have shown how to
2066
+ integrate Ruby with R from the point of view of a Ruby developer, i.e., we have developed
2067
+ in Ruby and have made calls to R functions very transparently. Although this is quite
2068
+ powerful, sometimes this still lacks some power. In this section we will see how we can
2069
+ integrate R with Ruby (through SciCom) from the point of view of the R developer, i.e.,
2070
+ we will allow R scripts to have access to Ruby classes and methods.
2071
+
2072
+ We did not explicitly show and did not call upon the reader's attention, but whenever
2073
+ an R function was called we either passed to it basic type objects (numeric, string,
2074
+ boolean), Ruby arrays and MDArrays. Let's try now to pass a Ruby class to R:
2075
+ EOT
2076
+
2077
+ code(<<-EOT)
2078
+ R.part = Partition.new(3, R.c("A", "C", "C", "B").factor)
2079
+ EOT
2080
+
2081
+ console(<<-EOT)
2082
+ R.part.pp
2083
+ EOT
2084
+
2085
+ body(<<-EOT)
2086
+ Calling method 'pp' on this object does not print anything, as this is a completely strange
2087
+ object in the R planet. So, let's try to see what type of object this is:
2088
+ EOT
2089
+
2090
+ console(<<-EOT)
2091
+ R.part.typeof.pp
2092
+ EOT
2093
+
2094
+ body(<<-EOT)
2095
+ We get 'externalptr' as type. So we can send the Ruby class to the R planet, but there is
2096
+ nothing we can do with it there. It is just an 'externalptr'. But we have learned elsewhere
2097
+ that if we want to send an astronaut from a planet to another, a good way of doing it is by
2098
+ creating an 'avatar'! An 'avatar' is remotely controlled by its owner, but it acts almost as
2099
+ if it were a native being of the other planet.
2100
+
2101
+ SciCom provides a way of creating an 'avatar' from any Ruby class and sending it to R land. We
2102
+ will now show how this is done and how our 'avatar' calls home to get things done. Method
2103
+ 'rpack' creates the avatar. We will start with a simple example, creating an 'avatar' from
2104
+ a Ruby array:
2105
+ EOT
2106
+
2107
+ code(<<-EOT)
2108
+ # create an array of data in Ruby
2109
+ array = [1, 2, 3]
2110
+
2111
+ # Pack the array and assign it to an R variable. Remember that ruby__array, becomes
2112
+ # ruby.array inside the R script
2113
+ R.ruby__array = R.rpack(array)
2114
+ EOT
2115
+
2116
+ body(<<-EOT)
2117
+ Now, we have in 'ruby.array' an 'avatar' of array. In order for our 'avatar' to call
2118
+ back home, it uses method 'run':
2119
+ EOT
2120
+
2121
+ code(<<-EOT)
2122
+ # note that this calls Ruby method 'length' on the array and not R length function.
2123
+ R.eval("val <- ruby.array$run('length')")
2124
+ EOT
2125
+
2126
+ console(<<-EOT)
2127
+ R.eval("print(val)")
2128
+ EOT
2129
+
2130
+ body(<<-EOT)
2131
+ Let's use a more interesting array method '<<'. This method adds elements to the
2132
+ end of the array. This method takes one argument, the element to be added at the end of
2133
+ the array. Thus we call function run passing two arguments, the '<<' method as first
2134
+ argument and the element to add as second argument.
2135
+ EOT
2136
+
2137
+ code(<<-EOC)
2138
+ R.eval(<<-EOT)
2139
+ ruby.array$run('<<', 4)
2140
+ ruby.array$run('<<', 5)
2141
+ EOT
2142
+ EOC
2143
+
2144
+ body(<<-EOT)
2145
+ Let's now print the content of the array. For that, we use another Ruby method: 'to_s'. This
2146
+ method generates a string with a representation of an object. In the case of an array, it
2147
+ will show the array's content:
2148
+ EOT
2149
+
2150
+ console(<<-EOT)
2151
+ R.eval("print(ruby.array$run('to_s'))")
2152
+ EOT
2153
+
2154
+ body(<<-EOT)
2155
+ One important aspect of interfacing R and Ruby is that both worlds interact with the same data.
2156
+ There is no data copying between the two worlds, so, effectively whatever happens to the
2157
+ 'avatar' will also happen to the 'real' object. Let's take a look at that. First, we will
2158
+ go back to the Ruby world and see our array:
2159
+ EOT
2160
+
2161
+ console(<<-EOT)
2162
+ puts array
2163
+ EOT
2164
+
2165
+ body(<<-EOT)
2166
+ Now, let's change a value of our array in Ruby:
2167
+ EOT
2168
+
2169
+ code(<<-EOT)
2170
+ array[0] = "new element"
2171
+ EOT
2172
+
2173
+ body(<<-EOT)
2174
+ And let's take a look at our 'ruby.array' in R:
2175
+ EOT
2176
+
2177
+ console(<<-EOT)
2178
+ R.eval("print(ruby.array$run('to_s'))")
2179
+ EOT
2180
+
2181
+ body(<<-EOT)
2182
+ As you can see, 'ruby.array' is still the same Ruby object.
2183
+
2184
+ Avatars maintain some properties of their original world. Although the concept of method
2185
+ chaining is foreign to R, chaining can be used with imported objects from Ruby. Method
2186
+ chaining occurs when the result of applying a method on an object returns an object (usually
2187
+ the same object) in which another method can be applied. In the example below, method '<<'
2188
+ will be applied multiple times for ruby.array
2189
+ EOT
2190
+
2191
+ code(<<-EOC)
2192
+ R.eval(<<-EOT)
2193
+ ruby.array$run('<<', 6)$run('<<', 7)$run('<<', 8)$run('<<', 9)
2194
+ EOT
2195
+ EOC
2196
+
2197
+ console(<<-EOT)
2198
+ R.eval("print(ruby.array$run('to_s'))")
2199
+ EOT
2200
+
2201
+ body(<<-EOT)
2202
+ We can also access any array element inside the R script, but note that we have
2203
+ to use Ruby indexing, i.e., the first element of the array is index 0:
2204
+ EOT
2205
+
2206
+ console(<<-EOT)
2207
+ R.eval("print(ruby.array$run('[]', 2))")
2208
+ EOT
2209
+
2210
+ console(<<-EOT)
2211
+ R.eval("print(ruby.array$run('[]', 5))")
2212
+ EOT
2213
+
2214
+ body(<<-EOT)
2215
+ Now that we have seen how to "call back" home and integrate Ruby classes with R, let's go
2216
+ back to our TrajPartitioned method to_part, and create a to_part2 method that will use
2217
+ R 'sapply' function:
2218
+ EOT
2219
+
2220
+ code(<<-EOT)
2221
+ class TrajPartitioned
2222
+
2223
+ def to_part2
2224
+ R.pack = R.rpack(@list_partitions, scope: :internal)
2225
+ number_groups = R.eval("sapply(pack, function(x) x$run('nb_groups'))")
2226
+ @list_partitions[number_groups.which__min.gz]
2227
+ end
2228
+
2229
+ end
2230
+ EOT
2231
+
2232
+ console(<<-EOT)
2233
+ tdCochin.to_part2.part.pp
2234
+ EOT
2235
+
2236
+ subsection("Creating Ruby Objects from R Scripts")
2237
+
2238
+ body(<<-EOT)
2239
+ In all the examples given so far on sending Ruby objects to R, the object was created in
2240
+ Ruby and sent to R. In the following examples, all the work will be done inside R
2241
+ scripts without the need to create anything in Ruby. For the R developer, this might be
2242
+ the easiest way to begin trying SciCom and start migrating from R to Ruby.
2243
+
2244
+ In this first example we will create a Ruby String object inside an R script. In order
2245
+ to create Ruby objects in R, we need to use the Ruby.Object class and use the 'build'
2246
+ function. The 'build' function is the equivalent of the 'new' function in Ruby and
2247
+ receives as first argument the name of the class to be built and as other arguments the
2248
+ same arguments from Ruby 'new':
2249
+
2250
+ In the following example, we create a String object initialized with "this is a new string":
2251
+ EOT
2252
+
2253
+ code(<<-EOC)
2254
+ R.eval(<<-EOT)
2255
+ # This is an actual R script, which allows the creation and use of Ruby classes
2256
+ # and methods.
2257
+ # Create a string, from class String in Ruby. Use function build to instantiate a
2258
+ # new object
2259
+ string <- Ruby.Object$build("String", "this is a new string")
2260
+ EOT
2261
+ EOC
2262
+
2263
+ console(<<-EOT)
2264
+ R.eval("print(string)")
2265
+ EOT
2266
+
2267
+ body(<<-EOT)
2268
+ In Ruby, many methods are known as 'class methods'. Class methods are methods that exist on
2269
+ the class and not on an instance of the class. In the example above, we create an instance
2270
+ (object) of type String. In the following example, we will access class Marshal: The marshaling
2271
+ library converts collections of Ruby objects into a byte stream, allowing them to be stored
2272
+ outside the currently active script. This data may subsequently be read and the original
2273
+ objects reconstituted.
2274
+ EOT
2275
+
2276
+ code(<<-EOC)
2277
+ # Use function get_class to get a Ruby class
2278
+ R.eval(<<-EOT)
2279
+ Marshal <- Ruby.Object$get_class("Marshal")
2280
+
2281
+ # Method 'dump' is a Marshal class method as is 'load'
2282
+ str <- Marshal$run("dump", string)
2283
+ restored <- Marshal$run("load", str)
2284
+ EOT
2285
+ EOC
2286
+
2287
+ console(<<-EOT)
2288
+ R.eval("print(restored)")
2289
+ EOT
2290
+
2291
+ subsection("Interfacing Java with Renjin")
2292
+
2293
+ body(<<-EOT)
2294
+ Renjin allows for easy integration of Java into R scripts, giving the user access to all of
2295
+ Java's libraries and functions. Although this paper is mainly about interfacing R and Ruby,
2296
+ we believe that it is also important to see how to interface with Java from an R script.
2297
+ JRuby, the platform on which SciCom depends, also allows easy integration of Java and Ruby;
2298
+ however we will not show it here, since this is well documented elsewhere.
2299
+ EOT
2300
+
2301
+ code(<<-EOC)
2302
+ R.eval(<<-EOT)
2303
+ import(java.util.HashMap)
2304
+
2305
+ # create a new instance of the HashMap class:
2306
+ ageMap <- HashMap$new()
2307
+
2308
+ # call methods on the new instance:
2309
+ ageMap$put("Bob", 33)
2310
+ ageMap$put("Carol", 41)
2311
+
2312
+ age <- ageMap$get("Carol")
2313
+
2314
+ # Java primitives and their boxed types
2315
+ # are automatically converted to R vectors:
2316
+ typeof(age)
2317
+ EOT
2318
+ EOC
2319
+
2320
+ console(<<-EOT)
2321
+ R.eval("print(ageMap$size())")
2322
+ EOT
2323
+
2324
+ console(<<-EOC)
2325
+ R.eval(<<-EOT)
2326
+ cat("Carol is ", age, " years old.\\n", sep = "")
2327
+ EOT
2328
+ EOC
2329
+
2330
+
2331
+ section("Conclusions II")
2332
+
2333
+ body(<<-EOT)
2334
+ The Java Virtual Machine (JVM) is an amazing environment allowing for multiple languages to cohabit
2335
+ and integrate in a very transparent way. SciCom interfaces R, Ruby and Java and gives the
2336
+ developer access to a gigantic set of libraries from those three worlds. In
2337
+ development circles people usually say: "choose the right tool for the job at hand", with JVM/
2338
+ Java/R/Renjin/Ruby/SciCom the right tool for the job might just be at hand all the time.
2339
+
2340
+ We often see questions on the web about which language to choose between R and Python. Between
2341
+ R and Python, choose SciCom!
2342
+ EOT