galaaz 0.4.2 → 0.4.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +25 -0
  3. data/Rakefile +8 -0
  4. data/bin/gknit +9 -5
  5. data/bin/gstudio +4 -2
  6. data/bin/gstudio.rb +32 -2
  7. data/blogs/dev/dev.html +219 -34
  8. data/blogs/dev/dev.md +26 -26
  9. data/blogs/dev/dev_files/figure-html/bubble-1.png +0 -0
  10. data/blogs/dev/dev_files/figure-html/diverging_bar.png +0 -0
  11. data/blogs/dplyr/dplyr.rb +63 -0
  12. data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +38 -26
  13. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +16 -17
  14. data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
  15. data/blogs/galaaz_ggplot/galaaz_ggplot.tex +65 -31
  16. data/blogs/oh_my/not_so.rb +2342 -0
  17. data/blogs/oh_my/oh_my.Rmd +493 -0
  18. data/blogs/oh_my/oh_my.html +680 -0
  19. data/blogs/oh_my/oh_my.md +597 -0
  20. data/blogs/oh_my/old.Rmd +2100 -0
  21. data/blogs/ruby_plot/figures/facets_with_decorations.png +0 -0
  22. data/blogs/ruby_plot/figures/facets_with_jitter.png +0 -0
  23. data/blogs/ruby_plot/figures/final_box_plot.png +0 -0
  24. data/blogs/ruby_plot/figures/final_violin_plot.png +0 -0
  25. data/blogs/ruby_plot/figures/violin_with_jitter.png +0 -0
  26. data/blogs/ruby_plot/ruby_plot.Rmd +147 -122
  27. data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +662 -0
  28. data/blogs/ruby_plot/ruby_plot.html +49 -54
  29. data/blogs/ruby_plot/ruby_plot.md +147 -122
  30. data/blogs/ruby_plot/ruby_plot.pdf +0 -0
  31. data/blogs/ruby_plot/ruby_plot.tex +776 -157
  32. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +57 -0
  33. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +106 -0
  34. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +110 -0
  35. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +174 -0
  36. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +236 -0
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +296 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +236 -0
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +218 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +128 -0
  42. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +150 -0
  43. data/examples/islr/ch2.spec.rb +21 -18
  44. data/examples/islr/ch3_boston.rb +14 -5
  45. data/examples/islr/ch3_multiple_regression.rb +2 -3
  46. data/examples/islr/ch6.spec.rb +1 -1
  47. data/examples/islr/x_y_rnorm.jpg +0 -0
  48. data/lib/R_interface/r.rb +14 -10
  49. data/lib/R_interface/r_libs.R +9 -0
  50. data/lib/R_interface/r_methods.rb +77 -6
  51. data/lib/R_interface/{expression.rb → r_module_s.rb} +13 -14
  52. data/lib/R_interface/rbinary_operators.rb +58 -71
  53. data/lib/R_interface/rdata_frame.rb +2 -1
  54. data/lib/R_interface/rdevices.R +4 -0
  55. data/lib/R_interface/rdevices.rb +1 -1
  56. data/lib/R_interface/renvironment.rb +34 -1
  57. data/lib/R_interface/rexpression.rb +108 -2
  58. data/lib/R_interface/rindexed_object.rb +3 -1
  59. data/lib/R_interface/rlanguage.rb +18 -2
  60. data/lib/R_interface/rmatrix.rb +14 -0
  61. data/lib/R_interface/rmd_indexed_object.rb +5 -1
  62. data/lib/R_interface/robject.rb +61 -23
  63. data/lib/R_interface/rsupport.rb +111 -53
  64. data/lib/R_interface/rsymbol.rb +6 -5
  65. data/lib/R_interface/ruby_extensions.rb +130 -4
  66. data/lib/R_interface/runary_operators.rb +35 -3
  67. data/lib/R_interface/rvector.rb +1 -0
  68. data/lib/galaaz.rb +0 -2
  69. data/lib/gknit/knitr_engine.rb +58 -4
  70. data/lib/gknit/ruby_engine.rb +5 -6
  71. data/lib/util/exec_ruby.rb +55 -9
  72. data/specs/all.rb +13 -3
  73. data/specs/figures/dose_len.png +0 -0
  74. data/specs/r_dataframe.spec.rb +49 -26
  75. data/specs/r_environment.spec.rb +140 -0
  76. data/specs/r_eval.spec.rb +0 -15
  77. data/specs/r_formula.spec.rb +232 -0
  78. data/specs/r_function.spec.rb +7 -8
  79. data/specs/r_list.spec.rb +4 -0
  80. data/specs/r_list_apply.spec.rb +11 -11
  81. data/specs/r_matrix.spec.rb +3 -3
  82. data/specs/{r_plots.spec.rb~ → r_nse.spec.rb} +29 -6
  83. data/specs/r_vector_creation.spec.rb +6 -0
  84. data/specs/r_vector_object.spec.rb +2 -2
  85. data/specs/r_vector_operators.spec.rb +3 -3
  86. data/specs/r_vector_subsetting.spec.rb +4 -4
  87. data/specs/ruby_expression.spec.rb +324 -0
  88. data/specs/tmp.rb +12 -524
  89. data/sty/galaaz.sty +71 -0
  90. data/version.rb +1 -1
  91. metadata +31 -41
  92. data/bin/gknit2~ +0 -6
  93. data/bin/ogk~ +0 -4
  94. data/bin/prepareR.rb~ +0 -1
  95. data/blogs/dev/dev.Rmd~ +0 -104
  96. data/blogs/galaaz_ggplot/galaaz_ggplot.dvi +0 -0
  97. data/blogs/galaaz_ggplot/midwest_external_png~ +0 -1
  98. data/blogs/gknit/gknit.Rmd~ +0 -184
  99. data/blogs/gknit/gknit.Rnd~ +0 -17
  100. data/blogs/gknit/model.rb~ +0 -46
  101. data/blogs/ruby_plot/ruby_plot.Rmd~ +0 -215
  102. data/examples/islr/Figure.jpg +0 -0
  103. data/examples/misc/moneyball.rb~ +0 -16
  104. data/examples/misc/subsetting.rb~ +0 -372
  105. data/lib/R/eng_ruby.R~ +0 -63
  106. data/lib/R_interface/capture_plot.rb~ +0 -23
  107. data/lib/R_interface/r.rb~ +0 -121
  108. data/lib/R_interface/rdevices.rb~ +0 -27
  109. data/lib/gknit.rb~ +0 -26
  110. data/lib/gknit/knitr_engine.rb~ +0 -102
  111. data/lib/gknit/ruby_engine.rb~ +0 -72
  112. data/lib/util/inline_file.rb~ +0 -23
  113. data/r_requires/knitr.rb~ +0 -4
  114. data/specs/r_language.spec.rb +0 -157
@@ -1,4 +1,4 @@
1
- \documentclass[]{article}
1
+ \documentclass[11pt,]{article}
2
2
  \usepackage{lmodern}
3
3
  \usepackage{amssymb,amsmath}
4
4
  \usepackage{ifxetex,ifluatex}
@@ -24,8 +24,8 @@
24
24
  \usepackage[margin=1in]{geometry}
25
25
  \usepackage{hyperref}
26
26
  \hypersetup{unicode=true,
27
- pdftitle={High Quality Scientific Plotting with Ruby in GraalVM},
28
- pdfauthor={Rodrigo Botafogo},
27
+ pdftitle={How to make Beautiful Ruby Plots with Galaaz},
28
+ pdfauthor={Rodrigo Botafogo; Daniel Mossé; University of Pittsburgh},
29
29
  pdfborder={0 0 0},
30
30
  breaklinks=true}
31
31
  \urlstyle{same} % don't use monospace font for urls
@@ -114,16 +114,15 @@
114
114
 
115
115
  \setlength{\droptitle}{-2em}
116
116
 
117
- \title{High Quality Scientific Plotting with Ruby in GraalVM}
117
+ \title{How to make Beautiful Ruby Plots with Galaaz}
118
118
  \pretitle{\vspace{\droptitle}\centering\huge}
119
119
  \posttitle{\par}
120
- \subtitle{Also: Allowing R to use classes, modules, blocks, etc.}
121
- \author{Rodrigo Botafogo}
120
+ \author{Rodrigo Botafogo \\ Daniel Mossé \\ University of Pittsburgh}
122
121
  \preauthor{\centering\large\emph}
123
122
  \postauthor{\par}
124
123
  \predate{\centering\large\emph}
125
124
  \postdate{\par}
126
- \date{19 October 2018}
125
+ \date{November 19th, 2018}
127
126
 
128
127
  % usar portugues do Brasil
129
128
  % \usepackage[brazilian]{babel}
@@ -132,7 +131,7 @@
132
131
  \usepackage{geometry}
133
132
  \geometry{a4paper, top=1in}
134
133
 
135
- % necessários para uso com kableExtra
134
+ % needed for kableExtra
136
135
  \usepackage{longtable}
137
136
  \usepackage{multirow}
138
137
  \usepackage[table]{xcolor}
@@ -149,17 +148,38 @@
149
148
  \usepackage{expex}
150
149
 
151
150
  \usepackage{graphicx}
151
+
152
152
  \usepackage{fancyhdr}
153
+ % set the header and foot style
154
+ % style 'fancy' adds the section name on the header
155
+ % and the page number on the footer
153
156
  \pagestyle{fancy}
154
- \fancyhf{}
155
157
 
156
- \usepackage{lipsum}
158
+ % style 'fancyhf' leaves header and footer empty
159
+ %\fancyhf{}
160
+
161
+ % sets the left head element to \rightmark, which contains the
162
+ % current section (\leftmark is the current chapter)
163
+ %\fancyhead[L]{\rightmark} .
164
+
165
+ % sets the right head element to the page number.
166
+ % \fancyhead[R]{\thepage}
167
+
168
+ % lets the head rule disappear.
169
+ % \renewcommand{\headrulewidth}{0pt}
170
+ % Possible selectors for the optional argument of \fancyhead/\fancyfoot
171
+ % are L (left), C (center) or R (right) for the position of the element
172
+ % and E (even) or O (odd) to distinguish even and odd pages. If you omit
173
+ % E/O the element is set for all pages.
157
174
 
158
- % disponibilizar o comando lastpage
175
+ % \usepackage{lipsum}
176
+
177
+ % make available command lastpage
159
178
  \usepackage{lastpage}
160
179
 
161
- % tamanho do font padrão 11pt
162
- \usepackage[fontsize=10pt]{scrextend}
180
+ % default fontsize 11pt better to add
181
+ % fontsize on the yaml header
182
+ % \usepackage[fontsize=11pt]{scrextend}
163
183
 
164
184
  % comandos para formatar uma tabela
165
185
  \usepackage{array}
@@ -167,147 +187,174 @@
167
187
  \newcolumntype{C}[1]{>{\centering\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
168
188
  \newcolumntype{R}[1]{>{\raggedleft\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
169
189
 
170
- % necessário para importar outros arquivos latex
190
+ % needed if we need to import other latex documents
171
191
  \usepackage{import}
172
192
 
193
+ % Command to import an R variable to latex
173
194
  \newcommand{\RtoLatex}[2]{\newcommand{#1}{#2}}
195
+
196
+ %
174
197
  %\newcommand{\atraso}[1]{\color{red} \textbf {Tempo desde a Assinatura do Contrato: #1 dias}}
175
198
 
176
199
  \begin{document}
177
200
  \maketitle
178
201
 
202
+ {
203
+ \setcounter{tocdepth}{2}
204
+ \tableofcontents
205
+ }
179
206
  \section{Introduction}\label{introduction}
180
207
 
181
- Ruby is a dynamic, interpreted, reflective, object-oriented,
182
- general-purpose programming language. It was designed and developed in
183
- the mid-1990s by Yukihiro ``Matz'' Matsumoto in Japan. It reached high
184
- popularity with the development of Ruby on Rails (RoR) by David
185
- Heinemeier Hansson. RoR is a web application framework which was first
186
- release circa 2005 and makes extensive use of Ruby's metaprogramming
187
- features. With the advend of RoR, Ruby became extremely popular and it
188
- peeked in popularity around 2008 according to the Tiobe index
189
- (\url{https://www.tiobe.com/tiobe-index/ruby/}). From 2008 to 2015, it's
190
- popularity declined consistently and then started picking up again
191
- during the next 3 years. At the time of this writing (November 2018),
192
- Ruby is ranked 16th in the Tiobe index.
193
-
194
- Python, considered a similar language to Ruby with similar features
195
- ranks 4th in the index. The first three positions are taken by Java, C
196
- and C++. One criticism often heard about Ruby, is that it is useful only
197
- for web applications while Python, with similar features has more
198
- diverse libraries, being useful for web applications with the Django
199
- framework, but also for scientific applications such as statistics, data
200
- analysis, big data, biology, etc. This criticism is by no way wrong.
201
- Although Ruby can do much more than just web applications:
202
- \url{https://github.com/markets/awesome-ruby}, for scientific computing,
203
- Ruby lags way behind Python and R, the two most prestigous languages in
204
- the field, with R being prefered by statisticians while Python is
205
- prefered by everyone else, because of it's gentle learning curve and
206
- more ``natural'' programming paradigm.
208
+ According to Wikipedia ``Ruby is a dynamic, interpreted, reflective,
209
+ object-oriented, general-purpose programming language. It was designed
210
+ and developed in the mid-1990s by Yukihiro `Matz' Matsumoto in Japan.''
211
+ It reached high popularity with the development of Ruby on Rails (RoR)
212
+ by David Heinemeier Hansson. RoR is a web application framework first
213
+ released around 2005. It makes extensive use of Ruby's metaprogramming
214
+ features. With RoR, Ruby became very popular. According to
215
+ \href{https://www.tiobe.com/tiobe-index/ruby/}{Ruby's Tiobe index} it
216
+ peaked in popularity around 2008. Then its popularity declined until
217
+ 2015 when it started picking up again. At the time of this writing
218
+ (November 2018), the Tiobe index puts ruby in 16th position.
219
+
220
+ Python, a similar language to Ruby, ranks 4th in the index. Java, C and
221
+ C++ take the first three positions. Ruby is often criticized for its
222
+ focus on web applications. But Ruby can do
223
+ \href{https://github.com/markets/awesome-ruby}{much more} than just web
224
+ applications. Yet, for scientific computing, Ruby lags way behind Python
225
+ and R. Python has Django framework for web, NumPy for numerical arrays,
226
+ Pandas for data analysis. R is a free software environment for
227
+ statistical computing and graphics with thousands of libraries for data
228
+ analysis.
207
229
 
208
230
  Until recently, there was no real perspective for Ruby to bridge this
209
- gap and have even the most basic scientific computing infrastructure.
210
- Comes GraalVM into the picture:
231
+ gap. Implementing a complete scientific computing infrastructure would
232
+ take too long. Comes GraalVM into the picture:
211
233
 
212
- \begin{verbatim}
213
- GraalVM is a universal virtual machine for running applications written in JavaScript,
214
- Python 3, Ruby, R, JVM-based languages like Java, Scala, Kotlin, and LLVM-based languages
215
- such as C and C++.
234
+ \begin{quote}
235
+ GraalVM is a universal virtual machine for running applications written
236
+ in JavaScript, Python 3, Ruby, R, JVM-based languages like Java, Scala,
237
+ Kotlin, and LLVM-based languages such as C and C++.
216
238
 
217
- GraalVM removes the isolation between programming languages and enables interoperability in a
218
- shared runtime. It can run either standalone or in the context of OpenJDK, Node.js,
219
- Oracle Database, or MySQL.
239
+ GraalVM removes the isolation between programming languages and enables
240
+ interoperability in a shared runtime. It can run either standalone or in
241
+ the context of OpenJDK, Node.js, Oracle Database, or MySQL.
220
242
 
221
- GraalVM allows you to write polyglot applications with a seamless way to pass values from one
222
- language to another. With GraalVM there is no copying or marshaling necessary as it is with
223
- other polyglot systems. This lets you achieve high performance when language boundaries are
224
- crossed. Most of the time there is no additional cost for crossing a language boundary at all.
243
+ GraalVM allows you to write polyglot applications with a seamless way to
244
+ pass values from one language to another. With GraalVM there is no
245
+ copying or marshaling necessary as it is with other polyglot systems.
246
+ This lets you achieve high performance when language boundaries are
247
+ crossed. Most of the time there is no additional cost for crossing a
248
+ language boundary at all.
225
249
 
226
- Often developers have to make uncomfortable compromises that require them to rewrite
227
- their software in other languages. For example:
250
+ Often developers have to make uncomfortable compromises that require
251
+ them to rewrite their software in other languages. For example:
228
252
 
229
- * “That library is not available in my language. I need to rewrite it.”
230
- * “That language would be the perfect fit for my problem, but we cannot run it
231
- in our environment.”
232
- * “That problem is already solved in my language, but the language is too slow.”
253
+ \begin{itemize}
254
+ \tightlist
255
+ \item
256
+ That library is not available in my language. I need to rewrite it.
257
+ \item
258
+ That language would be the perfect fit for my problem, but we cannot
259
+ run it in our environment.
260
+ \item
261
+ That problem is already solved in my language, but the language is too
262
+ slow.
263
+ \end{itemize}
233
264
 
234
- With GraalVM we aim to allow developers to freely choose the right language for the task at
235
- hand without making compromises.
236
- \end{verbatim}
265
+ With GraalVM we aim to allow developers to freely choose the right
266
+ language for the task at hand without making compromises.
267
+ \end{quote}
237
268
 
238
269
  As stated above, GraalVM is a \emph{universal} virtual machine that
239
270
  allows Ruby and R (and other languages) to run on the same environment.
240
271
  GraalVM allows polyglot applications to \emph{seamlessly} interact with
241
- one another and pass values from one language to the other. Based on
242
- GraalVM, the Galaaz project was started. Galaaz indends to tightly
243
- couple Ruby and R and allow those languages to \emph{seamlessly}
244
- interact in a way that the user will be unaware of such interaction.
245
-
246
- Library wrapping is an usual way of bringing features from one library
247
- into another language. For instance, whenever Python needs to perform
248
- operations efficiently, C libraries are wrapped in Python. For the
249
- Python developer, the existence of such C library is of no concern. The
250
- problem with library wrapping is that for any new library of interest,
251
- there is the need to hand craft a new wrapper. With Galaaz, the same
252
- concept of wrapping was done, but instead of wrapping a single C or R
253
- library, Galaaz wraps the whole of the R language. Doing so, all
254
- thousands of R libraries are immediately available to Ruby developers
255
- and any new library developed in R will also become available without
256
- requiring a new wrapping effort.
257
-
258
- In this article, the graphing ggplot2 library from R will be accessed by
259
- Ruby transparently, bringing to Ruby the power of high quality
260
- scientific plotting. It might seem, from the exposed above, that Galaaz
261
- mainly benefits Ruby developers and might be of no consequence to the R
262
- developer. This article will however show that migrating from R to Ruby
263
- with Galaaz is a matter of small syntactic changes. Furthermore, R lacks
264
- some fundamental constructs for code reuse and large system
265
- construction. Using Galaaz, the R developer can easily migrate to a
266
- powerful OO language, at virtually no cost and then, as needs requires,
267
- she can add them to her toolbox.
272
+ one another and pass values from one language to the other. Galaaz, a
273
+ gem for Ruby, intends to tightly couple Ruby and R and allow those
274
+ languages to interact in a way that the user will be unaware of such
275
+ interaction.
276
+
277
+ Library wrapping is a usual way of bringing features from one language
278
+ into another. To improve performance, Python often wraps more efficient
279
+ C libraries. For the Python developer, the existence of such C libraries
280
+ is of no concern. The problem with library wrapping is that for any new
281
+ library, there is the need to handcraft a new wrapper.
282
+
283
+ Galaaz, instead of wrapping a single C or R library, wraps the whole of
284
+ the R language in Ruby. Doing so, all thousands of R libraries are
285
+ available to Ruby developers. Also any new library developed in R will
286
+ be available without a new wrapping effort.
287
+
288
+ This article shows how Ruby can use R's ggplot2 library transparently,
289
+ and bring to Ruby the power of high quality scientific plotting. It also
290
+ shows that migrating from R to Ruby with Galaaz is a matter of small
291
+ syntactic changes. Using Ruby, the R developer can use all of Ruby's
292
+ powerful OO features. It also becomes much easier to move code from the
293
+ analysis phase to the production phase.
268
294
 
269
295
  In this article we will explore the R ToothGrowth dataset. In doing so,
270
- we will create some plots. Furthermore we will create a ``Corporate
271
- Template'' for our plots ensuring that any plot of the same type will
272
- have a consistent visualisation.
296
+ we will create some boxplots. A primer on boxplot is available in
297
+ \href{https://towardsdatascience.com/understanding-boxplots-5e2df7bcbd51}{this
298
+ article}.
273
299
 
274
- \section{gKnit}\label{gknit}
300
+ We will also create a Corporate Template ensuring that plots will have a
301
+ consistent visualization. This template is built using a Ruby module.
302
+ There is a way of building ggplot themes that will work the same as the
303
+ Ruby module. Yet, writing a new theme requires specific knowledge. Ruby
304
+ modules are standard to the language and don't need special knowledge.
305
+
306
+ In
307
+ \href{https://towardsdatascience.com/ruby-plotting-with-galaaz-an-example-of-tightly-coupling-ruby-and-r-in-graalvm-520b69e21021}{this
308
+ blog} we show a scatter plot in Ruby also with Galaaz.
275
309
 
276
- This document was written using rmarkdown and the corresponding HTML was
277
- generated by the gKnit application. gKnit is a wrapper around the
278
- powerful `knitr' application which converts rmarkdown text to many
279
- different output formats such as HTML, Latex, docx, etc. The gKnit tool
280
- is still under active development and will soon be released.
310
+ \section{gKnit}\label{gknit}
281
311
 
282
- In rmarkdown, text and code can be part of the same document, and code
283
- blocks are marked with a special markup. Interested readers can easily
284
- google `knitr' and `rmarkdown'. in gKnit, each Ruby block is evaluated
285
- independently and `eval' in Ruby creates a new scope, so, in order for a
286
- variable defined in a block to be accessible in another block, it has to
287
- be a global variable, preceded by the `\$' sign.
312
+ \emph{Knitr} is an application that converts text written in rmarkdown
313
+ to many different output formats. For instance, a writer can convert an
314
+ rmarkdown document to HTML, \LaTeX{}, docx and many other formats.
315
+ Rmarkdown documents can contain text and \emph{code chunks}. Knitr
316
+ formats code chunks in a grayed box in the output document. It also
317
+ executes the code chunks and formats the output in a white box. Every
318
+ line of output from the execution code is preceded by `\#\#'.
319
+
320
+ Knitr allows code chunks to be in R, Python, Ruby and dozens of other
321
+ languages. Yet, while R and Python chunks can share data, in other
322
+ languages, chunks are independent. This means that a variable defined in
323
+ one chunk cannot be used in another chunk.
324
+
325
+ With \emph{gKnit} Ruby code chunks can share data. In gKnit each Ruby
326
+ chunk executes in its own scope and thus, local variables defined in a
327
+ chunk are not accessible by other chunks. Yet, all chunks execute in the
328
+ scope of a `chunk' class and instance variables (`@'), are available in
329
+ all chunks.
288
330
 
289
331
  \section{Exploring the Dataset}\label{exploring-the-dataset}
290
332
 
291
- Let start by exploring our selected dataset. In this dataset the
292
- response is the length of odontoblasts (cells responsible for tooth
293
- growth) in 60 guinea pigs. Each animal received one of three dose levels
294
- of vitamin C (0.5, 1, and 2 mg/day) by one of two delivery methods,
295
- orange juice or ascorbic acid (a form of vitamin C and coded as VC).
296
-
297
- In Galaaz, in order to have access to an R variable pointed by an R
298
- symbol we use the corresponding Ruby symbol preceeded by the tilda
299
- (`\textasciitilde{}') function.
333
+ Let's start by exploring our selected dataset. ToothGrowth is an R
334
+ dataset. A dataset is like an excel spreadsheet, but in which each
335
+ column has only one type of data. For instance one column can have
336
+ float, the other integer, and a third strings. This dataset analyses the
337
+ length of odontoblasts (cells responsible for tooth growth) in 60 guinea
338
+ pigs, where each animal received one of three dose levels of Vitamin C
339
+ (0.5, 1, and 2 mg/day) by one of two delivery methods, orange juice (OJ)
340
+ or ascorbic acid (a form of vitamin C and coded as VC).
341
+
342
+ The ToothGrowth dataset contains three columns: `len', `supp' and
343
+ `dose'. Let's take a look at a few rows of this dataset. In Galaaz, to
344
+ have access to an R variable we use the corresponding Ruby symbol
345
+ preceded by the tilde (`\textasciitilde{}') function. Note in the
346
+ following chunk that Ruby's `@tooth\_growth' is assigned the value of
347
+ `\textasciitilde{}:ToothGrowth'. `ToothGrowth' is the R variable
348
+ containing the dataset of interest.
300
349
 
301
350
  \begin{Shaded}
302
351
  \begin{Highlighting}[]
303
352
  \CommentTok{# Read the R ToothGrowth variable and assign it to the}
304
- \CommentTok{# Ruby tooth_growth variable}
305
- \DataTypeTok{$tooth_growth}\NormalTok{ = ~}\StringTok{:ToothGrowth}
306
- \CommentTok{# convert the dose to a factor}
307
- \DataTypeTok{$tooth_growth}\NormalTok{.dose = }\DataTypeTok{$tooth_growth}\NormalTok{.dose.as__factor}
308
-
353
+ \CommentTok{# Ruby instance variable @tooth_growth that will be }
354
+ \CommentTok{# available to all Ruby chunks in this document.}
355
+ \OtherTok{@tooth_growth}\NormalTok{ = ~}\StringTok{:ToothGrowth}
309
356
  \CommentTok{# print the first few elements of the dataset}
310
- \NormalTok{puts }\DataTypeTok{$tooth_growth}\NormalTok{.head}
357
+ \NormalTok{puts }\OtherTok{@tooth_growth}\NormalTok{.head}
311
358
  \end{Highlighting}
312
359
  \end{Shaded}
313
360
 
@@ -322,25 +369,86 @@ symbol we use the corresponding Ruby symbol preceeded by the tilda
322
369
  \end{verbatim}
323
370
 
324
371
  Great! We've managed to read the ToothGrowth dataset and take a look at
325
- its elements. Observe that we have three columns in this dataset: `len',
326
- `supp' and `dose'. Accessing a column, for example the `len' column, is
327
- done by doing `\$tooth\_growth.len'.
372
+ its elements. We see here the first 6 rows of the dataset. To access a
373
+ column, follow the dataset name with a dot (`.') and the name of the
374
+ column. Also use dot notation to chain methods in usual Ruby style.
375
+
376
+ \begin{Shaded}
377
+ \begin{Highlighting}[]
378
+ \CommentTok{# Access the tooth_growth 'len' column and print the first few}
379
+ \CommentTok{# elements of this column with the 'head' method.}
380
+ \NormalTok{puts }\OtherTok{@tooth_growth}\NormalTok{.len.head}
381
+ \end{Highlighting}
382
+ \end{Shaded}
383
+
384
+ \begin{verbatim}
385
+ ## [1] 4.2 11.5 7.3 5.8 6.4 10.0
386
+ \end{verbatim}
387
+
388
+ The `dose' column contains a numeric value with either 0.5, 1 or 2.
389
+ Although those are numbers, they are better interpreted as a
390
+ \href{https://swcarpentry.github.io/r-novice-inflammation/12-supp-factors/}{factor
391
+ or category}. So, let's convert our `dose' column from numeric to
392
+ `factor'. In R, the function `as.factor' is used to convert data in a
393
+ vector to factors. To use this function from Galaaz the dot (`.') in the
394
+ function name is substituted by `\_\_' (double underline). The function
395
+ `as.factor' becomes `R.as\_\_factor' or just `as\_\_factor' when
396
+ chaining.
397
+
398
+ \begin{Shaded}
399
+ \begin{Highlighting}[]
400
+ \CommentTok{# convert the dose to a factor}
401
+ \OtherTok{@tooth_growth}\NormalTok{.dose = }\OtherTok{@tooth_growth}\NormalTok{.dose.as__factor}
402
+ \end{Highlighting}
403
+ \end{Shaded}
328
404
 
329
405
  Let's explore some more details of this dataset. In particular, let's
330
406
  look at its dimensions, structure and summary statistics.
331
407
 
332
408
  \begin{Shaded}
333
409
  \begin{Highlighting}[]
334
- \NormalTok{puts }\DataTypeTok{$tooth_growth}\NormalTok{.dim}
335
- \CommentTok{# chdck why NULL}
336
- \NormalTok{puts R.str(}\StringTok{:ToothGrowth}\NormalTok{)}
337
- \NormalTok{puts }\DataTypeTok{$tooth_growth}\NormalTok{.summary}
410
+ \NormalTok{puts }\OtherTok{@tooth_growth}\NormalTok{.dim}
338
411
  \end{Highlighting}
339
412
  \end{Shaded}
340
413
 
341
414
  \begin{verbatim}
342
415
  ## [1] 60 3
343
- ## NULL
416
+ \end{verbatim}
417
+
418
+ This dataset has 60 rows, one for each subject and 3 columns, as we have
419
+ already seen.
420
+
421
+ Note that we do not call `puts' when using the `str' function. This
422
+ function does not return anything and prints the structure of the
423
+ dataset as a side effect.
424
+
425
+ \begin{Shaded}
426
+ \begin{Highlighting}[]
427
+ \OtherTok{@tooth_growth}\NormalTok{.str}
428
+ \end{Highlighting}
429
+ \end{Shaded}
430
+
431
+ \begin{verbatim}
432
+ ## 'data.frame': 60 obs. of 3 variables:
433
+ ## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
434
+ ## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
435
+ ## $ dose: Factor w/ 3 levels "0.5","1","2": 1 1 1 1 1 1 1 1 1 1 ...
436
+ \end{verbatim}
437
+
438
+ Observe that both variables `supp' and `dose' are factors. The system
439
+ made variable `supp' a factor automatically, since it contains two
440
+ strings OJ and VC.
441
+
442
+ Finally, using the summary method, we get the statistical summary for
443
+ the dataset
444
+
445
+ \begin{Shaded}
446
+ \begin{Highlighting}[]
447
+ \NormalTok{puts }\OtherTok{@tooth_growth}\NormalTok{.summary}
448
+ \end{Highlighting}
449
+ \end{Shaded}
450
+
451
+ \begin{verbatim}
344
452
  ## len supp dose
345
453
  ## Min. : 4.20 OJ:30 0.5:20
346
454
  ## 1st Qu.:13.07 VC:30 1 :20
@@ -350,45 +458,55 @@ look at its dimensions, structure and summary statistics.
350
458
  ## Max. :33.90
351
459
  \end{verbatim}
352
460
 
461
+ \section{Doing the Data Analysis}\label{doing-the-data-analysis}
462
+
463
+ \subsection{Quick plot for seing the
464
+ data}\label{quick-plot-for-seing-the-data}
465
+
353
466
  Let's now create our first plot with the given data by accessing ggplot2
354
467
  from Ruby. For Rubyist that have never seen or used ggplot2, here is the
355
- description found on ggplot home page:
468
+ description of ggplot found on its home page:
356
469
 
357
- \begin{verbatim}
358
- "ggplot2 is a system for declaratively creating graphics, based on _The Grammar of Graphics_.
359
- You provide the data, tell ggplot2 how to map variables to aesthetics, what graphical
360
- primitives to use, and it takes care of the details."
361
- \end{verbatim}
470
+ \begin{quote}
471
+ ``ggplot2 is a system for declaratively creating graphics, based on
472
+ \emph{The Grammar of Graphics}. You provide the data, tell ggplot2 how
473
+ to map variables to aesthetics, what graphical primitives to use, and it
474
+ takes care of the details.''
475
+ \end{quote}
362
476
 
363
477
  This description might be a bit cryptic and it is best to see it at work
364
- to understand it. Basically, in the \emph{grammar of graphics} each
365
- component of the plot such as the grid, the axis, the data, title,
366
- subtitle, etc. is added to the plot in layers to form the final
367
- graphics.
368
-
369
- In this plot bellow, the `dose' is plotted on the `x' axis and the tooth
370
- length on the `y' axis. Note the specification in the the `aes' method:
371
- `E.aes(x: :dose, y: :len)', where `:dose' is the `dose' column of the
372
- dataset and `:len' the `len' column. The `aes' method is the
373
- \emph{aesthetics} for this plot. Then, to this layer, the
374
- `geom\_boxplot' is added and the whole plot is printed.
478
+ to understand it. Basically, in the \emph{grammar of graphics}
479
+ developers add layers of components such as grid, axis, data, title,
480
+ subtitle and also graphical primitives such as \emph{bar plot},
481
+ \emph{box plot}, to form the final graphics.
482
+
483
+ In order to make a plot, we apply the `ggplot' function to the dataset. In
484
+ R, this would be written as
485
+ \texttt{ggplot(\textless{}dataset\textgreater{},\ ...)}. In Galaaz, use
486
+ either \texttt{R.ggplot(\textless{}dataset\textgreater{},\ ...)}, or
487
+ \texttt{\textless{}dataset\textgreater{}.ggplot(...)}. In the graph
488
+ specification below, we use the second notation that looks more Ruby
489
+ like. The plot specifies the `dose' on the \(x\) axis and the `length'
490
+ on the \(y\) axis with the `aes' method. `E.aes(x: :dose, y: :len)'. To
491
+ specify the type of plot to create add a geom to the plot. For a
492
+ boxplot, the geom is R.geom\_boxplot.
375
493
 
376
494
  Note also that we have a call to `R.png' before plotting and
377
495
  'R.dev\_\_off' after the print statement. `R.png' opens a `png' device
378
- for writing the plot. When 'R.dev\_\_off' is called, the device is
379
- closed and a `png' file is created. If no name is given to the `png'
380
- function, a file named `Rplot' is generated, where is the number of the
381
- plot. So, this first plot is called `Rplot001.png'. We can then include
382
- the generated `png' file in this document, by adding an rmarkdown
496
+ for outputting the plot. 'R.dev\_\_off' closes the device and creates
497
+ the `png' file. If we do not pass a name to the `png' function, the image
498
+ gets a default name of `Rplot\textless{}nnn\textgreater{}' where
499
+ \textless{}nnn\textgreater{} is the number of the plot. We can then
500
+ include the generated `png' file in the document by adding an rmarkdown
383
501
  directive.
384
502
 
385
503
  \begin{Shaded}
386
504
  \begin{Highlighting}[]
387
505
  \NormalTok{require }\StringTok{'ggplot'}
388
506
 
389
- \NormalTok{R.png}
507
+ \NormalTok{R.png(}\StringTok{"figures/dose_len.png"}\NormalTok{)}
390
508
 
391
- \NormalTok{e = }\DataTypeTok{$tooth_growth}\NormalTok{.ggplot(E.aes(}\StringTok{x: :dose}\NormalTok{, }\StringTok{y: :len}\NormalTok{))}
509
+ \NormalTok{e = }\OtherTok{@tooth_growth}\NormalTok{.ggplot(E.aes(}\StringTok{x: :dose}\NormalTok{, }\StringTok{y: :len}\NormalTok{))}
392
510
  \NormalTok{print e + R.geom_boxplot}
393
511
 
394
512
  \NormalTok{R.dev__off}
@@ -397,16 +515,514 @@ directive.
397
515
 
398
516
  \begin{figure}
399
517
  \centering
400
- \includegraphics{Rplot001.png}
401
- \caption{ToothGrowth}
518
+ \includegraphics[width=0.70000\textwidth]{figures/dose_len.png}
519
+ \caption{}
520
+ \end{figure}
521
+
522
+ Great! We've just managed to create and save our first plot in Ruby with
523
+ only four lines of code. We can see with this plot a clear trend: as the
524
+ dose of the supplement is increased, so is the length of teeth.
525
+
526
+ \subsection{Facetting the plot}\label{facetting-the-plot}
527
+
528
+ This first plot shows a trend, but our data has information about two
529
+ different forms of delivery method, either by Orange Juice (OJ) or by
530
+ Vitamin C (VC). Let's then try to create a plot that makes explicit the
531
+ effect of each delivery method. This next plot is a \emph{facetted} plot
532
+ where each delivery method gets its own plot. On the left side, the plot
533
+ shows the OJ delivery method. On the right side, we see the VC delivery
534
+ method. To obtain this plot, we use the `R.facet\_grid' function, that
535
+ automatically creates the facets based on the delivery method factors.
536
+ The parameter to the `facet\_grid' method is a
537
+ \href{https://thomasleeper.com/Rcourse/Tutorials/formulae.html}{\emph{formula}}.
538
+
539
+ In Galaaz, formulas are written a bit differently than in R. The
540
+ following changes are necessary:
541
+
542
+ \begin{itemize}
543
+ \tightlist
544
+ \item
545
+ R symbols are represented by the same Ruby symbol prefixed with the
546
+ `+' method. The symbol \texttt{x} in R becomes \texttt{+:x} in Ruby;
547
+ \item
548
+ The `\textasciitilde{}' operator in R becomes `=\textasciitilde{}' in
549
+ Ruby. The formula \texttt{x\ \textasciitilde{}\ y} in R is written as
550
+ \texttt{+:x\ =\textasciitilde{}\ +:y} in Ruby;
551
+ \item
552
+ The `.' symbol in R becomes `+:all'
553
+ \end{itemize}
554
+
555
+ Another way of writing a formula is to use the `formula' function with
556
+ the actual formula as a string. The formula
557
+ \texttt{x\ \textasciitilde{}\ y} in R can be written as
558
+ \texttt{R.formula("x\ \textasciitilde{}\ y")}. For more complex
559
+ formulas, the use of the `formula' function is preferred.
560
+
561
+ The formula \texttt{+:all\ =\textasciitilde{}\ +:supp} indicates to the
562
+ `facet\_grid' function that it needs to facet the plot based on the
563
+ \texttt{supp} variable and split the plot vertically. Changing the
564
+ formula to \texttt{+:supp\ =\textasciitilde{}\ +:all} would split the
565
+ plot horizontally.
566
+
567
+ \begin{Shaded}
568
+ \begin{Highlighting}[]
569
+ \NormalTok{R.png(}\StringTok{"figures/facet_by_delivery.png"}\NormalTok{)}
570
+
571
+ \OtherTok{@base_tooth}\NormalTok{ = }\OtherTok{@tooth_growth}\NormalTok{.ggplot(E.aes(}\StringTok{x: :dose}\NormalTok{, }\StringTok{y: :len}\NormalTok{, }\StringTok{group: :dose}\NormalTok{))}
572
+
573
+ \OtherTok{@bp}\NormalTok{ = }\OtherTok{@base_tooth}\NormalTok{ + R.geom_boxplot +}
574
+ \CommentTok{# Split in vertical direction}
575
+ \NormalTok{ R.facet_grid(+}\StringTok{:all}\NormalTok{ =~ +}\StringTok{:supp}\NormalTok{)}
576
+
577
+ \NormalTok{puts }\OtherTok{@bp}
578
+
579
+ \NormalTok{R.dev__off}
580
+ \end{Highlighting}
581
+ \end{Shaded}
582
+
583
+ \begin{figure}
584
+ \centering
585
+ \includegraphics[width=0.70000\textwidth]{figures/facet_by_delivery.png}
586
+ \caption{}
402
587
  \end{figure}
403
588
 
404
- We've just managed to generate our first plot in Ruby with only two
405
- lines of code. This plot, however, if far from being pleasing to the
406
- eye.
589
+ It now becomes clear that although both methods of delivery have a
590
+ direct impact on tooth growth, method (OJ) is non-linear having a higher
591
+ impact with smaller doses of ascorbic acid and reducing its impact as
592
+ the dose increases. With the (VC) approach, the impact seems to be more
593
+ linear.
594
+
595
+ \subsection{Adding Color}\label{adding-color}
596
+
597
+ If this paper was about data analysis, we should make a better analysis
598
+ of the trends and should improve the statistical analysis. But we are
599
+ interested in working with ggplot in Ruby. So, let's add some color to
600
+ this plot to make the trend and comparison more visible. In the
601
+ following plot, the boxes are color coded by dose. To add color, it is
602
+ enough to add \texttt{fill:\ :dose} to the aesthetic of boxplot. With
603
+ this command each `dose' factor gets its own color.
604
+
605
+ \begin{Shaded}
606
+ \begin{Highlighting}[]
607
+ \NormalTok{R.png(}\StringTok{"figures/facets_by_delivery_color.png"}\NormalTok{)}
608
+
609
+ \OtherTok{@bp}\NormalTok{ = }\OtherTok{@bp}\NormalTok{ + R.geom_boxplot(E.aes(}\StringTok{fill: :dose}\NormalTok{))}
610
+ \NormalTok{puts }\OtherTok{@bp}
611
+
612
+ \NormalTok{R.dev__off}
613
+ \end{Highlighting}
614
+ \end{Shaded}
615
+
616
+ \begin{figure}
617
+ \centering
618
+ \includegraphics[width=0.70000\textwidth]{figures/facets_by_delivery_color.png}
619
+ \caption{}
620
+ \end{figure}
621
+
622
+ Facetting helps us compare the general trends in the (OJ) and (VC)
623
+ delivery methods. Adding color allows us to compare specifically how each
624
+ dosage impacts the teeth growth. It is possible to observe that with
625
+ smaller doses, up to 1mg, (OJ) performs better than (VC) (red color).
626
+ For 2mg, both (OJ) and (VC) have the same median, but (OJ) is less
627
+ disperse (blue color). For 1mg (green color), (OJ) is significantly
628
+ better than (VC). By this very quick analysis, it seems that (OJ) is a
629
+ better delivery method than (VC).
630
+
631
+ \subsection{Clarifying the data}\label{clarifying-the-data}
632
+
633
+ Boxplots give us a nice idea of the distribution of data, but looking at
634
+ those plots with large colored boxes leaves us wondering what is going
635
+ on in those boxes. According to Edward Tufte in Envisioning Information:
636
+
637
+ \begin{quote}
638
+ Thin data rightly prompts suspicions: ``What are they leaving out? Is
639
+ that really everything they know? What are they hiding? Is that all they
640
+ did?'' Now and then it is claimed that vacant space is ``friendly''
641
+ (anthropomorphizing an inherently murky idea) but \emph{it is not how
642
+ much empty space there is, but rather how it is used. It is not how much
643
+ information there is, but rather how effectively it is arranged.}
644
+ \end{quote}
645
+
646
+ And he states:
647
+
648
+ \begin{quote}
649
+ A most unconventional design strategy is revealed: \emph{to clarify, add
650
+ detail.}
651
+ \end{quote}
652
+
653
+ Let's then use this wisdom and add yet another layer of data to our
654
+ plot, so that we clarify it with detail and do not leave large empty
655
+ boxes. In this next plot, we add data points for each of the 60 pigs in
656
+ the experiment. For that, add the function `R.geom\_point' to the plot.
657
+
658
+ \begin{Shaded}
659
+ \begin{Highlighting}[]
660
+ \NormalTok{R.png(}\StringTok{"figures/facets_with_points.png"}\NormalTok{)}
661
+
662
+ \CommentTok{# Split in vertical direction}
663
+ \OtherTok{@bp}\NormalTok{ = }\OtherTok{@bp}\NormalTok{ + R.geom_point}
664
+
665
+ \NormalTok{puts }\OtherTok{@bp}
666
+
667
+ \NormalTok{R.dev__off}
668
+ \end{Highlighting}
669
+ \end{Shaded}
670
+
671
+ \begin{figure}
672
+ \centering
673
+ \includegraphics[width=0.70000\textwidth]{figures/facets_with_points.png}
674
+ \caption{}
675
+ \end{figure}
676
+
677
+ Now we can see the actual distribution of all the 60 subjects. Actually,
678
+ this is not totally true. We have a hard time seeing all 60 subjects. It
679
+ seems that some points might be placed one over the other hiding useful
680
+ information.
681
+
682
+ But no sweat! Another layer might solve the problem. In the following
683
+ plot a new layer called `geom\_jitter' is added to the plot. This adds
684
+ randomness to the position of the points, making it easier to see all of
685
+ them and preventing data hiding. We also add color and change the shape
686
+ of the points, making them even easier to see.
687
+
688
+ \begin{Shaded}
689
+ \begin{Highlighting}[]
690
+ \NormalTok{R.png(}\StringTok{"figures/facets_with_jitter.png"}\NormalTok{)}
691
+
692
+ \CommentTok{# Split in vertical direction}
693
+ \NormalTok{puts }\OtherTok{@bp}\NormalTok{ + R.geom_jitter(}\StringTok{shape: }\DecValTok{23}\NormalTok{, }\StringTok{color: "cyan3"}\NormalTok{, }\StringTok{size: }\DecValTok{1}\NormalTok{)}
694
+
695
+ \NormalTok{R.dev__off}
696
+ \end{Highlighting}
697
+ \end{Shaded}
698
+
699
+ \begin{figure}
700
+ \centering
701
+ \includegraphics[width=0.70000\textwidth]{figures/facets_with_jitter.png}
702
+ \caption{}
703
+ \end{figure}
704
+
705
+ Now we can see all 60 points in the graph. We have here a much higher
706
+ information density and we can see outliers and subjects distribution.
707
+
708
+ \section{Preparing the Plot for
709
+ Presentation}\label{preparing-the-plot-for-presentation}
710
+
711
+ We have come a long way since our first plot. As was already said, this
712
+ is not an article about data analysis and the focus is on the
713
+ integration of Ruby and ggplot. So, let's assume that the analysis is
714
+ now done. Yet, ending the analysis does not mean that the work is done.
715
+ On the contrary, the hardest part is yet to come!
716
+
717
+ After the analysis it is necessary to communicate it by making a final
718
+ plot for presentation. The last plot has all the information we want to
719
+ share, but it is not very pleasing to the eye.
720
+
721
+ \subsection{Improving Colors}\label{improving-colors}
722
+
723
+ Let's start by trying to improve colors. For now, we will not use the
724
+ jitter layer. The previous plot has three bright colors that have no
725
+ relationship between them. Is there any obvious, or non-obvious for
726
+ that matter, interpretation for the colors? Clearly, they are just
727
+ random colors selected automatically by our software. Although those
728
+ colors helped us understand the data, for a final presentation random
729
+ colors can distract the viewer.
730
+
731
+ In the following plot we use the function `scale\_fill\_manual' to
732
+ change the colors of the boxes and order of labels. For colors we use
733
+ shades of blue for each dosage, with light blue (`cyan') representing
734
+ the lower dose and deep blue (`deepskyblue4') the higher dose. Also the
735
+ smaller value (0.5) is on the bottom of the labels and (2) at the top.
736
+ This ordering seems more natural and matches with the actual order of
737
+ the colors in the plot.
738
+
739
+ \begin{Shaded}
740
+ \begin{Highlighting}[]
741
+ \NormalTok{R.png(}\StringTok{"figures/facets_by_delivery_color2.png"}\NormalTok{)}
742
+
743
+ \OtherTok{@bp}\NormalTok{ = }\OtherTok{@bp}\NormalTok{ +}
744
+ \NormalTok{ R.scale_fill_manual(}\StringTok{values: }\NormalTok{R.c(}\StringTok{"cyan"}\NormalTok{, }\StringTok{"deepskyblue"}\NormalTok{, }\StringTok{"deepskyblue4"}\NormalTok{),}
745
+ \StringTok{breaks: }\NormalTok{R.c(}\StringTok{"2"}\NormalTok{,}\StringTok{"1"}\NormalTok{,}\StringTok{"0.5"}\NormalTok{))}
746
+
747
+ \NormalTok{puts }\OtherTok{@bp}
748
+
749
+ \NormalTok{R.dev__off}
750
+ \end{Highlighting}
751
+ \end{Shaded}
752
+
753
+ \begin{figure}
754
+ \centering
755
+ \includegraphics[width=0.70000\textwidth]{figures/facets_by_delivery_color2.png}
756
+ \caption{}
757
+ \end{figure}
758
+
759
+ \subsection{Violin Plot and Jitter}\label{violin-plot-and-jitter}
760
+
761
+ The boxplot with jitter did look a bit overwhelming. The next plot uses
762
+ a variation of a boxplot known as a \emph{violin plot} with jittered
763
+ data.
764
+
765
+ \href{https://en.wikipedia.org/wiki/Violin_plot}{From Wikipedia}
766
+
767
+ \begin{quote}
768
+ A violin plot is a method of plotting numeric data. It is similar to a
769
+ box plot with a rotated kernel density plot on each side.
770
+
771
+ A violin plot has four layers. The outer shape represents all possible
772
+ results, with thickness indicating how common. (Thus the thickest
773
+ section represents the mode average.) The next layer inside represents
774
+ the values that occur 95\% of the time. The next layer (if it exists)
775
+ inside represents the values that occur 50\% of the time. The central
776
+ dot represents the median average value.
777
+ \end{quote}
778
+
779
+ \begin{Shaded}
780
+ \begin{Highlighting}[]
781
+ \NormalTok{R.png(}\StringTok{"figures/violin_with_jitter.png"}\NormalTok{)}
782
+
783
+ \OtherTok{@violin}\NormalTok{ = }\OtherTok{@base_tooth}\NormalTok{ + R.geom_violin(E.aes(}\StringTok{fill: :dose}\NormalTok{)) + }
784
+ \NormalTok{ R.facet_grid(+}\StringTok{:all}\NormalTok{ =~ +}\StringTok{:supp}\NormalTok{) +}
785
+ \NormalTok{ R.geom_jitter(}\StringTok{shape: }\DecValTok{23}\NormalTok{, }\StringTok{color: "cyan3"}\NormalTok{, }\StringTok{size: }\DecValTok{1}\NormalTok{) +}
786
+ \NormalTok{ R.scale_fill_manual(}\StringTok{values: }\NormalTok{R.c(}\StringTok{"cyan"}\NormalTok{, }\StringTok{"deepskyblue"}\NormalTok{, }\StringTok{"deepskyblue4"}\NormalTok{),}
787
+ \StringTok{breaks: }\NormalTok{R.c(}\StringTok{"2"}\NormalTok{,}\StringTok{"1"}\NormalTok{,}\StringTok{"0.5"}\NormalTok{))}
788
+
789
+ \NormalTok{puts }\OtherTok{@violin}
790
+
791
+ \NormalTok{R.dev__off}
792
+ \end{Highlighting}
793
+ \end{Shaded}
794
+
795
+ \begin{figure}
796
+ \centering
797
+ \includegraphics[width=0.70000\textwidth]{figures/violin_with_jitter.png}
798
+ \caption{}
799
+ \end{figure}
800
+
801
+ This plot is an alternative to the original boxplot. For the final
802
+ presentation, it is important to think which graphics will be best
803
+ understood by our audience. A violin plot is a less known plot and could
804
+ add mental overhead, yet, in my opinion, it does look a little bit better
805
+ than the boxplot and provides even more information than the boxplot
806
+ with jitter.
807
+
808
+ \subsection{Adding Decoration}\label{adding-decoration}
809
+
810
+ Our final plot is starting to take shape, but a presentation plot should
811
+ have at least a title, labels on the axis and maybe some other
812
+ decorations. Let's start adding those. Since decoration requires more
813
+ graph area, this new plot has a `width' and `height' specification. When
814
+ there is no specification, the default values for width and height are
815
+ 480.
816
+
817
+ The `labs' function adds the required decoration. In this example we use
818
+ `title', `subtitle', `x' for the \(x\) axis label and `y', for the \(y\)
819
+ axis label, and `caption' for information about the plot.
820
+
821
+ \begin{Shaded}
822
+ \begin{Highlighting}[]
823
+ \NormalTok{R.png(}\StringTok{"figures/facets_with_decorations.png"}\NormalTok{, }\StringTok{width: }\DecValTok{540}\NormalTok{, }\StringTok{height: }\DecValTok{560}\NormalTok{)}
824
+
825
+ \NormalTok{caption = <<-}\KeywordTok{EOT}
826
+ \OtherTok{Length of odontoblasts in 60 guinea pigs. }
827
+ \OtherTok{Each animal received one of three dose levels of vitamin C.}
828
+ \KeywordTok{EOT}
829
+
830
+ \OtherTok{@decorations}\NormalTok{ =}
831
+ \NormalTok{ R.labs(}\StringTok{title: "Tooth Growth: Length by Dose"}\NormalTok{,}
832
+ \StringTok{subtitle: "Faceted by delivery method, (OJ) or (VC)"}\NormalTok{,}
833
+ \StringTok{x: "Dose (mg)"}\NormalTok{, }\StringTok{y: "Teeth length"}\NormalTok{,}
834
+ \StringTok{caption: }\NormalTok{caption)}
835
+
836
+ \NormalTok{puts }\OtherTok{@bp}\NormalTok{ + }\OtherTok{@decorations}
837
+
838
+ \NormalTok{R.dev__off}
839
+ \end{Highlighting}
840
+ \end{Shaded}
841
+
842
+ \begin{figure}
843
+ \centering
844
+ \includegraphics[width=0.70000\textwidth]{figures/facets_with_decorations.png}
845
+ \caption{}
846
+ \end{figure}
847
+
848
+ \subsection{The Corp Theme}\label{the-corp-theme}
849
+
850
+ We are almost done. But the plot does not yet look nice to the eye. We
851
+ are still distracted by many aspects of the graph. First, the black font
852
+ color does not look good. Then plot background, borders, grids all add
853
+ clutter to the plot.
854
+
855
+ We will now define our corporate theme. In this theme, we remove borders
856
+ and grids. The background is left for faceted plots but removed for
857
+ non-faceted plots. Font colors are a shade of blue (color: `\#000080').
858
+ Axis labels are moved near the end of the axis and written in `bold'.
859
+
860
+ \begin{Shaded}
861
+ \begin{Highlighting}[]
862
+ \KeywordTok{module} \DataTypeTok{CorpTheme}
863
+
864
+ \NormalTok{ R.install_and_loads }\StringTok{'RColorBrewer'}
865
+
866
+ \CommentTok{#---------------------------------------------------------------------------------}
867
+ \CommentTok{# face can be (1=plain, 2=bold, 3=italic, 4=bold-italic)}
868
+ \CommentTok{#---------------------------------------------------------------------------------}
869
+
870
+ \KeywordTok{def} \DecValTok{self}\NormalTok{.text_element(size, }\StringTok{face: "plain"}\NormalTok{, }\StringTok{hjust: }\DecValTok{nil}\NormalTok{)}
871
+ \NormalTok{ E.element_text(}\StringTok{color: "#000080"}\NormalTok{, }
872
+ \StringTok{face: }\NormalTok{face,}
873
+ \StringTok{size: }\NormalTok{size,}
874
+ \StringTok{hjust: }\NormalTok{hjust)}
875
+ \KeywordTok{end}
876
+
877
+ \CommentTok{#---------------------------------------------------------------------------------}
878
+ \CommentTok{# Defines the plot theme (visualization). In this theme we remove major and minor}
879
+ \CommentTok{# grids, borders and background. We also turn-off scientific notation.}
880
+ \CommentTok{#---------------------------------------------------------------------------------}
881
+
882
+ \KeywordTok{def} \DecValTok{self}\NormalTok{.global_theme(faceted = }\DecValTok{false}\NormalTok{)}
883
+
884
+ \NormalTok{ R.options(}\StringTok{scipen: }\DecValTok{999}\NormalTok{) }\CommentTok{# turn-off scientific notation like 1e+48}
885
+ \CommentTok{# R.theme_set(R.theme_bw)}
886
+
887
+ \CommentTok{# remove major grids}
888
+ \NormalTok{ gb = R.theme(}\StringTok{panel__grid__major: }\NormalTok{E.element_blank())}
889
+ \CommentTok{# remove minor grids}
890
+ \NormalTok{ gb = gb + R.theme(}\StringTok{panel__grid__minor: }\NormalTok{E.element_blank)}
891
+ \CommentTok{# gb = R.theme(panel__grid__minor: E.element_blank)}
892
+ \CommentTok{# remove border}
893
+ \NormalTok{ gb = gb + R.theme(}\StringTok{panel__border: }\NormalTok{E.element_blank)}
894
+ \CommentTok{# remove background. When working with faceted graphs, the background makes}
895
+ \CommentTok{# it easier to see each facet, so leave it}
896
+ \NormalTok{ gb = gb + R.theme(}\StringTok{panel__background: }\NormalTok{E.element_blank) }\KeywordTok{if}\NormalTok{ !faceted}
897
+ \CommentTok{# Change axis font}
898
+ \NormalTok{ gb = gb + R.theme(}\StringTok{axis__text: }\NormalTok{text_element(}\DecValTok{8}\NormalTok{))}
899
+ \CommentTok{# change axis title font}
900
+ \NormalTok{ gb = gb + R.theme(}\StringTok{axis__title: }\NormalTok{text_element(}\DecValTok{10}\NormalTok{, }\StringTok{face: "bold"}\NormalTok{, }\StringTok{hjust: }\DecValTok{1}\NormalTok{))}
901
+ \CommentTok{# change font of title}
902
+ \NormalTok{ gb = gb + R.theme(}\StringTok{title: }\NormalTok{text_element(}\DecValTok{12}\NormalTok{, }\StringTok{face: "bold"}\NormalTok{))}
903
+ \CommentTok{# change font of subtitle}
904
+ \NormalTok{ gb = gb + R.theme(}\StringTok{plot__subtitle: }\NormalTok{text_element(}\DecValTok{9}\NormalTok{))}
905
+ \CommentTok{# change font of captions}
906
+ \NormalTok{ gb = gb + R.theme(}\StringTok{plot__caption: }\NormalTok{text_element(}\DecValTok{8}\NormalTok{))}
907
+
908
+ \KeywordTok{end}
909
+
910
+ \KeywordTok{end}
911
+ \end{Highlighting}
912
+ \end{Shaded}
913
+
914
+ \subsection{Final Box Plot}\label{final-box-plot}
915
+
916
+ Here is our final boxplot, without jitter.
917
+
918
+ \begin{Shaded}
919
+ \begin{Highlighting}[]
920
+ \NormalTok{R.png(}\StringTok{"figures/final_box_plot.png"}\NormalTok{, }\StringTok{width: }\DecValTok{540}\NormalTok{, }\StringTok{height: }\DecValTok{560}\NormalTok{)}
921
+
922
+ \NormalTok{puts }\OtherTok{@bp}\NormalTok{ + }\OtherTok{@decorations}\NormalTok{ + }\DataTypeTok{CorpTheme}\NormalTok{.global_theme(}\StringTok{faceted: }\DecValTok{true}\NormalTok{)}
923
+
924
+ \NormalTok{R.dev__off}
925
+ \end{Highlighting}
926
+ \end{Shaded}
927
+
928
+ \begin{figure}
929
+ \centering
930
+ \includegraphics[width=0.70000\textwidth]{figures/final_box_plot.png}
931
+ \caption{}
932
+ \end{figure}
933
+
934
+ \subsection{Final Violin Plot}\label{final-violin-plot}
935
+
936
+ Here is the final violin plot, with jitter and the same look and feel of
937
+ the corporate boxplot.
938
+
939
+ \begin{Shaded}
940
+ \begin{Highlighting}[]
941
+ \NormalTok{R.png(}\StringTok{"figures/final_violin_plot.png"}\NormalTok{, }\StringTok{width: }\DecValTok{540}\NormalTok{, }\StringTok{height: }\DecValTok{560}\NormalTok{)}
942
+
943
+ \NormalTok{puts }\OtherTok{@violin}\NormalTok{ + }\OtherTok{@decorations}\NormalTok{ + }\DataTypeTok{CorpTheme}\NormalTok{.global_theme(}\StringTok{faceted: }\DecValTok{true}\NormalTok{)}
944
+
945
+ \NormalTok{R.dev__off}
946
+ \end{Highlighting}
947
+ \end{Shaded}
948
+
949
+ \begin{figure}
950
+ \centering
951
+ \includegraphics[width=0.70000\textwidth]{figures/final_violin_plot.png}
952
+ \caption{}
953
+ \end{figure}
954
+
955
+ \subsection{Another View}\label{another-view}
956
+
957
+ Finally, here is a last plot, with the same look and feel as before but
958
+ facetted by dose and not by supplement.
959
+
960
+ \begin{Shaded}
961
+ \begin{Highlighting}[]
962
+ \NormalTok{R.png(}\StringTok{"figures/facet_by_dose.png"}\NormalTok{, }\StringTok{width: }\DecValTok{540}\NormalTok{, }\StringTok{height: }\DecValTok{560}\NormalTok{)}
963
+
964
+ \NormalTok{caption = <<-}\KeywordTok{EOT}
965
+ \OtherTok{Length of odontoblasts in 60 guinea pigs. }
966
+ \OtherTok{Each animal received one of three dose levels of vitamin C.}
967
+ \KeywordTok{EOT}
968
+
969
+ \OtherTok{@bp}\NormalTok{ = }\OtherTok{@tooth_growth}\NormalTok{.ggplot(E.aes(}\StringTok{x: :supp}\NormalTok{, }\StringTok{y: :len}\NormalTok{, }\StringTok{group: :supp}\NormalTok{)) + }
970
+ \NormalTok{ R.geom_boxplot(E.aes(}\StringTok{fill: :supp}\NormalTok{)) + R.facet_grid(+}\StringTok{:all}\NormalTok{ =~ +}\StringTok{:dose}\NormalTok{) +}
971
+ \NormalTok{ R.scale_fill_manual(}\StringTok{values: }\NormalTok{R.c(}\StringTok{"cyan"}\NormalTok{, }\StringTok{"deepskyblue4"}\NormalTok{)) +}
972
+ \NormalTok{ R.labs(}\StringTok{title: "Tooth Growth: Length by Dose"}\NormalTok{,}
973
+ \StringTok{subtitle: "Faceted by dose"}\NormalTok{,}
974
+ \StringTok{x: "Delivery method"}\NormalTok{, }\StringTok{y: "Teeth length"}\NormalTok{,}
975
+ \StringTok{caption: }\NormalTok{caption) +}
976
+ \DataTypeTok{CorpTheme}\NormalTok{.global_theme(}\StringTok{faceted: }\DecValTok{true}\NormalTok{)}
977
+ \NormalTok{puts }\OtherTok{@bp}
978
+
979
+ \NormalTok{R.dev__off}
980
+ \end{Highlighting}
981
+ \end{Shaded}
982
+
983
+ \begin{figure}
984
+ \centering
985
+ \includegraphics[width=0.70000\textwidth]{figures/facet_by_dose.png}
986
+ \caption{}
987
+ \end{figure}
407
988
 
408
989
  \section{Conclusion}\label{conclusion}
409
990
 
991
+ Galaaz tightly couples Ruby and R in a way that Ruby developers do not
992
+ need to be aware of the executing R engine. For the Ruby developer the
993
+ existence of R is of no consequence. For her, she is just coding in
994
+ Ruby. On the other hand, for the R developer, migration to Ruby is a
995
+ matter of small syntactic changes and a very gentle learning curve. As the
996
+ R developer becomes more proficient in Ruby, he can start using
997
+ `classes', `modules', `procs', `lambdas'.
998
+
999
+ This coupling shows the power of GraalVM and Truffle polyglot
1000
+ environment. Trying to bring to Ruby the power of R starting from
1001
+ scratch is an enormous endeavour and would probably never be
1002
+ accomplished. Today's data scientists would certainly stick with either
1003
+ Python or R. Now, both the Ruby and R communities might benefit from
1004
+ this marriage. Also, the process to couple Ruby and R can also be
1005
+ done to couple Ruby and JavaScript and maybe also Ruby and Python. In a
1006
+ polyglot world a \emph{uniglot} language might be extremely relevant.
1007
+
1008
+ From the perspective of performance, GraalVM and Truffle promise
1009
+ improvements that could reach over 10 times, both for
1010
+ \href{https://medium.com/graalvm/faster-r-with-fastr-4b8db0e0dceb}{FastR}
1011
+ and for
1012
+ \href{https://rubykaigi.org/2018/presentations/eregontp.html}{TruffleRuby}.
1013
+
1014
+ This article has shown how to improve a plot step-by-step. Starting from
1015
+ a very simple boxplot with all default configurations, we moved slowly
1016
+ to our final plot. The important point here is not if the final plot is
1017
+ actually beautiful, but that there is a process of small steps
1018
+ improvements that can be followed until getting a final plot ready for
1019
+ presentation.
1020
+
1021
+ Finally, this whole article was written in rmarkdown and compiled to
1022
+ HTML by \emph{gknit}, an application that wraps \emph{knitr} and allows
1023
+ documenting Ruby code. This application can be of great help for any
1024
+ Rubyist trying to write articles, blogs or documentation for Ruby.
1025
+
410
1026
  \section{Installing Galaaz}\label{installing-galaaz}
411
1027
 
412
1028
  \subsection{Prerequisites}\label{prerequisites}
@@ -414,7 +1030,8 @@ eye.
414
1030
  \begin{itemize}
415
1031
  \tightlist
416
1032
  \item
417
- GraalVM (\textgreater{}= rc8)
1033
+ GraalVM (\textgreater{}= rc8):
1034
+ \url{https://github.com/oracle/graal/releases}
418
1035
  \item
419
1036
  TruffleRuby
420
1037
  \item
@@ -452,6 +1069,8 @@ am not sure what is needed on the Mac.
452
1069
  \tightlist
453
1070
  \item
454
1071
  gknit
1072
+ \item
1073
+ In a script add: require `galaaz'
455
1074
  \end{itemize}
456
1075
 
457
1076