galaaz 0.4.9 → 0.4.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +798 -285
  3. data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +3 -12
  4. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +5 -7
  5. data/blogs/galaaz_ggplot/galaaz_ggplot.html +69 -29
  6. data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
  7. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
  8. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
  9. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/midwest_rb.pdf +0 -0
  10. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/scatter_plot_rb.pdf +0 -0
  11. data/blogs/galaaz_ggplot/midwest.Rmd +1 -9
  12. data/blogs/gknit/gknit.Rmd +37 -40
  13. data/blogs/gknit/gknit.html +32 -30
  14. data/blogs/gknit/gknit.md +36 -37
  15. data/blogs/gknit/gknit.pdf +0 -0
  16. data/blogs/gknit/gknit.tex +35 -37
  17. data/blogs/manual/manual.Rmd +548 -125
  18. data/blogs/manual/manual.html +509 -286
  19. data/blogs/manual/manual.md +798 -285
  20. data/blogs/manual/manual.pdf +0 -0
  21. data/blogs/manual/manual.tex +2816 -0
  22. data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
  23. data/blogs/nse_dplyr/nse_dplyr.Rmd +240 -74
  24. data/blogs/nse_dplyr/nse_dplyr.html +191 -87
  25. data/blogs/nse_dplyr/nse_dplyr.md +361 -107
  26. data/blogs/nse_dplyr/nse_dplyr.pdf +0 -0
  27. data/blogs/nse_dplyr/nse_dplyr.tex +1373 -0
  28. data/blogs/ruby_plot/ruby_plot.Rmd +61 -81
  29. data/blogs/ruby_plot/ruby_plot.html +54 -57
  30. data/blogs/ruby_plot/ruby_plot.md +48 -67
  31. data/blogs/ruby_plot/ruby_plot.pdf +0 -0
  32. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  33. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  34. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  35. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  36. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  42. data/blogs/ruby_plot/ruby_plot_files/figure-latex/dose_len.png +0 -0
  43. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_delivery.png +0 -0
  44. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_dose.png +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color.png +0 -0
  46. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color2.png +0 -0
  47. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_decorations.png +0 -0
  48. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_jitter.png +0 -0
  49. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_points.png +0 -0
  50. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_box_plot.png +0 -0
  51. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_violin_plot.png +0 -0
  52. data/blogs/ruby_plot/ruby_plot_files/figure-latex/violin_with_jitter.png +0 -0
  53. data/lib/R_interface/rdata_frame.rb +0 -12
  54. data/lib/R_interface/robject.rb +14 -14
  55. data/lib/R_interface/ruby_extensions.rb +3 -31
  56. data/lib/R_interface/rvector.rb +0 -12
  57. data/lib/gknit/knitr_engine.rb +5 -3
  58. data/lib/util/exec_ruby.rb +22 -61
  59. data/specs/tmp.rb +26 -12
  60. data/version.rb +1 -1
  61. metadata +22 -17
  62. data/bin/gknit_old_r +0 -236
  63. data/blogs/dev/dev.Rmd +0 -23
  64. data/blogs/dev/dev.md +0 -58
  65. data/blogs/dev/dev2.Rmd +0 -65
  66. data/blogs/dev/model.rb +0 -41
  67. data/blogs/dplyr/dplyr.Rmd +0 -29
  68. data/blogs/dplyr/dplyr.html +0 -433
  69. data/blogs/dplyr/dplyr.md +0 -58
  70. data/blogs/dplyr/dplyr.rb +0 -63
  71. data/blogs/galaaz_ggplot/galaaz_ggplot.log +0 -640
  72. data/blogs/galaaz_ggplot/galaaz_ggplot.md +0 -431
  73. data/blogs/galaaz_ggplot/galaaz_ggplot.tex +0 -481
  74. data/blogs/galaaz_ggplot/midwest.png +0 -0
  75. data/blogs/galaaz_ggplot/scatter_plot.png +0 -0
  76. data/blogs/ruby_plot/ruby_plot.tex +0 -1077
Binary file
@@ -0,0 +1,2816 @@
1
+ \documentclass[11pt,]{article}
2
+ \usepackage{lmodern}
3
+ \usepackage{amssymb,amsmath}
4
+ \usepackage{ifxetex,ifluatex}
5
+ \usepackage{fixltx2e} % provides \textsubscript
6
+ \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
7
+ \usepackage[T1]{fontenc}
8
+ \usepackage[utf8]{inputenc}
9
+ \else % if luatex or xelatex
10
+ \ifxetex
11
+ \usepackage{mathspec}
12
+ \else
13
+ \usepackage{fontspec}
14
+ \fi
15
+ \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
16
+ \fi
17
+ % use upquote if available, for straight quotes in verbatim environments
18
+ \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
19
+ % use microtype if available
20
+ \IfFileExists{microtype.sty}{%
21
+ \usepackage{microtype}
22
+ \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
23
+ }{}
24
+ \usepackage[margin=1in]{geometry}
25
+ \usepackage{hyperref}
26
+ \hypersetup{unicode=true,
27
+ pdftitle={Galaaz Manual},
28
+ pdfauthor={Rodrigo Botafogo},
29
+ pdfborder={0 0 0},
30
+ breaklinks=true}
31
+ \urlstyle{same} % don't use monospace font for urls
32
+ \usepackage{color}
33
+ \usepackage{fancyvrb}
34
+ \newcommand{\VerbBar}{|}
35
+ \newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
36
+ \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
37
+ % Add ',fontsize=\small' for more characters per line
38
+ \usepackage{framed}
39
+ \definecolor{shadecolor}{RGB}{248,248,248}
40
+ \newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}}
41
+ \newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{#1}}
42
+ \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
43
+ \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.77,0.63,0.00}{#1}}
44
+ \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
45
+ \newcommand{\BuiltInTok}[1]{#1}
46
+ \newcommand{\CharTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
47
+ \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
48
+ \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
49
+ \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
50
+ \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
51
+ \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{#1}}
52
+ \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
53
+ \newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
54
+ \newcommand{\ErrorTok}[1]{\textcolor[rgb]{0.64,0.00,0.00}{\textbf{#1}}}
55
+ \newcommand{\ExtensionTok}[1]{#1}
56
+ \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
57
+ \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
58
+ \newcommand{\ImportTok}[1]{#1}
59
+ \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
60
+ \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
61
+ \newcommand{\NormalTok}[1]{#1}
62
+ \newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.81,0.36,0.00}{\textbf{#1}}}
63
+ \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{#1}}
64
+ \newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
65
+ \newcommand{\RegionMarkerTok}[1]{#1}
66
+ \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
67
+ \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
68
+ \newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
69
+ \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
70
+ \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
71
+ \newcommand{\WarningTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
72
+ \usepackage{longtable,booktabs}
73
+ \usepackage{graphicx,grffile}
74
+ \makeatletter
75
+ \def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
76
+ \def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
77
+ \makeatother
78
+ % Scale images if necessary, so that they will not overflow the page
79
+ % margins by default, and it is still possible to overwrite the defaults
80
+ % using explicit options in \includegraphics[width, height, ...]{}
81
+ \setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
82
+ \IfFileExists{parskip.sty}{%
83
+ \usepackage{parskip}
84
+ }{% else
85
+ \setlength{\parindent}{0pt}
86
+ \setlength{\parskip}{6pt plus 2pt minus 1pt}
87
+ }
88
+ \setlength{\emergencystretch}{3em} % prevent overfull lines
89
+ \providecommand{\tightlist}{%
90
+ \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
91
+ \setcounter{secnumdepth}{5}
92
+ % Redefines (sub)paragraphs to behave more like sections
93
+ \ifx\paragraph\undefined\else
94
+ \let\oldparagraph\paragraph
95
+ \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
96
+ \fi
97
+ \ifx\subparagraph\undefined\else
98
+ \let\oldsubparagraph\subparagraph
99
+ \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
100
+ \fi
101
+
102
+ %%% Use protect on footnotes to avoid problems with footnotes in titles
103
+ \let\rmarkdownfootnote\footnote%
104
+ \def\footnote{\protect\rmarkdownfootnote}
105
+
106
+ %%% Change title format to be more compact
107
+ \usepackage{titling}
108
+
109
+ % Create subtitle command for use in maketitle
110
+ \newcommand{\subtitle}[1]{
111
+ \posttitle{
112
+ \begin{center}\large#1\end{center}
113
+ }
114
+ }
115
+
116
+ \setlength{\droptitle}{-2em}
117
+
118
+ \title{Galaaz Manual}
119
+ \pretitle{\vspace{\droptitle}\centering\huge}
120
+ \posttitle{\par}
121
+ \subtitle{How to tightly couple Ruby and R in GraalVM}
122
+ \author{Rodrigo Botafogo}
123
+ \preauthor{\centering\large\emph}
124
+ \postauthor{\par}
125
+ \predate{\centering\large\emph}
126
+ \postdate{\par}
127
+ \date{2019}
128
+
129
+ % usar portugues do Brasil
130
+ % \usepackage[brazilian]{babel}
131
+ \usepackage[utf8]{inputenc}
132
+
133
+ \usepackage{geometry}
134
+ \geometry{a4paper, top=1in}
135
+
136
+ % needed for kableExtra
137
+ \usepackage{longtable}
138
+ \usepackage{multirow}
139
+ \usepackage[table]{xcolor}
140
+ \usepackage{wrapfig}
141
+ \usepackage{float}
142
+ \usepackage{colortbl}
143
+ \usepackage{pdflscape}
144
+ \usepackage{tabu}
145
+ \usepackage{threeparttable}
146
+ \usepackage[normalem]{ulem}
147
+
148
+ \usepackage{bbm}
149
+ \usepackage{booktabs}
150
+ \usepackage{expex}
151
+
152
+ \usepackage{graphicx}
153
+
154
+ \usepackage{fancyhdr}
155
+ % set the header and foot style
156
+ % style 'fancy' adds the section name on the header
157
+ % and the page number on the footer
158
+ \pagestyle{fancy}
159
+
160
+ % style 'fancyhf' leaves header and footer empty
161
+ %\fancyhf{}
162
+
163
+ % sets the left head element to \rightmark, which contains the
164
+ % current section (\leftmark is the current chapter)
165
+ %\fancyhead[L]{\rightmark} .
166
+
167
+ % sets the right head element to the page number.
168
+ % \fancyhead[R]{\thepage}
169
+
170
+ % lets the head rule disappear.
171
+ % \renewcommand{\headrulewidth}{0pt}
172
+ % Possible selectors for the optional argument of \fancyhead/\fancyfoot
173
+ % are L (left), C (center) or R (right) for the position of the element
174
+ % and E (even) or O (odd) to distinguish even and odd pages. If you omit
175
+ % E/O the element is set for all pages.
176
+
177
+ % \usepackage{lipsum}
178
+
179
+ % make available command lastpage
180
+ \usepackage{lastpage}
181
+
182
+ % default fontsize 11pt better to add
183
+ % fontsize on the yaml header
184
+ % \usepackage[fontsize=11pt]{scrextend}
185
+
186
+ % comandos para formatar uma tabela
187
+ \usepackage{array}
188
+ \newcolumntype{L}[1]{>{\raggedright\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
189
+ \newcolumntype{C}[1]{>{\centering\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
190
+ \newcolumntype{R}[1]{>{\raggedleft\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
191
+
192
+ % needed if we need to import other latex documents
193
+ \usepackage{import}
194
+
195
+ % Command to import an R variable to latex
196
+ \newcommand{\RtoLatex}[2]{\newcommand{#1}{#2}}
197
+
198
+ %
199
+ %\newcommand{\atraso}[1]{\color{red} \textbf {Tempo desde a Assinatura do Contrato: #1 dias}}
200
+ \usepackage{booktabs}
201
+ \usepackage{longtable}
202
+ \usepackage{array}
203
+ \usepackage{multirow}
204
+ \usepackage{wrapfig}
205
+ \usepackage{float}
206
+ \usepackage{colortbl}
207
+ \usepackage{pdflscape}
208
+ \usepackage{tabu}
209
+ \usepackage{threeparttable}
210
+ \usepackage{threeparttablex}
211
+ \usepackage[normalem]{ulem}
212
+ \usepackage{makecell}
213
+ \usepackage{xcolor}
214
+
215
+ \begin{document}
216
+ \maketitle
217
+
218
+ {
219
+ \setcounter{tocdepth}{2}
220
+ \tableofcontents
221
+ }
222
+ \hypertarget{introduction}{%
223
+ \section{Introduction}\label{introduction}}
224
+
225
+ Galaaz is a system for tightly coupling Ruby and R. Ruby is a powerful
226
+ language, with a large community, a very large set of libraries and
227
+ great for web development. However, it lacks libraries for data science,
228
+ statistics, scientific plotting and machine learning. On the other hand,
229
+ R is considered one of the most powerful languages for solving all of
230
+ the above problems. Maybe the strongest competitor to R is Python with
231
+ libraries such as NumPy, Pandas, SciPy, SciKit-Learn and a couple more.
232
+
233
+ \hypertarget{system-compatibility}{%
234
+ \section{System Compatibility}\label{system-compatibility}}
235
+
236
+ \begin{itemize}
237
+ \tightlist
238
+ \item
239
+ Oracle Linux 7
240
+ \item
241
+ Ubuntu 18.04 LTS
242
+ \item
243
+ Ubuntu 16.04 LTS
244
+ \item
245
+ Fedora 28
246
+ \item
247
+ macOS 10.14 (Mojave)
248
+ \item
249
+ macOS 10.13 (High Sierra)
250
+ \end{itemize}
251
+
252
+ \hypertarget{dependencies}{%
253
+ \section{Dependencies}\label{dependencies}}
254
+
255
+ \begin{itemize}
256
+ \tightlist
257
+ \item
258
+ TruffleRuby
259
+ \item
260
+ FastR
261
+ \end{itemize}
262
+
263
+ \hypertarget{installation}{%
264
+ \section{Installation}\label{installation}}
265
+
266
+ \begin{itemize}
267
+ \tightlist
268
+ \item
269
+ Install GraalVM (\url{http://www.graalvm.org/})
270
+ \item
271
+ Install Ruby (gu install Ruby)
272
+ \item
273
+ Install FastR (gu install R)
274
+ \item
275
+ Install rake if you want to run the specs and examples (gem install
276
+ rake)
277
+ \end{itemize}
278
+
279
+ \hypertarget{usage}{%
280
+ \section{Usage}\label{usage}}
281
+
282
+ \begin{itemize}
283
+ \item
284
+ Interactive shell: use `gstudio' on the command line
285
+
286
+ \begin{quote}
287
+ gstudio
288
+ \end{quote}
289
+ \end{itemize}
290
+
291
+ \begin{Shaded}
292
+ \begin{Highlighting}[]
293
+ \NormalTok{ vec = R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{4}\NormalTok{)}
294
+ \NormalTok{ puts vec}
295
+ \end{Highlighting}
296
+ \end{Shaded}
297
+
298
+ \begin{verbatim}
299
+ ## [1] 1 2 3 4
300
+ \end{verbatim}
301
+
302
+ \begin{itemize}
303
+ \item
304
+ Run all specs
305
+
306
+ \begin{quote}
307
+ galaaz specs:all
308
+ \end{quote}
309
+ \item
310
+ Run graphics slideshow (80+ graphics)
311
+
312
+ \begin{quote}
313
+ galaaz sthda:all
314
+ \end{quote}
315
+ \item
316
+ Run labs from Introduction to Statistical Learning with R
317
+
318
+ \begin{quote}
319
+ galaaz islr:all
320
+ \end{quote}
321
+ \item
322
+ See all available examples
323
+
324
+ \begin{quote}
325
+ galaaz -T
326
+ \end{quote}
327
+
328
+ Shows a list with all available executable tasks. To execute a task,
329
+ substitute the `rake' word in the list with `galaaz'. For instance,
330
+ the following line shows up after `galaaz -T'
331
+
332
+ rake master\_list:scatter\_plot \# scatter\_plot from:\ldots{}.
333
+
334
+ execute
335
+
336
+ \begin{quote}
337
+ galaaz master\_list:scatter\_plot
338
+ \end{quote}
339
+ \end{itemize}
340
+
341
+ \hypertarget{gknitting-a-document}{%
342
+ \section{gKnitting a Document}\label{gknitting-a-document}}
343
+
344
+ This manual has been formatted using gKnit. gKnit uses Knitr and R
345
+ markdown to knit a document in Ruby or R and output it in any of the
346
+ available formats for R markdown. gKnit runs atop of GraalVM, and
347
+ Galaaz. In gKnit, Ruby variables are persisted between chunks, making it
348
+ an ideal solution for literate programming. Also, since it is based on
349
+ Galaaz, Ruby chunks can have access to R variables and Polyglot
350
+ Programming with Ruby and R is quite natural.
351
+
352
+ \href{https://towardsdatascience.com/how-to-do-reproducible-research-in-ruby-with-gknit-c26d2684d64e}{gknit
353
+ is described in more details here}
354
+
355
+ \hypertarget{vector}{%
356
+ \section{Vector}\label{vector}}
357
+
358
+ Vectors can be thought of as contiguous cells containing data. Cells are
359
+ accessed through indexing operations such as x{[}5{]}. Galaaz has six
360
+ basic (`atomic') vector types: logical, integer, real, complex, string
361
+ (or character) and raw. The modes and storage modes for the different
362
+ vector types are listed in the following table.
363
+
364
+ \begin{longtable}[]{@{}lcr@{}}
365
+ \toprule
366
+ typeof & mode & storage.mode\tabularnewline
367
+ \midrule
368
+ \endhead
369
+ logical & logical & logical\tabularnewline
370
+ integer & numeric & integer\tabularnewline
371
+ double & numeric & double\tabularnewline
372
+ complex & complex & complex\tabularnewline
373
+ character & character & character\tabularnewline
374
+ raw & raw & raw\tabularnewline
375
+ \bottomrule
376
+ \end{longtable}
377
+
378
+ Single numbers, such as 4.2, and strings, such as ``four point two'' are
379
+ still vectors, of length 1; there are no more basic types. Vectors with
380
+ length zero are possible (and useful). String vectors have mode and
381
+ storage mode ``character''. A single element of a character vector is
382
+ often referred to as a character string.
383
+
384
+ To create a vector the `c' (concatenate) method from the `R' module
385
+ should be used:
386
+
387
+ \begin{Shaded}
388
+ \begin{Highlighting}[]
389
+ \NormalTok{vec = R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{)}
390
+ \NormalTok{puts vec}
391
+ \end{Highlighting}
392
+ \end{Shaded}
393
+
394
+ \begin{verbatim}
395
+ ## [1] 1 2 3
396
+ \end{verbatim}
397
+
398
+ Let's take a look at the type, mode and storage.mode of our vector vec.
399
+ In order to print this out, we are creating a data frame `df' and
400
+ printing it out. A data frame, for those not familiar with it, is
401
+ basically a table. Here we create the data frame and add the column name
402
+ by passing named parameters for each column, such as `typeof:', `mode:'
403
+ and `storage\_\_mode:'. You should also note here that the double
404
+ underscore is converted to a `.'. So, when printed `storage\_\_mode' will
405
+ actually print as `storage.mode'.
406
+
407
+ Data frames will later be more carefully described. In R, the method
408
+ used to create a data frame is `data.frame', in Galaaz we use
409
+ `data\_\_frame'.
410
+
411
+ \begin{Shaded}
412
+ \begin{Highlighting}[]
413
+ \NormalTok{df = R.data__frame(}\StringTok{typeof: }\NormalTok{vec.typeof, }\StringTok{mode: }\NormalTok{vec.mode, }\StringTok{storage__mode: }\NormalTok{vec.storage__mode)}
414
+ \NormalTok{puts df}
415
+ \end{Highlighting}
416
+ \end{Shaded}
417
+
418
+ \begin{verbatim}
419
+ ## typeof mode storage.mode
420
+ ## 1 integer numeric integer
421
+ \end{verbatim}
422
+
423
+ If you want to create a vector with floating point numbers, then we need
424
+ at least one of the vector's elements to be a float, such as 1.0. R users
425
+ should be careful, since in R a number like `1' is converted to float
426
+ and to have an integer the R developer will use `1L'. Galaaz follows
427
+ normal Ruby rules and the number 1 is an integer and 1.0 is a float.
428
+
429
+ \begin{Shaded}
430
+ \begin{Highlighting}[]
431
+ \NormalTok{vec = R.c(}\FloatTok{1.0}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{)}
432
+ \NormalTok{puts vec}
433
+ \end{Highlighting}
434
+ \end{Shaded}
435
+
436
+ \begin{verbatim}
437
+ ## [1] 1 2 3
438
+ \end{verbatim}
439
+
440
+ \begin{Shaded}
441
+ \begin{Highlighting}[]
442
+ \NormalTok{df = R.data__frame(}\StringTok{typeof: }\NormalTok{vec.typeof, }\StringTok{mode: }\NormalTok{vec.mode, }\StringTok{storage__mode: }\NormalTok{vec.storage__mode)}
443
+ \NormalTok{outputs df.kable.kable_styling}
444
+ \end{Highlighting}
445
+ \end{Shaded}
446
+
447
+ \begin{table}[H]
448
+ \centering
449
+ \begin{tabular}{l|l|l}
450
+ \hline
451
+ typeof & mode & storage.mode\\
452
+ \hline
453
+ double & numeric & double\\
454
+ \hline
455
+ \end{tabular}
456
+ \end{table}
457
+
458
+ In this next example we try to create a vector with a variable `hello'
459
+ that has not yet been defined. This will raise an exception that is
460
+ printed out. We get two return blocks, the first with a message
461
+ explaining what went wrong and the second with the full backtrace of the
462
+ error.
463
+
464
+ \begin{Shaded}
465
+ \begin{Highlighting}[]
466
+ \NormalTok{vec = R.c(}\DecValTok{1}\NormalTok{, hello, }\DecValTok{5}\NormalTok{)}
467
+ \end{Highlighting}
468
+ \end{Shaded}
469
+
470
+ \begin{verbatim}
471
+ ## Message:
472
+ ## undefined local variable or method `hello' for #<RC:0x2e0 @out_list=nil>:RC
473
+ \end{verbatim}
474
+
475
+ \begin{verbatim}
476
+ ## Message:
477
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:103:in `get_binding'
478
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `eval'
479
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `exec_ruby'
480
+ ## /home/rbotafogo/desenv/galaaz/lib/gknit/knitr_engine.rb:650:in `block in initialize'
481
+ ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
482
+ ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
483
+ ## (eval):3:in `function(...) {\n rb_method(...)'
484
+ ## unknown.r:1:in `in_dir'
485
+ ## unknown.r:1:in `block_exec:BLOCK0'
486
+ ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:102:in `block_exec'
487
+ ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:92:in `call_block'
488
+ ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:6:in `process_group.block'
489
+ ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:3:in `<no source>'
490
+ ## unknown.r:1:in `withCallingHandlers'
491
+ ## unknown.r:1:in `process_file'
492
+ ## unknown.r:1:in `<no source>:BLOCK1'
493
+ ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/output.R:129:in `<no source>'
494
+ ## unknown.r:1:in `<no source>:BLOCK1'
495
+ ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/rmarkdown/R/render.R:162:in `<no source>'
496
+ ## <REPL>:5:in `<repl wrapper>'
497
+ ## <REPL>:1
498
+ \end{verbatim}
499
+
500
+ Here is a vector with logical values
501
+
502
+ \begin{Shaded}
503
+ \begin{Highlighting}[]
504
+ \NormalTok{vec = R.c(}\DecValTok{true}\NormalTok{, }\DecValTok{true}\NormalTok{, }\DecValTok{false}\NormalTok{, }\DecValTok{false}\NormalTok{, }\DecValTok{true}\NormalTok{)}
505
+ \NormalTok{puts vec}
506
+ \end{Highlighting}
507
+ \end{Shaded}
508
+
509
+ \begin{verbatim}
510
+ ## [1] TRUE TRUE FALSE FALSE TRUE
511
+ \end{verbatim}
512
+
513
+ \hypertarget{combining-vectors}{%
514
+ \subsection{Combining Vectors}\label{combining-vectors}}
515
+
516
+ The `c' function used to create vectors can also be used to combine two
517
+ vectors:
518
+
519
+ \begin{Shaded}
520
+ \begin{Highlighting}[]
521
+ \NormalTok{vec1 = R.c(}\FloatTok{10.0}\NormalTok{, }\FloatTok{20.0}\NormalTok{, }\FloatTok{30.0}\NormalTok{)}
522
+ \NormalTok{vec2 = R.c(}\FloatTok{4.0}\NormalTok{, }\FloatTok{5.0}\NormalTok{, }\FloatTok{6.0}\NormalTok{)}
523
+ \NormalTok{vec = R.c(vec1, vec2)}
524
+ \NormalTok{puts vec}
525
+ \end{Highlighting}
526
+ \end{Shaded}
527
+
528
+ \begin{verbatim}
529
+ ## [1] 10 20 30 4 5 6
530
+ \end{verbatim}
531
+
532
+ In galaaz, methods can be chained (somewhat like the pipe operator in R
533
+ \%\textgreater{}\%, but more generic). In this next example, method `c'
534
+ is chained after `vec1'. This also looks like `c' is a method of the
535
+ vector, but in reality, this is actually closer to the pipe operator.
536
+ When Galaaz identifies that `c' is not a method of `vec1' it actually
537
+ tries to call `R.c' with `vec1' as the first argument concatenated with
538
+ all the other available arguments. The code below is automatically
539
+ converted to the code above.
540
+
541
+ \begin{Shaded}
542
+ \begin{Highlighting}[]
543
+ \NormalTok{vec = vec1.c(vec2)}
544
+ \NormalTok{puts vec}
545
+ \end{Highlighting}
546
+ \end{Shaded}
547
+
548
+ \begin{verbatim}
549
+ ## [1] 10 20 30 4 5 6
550
+ \end{verbatim}
551
+
552
+ \hypertarget{vector-arithmetic}{%
553
+ \subsection{Vector Arithmetic}\label{vector-arithmetic}}
554
+
555
+ Arithmetic operations on vectors are performed element by element:
556
+
557
+ \begin{Shaded}
558
+ \begin{Highlighting}[]
559
+ \NormalTok{puts vec1 + vec2}
560
+ \end{Highlighting}
561
+ \end{Shaded}
562
+
563
+ \begin{verbatim}
564
+ ## [1] 14 25 36
565
+ \end{verbatim}
566
+
567
+ \begin{Shaded}
568
+ \begin{Highlighting}[]
569
+ \NormalTok{puts vec1 * }\DecValTok{5}
570
+ \end{Highlighting}
571
+ \end{Shaded}
572
+
573
+ \begin{verbatim}
574
+ ## [1] 50 100 150
575
+ \end{verbatim}
576
+
577
+ When vectors have different length, a recycling rule is applied to the
578
+ shorter vector:
579
+
580
+ \begin{Shaded}
581
+ \begin{Highlighting}[]
582
+ \NormalTok{vec3 = R.c(}\FloatTok{1.0}\NormalTok{, }\FloatTok{2.0}\NormalTok{, }\FloatTok{3.0}\NormalTok{, }\FloatTok{4.0}\NormalTok{, }\FloatTok{5.0}\NormalTok{, }\FloatTok{6.0}\NormalTok{, }\FloatTok{7.0}\NormalTok{, }\FloatTok{8.0}\NormalTok{, }\FloatTok{9.0}\NormalTok{)}
583
+ \NormalTok{puts vec4 = vec1 + vec3}
584
+ \end{Highlighting}
585
+ \end{Shaded}
586
+
587
+ \begin{verbatim}
588
+ ## [1] 11 22 33 14 25 36 17 28 39
589
+ \end{verbatim}
590
+
591
+ \hypertarget{vector-indexing}{%
592
+ \subsection{Vector Indexing}\label{vector-indexing}}
593
+
594
+ Vectors can be indexed by using the `{[}{]}' operator:
595
+
596
+ \begin{Shaded}
597
+ \begin{Highlighting}[]
598
+ \NormalTok{puts vec4[}\DecValTok{3}\NormalTok{]}
599
+ \end{Highlighting}
600
+ \end{Shaded}
601
+
602
+ \begin{verbatim}
603
+ ## [1] 33
604
+ \end{verbatim}
605
+
606
+ We can also index a vector with another vector. For example, in the code
607
+ below, we take elements 1, 3, 5, and 7 from vec4:
608
+
609
+ \begin{Shaded}
610
+ \begin{Highlighting}[]
611
+ \NormalTok{puts vec4[R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{5}\NormalTok{, }\DecValTok{7}\NormalTok{)]}
612
+ \end{Highlighting}
613
+ \end{Shaded}
614
+
615
+ \begin{verbatim}
616
+ ## [1] 11 33 25 17
617
+ \end{verbatim}
618
+
619
+ Repeating an index and having indices out of order is valid code:
620
+
621
+ \begin{Shaded}
622
+ \begin{Highlighting}[]
623
+ \NormalTok{puts vec4[R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{1}\NormalTok{)]}
624
+ \end{Highlighting}
625
+ \end{Shaded}
626
+
627
+ \begin{verbatim}
628
+ ## [1] 11 33 33 11
629
+ \end{verbatim}
630
+
631
+ It is also possible to index a vector with a negative number or negative
632
+ vector. In these cases the indexed values are not returned:
633
+
634
+ \begin{Shaded}
635
+ \begin{Highlighting}[]
636
+ \NormalTok{puts vec4[-}\DecValTok{3}\NormalTok{]}
637
+ \NormalTok{puts vec4[-R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{5}\NormalTok{, }\DecValTok{7}\NormalTok{)]}
638
+ \end{Highlighting}
639
+ \end{Shaded}
640
+
641
+ \begin{verbatim}
642
+ ## [1] 11 22 14 25 36 17 28 39
643
+ ## [1] 22 14 36 28 39
644
+ \end{verbatim}
645
+
646
+ If an index is out of range, a missing value (NA) will be reported.
647
+
648
+ \begin{Shaded}
649
+ \begin{Highlighting}[]
650
+ \NormalTok{puts vec4[}\DecValTok{30}\NormalTok{]}
651
+ \end{Highlighting}
652
+ \end{Shaded}
653
+
654
+ \begin{verbatim}
655
+ ## [1] NA
656
+ \end{verbatim}
657
+
658
+ It is also possible to index a vector by range:
659
+
660
+ \begin{Shaded}
661
+ \begin{Highlighting}[]
662
+ \NormalTok{puts vec4[(}\DecValTok{2}\NormalTok{..}\DecValTok{5}\NormalTok{)]}
663
+ \end{Highlighting}
664
+ \end{Shaded}
665
+
666
+ \begin{verbatim}
667
+ ## [1] 22 33 14 25
668
+ \end{verbatim}
669
+
670
+ Elements in a vector can be named using the `names' attribute of a
671
+ vector:
672
+
673
+ \begin{Shaded}
674
+ \begin{Highlighting}[]
675
+ \NormalTok{full_name = R.c(}\StringTok{"Rodrigo"}\NormalTok{, }\StringTok{"A"}\NormalTok{, }\StringTok{"Botafogo"}\NormalTok{)}
676
+ \NormalTok{full_name.names = R.c(}\StringTok{"First"}\NormalTok{, }\StringTok{"Middle"}\NormalTok{, }\StringTok{"Last"}\NormalTok{)}
677
+ \NormalTok{puts full_name}
678
+ \end{Highlighting}
679
+ \end{Shaded}
680
+
681
+ \begin{verbatim}
682
+ ## First Middle Last
683
+ ## "Rodrigo" "A" "Botafogo"
684
+ \end{verbatim}
685
+
686
+ Or it can also be named by using the `c' function with named
687
+ parameters:
688
+
689
+ \begin{Shaded}
690
+ \begin{Highlighting}[]
691
+ \NormalTok{full_name = R.c(}\DataTypeTok{First}\NormalTok{: }\StringTok{"Rodrigo"}\NormalTok{, }\DataTypeTok{Middle}\NormalTok{: }\StringTok{"A"}\NormalTok{, }\DataTypeTok{Last}\NormalTok{: }\StringTok{"Botafogo"}\NormalTok{)}
692
+ \NormalTok{puts full_name}
693
+ \end{Highlighting}
694
+ \end{Shaded}
695
+
696
+ \begin{verbatim}
697
+ ## First Middle Last
698
+ ## "Rodrigo" "A" "Botafogo"
699
+ \end{verbatim}
700
+
701
+ \hypertarget{extracting-native-ruby-types-from-a-vector}{%
702
+ \subsection{Extracting Native Ruby Types from a
703
+ Vector}\label{extracting-native-ruby-types-from-a-vector}}
704
+
705
+ Vectors created with `R.c' are of class R::Vector. You might have
706
+ noticed that when indexing a vector, a new vector is returned, even if
707
+ this vector has one single element. In order to use R::Vector with other
708
+ ruby classes it might be necessary to extract the actual Ruby native
709
+ type from the vector. In order to do this extraction the
710
+ `\textgreater{}\textgreater{}' operator is used.
711
+
712
+ \begin{Shaded}
713
+ \begin{Highlighting}[]
714
+ \NormalTok{puts vec4}
715
+ \NormalTok{puts vec4 >> }\DecValTok{0}
716
+ \NormalTok{puts vec4 >> }\DecValTok{4}
717
+ \end{Highlighting}
718
+ \end{Shaded}
719
+
720
+ \begin{verbatim}
721
+ ## [1] 11 22 33 14 25 36 17 28 39
722
+ ## 11.0
723
+ ## 25.0
724
+ \end{verbatim}
725
+
726
+ Note that indexing with `\textgreater{}\textgreater{}' starts at 0 and
727
+ not at 1, also, we cannot do negative indexing.
728
+
729
+ \hypertarget{accessing-r-variables}{%
730
+ \section{Accessing R variables}\label{accessing-r-variables}}
731
+
732
+ Galaaz allows Ruby to access variables created in R. For example, the
733
+ `mtcars' data set is available in R and can be accessed from Ruby by
734
+ using the `tilde' operator followed by the symbol for the variable, in
735
+ this case `:mtcars'. In the code below method `outputs' is used to
736
+ output the `mtcars' data set nicely formatted in HTML by use of the
737
+ `kable' and `kable\_styling' functions. Method `outputs' is only
738
+ available when used with `gknit'.
739
+
740
+ \begin{Shaded}
741
+ \begin{Highlighting}[]
742
+ \NormalTok{outputs (~}\StringTok{:mtcars}\NormalTok{).kable.kable_styling}
743
+ \end{Highlighting}
744
+ \end{Shaded}
745
+
746
+ \begin{table}[H]
747
+ \centering
748
+ \begin{tabular}{l|r|r|r|r|r|r|r|r|r|r|r}
749
+ \hline
750
+ & mpg & cyl & disp & hp & drat & wt & qsec & vs & am & gear & carb\\
751
+ \hline
752
+ Mazda RX4 & 21.0 & 6 & 160.0 & 110 & 3.90 & 2.620 & 16.46 & 0 & 1 & 4 & 4\\
753
+ \hline
754
+ Mazda RX4 Wag & 21.0 & 6 & 160.0 & 110 & 3.90 & 2.875 & 17.02 & 0 & 1 & 4 & 4\\
755
+ \hline
756
+ Datsun 710 & 22.8 & 4 & 108.0 & 93 & 3.85 & 2.320 & 18.61 & 1 & 1 & 4 & 1\\
757
+ \hline
758
+ Hornet 4 Drive & 21.4 & 6 & 258.0 & 110 & 3.08 & 3.215 & 19.44 & 1 & 0 & 3 & 1\\
759
+ \hline
760
+ Hornet Sportabout & 18.7 & 8 & 360.0 & 175 & 3.15 & 3.440 & 17.02 & 0 & 0 & 3 & 2\\
761
+ \hline
762
+ Valiant & 18.1 & 6 & 225.0 & 105 & 2.76 & 3.460 & 20.22 & 1 & 0 & 3 & 1\\
763
+ \hline
764
+ Duster 360 & 14.3 & 8 & 360.0 & 245 & 3.21 & 3.570 & 15.84 & 0 & 0 & 3 & 4\\
765
+ \hline
766
+ Merc 240D & 24.4 & 4 & 146.7 & 62 & 3.69 & 3.190 & 20.00 & 1 & 0 & 4 & 2\\
767
+ \hline
768
+ Merc 230 & 22.8 & 4 & 140.8 & 95 & 3.92 & 3.150 & 22.90 & 1 & 0 & 4 & 2\\
769
+ \hline
770
+ Merc 280 & 19.2 & 6 & 167.6 & 123 & 3.92 & 3.440 & 18.30 & 1 & 0 & 4 & 4\\
771
+ \hline
772
+ Merc 280C & 17.8 & 6 & 167.6 & 123 & 3.92 & 3.440 & 18.90 & 1 & 0 & 4 & 4\\
773
+ \hline
774
+ Merc 450SE & 16.4 & 8 & 275.8 & 180 & 3.07 & 4.070 & 17.40 & 0 & 0 & 3 & 3\\
775
+ \hline
776
+ Merc 450SL & 17.3 & 8 & 275.8 & 180 & 3.07 & 3.730 & 17.60 & 0 & 0 & 3 & 3\\
777
+ \hline
778
+ Merc 450SLC & 15.2 & 8 & 275.8 & 180 & 3.07 & 3.780 & 18.00 & 0 & 0 & 3 & 3\\
779
+ \hline
780
+ Cadillac Fleetwood & 10.4 & 8 & 472.0 & 205 & 2.93 & 5.250 & 17.98 & 0 & 0 & 3 & 4\\
781
+ \hline
782
+ Lincoln Continental & 10.4 & 8 & 460.0 & 215 & 3.00 & 5.424 & 17.82 & 0 & 0 & 3 & 4\\
783
+ \hline
784
+ Chrysler Imperial & 14.7 & 8 & 440.0 & 230 & 3.23 & 5.345 & 17.42 & 0 & 0 & 3 & 4\\
785
+ \hline
786
+ Fiat 128 & 32.4 & 4 & 78.7 & 66 & 4.08 & 2.200 & 19.47 & 1 & 1 & 4 & 1\\
787
+ \hline
788
+ Honda Civic & 30.4 & 4 & 75.7 & 52 & 4.93 & 1.615 & 18.52 & 1 & 1 & 4 & 2\\
789
+ \hline
790
+ Toyota Corolla & 33.9 & 4 & 71.1 & 65 & 4.22 & 1.835 & 19.90 & 1 & 1 & 4 & 1\\
791
+ \hline
792
+ Toyota Corona & 21.5 & 4 & 120.1 & 97 & 3.70 & 2.465 & 20.01 & 1 & 0 & 3 & 1\\
793
+ \hline
794
+ Dodge Challenger & 15.5 & 8 & 318.0 & 150 & 2.76 & 3.520 & 16.87 & 0 & 0 & 3 & 2\\
795
+ \hline
796
+ AMC Javelin & 15.2 & 8 & 304.0 & 150 & 3.15 & 3.435 & 17.30 & 0 & 0 & 3 & 2\\
797
+ \hline
798
+ Camaro Z28 & 13.3 & 8 & 350.0 & 245 & 3.73 & 3.840 & 15.41 & 0 & 0 & 3 & 4\\
799
+ \hline
800
+ Pontiac Firebird & 19.2 & 8 & 400.0 & 175 & 3.08 & 3.845 & 17.05 & 0 & 0 & 3 & 2\\
801
+ \hline
802
+ Fiat X1-9 & 27.3 & 4 & 79.0 & 66 & 4.08 & 1.935 & 18.90 & 1 & 1 & 4 & 1\\
803
+ \hline
804
+ Porsche 914-2 & 26.0 & 4 & 120.3 & 91 & 4.43 & 2.140 & 16.70 & 0 & 1 & 5 & 2\\
805
+ \hline
806
+ Lotus Europa & 30.4 & 4 & 95.1 & 113 & 3.77 & 1.513 & 16.90 & 1 & 1 & 5 & 2\\
807
+ \hline
808
+ Ford Pantera L & 15.8 & 8 & 351.0 & 264 & 4.22 & 3.170 & 14.50 & 0 & 1 & 5 & 4\\
809
+ \hline
810
+ Ferrari Dino & 19.7 & 6 & 145.0 & 175 & 3.62 & 2.770 & 15.50 & 0 & 1 & 5 & 6\\
811
+ \hline
812
+ Maserati Bora & 15.0 & 8 & 301.0 & 335 & 3.54 & 3.570 & 14.60 & 0 & 1 & 5 & 8\\
813
+ \hline
814
+ Volvo 142E & 21.4 & 4 & 121.0 & 109 & 4.11 & 2.780 & 18.60 & 1 & 1 & 4 & 2\\
815
+ \hline
816
+ \end{tabular}
817
+ \end{table}
818
+
819
+ \hypertarget{matrix}{%
820
+ \section{Matrix}\label{matrix}}
821
+
822
+ A matrix is a collection of elements organized as a two dimensional
823
+ table. A matrix can be created by the `matrix' function:
824
+
825
+ \begin{Shaded}
826
+ \begin{Highlighting}[]
827
+ \NormalTok{mat = R.matrix(R.c(}\FloatTok{1.0}\NormalTok{, }\FloatTok{2.0}\NormalTok{, }\FloatTok{3.0}\NormalTok{, }\FloatTok{4.0}\NormalTok{, }\FloatTok{5.0}\NormalTok{, }\FloatTok{6.0}\NormalTok{, }\FloatTok{7.0}\NormalTok{, }\FloatTok{8.0}\NormalTok{, }\FloatTok{9.0}\NormalTok{),}
828
+ \StringTok{nrow: }\DecValTok{3}\NormalTok{,}
829
+ \StringTok{ncol: }\DecValTok{3}\NormalTok{)}
830
+
831
+ \NormalTok{puts mat}
832
+ \end{Highlighting}
833
+ \end{Shaded}
834
+
835
+ \begin{verbatim}
836
+ ## [,1] [,2] [,3]
837
+ ## [1,] 1 4 7
838
+ ## [2,] 2 5 8
839
+ ## [3,] 3 6 9
840
+ \end{verbatim}
841
+
842
+ Note that matrix data is organized by column first. It is possible to
843
+ organize the matrix memory by row first passing an extra argument to the
844
+ `matrix' function:
845
+
846
+ \begin{Shaded}
847
+ \begin{Highlighting}[]
848
+ \NormalTok{mat_row = R.matrix(R.c(}\FloatTok{1.0}\NormalTok{, }\FloatTok{2.0}\NormalTok{, }\FloatTok{3.0}\NormalTok{, }\FloatTok{4.0}\NormalTok{, }\FloatTok{5.0}\NormalTok{, }\FloatTok{6.0}\NormalTok{, }\FloatTok{7.0}\NormalTok{, }\FloatTok{8.0}\NormalTok{, }\FloatTok{9.0}\NormalTok{),}
849
+ \StringTok{nrow: }\DecValTok{3}\NormalTok{,}
850
+ \StringTok{ncol: }\DecValTok{3}\NormalTok{,}
851
+ \StringTok{byrow: }\DecValTok{true}\NormalTok{)}
852
+
853
+ \NormalTok{puts mat_row}
854
+ \end{Highlighting}
855
+ \end{Shaded}
856
+
857
+ \begin{verbatim}
858
+ ## [,1] [,2] [,3]
859
+ ## [1,] 1 2 3
860
+ ## [2,] 4 5 6
861
+ ## [3,] 7 8 9
862
+ \end{verbatim}
863
+
864
+ \hypertarget{indexing-a-matrix}{%
865
+ \subsection{Indexing a Matrix}\label{indexing-a-matrix}}
866
+
867
+ A matrix can be indexed by {[}row, column{]}:
868
+
869
+ \begin{Shaded}
870
+ \begin{Highlighting}[]
871
+ \NormalTok{puts mat_row[}\DecValTok{1}\NormalTok{, }\DecValTok{1}\NormalTok{]}
872
+ \NormalTok{puts mat_row[}\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{]}
873
+ \end{Highlighting}
874
+ \end{Shaded}
875
+
876
+ \begin{verbatim}
877
+ ## [1] 1
878
+ ## [1] 6
879
+ \end{verbatim}
880
+
881
+ It is possible to index an entire row or column with the `:all' keyword
882
+
883
+ \begin{Shaded}
884
+ \begin{Highlighting}[]
885
+ \NormalTok{puts mat_row[}\DecValTok{1}\NormalTok{, }\StringTok{:all}\NormalTok{]}
886
+ \NormalTok{puts mat_row[}\StringTok{:all}\NormalTok{, }\DecValTok{2}\NormalTok{]}
887
+ \end{Highlighting}
888
+ \end{Shaded}
889
+
890
+ \begin{verbatim}
891
+ ## [1] 1 2 3
892
+ ## [1] 2 5 8
893
+ \end{verbatim}
894
+
895
+ Indexing with a vector is also possible for matrices. In the following
896
+ example we want rows 1 and 3 and columns 2 and 3 building a 2 x 2
897
+ matrix.
898
+
899
+ \begin{Shaded}
900
+ \begin{Highlighting}[]
901
+ \NormalTok{puts mat_row[R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{3}\NormalTok{), R.c(}\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{)]}
902
+ \end{Highlighting}
903
+ \end{Shaded}
904
+
905
+ \begin{verbatim}
906
+ ## [,1] [,2]
907
+ ## [1,] 2 3
908
+ ## [2,] 8 9
909
+ \end{verbatim}
910
+
911
+ Matrices can be combined with function `rbind':
912
+
913
+ \begin{Shaded}
914
+ \begin{Highlighting}[]
915
+ \NormalTok{puts mat_row.rbind(mat)}
916
+ \end{Highlighting}
917
+ \end{Shaded}
918
+
919
+ \begin{verbatim}
920
+ ## [,1] [,2] [,3]
921
+ ## [1,] 1 2 3
922
+ ## [2,] 4 5 6
923
+ ## [3,] 7 8 9
924
+ ## [4,] 1 4 7
925
+ ## [5,] 2 5 8
926
+ ## [6,] 3 6 9
927
+ \end{verbatim}
928
+
929
+ and `cbind':
930
+
931
+ \begin{Shaded}
932
+ \begin{Highlighting}[]
933
+ \NormalTok{puts mat_row.cbind(mat)}
934
+ \end{Highlighting}
935
+ \end{Shaded}
936
+
937
+ \begin{verbatim}
938
+ ## [,1] [,2] [,3] [,4] [,5] [,6]
939
+ ## [1,] 1 2 3 1 4 7
940
+ ## [2,] 4 5 6 2 5 8
941
+ ## [3,] 7 8 9 3 6 9
942
+ \end{verbatim}
943
+
944
+ \hypertarget{list}{%
945
+ \section{List}\label{list}}
946
+
947
+ A list is a data structure that can contain sublists of different types,
948
+ while vector and matrix can only hold one type of element.
949
+
950
+ \begin{Shaded}
951
+ \begin{Highlighting}[]
952
+ \NormalTok{nums = R.c(}\FloatTok{1.0}\NormalTok{, }\FloatTok{2.0}\NormalTok{, }\FloatTok{3.0}\NormalTok{)}
953
+ \NormalTok{strs = R.c(}\StringTok{"a"}\NormalTok{, }\StringTok{"b"}\NormalTok{, }\StringTok{"c"}\NormalTok{, }\StringTok{"d"}\NormalTok{)}
954
+ \NormalTok{bool = R.c(}\DecValTok{true}\NormalTok{, }\DecValTok{true}\NormalTok{, }\DecValTok{false}\NormalTok{)}
955
+ \NormalTok{lst = R.list(}\StringTok{nums: }\NormalTok{nums, }\StringTok{strs: }\NormalTok{strs, }\StringTok{bool: }\NormalTok{bool)}
956
+ \NormalTok{puts lst}
957
+ \end{Highlighting}
958
+ \end{Shaded}
959
+
960
+ \begin{verbatim}
961
+ ## $nums
962
+ ## [1] 1 2 3
963
+ ##
964
+ ## $strs
965
+ ## [1] "a" "b" "c" "d"
966
+ ##
967
+ ## $bool
968
+ ## [1] TRUE TRUE FALSE
969
+ \end{verbatim}
970
+
971
+ Note that `lst' elements are named elements.
972
+
973
+ \hypertarget{list-indexing}{%
974
+ \subsection{List Indexing}\label{list-indexing}}
975
+
976
+ List indexing, also called slicing, is done using the `{[}{]}' operator
977
+ and the `{[}{[}{]}{]}' operator. Let's first start with the `{[}{]}'
978
+ operator. The list above has three sublists; indexing with `{[}{]}' will
979
+ return one of the sublists.
980
+
981
+ \begin{Shaded}
982
+ \begin{Highlighting}[]
983
+ \NormalTok{puts lst[}\DecValTok{1}\NormalTok{]}
984
+ \end{Highlighting}
985
+ \end{Shaded}
986
+
987
+ \begin{verbatim}
988
+ ## $nums
989
+ ## [1] 1 2 3
990
+ \end{verbatim}
991
+
992
+ Note that when using `{[}{]}' a new list is returned. When using the
993
+ double square bracket operator the value returned is the actual element
994
+ of the list in the given position and not a slice of the original list
995
+
996
+ \begin{Shaded}
997
+ \begin{Highlighting}[]
998
+ \NormalTok{puts lst[[}\DecValTok{1}\NormalTok{]]}
999
+ \end{Highlighting}
1000
+ \end{Shaded}
1001
+
1002
+ \begin{verbatim}
1003
+ ## [1] 1 2 3
1004
+ \end{verbatim}
1005
+
1006
+ When elements are named, as done with lst, indexing can be done by
1007
+ name:
1008
+
1009
+ \begin{Shaded}
1010
+ \begin{Highlighting}[]
1011
+ \NormalTok{puts lst[[}\StringTok{'bool'}\NormalTok{]][[}\DecValTok{1}\NormalTok{]] >> }\DecValTok{0}
1012
+ \end{Highlighting}
1013
+ \end{Shaded}
1014
+
1015
+ \begin{verbatim}
1016
+ ## true
1017
+ \end{verbatim}
1018
+
1019
+ In this example, first the `bool' element of the list was extracted, not
1020
+ as a list, but as a vector, then the first element of the vector was
1021
+ extracted (note that vectors also accept the `{[}{[}{]}{]}' operator)
1022
+ and then the vector was indexed by its first element, extracting the
1023
+ native Ruby type.
1024
+
1025
+ \hypertarget{data-frame}{%
1026
+ \section{Data Frame}\label{data-frame}}
1027
+
1028
+ A data frame is a table like structure in which each column has the same
1029
+ number of rows. Data frames are the basic structure for storing data for
1030
+ data analysis. We have already seen a data frame previously when we
1031
+ accessed variable `\textasciitilde{}:mtcars'. In order to create a data
1032
+ frame, function `data\_\_frame' is used:
1033
+
1034
+ \begin{Shaded}
1035
+ \begin{Highlighting}[]
1036
+ \NormalTok{df = R.data__frame(}
1037
+ \StringTok{year: }\NormalTok{R.c(}\DecValTok{2010}\NormalTok{, }\DecValTok{2011}\NormalTok{, }\DecValTok{2012}\NormalTok{),}
1038
+ \StringTok{income: }\NormalTok{R.c(}\FloatTok{1000.0}\NormalTok{, }\FloatTok{1500.0}\NormalTok{, }\FloatTok{2000.0}\NormalTok{))}
1039
+
1040
+ \NormalTok{puts df}
1041
+ \end{Highlighting}
1042
+ \end{Shaded}
1043
+
1044
+ \begin{verbatim}
1045
+ ## year income
1046
+ ## 1 2010 1000
1047
+ ## 2 2011 1500
1048
+ ## 3 2012 2000
1049
+ \end{verbatim}
1050
+
1051
+ \hypertarget{data-frame-indexing}{%
1052
+ \subsection{Data Frame Indexing}\label{data-frame-indexing}}
1053
+
1054
+ A data frame can be indexed the same way as a matrix, by using `{[}row,
1055
+ column{]}', where row and column can either be a numeric or the name of
1056
+ the row or column
1057
+
1058
+ \begin{Shaded}
1059
+ \begin{Highlighting}[]
1060
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{).head}
1061
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{)[}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{]}
1062
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{)[}\StringTok{'Datsun 710'}\NormalTok{, }\StringTok{'mpg'}\NormalTok{]}
1063
+ \end{Highlighting}
1064
+ \end{Shaded}
1065
+
1066
+ \begin{verbatim}
1067
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1068
+ ## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
1069
+ ## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
1070
+ ## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
1071
+ ## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
1072
+ ## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
1073
+ ## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
1074
+ ## [1] 6
1075
+ ## [1] 22.8
1076
+ \end{verbatim}
1077
+
1078
+ Extracting a column from a data frame as a vector can be done by using
1079
+ the double square bracket operator:
1080
+
1081
+ \begin{Shaded}
1082
+ \begin{Highlighting}[]
1083
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{)[[}\StringTok{'mpg'}\NormalTok{]]}
1084
+ \end{Highlighting}
1085
+ \end{Shaded}
1086
+
1087
+ \begin{verbatim}
1088
+ ## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
1089
+ ## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
1090
+ ## [29] 15.8 19.7 15.0 21.4
1091
+ \end{verbatim}
1092
+
1093
+ A data frame column can also be accessed as if it were an instance
1094
+ variable of the data frame:
1095
+
1096
+ \begin{Shaded}
1097
+ \begin{Highlighting}[]
1098
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{).mpg}
1099
+ \end{Highlighting}
1100
+ \end{Shaded}
1101
+
1102
+ \begin{verbatim}
1103
+ ## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
1104
+ ## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
1105
+ ## [29] 15.8 19.7 15.0 21.4
1106
+ \end{verbatim}
1107
+
1108
+ Slicing a data frame can be done by indexing it with a vector (we use
1109
+ `head' to reduce the output):
1110
+
1111
+ \begin{Shaded}
1112
+ \begin{Highlighting}[]
1113
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{)[R.c(}\StringTok{'mpg'}\NormalTok{, }\StringTok{'hp'}\NormalTok{)].head}
1114
+ \end{Highlighting}
1115
+ \end{Shaded}
1116
+
1117
+ \begin{verbatim}
1118
+ ## mpg hp
1119
+ ## Mazda RX4 21.0 110
1120
+ ## Mazda RX4 Wag 21.0 110
1121
+ ## Datsun 710 22.8 93
1122
+ ## Hornet 4 Drive 21.4 110
1123
+ ## Hornet Sportabout 18.7 175
1124
+ ## Valiant 18.1 105
1125
+ \end{verbatim}
1126
+
1127
+ A row slice can be obtained by indexing by row and using the `:all'
1128
+ keyword for the column:
1129
+
1130
+ \begin{Shaded}
1131
+ \begin{Highlighting}[]
1132
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{)[R.c(}\StringTok{'Datsun 710'}\NormalTok{, }\StringTok{'Camaro Z28'}\NormalTok{), }\StringTok{:all}\NormalTok{]}
1133
+ \end{Highlighting}
1134
+ \end{Shaded}
1135
+
1136
+ \begin{verbatim}
1137
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1138
+ ## Datsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1
1139
+ ## Camaro Z28 13.3 8 350 245 3.73 3.84 15.41 0 0 3 4
1140
+ \end{verbatim}
1141
+
1142
+ Finally, a data frame can also be indexed with a logical vector. In this
1143
+ next example, the `am' column of :mtcars is compared with 0 (with method
1144
+ `eq'). When `am' is equal to 0 the car is automatic. So, by doing
1145
+ `(\textasciitilde{}:mtcars).am.eq 0' a logical vector is created with
1146
+ `true' whenever `am' is 0 and `false' otherwise.
1147
+
1148
+ \begin{Shaded}
1149
+ \begin{Highlighting}[]
1150
+ \CommentTok{# obtain a vector with 'true' for cars with automatic transmission}
1151
+ \NormalTok{automatic = (~}\StringTok{:mtcars}\NormalTok{).am.eq }\DecValTok{0}
1152
+ \NormalTok{puts automatic}
1153
+ \end{Highlighting}
1154
+ \end{Shaded}
1155
+
1156
+ \begin{verbatim}
1157
+ ## [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
1158
+ ## [12] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE
1159
+ ## [23] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
1160
+ \end{verbatim}
1161
+
1162
+ Using this logical vector, the data frame is indexed, returning a new
1163
+ data frame in which all cars have automatic transmission.
1164
+
1165
+ \begin{Shaded}
1166
+ \begin{Highlighting}[]
1167
+ \CommentTok{# slice the data frame by using this vector}
1168
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{)[automatic, }\StringTok{:all}\NormalTok{]}
1169
+ \end{Highlighting}
1170
+ \end{Shaded}
1171
+
1172
+ \begin{verbatim}
1173
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1174
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
1175
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
1176
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
1177
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
1178
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
1179
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
1180
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
1181
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
1182
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
1183
+ ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
1184
+ ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
1185
+ ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
1186
+ ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
1187
+ ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
1188
+ ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
1189
+ ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
1190
+ ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
1191
+ ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
1192
+ ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
1193
+ \end{verbatim}
1194
+
1195
+ \hypertarget{writing-expressions-in-galaaz}{%
1196
+ \section{Writing Expressions in
1197
+ Galaaz}\label{writing-expressions-in-galaaz}}
1198
+
1199
+ Galaaz extends Ruby to work with complex expressions, similar to R's
1200
+ expressions built with `quote' (base R) or `quo' (tidyverse). Let's take
1201
+ a look at some of those expressions.
1202
+
1203
+ \hypertarget{expressions-from-operators}{%
1204
+ \subsection{Expressions from
1205
+ operators}\label{expressions-from-operators}}
1206
+
1207
+ The code below creates an expression summing two symbols
1208
+
1209
+ \begin{Shaded}
1210
+ \begin{Highlighting}[]
1211
+ \NormalTok{exp1 = }\StringTok{:a}\NormalTok{ + }\StringTok{:b}
1212
+ \NormalTok{puts exp1}
1213
+ \end{Highlighting}
1214
+ \end{Shaded}
1215
+
1216
+ \begin{verbatim}
1217
+ ## a + b
1218
+ \end{verbatim}
1219
+
1220
+ We can build any complex mathematical expression
1221
+
1222
+ \begin{Shaded}
1223
+ \begin{Highlighting}[]
1224
+ \NormalTok{exp2 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) * }\FloatTok{2.0}\NormalTok{ + }\StringTok{:c}\NormalTok{ ** }\DecValTok{2}\NormalTok{ / }\StringTok{:z}
1225
+ \NormalTok{puts exp2}
1226
+ \end{Highlighting}
1227
+ \end{Shaded}
1228
+
1229
+ \begin{verbatim}
1230
+ ## (a + b) * 2 + c^2L/z
1231
+ \end{verbatim}
1232
+
1233
+ It is also possible to use inequality operators in building expressions
1234
+
1235
+ \begin{Shaded}
1236
+ \begin{Highlighting}[]
1237
+ \NormalTok{exp3 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) >= }\StringTok{:z}
1238
+ \NormalTok{puts exp3}
1239
+ \end{Highlighting}
1240
+ \end{Shaded}
1241
+
1242
+ \begin{verbatim}
1243
+ ## a + b >= z
1244
+ \end{verbatim}
1245
+
1246
+ Galaaz provides both symbolic representations for operators, such as
1247
+ (\textgreater{}, \textless{}, !=) and functional notation for those
1248
+ operators such as (.gt, .ge, etc.). So the same expression written above
1249
+ can also be written as
1250
+
1251
+ \begin{Shaded}
1252
+ \begin{Highlighting}[]
1253
+ \NormalTok{exp4 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{).ge }\StringTok{:z}
1254
+ \NormalTok{puts exp4}
1255
+ \end{Highlighting}
1256
+ \end{Shaded}
1257
+
1258
+ \begin{verbatim}
1259
+ ## a + b >= z
1260
+ \end{verbatim}
1261
+
1262
+ Two types of expressions can only be created with the functional
1263
+ representation of the operators, those are expressions involving `==',
1264
+ and `='. In order to write an expression involving `==' we need to use
1265
+ the method `.eq' and for `=' we need the function `.assign'
1266
+
1267
+ \begin{Shaded}
1268
+ \begin{Highlighting}[]
1269
+ \NormalTok{exp5 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{).eq }\StringTok{:z}
1270
+ \NormalTok{puts exp5}
1271
+ \end{Highlighting}
1272
+ \end{Shaded}
1273
+
1274
+ \begin{verbatim}
1275
+ ## a + b == z
1276
+ \end{verbatim}
1277
+
1278
+ \begin{Shaded}
1279
+ \begin{Highlighting}[]
1280
+ \NormalTok{exp6 = }\StringTok{:y}\NormalTok{.assign }\StringTok{:a}\NormalTok{ + }\StringTok{:b}
1281
+ \NormalTok{puts exp6}
1282
+ \end{Highlighting}
1283
+ \end{Shaded}
1284
+
1285
+ \begin{verbatim}
1286
+ ## y <- a + b
1287
+ \end{verbatim}
1288
+
1289
+ In general we think that using the functional notation is preferable to
1290
+ using the symbolic notation as otherwise, we end up writing invalid
1291
+ expressions such as
1292
+
1293
+ \begin{Shaded}
1294
+ \begin{Highlighting}[]
1295
+ \NormalTok{exp_wrong = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) == }\StringTok{:z}
1296
+ \NormalTok{puts exp_wrong}
1297
+ \end{Highlighting}
1298
+ \end{Shaded}
1299
+
1300
+ \begin{verbatim}
1301
+ ## Message:
1302
+ ## Error in function (x, y, num.eq = TRUE, single.NA = TRUE, attrib.as.set = TRUE, :
1303
+ ## object 'a' not found (RError)
1304
+ ## Translated to internal error
1305
+ \end{verbatim}
1306
+
1307
+ and it might be difficult to understand what is going on here. The
1308
+ problem lies with the fact that when using `==' we are comparing
1309
+ expression (:a + :b) to expression :z with `=='. When the comparison is
1310
+ executed, the system tries to evaluate :a, :b and :z, and those symbols
1311
+ at this time are not bound to anything and we get an ``object `a' not
1312
+ found'' message. If we only use functional notation, this type of error
1313
+ will not occur.
1314
+
1315
+ \hypertarget{expressions-with-r-methods}{%
1316
+ \subsection{Expressions with R
1317
+ methods}\label{expressions-with-r-methods}}
1318
+
1319
+ It is often necessary to create an expression that uses a method or
1320
+ function. For instance, in mathematics, it's quite natural to write an
1321
+ expression such as \(y = sin(x)\). In this case, the `sin' function is
1322
+ part of the expression and should not be immediately executed. Now, let's
1323
+ say that `x' is an angle of 45\(^\circ\) and we actually want our
1324
+ expression to be \(y = 0.707...\). When we want the function to be part
1325
+ of the expression, we call the function preceding it by the letter E,
1326
+ such as `E.sin(x)'
1327
+
1328
+ \begin{Shaded}
1329
+ \begin{Highlighting}[]
1330
+ \NormalTok{exp7 = }\StringTok{:y}\NormalTok{.assign E.sin(}\StringTok{:x}\NormalTok{)}
1331
+ \NormalTok{puts exp7}
1332
+ \end{Highlighting}
1333
+ \end{Shaded}
1334
+
1335
+ \begin{verbatim}
1336
+ ## y <- sin(x)
1337
+ \end{verbatim}
1338
+
1339
+ Expressions can also be written using `.' notation:
1340
+
1341
+ \begin{Shaded}
1342
+ \begin{Highlighting}[]
1343
+ \NormalTok{exp8 = }\StringTok{:y}\NormalTok{.assign }\StringTok{:x}\NormalTok{.sin}
1344
+ \NormalTok{puts exp8}
1345
+ \end{Highlighting}
1346
+ \end{Shaded}
1347
+
1348
+ \begin{verbatim}
1349
+ ## y <- sin(x)
1350
+ \end{verbatim}
1351
+
1352
+ When a function has multiple arguments, the first one can be used before
1353
+ the `.':
1354
+
1355
+ \begin{Shaded}
1356
+ \begin{Highlighting}[]
1357
+ \NormalTok{exp9 = }\StringTok{:x}\NormalTok{.c(}\StringTok{:y}\NormalTok{)}
1358
+ \NormalTok{puts exp9}
1359
+ \end{Highlighting}
1360
+ \end{Shaded}
1361
+
1362
+ \begin{verbatim}
1363
+ ## c(x, y)
1364
+ \end{verbatim}
1365
+
1366
+ \hypertarget{evaluating-an-expression}{%
1367
+ \subsection{Evaluating an Expression}\label{evaluating-an-expression}}
1368
+
1369
+ Expressions can be evaluated by calling function `eval' with a binding.
1370
+ A binding can be provided with a list:
1371
+
1372
+ \begin{Shaded}
1373
+ \begin{Highlighting}[]
1374
+ \NormalTok{exp = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) * }\FloatTok{2.0}\NormalTok{ + }\StringTok{:c}\NormalTok{ ** }\DecValTok{2}\NormalTok{ / }\StringTok{:z}
1375
+ \NormalTok{puts exp.eval(R.list(}\StringTok{a: }\DecValTok{10}\NormalTok{, }\StringTok{b: }\DecValTok{20}\NormalTok{, }\StringTok{c: }\DecValTok{30}\NormalTok{, }\StringTok{z: }\DecValTok{40}\NormalTok{))}
1376
+ \end{Highlighting}
1377
+ \end{Shaded}
1378
+
1379
+ \begin{verbatim}
1380
+ ## [1] 82.5
1381
+ \end{verbatim}
1382
+
1383
+ \ldots{} with a data frame:
1384
+
1385
+ \begin{Shaded}
1386
+ \begin{Highlighting}[]
1387
+ \NormalTok{df = R.data__frame(}
1388
+ \StringTok{a: }\NormalTok{R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{),}
1389
+ \StringTok{b: }\NormalTok{R.c(}\DecValTok{10}\NormalTok{, }\DecValTok{20}\NormalTok{, }\DecValTok{30}\NormalTok{),}
1390
+ \StringTok{c: }\NormalTok{R.c(}\DecValTok{100}\NormalTok{, }\DecValTok{200}\NormalTok{, }\DecValTok{300}\NormalTok{),}
1391
+ \StringTok{z: }\NormalTok{R.c(}\DecValTok{1000}\NormalTok{, }\DecValTok{2000}\NormalTok{, }\DecValTok{3000}\NormalTok{))}
1392
+
1393
+ \NormalTok{puts exp.eval(df)}
1394
+ \end{Highlighting}
1395
+ \end{Shaded}
1396
+
1397
+ \begin{verbatim}
1398
+ ## [1] 32 64 96
1399
+ \end{verbatim}
1400
+
1401
+ \hypertarget{manipulating-data}{%
1402
+ \section{Manipulating Data}\label{manipulating-data}}
1403
+
1404
+ One of the major benefits of Galaaz is to bring strong data manipulation
1405
+ to Ruby. The following examples were extracted from Hadley's ``R for
1406
+ Data Science'' (\url{https://r4ds.had.co.nz/}). This is a highly
1407
+ recommended book for those not already familiar with the `tidyverse'
1408
+ style of programming in R. In the sections to follow, we will limit
1409
+ ourselves to converting the R code to Galaaz.
1410
+
1411
+ For these examples, we will investigate the nycflights13 data set
1412
+ available on the package by the same name. We use function
1413
+ `R.install\_and\_loads' that checks if the library is available locally,
1414
+ and if not, installs it. This data frame contains all 336,776 flights
1415
+ that departed from New York City in 2013. The data comes from the US
1416
+ Bureau of Transportation Statistics.
1417
+
1418
+ \begin{Shaded}
1419
+ \begin{Highlighting}[]
1420
+ \NormalTok{R.install_and_loads(}\StringTok{'nycflights13'}\NormalTok{)}
1421
+ \NormalTok{R.library(}\StringTok{'dplyr'}\NormalTok{)}
1422
+ \end{Highlighting}
1423
+ \end{Shaded}
1424
+
1425
+ \begin{Shaded}
1426
+ \begin{Highlighting}[]
1427
+ \NormalTok{flights = ~}\StringTok{:flights}
1428
+ \NormalTok{puts flights.head.as__data__frame}
1429
+ \end{Highlighting}
1430
+ \end{Shaded}
1431
+
1432
+ \begin{verbatim}
1433
+ ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1434
+ ## 1 2013 1 1 517 515 2 830 819
1435
+ ## 2 2013 1 1 533 529 4 850 830
1436
+ ## 3 2013 1 1 542 540 2 923 850
1437
+ ## 4 2013 1 1 544 545 -1 1004 1022
1438
+ ## 5 2013 1 1 554 600 -6 812 837
1439
+ ## 6 2013 1 1 554 558 -4 740 728
1440
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
1441
+ ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
1442
+ ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
1443
+ ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
1444
+ ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
1445
+ ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
1446
+ ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
1447
+ ## minute time_hour
1448
+ ## 1 15 2013-01-01 05:00:00
1449
+ ## 2 29 2013-01-01 05:00:00
1450
+ ## 3 40 2013-01-01 05:00:00
1451
+ ## 4 45 2013-01-01 05:00:00
1452
+ ## 5 0 2013-01-01 06:00:00
1453
+ ## 6 58 2013-01-01 05:00:00
1454
+ \end{verbatim}
1455
+
1456
+ \hypertarget{filtering-rows-with-filter}{%
1457
+ \subsection{Filtering rows with
1458
+ Filter}\label{filtering-rows-with-filter}}
1459
+
1460
+ In this example we filter the flights data set by giving to the filter
1461
+ function two expressions: the first :month.eq 1 and the second :day.eq 1
1462
+
1463
+ \begin{Shaded}
1464
+ \begin{Highlighting}[]
1465
+ \NormalTok{puts flights.filter((}\StringTok{:month}\NormalTok{.eq }\DecValTok{1}\NormalTok{), (}\StringTok{:day}\NormalTok{.eq }\DecValTok{1}\NormalTok{)).head.as__data__frame}
1466
+ \end{Highlighting}
1467
+ \end{Shaded}
1468
+
1469
+ \begin{verbatim}
1470
+ ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1471
+ ## 1 2013 1 1 517 515 2 830 819
1472
+ ## 2 2013 1 1 533 529 4 850 830
1473
+ ## 3 2013 1 1 542 540 2 923 850
1474
+ ## 4 2013 1 1 544 545 -1 1004 1022
1475
+ ## 5 2013 1 1 554 600 -6 812 837
1476
+ ## 6 2013 1 1 554 558 -4 740 728
1477
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
1478
+ ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
1479
+ ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
1480
+ ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
1481
+ ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
1482
+ ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
1483
+ ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
1484
+ ## minute time_hour
1485
+ ## 1 15 2013-01-01 05:00:00
1486
+ ## 2 29 2013-01-01 05:00:00
1487
+ ## 3 40 2013-01-01 05:00:00
1488
+ ## 4 45 2013-01-01 05:00:00
1489
+ ## 5 0 2013-01-01 06:00:00
1490
+ ## 6 58 2013-01-01 05:00:00
1491
+ \end{verbatim}
1492
+
1493
+ \hypertarget{logical-operators}{%
1494
+ \subsection{Logical Operators}\label{logical-operators}}
1495
+
1496
+ All flights that departed in November or December
1497
+
1498
+ \begin{Shaded}
1499
+ \begin{Highlighting}[]
1500
+ \NormalTok{puts flights.filter((}\StringTok{:month}\NormalTok{.eq }\DecValTok{11}\NormalTok{) | (}\StringTok{:month}\NormalTok{.eq }\DecValTok{12}\NormalTok{)).head.as__data__frame}
1501
+ \end{Highlighting}
1502
+ \end{Shaded}
1503
+
1504
+ \begin{verbatim}
1505
+ ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1506
+ ## 1 2013 11 1 5 2359 6 352 345
1507
+ ## 2 2013 11 1 35 2250 105 123 2356
1508
+ ## 3 2013 11 1 455 500 -5 641 651
1509
+ ## 4 2013 11 1 539 545 -6 856 827
1510
+ ## 5 2013 11 1 542 545 -3 831 855
1511
+ ## 6 2013 11 1 549 600 -11 912 923
1512
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
1513
+ ## 1 7 B6 745 N568JB JFK PSE 205 1617 23
1514
+ ## 2 87 B6 1816 N353JB JFK SYR 36 209 22
1515
+ ## 3 -10 US 1895 N192UW EWR CLT 88 529 5
1516
+ ## 4 29 UA 1714 N38727 LGA IAH 229 1416 5
1517
+ ## 5 -24 AA 2243 N5CLAA JFK MIA 147 1089 5
1518
+ ## 6 -11 UA 303 N595UA JFK SFO 359 2586 6
1519
+ ## minute time_hour
1520
+ ## 1 59 2013-11-01 23:00:00
1521
+ ## 2 50 2013-11-01 22:00:00
1522
+ ## 3 0 2013-11-01 05:00:00
1523
+ ## 4 45 2013-11-01 05:00:00
1524
+ ## 5 45 2013-11-01 05:00:00
1525
+ ## 6 0 2013-11-01 06:00:00
1526
+ \end{verbatim}
1527
+
1528
+ The same as above, but using the `in' operator. In R, it is possible to
1529
+ define many operators of the form \%name\%. The \%in\% operator checks if a
1530
+ value is in a vector. In order to use those operators from Galaaz the
1531
+ `.\_' method is used, where the first argument is the operator's symbol,
1532
+ in this case `:in' and the second argument is the vector:
1533
+
1534
+ \begin{Shaded}
1535
+ \begin{Highlighting}[]
1536
+ \NormalTok{puts flights.filter(}\StringTok{:month}\NormalTok{._ }\StringTok{:in}\NormalTok{, R.c(}\DecValTok{11}\NormalTok{, }\DecValTok{12}\NormalTok{)).head.as__data__frame}
1537
+ \end{Highlighting}
1538
+ \end{Shaded}
1539
+
1540
+ \begin{verbatim}
1541
+ ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1542
+ ## 1 2013 11 1 5 2359 6 352 345
1543
+ ## 2 2013 11 1 35 2250 105 123 2356
1544
+ ## 3 2013 11 1 455 500 -5 641 651
1545
+ ## 4 2013 11 1 539 545 -6 856 827
1546
+ ## 5 2013 11 1 542 545 -3 831 855
1547
+ ## 6 2013 11 1 549 600 -11 912 923
1548
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
1549
+ ## 1 7 B6 745 N568JB JFK PSE 205 1617 23
1550
+ ## 2 87 B6 1816 N353JB JFK SYR 36 209 22
1551
+ ## 3 -10 US 1895 N192UW EWR CLT 88 529 5
1552
+ ## 4 29 UA 1714 N38727 LGA IAH 229 1416 5
1553
+ ## 5 -24 AA 2243 N5CLAA JFK MIA 147 1089 5
1554
+ ## 6 -11 UA 303 N595UA JFK SFO 359 2586 6
1555
+ ## minute time_hour
1556
+ ## 1 59 2013-11-01 23:00:00
1557
+ ## 2 50 2013-11-01 22:00:00
1558
+ ## 3 0 2013-11-01 05:00:00
1559
+ ## 4 45 2013-11-01 05:00:00
1560
+ ## 5 45 2013-11-01 05:00:00
1561
+ ## 6 0 2013-11-01 06:00:00
1562
+ \end{verbatim}
1563
+
1564
+ \hypertarget{filtering-with-na-not-available}{%
1565
+ \subsection{Filtering with NA (Not
1566
+ Available)}\label{filtering-with-na-not-available}}
1567
+
1568
+ Let's first create a `tibble' with a Not Available value (R::NA).
1569
+ Tibbles are a modern version of a data frame and operate very similarly
1570
+ to one. They differ in how they output the values and in the result of some
1571
+ subsetting operations that are more consistent than what is obtained
1572
+ from a data frame.
1573
+
1574
+ \begin{Shaded}
1575
+ \begin{Highlighting}[]
1576
+ \NormalTok{df = R.tibble(}\StringTok{x: }\NormalTok{R.c(}\DecValTok{1}\NormalTok{, R::}\DataTypeTok{NA}\NormalTok{, }\DecValTok{3}\NormalTok{))}
1577
+ \NormalTok{puts df.as__data__frame}
1578
+ \end{Highlighting}
1579
+ \end{Shaded}
1580
+
1581
+ \begin{verbatim}
1582
+ ## x
1583
+ ## 1 1
1584
+ ## 2 NA
1585
+ ## 3 3
1586
+ \end{verbatim}
1587
+
1588
+ Now filtering by :x \textgreater{} 1 shows all lines that satisfy this
1589
+ condition, where the row with R::NA does not.
1590
+
1591
+ \begin{Shaded}
1592
+ \begin{Highlighting}[]
1593
+ \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{ > }\DecValTok{1}\NormalTok{).as__data__frame}
1594
+ \end{Highlighting}
1595
+ \end{Shaded}
1596
+
1597
+ \begin{verbatim}
1598
+ ## x
1599
+ ## 1 3
1600
+ \end{verbatim}
1601
+
1602
+ To match an NA use method 'is\_\_na'
1603
+
1604
+ \begin{Shaded}
1605
+ \begin{Highlighting}[]
1606
+ \NormalTok{puts df.filter((}\StringTok{:x}\NormalTok{.is__na) | (}\StringTok{:x}\NormalTok{ > }\DecValTok{1}\NormalTok{)).as__data__frame}
1607
+ \end{Highlighting}
1608
+ \end{Shaded}
1609
+
1610
+ \begin{verbatim}
1611
+ ## x
1612
+ ## 1 NA
1613
+ ## 2 3
1614
+ \end{verbatim}
1615
+
1616
+ \hypertarget{arrange-rows-with-arrange}{%
1617
+ \subsection{Arrange Rows with arrange}\label{arrange-rows-with-arrange}}
1618
+
1619
+ Arrange reorders the rows of a data frame by the given arguments.
1620
+
1621
+ \begin{Shaded}
1622
+ \begin{Highlighting}[]
1623
+ \NormalTok{puts flights.arrange(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{).head.as__data__frame}
1624
+ \end{Highlighting}
1625
+ \end{Shaded}
1626
+
1627
+ \begin{verbatim}
1628
+ ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1629
+ ## 1 2013 1 1 517 515 2 830 819
1630
+ ## 2 2013 1 1 533 529 4 850 830
1631
+ ## 3 2013 1 1 542 540 2 923 850
1632
+ ## 4 2013 1 1 544 545 -1 1004 1022
1633
+ ## 5 2013 1 1 554 600 -6 812 837
1634
+ ## 6 2013 1 1 554 558 -4 740 728
1635
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
1636
+ ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
1637
+ ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
1638
+ ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
1639
+ ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
1640
+ ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
1641
+ ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
1642
+ ## minute time_hour
1643
+ ## 1 15 2013-01-01 05:00:00
1644
+ ## 2 29 2013-01-01 05:00:00
1645
+ ## 3 40 2013-01-01 05:00:00
1646
+ ## 4 45 2013-01-01 05:00:00
1647
+ ## 5 0 2013-01-01 06:00:00
1648
+ ## 6 58 2013-01-01 05:00:00
1649
+ \end{verbatim}
1650
+
1651
+ To arrange in descending order, use function `desc'
1652
+
1653
+ \begin{Shaded}
1654
+ \begin{Highlighting}[]
1655
+ \NormalTok{puts flights.arrange(}\StringTok{:dep_delay}\NormalTok{.desc).head.as__data__frame}
1656
+ \end{Highlighting}
1657
+ \end{Shaded}
1658
+
1659
+ \begin{verbatim}
1660
+ ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1661
+ ## 1 2013 1 9 641 900 1301 1242 1530
1662
+ ## 2 2013 6 15 1432 1935 1137 1607 2120
1663
+ ## 3 2013 1 10 1121 1635 1126 1239 1810
1664
+ ## 4 2013 9 20 1139 1845 1014 1457 2210
1665
+ ## 5 2013 7 22 845 1600 1005 1044 1815
1666
+ ## 6 2013 4 10 1100 1900 960 1342 2211
1667
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
1668
+ ## 1 1272 HA 51 N384HA JFK HNL 640 4983 9
1669
+ ## 2 1127 MQ 3535 N504MQ JFK CMH 74 483 19
1670
+ ## 3 1109 MQ 3695 N517MQ EWR ORD 111 719 16
1671
+ ## 4 1007 AA 177 N338AA JFK SFO 354 2586 18
1672
+ ## 5 989 MQ 3075 N665MQ JFK CVG 96 589 16
1673
+ ## 6 931 DL 2391 N959DL JFK TPA 139 1005 19
1674
+ ## minute time_hour
1675
+ ## 1 0 2013-01-09 09:00:00
1676
+ ## 2 35 2013-06-15 19:00:00
1677
+ ## 3 35 2013-01-10 16:00:00
1678
+ ## 4 45 2013-09-20 18:00:00
1679
+ ## 5 0 2013-07-22 16:00:00
1680
+ ## 6 0 2013-04-10 19:00:00
1681
+ \end{verbatim}
1682
+
1683
+ \hypertarget{selecting-columns}{%
1684
+ \subsection{Selecting columns}\label{selecting-columns}}
1685
+
1686
+ To select specific columns from a dataset we use function `select':
1687
+
1688
+ \begin{Shaded}
1689
+ \begin{Highlighting}[]
1690
+ \NormalTok{puts flights.select(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{).head.as__data__frame}
1691
+ \end{Highlighting}
1692
+ \end{Shaded}
1693
+
1694
+ \begin{verbatim}
1695
+ ## year month day
1696
+ ## 1 2013 1 1
1697
+ ## 2 2013 1 1
1698
+ ## 3 2013 1 1
1699
+ ## 4 2013 1 1
1700
+ ## 5 2013 1 1
1701
+ ## 6 2013 1 1
1702
+ \end{verbatim}
1703
+
1704
+ It is also possible to select columns in a given range
1705
+
1706
+ \begin{Shaded}
1707
+ \begin{Highlighting}[]
1708
+ \NormalTok{puts flights.select(}\StringTok{:year}\NormalTok{.up_to }\StringTok{:day}\NormalTok{).head.as__data__frame}
1709
+ \end{Highlighting}
1710
+ \end{Shaded}
1711
+
1712
+ \begin{verbatim}
1713
+ ## year month day
1714
+ ## 1 2013 1 1
1715
+ ## 2 2013 1 1
1716
+ ## 3 2013 1 1
1717
+ ## 4 2013 1 1
1718
+ ## 5 2013 1 1
1719
+ ## 6 2013 1 1
1720
+ \end{verbatim}
1721
+
1722
+ Select all columns that start with a given name sequence
1723
+
1724
+ \begin{Shaded}
1725
+ \begin{Highlighting}[]
1726
+ \NormalTok{puts flights.select(E.starts_with(}\StringTok{'arr'}\NormalTok{)).head.as__data__frame}
1727
+ \end{Highlighting}
1728
+ \end{Shaded}
1729
+
1730
+ \begin{verbatim}
1731
+ ## arr_time arr_delay
1732
+ ## 1 830 11
1733
+ ## 2 850 20
1734
+ ## 3 923 33
1735
+ ## 4 1004 -18
1736
+ ## 5 812 -25
1737
+ ## 6 740 12
1738
+ \end{verbatim}
1739
+
1740
+ Other functions that can be used:
1741
+
1742
+ \begin{itemize}
1743
+ \item
1744
+ ends\_with(``xyz''): matches names that end with ``xyz''.
1745
+ \item
1746
+ contains(``ijk''): matches names that contain ``ijk''.
1747
+ \item
1748
+ matches(``(.)\textbackslash{}1''): selects variables that match a
1749
+ regular expression. This one matches any variables that contain
1750
+ repeated characters.
1751
+ \item
1752
+ num\_range(``x'', (1..3)): matches x1, x2 and x3
1753
+ \end{itemize}
1754
+
1755
+ A helper function that comes in handy when we just want to rearrange
1756
+ column order is `everything':
1757
+
1758
+ \begin{Shaded}
1759
+ \begin{Highlighting}[]
1760
+ \NormalTok{puts flights.select(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{, E.everything).head.as__data__frame}
1761
+ \end{Highlighting}
1762
+ \end{Shaded}
1763
+
1764
+ \begin{verbatim}
1765
+ ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1766
+ ## 1 2013 1 1 517 515 2 830 819
1767
+ ## 2 2013 1 1 533 529 4 850 830
1768
+ ## 3 2013 1 1 542 540 2 923 850
1769
+ ## 4 2013 1 1 544 545 -1 1004 1022
1770
+ ## 5 2013 1 1 554 600 -6 812 837
1771
+ ## 6 2013 1 1 554 558 -4 740 728
1772
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
1773
+ ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
1774
+ ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
1775
+ ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
1776
+ ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
1777
+ ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
1778
+ ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
1779
+ ## minute time_hour
1780
+ ## 1 15 2013-01-01 05:00:00
1781
+ ## 2 29 2013-01-01 05:00:00
1782
+ ## 3 40 2013-01-01 05:00:00
1783
+ ## 4 45 2013-01-01 05:00:00
1784
+ ## 5 0 2013-01-01 06:00:00
1785
+ ## 6 58 2013-01-01 05:00:00
1786
+ \end{verbatim}
1787
+
1788
+ \hypertarget{add-variables-to-a-dataframe-with-mutate}{%
1789
+ \subsection{Add variables to a dataframe with
1790
+ `mutate'}\label{add-variables-to-a-dataframe-with-mutate}}
1791
+
1792
+ \begin{Shaded}
1793
+ \begin{Highlighting}[]
1794
+ \NormalTok{flights_sm = flights.}
1795
+ \NormalTok{ select((}\StringTok{:year}\NormalTok{.up_to }\StringTok{:day}\NormalTok{),}
1796
+ \NormalTok{ E.ends_with(}\StringTok{'delay'}\NormalTok{),}
1797
+ \StringTok{:distance}\NormalTok{,}
1798
+ \StringTok{:air_time}\NormalTok{)}
1799
+
1800
+ \NormalTok{puts flights_sm.head.as__data__frame}
1801
+ \end{Highlighting}
1802
+ \end{Shaded}
1803
+
1804
+ \begin{verbatim}
1805
+ ## year month day dep_delay arr_delay distance air_time
1806
+ ## 1 2013 1 1 2 11 1400 227
1807
+ ## 2 2013 1 1 4 20 1416 227
1808
+ ## 3 2013 1 1 2 33 1089 160
1809
+ ## 4 2013 1 1 -1 -18 1576 183
1810
+ ## 5 2013 1 1 -6 -25 762 116
1811
+ ## 6 2013 1 1 -4 12 719 150
1812
+ \end{verbatim}
1813
+
1814
+ \begin{Shaded}
1815
+ \begin{Highlighting}[]
1816
+ \NormalTok{flights_sm = flights_sm.}
1817
+ \NormalTok{ mutate(}\StringTok{gain: :dep_delay}\NormalTok{ - }\StringTok{:arr_delay}\NormalTok{,}
1818
+ \StringTok{speed: :distance}\NormalTok{ / }\StringTok{:air_time}\NormalTok{ * }\DecValTok{60}\NormalTok{)}
1819
+ \NormalTok{puts flights_sm.head.as__data__frame}
1820
+ \end{Highlighting}
1821
+ \end{Shaded}
1822
+
1823
+ \begin{verbatim}
1824
+ ## year month day dep_delay arr_delay distance air_time gain speed
1825
+ ## 1 2013 1 1 2 11 1400 227 -9 370.0441
1826
+ ## 2 2013 1 1 4 20 1416 227 -16 374.2731
1827
+ ## 3 2013 1 1 2 33 1089 160 -31 408.3750
1828
+ ## 4 2013 1 1 -1 -18 1576 183 17 516.7213
1829
+ ## 5 2013 1 1 -6 -25 762 116 19 394.1379
1830
+ ## 6 2013 1 1 -4 12 719 150 -16 287.6000
1831
+ \end{verbatim}
1832
+
1833
+ \hypertarget{summarising-data}{%
1834
+ \subsection{Summarising data}\label{summarising-data}}
1835
+
1836
+ Function `summarise' calculates summaries for the data frame. When no
1837
+ `group\_by' is used a single value is obtained from the data frame:
1838
+
1839
+ \begin{Shaded}
1840
+ \begin{Highlighting}[]
1841
+ \NormalTok{puts flights.summarise(}\StringTok{delay: }\NormalTok{E.mean(}\StringTok{:dep_delay}\NormalTok{, }\StringTok{na__rm: }\DecValTok{true}\NormalTok{)).as__data__frame}
1842
+ \end{Highlighting}
1843
+ \end{Shaded}
1844
+
1845
+ \begin{verbatim}
1846
+ ## delay
1847
+ ## 1 12.63907
1848
+ \end{verbatim}
1849
+
1850
+ When a data frame is grouped with `group\_by' summaries apply to the
1851
+ given group:
1852
+
1853
+ \begin{Shaded}
1854
+ \begin{Highlighting}[]
1855
+ \NormalTok{by_day = flights.group_by(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{)}
1856
+ \NormalTok{puts by_day.summarise(}\StringTok{delay: :dep_delay}\NormalTok{.mean(}\StringTok{na__rm: }\DecValTok{true}\NormalTok{)).head.as__data__frame}
1857
+ \end{Highlighting}
1858
+ \end{Shaded}
1859
+
1860
+ \begin{verbatim}
1861
+ ## year month day delay
1862
+ ## 1 2013 1 1 11.548926
1863
+ ## 2 2013 1 2 13.858824
1864
+ ## 3 2013 1 3 10.987832
1865
+ ## 4 2013 1 4 8.951595
1866
+ ## 5 2013 1 5 5.732218
1867
+ ## 6 2013 1 6 7.148014
1868
+ \end{verbatim}
1869
+
1870
+ Next we put many operations together by piping them one after the
1871
+ other:
1872
+
1873
+ \begin{Shaded}
1874
+ \begin{Highlighting}[]
1875
+ \NormalTok{delays = flights.}
1876
+ \NormalTok{ group_by(}\StringTok{:dest}\NormalTok{).}
1877
+ \NormalTok{ summarise(}
1878
+ \StringTok{count: }\NormalTok{E.n,}
1879
+ \StringTok{dist: :distance}\NormalTok{.mean(}\StringTok{na__rm: }\DecValTok{true}\NormalTok{),}
1880
+ \StringTok{delay: :arr_delay}\NormalTok{.mean(}\StringTok{na__rm: }\DecValTok{true}\NormalTok{)).}
1881
+ \NormalTok{ filter(}\StringTok{:count}\NormalTok{ > }\DecValTok{20}\NormalTok{, }\StringTok{:dest}\NormalTok{ != }\StringTok{"NHL"}\NormalTok{)}
1882
+
1883
+ \NormalTok{puts delays.as__data__frame.head}
1884
+ \end{Highlighting}
1885
+ \end{Shaded}
1886
+
1887
+ \begin{verbatim}
1888
+ ## dest count dist delay
1889
+ ## 1 ABQ 254 1826.0000 4.381890
1890
+ ## 2 ACK 265 199.0000 4.852273
1891
+ ## 3 ALB 439 143.0000 14.397129
1892
+ ## 4 ATL 17215 757.1082 11.300113
1893
+ ## 5 AUS 2439 1514.2530 6.019909
1894
+ ## 6 AVL 275 583.5818 8.003831
1895
+ \end{verbatim}
1896
+
1897
+ \hypertarget{using-data-table}{%
1898
+ \section{Using Data Table}\label{using-data-table}}
1899
+
1900
+ \begin{Shaded}
1901
+ \begin{Highlighting}[]
1902
+ \NormalTok{R.library(}\StringTok{'data.table'}\NormalTok{)}
1903
+ \NormalTok{R.install_and_loads(}\StringTok{'curl'}\NormalTok{)}
1904
+
1905
+ \NormalTok{input = }\StringTok{"https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv"}
1906
+ \NormalTok{flights = R.fread(input)}
1907
+ \NormalTok{puts flights}
1908
+ \NormalTok{puts flights.dim}
1909
+ \end{Highlighting}
1910
+ \end{Shaded}
1911
+
1912
+ \begin{verbatim}
1913
+ ## year month day dep_delay arr_delay carrier origin dest air_time
1914
+ ## 1: 2014 1 1 14 13 AA JFK LAX 359
1915
+ ## 2: 2014 1 1 -3 13 AA JFK LAX 363
1916
+ ## 3: 2014 1 1 2 9 AA JFK LAX 351
1917
+ ## 4: 2014 1 1 -8 -26 AA LGA PBI 157
1918
+ ## 5: 2014 1 1 2 1 AA JFK LAX 350
1919
+ ## ---
1920
+ ## 253312: 2014 10 31 1 -30 UA LGA IAH 201
1921
+ ## 253313: 2014 10 31 -5 -14 UA EWR IAH 189
1922
+ ## 253314: 2014 10 31 -8 16 MQ LGA RDU 83
1923
+ ## 253315: 2014 10 31 -4 15 MQ LGA DTW 75
1924
+ ## 253316: 2014 10 31 -5 1 MQ LGA SDF 110
1925
+ ## distance hour
1926
+ ## 1: 2475 9
1927
+ ## 2: 2475 11
1928
+ ## 3: 2475 19
1929
+ ## 4: 1035 7
1930
+ ## 5: 2475 13
1931
+ ## ---
1932
+ ## 253312: 1416 14
1933
+ ## 253313: 1400 8
1934
+ ## 253314: 431 11
1935
+ ## 253315: 502 11
1936
+ ## 253316: 659 8
1937
+ ## [1] 253316 11
1938
+ \end{verbatim}
1939
+
1940
+ \begin{Shaded}
1941
+ \begin{Highlighting}[]
1942
+
1943
+ \NormalTok{data_table = R.data__table(}
1944
+ \DataTypeTok{ID}\NormalTok{: R.c(}\StringTok{"b"}\NormalTok{,}\StringTok{"b"}\NormalTok{,}\StringTok{"b"}\NormalTok{,}\StringTok{"a"}\NormalTok{,}\StringTok{"a"}\NormalTok{,}\StringTok{"c"}\NormalTok{),}
1945
+ \StringTok{a: }\NormalTok{(}\DecValTok{1}\NormalTok{..}\DecValTok{6}\NormalTok{),}
1946
+ \StringTok{b: }\NormalTok{(}\DecValTok{7}\NormalTok{..}\DecValTok{12}\NormalTok{),}
1947
+ \StringTok{c: }\NormalTok{(}\DecValTok{13}\NormalTok{..}\DecValTok{18}\NormalTok{)}
1948
+ \NormalTok{)}
1949
+
1950
+ \NormalTok{puts data_table}
1951
+ \NormalTok{puts data_table.}\DataTypeTok{ID}
1952
+ \end{Highlighting}
1953
+ \end{Shaded}
1954
+
1955
+ \begin{verbatim}
1956
+ ## ID a b c
1957
+ ## 1: b 1 7 13
1958
+ ## 2: b 2 8 14
1959
+ ## 3: b 3 9 15
1960
+ ## 4: a 4 10 16
1961
+ ## 5: a 5 11 17
1962
+ ## 6: c 6 12 18
1963
+ ## [1] "b" "b" "b" "a" "a" "c"
1964
+ \end{verbatim}
1965
+
1966
+ \begin{Shaded}
1967
+ \begin{Highlighting}[]
1968
+ \CommentTok{# subset rows in i}
1969
+ \NormalTok{ans = flights[(}\StringTok{:origin}\NormalTok{.eq }\StringTok{"JFK"}\NormalTok{) & (}\StringTok{:month}\NormalTok{.eq }\DecValTok{6}\NormalTok{)]}
1970
+ \NormalTok{puts ans.head}
1971
+
1972
+ \CommentTok{# Get the first two rows from flights.}
1973
+
1974
+ \NormalTok{ans = flights[(}\DecValTok{1}\NormalTok{..}\DecValTok{2}\NormalTok{)]}
1975
+ \NormalTok{puts ans}
1976
+
1977
+ \CommentTok{# Sort flights first by column origin in ascending order, and then by dest in descending order:}
1978
+
1979
+ \CommentTok{# ans = flights[E.order(:origin, -(:dest))]}
1980
+ \CommentTok{# puts ans.head}
1981
+ \end{Highlighting}
1982
+ \end{Shaded}
1983
+
1984
+ \begin{verbatim}
1985
+ ## year month day dep_delay arr_delay carrier origin dest air_time
1986
+ ## 1: 2014 6 1 -9 -5 AA JFK LAX 324
1987
+ ## 2: 2014 6 1 -10 -13 AA JFK LAX 329
1988
+ ## 3: 2014 6 1 18 -1 AA JFK LAX 326
1989
+ ## 4: 2014 6 1 -6 -16 AA JFK LAX 320
1990
+ ## 5: 2014 6 1 -4 -45 AA JFK LAX 326
1991
+ ## 6: 2014 6 1 -6 -23 AA JFK LAX 329
1992
+ ## distance hour
1993
+ ## 1: 2475 8
1994
+ ## 2: 2475 12
1995
+ ## 3: 2475 7
1996
+ ## 4: 2475 10
1997
+ ## 5: 2475 18
1998
+ ## 6: 2475 14
1999
+ ## year month day dep_delay arr_delay carrier origin dest air_time
2000
+ ## 1: 2014 1 1 14 13 AA JFK LAX 359
2001
+ ## 2: 2014 1 1 -3 13 AA JFK LAX 363
2002
+ ## distance hour
2003
+ ## 1: 2475 9
2004
+ ## 2: 2475 11
2005
+ \end{verbatim}
2006
+
2007
+ \begin{Shaded}
2008
+ \begin{Highlighting}[]
2009
+ \CommentTok{# Select column(s) in j}
2010
+ \CommentTok{# select arr_delay column, but return it as a vector.}
2011
+
2012
+ \NormalTok{ans = flights[}\StringTok{:all}\NormalTok{, }\StringTok{:arr_delay}\NormalTok{]}
2013
+ \NormalTok{puts ans.head}
2014
+
2015
+ \CommentTok{# Select arr_delay column, but return as a data.table instead.}
2016
+
2017
+ \NormalTok{ans = flights[}\StringTok{:all}\NormalTok{, }\StringTok{:arr_delay}\NormalTok{.list]}
2018
+ \NormalTok{puts ans.head}
2019
+
2020
+ \NormalTok{ans = flights[}\StringTok{:all}\NormalTok{, E.list(}\StringTok{:arr_delay}\NormalTok{, }\StringTok{:dep_delay}\NormalTok{)]}
2021
+ \end{Highlighting}
2022
+ \end{Shaded}
2023
+
2024
+ \begin{verbatim}
2025
+ ## [1] 13 13 9 -26 1 0
2026
+ ## arr_delay
2027
+ ## 1: 13
2028
+ ## 2: 13
2029
+ ## 3: 9
2030
+ ## 4: -26
2031
+ ## 5: 1
2032
+ ## 6: 0
2033
+ \end{verbatim}
2034
+
2035
+ \hypertarget{graphics-in-galaaz}{%
2036
+ \section{Graphics in Galaaz}\label{graphics-in-galaaz}}
2037
+
2038
+ Creating graphics in Galaaz is quite easy, as it can use all the power
2039
+ of ggplot2. There are many resources on the web that teach ggplot, so
2040
+ here we give a quick example of ggplot integration with Ruby. We
2041
+ continue to use the :mtcars dataset and we will plot a diverging bar
2042
+ plot, showing cars that have `above' or `below' gas consumption. Let's
2043
+ first prepare the data frame with the necessary data:
2044
+
2045
+ \begin{Shaded}
2046
+ \begin{Highlighting}[]
2047
+ \CommentTok{# copy the R variable :mtcars to the Ruby mtcars variable}
2048
+ \NormalTok{mtcars = ~}\StringTok{:mtcars}
2049
+
2050
+ \CommentTok{# create a new column 'car_name' to store the car names so that it can be}
2051
+ \CommentTok{# used for plotting. The 'rownames' of the data frame cannot be used as}
2052
+ \CommentTok{# data for plotting}
2053
+ \NormalTok{mtcars.car_name = R.rownames(}\StringTok{:mtcars}\NormalTok{)}
2054
+
2055
+ \CommentTok{# compute normalized mpg and add it to a new column called mpg_z}
2056
+ \CommentTok{# Note that the mean value for mpg can be obtained by calling the 'mean'}
2057
+ \CommentTok{# function on the vector 'mtcars.mpg'. The same with the standard}
2058
+ \CommentTok{# deviation 'sd'. The vector is then rounded to two digits with 'round 2'}
2059
+ \NormalTok{mtcars.mpg_z = ((mtcars.mpg - mtcars.mpg.mean)/mtcars.mpg.sd).round }\DecValTok{2}
2060
+
2061
+ \CommentTok{# create a new column 'mpg_type'. Function 'ifelse' is a vectorized function}
2062
+ \CommentTok{# that looks at every element of the mpg_z vector and if the value is below}
2063
+ \CommentTok{# 0, returns 'below', otherwise returns 'above'}
2064
+ \NormalTok{mtcars.mpg_type = (mtcars.mpg_z < }\DecValTok{0}\NormalTok{).ifelse(}\StringTok{"below"}\NormalTok{, }\StringTok{"above"}\NormalTok{)}
2065
+
2066
+ \CommentTok{# order the mtcars data set by the mpg_z vector from smaller to larger values}
2067
+ \NormalTok{mtcars = mtcars[mtcars.mpg_z.order, }\StringTok{:all}\NormalTok{]}
2068
+
2069
+ \CommentTok{# convert the car_name column to a factor to retain sorted order in plot}
2070
+ \NormalTok{mtcars.car_name = mtcars.car_name.factor }\StringTok{levels: }\NormalTok{mtcars.car_name}
2071
+
2072
+ \CommentTok{# let's look at the final data frame}
2073
+ \NormalTok{puts mtcars.head}
2074
+ \end{Highlighting}
2075
+ \end{Shaded}
2076
+
2077
+ \begin{verbatim}
2078
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2079
+ ## Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
2080
+ ## Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4
2081
+ ## Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4
2082
+ ## Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4
2083
+ ## Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
2084
+ ## Maserati Bora 15.0 8 301 335 3.54 3.570 14.60 0 1 5 8
2085
+ ## car_name mpg_z mpg_type
2086
+ ## Cadillac Fleetwood Cadillac Fleetwood -1.61 below
2087
+ ## Lincoln Continental Lincoln Continental -1.61 below
2088
+ ## Camaro Z28 Camaro Z28 -1.13 below
2089
+ ## Duster 360 Duster 360 -0.96 below
2090
+ ## Chrysler Imperial Chrysler Imperial -0.89 below
2091
+ ## Maserati Bora Maserati Bora -0.84 below
2092
+ \end{verbatim}
2093
+
2094
+ Now, let's plot the diverging bar plot. When using gKnit, there is no
2095
+ need to call `R.awt' to create a plotting device, since gKnit does take
2096
+ care of it. Galaaz provides integration with ggplot. The interested
2097
+ reader should check online for more information on ggplot, since it is
2098
+ outside the scope of this manual to describe how ggplot works. We give
2099
+ here but a brief description on how this plot is generated.
2100
+
2101
+ ggplot implements the `grammar of graphics'. In this approach, plots are
2102
+ built by adding layers to the plot. On the first layer we describe what
2103
+ we want on the `x' and `y' axis of the plot. In this case, we have
2104
+ `car\_name' on the `x' axis and `mpg\_z' on the `y' axis. Then the type
2105
+ of graph is specified by adding `geom\_bar' (for a bar graph). We
2106
+ specify that our bars should be filled using `mpg\_type', which is
2107
+ either `above' or `below', giving two colours for filling. On the
2108
+ next layer we specify the labels for the graph, then we add the title
2109
+ and subtitle. Finally, in a bar chart usually bars go on the vertical
2110
+ direction, but in this graph we want the bars to be laid out horizontally,
2111
+ so we add `coord\_flip'.
2112
+
2113
+ \begin{Shaded}
2114
+ \begin{Highlighting}[]
2115
+ \NormalTok{require }\StringTok{'ggplot'}
2116
+
2117
+ \NormalTok{puts mtcars.ggplot(E.aes(}\StringTok{x: :car_name}\NormalTok{, }\StringTok{y: :mpg_z}\NormalTok{, }\StringTok{label: :mpg_z}\NormalTok{)) +}
2118
+ \NormalTok{ R.geom_bar(E.aes(}\StringTok{fill: :mpg_type}\NormalTok{), }\StringTok{stat: 'identity'}\NormalTok{, }\StringTok{width: }\FloatTok{0.5}\NormalTok{) +}
2119
+ \NormalTok{ R.scale_fill_manual(}\StringTok{name: 'Mileage'}\NormalTok{,}
2120
+ \StringTok{labels: }\NormalTok{R.c(}\StringTok{'Above Average'}\NormalTok{, }\StringTok{'Below Average'}\NormalTok{),}
2121
+ \StringTok{values: }\NormalTok{R.c(}\StringTok{'above'}\NormalTok{: }\StringTok{'#00ba38'}\NormalTok{, }\StringTok{'below'}\NormalTok{: }\StringTok{'#f8766d'}\NormalTok{)) +}
2122
+ \NormalTok{ R.labs(}\StringTok{subtitle: "Normalised mileage from 'mtcars'"}\NormalTok{,}
2123
+ \StringTok{title: "Diverging Bars"}\NormalTok{) + }
2124
+ \NormalTok{ R.coord_flip}
2125
+ \end{Highlighting}
2126
+ \end{Shaded}
2127
+
2128
+ \includegraphics{/home/rbotafogo/desenv/galaaz/blogs/manual/manual_files/figure-latex/diverging_bar.pdf}
2129
+
2130
+ \hypertarget{coding-with-tidyverse}{%
2131
+ \section{Coding with Tidyverse}\label{coding-with-tidyverse}}
2132
+
2133
+ In R, and when coding with `tidyverse', arguments to a function are
2134
+ usually not \emph{referentially transparent}. That is, you can't replace
2135
+ a value with a seemingly equivalent object that you've defined
2136
+ elsewhere. To see the problem, let's first define a data frame:
2137
+
2138
+ \begin{Shaded}
2139
+ \begin{Highlighting}[]
2140
+ \NormalTok{df = R.data__frame(}\StringTok{x: }\NormalTok{(}\DecValTok{1}\NormalTok{..}\DecValTok{3}\NormalTok{), }\StringTok{y: }\NormalTok{(}\DecValTok{3}\NormalTok{..}\DecValTok{1}\NormalTok{))}
2141
+ \NormalTok{puts df}
2142
+ \end{Highlighting}
2143
+ \end{Shaded}
2144
+
2145
+ \begin{verbatim}
2146
+ ## x y
2147
+ ## 1 1 3
2148
+ ## 2 2 2
2149
+ ## 3 3 1
2150
+ \end{verbatim}
2151
+
2152
+ and now, let's look at this code:
2153
+
2154
+ \begin{Shaded}
2155
+ \begin{Highlighting}[]
2156
+ \NormalTok{my_var <-}\StringTok{ }\NormalTok{x}
2157
+ \KeywordTok{filter}\NormalTok{(df, my_var }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{)}
2158
+ \end{Highlighting}
2159
+ \end{Shaded}
2160
+
2161
+ It generates the following error: ``object `x' not found''.
2162
+
2163
+ However, in Galaaz, arguments are referentially transparent as can be
2164
+ seen by the code below. Note initially that `my\_var = :x' will not give
2165
+ the error ``object `x' not found'' since `:x' is treated as an
2166
+ expression and assigned to my\_var. Then when doing (my\_var.eq 1),
2167
+ my\_var is a variable that resolves to `:x' and it becomes equivalent to
2168
+ (:x.eq 1) which is what we want.
2169
+
2170
+ \begin{Shaded}
2171
+ \begin{Highlighting}[]
2172
+ \NormalTok{my_var = }\StringTok{:x}
2173
+ \NormalTok{puts df.filter(my_var.eq }\DecValTok{1}\NormalTok{)}
2174
+ \end{Highlighting}
2175
+ \end{Shaded}
2176
+
2177
+ \begin{verbatim}
2178
+ ## x y
2179
+ ## 1 1 3
2180
+ \end{verbatim}
2181
+
2182
+ As stated by Hadley
2183
+
2184
+ \begin{quote}
2185
+ dplyr code is ambiguous. Depending on what variables are defined where,
2186
+ filter(df, x == y) could be equivalent to any of:
2187
+ \end{quote}
2188
+
2189
+ \begin{verbatim}
2190
+ df[df$x == df$y, ]
2191
+ df[df$x == y, ]
2192
+ df[x == df$y, ]
2193
+ df[x == y, ]
2194
+ \end{verbatim}
2195
+
2196
+ In galaaz this ambiguity does not exist, filter(df, x.eq y) is not a
2197
+ valid expression as expressions are built with symbols. In doing
2198
+ filter(df, :x.eq y) we are looking for elements of the `x' column that
2199
+ are equal to a previously defined y variable. Finally in filter(df,
2200
+ :x.eq :y) we are looking for elements in which the `x' column value is
2201
+ equal to the `y' column value. This can be seen in the following two
2202
+ chunks of code:
2203
+
2204
+ \begin{Shaded}
2205
+ \begin{Highlighting}[]
2206
+ \NormalTok{y = }\DecValTok{1}
2207
+ \NormalTok{x = }\DecValTok{2}
2208
+
2209
+ \CommentTok{# looking for values where the 'x' column is equal to the 'y' column}
2210
+ \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{.eq }\StringTok{:y}\NormalTok{)}
2211
+ \end{Highlighting}
2212
+ \end{Shaded}
2213
+
2214
+ \begin{verbatim}
2215
+ ## x y
2216
+ ## 1 2 2
2217
+ \end{verbatim}
2218
+
2219
+ \begin{Shaded}
2220
+ \begin{Highlighting}[]
2221
+ \CommentTok{# looking for values where the 'x' column is equal to the 'y' variable}
2222
+ \CommentTok{# in this case, the number 1}
2223
+ \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{.eq y)}
2224
+ \end{Highlighting}
2225
+ \end{Shaded}
2226
+
2227
+ \begin{verbatim}
2228
+ ## x y
2229
+ ## 1 1 3
2230
+ \end{verbatim}
2231
+
2232
+ \hypertarget{writing-a-function-that-applies-to-different-data-sets}{%
2233
+ \subsection{Writing a function that applies to different data
2234
+ sets}\label{writing-a-function-that-applies-to-different-data-sets}}
2235
+
2236
+ Let's suppose that we want to write a function that receives as the
2237
+ first argument a data frame and as second argument an expression that
2238
+ adds a column to the data frame that is equal to the sum of elements in
2239
+ column `a' plus `x'.
2240
+
2241
+ Here is the intended behaviour using the `mutate' function of `dplyr':
2242
+
2243
+ \begin{verbatim}
2244
+ mutate(df1, y = a + x)
2245
+ mutate(df2, y = a + x)
2246
+ mutate(df3, y = a + x)
2247
+ mutate(df4, y = a + x)
2248
+ \end{verbatim}
2249
+
2250
+ The naive approach to writing an R function to solve this problem is:
2251
+
2252
+ \begin{verbatim}
2253
+ mutate_y <- function(df) {
2254
+ mutate(df, y = a + x)
2255
+ }
2256
+ \end{verbatim}
2257
+
2258
+ Unfortunately, in R, this function can fail silently if one of the
2259
+ variables isn't present in the data frame, but is present in the global
2260
+ environment. We will not go through here how to solve this problem in R.
2261
+
2262
+ In Galaaz the method mutate\_y below will work fine and will never fail
2263
+ silently.
2264
+
2265
+ \begin{Shaded}
2266
+ \begin{Highlighting}[]
2267
+ \KeywordTok{def}\NormalTok{ mutate_y(df)}
2268
+ \NormalTok{ df.mutate(}\StringTok{:y}\NormalTok{.assign }\StringTok{:a}\NormalTok{ + }\StringTok{:x}\NormalTok{)}
2269
+ \KeywordTok{end}
2270
+ \end{Highlighting}
2271
+ \end{Shaded}
2272
+
2273
+ Here we create a data frame that has only one column named `x':
2274
+
2275
+ \begin{Shaded}
2276
+ \begin{Highlighting}[]
2277
+ \NormalTok{df1 = R.data__frame(}\StringTok{x: }\NormalTok{(}\DecValTok{1}\NormalTok{..}\DecValTok{3}\NormalTok{))}
2278
+ \NormalTok{puts df1}
2279
+ \end{Highlighting}
2280
+ \end{Shaded}
2281
+
2282
+ \begin{verbatim}
2283
+ ## x
2284
+ ## 1 1
2285
+ ## 2 2
2286
+ ## 3 3
2287
+ \end{verbatim}
2288
+
2289
+ Note that method mutate\_y will fail independently of the fact that
2290
+ variable `a' is defined and in the scope of the method. Variable `a' has
2291
+ no relationship with the symbol `:a' used in the definition of
2292
+ `mutate\_y' above:
2293
+
2294
+ \begin{Shaded}
2295
+ \begin{Highlighting}[]
2296
+ \NormalTok{a = }\DecValTok{10}
2297
+ \NormalTok{mutate_y(df1)}
2298
+ \end{Highlighting}
2299
+ \end{Shaded}
2300
+
2301
+ \begin{verbatim}
2302
+ ## Message:
2303
+ ## Error in mutate_impl(.data, dots) :
2304
+ ## Evaluation error: object 'a' not found.
2305
+ ## In addition: Warning message:
2306
+ ## In mutate_impl(.data, dots) :
2307
+ ## mismatched protect/unprotect (unprotect with empty protect stack) (RError)
2308
+ ## Translated to internal error
2309
+ \end{verbatim}
2310
+
2311
+ \hypertarget{different-expressions}{%
2312
+ \subsection{Different expressions}\label{different-expressions}}
2313
+
2314
+ Let's move to the next problem as presented by Hadley where trying to
2315
+ write a function in R that will receive two arguments, the first a
2316
+ variable and the second an expression is not trivial. Below we create a
2317
+ data frame and we want to write a function that groups data by a
2318
+ variable and summarises it by an expression:
2319
+
2320
+ \begin{Shaded}
2321
+ \begin{Highlighting}[]
2322
+ \KeywordTok{set.seed}\NormalTok{(}\DecValTok{123}\NormalTok{)}
2323
+
2324
+ \NormalTok{df <-}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}
2325
+ \DataTypeTok{g1 =} \KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{, }\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{2}\NormalTok{),}
2326
+ \DataTypeTok{g2 =} \KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{1}\NormalTok{),}
2327
+ \DataTypeTok{a =} \KeywordTok{sample}\NormalTok{(}\DecValTok{5}\NormalTok{),}
2328
+ \DataTypeTok{b =} \KeywordTok{sample}\NormalTok{(}\DecValTok{5}\NormalTok{)}
2329
+ \NormalTok{)}
2330
+
2331
+ \KeywordTok{as.data.frame}\NormalTok{(df) }
2332
+ \end{Highlighting}
2333
+ \end{Shaded}
2334
+
2335
+ \begin{verbatim}
2336
+ ## g1 g2 a b
2337
+ ## 1 1 1 2 1
2338
+ ## 2 1 2 4 3
2339
+ ## 3 2 1 5 4
2340
+ ## 4 2 2 3 2
2341
+ ## 5 2 1 1 5
2342
+ \end{verbatim}
2343
+
2344
+ \begin{Shaded}
2345
+ \begin{Highlighting}[]
2346
+ \NormalTok{d2 <-}\StringTok{ }\NormalTok{df }\OperatorTok{%>%}
2347
+ \StringTok{ }\KeywordTok{group_by}\NormalTok{(g1) }\OperatorTok{%>%}
2348
+ \StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{a =} \KeywordTok{mean}\NormalTok{(a))}
2349
+
2350
+ \KeywordTok{as.data.frame}\NormalTok{(d2) }
2351
+ \end{Highlighting}
2352
+ \end{Shaded}
2353
+
2354
+ \begin{verbatim}
2355
+ ## g1 a
2356
+ ## 1 1 3
2357
+ ## 2 2 3
2358
+ \end{verbatim}
2359
+
2360
+ \begin{Shaded}
2361
+ \begin{Highlighting}[]
2362
+ \NormalTok{d2 <-}\StringTok{ }\NormalTok{df }\OperatorTok{%>%}
2363
+ \StringTok{ }\KeywordTok{group_by}\NormalTok{(g2) }\OperatorTok{%>%}
2364
+ \StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{a =} \KeywordTok{mean}\NormalTok{(a))}
2365
+
2366
+ \KeywordTok{as.data.frame}\NormalTok{(d2) }
2367
+ \end{Highlighting}
2368
+ \end{Shaded}
2369
+
2370
+ \begin{verbatim}
2371
+ ## g2 a
2372
+ ## 1 1 2.666667
2373
+ ## 2 2 3.500000
2374
+ \end{verbatim}
2375
+
2376
+ As shown by Hadley, one might expect this function to do the trick:
2377
+
2378
+ \begin{Shaded}
2379
+ \begin{Highlighting}[]
2380
+ \NormalTok{my_summarise <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(df, group_var) \{}
2381
+ \NormalTok{ df }\OperatorTok{%>%}
2382
+ \StringTok{ }\KeywordTok{group_by}\NormalTok{(group_var) }\OperatorTok{%>%}
2383
+ \StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{a =} \KeywordTok{mean}\NormalTok{(a))}
2384
+ \NormalTok{\}}
2385
+
2386
+ \CommentTok{# my_summarise(df, g1)}
2387
+ \CommentTok{#> Error: Column `group_var` is unknown}
2388
+ \end{Highlighting}
2389
+ \end{Shaded}
2390
+
2391
+ In order to solve this problem, coding with dplyr requires the
2392
+ introduction of many new concepts and functions such as `quo', `quos',
2393
+ `enquo', `enquos', `!!' (bang bang), `!!!' (triple bang). Again, we'll
2394
+ leave to Hadley the explanation on how to use all those functions.
2395
+
2396
+ Now, let's try to implement the same function in galaaz. The next code
2397
+ block first prints the `df' data frame defined previously in R (to
2398
+ access an R variable from Galaaz, we use the tilde operator
2399
+ `\textasciitilde{}' applied to the R variable name as symbol, i.e.,
2400
+ `:df').
2401
+
2402
+ \begin{Shaded}
2403
+ \begin{Highlighting}[]
2404
+ \NormalTok{puts ~}\StringTok{:df}
2405
+ \end{Highlighting}
2406
+ \end{Shaded}
2407
+
2408
+ \begin{verbatim}
2409
+ ## g1 g2 a b
2410
+ ## 1 1 1 2 1
2411
+ ## 2 1 2 4 3
2412
+ ## 3 2 1 5 4
2413
+ ## 4 2 2 3 2
2414
+ ## 5 2 1 1 5
2415
+ \end{verbatim}
2416
+
2417
+ We then create the `my\_summarize' method and call it passing the R data
2418
+ frame and the group by variable `:g1':
2419
+
2420
+ \begin{Shaded}
2421
+ \begin{Highlighting}[]
2422
+ \KeywordTok{def}\NormalTok{ my_summarize(df, group_var)}
2423
+ \NormalTok{ df.group_by(group_var).}
2424
+ \NormalTok{ summarize(}\StringTok{a: :a}\NormalTok{.mean)}
2425
+ \KeywordTok{end}
2426
+
2427
+ \NormalTok{puts my_summarize(}\StringTok{:df}\NormalTok{, }\StringTok{:g1}\NormalTok{).as__data__frame}
2428
+ \end{Highlighting}
2429
+ \end{Shaded}
2430
+
2431
+ \begin{verbatim}
2432
+ ## g1 a
2433
+ ## 1 1 3
2434
+ ## 2 2 3
2435
+ \end{verbatim}
2436
+
2437
+ It works!!! Well, let's make sure this was not just some coincidence:
2438
+
2439
+ \begin{Shaded}
2440
+ \begin{Highlighting}[]
2441
+ \NormalTok{puts my_summarize(}\StringTok{:df}\NormalTok{, }\StringTok{:g2}\NormalTok{).as__data__frame}
2442
+ \end{Highlighting}
2443
+ \end{Shaded}
2444
+
2445
+ \begin{verbatim}
2446
+ ## g2 a
2447
+ ## 1 1 2.666667
2448
+ ## 2 2 3.500000
2449
+ \end{verbatim}
2450
+
2451
+ Great, everything is fine! No magic, no new functions, no complexities,
2452
+ just normal, standard Ruby code. If you've ever done NSE in R, this
2453
+ certainly feels much safer and easier to implement.
2454
+
2455
+ \hypertarget{different-input-variables}{%
2456
+ \subsection{Different input variables}\label{different-input-variables}}
2457
+
2458
+ In the previous section we've managed to get rid of all NSE formulation
2459
+ for a simple example, but does this remain true for more complex
2460
+ examples, or will the Galaaz way prove impractical for more complex
2461
+ code?
2462
+
2463
+ In the next example Hadley asks us to write a function that, given
2464
+ an expression such as `a' or `a * b', calculates three summaries. What
2465
+ we want is a function that does the same as these R statements:
2466
+
2467
+ \begin{verbatim}
2468
+ summarise(df, mean = mean(a), sum = sum(a), n = n())
2469
+ #> # A tibble: 1 x 3
2470
+ #> mean sum n
2471
+ #> <dbl> <int> <int>
2472
+ #> 1 3 15 5
2473
+
2474
+ summarise(df, mean = mean(a * b), sum = sum(a * b), n = n())
2475
+ #> # A tibble: 1 x 3
2476
+ #> mean sum n
2477
+ #> <dbl> <int> <int>
2478
+ #> 1 9 45 5
2479
+ \end{verbatim}
2480
+
2481
+ Let's try it in galaaz:
2482
+
2483
+ \begin{Shaded}
2484
+ \begin{Highlighting}[]
2485
+ \KeywordTok{def}\NormalTok{ my_summarise2(df, expr)}
2486
+ \NormalTok{ df.summarize(}
2487
+ \StringTok{mean: }\NormalTok{E.mean(expr),}
2488
+ \StringTok{sum: }\NormalTok{E.sum(expr),}
2489
+ \StringTok{n: }\NormalTok{E.n}
2490
+ \NormalTok{ )}
2491
+ \KeywordTok{end}
2492
+
2493
+ \NormalTok{puts my_summarise2((~}\StringTok{:df}\NormalTok{), }\StringTok{:a}\NormalTok{)}
2494
+ \NormalTok{puts }\StringTok{"\textbackslash{}n"}
2495
+ \NormalTok{puts my_summarise2((~}\StringTok{:df}\NormalTok{), }\StringTok{:a}\NormalTok{ * }\StringTok{:b}\NormalTok{)}
2496
+ \end{Highlighting}
2497
+ \end{Shaded}
2498
+
2499
+ \begin{verbatim}
2500
+ ## mean sum n
2501
+ ## 1 3 15 5
2502
+ ##
2503
+ ## mean sum n
2504
+ ## 1 9 45 5
2505
+ \end{verbatim}
2506
+
2507
+ Once again, there is no need to use any special theory or functions. The
2508
+ only point to be careful about is the use of `E' to build expressions
2509
+ from functions `mean', `sum' and `n'.
2510
+
2511
+ \hypertarget{different-input-and-output-variable}{%
2512
+ \subsection{Different input and output
2513
+ variable}\label{different-input-and-output-variable}}
2514
+
2515
+ Now the next challenge presented by Hadley is to vary the name of the
2516
+ output variables based on the received expression. So, if the input
2517
+ expression is `a', we want our data frame columns to be named `mean\_a'
2518
+ and `sum\_a'. Now, if the input expression is `b', columns should be
2519
+ named `mean\_b' and `sum\_b'.
2520
+
2521
+ \begin{verbatim}
2522
+ mutate(df, mean_a = mean(a), sum_a = sum(a))
2523
+ #> # A tibble: 5 x 6
2524
+ #> g1 g2 a b mean_a sum_a
2525
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
2526
+ #> 1 1 1 1 3 3 15
2527
+ #> 2 1 2 4 2 3 15
2528
+ #> 3 2 1 2 1 3 15
2529
+ #> 4 2 2 5 4 3 15
2530
+ #> # … with 1 more row
2531
+
2532
+ mutate(df, mean_b = mean(b), sum_b = sum(b))
2533
+ #> # A tibble: 5 x 6
2534
+ #> g1 g2 a b mean_b sum_b
2535
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
2536
+ #> 1 1 1 1 3 3 15
2537
+ #> 2 1 2 4 2 3 15
2538
+ #> 3 2 1 2 1 3 15
2539
+ #> 4 2 2 5 4 3 15
2540
+ #> # … with 1 more row
2541
+ \end{verbatim}
2542
+
2543
+ In order to solve this problem in R, Hadley needs to introduce some
2544
+ more new functions and notations: `quo\_name' and the `:=' operator from
2545
+ package `rlang'.
2546
+
2547
+ Here is our Ruby code:
2548
+
2549
+ \begin{Shaded}
2550
+ \begin{Highlighting}[]
2551
+ \KeywordTok{def}\NormalTok{ my_mutate(df, expr)}
2552
+ \NormalTok{ mean_name = }\StringTok{"mean_}\OtherTok{#\{}\NormalTok{expr.to_s}\OtherTok{\}}\StringTok{"}
2553
+ \NormalTok{ sum_name = }\StringTok{"sum_}\OtherTok{#\{}\NormalTok{expr.to_s}\OtherTok{\}}\StringTok{"}
2554
+
2555
+ \NormalTok{ df.mutate(mean_name => E.mean(expr),}
2556
+ \NormalTok{ sum_name => E.sum(expr))}
2557
+ \KeywordTok{end}
2558
+
2559
+ \NormalTok{puts my_mutate((~}\StringTok{:df}\NormalTok{), }\StringTok{:a}\NormalTok{)}
2560
+ \NormalTok{puts }\StringTok{"\textbackslash{}n"}
2561
+ \NormalTok{puts my_mutate((~}\StringTok{:df}\NormalTok{), }\StringTok{:b}\NormalTok{)}
2562
+ \end{Highlighting}
2563
+ \end{Shaded}
2564
+
2565
+ \begin{verbatim}
2566
+ ## g1 g2 a b mean_a sum_a
2567
+ ## 1 1 1 2 1 3 15
2568
+ ## 2 1 2 4 3 3 15
2569
+ ## 3 2 1 5 4 3 15
2570
+ ## 4 2 2 3 2 3 15
2571
+ ## 5 2 1 1 5 3 15
2572
+ ##
2573
+ ## g1 g2 a b mean_b sum_b
2574
+ ## 1 1 1 2 1 3 15
2575
+ ## 2 1 2 4 3 3 15
2576
+ ## 3 2 1 5 4 3 15
2577
+ ## 4 2 2 3 2 3 15
2578
+ ## 5 2 1 1 5 3 15
2579
+ \end{verbatim}
2580
+
2581
+ It really seems that ``Non Standard Evaluation'' is actually quite
2582
+ standard in Galaaz! But, you might have noticed a small change in the
2583
+ way the arguments to the mutate method were called. In a previous
2584
+ example we used df.summarise(mean: E.mean(:a), \ldots{}) where the
2585
+ column name was followed by a `:' colon. In this example, we have
2586
+ df.mutate(mean\_name =\textgreater{} E.mean(expr), \ldots{}) and
2587
+ variable mean\_name is not followed by `:' but by `=\textgreater{}'.
2588
+ This is standard Ruby notation.
2589
+
2590
+ {[}explain\ldots{}.{]}
2591
+
2592
+ \hypertarget{capturing-multiple-variables}{%
2593
+ \subsection{Capturing multiple
2594
+ variables}\label{capturing-multiple-variables}}
2595
+
2596
+ Moving on with new complexities, Hadley asks us to solve the
2597
+ problem in which the summarise function will receive any number of
2598
+ grouping variables.
2599
+
2600
+ This again is quite standard Ruby. In order to receive an undefined
2601
+ number of parameters, the parameter is preceded by '*':
2602
+
2603
+ \begin{Shaded}
2604
+ \begin{Highlighting}[]
2605
+ \KeywordTok{def}\NormalTok{ my_summarise3(df, *group_vars)}
2606
+ \NormalTok{ df.group_by(*group_vars).}
2607
+ \NormalTok{ summarise(}\StringTok{a: }\NormalTok{E.mean(}\StringTok{:a}\NormalTok{))}
2608
+ \KeywordTok{end}
2609
+
2610
+ \NormalTok{puts my_summarise3((~}\StringTok{:df}\NormalTok{), }\StringTok{:g1}\NormalTok{, }\StringTok{:g2}\NormalTok{).as__data__frame}
2611
+ \end{Highlighting}
2612
+ \end{Shaded}
2613
+
2614
+ \begin{verbatim}
2615
+ ## g1 g2 a
2616
+ ## 1 1 1 2
2617
+ ## 2 1 2 4
2618
+ ## 3 2 1 3
2619
+ ## 4 2 2 3
2620
+ \end{verbatim}
2621
+
2622
+ \hypertarget{why-does-r-require-nse-and-galaaz-does-not}{%
2623
+ \subsection{Why does R require NSE and Galaaz does
2624
+ not?}\label{why-does-r-require-nse-and-galaaz-does-not}}
2625
+
2626
+ NSE introduces a number of new concepts, such as `quoting',
2627
+ `quasiquotation', `unquoting' and `unquote-splicing', while in Galaaz
2628
+ none of those concepts are needed. What gives?
2629
+
2630
+ R is an extremely flexible language and it has lazy evaluation of
2631
+ parameters. When in R a function is called as `summarise(df, a = b)',
2632
+ the summarise function receives the literal `a = b' parameter and can
2633
+ work with this as if it were a string. In R, it is not clear what a and
2634
+ b are, they can be expressions or they can be variables, it is up to the
2635
+ function to decide what `a = b' means.
2636
+
2637
+ In Ruby, there is no lazy evaluation of parameters and `a' is always a
2638
+ variable and so is `b'. Variables assume their value as soon as they are
2639
+ used, so `x = a' is immediately evaluated and variable `x' will receive
2640
+ the value of variable `a' as soon as the Ruby statement is executed.
2641
+ Ruby also provides the notion of a symbol; `:a' is a symbol and does not
2642
+ evaluate to anything. Galaaz uses Ruby symbols to build expressions that
2643
+ are not bound to anything: `:a.eq :b' is clearly an expression and has
2644
+ no relationship whatsoever with the statement `a = b'. By using symbols,
2645
+ variables and expressions all the possible ambiguities that are found in
2646
+ R are eliminated in Galaaz.
2647
+
2648
+ The main problem that remains, is that in R, functions are not clearly
2649
+ documented as to what type of input they are expecting; they might be
2650
+ expecting regular variables or they might be expecting expressions and
2651
+ the R function will know how to deal with an input of the form `a = b',
2652
+ now for the Ruby developer it might not be immediately clear if it
2653
+ should call the function passing the value `true' if variable `a' is
2654
+ equal to variable `b' or if it should call the function passing the
2655
+ expression `:a.eq :b'.
2656
+
2657
+ \hypertarget{advanced-dplyr-features}{%
2658
+ \subsection{Advanced dplyr features}\label{advanced-dplyr-features}}
2659
+
2660
+ In the blog: Programming with dplyr by using dplyr
2661
+ (\url{https://www.r-bloggers.com/programming-with-dplyr-by-using-dplyr/})
2662
+ Iñaki Úcar shows surprise that some R users are trying to code in dplyr
2663
+ avoiding the use of NSE. For instance he says:
2664
+
2665
+ \begin{quote}
2666
+ Take the example of seplyr. It stands for standard evaluation dplyr, and
2667
+ enables us to program over dplyr without having ``to bring in (or study)
2668
+ any deep-theory or heavy-weight tools such as rlang/tidyeval''.
2669
+ \end{quote}
2670
+
2671
+ For me, there isn't really any surprise that users are trying to avoid
2672
+ dplyr deep-theory. R users frequently are not programmers and learning
2673
+ to code is already hard business, on top of that, having to learn how to
2674
+ `quote' or `enquo' or `quos' or `enquos' is not necessarily a `piece of
2675
+ cake'. So much so, that `tidyeval' has some more advanced functions that
2676
+ instead of using quoted expressions, use strings as arguments.
2677
+
2678
+ In the following examples, we show the use of functions `group\_by\_at',
2679
+ `summarise\_at' and `rename\_at' that receive strings as arguments. The
2680
+ data frame used is `starwars', which describes features of characters in
2681
+ the Star Wars movies:
2682
+
2683
+ \begin{Shaded}
2684
+ \begin{Highlighting}[]
2685
+ \NormalTok{puts (~}\StringTok{:starwars}\NormalTok{).head.as__data__frame}
2686
+ \end{Highlighting}
2687
+ \end{Shaded}
2688
+
2689
+ \begin{verbatim}
2690
+ ## name height mass hair_color skin_color eye_color birth_year
2691
+ ## 1 Luke Skywalker 172 77 blond fair blue 19.0
2692
+ ## 2 C-3PO 167 75 <NA> gold yellow 112.0
2693
+ ## 3 R2-D2 96 32 <NA> white, blue red 33.0
2694
+ ## 4 Darth Vader 202 136 none white yellow 41.9
2695
+ ## 5 Leia Organa 150 49 brown light brown 19.0
2696
+ ## 6 Owen Lars 178 120 brown, grey light blue 52.0
2697
+ ## gender homeworld species
2698
+ ## 1 male Tatooine Human
2699
+ ## 2 <NA> Tatooine Droid
2700
+ ## 3 <NA> Naboo Droid
2701
+ ## 4 male Tatooine Human
2702
+ ## 5 female Alderaan Human
2703
+ ## 6 male Tatooine Human
2704
+ ## films
2705
+ ## 1 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2706
+ ## 2 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
2707
+ ## 3 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2708
+ ## 4 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
2709
+ ## 5 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2710
+ ## 6 Attack of the Clones, Revenge of the Sith, A New Hope
2711
+ ## vehicles starships
2712
+ ## 1 Snowspeeder, Imperial Speeder Bike X-wing, Imperial shuttle
2713
+ ## 2
2714
+ ## 3
2715
+ ## 4 TIE Advanced x1
2716
+ ## 5 Imperial Speeder Bike
2717
+ ## 6
2718
+ \end{verbatim}
2719
+
2720
+ The grouped\_mean function below will receive a grouping variable and
2721
+ calculate summaries for the value\_variables given:
2722
+
2723
+ \begin{Shaded}
2724
+ \begin{Highlighting}[]
2725
+ \NormalTok{grouped_mean <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(data, grouping_variables, value_variables) \{}
2726
+ \NormalTok{ data }\OperatorTok{%>%}
2727
+ \StringTok{ }\KeywordTok{group_by_at}\NormalTok{(grouping_variables) }\OperatorTok{%>%}
2728
+ \StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{count =} \KeywordTok{n}\NormalTok{()) }\OperatorTok{%>%}
2729
+ \StringTok{ }\KeywordTok{summarise_at}\NormalTok{(}\KeywordTok{c}\NormalTok{(value_variables, }\StringTok{"count"}\NormalTok{), mean, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{) }\OperatorTok{%>%}
2730
+ \StringTok{ }\KeywordTok{rename_at}\NormalTok{(value_variables, }\KeywordTok{funs}\NormalTok{(}\KeywordTok{paste0}\NormalTok{(}\StringTok{"mean_"}\NormalTok{, .)))}
2731
+ \NormalTok{ \}}
2732
+
2733
+ \NormalTok{gm =}\StringTok{ }\NormalTok{starwars }\OperatorTok{%>%}\StringTok{ }
2734
+ \StringTok{ }\KeywordTok{grouped_mean}\NormalTok{(}\StringTok{"eye_color"}\NormalTok{, }\KeywordTok{c}\NormalTok{(}\StringTok{"mass"}\NormalTok{, }\StringTok{"birth_year"}\NormalTok{))}
2735
+
2736
+ \KeywordTok{as.data.frame}\NormalTok{(gm) }
2737
+ \end{Highlighting}
2738
+ \end{Shaded}
2739
+
2740
+ \begin{verbatim}
2741
+ ## eye_color mean_mass mean_birth_year count
2742
+ ## 1 black 76.28571 33.00000 10
2743
+ ## 2 blue 86.51667 67.06923 19
2744
+ ## 3 blue-gray 77.00000 57.00000 1
2745
+ ## 4 brown 66.09231 108.96429 21
2746
+ ## 5 dark NaN NaN 1
2747
+ ## 6 gold NaN NaN 1
2748
+ ## 7 green, yellow 159.00000 NaN 1
2749
+ ## 8 hazel 66.00000 34.50000 3
2750
+ ## 9 orange 282.33333 231.00000 8
2751
+ ## 10 pink NaN NaN 1
2752
+ ## 11 red 81.40000 33.66667 5
2753
+ ## 12 red, blue NaN NaN 1
2754
+ ## 13 unknown 31.50000 NaN 3
2755
+ ## 14 white 48.00000 NaN 1
2756
+ ## 15 yellow 81.11111 76.38000 11
2757
+ \end{verbatim}
2758
+
2759
+ The same code with Galaaz, becomes:
2760
+
2761
+ \begin{Shaded}
2762
+ \begin{Highlighting}[]
2763
+ \KeywordTok{def}\NormalTok{ grouped_mean(data, grouping_variables, value_variables)}
2764
+ \NormalTok{ data.}
2765
+ \NormalTok{ group_by_at(grouping_variables).}
2766
+ \NormalTok{ mutate(}\StringTok{count: }\NormalTok{E.n).}
2767
+ \NormalTok{ summarise_at(E.c(value_variables, }\StringTok{"count"}\NormalTok{), ~}\StringTok{:mean}\NormalTok{, }\StringTok{na__rm: }\DecValTok{true}\NormalTok{).}
2768
+ \NormalTok{ rename_at(value_variables, E.funs(E.paste0(}\StringTok{"mean_"}\NormalTok{, value_variables)))}
2769
+ \KeywordTok{end}
2770
+
2771
+ \NormalTok{puts grouped_mean((~}\StringTok{:starwars}\NormalTok{), }\StringTok{"eye_color"}\NormalTok{, E.c(}\StringTok{"mass"}\NormalTok{, }\StringTok{"birth_year"}\NormalTok{)).as__data__frame}
2772
+ \end{Highlighting}
2773
+ \end{Shaded}
2774
+
2775
+ \begin{verbatim}
2776
+ ## eye_color mean_mass mean_birth_year count
2777
+ ## 1 black 76.28571 33.00000 10
2778
+ ## 2 blue 86.51667 67.06923 19
2779
+ ## 3 blue-gray 77.00000 57.00000 1
2780
+ ## 4 brown 66.09231 108.96429 21
2781
+ ## 5 dark NaN NaN 1
2782
+ ## 6 gold NaN NaN 1
2783
+ ## 7 green, yellow 159.00000 NaN 1
2784
+ ## 8 hazel 66.00000 34.50000 3
2785
+ ## 9 orange 282.33333 231.00000 8
2786
+ ## 10 pink NaN NaN 1
2787
+ ## 11 red 81.40000 33.66667 5
2788
+ ## 12 red, blue NaN NaN 1
2789
+ ## 13 unknown 31.50000 NaN 3
2790
+ ## 14 white 48.00000 NaN 1
2791
+ ## 15 yellow 81.11111 76.38000 11
2792
+ \end{verbatim}
2793
+
2794
+ {[}TO BE CONTINUED\ldots{}{]}
2795
+
2796
+ \hypertarget{contributing}{%
2797
+ \section{Contributing}\label{contributing}}
2798
+
2799
+ \begin{itemize}
2800
+ \tightlist
2801
+ \item
2802
+ Fork it
2803
+ \item
2804
+ Create your feature branch (git checkout -b my-new-feature)
2805
+ \item
2806
+ Write Tests!
2807
+ \item
2808
+ Commit your changes (git commit -am `Add some feature')
2809
+ \item
2810
+ Push to the branch (git push origin my-new-feature)
2811
+ \item
2812
+ Create new Pull Request
2813
+ \end{itemize}
2814
+
2815
+
2816
+ \end{document}