galaaz 0.4.9 → 0.4.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +798 -285
  3. data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +3 -12
  4. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +5 -7
  5. data/blogs/galaaz_ggplot/galaaz_ggplot.html +69 -29
  6. data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
  7. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
  8. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
  9. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/midwest_rb.pdf +0 -0
  10. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/scatter_plot_rb.pdf +0 -0
  11. data/blogs/galaaz_ggplot/midwest.Rmd +1 -9
  12. data/blogs/gknit/gknit.Rmd +37 -40
  13. data/blogs/gknit/gknit.html +32 -30
  14. data/blogs/gknit/gknit.md +36 -37
  15. data/blogs/gknit/gknit.pdf +0 -0
  16. data/blogs/gknit/gknit.tex +35 -37
  17. data/blogs/manual/manual.Rmd +548 -125
  18. data/blogs/manual/manual.html +509 -286
  19. data/blogs/manual/manual.md +798 -285
  20. data/blogs/manual/manual.pdf +0 -0
  21. data/blogs/manual/manual.tex +2816 -0
  22. data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
  23. data/blogs/nse_dplyr/nse_dplyr.Rmd +240 -74
  24. data/blogs/nse_dplyr/nse_dplyr.html +191 -87
  25. data/blogs/nse_dplyr/nse_dplyr.md +361 -107
  26. data/blogs/nse_dplyr/nse_dplyr.pdf +0 -0
  27. data/blogs/nse_dplyr/nse_dplyr.tex +1373 -0
  28. data/blogs/ruby_plot/ruby_plot.Rmd +61 -81
  29. data/blogs/ruby_plot/ruby_plot.html +54 -57
  30. data/blogs/ruby_plot/ruby_plot.md +48 -67
  31. data/blogs/ruby_plot/ruby_plot.pdf +0 -0
  32. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  33. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  34. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  35. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  36. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  42. data/blogs/ruby_plot/ruby_plot_files/figure-latex/dose_len.png +0 -0
  43. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_delivery.png +0 -0
  44. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_dose.png +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color.png +0 -0
  46. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color2.png +0 -0
  47. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_decorations.png +0 -0
  48. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_jitter.png +0 -0
  49. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_points.png +0 -0
  50. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_box_plot.png +0 -0
  51. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_violin_plot.png +0 -0
  52. data/blogs/ruby_plot/ruby_plot_files/figure-latex/violin_with_jitter.png +0 -0
  53. data/lib/R_interface/rdata_frame.rb +0 -12
  54. data/lib/R_interface/robject.rb +14 -14
  55. data/lib/R_interface/ruby_extensions.rb +3 -31
  56. data/lib/R_interface/rvector.rb +0 -12
  57. data/lib/gknit/knitr_engine.rb +5 -3
  58. data/lib/util/exec_ruby.rb +22 -61
  59. data/specs/tmp.rb +26 -12
  60. data/version.rb +1 -1
  61. metadata +22 -17
  62. data/bin/gknit_old_r +0 -236
  63. data/blogs/dev/dev.Rmd +0 -23
  64. data/blogs/dev/dev.md +0 -58
  65. data/blogs/dev/dev2.Rmd +0 -65
  66. data/blogs/dev/model.rb +0 -41
  67. data/blogs/dplyr/dplyr.Rmd +0 -29
  68. data/blogs/dplyr/dplyr.html +0 -433
  69. data/blogs/dplyr/dplyr.md +0 -58
  70. data/blogs/dplyr/dplyr.rb +0 -63
  71. data/blogs/galaaz_ggplot/galaaz_ggplot.log +0 -640
  72. data/blogs/galaaz_ggplot/galaaz_ggplot.md +0 -431
  73. data/blogs/galaaz_ggplot/galaaz_ggplot.tex +0 -481
  74. data/blogs/galaaz_ggplot/midwest.png +0 -0
  75. data/blogs/galaaz_ggplot/scatter_plot.png +0 -0
  76. data/blogs/ruby_plot/ruby_plot.tex +0 -1077
@@ -0,0 +1,1373 @@
1
+ \documentclass[11pt,]{article}
2
+ \usepackage{lmodern}
3
+ \usepackage{amssymb,amsmath}
4
+ \usepackage{ifxetex,ifluatex}
5
+ \usepackage{fixltx2e} % provides \textsubscript
6
+ \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
7
+ \usepackage[T1]{fontenc}
8
+ \usepackage[utf8]{inputenc}
9
+ \else % if luatex or xelatex
10
+ \ifxetex
11
+ \usepackage{mathspec}
12
+ \else
13
+ \usepackage{fontspec}
14
+ \fi
15
+ \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
16
+ \fi
17
+ % use upquote if available, for straight quotes in verbatim environments
18
+ \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
19
+ % use microtype if available
20
+ \IfFileExists{microtype.sty}{%
21
+ \usepackage{microtype}
22
+ \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
23
+ }{}
24
+ \usepackage[margin=1in]{geometry}
25
+ \usepackage{hyperref}
26
+ \hypersetup{unicode=true,
27
+ pdftitle={Non Standard Evaluation in dplyr with Galaaz},
28
+ pdfauthor={Rodrigo Botafogo; Daniel Mossé - University of Pittsburgh},
29
+ pdfborder={0 0 0},
30
+ breaklinks=true}
31
+ \urlstyle{same} % don't use monospace font for urls
32
+ \usepackage{color}
33
+ \usepackage{fancyvrb}
34
+ \newcommand{\VerbBar}{|}
35
+ \newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
36
+ \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
37
+ % Add ',fontsize=\small' for more characters per line
38
+ \usepackage{framed}
39
+ \definecolor{shadecolor}{RGB}{248,248,248}
40
+ \newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}}
41
+ \newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{#1}}
42
+ \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
43
+ \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.77,0.63,0.00}{#1}}
44
+ \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
45
+ \newcommand{\BuiltInTok}[1]{#1}
46
+ \newcommand{\CharTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
47
+ \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
48
+ \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
49
+ \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
50
+ \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
51
+ \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{#1}}
52
+ \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
53
+ \newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
54
+ \newcommand{\ErrorTok}[1]{\textcolor[rgb]{0.64,0.00,0.00}{\textbf{#1}}}
55
+ \newcommand{\ExtensionTok}[1]{#1}
56
+ \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
57
+ \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
58
+ \newcommand{\ImportTok}[1]{#1}
59
+ \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
60
+ \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
61
+ \newcommand{\NormalTok}[1]{#1}
62
+ \newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.81,0.36,0.00}{\textbf{#1}}}
63
+ \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{#1}}
64
+ \newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
65
+ \newcommand{\RegionMarkerTok}[1]{#1}
66
+ \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
67
+ \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
68
+ \newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
69
+ \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
70
+ \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
71
+ \newcommand{\WarningTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
72
+ \usepackage{graphicx,grffile}
73
+ \makeatletter
74
+ \def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
75
+ \def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
76
+ \makeatother
77
+ % Scale images if necessary, so that they will not overflow the page
78
+ % margins by default, and it is still possible to overwrite the defaults
79
+ % using explicit options in \includegraphics[width, height, ...]{}
80
+ \setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
81
+ \IfFileExists{parskip.sty}{%
82
+ \usepackage{parskip}
83
+ }{% else
84
+ \setlength{\parindent}{0pt}
85
+ \setlength{\parskip}{6pt plus 2pt minus 1pt}
86
+ }
87
+ \setlength{\emergencystretch}{3em} % prevent overfull lines
88
+ \providecommand{\tightlist}{%
89
+ \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
90
+ \setcounter{secnumdepth}{5}
91
+ % Redefines (sub)paragraphs to behave more like sections
92
+ \ifx\paragraph\undefined\else
93
+ \let\oldparagraph\paragraph
94
+ \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
95
+ \fi
96
+ \ifx\subparagraph\undefined\else
97
+ \let\oldsubparagraph\subparagraph
98
+ \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
99
+ \fi
100
+
101
+ %%% Use protect on footnotes to avoid problems with footnotes in titles
102
+ \let\rmarkdownfootnote\footnote%
103
+ \def\footnote{\protect\rmarkdownfootnote}
104
+
105
+ %%% Change title format to be more compact
106
+ \usepackage{titling}
107
+
108
+ % Create subtitle command for use in maketitle
109
+ \newcommand{\subtitle}[1]{
110
+ \posttitle{
111
+ \begin{center}\large#1\end{center}
112
+ }
113
+ }
114
+
115
+ \setlength{\droptitle}{-2em}
116
+
117
+ \title{Non Standard Evaluation in dplyr with Galaaz}
118
+ \pretitle{\vspace{\droptitle}\centering\huge}
119
+ \posttitle{\par}
120
+ \author{Rodrigo Botafogo \\ Daniel Mossé - University of Pittsburgh}
121
+ \preauthor{\centering\large\emph}
122
+ \postauthor{\par}
123
+ \predate{\centering\large\emph}
124
+ \postdate{\par}
125
+ \date{10/05/2019}
126
+
127
+ % usar portugues do Brasil
128
+ % \usepackage[brazilian]{babel}
129
+ \usepackage[utf8]{inputenc}
130
+
131
+ \usepackage{geometry}
132
+ \geometry{a4paper, top=1in}
133
+
134
+ % needed for kableExtra
135
+ \usepackage{longtable}
136
+ \usepackage{multirow}
137
+ \usepackage[table]{xcolor}
138
+ \usepackage{wrapfig}
139
+ \usepackage{float}
140
+ \usepackage{colortbl}
141
+ \usepackage{pdflscape}
142
+ \usepackage{tabu}
143
+ \usepackage{threeparttable}
144
+ \usepackage[normalem]{ulem}
145
+
146
+ \usepackage{bbm}
147
+ \usepackage{booktabs}
148
+ \usepackage{expex}
149
+
150
+ \usepackage{graphicx}
151
+
152
+ \usepackage{fancyhdr}
153
+ % set the header and foot style
154
+ % style 'fancy' adds the section name on the header
155
+ % and the page number on the footer
156
+ \pagestyle{fancy}
157
+
158
+ % style 'fancyhf' leaves header and footer empty
159
+ %\fancyhf{}
160
+
161
+ % sets the left head element to \rightmark, which contains the
162
+ % current section (\leftmark is the current chapter)
163
+ %\fancyhead[L]{\rightmark} .
164
+
165
+ % sets the right head element to the page number.
166
+ % \fancyhead[R]{\thepage}
167
+
168
+ % lets the head rule disappear.
169
+ % \renewcommand{\headrulewidth}{0pt}
170
+ % Possible selectors for the optional argument of \fancyhead/\fancyfoot
171
+ % are L (left), C (center) or R (right) for the position of the element
172
+ % and E (even) or O (odd) to distinguish even and odd pages. If you omit
173
+ % E/O the element is set for all pages.
174
+
175
+ % \usepackage{lipsum}
176
+
177
+ % make available command lastpage
178
+ \usepackage{lastpage}
179
+
180
+ % default fontsize 11pt better to add
181
+ % fontsize on the yaml header
182
+ % \usepackage[fontsize=11pt]{scrextend}
183
+
184
+ % comandos para formatar uma tabela
185
+ \usepackage{array}
186
+ \newcolumntype{L}[1]{>{\raggedright\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
187
+ \newcolumntype{C}[1]{>{\centering\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
188
+ \newcolumntype{R}[1]{>{\raggedleft\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
189
+
190
+ % needed when importing other LaTeX documents
191
+ \usepackage{import}
192
+
193
+ % Command to import an R variable to latex
194
+ \newcommand{\RtoLatex}[2]{\newcommand{#1}{#2}}
195
+
196
+ %
197
+ %\newcommand{\atraso}[1]{\color{red} \textbf {Tempo desde a Assinatura do Contrato: #1 dias}}
198
+
199
+ \begin{document}
200
+ \maketitle
201
+
202
+ {
203
+ \setcounter{tocdepth}{2}
204
+ \tableofcontents
205
+ }
206
+ \hypertarget{introduction}{%
207
+ \section{Introduction}\label{introduction}}
208
+
209
+ In this post we will see how to program with \emph{dplyr} in Galaaz.
210
+
211
+ \hypertarget{but-first-what-is-galaaz}{%
212
+ \subsection{But first, what is
213
+ Galaaz??}\label{but-first-what-is-galaaz}}
214
+
215
+ Galaaz is a system for tightly coupling Ruby and R. Ruby is a powerful
216
+ language, with a large community, a very large set of libraries and
217
+ great for web development. However, it lacks libraries for data science,
218
+ statistics, scientific plotting and machine learning. On the other hand,
219
+ R is considered one of the most powerful languages for solving all of
220
+ the above problems. Maybe the strongest competitor to R is Python with
221
+ libraries such as NumPy, Pandas, SciPy, SciKit-Learn and many more.
222
+
223
+ With Galaaz we do not intend to re-implement any of the scientific
224
+ libraries in R. However, we allow for very tight coupling between the
225
+ two languages to the point that the Ruby developer does not need to know
226
+ that there is an R engine running. Also, from the point of view of the R
227
+ user/developer Galaaz looks a lot like R, with just minor syntactic
228
+ differences, so there is almost no learning curve for the R developer.
229
+ And as we will see in this post, programming with \emph{dplyr} is easier
230
+ in Galaaz than in R.
231
+
232
+ R users are probably quite knowledgeable about \emph{dplyr}. For the
233
+ Ruby developer, \emph{dplyr} and the \emph{tidyverse} libraries are a
234
+ set of libraries for data manipulation in R, developed by Hadley
235
+ Wickham, chief scientist at RStudio and a prolific R coder and writer.
236
+
237
+ For the coupling of Ruby and R we use new technologies provided by
238
+ Oracle: GraalVM, TruffleRuby and FastR:
239
+
240
+ \begin{verbatim}
241
+ GraalVM is a universal virtual machine for running applications
242
+ written in JavaScript, Python 3, Ruby, R, JVM-based languages like Java,
243
+ Scala, Kotlin, and LLVM-based languages such as C and C++.
244
+
245
+ GraalVM removes the isolation between programming languages and enables
246
+ interoperability in a shared runtime. It can run either standalone or in
247
+ the context of OpenJDK, Node.js, Oracle Database, or MySQL.
248
+
249
+ GraalVM allows you to write polyglot applications with a seamless way to
250
+ pass values from one language to another. With GraalVM there is no copying
251
+ or marshaling necessary as it is with other polyglot systems. This lets
252
+ you achieve high performance when language boundaries are crossed. Most
253
+ of the time there is no additional cost for crossing a language boundary
254
+ at all.
255
+
256
+ Often developers have to make uncomfortable compromises that require them
257
+ to rewrite their software in other languages. For example:
258
+
259
+ * “That library is not available in my language. I need to rewrite it.”
260
+ * “That language would be the perfect fit for my problem, but we cannot
261
+ run it in our environment.”
262
+ * “That problem is already solved in my language, but the language is
263
+ too slow.”
264
+
265
+ With GraalVM we aim to allow developers to freely choose the right language
266
+ for the task at hand without making compromises.
267
+ \end{verbatim}
268
+
269
+ Interested readers should also check out the following sites:
270
+
271
+ \begin{itemize}
272
+ \tightlist
273
+ \item
274
+ \href{https://www.graalvm.org/}{GraalVM Home}
275
+ \item
276
+ \href{https://github.com/oracle/truffleruby}{TruffleRuby}
277
+ \item
278
+ \href{https://github.com/oracle/fastr}{FastR}
279
+ \item
280
+ \href{https://medium.com/graalvm/faster-r-with-fastr-4b8db0e0dceb}{Faster
281
+ R with FastR}
282
+ \item
283
+ \href{https://medium.freecodecamp.org/how-to-make-beautiful-ruby-plots-with-galaaz-320848058857}{How
284
+ to make Beautiful Ruby Plots with Galaaz}
285
+ \item
286
+ \href{https://towardsdatascience.com/ruby-plotting-with-galaaz-an-example-of-tightly-coupling-ruby-and-r-in-graalvm-520b69e21021}{Ruby
287
+ Plotting with Galaaz: An example of tightly coupling Ruby and R in
288
+ GraalVM}
289
+ \item
290
+ \href{https://towardsdatascience.com/how-to-do-reproducible-research-in-ruby-with-gknit-c26d2684d64e}{How
291
+ to do reproducible research in Ruby with gKnit}
292
+ \item
293
+ \href{https://r4ds.had.co.nz/}{R for Data Science}
294
+ \item
295
+ \href{https://adv-r.hadley.nz/}{Advanced R}
296
+ \end{itemize}
297
+
298
+ \hypertarget{programming-with-dplyr}{%
299
+ \subsection{Programming with dplyr}\label{programming-with-dplyr}}
300
+
301
+ This post will follow closely the work done in
302
+ \url{https://dplyr.tidyverse.org/articles/programming.html}, by Hadley
303
+ Wickham. In it, Hadley states:
304
+
305
+ \begin{quote}
306
+ Most dplyr functions use non-standard evaluation (NSE). This is a
307
+ catch-all term that means they don't follow the usual R rules of
308
+ evaluation. Instead, they capture the expression that you typed and
309
+ evaluate it in a custom way. This has two main benefits for dplyr code:
310
+ \end{quote}
311
+
312
+ \begin{quote}
313
+ Operations on data frames can be expressed succinctly because you don't
314
+ need to repeat the name of the data frame. For example, you can write
315
+ filter(df, x == 1, y == 2, z == 3) instead of df{[}df\$x == 1 \& df\$y
316
+ ==2 \& df\$z == 3, {]}.
317
+ \end{quote}
318
+
319
+ \begin{quote}
320
+ dplyr can choose to compute results in a different way to base R. This
321
+ is important for database backends because dplyr itself doesn't do any
322
+ work, but instead generates the SQL that tells the database what to do.
323
+ \end{quote}
324
+
325
+ \begin{quote}
326
+ Unfortunately these benefits do not come for free. There are two main
327
+ drawbacks:
328
+ \end{quote}
329
+
330
+ \begin{quote}
331
+ Most dplyr arguments are not referentially transparent. That means you
332
+ can't replace a value with a seemingly equivalent object that you've
333
+ defined elsewhere. In other words, this code:
334
+ \end{quote}
335
+
336
+ \begin{Shaded}
337
+ \begin{Highlighting}[]
338
+ \NormalTok{df <-}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\DataTypeTok{x =} \DecValTok{1}\OperatorTok{:}\DecValTok{3}\NormalTok{, }\DataTypeTok{y =} \DecValTok{3}\OperatorTok{:}\DecValTok{1}\NormalTok{)}
339
+ \KeywordTok{print}\NormalTok{(}\KeywordTok{filter}\NormalTok{(df, x }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{))}
340
+ \CommentTok{#> # A tibble: 1 x 2}
341
+ \CommentTok{#> x y}
342
+ \CommentTok{#> <int> <int>}
343
+ \CommentTok{#> 1 1 3}
344
+ \end{Highlighting}
345
+ \end{Shaded}
346
+
347
+ \begin{quote}
348
+ Is not equivalent to this code:
349
+ \end{quote}
350
+
351
+ \begin{Shaded}
352
+ \begin{Highlighting}[]
353
+ \NormalTok{my_var <-}\StringTok{ }\NormalTok{x}
354
+ \CommentTok{#> Error in eval(expr, envir, enclos): object 'x' not found}
355
+ \KeywordTok{filter}\NormalTok{(df, my_var }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{)}
356
+ \CommentTok{#> Error: object 'my_var' not found}
357
+ \end{Highlighting}
358
+ \end{Shaded}
359
+
360
+ \begin{quote}
361
+ This makes it hard to create functions with arguments that change how
362
+ dplyr verbs are computed.
363
+ \end{quote}
364
+
365
+ In this post we will see that programming with \emph{dplyr} in Galaaz
366
+ does not require knowledge of non-standard evaluation in R and can be
367
+ accomplished by utilizing normal Ruby constructs.
368
+
369
+ \hypertarget{writing-expressions-in-galaaz}{%
370
+ \section{Writing Expressions in
371
+ Galaaz}\label{writing-expressions-in-galaaz}}
372
+
373
+ Galaaz extends Ruby to work with expressions, similar to R's expressions
374
+ built with `quote' (base R) or `quo' (tidyverse). Expressions in this
375
+ context are like mathematical expressions or formulae. For instance, in
376
+ mathematics, the expression \(y = sin(x)\) describes a function but
377
+ cannot be computed unless the value of \(x\) is bound to some value.
378
+
379
+ Let's take a look at some of those expressions in Ruby:
380
+
381
+ \hypertarget{expressions-from-operators}{%
382
+ \subsection{Expressions from
383
+ operators}\label{expressions-from-operators}}
384
+
385
+ The code below creates an expression summing two symbols. Note that :a
386
+ and :b are Ruby symbols and are not bound to any value at the time of
387
+ expression definition:
388
+
389
+ \begin{Shaded}
390
+ \begin{Highlighting}[]
391
+ \NormalTok{exp1 = }\StringTok{:a}\NormalTok{ + }\StringTok{:b}
392
+ \NormalTok{puts exp1}
393
+ \end{Highlighting}
394
+ \end{Shaded}
395
+
396
+ \begin{verbatim}
397
+ ## a + b
398
+ \end{verbatim}
399
+
400
+ We can build any complex mathematical expression such as:
401
+
402
+ \begin{Shaded}
403
+ \begin{Highlighting}[]
404
+ \NormalTok{exp2 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) * }\FloatTok{2.0}\NormalTok{ + }\StringTok{:c}\NormalTok{ ** }\DecValTok{2}\NormalTok{ / }\StringTok{:z}
405
+ \NormalTok{puts exp2}
406
+ \end{Highlighting}
407
+ \end{Shaded}
408
+
409
+ \begin{verbatim}
410
+ ## (a + b) * 2 + c^2L/z
411
+ \end{verbatim}
412
+
413
+ The `L' after two indicates that 2 is an integer.
414
+
415
+ It is also possible to use inequality operators in building expressions:
416
+
417
+ \begin{Shaded}
418
+ \begin{Highlighting}[]
419
+ \NormalTok{exp3 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) >= }\StringTok{:z}
420
+ \NormalTok{puts exp3}
421
+ \end{Highlighting}
422
+ \end{Shaded}
423
+
424
+ \begin{verbatim}
425
+ ## a + b >= z
426
+ \end{verbatim}
427
+
428
+ Expressions' definition can also make use of normal Ruby variables
429
+ without any problem:
430
+
431
+ \begin{Shaded}
432
+ \begin{Highlighting}[]
433
+ \NormalTok{x = }\DecValTok{20}
434
+ \NormalTok{y = }\DecValTok{30}
435
+ \NormalTok{exp_var = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) * x <= }\StringTok{:z}\NormalTok{ - y}
436
+ \NormalTok{puts exp_var}
437
+ \end{Highlighting}
438
+ \end{Shaded}
439
+
440
+ \begin{verbatim}
441
+ ## (a + b) * 20L <= z - 30L
442
+ \end{verbatim}
443
+
444
+ Galaaz provides both symbolic representations for operators, such as
445
+ (\textgreater{}, \textless{}, !=) and functional notation for those
446
+ operators such as (.gt, .ge, etc.). So the same expression written above
447
+ can also be written as
448
+
449
+ \begin{Shaded}
450
+ \begin{Highlighting}[]
451
+ \NormalTok{exp4 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{).ge }\StringTok{:z}
452
+ \NormalTok{puts exp4}
453
+ \end{Highlighting}
454
+ \end{Shaded}
455
+
456
+ \begin{verbatim}
457
+ ## a + b >= z
458
+ \end{verbatim}
459
+
460
+ Two types of expression, however, can only be created with the functional
461
+ representation of the operators, those are expressions involving `==',
462
+ and `='. In order to write an expression involving `==' we need to use
463
+ the method `.eq' and for `=' we need the function `.assign'
464
+
465
+ \begin{Shaded}
466
+ \begin{Highlighting}[]
467
+ \NormalTok{exp5 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{).eq }\StringTok{:z}
468
+ \NormalTok{puts exp5}
469
+ \end{Highlighting}
470
+ \end{Shaded}
471
+
472
+ \begin{verbatim}
473
+ ## a + b == z
474
+ \end{verbatim}
475
+
476
+ \begin{Shaded}
477
+ \begin{Highlighting}[]
478
+ \NormalTok{exp6 = }\StringTok{:y}\NormalTok{.assign }\StringTok{:a}\NormalTok{ + }\StringTok{:b}
479
+ \NormalTok{puts exp6}
480
+ \end{Highlighting}
481
+ \end{Shaded}
482
+
483
+ \begin{verbatim}
484
+ ## y <- a + b
485
+ \end{verbatim}
486
+
487
+ In general we think that using the functional notation is preferable to
488
+ using the symbolic notation as otherwise, we end up writing invalid
489
+ expressions such as
490
+
491
+ \begin{Shaded}
492
+ \begin{Highlighting}[]
493
+ \NormalTok{exp_wrong = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) == }\StringTok{:z}
494
+ \NormalTok{puts exp_wrong}
495
+ \end{Highlighting}
496
+ \end{Shaded}
497
+
498
+ \begin{verbatim}
499
+ ## Message:
500
+ ## Error in function (x, y, num.eq = TRUE, single.NA = TRUE, attrib.as.set = TRUE, :
501
+ ## object 'a' not found (RError)
502
+ ## Translated to internal error
503
+ \end{verbatim}
504
+
505
+ and it might be difficult to understand what is going on here. The
506
+ problem lies with the fact that when using `==' we are comparing
507
+ expression (:a + :b) to expression :z with `=='. When the comparison is
508
+ executed, the system tries to evaluate :a, :b and :z, and those symbols
509
+ at this time are not bound to anything and we get an ``object `a' not
510
+ found'' message. If we only use functional notation, this type of error
511
+ will not occur.
512
+
513
+ \hypertarget{expressions-with-r-methods}{%
514
+ \subsection{Expressions with R
515
+ methods}\label{expressions-with-r-methods}}
516
+
517
+ It is often necessary to create an expression that uses a method or
518
+ function. For instance, in mathematics, it's quite natural to write an
519
+ expression such as \(y = sin(x)\). In this case, the `sin' function is
520
+ part of the expression and should not immediately be executed. When we
521
+ want the function to be part of the expression, we call the function
522
+ preceding it by the letter E, such as `E.sin(x)'
523
+
524
+ \begin{Shaded}
525
+ \begin{Highlighting}[]
526
+ \NormalTok{exp7 = }\StringTok{:y}\NormalTok{.assign E.sin(}\StringTok{:x}\NormalTok{)}
527
+ \NormalTok{puts exp7}
528
+ \end{Highlighting}
529
+ \end{Shaded}
530
+
531
+ \begin{verbatim}
532
+ ## y <- sin(x)
533
+ \end{verbatim}
534
+
535
+ Expressions can also be written using `.' notation:
536
+
537
+ \begin{Shaded}
538
+ \begin{Highlighting}[]
539
+ \NormalTok{exp8 = }\StringTok{:y}\NormalTok{.assign }\StringTok{:x}\NormalTok{.sin}
540
+ \NormalTok{puts exp8}
541
+ \end{Highlighting}
542
+ \end{Shaded}
543
+
544
+ \begin{verbatim}
545
+ ## y <- sin(x)
546
+ \end{verbatim}
547
+
548
+ When a function has multiple arguments, the first one can be used before
549
+ the `.':
550
+
551
+ \begin{Shaded}
552
+ \begin{Highlighting}[]
553
+ \NormalTok{exp9 = }\StringTok{:x}\NormalTok{.c(}\StringTok{:y}\NormalTok{)}
554
+ \NormalTok{puts exp9}
555
+ \end{Highlighting}
556
+ \end{Shaded}
557
+
558
+ \begin{verbatim}
559
+ ## c(x, y)
560
+ \end{verbatim}
561
+
562
+ \hypertarget{evaluating-an-expression}{%
563
+ \subsection{Evaluating an Expression}\label{evaluating-an-expression}}
564
+
565
+ Expressions can be evaluated by calling function `eval' with a binding.
566
+ A binding can be provided with a list:
567
+
568
+ \begin{Shaded}
569
+ \begin{Highlighting}[]
570
+ \NormalTok{exp = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) * }\FloatTok{2.0}\NormalTok{ + }\StringTok{:c}\NormalTok{ ** }\DecValTok{2}\NormalTok{ / }\StringTok{:z}
571
+ \NormalTok{puts exp.eval(R.list(}\StringTok{a: }\DecValTok{10}\NormalTok{, }\StringTok{b: }\DecValTok{20}\NormalTok{, }\StringTok{c: }\DecValTok{30}\NormalTok{, }\StringTok{z: }\DecValTok{40}\NormalTok{))}
572
+ \end{Highlighting}
573
+ \end{Shaded}
574
+
575
+ \begin{verbatim}
576
+ ## [1] 82.5
577
+ \end{verbatim}
578
+
579
+ \ldots{} with a data frame:
580
+
581
+ \begin{Shaded}
582
+ \begin{Highlighting}[]
583
+ \NormalTok{df = R.data__frame(}
584
+ \StringTok{a: }\NormalTok{R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{),}
585
+ \StringTok{b: }\NormalTok{R.c(}\DecValTok{10}\NormalTok{, }\DecValTok{20}\NormalTok{, }\DecValTok{30}\NormalTok{),}
586
+ \StringTok{c: }\NormalTok{R.c(}\DecValTok{100}\NormalTok{, }\DecValTok{200}\NormalTok{, }\DecValTok{300}\NormalTok{),}
587
+ \StringTok{z: }\NormalTok{R.c(}\DecValTok{1000}\NormalTok{, }\DecValTok{2000}\NormalTok{, }\DecValTok{3000}\NormalTok{))}
588
+
589
+ \NormalTok{puts exp.eval(df)}
590
+ \end{Highlighting}
591
+ \end{Shaded}
592
+
593
+ \begin{verbatim}
594
+ ## [1] 32 64 96
595
+ \end{verbatim}
596
+
597
+ \hypertarget{using-galaaz-to-call-r-functions}{%
598
+ \section{Using Galaaz to call R
599
+ functions}\label{using-galaaz-to-call-r-functions}}
600
+
601
+ Galaaz tries to emulate as closely as possible the way R functions are
602
+ called and migrating from R to Galaaz should be quite easy requiring
603
+ only minor syntactic changes to an R script. In this post, we do not
604
+ have enough space to write a complete manual on Galaaz (a short manual
605
+ can be found at: \url{https://www.rubydoc.info/gems/galaaz/0.4.9}), so
606
+ we will present only a few example scripts using Galaaz.
607
+
608
+ Basically, to call an R function from Ruby with Galaaz, one only needs
609
+ to precede the function with `R.'. For instance, to create a vector in
610
+ R, the `c' function is used. From Galaaz, a vector can be created by
611
+ using `R.c':
612
+
613
+ \begin{Shaded}
614
+ \begin{Highlighting}[]
615
+ \NormalTok{vec = R.c(}\FloatTok{1.0}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{)}
616
+ \NormalTok{puts vec}
617
+ \end{Highlighting}
618
+ \end{Shaded}
619
+
620
+ \begin{verbatim}
621
+ ## [1] 1 2 3
622
+ \end{verbatim}
623
+
624
+ A list is created in R with the `list' function, so in Galaaz we do:
625
+
626
+ \begin{Shaded}
627
+ \begin{Highlighting}[]
628
+ \NormalTok{list = R.list(}\StringTok{a: }\FloatTok{1.0}\NormalTok{, }\StringTok{b: }\DecValTok{2}\NormalTok{, }\StringTok{c: }\DecValTok{3}\NormalTok{)}
629
+ \NormalTok{puts list}
630
+ \end{Highlighting}
631
+ \end{Shaded}
632
+
633
+ \begin{verbatim}
634
+ ## $a
635
+ ## [1] 1
636
+ ##
637
+ ## $b
638
+ ## [1] 2
639
+ ##
640
+ ## $c
641
+ ## [1] 3
642
+ \end{verbatim}
643
+
644
+ Note that we can use named arguments in our list. The same code in R
645
+ would be:
646
+
647
+ \begin{Shaded}
648
+ \begin{Highlighting}[]
649
+ \NormalTok{lst =}\StringTok{ }\KeywordTok{list}\NormalTok{(}\DataTypeTok{a =} \DecValTok{1}\NormalTok{, }\DataTypeTok{b =}\NormalTok{ 2L, }\DataTypeTok{c =}\NormalTok{ 3L)}
650
+ \KeywordTok{print}\NormalTok{(lst)}
651
+ \end{Highlighting}
652
+ \end{Shaded}
653
+
654
+ \begin{verbatim}
655
+ ## $a
656
+ ## [1] 1
657
+ ##
658
+ ## $b
659
+ ## [1] 2
660
+ ##
661
+ ## $c
662
+ ## [1] 3
663
+ \end{verbatim}
664
+
665
+ Now, let's say that `x' is an angle of 45 radians and we actually
665
+ want to create the expression \(y = sin(45)\), which is
667
+ \(y = 0.850...\). In this case, we will use `R.sin':
668
+
669
+ \begin{Shaded}
670
+ \begin{Highlighting}[]
671
+ \NormalTok{exp10 = }\StringTok{:y}\NormalTok{.assign R.sin(}\DecValTok{45}\NormalTok{)}
672
+ \NormalTok{puts exp10}
673
+ \end{Highlighting}
674
+ \end{Shaded}
675
+
676
+ \begin{verbatim}
677
+ ## y <- 0.850903524534118
678
+ \end{verbatim}
679
+
680
+ \hypertarget{filtering-using-expressions}{%
681
+ \section{Filtering using
682
+ expressions}\label{filtering-using-expressions}}
683
+
684
+ Now that we know how to write expressions and call R functions let's do
685
+ some data manipulation in Galaaz. Let's first start by creating the same
686
+ data frame that we created previously in section ``Programming with
687
+ dplyr'':
688
+
689
+ \begin{Shaded}
690
+ \begin{Highlighting}[]
691
+ \NormalTok{df = R.data__frame(}\StringTok{x: }\NormalTok{(}\DecValTok{1}\NormalTok{..}\DecValTok{3}\NormalTok{), }\StringTok{y: }\NormalTok{(}\DecValTok{3}\NormalTok{..}\DecValTok{1}\NormalTok{))}
692
+ \NormalTok{puts df}
693
+ \end{Highlighting}
694
+ \end{Shaded}
695
+
696
+ \begin{verbatim}
697
+ ## x y
698
+ ## 1 1 3
699
+ ## 2 2 2
700
+ ## 3 3 1
701
+ \end{verbatim}
702
+
703
+ The `filter' function can be called on this data frame either by using
704
+ `R.filter(df, \ldots{})' or by using dot notation. We prefer to use dot
705
+ notation as shown below. The argument to `filter' in Galaaz should be
706
+ an expression. Note that if we gave filter a Ruby expression such as
707
+ `x == 1', we would get an error, since there is no variable `x' defined
708
+ and if `x' was a variable then `x == 1' would either be `true' or
709
+ `false'. Our goal is to filter our data frame returning all rows in
710
+ which the `x' value is equal to 1. To express this we want: `:x.eq 1',
711
+ where :x will be interpreted by filter as the `x' column.
712
+
713
+ \begin{Shaded}
714
+ \begin{Highlighting}[]
715
+ \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{.eq }\DecValTok{1}\NormalTok{)}
716
+ \end{Highlighting}
717
+ \end{Shaded}
718
+
719
+ \begin{verbatim}
720
+ ## x y
721
+ ## 1 1 3
722
+ \end{verbatim}
723
+
724
+ In R, and when coding with `tidyverse', arguments to a function are
725
+ usually not \emph{referentially transparent}. That is, you can't replace
726
+ a value with a seemingly equivalent object that you've defined
727
+ elsewhere. In other words, this code
728
+
729
+ \begin{Shaded}
730
+ \begin{Highlighting}[]
731
+ \NormalTok{my_var <-}\StringTok{ }\NormalTok{x}
732
+ \KeywordTok{filter}\NormalTok{(df, my_var }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{)}
733
+ \end{Highlighting}
734
+ \end{Shaded}
735
+
736
+ Generates the following error: ``object `x' not found''.
737
+
738
+ However, in Galaaz, arguments are referentially transparent as can be
739
+ seen by the code below. Note initially that `my\_var = :x' will not give
740
+ the error ``object `x' not found'' since `:x' is treated as an
741
+ expression and assigned to my\_var. Then when doing (my\_var.eq 1),
742
+ my\_var is a variable that resolves to `:x' and it becomes equivalent to
743
+ (:x.eq 1) which is what we want.
744
+
745
+ \begin{Shaded}
746
+ \begin{Highlighting}[]
747
+ \NormalTok{my_var = }\StringTok{:x}
748
+ \NormalTok{puts df.filter(my_var.eq }\DecValTok{1}\NormalTok{)}
749
+ \end{Highlighting}
750
+ \end{Shaded}
751
+
752
+ \begin{verbatim}
753
+ ## x y
754
+ ## 1 1 3
755
+ \end{verbatim}
756
+
757
+ As stated by Hadley:
758
+
759
+ \begin{quote}
760
+ dplyr code is ambiguous. Depending on what variables are defined where,
761
+ filter(df, x == y) could be equivalent to any of:
762
+ \end{quote}
763
+
764
+ \begin{verbatim}
765
+ df[df$x == df$y, ]
766
+ df[df$x == y, ]
767
+ df[x == df$y, ]
768
+ df[x == y, ]
769
+ \end{verbatim}
770
+
771
+ In Galaaz this ambiguity does not exist: filter(df, x.eq y) is not a
772
+ valid expression as expressions are built with symbols. In doing
773
+ filter(df, :x.eq y) we are looking for elements of the `x' column that
774
+ are equal to a previously defined y variable. Finally in filter(df,
775
+ :x.eq :y) we are looking for elements in which the `x' column value is
776
+ equal to the `y' column value. This can be seen in the following two
777
+ chunks of code:
778
+
779
+ \begin{Shaded}
780
+ \begin{Highlighting}[]
781
+ \NormalTok{y = }\DecValTok{1}
782
+ \NormalTok{x = }\DecValTok{2}
783
+
784
+ \CommentTok{# looking for values where the 'x' column is equal to the 'y' column}
785
+ \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{.eq }\StringTok{:y}\NormalTok{)}
786
+ \end{Highlighting}
787
+ \end{Shaded}
788
+
789
+ \begin{verbatim}
790
+ ## x y
791
+ ## 1 2 2
792
+ \end{verbatim}
793
+
794
+ \begin{Shaded}
795
+ \begin{Highlighting}[]
796
+ \CommentTok{# looking for values where the 'x' column is equal to the 'y' variable}
797
+ \CommentTok{# in this case, the number 1}
798
+ \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{.eq y)}
799
+ \end{Highlighting}
800
+ \end{Shaded}
801
+
802
+ \begin{verbatim}
803
+ ## x y
804
+ ## 1 1 3
805
+ \end{verbatim}
806
+
807
+ \hypertarget{writing-a-function-that-applies-to-different-data-sets}{%
808
+ \section{Writing a function that applies to different data
809
+ sets}\label{writing-a-function-that-applies-to-different-data-sets}}
810
+
811
+ Let's suppose that we want to write a function that receives as the
812
+ first argument a data frame and as second argument an expression that
813
+ adds a column to the data frame that is equal to the sum of elements in
814
+ column `a' plus `x'.
815
+
816
+ Here is the intended behaviour using the `mutate' function of `dplyr':
817
+
818
+ \begin{verbatim}
819
+ mutate(df1, y = a + x)
820
+ mutate(df2, y = a + x)
821
+ mutate(df3, y = a + x)
822
+ mutate(df4, y = a + x)
823
+ \end{verbatim}
824
+
825
+ The naive approach to writing an R function to solve this problem is:
826
+
827
+ \begin{verbatim}
828
+ mutate_y <- function(df) {
829
+ mutate(df, y = a + x)
830
+ }
831
+ \end{verbatim}
832
+
833
+ Unfortunately, in R, this function can fail silently if one of the
834
+ variables isn't present in the data frame, but is present in the global
835
+ environment. We will not go through here how to solve this problem in R.
836
+
837
+ In Galaaz the method mutate\_y below will work fine and will never fail
838
+ silently.
839
+
840
+ \begin{Shaded}
841
+ \begin{Highlighting}[]
842
+ \KeywordTok{def}\NormalTok{ mutate_y(df)}
843
+ \NormalTok{ df.mutate(}\StringTok{:y}\NormalTok{.assign }\StringTok{:a}\NormalTok{ + }\StringTok{:x}\NormalTok{)}
844
+ \KeywordTok{end}
845
+ \end{Highlighting}
846
+ \end{Shaded}
847
+
848
+ Here we create a data frame that has only one column named `x':
849
+
850
+ \begin{Shaded}
851
+ \begin{Highlighting}[]
852
+ \NormalTok{df1 = R.data__frame(}\StringTok{x: }\NormalTok{(}\DecValTok{1}\NormalTok{..}\DecValTok{3}\NormalTok{))}
853
+ \NormalTok{puts df1}
854
+ \end{Highlighting}
855
+ \end{Shaded}
856
+
857
+ \begin{verbatim}
858
+ ## x
859
+ ## 1 1
860
+ ## 2 2
861
+ ## 3 3
862
+ \end{verbatim}
863
+
864
+ Note that method mutate\_y will fail independently of the fact that
865
+ variable `a' is defined and in the scope of the method. Variable `a' has
866
+ no relationship with the symbol `:a' used in the definition of
867
+ `mutate\_y' above:
868
+
869
+ \begin{Shaded}
870
+ \begin{Highlighting}[]
871
+ \NormalTok{a = }\DecValTok{10}
872
+ \NormalTok{mutate_y(df1)}
873
+ \end{Highlighting}
874
+ \end{Shaded}
875
+
876
+ \begin{verbatim}
877
+ ## Message:
878
+ ## Error in mutate_impl(.data, dots) :
879
+ ## Evaluation error: object 'a' not found.
880
+ ## In addition: Warning message:
881
+ ## In mutate_impl(.data, dots) :
882
+ ## mismatched protect/unprotect (unprotect with empty protect stack) (RError)
883
+ ## Translated to internal error
884
+ \end{verbatim}
885
+
886
+ \hypertarget{different-expressions}{%
887
+ \section{Different expressions}\label{different-expressions}}
888
+
889
+ Let's move to the next problem as presented by Hadley where trying to
890
+ write a function in R that will receive two arguments, the first a
891
+ variable and the second an expression is not trivial. Below we create a
892
+ data frame and we want to write a function that groups data by a
893
+ variable and summarises it by an expression:
894
+
895
+ \begin{Shaded}
896
+ \begin{Highlighting}[]
897
+ \KeywordTok{set.seed}\NormalTok{(}\DecValTok{123}\NormalTok{)}
898
+
899
+ \NormalTok{df <-}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}
900
+ \DataTypeTok{g1 =} \KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{, }\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{2}\NormalTok{),}
901
+ \DataTypeTok{g2 =} \KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{1}\NormalTok{),}
902
+ \DataTypeTok{a =} \KeywordTok{sample}\NormalTok{(}\DecValTok{5}\NormalTok{),}
903
+ \DataTypeTok{b =} \KeywordTok{sample}\NormalTok{(}\DecValTok{5}\NormalTok{)}
904
+ \NormalTok{)}
905
+
906
+ \KeywordTok{as.data.frame}\NormalTok{(df) }
907
+ \end{Highlighting}
908
+ \end{Shaded}
909
+
910
+ \begin{verbatim}
911
+ ## g1 g2 a b
912
+ ## 1 1 1 2 1
913
+ ## 2 1 2 4 3
914
+ ## 3 2 1 5 4
915
+ ## 4 2 2 3 2
916
+ ## 5 2 1 1 5
917
+ \end{verbatim}
918
+
919
+ \begin{Shaded}
920
+ \begin{Highlighting}[]
921
+ \NormalTok{d2 <-}\StringTok{ }\NormalTok{df }\OperatorTok{%>%}
922
+ \StringTok{ }\KeywordTok{group_by}\NormalTok{(g1) }\OperatorTok{%>%}
923
+ \StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{a =} \KeywordTok{mean}\NormalTok{(a))}
924
+
925
+ \KeywordTok{as.data.frame}\NormalTok{(d2) }
926
+ \end{Highlighting}
927
+ \end{Shaded}
928
+
929
+ \begin{verbatim}
930
+ ## g1 a
931
+ ## 1 1 3
932
+ ## 2 2 3
933
+ \end{verbatim}
934
+
935
+ \begin{Shaded}
936
+ \begin{Highlighting}[]
937
+ \NormalTok{d2 <-}\StringTok{ }\NormalTok{df }\OperatorTok{%>%}
938
+ \StringTok{ }\KeywordTok{group_by}\NormalTok{(g2) }\OperatorTok{%>%}
939
+ \StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{a =} \KeywordTok{mean}\NormalTok{(a))}
940
+
941
+ \KeywordTok{as.data.frame}\NormalTok{(d2) }
942
+ \end{Highlighting}
943
+ \end{Shaded}
944
+
945
+ \begin{verbatim}
946
+ ## g2 a
947
+ ## 1 1 2.666667
948
+ ## 2 2 3.500000
949
+ \end{verbatim}
950
+
951
+ As shown by Hadley, one might expect this function to do the trick:
952
+
953
+ \begin{Shaded}
954
+ \begin{Highlighting}[]
955
+ \NormalTok{my_summarise <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(df, group_var) \{}
956
+ \NormalTok{ df }\OperatorTok{%>%}
957
+ \StringTok{ }\KeywordTok{group_by}\NormalTok{(group_var) }\OperatorTok{%>%}
958
+ \StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{a =} \KeywordTok{mean}\NormalTok{(a))}
959
+ \NormalTok{\}}
960
+
961
+ \CommentTok{# my_summarise(df, g1)}
962
+ \CommentTok{#> Error: Column `group_var` is unknown}
963
+ \end{Highlighting}
964
+ \end{Shaded}
965
+
966
+ In order to solve this problem, coding with dplyr requires the
967
+ introduction of many new concepts and functions such as `quo', `quos',
968
+ `enquo', `enquos', `!!' (bang bang), `!!!' (triple bang). Again, we'll
969
+ leave to Hadley the explanation on how to use all those functions.
970
+
971
+ Now, let's try to implement the same function in galaaz. The next code
972
+ block first prints the `df' data frame defined previously in R (to access
973
+ an R variable from Galaaz, we use the tilde operator `\textasciitilde{}'
974
+ applied to the R variable name as symbol, i.e., `:df'). We then create
975
+ the `my\_summarize' method and call it passing the R data frame and the
976
+ group by variable `:g1':
977
+
978
+ \begin{Shaded}
979
+ \begin{Highlighting}[]
980
+ \NormalTok{puts ~}\StringTok{:df}
981
+ \NormalTok{print }\StringTok{"\textbackslash{}n"}
982
+
983
+ \KeywordTok{def}\NormalTok{ my_summarize(df, group_var)}
984
+ \NormalTok{ df.group_by(group_var).}
985
+ \NormalTok{ summarize(}\StringTok{a: :a}\NormalTok{.mean)}
986
+ \KeywordTok{end}
987
+
988
+ \NormalTok{puts my_summarize(}\StringTok{:df}\NormalTok{, }\StringTok{:g1}\NormalTok{).as__data__frame}
989
+ \end{Highlighting}
990
+ \end{Shaded}
991
+
992
+ \begin{verbatim}
993
+ ## g1 g2 a b
994
+ ## 1 1 1 2 1
995
+ ## 2 1 2 4 3
996
+ ## 3 2 1 5 4
997
+ ## 4 2 2 3 2
998
+ ## 5 2 1 1 5
999
+ ##
1000
+ ## g1 a
1001
+ ## 1 1 3
1002
+ ## 2 2 3
1003
+ \end{verbatim}
1004
+
1005
+ It works!!! Well, let's make sure this was not just some coincidence:
1006
+
1007
+ \begin{Shaded}
1008
+ \begin{Highlighting}[]
1009
+ \NormalTok{puts my_summarize(}\StringTok{:df}\NormalTok{, }\StringTok{:g2}\NormalTok{).as__data__frame}
1010
+ \end{Highlighting}
1011
+ \end{Shaded}
1012
+
1013
+ \begin{verbatim}
1014
+ ## g2 a
1015
+ ## 1 1 2.666667
1016
+ ## 2 2 3.500000
1017
+ \end{verbatim}
1018
+
1019
+ Great, everything is fine! No magic, no new functions, no complexities,
1020
+ just normal, standard Ruby code. If you've ever done NSE in R, this
1021
+ certainly feels much safer and easier to implement.
1022
+
1023
+ \hypertarget{different-input-variables}{%
1024
+ \section{Different input variables}\label{different-input-variables}}
1025
+
1026
+ In the previous section we've managed to get rid of all NSE formulation
1027
+ for a simple example, but does this remain true for more complex
1028
+ examples, or will the Galaaz way prove impractical for more complex
1029
+ code?
1030
+
1031
+ In the next example Hadley proposes that we write a function that, given
1032
+ an expression such as `a' or `a * b', calculates three summaries. What
1033
+ we want is a function that does the same as these R statements:
1034
+
1035
+ \begin{verbatim}
1036
+ summarise(df, mean = mean(a), sum = sum(a), n = n())
1037
+ #> # A tibble: 1 x 3
1038
+ #> mean sum n
1039
+ #> <dbl> <int> <int>
1040
+ #> 1 3 15 5
1041
+
1042
+ summarise(df, mean = mean(a * b), sum = sum(a * b), n = n())
1043
+ #> # A tibble: 1 x 3
1044
+ #> mean sum n
1045
+ #> <dbl> <int> <int>
1046
+ #> 1 9 45 5
1047
+ \end{verbatim}
1048
+
1049
+ Let's try it in galaaz:
1050
+
1051
+ \begin{Shaded}
1052
+ \begin{Highlighting}[]
1053
+ \KeywordTok{def}\NormalTok{ my_summarise2(df, expr)}
1054
+ \NormalTok{ df.summarize(}
1055
+ \StringTok{mean: }\NormalTok{E.mean(expr),}
1056
+ \StringTok{sum: }\NormalTok{E.sum(expr),}
1057
+ \StringTok{n: }\NormalTok{E.n}
1058
+ \NormalTok{ )}
1059
+ \KeywordTok{end}
1060
+
1061
+ \NormalTok{puts my_summarise2((~}\StringTok{:df}\NormalTok{), }\StringTok{:a}\NormalTok{)}
1062
+ \NormalTok{puts my_summarise2((~}\StringTok{:df}\NormalTok{), }\StringTok{:a}\NormalTok{ * }\StringTok{:b}\NormalTok{)}
1063
+ \end{Highlighting}
1064
+ \end{Shaded}
1065
+
1066
+ \begin{verbatim}
1067
+ ## mean sum n
1068
+ ## 1 3 15 5
1069
+ ## mean sum n
1070
+ ## 1 9 45 5
1071
+ \end{verbatim}
1072
+
1073
+ Once again, there is no need to use any special theory or functions. The
1074
+ only point to be careful about is the use of `E' to build expressions
1075
+ from functions `mean', `sum' and `n'.
1076
+
1077
+ \hypertarget{different-input-and-output-variable}{%
1078
+ \section{Different input and output
1079
+ variable}\label{different-input-and-output-variable}}
1080
+
1081
+ Now the next challenge presented by Hadley is to vary the name of the
1082
+ output variables based on the received expression. So, if the input
1083
+ expression is `a', we want our data frame columns to be named `mean\_a'
1084
+ and `sum\_a'. Now, if the input expression is `b', columns should be
1085
+ named `mean\_b' and `sum\_b'.
1086
+
1087
+ \begin{verbatim}
1088
+ mutate(df, mean_a = mean(a), sum_a = sum(a))
1089
+ #> # A tibble: 5 x 6
1090
+ #> g1 g2 a b mean_a sum_a
1091
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
1092
+ #> 1 1 1 1 3 3 15
1093
+ #> 2 1 2 4 2 3 15
1094
+ #> 3 2 1 2 1 3 15
1095
+ #> 4 2 2 5 4 3 15
1096
+ #> # … with 1 more row
1097
+
1098
+ mutate(df, mean_b = mean(b), sum_b = sum(b))
1099
+ #> # A tibble: 5 x 6
1100
+ #> g1 g2 a b mean_b sum_b
1101
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
1102
+ #> 1 1 1 1 3 3 15
1103
+ #> 2 1 2 4 2 3 15
1104
+ #> 3 2 1 2 1 3 15
1105
+ #> 4 2 2 5 4 3 15
1106
+ #> # … with 1 more row
1107
+ \end{verbatim}
1108
+
1109
+ In order to solve this problem in R, Hadley needs to introduce some
1110
+ more new functions and notations: `quo\_name' and the `:=' operator from
1111
+ package `rlang'.
1112
+
1113
+ Here is our Ruby code:
1114
+
1115
+ \begin{Shaded}
1116
+ \begin{Highlighting}[]
1117
+ \KeywordTok{def}\NormalTok{ my_mutate(df, expr)}
1118
+ \NormalTok{ mean_name = }\StringTok{"mean_}\OtherTok{#\{}\NormalTok{expr.to_s}\OtherTok{\}}\StringTok{"}
1119
+ \NormalTok{ sum_name = }\StringTok{"sum_}\OtherTok{#\{}\NormalTok{expr.to_s}\OtherTok{\}}\StringTok{"}
1120
+
1121
+ \NormalTok{ df.mutate(mean_name => E.mean(expr),}
1122
+ \NormalTok{ sum_name => E.sum(expr))}
1123
+ \KeywordTok{end}
1124
+
1125
+ \NormalTok{puts my_mutate((~}\StringTok{:df}\NormalTok{), }\StringTok{:a}\NormalTok{)}
1126
+ \NormalTok{puts my_mutate((~}\StringTok{:df}\NormalTok{), }\StringTok{:b}\NormalTok{)}
1127
+ \end{Highlighting}
1128
+ \end{Shaded}
1129
+
1130
+ \begin{verbatim}
1131
+ ## g1 g2 a b mean_a sum_a
1132
+ ## 1 1 1 2 1 3 15
1133
+ ## 2 1 2 4 3 3 15
1134
+ ## 3 2 1 5 4 3 15
1135
+ ## 4 2 2 3 2 3 15
1136
+ ## 5 2 1 1 5 3 15
1137
+ ## g1 g2 a b mean_b sum_b
1138
+ ## 1 1 1 2 1 3 15
1139
+ ## 2 1 2 4 3 3 15
1140
+ ## 3 2 1 5 4 3 15
1141
+ ## 4 2 2 3 2 3 15
1142
+ ## 5 2 1 1 5 3 15
1143
+ \end{verbatim}
1144
+
1145
+ It really seems that ``Non Standard Evaluation'' is actually quite
1146
+ standard in Galaaz! But, you might have noticed a small change in the
1147
+ way the arguments to the mutate method were called. In a previous
1148
+ example we used df.summarise(mean: E.mean(:a), \ldots{}) where the
1149
+ column name was followed by a `:' colon. In this example, we have
1150
+ df.mutate(mean\_name =\textgreater{} E.mean(expr), \ldots{}) and
1151
+ variable mean\_name is not followed by `:' but by `=\textgreater{}'.
1152
+ This is standard Ruby notation.
1153
+
1154
+ {[}explain\ldots{}.{]}
1155
+
1156
+ \hypertarget{capturing-multiple-variables}{%
1157
+ \section{Capturing multiple
1158
+ variables}\label{capturing-multiple-variables}}
1159
+
1160
+ Moving on with new complexities, Hadley proposes that we solve the
1161
+ problem in which the summarise function will receive any number of
1162
+ grouping variables.
1163
+
1164
+ This again is quite standard Ruby. In order to receive an undefined
1165
+ number of parameters, the parameter is preceded by `*':
1166
+
1167
+ \begin{Shaded}
1168
+ \begin{Highlighting}[]
1169
+ \KeywordTok{def}\NormalTok{ my_summarise3(df, *group_vars)}
1170
+ \NormalTok{ df.group_by(*group_vars).}
1171
+ \NormalTok{ summarise(}\StringTok{a: }\NormalTok{E.mean(}\StringTok{:a}\NormalTok{))}
1172
+ \KeywordTok{end}
1173
+
1174
+ \NormalTok{puts my_summarise3((~}\StringTok{:df}\NormalTok{), }\StringTok{:g1}\NormalTok{, }\StringTok{:g2}\NormalTok{).as__data__frame}
1175
+ \end{Highlighting}
1176
+ \end{Shaded}
1177
+
1178
+ \begin{verbatim}
1179
+ ## g1 g2 a
1180
+ ## 1 1 1 2
1181
+ ## 2 1 2 4
1182
+ ## 3 2 1 3
1183
+ ## 4 2 2 3
1184
+ \end{verbatim}
1185
+
1186
+ \hypertarget{why-does-r-require-nse-and-galaaz-does-not}{%
1187
+ \section{Why does R require NSE and Galaaz does
1188
+ not?}\label{why-does-r-require-nse-and-galaaz-does-not}}
1189
+
1190
+ NSE introduces a number of new concepts, such as `quoting',
1191
+ `quasiquotation', `unquoting' and `unquote-splicing', while in Galaaz
1192
+ none of those concepts are needed. What gives?
1193
+
1194
+ R is an extremely flexible language and it has lazy evaluation of
1195
+ parameters. When in R a function is called as `summarise(df, a = b)',
1196
+ the summarise function receives the literal `a = b' parameter and can
1197
+ work with this as if it were a string. In R, it is not clear what a and
1198
+ b are, they can be expressions or they can be variables, it is up to the
1199
+ function to decide what `a = b' means.
1200
+
1201
+ In Ruby, there is no lazy evaluation of parameters and `a' is always a
1202
+ variable and so is `b'. Variables assume their value as soon as they are
1203
+ used, so `x = a' is immediately evaluated and variable `x' will receive
1204
+ the value of variable `a' as soon as the Ruby statement is executed.
1205
+ Ruby also provides the notion of a symbol; `:a' is a symbol and does not
1206
+ evaluate to anything. Galaaz uses Ruby symbols to build expressions that
1207
+ are not bound to anything: `:a.eq :b' is clearly an expression and has
1208
+ no relationship whatsoever with the statement `a = b'. By using symbols,
1209
+ variables and expressions all the possible ambiguities that are found in
1210
+ R are eliminated in Galaaz.
1211
+
1212
+ The main problem that remains, is that in R, functions are not clearly
1213
+ documented as what type of input they are expecting, they might be
1214
+ expecting regular variables or they might be expecting expressions and
1215
+ the R function will know how to deal with an input of the form `a = b',
1216
+ now for the Ruby developer it might not be immediately clear if it
1217
+ should call the function passing the value `true' if variable `a' is
1218
+ equal to variable `b' or if it should call the function passing the
1219
+ expression `:a.eq :b'.
1220
+
1221
+ \hypertarget{advanced-dplyr-features}{%
1222
+ \section{Advanced dplyr features}\label{advanced-dplyr-features}}
1223
+
1224
+ In the blog: Programming with dplyr by using dplyr
1225
+ (\url{https://www.r-bloggers.com/programming-with-dplyr-by-using-dplyr/})
1226
+ Iñaki Úcar shows surprise that some R users are trying to code in dplyr
1227
+ avoiding the use of NSE. For instance he says:
1228
+
1229
+ \begin{quote}
1230
+ Take the example of seplyr. It stands for standard evaluation dplyr, and
1231
+ enables us to program over dplyr without having ``to bring in (or study)
1232
+ any deep-theory or heavy-weight tools such as rlang/tidyeval''.
1233
+ \end{quote}
1234
+
1235
+ For me, there isn't really any surprise that users are trying to avoid
1236
+ dplyr deep-theory. R users frequently are not programmers and learning
1237
+ to code is already hard business, on top of that, having to learn how to
1238
+ `quote' or `enquo' or `quos' or `enquos' is not necessarily a `piece of
1239
+ cake'. So much so, that `tidyeval' has some more advanced functions that
1240
+ instead of using quoted expressions, use strings as arguments.
1241
+
1242
+ In the following examples, we show the use of functions `group\_by\_at',
1243
+ `summarise\_at' and `rename\_at' that receive strings as argument. The
1244
+ data frame used is `starwars', which describes features of characters in
1245
+ the Starwars movies:
1246
+
1247
+ \begin{Shaded}
1248
+ \begin{Highlighting}[]
1249
+ \NormalTok{puts (~}\StringTok{:starwars}\NormalTok{).head.as__data__frame}
1250
+ \end{Highlighting}
1251
+ \end{Shaded}
1252
+
1253
+ \begin{verbatim}
1254
+ ## name height mass hair_color skin_color eye_color birth_year
1255
+ ## 1 Luke Skywalker 172 77 blond fair blue 19.0
1256
+ ## 2 C-3PO 167 75 <NA> gold yellow 112.0
1257
+ ## 3 R2-D2 96 32 <NA> white, blue red 33.0
1258
+ ## 4 Darth Vader 202 136 none white yellow 41.9
1259
+ ## 5 Leia Organa 150 49 brown light brown 19.0
1260
+ ## 6 Owen Lars 178 120 brown, grey light blue 52.0
1261
+ ## gender homeworld species
1262
+ ## 1 male Tatooine Human
1263
+ ## 2 <NA> Tatooine Droid
1264
+ ## 3 <NA> Naboo Droid
1265
+ ## 4 male Tatooine Human
1266
+ ## 5 female Alderaan Human
1267
+ ## 6 male Tatooine Human
1268
+ ## films
1269
+ ## 1 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
1270
+ ## 2 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
1271
+ ## 3 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
1272
+ ## 4 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
1273
+ ## 5 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
1274
+ ## 6 Attack of the Clones, Revenge of the Sith, A New Hope
1275
+ ## vehicles starships
1276
+ ## 1 Snowspeeder, Imperial Speeder Bike X-wing, Imperial shuttle
1277
+ ## 2
1278
+ ## 3
1279
+ ## 4 TIE Advanced x1
1280
+ ## 5 Imperial Speeder Bike
1281
+ ## 6
1282
+ \end{verbatim}
1283
+
1284
+ The grouped\_mean function below will receive a grouping variable and
1285
+ calculate summaries for the value\_variables given:
1286
+
1287
+ \begin{Shaded}
1288
+ \begin{Highlighting}[]
1289
+ \NormalTok{grouped_mean <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(data, grouping_variables, value_variables) \{}
1290
+ \NormalTok{ data }\OperatorTok{%>%}
1291
+ \StringTok{ }\KeywordTok{group_by_at}\NormalTok{(grouping_variables) }\OperatorTok{%>%}
1292
+ \StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{count =} \KeywordTok{n}\NormalTok{()) }\OperatorTok{%>%}
1293
+ \StringTok{ }\KeywordTok{summarise_at}\NormalTok{(}\KeywordTok{c}\NormalTok{(value_variables, }\StringTok{"count"}\NormalTok{), mean, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{) }\OperatorTok{%>%}
1294
+ \StringTok{ }\KeywordTok{rename_at}\NormalTok{(value_variables, }\KeywordTok{funs}\NormalTok{(}\KeywordTok{paste0}\NormalTok{(}\StringTok{"mean_"}\NormalTok{, .)))}
1295
+ \NormalTok{ \}}
1296
+
1297
+ \NormalTok{gm =}\StringTok{ }\NormalTok{starwars }\OperatorTok{%>%}\StringTok{ }
1298
+ \StringTok{ }\KeywordTok{grouped_mean}\NormalTok{(}\StringTok{"eye_color"}\NormalTok{, }\KeywordTok{c}\NormalTok{(}\StringTok{"mass"}\NormalTok{, }\StringTok{"birth_year"}\NormalTok{))}
1299
+
1300
+ \KeywordTok{as.data.frame}\NormalTok{(gm) }
1301
+ \end{Highlighting}
1302
+ \end{Shaded}
1303
+
1304
+ \begin{verbatim}
1305
+ ## eye_color mean_mass mean_birth_year count
1306
+ ## 1 black 76.28571 33.00000 10
1307
+ ## 2 blue 86.51667 67.06923 19
1308
+ ## 3 blue-gray 77.00000 57.00000 1
1309
+ ## 4 brown 66.09231 108.96429 21
1310
+ ## 5 dark NaN NaN 1
1311
+ ## 6 gold NaN NaN 1
1312
+ ## 7 green, yellow 159.00000 NaN 1
1313
+ ## 8 hazel 66.00000 34.50000 3
1314
+ ## 9 orange 282.33333 231.00000 8
1315
+ ## 10 pink NaN NaN 1
1316
+ ## 11 red 81.40000 33.66667 5
1317
+ ## 12 red, blue NaN NaN 1
1318
+ ## 13 unknown 31.50000 NaN 3
1319
+ ## 14 white 48.00000 NaN 1
1320
+ ## 15 yellow 81.11111 76.38000 11
1321
+ \end{verbatim}
1322
+
1323
+ The same code with Galaaz, becomes:
1324
+
1325
+ \begin{Shaded}
1326
+ \begin{Highlighting}[]
1327
+ \KeywordTok{def}\NormalTok{ grouped_mean(data, grouping_variables, value_variables)}
1328
+ \NormalTok{ data.}
1329
+ \NormalTok{ group_by_at(grouping_variables).}
1330
+ \NormalTok{ mutate(}\StringTok{count: }\NormalTok{E.n).}
1331
+ \NormalTok{ summarise_at(E.c(value_variables, }\StringTok{"count"}\NormalTok{), ~}\StringTok{:mean}\NormalTok{, }\StringTok{na__rm: }\DecValTok{true}\NormalTok{).}
1332
+ \NormalTok{ rename_at(value_variables, E.funs(E.paste0(}\StringTok{"mean_"}\NormalTok{, value_variables)))}
1333
+ \KeywordTok{end}
1334
+
1335
+ \NormalTok{puts grouped_mean((~}\StringTok{:starwars}\NormalTok{), }\StringTok{"eye_color"}\NormalTok{, E.c(}\StringTok{"mass"}\NormalTok{, }\StringTok{"birth_year"}\NormalTok{)).as__data__frame}
1336
+ \end{Highlighting}
1337
+ \end{Shaded}
1338
+
1339
+ \begin{verbatim}
1340
+ ## eye_color mean_mass mean_birth_year count
1341
+ ## 1 black 76.28571 33.00000 10
1342
+ ## 2 blue 86.51667 67.06923 19
1343
+ ## 3 blue-gray 77.00000 57.00000 1
1344
+ ## 4 brown 66.09231 108.96429 21
1345
+ ## 5 dark NaN NaN 1
1346
+ ## 6 gold NaN NaN 1
1347
+ ## 7 green, yellow 159.00000 NaN 1
1348
+ ## 8 hazel 66.00000 34.50000 3
1349
+ ## 9 orange 282.33333 231.00000 8
1350
+ ## 10 pink NaN NaN 1
1351
+ ## 11 red 81.40000 33.66667 5
1352
+ ## 12 red, blue NaN NaN 1
1353
+ ## 13 unknown 31.50000 NaN 3
1354
+ ## 14 white 48.00000 NaN 1
1355
+ ## 15 yellow 81.11111 76.38000 11
1356
+ \end{verbatim}
1357
+
1358
+ \hypertarget{conclusion}{%
1359
+ \section{Conclusion}\label{conclusion}}
1360
+
1361
+ Ruby and Galaaz provide a nice framework for developing code that uses R
1362
+ functions. Although R is a very powerful and flexible language,
1363
+ sometimes, too much flexibility makes life harder for the casual user.
1364
+ We believe however, that even for the advanced user, Ruby integrated
1365
+ with R through Galaaz, makes a powerful environment for data analysis.
1366
+ In this blog post we showed how Galaaz's consistent syntax eliminates the
1367
+ need for complex constructs such as quoting, enquoting, quasiquotation,
1368
+ etc. This simplification comes from the fact that expressions and
1369
+ variables are clearly separated objects, which is not the case in the R
1370
+ language.
1371
+
1372
+
1373
+ \end{document}