galaaz 0.4.10 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (163) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2048 -531
  3. data/Rakefile +3 -2
  4. data/bin/gknit +152 -6
  5. data/bin/gknit-draft +105 -0
  6. data/bin/gknit-draft.rb +28 -0
  7. data/bin/gknit_Rscript +127 -0
  8. data/bin/grun +27 -1
  9. data/bin/gstudio +47 -4
  10. data/bin/{gstudio.rb → gstudio_irb.rb} +0 -0
  11. data/bin/gstudio_pry.rb +7 -0
  12. data/blogs/galaaz_ggplot/galaaz_ggplot.html +10 -195
  13. data/blogs/galaaz_ggplot/galaaz_ggplot.md +404 -0
  14. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
  15. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
  16. data/blogs/gknit/gknit.Rmd +5 -3
  17. data/blogs/gknit/gknit.pdf +0 -0
  18. data/blogs/gknit/lst.rds +0 -0
  19. data/blogs/manual/lst.rds +0 -0
  20. data/blogs/manual/manual.Rmd +826 -53
  21. data/blogs/manual/manual.html +2338 -695
  22. data/blogs/manual/manual.md +2032 -539
  23. data/blogs/manual/manual.pdf +0 -0
  24. data/blogs/manual/manual.tex +1804 -594
  25. data/blogs/manual/manual_files/figure-html/bubble-1.png +0 -0
  26. data/blogs/manual/manual_files/figure-html/diverging_bar.png +0 -0
  27. data/blogs/manual/manual_files/figure-latex/bubble-1.png +0 -0
  28. data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
  29. data/blogs/manual/model.rb +41 -0
  30. data/blogs/nse_dplyr/nse_dplyr.Rmd +226 -73
  31. data/blogs/nse_dplyr/nse_dplyr.html +254 -336
  32. data/blogs/nse_dplyr/nse_dplyr.md +353 -158
  33. data/blogs/oh_my/oh_my.html +274 -386
  34. data/blogs/oh_my/oh_my.md +208 -205
  35. data/blogs/ruby_plot/ruby_plot.html +20 -205
  36. data/blogs/ruby_plot/ruby_plot.md +14 -15
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  42. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  43. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  44. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  46. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  47. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  48. data/examples/Bibliography/master.bib +50 -0
  49. data/examples/Bibliography/stats.bib +72 -0
  50. data/examples/islr/x_y_rnorm.jpg +0 -0
  51. data/examples/latex_templates/Test-acm_article/Makefile +16 -0
  52. data/examples/latex_templates/Test-acm_article/Test-acm_article.Rmd +65 -0
  53. data/examples/latex_templates/Test-acm_article/acm_proc_article-sp.cls +1670 -0
  54. data/examples/latex_templates/Test-acm_article/sensys-abstract.cls +703 -0
  55. data/examples/latex_templates/Test-acm_article/sigproc.bib +59 -0
  56. data/examples/latex_templates/Test-acs_article/Test-acs_article.Rmd +260 -0
  57. data/examples/latex_templates/Test-acs_article/Test-acs_article.pdf +0 -0
  58. data/examples/latex_templates/Test-acs_article/acs-Test-acs_article.bib +11 -0
  59. data/examples/latex_templates/Test-acs_article/acs-my_output.bib +11 -0
  60. data/examples/latex_templates/Test-acs_article/acstest.bib +17 -0
  61. data/examples/latex_templates/Test-aea_article/AEA.cls +1414 -0
  62. data/{blogs/gknit/marshal.dump → examples/latex_templates/Test-aea_article/BibFile.bib} +0 -0
  63. data/examples/latex_templates/Test-aea_article/Test-aea_article.Rmd +108 -0
  64. data/examples/latex_templates/Test-aea_article/Test-aea_article.pdf +0 -0
  65. data/examples/latex_templates/Test-aea_article/aea.bst +1269 -0
  66. data/examples/latex_templates/Test-aea_article/multicol.sty +853 -0
  67. data/examples/latex_templates/Test-aea_article/references.bib +0 -0
  68. data/examples/latex_templates/Test-aea_article/setspace.sty +546 -0
  69. data/examples/latex_templates/Test-amq_article/Test-amq_article.Rmd +256 -0
  70. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdf +0 -0
  71. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdfsync +3397 -0
  72. data/examples/latex_templates/Test-amq_article/pics/Figure2.pdf +0 -0
  73. data/examples/latex_templates/Test-ams_article/Test-ams_article.Rmd +215 -0
  74. data/examples/latex_templates/Test-ams_article/amstest.bib +436 -0
  75. data/examples/latex_templates/Test-asa_article/Test-asa_article.Rmd +153 -0
  76. data/examples/latex_templates/Test-asa_article/Test-asa_article.pdf +0 -0
  77. data/examples/latex_templates/Test-asa_article/agsm.bst +1353 -0
  78. data/examples/latex_templates/Test-asa_article/bibliography.bib +233 -0
  79. data/examples/latex_templates/Test-ieee_article/IEEEtran.bst +2409 -0
  80. data/examples/latex_templates/Test-ieee_article/IEEEtran.cls +6346 -0
  81. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.Rmd +175 -0
  82. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.pdf +0 -0
  83. data/examples/latex_templates/Test-ieee_article/mybibfile.bib +20 -0
  84. data/examples/latex_templates/Test-rjournal_article/RJournal.sty +335 -0
  85. data/examples/latex_templates/Test-rjournal_article/RJreferences.bib +18 -0
  86. data/examples/latex_templates/Test-rjournal_article/RJwrapper.pdf +0 -0
  87. data/examples/latex_templates/Test-rjournal_article/Test-rjournal_article.Rmd +52 -0
  88. data/examples/latex_templates/Test-springer_article/Test-springer_article.Rmd +65 -0
  89. data/examples/latex_templates/Test-springer_article/Test-springer_article.pdf +0 -0
  90. data/examples/latex_templates/Test-springer_article/bibliography.bib +26 -0
  91. data/examples/latex_templates/Test-springer_article/spbasic.bst +1658 -0
  92. data/examples/latex_templates/Test-springer_article/spmpsci.bst +1512 -0
  93. data/examples/latex_templates/Test-springer_article/spphys.bst +1443 -0
  94. data/examples/latex_templates/Test-springer_article/svglov3.clo +113 -0
  95. data/examples/latex_templates/Test-springer_article/svjour3.cls +1431 -0
  96. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.Rmd +73 -0
  97. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.pdf +0 -0
  98. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.Rmd +382 -0
  99. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.pdf +0 -0
  100. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.Rmd +164 -0
  101. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.pdf +0 -0
  102. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.Rmd +92 -0
  103. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.pdf +0 -0
  104. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/attend-grade-relationships.csv +482 -0
  105. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.Rmd +280 -0
  106. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.pdf +0 -0
  107. data/examples/rmarkdown/svm-xaringan-example/svm-xaringan-example.Rmd +386 -0
  108. data/lib/R_interface/r.rb +1 -1
  109. data/lib/R_interface/r_libs.R +1 -1
  110. data/lib/R_interface/r_methods.rb +10 -0
  111. data/lib/R_interface/rpkg.rb +1 -0
  112. data/lib/R_interface/rsupport.rb +4 -6
  113. data/lib/gknit.rb +2 -0
  114. data/lib/gknit/draft.rb +105 -0
  115. data/lib/gknit/knitr_engine.rb +0 -33
  116. data/lib/util/exec_ruby.rb +1 -27
  117. data/specs/figures/bg.jpeg +0 -0
  118. data/specs/figures/bg.png +0 -0
  119. data/specs/figures/dose_len.png +0 -0
  120. data/specs/figures/no_args.jpeg +0 -0
  121. data/specs/figures/no_args.png +0 -0
  122. data/specs/figures/width_height.jpeg +0 -0
  123. data/specs/figures/width_height.png +0 -0
  124. data/specs/figures/width_height_units1.jpeg +0 -0
  125. data/specs/figures/width_height_units1.png +0 -0
  126. data/specs/figures/width_height_units2.jpeg +0 -0
  127. data/specs/figures/width_height_units2.png +0 -0
  128. data/specs/r_dataframe.spec.rb +11 -11
  129. data/specs/ruby_expression.spec.rb +1 -0
  130. data/specs/tmp.rb +41 -20
  131. data/version.rb +1 -1
  132. metadata +73 -35
  133. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +0 -41
  134. data/blogs/galaaz_ggplot/galaaz_ggplot.out +0 -10
  135. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/midwest_rb.pdf +0 -0
  136. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/scatter_plot_rb.pdf +0 -0
  137. data/blogs/gknit/gknit.md +0 -1430
  138. data/blogs/gknit/gknit.tex +0 -1358
  139. data/blogs/manual/graph.rb +0 -29
  140. data/blogs/nse_dplyr/nse_dplyr.tex +0 -1373
  141. data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +0 -662
  142. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +0 -57
  143. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +0 -106
  144. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +0 -110
  145. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +0 -174
  146. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +0 -236
  147. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +0 -296
  148. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +0 -236
  149. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +0 -218
  150. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +0 -128
  151. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +0 -150
  152. data/blogs/ruby_plot/ruby_plot_files/figure-latex/dose_len.png +0 -0
  153. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_delivery.png +0 -0
  154. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_dose.png +0 -0
  155. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color.png +0 -0
  156. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color2.png +0 -0
  157. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_decorations.png +0 -0
  158. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_jitter.png +0 -0
  159. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_points.png +0 -0
  160. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_box_plot.png +0 -0
  161. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_violin_plot.png +0 -0
  162. data/blogs/ruby_plot/ruby_plot_files/figure-latex/violin_with_jitter.png +0 -0
  163. data/examples/paper/paper.rb +0 -36
@@ -1,29 +0,0 @@
1
- # Graphics with ggplot
2
-
3
- ```{ruby diverging_bar}
4
- require 'ggplot'
5
-
6
- R.theme_set R.theme_bw
7
-
8
- # Data Prep
9
- mtcars = ~:mtcars
10
- mtcars.car_name = R.rownames(:mtcars)
11
- # compute normalized mpg
12
- mtcars.mpg_z = ((mtcars.mpg - mtcars.mpg.mean)/mtcars.mpg.sd).round 2
13
- mtcars.mpg_type = (mtcars.mpg_z > 0) ? "below" : "above"
14
- mtcars = mtcars[mtcars.mpg_z.order, :all]
15
- # convert to factor to retain sorted order in plot
16
- mtcars.car_name = mtcars.car_name.factor levels: mtcars.car_name
17
-
18
- # Diverging Barcharts
19
- gg = mtcars.ggplot(E.aes(x: :car_name, y: :mpg_z, label: :mpg_z)) +
20
- R.geom_bar(E.aes(fill: :mpg_type), stat: 'identity', width: 0.5) +
21
- R.scale_fill_manual(name: "Mileage",
22
- labels: R.c("Above Average", "Below Average"),
23
- values: R.c("above": "#00ba38", "below": "#f8766d")) +
24
- R.labs(subtitle: "Normalised mileage from 'mtcars'",
25
- title: "Diverging Bars") +
26
- R.coord_flip()
27
-
28
- puts gg
29
- ```
@@ -1,1373 +0,0 @@
1
- \documentclass[11pt,]{article}
2
- \usepackage{lmodern}
3
- \usepackage{amssymb,amsmath}
4
- \usepackage{ifxetex,ifluatex}
5
- \usepackage{fixltx2e} % provides \textsubscript
6
- \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
7
- \usepackage[T1]{fontenc}
8
- \usepackage[utf8]{inputenc}
9
- \else % if luatex or xelatex
10
- \ifxetex
11
- \usepackage{mathspec}
12
- \else
13
- \usepackage{fontspec}
14
- \fi
15
- \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
16
- \fi
17
- % use upquote if available, for straight quotes in verbatim environments
18
- \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
19
- % use microtype if available
20
- \IfFileExists{microtype.sty}{%
21
- \usepackage{microtype}
22
- \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
23
- }{}
24
- \usepackage[margin=1in]{geometry}
25
- \usepackage{hyperref}
26
- \hypersetup{unicode=true,
27
- pdftitle={Non Standard Evaluation in dplyr with Galaaz},
28
- pdfauthor={Rodrigo Botafogo; Daniel Mossé - University of Pittsburgh},
29
- pdfborder={0 0 0},
30
- breaklinks=true}
31
- \urlstyle{same} % don't use monospace font for urls
32
- \usepackage{color}
33
- \usepackage{fancyvrb}
34
- \newcommand{\VerbBar}{|}
35
- \newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
36
- \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
37
- % Add ',fontsize=\small' for more characters per line
38
- \usepackage{framed}
39
- \definecolor{shadecolor}{RGB}{248,248,248}
40
- \newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}}
41
- \newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{#1}}
42
- \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
43
- \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.77,0.63,0.00}{#1}}
44
- \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
45
- \newcommand{\BuiltInTok}[1]{#1}
46
- \newcommand{\CharTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
47
- \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
48
- \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
49
- \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
50
- \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
51
- \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{#1}}
52
- \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
53
- \newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
54
- \newcommand{\ErrorTok}[1]{\textcolor[rgb]{0.64,0.00,0.00}{\textbf{#1}}}
55
- \newcommand{\ExtensionTok}[1]{#1}
56
- \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
57
- \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
58
- \newcommand{\ImportTok}[1]{#1}
59
- \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
60
- \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
61
- \newcommand{\NormalTok}[1]{#1}
62
- \newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.81,0.36,0.00}{\textbf{#1}}}
63
- \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{#1}}
64
- \newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
65
- \newcommand{\RegionMarkerTok}[1]{#1}
66
- \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
67
- \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
68
- \newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
69
- \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
70
- \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
71
- \newcommand{\WarningTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
72
- \usepackage{graphicx,grffile}
73
- \makeatletter
74
- \def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
75
- \def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
76
- \makeatother
77
- % Scale images if necessary, so that they will not overflow the page
78
- % margins by default, and it is still possible to overwrite the defaults
79
- % using explicit options in \includegraphics[width, height, ...]{}
80
- \setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
81
- \IfFileExists{parskip.sty}{%
82
- \usepackage{parskip}
83
- }{% else
84
- \setlength{\parindent}{0pt}
85
- \setlength{\parskip}{6pt plus 2pt minus 1pt}
86
- }
87
- \setlength{\emergencystretch}{3em} % prevent overfull lines
88
- \providecommand{\tightlist}{%
89
- \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
90
- \setcounter{secnumdepth}{5}
91
- % Redefines (sub)paragraphs to behave more like sections
92
- \ifx\paragraph\undefined\else
93
- \let\oldparagraph\paragraph
94
- \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
95
- \fi
96
- \ifx\subparagraph\undefined\else
97
- \let\oldsubparagraph\subparagraph
98
- \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
99
- \fi
100
-
101
- %%% Use protect on footnotes to avoid problems with footnotes in titles
102
- \let\rmarkdownfootnote\footnote%
103
- \def\footnote{\protect\rmarkdownfootnote}
104
-
105
- %%% Change title format to be more compact
106
- \usepackage{titling}
107
-
108
- % Create subtitle command for use in maketitle
109
- \newcommand{\subtitle}[1]{
110
- \posttitle{
111
- \begin{center}\large#1\end{center}
112
- }
113
- }
114
-
115
- \setlength{\droptitle}{-2em}
116
-
117
- \title{Non Standard Evaluation in dplyr with Galaaz}
118
- \pretitle{\vspace{\droptitle}\centering\huge}
119
- \posttitle{\par}
120
- \author{Rodrigo Botafogo \\ Daniel Mossé - University of Pittsburgh}
121
- \preauthor{\centering\large\emph}
122
- \postauthor{\par}
123
- \predate{\centering\large\emph}
124
- \postdate{\par}
125
- \date{10/05/2019}
126
-
127
- % usar portugues do Brasil
128
- % \usepackage[brazilian]{babel}
129
- \usepackage[utf8]{inputenc}
130
-
131
- \usepackage{geometry}
132
- \geometry{a4paper, top=1in}
133
-
134
- % needed for kableExtra
135
- \usepackage{longtable}
136
- \usepackage{multirow}
137
- \usepackage[table]{xcolor}
138
- \usepackage{wrapfig}
139
- \usepackage{float}
140
- \usepackage{colortbl}
141
- \usepackage{pdflscape}
142
- \usepackage{tabu}
143
- \usepackage{threeparttable}
144
- \usepackage[normalem]{ulem}
145
-
146
- \usepackage{bbm}
147
- \usepackage{booktabs}
148
- \usepackage{expex}
149
-
150
- \usepackage{graphicx}
151
-
152
- \usepackage{fancyhdr}
153
- % set the header and foot style
154
- % style 'fancy' adds the section name on the header
155
- % and the page number on the footer
156
- \pagestyle{fancy}
157
-
158
- % style 'fancyhf' leaves header and footer empty
159
- %\fancyhf{}
160
-
161
- % sets the left head element to \rightmark, which contains the
162
- % current section (\leftmark is the current chapter)
163
- %\fancyhead[L]{\rightmark} .
164
-
165
- % sets the right head element to the page number.
166
- % \fancyhead[R]{\thepage}
167
-
168
- % lets the head rule disappear.
169
- % \renewcommand{\headrulewidth}{0pt}
170
- % Possible selectors for the optional argument of \fancyhead/\fancyfoot
171
- % are L (left), C (center) or R (right) for the position of the element
172
- % and E (even) or O (odd) to distinguish even and odd pages. If you omit
173
- % E/O the element is set for all pages.
174
-
175
- % \usepackage{lipsum}
176
-
177
- % make available command lastpage
178
- \usepackage{lastpage}
179
-
180
- % default fontsize 11pt better to add
181
- % fontsize on the yaml header
182
- % \usepackage[fontsize=11pt]{scrextend}
183
-
184
- % comandos para formatar uma tabela
185
- \usepackage{array}
186
- \newcolumntype{L}[1]{>{\raggedright\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
187
- \newcolumntype{C}[1]{>{\centering\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
188
- \newcolumntype{R}[1]{>{\raggedleft\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
189
-
190
- % necessary if we need to import other latex documents
191
- \usepackage{import}
192
-
193
- % Command to import an R variable to latex
194
- \newcommand{\RtoLatex}[2]{\newcommand{#1}{#2}}
195
-
196
- %
197
- %\newcommand{\atraso}[1]{\color{red} \textbf {Tempo desde a Assinatura do Contrato: #1 dias}}
198
-
199
- \begin{document}
200
- \maketitle
201
-
202
- {
203
- \setcounter{tocdepth}{2}
204
- \tableofcontents
205
- }
206
- \hypertarget{introduction}{%
207
- \section{Introduction}\label{introduction}}
208
-
209
- In this post we will see how to program with \emph{dplyr} in Galaaz.
210
-
211
- \hypertarget{but-first-what-is-galaaz}{%
212
- \subsection{But first, what is
213
- Galaaz??}\label{but-first-what-is-galaaz}}
214
-
215
- Galaaz is a system for tightly coupling Ruby and R. Ruby is a powerful
216
- language, with a large community, a very large set of libraries and
217
- great for web development. However, it lacks libraries for data science,
218
- statistics, scientific plotting and machine learning. On the other hand,
219
- R is considered one of the most powerful languages for solving all of
220
- the above problems. Maybe the strongest competitor to R is Python with
221
- libraries such as NumPy, Pandas, SciPy, SciKit-Learn and many more.
222
-
223
- With Galaaz we do not intend to re-implement any of the scientific
224
- libraries in R. However, we allow for very tight coupling between the
225
- two languages to the point that the Ruby developer does not need to know
226
- that there is an R engine running. Also, from the point of view of the R
227
- user/developer Galaaz looks a lot like R, with just minor syntactic
228
- difference, so there is almost no learning curve for the R developer.
229
- And as we will see in this post, programming with \emph{dplyr} is easier
230
- in Galaaz than in R.
231
-
232
- R users are probably quite knowledgeable about \emph{dplyr}; for the
233
- Ruby developer, \emph{dplyr} and the \emph{tidyverse} libraries are a
234
- set of libraries for data manipulation in R, developed by Hadley
235
- Wickham, chief scientist at RStudio and a prolific R coder and writer.
236
-
237
- For the coupling of Ruby and R we use new technologies provided by
238
- Oracle: GraalVM, TruffleRuby and FastR:
239
-
240
- \begin{verbatim}
241
- GraalVM is a universal virtual machine for running applications
242
- written in JavaScript, Python 3, Ruby, R, JVM-based languages like Java,
243
- Scala, Kotlin, and LLVM-based languages such as C and C++.
244
-
245
- GraalVM removes the isolation between programming languages and enables
246
- interoperability in a shared runtime. It can run either standalone or in
247
- the context of OpenJDK, Node.js, Oracle Database, or MySQL.
248
-
249
- GraalVM allows you to write polyglot applications with a seamless way to
250
- pass values from one language to another. With GraalVM there is no copying
251
- or marshaling necessary as it is with other polyglot systems. This lets
252
- you achieve high performance when language boundaries are crossed. Most
253
- of the time there is no additional cost for crossing a language boundary
254
- at all.
255
-
256
- Often developers have to make uncomfortable compromises that require them
257
- to rewrite their software in other languages. For example:
258
-
259
- * “That library is not available in my language. I need to rewrite it.”
260
- * “That language would be the perfect fit for my problem, but we cannot
261
- run it in our environment.”
262
- * “That problem is already solved in my language, but the language is
263
- too slow.”
264
-
265
- With GraalVM we aim to allow developers to freely choose the right language
266
- for the task at hand without making compromises.
267
- \end{verbatim}
268
-
269
- Interested readers should also check out the following sites:
270
-
271
- \begin{itemize}
272
- \tightlist
273
- \item
274
- \href{https://www.graalvm.org/}{GraalVM Home}
275
- \item
276
- \href{https://github.com/oracle/truffleruby}{TruffleRuby}
277
- \item
278
- \href{https://github.com/oracle/fastr}{FastR}
279
- \item
280
- \href{https://medium.com/graalvm/faster-r-with-fastr-4b8db0e0dceb}{Faster
281
- R with FastR}
282
- \item
283
- \href{https://medium.freecodecamp.org/how-to-make-beautiful-ruby-plots-with-galaaz-320848058857}{How
284
- to make Beautiful Ruby Plots with Galaaz}
285
- \item
286
- \href{https://towardsdatascience.com/ruby-plotting-with-galaaz-an-example-of-tightly-coupling-ruby-and-r-in-graalvm-520b69e21021}{Ruby
287
- Plotting with Galaaz: An example of tightly coupling Ruby and R in
288
- GraalVM}
289
- \item
290
- \href{https://towardsdatascience.com/how-to-do-reproducible-research-in-ruby-with-gknit-c26d2684d64e}{How
291
- to do reproducible research in Ruby with gKnit}
292
- \item
293
- \href{https://r4ds.had.co.nz/}{R for Data Science}
294
- \item
295
- \href{https://adv-r.hadley.nz/}{Advanced R}
296
- \end{itemize}
297
-
298
- \hypertarget{programming-with-dplyr}{%
299
- \subsection{Programming with dplyr}\label{programming-with-dplyr}}
300
-
301
- This post will follow closely the work done in
302
- \url{https://dplyr.tidyverse.org/articles/programming.html}, by Hadley
303
- Wickham. In it, Hadley states:
304
-
305
- \begin{quote}
306
- Most dplyr functions use non-standard evaluation (NSE). This is a
307
- catch-all term that means they don't follow the usual R rules of
308
- evaluation. Instead, they capture the expression that you typed and
309
- evaluate it in a custom way. This has two main benefits for dplyr code:
310
- \end{quote}
311
-
312
- \begin{quote}
313
- Operations on data frames can be expressed succinctly because you don't
314
- need to repeat the name of the data frame. For example, you can write
315
- filter(df, x == 1, y == 2, z == 3) instead of df{[}df\$x == 1 \& df\$y
316
- ==2 \& df\$z == 3, {]}.
317
- \end{quote}
318
-
319
- \begin{quote}
320
- dplyr can choose to compute results in a different way to base R. This
321
- is important for database backends because dplyr itself doesn't do any
322
- work, but instead generates the SQL that tells the database what to do.
323
- \end{quote}
324
-
325
- \begin{quote}
326
- Unfortunately these benefits do not come for free. There are two main
327
- drawbacks:
328
- \end{quote}
329
-
330
- \begin{quote}
331
- Most dplyr arguments are not referentially transparent. That means you
332
- can't replace a value with a seemingly equivalent object that you've
333
- defined elsewhere. In other words, this code:
334
- \end{quote}
335
-
336
- \begin{Shaded}
337
- \begin{Highlighting}[]
338
- \NormalTok{df <-}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}\DataTypeTok{x =} \DecValTok{1}\OperatorTok{:}\DecValTok{3}\NormalTok{, }\DataTypeTok{y =} \DecValTok{3}\OperatorTok{:}\DecValTok{1}\NormalTok{)}
339
- \KeywordTok{print}\NormalTok{(}\KeywordTok{filter}\NormalTok{(df, x }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{))}
340
- \CommentTok{#> # A tibble: 1 x 2}
341
- \CommentTok{#> x y}
342
- \CommentTok{#> <int> <int>}
343
- \CommentTok{#> 1 1 3}
344
- \end{Highlighting}
345
- \end{Shaded}
346
-
347
- \begin{quote}
348
- Is not equivalent to this code:
349
- \end{quote}
350
-
351
- \begin{Shaded}
352
- \begin{Highlighting}[]
353
- \NormalTok{my_var <-}\StringTok{ }\NormalTok{x}
354
- \CommentTok{#> Error in eval(expr, envir, enclos): object 'x' not found}
355
- \KeywordTok{filter}\NormalTok{(df, my_var }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{)}
356
- \CommentTok{#> Error: object 'my_var' not found}
357
- \end{Highlighting}
358
- \end{Shaded}
359
-
360
- \begin{quote}
361
- This makes it hard to create functions with arguments that change how
362
- dplyr verbs are computed.
363
- \end{quote}
364
-
365
- In this post we will see that programming with \emph{dplyr} in Galaaz
366
- does not require knowledge of non-standard evaluation in R and can be
367
- accomplished by utilizing normal Ruby constructs.
368
-
369
- \hypertarget{writing-expressions-in-galaaz}{%
370
- \section{Writing Expressions in
371
- Galaaz}\label{writing-expressions-in-galaaz}}
372
-
373
- Galaaz extends Ruby to work with expressions, similar to R's expressions
374
- built with `quote' (base R) or `quo' (tidyverse). Expressions in this
375
- context are like mathematical expressions or formulae. For instance, in
376
- mathematics, the expression \(y = sin(x)\) describes a function but
377
- cannot be computed unless the value of \(x\) is bound to some value.
378
-
379
- Let's take a look at some of those expressions in Ruby:
380
-
381
- \hypertarget{expressions-from-operators}{%
382
- \subsection{Expressions from
383
- operators}\label{expressions-from-operators}}
384
-
385
- The code below creates an expression summing two symbols. Note that :a
386
- and :b are Ruby symbols and are not bound to any value at the time of
387
- expression definition:
388
-
389
- \begin{Shaded}
390
- \begin{Highlighting}[]
391
- \NormalTok{exp1 = }\StringTok{:a}\NormalTok{ + }\StringTok{:b}
392
- \NormalTok{puts exp1}
393
- \end{Highlighting}
394
- \end{Shaded}
395
-
396
- \begin{verbatim}
397
- ## a + b
398
- \end{verbatim}
399
-
400
- We can build any complex mathematical expression such as:
401
-
402
- \begin{Shaded}
403
- \begin{Highlighting}[]
404
- \NormalTok{exp2 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) * }\FloatTok{2.0}\NormalTok{ + }\StringTok{:c}\NormalTok{ ** }\DecValTok{2}\NormalTok{ / }\StringTok{:z}
405
- \NormalTok{puts exp2}
406
- \end{Highlighting}
407
- \end{Shaded}
408
-
409
- \begin{verbatim}
410
- ## (a + b) * 2 + c^2L/z
411
- \end{verbatim}
412
-
413
- The `L' after two indicates that 2 is an integer.
414
-
415
- It is also possible to use inequality operators in building expressions:
416
-
417
- \begin{Shaded}
418
- \begin{Highlighting}[]
419
- \NormalTok{exp3 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) >= }\StringTok{:z}
420
- \NormalTok{puts exp3}
421
- \end{Highlighting}
422
- \end{Shaded}
423
-
424
- \begin{verbatim}
425
- ## a + b >= z
426
- \end{verbatim}
427
-
428
- Expressions' definition can also make use of normal Ruby variables
429
- without any problem:
430
-
431
- \begin{Shaded}
432
- \begin{Highlighting}[]
433
- \NormalTok{x = }\DecValTok{20}
434
- \NormalTok{y = }\DecValTok{30}
435
- \NormalTok{exp_var = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) * x <= }\StringTok{:z}\NormalTok{ - y}
436
- \NormalTok{puts exp_var}
437
- \end{Highlighting}
438
- \end{Shaded}
439
-
440
- \begin{verbatim}
441
- ## (a + b) * 20L <= z - 30L
442
- \end{verbatim}
443
-
444
- Galaaz provides both symbolic representations for operators, such as
445
- (\textgreater{}, \textless{}, !=), and functional notation for those
446
- operators such as (.gt, .ge, etc.). So the same expression written above
447
- can also be written as
448
-
449
- \begin{Shaded}
450
- \begin{Highlighting}[]
451
- \NormalTok{exp4 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{).ge }\StringTok{:z}
452
- \NormalTok{puts exp4}
453
- \end{Highlighting}
454
- \end{Shaded}
455
-
456
- \begin{verbatim}
457
- ## a + b >= z
458
- \end{verbatim}
459
-
460
- Two types of expression, however, can only be created with the functional
461
- representation of the operators, those are expressions involving `==',
462
- and `='. In order to write an expression involving `==' we need to use
463
- the method `.eq' and for `=' we need the function `.assign'
464
-
465
- \begin{Shaded}
466
- \begin{Highlighting}[]
467
- \NormalTok{exp5 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{).eq }\StringTok{:z}
468
- \NormalTok{puts exp5}
469
- \end{Highlighting}
470
- \end{Shaded}
471
-
472
- \begin{verbatim}
473
- ## a + b == z
474
- \end{verbatim}
475
-
476
- \begin{Shaded}
477
- \begin{Highlighting}[]
478
- \NormalTok{exp6 = }\StringTok{:y}\NormalTok{.assign }\StringTok{:a}\NormalTok{ + }\StringTok{:b}
479
- \NormalTok{puts exp6}
480
- \end{Highlighting}
481
- \end{Shaded}
482
-
483
- \begin{verbatim}
484
- ## y <- a + b
485
- \end{verbatim}
486
-
487
- In general we think that using the functional notation is preferable to
488
- using the symbolic notation as otherwise, we end up writing invalid
489
- expressions such as
490
-
491
- \begin{Shaded}
492
- \begin{Highlighting}[]
493
- \NormalTok{exp_wrong = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) == }\StringTok{:z}
494
- \NormalTok{puts exp_wrong}
495
- \end{Highlighting}
496
- \end{Shaded}
497
-
498
- \begin{verbatim}
499
- ## Message:
500
- ## Error in function (x, y, num.eq = TRUE, single.NA = TRUE, attrib.as.set = TRUE, :
501
- ## object 'a' not found (RError)
502
- ## Translated to internal error
503
- \end{verbatim}
504
-
505
- and it might be difficult to understand what is going on here. The
506
- problem lies with the fact that when using `==' we are comparing
507
- expression (:a + :b) to expression :z with `=='. When the comparison is
508
- executed, the system tries to evaluate :a, :b and :z, and those symbols
509
- at this time are not bound to anything and we get a ``object `a' not
510
- found'' message. If we only use functional notation, this type of error
511
- will not occur.
512
-
513
- \hypertarget{expressions-with-r-methods}{%
514
- \subsection{Expressions with R
515
- methods}\label{expressions-with-r-methods}}
516
-
517
- It is often necessary to create an expression that uses a method or
518
- function. For instance, in mathematics, it's quite natural to write an
519
- expression such as \(y = sin(x)\). In this case, the `sin' function is
520
- part of the expression and should not immediately be executed. When we
521
- want the function to be part of the expression, we call the function
522
- preceding it by the letter E, such as `E.sin(x)'
523
-
524
- \begin{Shaded}
525
- \begin{Highlighting}[]
526
- \NormalTok{exp7 = }\StringTok{:y}\NormalTok{.assign E.sin(}\StringTok{:x}\NormalTok{)}
527
- \NormalTok{puts exp7}
528
- \end{Highlighting}
529
- \end{Shaded}
530
-
531
- \begin{verbatim}
532
- ## y <- sin(x)
533
- \end{verbatim}
534
-
535
- Expressions can also be written using `.' notation:
536
-
537
- \begin{Shaded}
538
- \begin{Highlighting}[]
539
- \NormalTok{exp8 = }\StringTok{:y}\NormalTok{.assign }\StringTok{:x}\NormalTok{.sin}
540
- \NormalTok{puts exp8}
541
- \end{Highlighting}
542
- \end{Shaded}
543
-
544
- \begin{verbatim}
545
- ## y <- sin(x)
546
- \end{verbatim}
547
-
548
- When a function has multiple arguments, the first one can be used before
549
- the `.':
550
-
551
- \begin{Shaded}
552
- \begin{Highlighting}[]
553
- \NormalTok{exp9 = }\StringTok{:x}\NormalTok{.c(}\StringTok{:y}\NormalTok{)}
554
- \NormalTok{puts exp9}
555
- \end{Highlighting}
556
- \end{Shaded}
557
-
558
- \begin{verbatim}
559
- ## c(x, y)
560
- \end{verbatim}
561
-
562
- \hypertarget{evaluating-an-expression}{%
563
- \subsection{Evaluating an Expression}\label{evaluating-an-expression}}
564
-
565
- Expressions can be evaluated by calling function `eval' with a binding.
566
- A binding can be provided with a list:
567
-
568
- \begin{Shaded}
569
- \begin{Highlighting}[]
570
- \NormalTok{exp = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) * }\FloatTok{2.0}\NormalTok{ + }\StringTok{:c}\NormalTok{ ** }\DecValTok{2}\NormalTok{ / }\StringTok{:z}
571
- \NormalTok{puts exp.eval(R.list(}\StringTok{a: }\DecValTok{10}\NormalTok{, }\StringTok{b: }\DecValTok{20}\NormalTok{, }\StringTok{c: }\DecValTok{30}\NormalTok{, }\StringTok{z: }\DecValTok{40}\NormalTok{))}
572
- \end{Highlighting}
573
- \end{Shaded}
574
-
575
- \begin{verbatim}
576
- ## [1] 82.5
577
- \end{verbatim}
578
-
579
- \ldots{} with a data frame:
580
-
581
- \begin{Shaded}
582
- \begin{Highlighting}[]
583
- \NormalTok{df = R.data__frame(}
584
- \StringTok{a: }\NormalTok{R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{),}
585
- \StringTok{b: }\NormalTok{R.c(}\DecValTok{10}\NormalTok{, }\DecValTok{20}\NormalTok{, }\DecValTok{30}\NormalTok{),}
586
- \StringTok{c: }\NormalTok{R.c(}\DecValTok{100}\NormalTok{, }\DecValTok{200}\NormalTok{, }\DecValTok{300}\NormalTok{),}
587
- \StringTok{z: }\NormalTok{R.c(}\DecValTok{1000}\NormalTok{, }\DecValTok{2000}\NormalTok{, }\DecValTok{3000}\NormalTok{))}
588
-
589
- \NormalTok{puts exp.eval(df)}
590
- \end{Highlighting}
591
- \end{Shaded}
592
-
593
- \begin{verbatim}
594
- ## [1] 32 64 96
595
- \end{verbatim}
596
-
597
- \hypertarget{using-galaaz-to-call-r-functions}{%
598
- \section{Using Galaaz to call R
599
- functions}\label{using-galaaz-to-call-r-functions}}
600
-
601
- Galaaz tries to emulate as closely as possible the way R functions are
602
- called and migrating from R to Galaaz should be quite easy requiring
603
- only minor syntactic changes to an R script. In this post, we do not
604
- have enough space to write a complete manual on Galaaz (a short manual
605
- can be found at: \url{https://www.rubydoc.info/gems/galaaz/0.4.9}), so
606
- we will present only a few example scripts using Galaaz.
607
-
608
- Basically, to call an R function from Ruby with Galaaz, one only needs
609
- to precede the function with `R.'. For instance, to create a vector in
610
- R, the `c' function is used. From Galaaz, a vector can be created by
611
- using `R.c':
612
-
613
- \begin{Shaded}
614
- \begin{Highlighting}[]
615
- \NormalTok{vec = R.c(}\FloatTok{1.0}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{)}
616
- \NormalTok{puts vec}
617
- \end{Highlighting}
618
- \end{Shaded}
619
-
620
- \begin{verbatim}
621
- ## [1] 1 2 3
622
- \end{verbatim}
623
-
624
- A list is created in R with the `list' function, so in Galaaz we do:
625
-
626
- \begin{Shaded}
627
- \begin{Highlighting}[]
628
- \NormalTok{list = R.list(}\StringTok{a: }\FloatTok{1.0}\NormalTok{, }\StringTok{b: }\DecValTok{2}\NormalTok{, }\StringTok{c: }\DecValTok{3}\NormalTok{)}
629
- \NormalTok{puts list}
630
- \end{Highlighting}
631
- \end{Shaded}
632
-
633
- \begin{verbatim}
634
- ## $a
635
- ## [1] 1
636
- ##
637
- ## $b
638
- ## [1] 2
639
- ##
640
- ## $c
641
- ## [1] 3
642
- \end{verbatim}
643
-
644
- Note that we can use named arguments in our list. The same code in R
645
- would be:
646
-
647
- \begin{Shaded}
648
- \begin{Highlighting}[]
649
- \NormalTok{lst =}\StringTok{ }\KeywordTok{list}\NormalTok{(}\DataTypeTok{a =} \DecValTok{1}\NormalTok{, }\DataTypeTok{b =}\NormalTok{ 2L, }\DataTypeTok{c =}\NormalTok{ 3L)}
650
- \KeywordTok{print}\NormalTok{(lst)}
651
- \end{Highlighting}
652
- \end{Shaded}
653
-
654
- \begin{verbatim}
655
- ## $a
656
- ## [1] 1
657
- ##
658
- ## $b
659
- ## [1] 2
660
- ##
661
- ## $c
662
- ## [1] 3
663
- \end{verbatim}
664
-
665
- Now, let's say that `x' is an angle of 45 radians (R's `sin' works in
666
- radians) and we actually want to create the expression \(y = sin(45)\), which is
667
- \(y = 0.850...\). In this case, we will use `R.sin':
668
-
669
- \begin{Shaded}
670
- \begin{Highlighting}[]
671
- \NormalTok{exp10 = }\StringTok{:y}\NormalTok{.assign R.sin(}\DecValTok{45}\NormalTok{)}
672
- \NormalTok{puts exp10}
673
- \end{Highlighting}
674
- \end{Shaded}
675
-
676
- \begin{verbatim}
677
- ## y <- 0.850903524534118
678
- \end{verbatim}
679
-
680
- \hypertarget{filtering-using-expressions}{%
681
- \section{Filtering using
682
- expressions}\label{filtering-using-expressions}}
683
-
684
- Now that we know how to write expressions and call R functions, let's do
685
- some data manipulation in Galaaz. Let's first start by creating the same
686
- data frame that we created previously in section ``Programming with
687
- dplyr'':
688
-
689
- \begin{Shaded}
690
- \begin{Highlighting}[]
691
- \NormalTok{df = R.data__frame(}\StringTok{x: }\NormalTok{(}\DecValTok{1}\NormalTok{..}\DecValTok{3}\NormalTok{), }\StringTok{y: }\NormalTok{(}\DecValTok{3}\NormalTok{..}\DecValTok{1}\NormalTok{))}
692
- \NormalTok{puts df}
693
- \end{Highlighting}
694
- \end{Shaded}
695
-
696
- \begin{verbatim}
697
- ## x y
698
- ## 1 1 3
699
- ## 2 2 2
700
- ## 3 3 1
701
- \end{verbatim}
702
-
703
- The `filter' function can be called on this data frame either by using
704
- `R.filter(df, \ldots{})' or by using dot notation. We prefer to use dot
705
- notation as shown below. The argument to `filter' in Galaaz should be
706
- an expression. Note that if we gave to filter a Ruby expression such as
707
- `x == 1', we would get an error, since there is no variable `x' defined
708
- and if `x' was a variable then `x == 1' would either be `true' or
709
- `false'. Our goal is to filter our data frame returning all rows in
710
- which the `x' value is equal to 1. To express this we want: `:x.eq 1',
711
- where :x will be interpreted by filter as the `x' column.
712
-
713
- \begin{Shaded}
714
- \begin{Highlighting}[]
715
- \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{.eq }\DecValTok{1}\NormalTok{)}
716
- \end{Highlighting}
717
- \end{Shaded}
718
-
719
- \begin{verbatim}
720
- ## x y
721
- ## 1 1 3
722
- \end{verbatim}
723
-
724
- In R, and when coding with `tidyverse', arguments to a function are
725
- usually not \emph{referentially transparent}. That is, you can't replace
726
- a value with a seemingly equivalent object that you've defined
727
- elsewhere. In other words, this code
728
-
729
- \begin{Shaded}
730
- \begin{Highlighting}[]
731
- \NormalTok{my_var <-}\StringTok{ }\NormalTok{x}
732
- \KeywordTok{filter}\NormalTok{(df, my_var }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{)}
733
- \end{Highlighting}
734
- \end{Shaded}
735
-
736
- generates the following error: ``object `x' not found''.
737
-
738
- However, in Galaaz, arguments are referentially transparent as can be
739
- seen by the code below. Note initially that `my\_var = :x' will not give
740
- the error ``object `x' not found'' since `:x' is treated as an
741
- expression and assigned to my\_var. Then when doing (my\_var.eq 1),
742
- my\_var is a variable that resolves to `:x' and it becomes equivalent to
743
- (:x.eq 1) which is what we want.
744
-
745
- \begin{Shaded}
746
- \begin{Highlighting}[]
747
- \NormalTok{my_var = }\StringTok{:x}
748
- \NormalTok{puts df.filter(my_var.eq }\DecValTok{1}\NormalTok{)}
749
- \end{Highlighting}
750
- \end{Shaded}
751
-
752
- \begin{verbatim}
753
- ## x y
754
- ## 1 1 3
755
- \end{verbatim}
756
-
757
- As stated by Hadley
758
-
759
- \begin{quote}
760
- dplyr code is ambiguous. Depending on what variables are defined where,
761
- filter(df, x == y) could be equivalent to any of:
762
- \end{quote}
763
-
764
- \begin{verbatim}
765
- df[df$x == df$y, ]
766
- df[df$x == y, ]
767
- df[x == df$y, ]
768
- df[x == y, ]
769
- \end{verbatim}
770
-
771
- In galaaz this ambiguity does not exist, filter(df, x.eq y) is not a
772
- valid expression as expressions are built with symbols. In doing
773
- filter(df, :x.eq y) we are looking for elements of the `x' column that
774
- are equal to a previously defined y variable. Finally in filter(df,
775
- :x.eq :y) we are looking for elements in which the `x' column value is
776
- equal to the `y' column value. This can be seen in the following two
777
- chunks of code:
778
-
779
- \begin{Shaded}
780
- \begin{Highlighting}[]
781
- \NormalTok{y = }\DecValTok{1}
782
- \NormalTok{x = }\DecValTok{2}
783
-
784
- \CommentTok{# looking for values where the 'x' column is equal to the 'y' column}
785
- \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{.eq }\StringTok{:y}\NormalTok{)}
786
- \end{Highlighting}
787
- \end{Shaded}
788
-
789
- \begin{verbatim}
790
- ## x y
791
- ## 1 2 2
792
- \end{verbatim}
793
-
794
- \begin{Shaded}
795
- \begin{Highlighting}[]
796
- \CommentTok{# looking for values where the 'x' column is equal to the 'y' variable}
797
- \CommentTok{# in this case, the number 1}
798
- \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{.eq y)}
799
- \end{Highlighting}
800
- \end{Shaded}
801
-
802
- \begin{verbatim}
803
- ## x y
804
- ## 1 1 3
805
- \end{verbatim}
806
-
807
- \hypertarget{writing-a-function-that-applies-to-different-data-sets}{%
808
- \section{Writing a function that applies to different data
809
- sets}\label{writing-a-function-that-applies-to-different-data-sets}}
810
-
811
- Let's suppose that we want to write a function that receives as the
812
- first argument a data frame and as second argument an expression that
813
- adds a column to the data frame that is equal to the sum of elements in
814
- column `a' plus `x'.
815
-
816
- Here is the intended behaviour using the `mutate' function of `dplyr':
817
-
818
- \begin{verbatim}
819
- mutate(df1, y = a + x)
820
- mutate(df2, y = a + x)
821
- mutate(df3, y = a + x)
822
- mutate(df4, y = a + x)
823
- \end{verbatim}
824
-
825
- The naive approach to writing an R function to solve this problem is:
826
-
827
- \begin{verbatim}
828
- mutate_y <- function(df) {
829
- mutate(df, y = a + x)
830
- }
831
- \end{verbatim}
832
-
833
- Unfortunately, in R, this function can fail silently if one of the
834
- variables isn't present in the data frame, but is present in the global
835
- environment. We will not go through here how to solve this problem in R.
836
-
837
- In Galaaz the method mutate\_y below will work fine and will never fail
838
- silently.
839
-
840
- \begin{Shaded}
841
- \begin{Highlighting}[]
842
- \KeywordTok{def}\NormalTok{ mutate_y(df)}
843
- \NormalTok{ df.mutate(}\StringTok{:y}\NormalTok{.assign }\StringTok{:a}\NormalTok{ + }\StringTok{:x}\NormalTok{)}
844
- \KeywordTok{end}
845
- \end{Highlighting}
846
- \end{Shaded}
847
-
848
- Here we create a data frame that has only one column named `x':
849
-
850
- \begin{Shaded}
851
- \begin{Highlighting}[]
852
- \NormalTok{df1 = R.data__frame(}\StringTok{x: }\NormalTok{(}\DecValTok{1}\NormalTok{..}\DecValTok{3}\NormalTok{))}
853
- \NormalTok{puts df1}
854
- \end{Highlighting}
855
- \end{Shaded}
856
-
857
- \begin{verbatim}
858
- ## x
859
- ## 1 1
860
- ## 2 2
861
- ## 3 3
862
- \end{verbatim}
863
-
864
- Note that method mutate\_y will fail independently of the fact that
865
- variable `a' is defined and in the scope of the method. Variable `a' has
866
- no relationship with the symbol `:a' used in the definition of
867
- `mutate\_y' above:
868
-
869
- \begin{Shaded}
870
- \begin{Highlighting}[]
871
- \NormalTok{a = }\DecValTok{10}
872
- \NormalTok{mutate_y(df1)}
873
- \end{Highlighting}
874
- \end{Shaded}
875
-
876
- \begin{verbatim}
877
- ## Message:
878
- ## Error in mutate_impl(.data, dots) :
879
- ## Evaluation error: object 'a' not found.
880
- ## In addition: Warning message:
881
- ## In mutate_impl(.data, dots) :
882
- ## mismatched protect/unprotect (unprotect with empty protect stack) (RError)
883
- ## Translated to internal error
884
- \end{verbatim}
885
-
886
- \hypertarget{different-expressions}{%
887
- \section{Different expressions}\label{different-expressions}}
888
-
889
- Let's move to the next problem as presented by Hadley where trying to
890
- write a function in R that will receive two arguments, the first a
891
- variable and the second an expression is not trivial. Below we create a
892
- data frame and we want to write a function that groups data by a
893
- variable and summarises it by an expression:
894
-
895
- \begin{Shaded}
896
- \begin{Highlighting}[]
897
- \KeywordTok{set.seed}\NormalTok{(}\DecValTok{123}\NormalTok{)}
898
-
899
- \NormalTok{df <-}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}
900
- \DataTypeTok{g1 =} \KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{, }\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{2}\NormalTok{),}
901
- \DataTypeTok{g2 =} \KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{1}\NormalTok{),}
902
- \DataTypeTok{a =} \KeywordTok{sample}\NormalTok{(}\DecValTok{5}\NormalTok{),}
903
- \DataTypeTok{b =} \KeywordTok{sample}\NormalTok{(}\DecValTok{5}\NormalTok{)}
904
- \NormalTok{)}
905
-
906
- \KeywordTok{as.data.frame}\NormalTok{(df) }
907
- \end{Highlighting}
908
- \end{Shaded}
909
-
910
- \begin{verbatim}
911
- ## g1 g2 a b
912
- ## 1 1 1 2 1
913
- ## 2 1 2 4 3
914
- ## 3 2 1 5 4
915
- ## 4 2 2 3 2
916
- ## 5 2 1 1 5
917
- \end{verbatim}
918
-
919
- \begin{Shaded}
920
- \begin{Highlighting}[]
921
- \NormalTok{d2 <-}\StringTok{ }\NormalTok{df }\OperatorTok{%>%}
922
- \StringTok{ }\KeywordTok{group_by}\NormalTok{(g1) }\OperatorTok{%>%}
923
- \StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{a =} \KeywordTok{mean}\NormalTok{(a))}
924
-
925
- \KeywordTok{as.data.frame}\NormalTok{(d2) }
926
- \end{Highlighting}
927
- \end{Shaded}
928
-
929
- \begin{verbatim}
930
- ## g1 a
931
- ## 1 1 3
932
- ## 2 2 3
933
- \end{verbatim}
934
-
935
- \begin{Shaded}
936
- \begin{Highlighting}[]
937
- \NormalTok{d2 <-}\StringTok{ }\NormalTok{df }\OperatorTok{%>%}
938
- \StringTok{ }\KeywordTok{group_by}\NormalTok{(g2) }\OperatorTok{%>%}
939
- \StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{a =} \KeywordTok{mean}\NormalTok{(a))}
940
-
941
- \KeywordTok{as.data.frame}\NormalTok{(d2) }
942
- \end{Highlighting}
943
- \end{Shaded}
944
-
945
- \begin{verbatim}
946
- ## g2 a
947
- ## 1 1 2.666667
948
- ## 2 2 3.500000
949
- \end{verbatim}
950
-
951
- As shown by Hadley, one might expect this function to do the trick:
952
-
953
- \begin{Shaded}
954
- \begin{Highlighting}[]
955
- \NormalTok{my_summarise <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(df, group_var) \{}
956
- \NormalTok{ df }\OperatorTok{%>%}
957
- \StringTok{ }\KeywordTok{group_by}\NormalTok{(group_var) }\OperatorTok{%>%}
958
- \StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{a =} \KeywordTok{mean}\NormalTok{(a))}
959
- \NormalTok{\}}
960
-
961
- \CommentTok{# my_summarise(df, g1)}
962
- \CommentTok{#> Error: Column `group_var` is unknown}
963
- \end{Highlighting}
964
- \end{Shaded}
965
-
966
- In order to solve this problem, coding with dplyr requires the
967
- introduction of many new concepts and functions such as `quo', `quos',
968
- `enquo', `enquos', `!!' (bang bang), `!!!' (triple bang). Again, we'll
969
- leave to Hadley the explanation of how to use all those functions.
970
-
971
- Now, let's try to implement the same function in galaaz. The next code
972
- block first prints the `df' data frame defined previously in R (to access
973
- an R variable from Galaaz, we use the tilde operator `\textasciitilde{}'
974
- applied to the R variable name as symbol, i.e., `:df'). We then create
975
- the `my\_summarize' method and call it passing the R data frame and the
976
- group by variable `:g1':
977
-
978
- \begin{Shaded}
979
- \begin{Highlighting}[]
980
- \NormalTok{puts ~}\StringTok{:df}
981
- \NormalTok{print }\StringTok{"\textbackslash{}n"}
982
-
983
- \KeywordTok{def}\NormalTok{ my_summarize(df, group_var)}
984
- \NormalTok{ df.group_by(group_var).}
985
- \NormalTok{ summarize(}\StringTok{a: :a}\NormalTok{.mean)}
986
- \KeywordTok{end}
987
-
988
- \NormalTok{puts my_summarize(}\StringTok{:df}\NormalTok{, }\StringTok{:g1}\NormalTok{).as__data__frame}
989
- \end{Highlighting}
990
- \end{Shaded}
991
-
992
- \begin{verbatim}
993
- ## g1 g2 a b
994
- ## 1 1 1 2 1
995
- ## 2 1 2 4 3
996
- ## 3 2 1 5 4
997
- ## 4 2 2 3 2
998
- ## 5 2 1 1 5
999
- ##
1000
- ## g1 a
1001
- ## 1 1 3
1002
- ## 2 2 3
1003
- \end{verbatim}
1004
-
1005
- It works!!! Well, let's make sure this was not just some coincidence
1006
-
1007
- \begin{Shaded}
1008
- \begin{Highlighting}[]
1009
- \NormalTok{puts my_summarize(}\StringTok{:df}\NormalTok{, }\StringTok{:g2}\NormalTok{).as__data__frame}
1010
- \end{Highlighting}
1011
- \end{Shaded}
1012
-
1013
- \begin{verbatim}
1014
- ## g2 a
1015
- ## 1 1 2.666667
1016
- ## 2 2 3.500000
1017
- \end{verbatim}
1018
-
1019
- Great, everything is fine! No magic, no new functions, no complexities,
1020
- just normal, standard Ruby code. If you've ever done NSE in R, this
1021
- certainly feels much safer and easy to implement.
1022
-
1023
- \hypertarget{different-input-variables}{%
1024
- \section{Different input variables}\label{different-input-variables}}
1025
-
1026
- In the previous section we've managed to get rid of all NSE formulation
1027
- for a simple example, but does this remain true for more complex
1028
- examples, or will the Galaaz way prove impractical for more complex
1029
- code?
1030
-
1031
- In the next example Hadley proposes that we write a function that, given
1032
- an expression such as `a' or `a * b', calculates three summaries. What
1033
- we want is a function that does the same as these R statements:
1034
-
1035
- \begin{verbatim}
1036
- summarise(df, mean = mean(a), sum = sum(a), n = n())
1037
- #> # A tibble: 1 x 3
1038
- #> mean sum n
1039
- #> <dbl> <int> <int>
1040
- #> 1 3 15 5
1041
-
1042
- summarise(df, mean = mean(a * b), sum = sum(a * b), n = n())
1043
- #> # A tibble: 1 x 3
1044
- #> mean sum n
1045
- #> <dbl> <int> <int>
1046
- #> 1 9 45 5
1047
- \end{verbatim}
1048
-
1049
- Let's try it in galaaz:
1050
-
1051
- \begin{Shaded}
1052
- \begin{Highlighting}[]
1053
- \KeywordTok{def}\NormalTok{ my_summarise2(df, expr)}
1054
- \NormalTok{ df.summarize(}
1055
- \StringTok{mean: }\NormalTok{E.mean(expr),}
1056
- \StringTok{sum: }\NormalTok{E.sum(expr),}
1057
- \StringTok{n: }\NormalTok{E.n}
1058
- \NormalTok{ )}
1059
- \KeywordTok{end}
1060
-
1061
- \NormalTok{puts my_summarise2((~}\StringTok{:df}\NormalTok{), }\StringTok{:a}\NormalTok{)}
1062
- \NormalTok{puts my_summarise2((~}\StringTok{:df}\NormalTok{), }\StringTok{:a}\NormalTok{ * }\StringTok{:b}\NormalTok{)}
1063
- \end{Highlighting}
1064
- \end{Shaded}
1065
-
1066
- \begin{verbatim}
1067
- ## mean sum n
1068
- ## 1 3 15 5
1069
- ## mean sum n
1070
- ## 1 9 45 5
1071
- \end{verbatim}
1072
-
1073
- Once again, there is no need to use any special theory or functions. The
1074
- only point to be careful about is the use of `E' to build expressions
1075
- from functions `mean', `sum' and `n'.
1076
-
1077
- \hypertarget{different-input-and-output-variable}{%
1078
- \section{Different input and output
1079
- variable}\label{different-input-and-output-variable}}
1080
-
1081
- Now the next challenge presented by Hadley is to vary the name of the
1082
- output variables based on the received expression. So, if the input
1083
- expression is `a', we want our data frame columns to be named `mean\_a'
1084
- and `sum\_a'. Now, if the input expression is `b', columns should be
1085
- named `mean\_b' and `sum\_b'.
1086
-
1087
- \begin{verbatim}
1088
- mutate(df, mean_a = mean(a), sum_a = sum(a))
1089
- #> # A tibble: 5 x 6
1090
- #> g1 g2 a b mean_a sum_a
1091
- #> <dbl> <dbl> <int> <int> <dbl> <int>
1092
- #> 1 1 1 1 3 3 15
1093
- #> 2 1 2 4 2 3 15
1094
- #> 3 2 1 2 1 3 15
1095
- #> 4 2 2 5 4 3 15
1096
- #> # … with 1 more row
1097
-
1098
- mutate(df, mean_b = mean(b), sum_b = sum(b))
1099
- #> # A tibble: 5 x 6
1100
- #> g1 g2 a b mean_b sum_b
1101
- #> <dbl> <dbl> <int> <int> <dbl> <int>
1102
- #> 1 1 1 1 3 3 15
1103
- #> 2 1 2 4 2 3 15
1104
- #> 3 2 1 2 1 3 15
1105
- #> 4 2 2 5 4 3 15
1106
- #> # … with 1 more row
1107
- \end{verbatim}
1108
-
1109
- In order to solve this problem in R, Hadley needs to introduce some
1110
- more new functions and notations: `quo\_name' and the `:=' operator from
1111
- package `rlang'
1112
-
1113
- Here is our Ruby code:
1114
-
1115
- \begin{Shaded}
1116
- \begin{Highlighting}[]
1117
- \KeywordTok{def}\NormalTok{ my_mutate(df, expr)}
1118
- \NormalTok{ mean_name = }\StringTok{"mean_}\OtherTok{#\{}\NormalTok{expr.to_s}\OtherTok{\}}\StringTok{"}
1119
- \NormalTok{ sum_name = }\StringTok{"sum_}\OtherTok{#\{}\NormalTok{expr.to_s}\OtherTok{\}}\StringTok{"}
1120
-
1121
- \NormalTok{ df.mutate(mean_name => E.mean(expr),}
1122
- \NormalTok{ sum_name => E.sum(expr))}
1123
- \KeywordTok{end}
1124
-
1125
- \NormalTok{puts my_mutate((~}\StringTok{:df}\NormalTok{), }\StringTok{:a}\NormalTok{)}
1126
- \NormalTok{puts my_mutate((~}\StringTok{:df}\NormalTok{), }\StringTok{:b}\NormalTok{)}
1127
- \end{Highlighting}
1128
- \end{Shaded}
1129
-
1130
- \begin{verbatim}
1131
- ## g1 g2 a b mean_a sum_a
1132
- ## 1 1 1 2 1 3 15
1133
- ## 2 1 2 4 3 3 15
1134
- ## 3 2 1 5 4 3 15
1135
- ## 4 2 2 3 2 3 15
1136
- ## 5 2 1 1 5 3 15
1137
- ## g1 g2 a b mean_b sum_b
1138
- ## 1 1 1 2 1 3 15
1139
- ## 2 1 2 4 3 3 15
1140
- ## 3 2 1 5 4 3 15
1141
- ## 4 2 2 3 2 3 15
1142
- ## 5 2 1 1 5 3 15
1143
- \end{verbatim}
1144
-
1145
- It really seems that ``Non Standard Evaluation'' is actually quite
1146
- standard in Galaaz! But, you might have noticed a small change in the
1147
- way the arguments to the mutate method were called. In a previous
1148
- example we used df.summarise(mean: E.mean(:a), \ldots{}) where the
1149
- column name was followed by a `:' colon. In this example, we have
1150
- df.mutate(mean\_name =\textgreater{} E.mean(expr), \ldots{}) and
1151
- variable mean\_name is not followed by `:' but by `=\textgreater{}'.
1152
- This is standard Ruby notation.
1153
-
1154
- {[}explain\ldots{}.{]}
1155
-
1156
- \hypertarget{capturing-multiple-variables}{%
1157
- \section{Capturing multiple
1158
- variables}\label{capturing-multiple-variables}}
1159
-
1160
- Moving on with new complexities, Hadley proposes that we solve the
1161
- problem in which the summarise function will receive any number of
1162
- grouping variables.
1163
-
1164
- This again is quite standard Ruby. In order to receive an undefined
1165
- number of parameters, the parameter is preceded by `*':
1166
-
1167
- \begin{Shaded}
1168
- \begin{Highlighting}[]
1169
- \KeywordTok{def}\NormalTok{ my_summarise3(df, *group_vars)}
1170
- \NormalTok{ df.group_by(*group_vars).}
1171
- \NormalTok{ summarise(}\StringTok{a: }\NormalTok{E.mean(}\StringTok{:a}\NormalTok{))}
1172
- \KeywordTok{end}
1173
-
1174
- \NormalTok{puts my_summarise3((~}\StringTok{:df}\NormalTok{), }\StringTok{:g1}\NormalTok{, }\StringTok{:g2}\NormalTok{).as__data__frame}
1175
- \end{Highlighting}
1176
- \end{Shaded}
1177
-
1178
- \begin{verbatim}
1179
- ## g1 g2 a
1180
- ## 1 1 1 2
1181
- ## 2 1 2 4
1182
- ## 3 2 1 3
1183
- ## 4 2 2 3
1184
- \end{verbatim}
1185
-
1186
- \hypertarget{why-does-r-require-nse-and-galaaz-does-not}{%
1187
- \section{Why does R require NSE and Galaaz does
1188
- not?}\label{why-does-r-require-nse-and-galaaz-does-not}}
1189
-
1190
- NSE introduces a number of new concepts, such as `quoting',
1191
- `quasiquotation', `unquoting' and `unquote-splicing', while in Galaaz
1192
- none of those concepts are needed. What gives?
1193
-
1194
- R is an extremely flexible language and it has lazy evaluation of
1195
- parameters. When in R a function is called as `summarise(df, a = b)',
1196
- the summarise function receives the literal `a = b' parameter and can
1197
- work with this as if it were a string. In R, it is not clear what a and
1198
- b are, they can be expressions or they can be variables, it is up to the
1199
- function to decide what `a = b' means.
1200
-
1201
- In Ruby, there is no lazy evaluation of parameters and `a' is always a
1202
- variable and so is `b'. Variables assume their value as soon as they are
1203
- used, so `x = a' is immediately evaluated and variable `x' will receive
1204
- the value of variable `a' as soon as the Ruby statement is executed.
1205
- Ruby also provides the notion of a symbol; `:a' is a symbol and does not
1206
- evaluate to anything. Galaaz uses Ruby symbols to build expressions that
1207
- are not bound to anything: `:a.eq :b' is clearly an expression and has
1208
- no relationship whatsoever with the statement `a = b'. By using symbols,
1209
- variables and expressions all the possible ambiguities that are found in
1210
- R are eliminated in Galaaz.
1211
-
1212
- The main problem that remains, is that in R, functions are not clearly
1213
- documented as what type of input they are expecting, they might be
1214
- expecting regular variables or they might be expecting expressions and
1215
- the R function will know how to deal with an input of the form `a = b',
1216
- now for the Ruby developer it might not be immediately clear if it
1217
- should call the function passing the value `true' if variable `a' is
1218
- equal to variable `b' or if it should call the function passing the
1219
- expression `:a.eq :b'.
1220
-
1221
- \hypertarget{advanced-dplyr-features}{%
1222
- \section{Advanced dplyr features}\label{advanced-dplyr-features}}
1223
-
1224
- In the blog: Programming with dplyr by using dplyr
1225
- (\url{https://www.r-bloggers.com/programming-with-dplyr-by-using-dplyr/})
1226
- Iñaki Úcar shows surprise that some R users are trying to code in dplyr
1227
- avoiding the use of NSE. For instance he says:
1228
-
1229
- \begin{quote}
1230
- Take the example of seplyr. It stands for standard evaluation dplyr, and
1231
- enables us to program over dplyr without having ``to bring in (or study)
1232
- any deep-theory or heavy-weight tools such as rlang/tidyeval''.
1233
- \end{quote}
1234
-
1235
- For me, there isn't really any surprise that users are trying to avoid
1236
- dplyr deep-theory. R users frequently are not programmers and learning
1237
- to code is already hard business, on top of that, having to learn how to
1238
- `quote' or `enquo' or `quos' or `enquos' is not necessarily a `piece of
1239
- cake'. So much so, that `tidyeval' has some more advanced functions that
1240
- instead of using quoted expressions, use strings as arguments.
1241
-
1242
- In the following examples, we show the use of functions `group\_by\_at',
1243
- `summarise\_at' and `rename\_at' that receive strings as argument. The
1244
- data frame used is `starwars', which describes features of characters in
1245
- the Starwars movies:
1246
-
1247
- \begin{Shaded}
1248
- \begin{Highlighting}[]
1249
- \NormalTok{puts (~}\StringTok{:starwars}\NormalTok{).head.as__data__frame}
1250
- \end{Highlighting}
1251
- \end{Shaded}
1252
-
1253
- \begin{verbatim}
1254
- ## name height mass hair_color skin_color eye_color birth_year
1255
- ## 1 Luke Skywalker 172 77 blond fair blue 19.0
1256
- ## 2 C-3PO 167 75 <NA> gold yellow 112.0
1257
- ## 3 R2-D2 96 32 <NA> white, blue red 33.0
1258
- ## 4 Darth Vader 202 136 none white yellow 41.9
1259
- ## 5 Leia Organa 150 49 brown light brown 19.0
1260
- ## 6 Owen Lars 178 120 brown, grey light blue 52.0
1261
- ## gender homeworld species
1262
- ## 1 male Tatooine Human
1263
- ## 2 <NA> Tatooine Droid
1264
- ## 3 <NA> Naboo Droid
1265
- ## 4 male Tatooine Human
1266
- ## 5 female Alderaan Human
1267
- ## 6 male Tatooine Human
1268
- ## films
1269
- ## 1 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
1270
- ## 2 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
1271
- ## 3 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
1272
- ## 4 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
1273
- ## 5 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
1274
- ## 6 Attack of the Clones, Revenge of the Sith, A New Hope
1275
- ## vehicles starships
1276
- ## 1 Snowspeeder, Imperial Speeder Bike X-wing, Imperial shuttle
1277
- ## 2
1278
- ## 3
1279
- ## 4 TIE Advanced x1
1280
- ## 5 Imperial Speeder Bike
1281
- ## 6
1282
- \end{verbatim}
1283
-
1284
- The grouped\_mean function below will receive a grouping variable and
1285
- calculate summaries for the value\_variables given:
1286
-
1287
- \begin{Shaded}
1288
- \begin{Highlighting}[]
1289
- \NormalTok{grouped_mean <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(data, grouping_variables, value_variables) \{}
1290
- \NormalTok{ data }\OperatorTok{%>%}
1291
- \StringTok{ }\KeywordTok{group_by_at}\NormalTok{(grouping_variables) }\OperatorTok{%>%}
1292
- \StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{count =} \KeywordTok{n}\NormalTok{()) }\OperatorTok{%>%}
1293
- \StringTok{ }\KeywordTok{summarise_at}\NormalTok{(}\KeywordTok{c}\NormalTok{(value_variables, }\StringTok{"count"}\NormalTok{), mean, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{) }\OperatorTok{%>%}
1294
- \StringTok{ }\KeywordTok{rename_at}\NormalTok{(value_variables, }\KeywordTok{funs}\NormalTok{(}\KeywordTok{paste0}\NormalTok{(}\StringTok{"mean_"}\NormalTok{, .)))}
1295
- \NormalTok{ \}}
1296
-
1297
- \NormalTok{gm =}\StringTok{ }\NormalTok{starwars }\OperatorTok{%>%}\StringTok{ }
1298
- \StringTok{ }\KeywordTok{grouped_mean}\NormalTok{(}\StringTok{"eye_color"}\NormalTok{, }\KeywordTok{c}\NormalTok{(}\StringTok{"mass"}\NormalTok{, }\StringTok{"birth_year"}\NormalTok{))}
1299
-
1300
- \KeywordTok{as.data.frame}\NormalTok{(gm) }
1301
- \end{Highlighting}
1302
- \end{Shaded}
1303
-
1304
- \begin{verbatim}
1305
- ## eye_color mean_mass mean_birth_year count
1306
- ## 1 black 76.28571 33.00000 10
1307
- ## 2 blue 86.51667 67.06923 19
1308
- ## 3 blue-gray 77.00000 57.00000 1
1309
- ## 4 brown 66.09231 108.96429 21
1310
- ## 5 dark NaN NaN 1
1311
- ## 6 gold NaN NaN 1
1312
- ## 7 green, yellow 159.00000 NaN 1
1313
- ## 8 hazel 66.00000 34.50000 3
1314
- ## 9 orange 282.33333 231.00000 8
1315
- ## 10 pink NaN NaN 1
1316
- ## 11 red 81.40000 33.66667 5
1317
- ## 12 red, blue NaN NaN 1
1318
- ## 13 unknown 31.50000 NaN 3
1319
- ## 14 white 48.00000 NaN 1
1320
- ## 15 yellow 81.11111 76.38000 11
1321
- \end{verbatim}
1322
-
1323
- The same code with Galaaz becomes:
1324
-
1325
- \begin{Shaded}
1326
- \begin{Highlighting}[]
1327
- \KeywordTok{def}\NormalTok{ grouped_mean(data, grouping_variables, value_variables)}
1328
- \NormalTok{ data.}
1329
- \NormalTok{ group_by_at(grouping_variables).}
1330
- \NormalTok{ mutate(}\StringTok{count: }\NormalTok{E.n).}
1331
- \NormalTok{ summarise_at(E.c(value_variables, }\StringTok{"count"}\NormalTok{), ~}\StringTok{:mean}\NormalTok{, }\StringTok{na__rm: }\DecValTok{true}\NormalTok{).}
1332
- \NormalTok{ rename_at(value_variables, E.funs(E.paste0(}\StringTok{"mean_"}\NormalTok{, value_variables)))}
1333
- \KeywordTok{end}
1334
-
1335
- \NormalTok{puts grouped_mean((~}\StringTok{:starwars}\NormalTok{), }\StringTok{"eye_color"}\NormalTok{, E.c(}\StringTok{"mass"}\NormalTok{, }\StringTok{"birth_year"}\NormalTok{)).as__data__frame}
1336
- \end{Highlighting}
1337
- \end{Shaded}
1338
-
1339
- \begin{verbatim}
1340
- ## eye_color mean_mass mean_birth_year count
1341
- ## 1 black 76.28571 33.00000 10
1342
- ## 2 blue 86.51667 67.06923 19
1343
- ## 3 blue-gray 77.00000 57.00000 1
1344
- ## 4 brown 66.09231 108.96429 21
1345
- ## 5 dark NaN NaN 1
1346
- ## 6 gold NaN NaN 1
1347
- ## 7 green, yellow 159.00000 NaN 1
1348
- ## 8 hazel 66.00000 34.50000 3
1349
- ## 9 orange 282.33333 231.00000 8
1350
- ## 10 pink NaN NaN 1
1351
- ## 11 red 81.40000 33.66667 5
1352
- ## 12 red, blue NaN NaN 1
1353
- ## 13 unknown 31.50000 NaN 3
1354
- ## 14 white 48.00000 NaN 1
1355
- ## 15 yellow 81.11111 76.38000 11
1356
- \end{verbatim}
1357
-
1358
- \hypertarget{conclusion}{%
1359
- \section{Conclusion}\label{conclusion}}
1360
-
1361
- Ruby and Galaaz provide a nice framework for developing code that uses R
1362
- functions. Although R is a very powerful and flexible language,
1363
- sometimes, too much flexibility makes life harder for the casual user.
1364
- We believe, however, that even for the advanced user, Ruby integrated
1365
- with R through Galaaz makes a powerful environment for data analysis.
1366
- In this blog post we showed how Galaaz's consistent syntax eliminates the
1367
- need for complex constructs such as quoting, enquoting, quasiquotation,
1368
- etc. This simplification comes from the fact that expressions and
1369
- variables are clearly separated objects, which is not the case in the R
1370
- language.
1371
-
1372
-
1373
- \end{document}