galaaz 0.4.6 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (181) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +3575 -118
  3. data/Rakefile +21 -4
  4. data/bin/gknit +152 -6
  5. data/bin/gknit-draft +105 -0
  6. data/bin/gknit-draft.rb +28 -0
  7. data/bin/gknit_Rscript +127 -0
  8. data/bin/grun +27 -1
  9. data/bin/gstudio +47 -4
  10. data/bin/{gstudio.rb → gstudio_irb.rb} +0 -0
  11. data/bin/gstudio_pry.rb +7 -0
  12. data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +3 -12
  13. data/blogs/galaaz_ggplot/galaaz_ggplot.html +77 -222
  14. data/blogs/galaaz_ggplot/galaaz_ggplot.md +4 -31
  15. data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
  16. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
  17. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
  18. data/blogs/galaaz_ggplot/midwest.Rmd +1 -9
  19. data/blogs/gknit/gknit.Rmd +232 -123
  20. data/blogs/{dev/dev.html → gknit/gknit.html} +1897 -33
  21. data/blogs/gknit/gknit.pdf +0 -0
  22. data/blogs/gknit/lst.rds +0 -0
  23. data/blogs/gknit/stats.bib +27 -0
  24. data/blogs/manual/lst.rds +0 -0
  25. data/blogs/manual/manual.Rmd +1893 -47
  26. data/blogs/manual/manual.html +3153 -347
  27. data/blogs/manual/manual.md +3575 -118
  28. data/blogs/manual/manual.pdf +0 -0
  29. data/blogs/manual/manual.tex +4026 -0
  30. data/blogs/manual/manual_files/figure-html/bubble-1.png +0 -0
  31. data/blogs/manual/manual_files/figure-html/diverging_bar.png +0 -0
  32. data/blogs/manual/manual_files/figure-latex/bubble-1.png +0 -0
  33. data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
  34. data/blogs/{dev → manual}/model.rb +0 -0
  35. data/blogs/nse_dplyr/nse_dplyr.Rmd +849 -0
  36. data/blogs/nse_dplyr/nse_dplyr.html +878 -0
  37. data/blogs/nse_dplyr/nse_dplyr.md +1198 -0
  38. data/blogs/nse_dplyr/nse_dplyr.pdf +0 -0
  39. data/blogs/oh_my/oh_my.html +274 -386
  40. data/blogs/oh_my/oh_my.md +208 -205
  41. data/blogs/ruby_plot/ruby_plot.Rmd +64 -84
  42. data/blogs/ruby_plot/ruby_plot.html +235 -208
  43. data/blogs/ruby_plot/ruby_plot.md +239 -34
  44. data/blogs/ruby_plot/ruby_plot.pdf +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  46. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  47. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  48. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  49. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  50. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  51. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  52. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  53. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  54. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  55. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  56. data/examples/Bibliography/master.bib +50 -0
  57. data/examples/Bibliography/stats.bib +72 -0
  58. data/examples/islr/ch2.spec.rb +1 -1
  59. data/examples/islr/ch3_boston.rb +4 -4
  60. data/examples/islr/x_y_rnorm.jpg +0 -0
  61. data/examples/latex_templates/Test-acm_article/Makefile +16 -0
  62. data/examples/latex_templates/Test-acm_article/Test-acm_article.Rmd +65 -0
  63. data/examples/latex_templates/Test-acm_article/acm_proc_article-sp.cls +1670 -0
  64. data/examples/latex_templates/Test-acm_article/sensys-abstract.cls +703 -0
  65. data/examples/latex_templates/Test-acm_article/sigproc.bib +59 -0
  66. data/examples/latex_templates/Test-acs_article/Test-acs_article.Rmd +260 -0
  67. data/examples/latex_templates/Test-acs_article/Test-acs_article.pdf +0 -0
  68. data/examples/latex_templates/Test-acs_article/acs-Test-acs_article.bib +11 -0
  69. data/examples/latex_templates/Test-acs_article/acs-my_output.bib +11 -0
  70. data/examples/latex_templates/Test-acs_article/acstest.bib +17 -0
  71. data/examples/latex_templates/Test-aea_article/AEA.cls +1414 -0
  72. data/examples/latex_templates/Test-aea_article/BibFile.bib +0 -0
  73. data/examples/latex_templates/Test-aea_article/Test-aea_article.Rmd +108 -0
  74. data/examples/latex_templates/Test-aea_article/Test-aea_article.pdf +0 -0
  75. data/examples/latex_templates/Test-aea_article/aea.bst +1269 -0
  76. data/examples/latex_templates/Test-aea_article/multicol.sty +853 -0
  77. data/examples/latex_templates/Test-aea_article/references.bib +0 -0
  78. data/examples/latex_templates/Test-aea_article/setspace.sty +546 -0
  79. data/examples/latex_templates/Test-amq_article/Test-amq_article.Rmd +256 -0
  80. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdf +0 -0
  81. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdfsync +3397 -0
  82. data/examples/latex_templates/Test-amq_article/pics/Figure2.pdf +0 -0
  83. data/examples/latex_templates/Test-ams_article/Test-ams_article.Rmd +215 -0
  84. data/examples/latex_templates/Test-ams_article/amstest.bib +436 -0
  85. data/examples/latex_templates/Test-asa_article/Test-asa_article.Rmd +153 -0
  86. data/examples/latex_templates/Test-asa_article/Test-asa_article.pdf +0 -0
  87. data/examples/latex_templates/Test-asa_article/agsm.bst +1353 -0
  88. data/examples/latex_templates/Test-asa_article/bibliography.bib +233 -0
  89. data/examples/latex_templates/Test-ieee_article/IEEEtran.bst +2409 -0
  90. data/examples/latex_templates/Test-ieee_article/IEEEtran.cls +6346 -0
  91. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.Rmd +175 -0
  92. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.pdf +0 -0
  93. data/examples/latex_templates/Test-ieee_article/mybibfile.bib +20 -0
  94. data/examples/latex_templates/Test-rjournal_article/RJournal.sty +335 -0
  95. data/examples/latex_templates/Test-rjournal_article/RJreferences.bib +18 -0
  96. data/examples/latex_templates/Test-rjournal_article/RJwrapper.pdf +0 -0
  97. data/examples/latex_templates/Test-rjournal_article/Test-rjournal_article.Rmd +52 -0
  98. data/examples/latex_templates/Test-springer_article/Test-springer_article.Rmd +65 -0
  99. data/examples/latex_templates/Test-springer_article/Test-springer_article.pdf +0 -0
  100. data/examples/latex_templates/Test-springer_article/bibliography.bib +26 -0
  101. data/examples/latex_templates/Test-springer_article/spbasic.bst +1658 -0
  102. data/examples/latex_templates/Test-springer_article/spmpsci.bst +1512 -0
  103. data/examples/latex_templates/Test-springer_article/spphys.bst +1443 -0
  104. data/examples/latex_templates/Test-springer_article/svglov3.clo +113 -0
  105. data/examples/latex_templates/Test-springer_article/svjour3.cls +1431 -0
  106. data/examples/misc/moneyball.rb +1 -1
  107. data/examples/misc/subsetting.rb +37 -37
  108. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.Rmd +73 -0
  109. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.pdf +0 -0
  110. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.Rmd +382 -0
  111. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.pdf +0 -0
  112. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.Rmd +164 -0
  113. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.pdf +0 -0
  114. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.Rmd +92 -0
  115. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.pdf +0 -0
  116. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/attend-grade-relationships.csv +482 -0
  117. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.Rmd +280 -0
  118. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.pdf +0 -0
  119. data/examples/rmarkdown/svm-xaringan-example/svm-xaringan-example.Rmd +386 -0
  120. data/lib/R_interface/r.rb +2 -2
  121. data/lib/R_interface/r_libs.R +6 -1
  122. data/lib/R_interface/r_methods.rb +12 -2
  123. data/lib/R_interface/rdata_frame.rb +8 -17
  124. data/lib/R_interface/rindexed_object.rb +1 -2
  125. data/lib/R_interface/rlist.rb +1 -0
  126. data/lib/R_interface/robject.rb +20 -23
  127. data/lib/R_interface/rpkg.rb +15 -6
  128. data/lib/R_interface/rsupport.rb +13 -19
  129. data/lib/R_interface/ruby_extensions.rb +14 -18
  130. data/lib/R_interface/rvector.rb +0 -12
  131. data/lib/gknit.rb +2 -0
  132. data/lib/gknit/draft.rb +105 -0
  133. data/lib/gknit/knitr_engine.rb +6 -37
  134. data/lib/util/exec_ruby.rb +22 -84
  135. data/lib/util/inline_file.rb +7 -3
  136. data/specs/figures/bg.jpeg +0 -0
  137. data/specs/figures/bg.png +0 -0
  138. data/specs/figures/bg.svg +2 -2
  139. data/specs/figures/dose_len.png +0 -0
  140. data/specs/figures/no_args.jpeg +0 -0
  141. data/specs/figures/no_args.png +0 -0
  142. data/specs/figures/no_args.svg +2 -2
  143. data/specs/figures/width_height.jpeg +0 -0
  144. data/specs/figures/width_height.png +0 -0
  145. data/specs/figures/width_height_units1.jpeg +0 -0
  146. data/specs/figures/width_height_units1.png +0 -0
  147. data/specs/figures/width_height_units2.jpeg +0 -0
  148. data/specs/figures/width_height_units2.png +0 -0
  149. data/specs/r_dataframe.spec.rb +184 -11
  150. data/specs/r_list.spec.rb +4 -4
  151. data/specs/r_list_apply.spec.rb +11 -10
  152. data/specs/ruby_expression.spec.rb +3 -11
  153. data/specs/tmp.rb +106 -34
  154. data/version.rb +1 -1
  155. metadata +96 -33
  156. data/bin/gknit_old_r +0 -236
  157. data/blogs/dev/dev.Rmd +0 -77
  158. data/blogs/dev/dev.md +0 -87
  159. data/blogs/dev/dev_files/figure-html/bubble-1.png +0 -0
  160. data/blogs/dev/dev_files/figure-html/diverging_bar. +0 -0
  161. data/blogs/dev/dev_files/figure-html/diverging_bar.png +0 -0
  162. data/blogs/dplyr/dplyr.rb +0 -63
  163. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +0 -43
  164. data/blogs/galaaz_ggplot/galaaz_ggplot.log +0 -640
  165. data/blogs/galaaz_ggplot/galaaz_ggplot.out +0 -10
  166. data/blogs/galaaz_ggplot/galaaz_ggplot.tex +0 -481
  167. data/blogs/galaaz_ggplot/midwest.png +0 -0
  168. data/blogs/galaaz_ggplot/scatter_plot.png +0 -0
  169. data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +0 -662
  170. data/blogs/ruby_plot/ruby_plot.tex +0 -1077
  171. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +0 -57
  172. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +0 -106
  173. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +0 -110
  174. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +0 -174
  175. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +0 -236
  176. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +0 -296
  177. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +0 -236
  178. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +0 -218
  179. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +0 -128
  180. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +0 -150
  181. data/examples/paper/paper.rb +0 -36
Binary file
@@ -0,0 +1,4026 @@
1
+ \documentclass[11pt,]{article}
2
+ \usepackage{lmodern}
3
+ \usepackage{amssymb,amsmath}
4
+ \usepackage{ifxetex,ifluatex}
5
+ \usepackage{fixltx2e} % provides \textsubscript
6
+ \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
7
+ \usepackage[T1]{fontenc}
8
+ \usepackage[utf8]{inputenc}
9
+ \else % if luatex or xelatex
10
+ \ifxetex
11
+ \usepackage{mathspec}
12
+ \else
13
+ \usepackage{fontspec}
14
+ \fi
15
+ \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase}
16
+ \fi
17
+ % use upquote if available, for straight quotes in verbatim environments
18
+ \IfFileExists{upquote.sty}{\usepackage{upquote}}{}
19
+ % use microtype if available
20
+ \IfFileExists{microtype.sty}{%
21
+ \usepackage{microtype}
22
+ \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
23
+ }{}
24
+ \usepackage[margin=1in]{geometry}
25
+ \usepackage{hyperref}
26
+ \hypersetup{unicode=true,
27
+ pdftitle={Galaaz Manual},
28
+ pdfauthor={Rodrigo Botafogo},
29
+ pdfborder={0 0 0},
30
+ breaklinks=true}
31
+ \urlstyle{same} % don't use monospace font for urls
32
+ \usepackage{color}
33
+ \usepackage{fancyvrb}
34
+ \newcommand{\VerbBar}{|}
35
+ \newcommand{\VERB}{\Verb[commandchars=\\\{\}]}
36
+ \DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
37
+ % Add ',fontsize=\small' for more characters per line
38
+ \usepackage{framed}
39
+ \definecolor{shadecolor}{RGB}{248,248,248}
40
+ \newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}}
41
+ \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
42
+ \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{#1}}
43
+ \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
44
+ \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
45
+ \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
46
+ \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
47
+ \newcommand{\CharTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
48
+ \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
49
+ \newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
50
+ \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
51
+ \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
52
+ \newcommand{\ImportTok}[1]{#1}
53
+ \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
54
+ \newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
55
+ \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
56
+ \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
57
+ \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{#1}}
58
+ \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
59
+ \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
60
+ \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
61
+ \newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.81,0.36,0.00}{\textbf{#1}}}
62
+ \newcommand{\BuiltInTok}[1]{#1}
63
+ \newcommand{\ExtensionTok}[1]{#1}
64
+ \newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
65
+ \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.77,0.63,0.00}{#1}}
66
+ \newcommand{\RegionMarkerTok}[1]{#1}
67
+ \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
68
+ \newcommand{\WarningTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
69
+ \newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{#1}}
70
+ \newcommand{\ErrorTok}[1]{\textcolor[rgb]{0.64,0.00,0.00}{\textbf{#1}}}
71
+ \newcommand{\NormalTok}[1]{#1}
72
+ \usepackage{longtable,booktabs}
73
+ \usepackage{graphicx,grffile}
74
+ \makeatletter
75
+ \def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi}
76
+ \def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi}
77
+ \makeatother
78
+ % Scale images if necessary, so that they will not overflow the page
79
+ % margins by default, and it is still possible to overwrite the defaults
80
+ % using explicit options in \includegraphics[width, height, ...]{}
81
+ \setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio}
82
+ \IfFileExists{parskip.sty}{%
83
+ \usepackage{parskip}
84
+ }{% else
85
+ \setlength{\parindent}{0pt}
86
+ \setlength{\parskip}{6pt plus 2pt minus 1pt}
87
+ }
88
+ \setlength{\emergencystretch}{3em} % prevent overfull lines
89
+ \providecommand{\tightlist}{%
90
+ \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
91
+ \setcounter{secnumdepth}{5}
92
+ % Redefines (sub)paragraphs to behave more like sections
93
+ \ifx\paragraph\undefined\else
94
+ \let\oldparagraph\paragraph
95
+ \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
96
+ \fi
97
+ \ifx\subparagraph\undefined\else
98
+ \let\oldsubparagraph\subparagraph
99
+ \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
100
+ \fi
101
+
102
+ %%% Use protect on footnotes to avoid problems with footnotes in titles
103
+ \let\rmarkdownfootnote\footnote%
104
+ \def\footnote{\protect\rmarkdownfootnote}
105
+
106
+ %%% Change title format to be more compact
107
+ \usepackage{titling}
108
+
109
+ % Create subtitle command for use in maketitle
110
+ \newcommand{\subtitle}[1]{
111
+ \posttitle{
112
+ \begin{center}\large#1\end{center}
113
+ }
114
+ }
115
+
116
+ \setlength{\droptitle}{-2em}
117
+
118
+ \title{Galaaz Manual}
119
+ \pretitle{\vspace{\droptitle}\centering\huge}
120
+ \posttitle{\par}
121
+ \subtitle{How to tightly couple Ruby and R in GraalVM}
122
+ \author{Rodrigo Botafogo}
123
+ \preauthor{\centering\large\emph}
124
+ \postauthor{\par}
125
+ \predate{\centering\large\emph}
126
+ \postdate{\par}
127
+ \date{2019}
128
+
129
+ % use Brazilian Portuguese
130
+ % \usepackage[brazilian]{babel}
131
+ \usepackage[utf8]{inputenc}
132
+
133
+ \usepackage{geometry}
134
+ \geometry{a4paper, top=1in}
135
+
136
+ % needed for kableExtra
137
+ \usepackage{longtable}
138
+ \usepackage{multirow}
139
+ \usepackage[table]{xcolor}
140
+ \usepackage{wrapfig}
141
+ \usepackage{float}
142
+ \usepackage{colortbl}
143
+ \usepackage{pdflscape}
144
+ \usepackage{tabu}
145
+ \usepackage{threeparttable}
146
+ \usepackage[normalem]{ulem}
147
+
148
+ \usepackage{bbm}
149
+ \usepackage{booktabs}
150
+ \usepackage{expex}
151
+
152
+ \usepackage{graphicx}
153
+
154
+ \usepackage{fancyhdr}
155
+ % set the header and foot style
156
+ % style 'fancy' adds the section name on the header
157
+ % and the page number on the footer
158
+ \pagestyle{fancy}
159
+
160
+ % style 'fancyhf' leaves header and footer empty
161
+ %\fancyhf{}
162
+
163
+ % sets the left head element to \rightmark, which contains the
164
+ % current section (\leftmark is the current chapter)
165
+ %\fancyhead[L]{\rightmark} .
166
+
167
+ % sets the right head element to the page number.
168
+ % \fancyhead[R]{\thepage}
169
+
170
+ % lets the head rule disappear.
171
+ % \renewcommand{\headrulewidth}{0pt}
172
+ % Possible selectors for the optional argument of \fancyhead/\fancyfoot
173
+ % are L (left), C (center) or R (right) for the position of the element
174
+ % and E (even) or O (odd) to distinguish even and odd pages. If you omit
175
+ % E/O the element is set for all pages.
176
+
177
+ % \usepackage{lipsum}
178
+
179
+ % make available command lastpage
180
+ \usepackage{lastpage}
181
+
182
+ % default fontsize 11pt better to add
183
+ % fontsize on the yaml header
184
+ % \usepackage[fontsize=11pt]{scrextend}
185
+
186
+ % commands for formatting a table
187
+ \usepackage{array}
188
+ \newcolumntype{L}[1]{>{\raggedright\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
189
+ \newcolumntype{C}[1]{>{\centering\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
190
+ \newcolumntype{R}[1]{>{\raggedleft\let\newline\\\arraybackslash\hspace{0pt}}m{#1}}
191
+
192
+ % needed if we need to import other LaTeX documents
193
+ \usepackage{import}
194
+
195
+ % Command to import an R variable to latex
196
+ \newcommand{\RtoLatex}[2]{\newcommand{#1}{#2}}
197
+
198
+ %
199
+ %\newcommand{\atraso}[1]{\color{red} \textbf {Tempo desde a Assinatura do Contrato: #1 dias}}
200
+ \usepackage{booktabs}
201
+ \usepackage{longtable}
202
+ \usepackage{array}
203
+ \usepackage{multirow}
204
+ \usepackage{wrapfig}
205
+ \usepackage{float}
206
+ \usepackage{colortbl}
207
+ \usepackage{pdflscape}
208
+ \usepackage{tabu}
209
+ \usepackage{threeparttable}
210
+ \usepackage{threeparttablex}
211
+ \usepackage[normalem]{ulem}
212
+ \usepackage{makecell}
213
+ \usepackage{xcolor}
214
+
215
+ \begin{document}
216
+ \maketitle
217
+
218
+ {
219
+ \setcounter{tocdepth}{3}
220
+ \tableofcontents
221
+ }
222
+ \section{Introduction}\label{introduction}
223
+
224
+ Galaaz is a system for tightly coupling Ruby and R. Ruby is a powerful
225
+ language, with a large community, a very large set of libraries and
226
+ great for web development. However, it lacks libraries for data science,
227
+ statistics, scientific plotting and machine learning. On the other hand,
228
+ R is considered one of the most powerful languages for solving all of
229
+ the above problems. Maybe the strongest competitor to R is Python with
230
+ libraries such as NumPy, Pandas, SciPy, SciKit-Learn and a couple more.
231
+
232
+ With Galaaz we do not intend to re-implement any of the scientific
233
+ libraries in R; instead, we allow for very tight coupling between the two
234
+ languages to the point that the Ruby developer does not need to know
235
+ that there is an R engine running.
236
+
237
+ According to Wikipedia ``Ruby is a dynamic, interpreted, reflective,
238
+ object-oriented, general-purpose programming language. It was designed
239
+ and developed in the mid-1990s by Yukihiro `Matz' Matsumoto in Japan.''
240
+ It reached high popularity with the development of Ruby on Rails (RoR)
241
+ by David Heinemeier Hansson. RoR is a web application framework first
242
+ released around 2005. It makes extensive use of Ruby's metaprogramming
243
+ features. With RoR, Ruby became very popular. According to
244
+ \href{https://www.tiobe.com/tiobe-index/ruby/}{Ruby's Tiobe index} it
245
+ peaked in popularity around 2008, then declined until 2015 when it
246
+ started picking up again. At the time of this writing (November 2018),
247
+ the Tiobe index puts Ruby in 16th position as most popular language.
248
+
249
+ Python, a language similar to Ruby, ranks 4th in the index. Java, C and
250
+ C++ take the first three positions. Ruby is often criticized for its
251
+ focus on web applications. But Ruby can do
252
+ \href{https://github.com/markets/awesome-ruby}{much more} than just web
253
+ applications. Yet, for scientific computing, Ruby lags way behind Python
254
+ and R. Python has Django framework for web, NumPy for numerical arrays,
255
+ Pandas for data analysis. R is a free software environment for
256
+ statistical computing and graphics with thousands of libraries for data
257
+ analysis.
258
+
259
+ Until recently, there was no real perspective for Ruby to bridge this
260
+ gap. Implementing a complete scientific computing infrastructure would
261
+ take too long. Enter \href{https://www.graalvm.org/}{Oracle's GraalVM}:
262
+
263
+ \begin{quote}
264
+ GraalVM is a universal virtual machine for running applications written
265
+ in JavaScript, Python 3, Ruby, R, JVM-based languages like Java, Scala,
266
+ Kotlin, and LLVM-based languages such as C and C++.
267
+
268
+ GraalVM removes the isolation between programming languages and enables
269
+ interoperability in a shared runtime. It can run either standalone or in
270
+ the context of OpenJDK, Node.js, Oracle Database, or MySQL.
271
+
272
+ GraalVM allows you to write polyglot applications with a seamless way to
273
+ pass values from one language to another. With GraalVM there is no
274
+ copying or marshaling necessary as it is with other polyglot systems.
275
+ This lets you achieve high performance when language boundaries are
276
+ crossed. Most of the time there is no additional cost for crossing a
277
+ language boundary at all.
278
+
279
+ Often developers have to make uncomfortable compromises that require
280
+ them to rewrite their software in other languages. For example:
281
+
282
+ \begin{itemize}
283
+ \tightlist
284
+ \item
285
+ That library is not available in my language. I need to rewrite it.
286
+ \item
287
+ That language would be the perfect fit for my problem, but we cannot
288
+ run it in our environment.
289
+ \item
290
+ That problem is already solved in my language, but the language is too
291
+ slow.
292
+ \end{itemize}
293
+
294
+ With GraalVM we aim to allow developers to freely choose the right
295
+ language for the task at hand without making compromises.
296
+ \end{quote}
297
+
298
+ As stated above, GraalVM is a \emph{universal} virtual machine that
299
+ allows Ruby and R (and other languages) to run on the same environment.
300
+ GraalVM allows polyglot applications to \emph{seamlessly} interact with
301
+ one another and pass values from one language to the other. Although a
302
+ great idea, GraalVM still requires application writers to know several
303
+ languages. To eliminate that requirement, we built Galaaz, a gem for
304
+ Ruby, to tightly couple Ruby and R and allow those languages to interact
305
+ in a way that the user will be unaware of such interaction. In other
306
+ words, a Ruby programmer will be able to use all the capabilities of R
307
+ without knowing the R syntax.
308
+
309
+ Library wrapping is a usual way of bringing features from one language
310
+ into another. To improve performance, Python often wraps more efficient
311
+ C libraries. For the Python developer, the existence of such C libraries
312
+ is hidden. The problem with library wrapping is that for any new
313
+ library, there is the need to handcraft a new wrapper.
314
+
315
+ Galaaz, instead of wrapping a single C or R library, wraps the whole R
316
+ language in Ruby. Doing so, all thousands of R libraries are available
317
+ immediately to Ruby developers without any new wrapping effort.
318
+
319
+ \subsection{What does Galaaz mean}\label{what-does-galaaz-mean}
320
+
321
+ Galaaz is the Portuguese name for ``Galahad''. From Wikipedia:
322
+
323
+ \begin{verbatim}
324
+ Sir Galahad (sometimes referred to as Galeas or Galath),
325
+ in Arthurian legend, is a knight of King Arthur's Round Table and one
326
+ of the three achievers of the Holy Grail. He is the illegitimate son
327
+ of Sir Lancelot and Elaine of Corbenic, and is renowned for his
328
+ gallantry and purity as the most perfect of all knights. Emerging quite
329
+ late in the medieval Arthurian tradition, Sir Galahad first appears in the
330
+ Lancelot–Grail cycle, and his story is taken up in later works such as
331
+ the Post-Vulgate Cycle and Sir Thomas Malory's Le Morte d'Arthur.
332
+ His name should not be mistaken with Galehaut, a different knight from
333
+ Arthurian legend.
334
+ \end{verbatim}
335
+
336
+ \section{System Compatibility}\label{system-compatibility}
337
+
338
+ \begin{itemize}
339
+ \tightlist
340
+ \item
341
+ Oracle Linux 7
342
+ \item
343
+ Ubuntu 18.04 LTS
344
+ \item
345
+ Ubuntu 16.04 LTS
346
+ \item
347
+ Fedora 28
348
+ \item
349
+ macOS 10.14 (Mojave)
350
+ \item
351
+ macOS 10.13 (High Sierra)
352
+ \end{itemize}
353
+
354
+ \section{Dependencies}\label{dependencies}
355
+
356
+ \begin{itemize}
357
+ \tightlist
358
+ \item
359
+ TruffleRuby
360
+ \item
361
+ FastR
362
+ \end{itemize}
363
+
364
+ \section{Installation}\label{installation}
365
+
366
+ \begin{itemize}
367
+ \tightlist
368
+ \item
369
+ Install GraalVM (\url{http://www.graalvm.org/})
370
+ \item
371
+ Install Ruby (gu install Ruby)
372
+ \item
373
+ Install FastR (gu install R)
374
+ \item
375
+ Install rake if you want to run the specs and examples (gem install
376
+ rake)
377
+ \end{itemize}
378
+
379
+ \section{Usage}\label{usage}
380
+
381
+ \begin{itemize}
382
+ \tightlist
383
+ \item
384
+ Interactive shell: use `gstudio' on the command line
385
+ \end{itemize}
386
+
387
+ \begin{quote}
388
+ gstudio
389
+ \end{quote}
390
+
391
+ \begin{Shaded}
392
+ \begin{Highlighting}[]
393
+ \NormalTok{ vec = R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{4}\NormalTok{)}
394
+ \NormalTok{ puts vec}
395
+ \end{Highlighting}
396
+ \end{Shaded}
397
+
398
+ \begin{verbatim}
399
+ ## [1] 1 2 3 4
400
+ \end{verbatim}
401
+
402
+ \begin{itemize}
403
+ \tightlist
404
+ \item
405
+ Run all specs
406
+ \end{itemize}
407
+
408
+ \begin{quote}
409
+ galaaz specs:all
410
+ \end{quote}
411
+
412
+ \begin{itemize}
413
+ \tightlist
414
+ \item
415
+ Run graphics slideshow (80+ graphics)
416
+ \end{itemize}
417
+
418
+ \begin{quote}
419
+ galaaz sthda:all
420
+ \end{quote}
421
+
422
+ \begin{itemize}
423
+ \tightlist
424
+ \item
425
+ Run labs from Introduction to Statistical Learning with R
426
+ \end{itemize}
427
+
428
+ \begin{quote}
429
+ galaaz islr:all
430
+ \end{quote}
431
+
432
+ \begin{itemize}
433
+ \tightlist
434
+ \item
435
+ See all available examples
436
+ \end{itemize}
437
+
438
+ \begin{quote}
439
+ galaaz -T
440
+ \end{quote}
441
+
442
+ Shows a list with all available executable tasks. To execute a task,
443
+ substitute the `rake' word in the list with `galaaz'. For instance, the
444
+ following line shows up after `galaaz -T'
445
+
446
+ rake master\_list:scatter\_plot \# scatter\_plot from:\ldots{}.
447
+
448
+ execute
449
+
450
+ \begin{quote}
451
+ galaaz master\_list:scatter\_plot
452
+ \end{quote}
453
+
454
+ \section{Accessing R from Ruby}\label{accessing-r-from-ruby}
455
+
456
+ One of the nice aspects of Galaaz on GraalVM, is that variables and
457
+ functions defined in R, can be easily accessed from Ruby. For instance,
458
+ to access the `mtcars' data frame from R in Ruby, we use the `:mtcars'
459
+ symbol preceded by the `\textasciitilde{}' operator, thus
460
+ `\textasciitilde{}:mtcars' retrieves the value of the `mtcars' variable.
461
+
462
+ \begin{Shaded}
463
+ \begin{Highlighting}[]
464
+ \NormalTok{puts ~}\StringTok{:mtcars}
465
+ \end{Highlighting}
466
+ \end{Shaded}
467
+
468
+ \begin{verbatim}
469
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
470
+ ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
471
+ ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
472
+ ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
473
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
474
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
475
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
476
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
477
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
478
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
479
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
480
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
481
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
482
+ ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
483
+ ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
484
+ ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
485
+ ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
486
+ ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
487
+ ## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
488
+ ## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
489
+ ## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
490
+ ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
491
+ ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
492
+ ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
493
+ ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
494
+ ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
495
+ ## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
496
+ ## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
497
+ ## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
498
+ ## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
499
+ ## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
500
+ ## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
501
+ ## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
502
+ \end{verbatim}
503
+
504
+ To access an R function from Ruby, the R function needs to be preceded
505
+ by `R.' scoping. Below we see an example of creating an R::Vector by
506
+ calling the `c' R function
507
+
508
+ \begin{Shaded}
509
+ \begin{Highlighting}[]
510
+ \NormalTok{puts vec = R.c(}\FloatTok{1.0}\NormalTok{, }\FloatTok{2.0}\NormalTok{, }\FloatTok{3.0}\NormalTok{, }\FloatTok{4.0}\NormalTok{)}
511
+ \end{Highlighting}
512
+ \end{Shaded}
513
+
514
+ \begin{verbatim}
515
+ ## [1] 1 2 3 4
516
+ \end{verbatim}
517
+
518
+ Note that `vec' is an object of type R::Vector:
519
+
520
+ \begin{Shaded}
521
+ \begin{Highlighting}[]
522
+ \NormalTok{puts vec.class}
523
+ \end{Highlighting}
524
+ \end{Shaded}
525
+
526
+ \begin{verbatim}
527
+ ## R::Vector
528
+ \end{verbatim}
529
+
530
+ Every object created by a call to an R function will be of a type that
531
+ inherits from R::Object. In R, there is also a function `class'. In
532
+ order to access that function we can call method `rclass' in the
533
+ R::Object:
534
+
535
+ \begin{Shaded}
536
+ \begin{Highlighting}[]
537
+ \NormalTok{puts vec.rclass}
538
+ \end{Highlighting}
539
+ \end{Shaded}
540
+
541
+ \begin{verbatim}
542
+ ## [1] "numeric"
543
+ \end{verbatim}
544
+
545
+ When working with R::Object(s), it is possible to use the `.' operator
546
+ to pipe operations. When using `.', the object to which the `.' is
547
+ applied becomes the first argument of the corresponding R function. For
548
+ instance, function `c' in R can be used to concatenate two or
549
+ more vectors (in R, there are no scalar values, scalars are converted to
550
+ vectors of size 1. Within Galaaz, a scalar parameter is converted to a
551
+ size one vector):
552
+
553
+ \begin{Shaded}
554
+ \begin{Highlighting}[]
555
+ \NormalTok{puts R.c(vec, }\DecValTok{10}\NormalTok{, }\DecValTok{20}\NormalTok{, }\DecValTok{30}\NormalTok{)}
556
+ \end{Highlighting}
557
+ \end{Shaded}
558
+
559
+ \begin{verbatim}
560
+ ## [1] 1 2 3 4 10 20 30
561
+ \end{verbatim}
562
+
563
+ The call above to the `c' function can also be done using `.' notation:
564
+
565
+ \begin{Shaded}
566
+ \begin{Highlighting}[]
567
+ \NormalTok{puts vec.c(}\DecValTok{10}\NormalTok{, }\DecValTok{20}\NormalTok{, }\DecValTok{30}\NormalTok{)}
568
+ \end{Highlighting}
569
+ \end{Shaded}
570
+
571
+ \begin{verbatim}
572
+ ## [1] 1 2 3 4 10 20 30
573
+ \end{verbatim}
574
+
575
+ We will talk about vector indexing in a later section. But notice here
576
+ that indexing an R::Vector will return another R::Vector:
577
+
578
+ \begin{Shaded}
579
+ \begin{Highlighting}[]
580
+ \NormalTok{puts vec[}\DecValTok{1}\NormalTok{]}
581
+ \end{Highlighting}
582
+ \end{Shaded}
583
+
584
+ \begin{verbatim}
585
+ ## [1] 1
586
+ \end{verbatim}
587
+
588
+ Sometimes we want to index an R::Object and get back a Ruby object that
589
+ is not wrapped in an R::Object, but the native Ruby object. For this, we
590
+ can index the R object with the `\textgreater{}\textgreater{}' operator:
591
+
592
+ \begin{Shaded}
593
+ \begin{Highlighting}[]
594
+ \NormalTok{puts vec >> }\DecValTok{0}
595
+ \NormalTok{puts vec >> }\DecValTok{2}
596
+ \end{Highlighting}
597
+ \end{Shaded}
598
+
599
+ \begin{verbatim}
600
+ ## 1.0
601
+ ## 3.0
602
+ \end{verbatim}
603
+
604
+ It is also possible to call an R function with named arguments, by
605
+ creating the function in Galaaz with named parameters. For instance,
606
+ here is an example of creating a `list' with named elements:
607
+
608
+ \begin{Shaded}
609
+ \begin{Highlighting}[]
610
+ \NormalTok{puts R.list(}\StringTok{first_name: "Rodrigo"}\NormalTok{, }\StringTok{last_name: "Botafogo"}\NormalTok{)}
611
+ \end{Highlighting}
612
+ \end{Shaded}
613
+
614
+ \begin{verbatim}
615
+ ## $first_name
616
+ ## [1] "Rodrigo"
617
+ ##
618
+ ## $last_name
619
+ ## [1] "Botafogo"
620
+ \end{verbatim}
621
+
622
+ Many R functions receive another function as argument. For instance,
623
+ method `map' applies a function to every element of a vector. With
624
+ Galaaz, it is possible to pass a Proc, Method or Lambda in place of the
625
+ expected R function. In this next example, we will add 2 to every
626
+ element of our previously created vector:
627
+
628
+ \begin{Shaded}
629
+ \begin{Highlighting}[]
630
+ \NormalTok{puts vec.map \{ |x| x + }\DecValTok{2}\NormalTok{ \}}
631
+ \end{Highlighting}
632
+ \end{Shaded}
633
+
634
+ \begin{verbatim}
635
+ ## [1] 3
636
+ ## [1] 4
637
+ ## [1] 5
638
+ ## [1] 6
639
+ \end{verbatim}
640
+
641
+ \section{gKnitting a Document}\label{gknitting-a-document}
642
+
643
+ This manual has been formatted using gKnit. gKnit uses Knitr and R
644
+ markdown to knit a document in Ruby or R and output it in any of the
645
+ available formats for R markdown. gKnit runs atop GraalVM and
646
+ Galaaz. In gKnit, Ruby variables are persisted between chunks, making it
647
+ an ideal solution for literate programming. Also, since it is based on
648
+ Galaaz, Ruby chunks can have access to R variables and Polyglot
649
+ Programming with Ruby and R is quite natural.
650
+
651
+ The idea of ``literate programming'' was first introduced by Donald
652
+ Knuth in the 1980's (Knuth 1984). The main intention of this approach
653
+ was to develop software interspersing macro snippets, traditional source
654
+ code, and a natural language such as English in a document that could be
655
+ compiled into executable code and at the same time easily read by a
656
+ human developer. According to Knuth ``The practitioner of literate
657
+ programming can be regarded as an essayist, whose main concern is with
658
+ exposition and excellence of style.''
659
+
660
+ The idea of literate programming evolved into the idea of reproducible
661
+ research, in which all the data, software code, documentation, graphics
662
+ etc. needed to reproduce the research and its reports could be included
663
+ in a single document or set of documents that when distributed to peers
664
+ could be rerun generating the same output and reports.
665
+
666
+ The R community has put a great deal of effort in reproducible research.
667
+ In 2002, Sweave was introduced and it allowed mixing R code with Latex
668
+ generating high quality PDF documents. A Sweave document could include
669
+ code, the results of executing the code, graphics and text such that it
670
+ contained the whole narrative to reproduce the research. In 2012, Knitr,
671
+ developed by Yihui Xie from RStudio was released to replace Sweave and
672
+ to consolidate in one single package the many extensions and add-on
673
+ packages that were necessary for Sweave.
674
+
675
+ With Knitr, \textbf{R markdown} was also developed, an extension to the
676
+ Markdown format. With \textbf{R markdown} and Knitr it is possible to
677
+ generate reports in a multitude of formats such as HTML, markdown,
678
+ Latex, PDF, dvi, etc. \textbf{R markdown} also allows the use of
679
+ multiple programming languages such as R, Ruby, Python, etc. in the same
680
+ document.
681
+
682
+ In \textbf{R markdown}, text is interspersed with code chunks that can
683
+ be executed and both the code and its results can become part of the
684
+ final report. Although \textbf{R markdown} allows multiple programming
685
+ languages in the same document, only R and Python (with the reticulate
686
+ package) can persist variables between chunks. For other languages, such
687
+ as Ruby, every chunk will start a new process and thus all data is lost
688
+ between chunks, unless it is somehow stored in a data file that is read
689
+ by the next chunk.
690
+
691
+ Being able to persist data between chunks is critical for literate
692
+ programming otherwise the flow of the narrative is lost by all the
693
+ effort of having to save data and then reload it. Although this might,
694
+ at first, seem like a small nuisance, not being able to persist data
695
+ between chunks is a major issue. For example, let's take a look at the
696
+ following simple example in which we want to show how to create a list
697
+ and then use it. Let's first assume that data cannot be persisted between
698
+ chunks. In the next chunk we create a list, then we would need to save
699
+ it to file, but to save it, we need somehow to marshal the data into a
700
+ binary format:
701
+
702
+ \begin{Shaded}
703
+ \begin{Highlighting}[]
704
+ \NormalTok{lst = R.list(}\StringTok{a: }\DecValTok{1}\NormalTok{, }\StringTok{b: }\DecValTok{2}\NormalTok{, }\StringTok{c: }\DecValTok{3}\NormalTok{)}
705
+ \NormalTok{lst.saveRDS(}\StringTok{"lst.rds"}\NormalTok{)}
706
+ \end{Highlighting}
707
+ \end{Shaded}
708
+
709
+ then, on the next chunk, where variable `lst' is used, we need to read
710
+ back its value
711
+
712
+ \begin{Shaded}
713
+ \begin{Highlighting}[]
714
+ \NormalTok{lst = R.readRDS(}\StringTok{"lst.rds"}\NormalTok{)}
715
+ \NormalTok{puts lst}
716
+ \end{Highlighting}
717
+ \end{Shaded}
718
+
719
+ \begin{verbatim}
720
+ ## $a
721
+ ## [1] 1
722
+ ##
723
+ ## $b
724
+ ## [1] 2
725
+ ##
726
+ ## $c
727
+ ## [1] 3
728
+ \end{verbatim}
729
+
730
+ Now, any single piece of code has dozens of variables that we might want to use
731
+ and reuse between chunks. Clearly, such an approach becomes quickly
732
+ unmanageable. Probably, because of this problem, it is very rare to see
733
+ any \textbf{R markdown} document in the Ruby community.
734
+
735
+ When variables can be used across chunks, then no overhead is needed:
736
+
737
+ \begin{Shaded}
738
+ \begin{Highlighting}[]
739
+ \NormalTok{lst = R.list(}\StringTok{a: }\DecValTok{1}\NormalTok{, }\StringTok{b: }\DecValTok{2}\NormalTok{, }\StringTok{c: }\DecValTok{3}\NormalTok{)}
740
+ \CommentTok{# any other code can be added here}
741
+ \end{Highlighting}
742
+ \end{Shaded}
743
+
744
+ \begin{Shaded}
745
+ \begin{Highlighting}[]
746
+ \NormalTok{puts lst}
747
+ \end{Highlighting}
748
+ \end{Shaded}
749
+
750
+ \begin{verbatim}
751
+ ## $a
752
+ ## [1] 1
753
+ ##
754
+ ## $b
755
+ ## [1] 2
756
+ ##
757
+ ## $c
758
+ ## [1] 3
759
+ \end{verbatim}
760
+
761
+ In the Python community, the same effort to have code and text in an
762
+ integrated environment started around the first decade of 2000. In 2006
763
+ iPython 0.7.2 was released. In 2014, Fernando Pérez spun off project
764
+ Jupyter from iPython creating a web-based interactive computation
765
+ environment. Jupyter can now be used with many languages, including Ruby
766
+ with the iruby gem (\url{https://github.com/SciRuby/iruby}). In order to
767
+ have multiple languages in a Jupyter notebook the SoS kernel was
768
+ developed (\url{https://vatlab.github.io/sos-docs/}).
769
+
770
+ \subsection{\texorpdfstring{gKnit and \textbf{R
771
+ markdown}}{gKnit and R markdown}}\label{gknit-and-r-markdown}
772
+
773
+ gKnit is based on knitr and \textbf{R markdown} and can knit a document
774
+ written both in Ruby and/or R and output it in any of the available
775
+ formats of \textbf{R markdown}. gKnit allows ruby developers to do
776
+ literate programming and reproducible research by allowing them to have
777
+ in a single document, text and code.
778
+
779
+ In gKnit, Ruby variables are persisted between chunks, making it an
780
+ ideal solution for literate programming in this language. Also, since it
781
+ is based on Galaaz, Ruby chunks can have access to R variables and
782
+ Polyglot Programming with Ruby and R is quite natural.
783
+
784
+ This is not a blog post on \textbf{R markdown}, and the interested user
785
+ is directed to the following links for detailed information on its
786
+ capabilities and use.
787
+
788
+ \begin{itemize}
789
+ \tightlist
790
+ \item
791
+ \url{https://rmarkdown.rstudio.com/} or
792
+ \item
793
+ \url{https://bookdown.org/yihui/rmarkdown/}
794
+ \end{itemize}
795
+
796
+ In this post, we will describe just the main aspects of \textbf{R
797
+ markdown}, so the user can start gKnitting Ruby and R documents quickly.
798
+
799
+ \subsection{The Yaml header}\label{the-yaml-header}
800
+
801
+ An \textbf{R markdown} document should start with a Yaml header and be
802
+ stored in a file with `.Rmd' extension. This document has the following
803
+ header for gKnitting an HTML document.
804
+
805
+ \begin{verbatim}
806
+ ---
807
+ title: "How to do reproducible research in Ruby with gKnit"
808
+ author:
809
+ - "Rodrigo Botafogo"
810
+ - "Daniel Mossé - University of Pittsburgh"
811
+ tags: [Tech, Data Science, Ruby, R, GraalVM]
812
+ date: "20/02/2019"
813
+ output:
814
+ html_document:
815
+ self_contained: true
816
+ keep_md: true
817
+ pdf_document:
818
+ includes:
819
+ in_header: ["../../sty/galaaz.sty"]
820
+ number_sections: yes
821
+ ---
822
+ \end{verbatim}
823
+
824
+ For more information on the options in the Yaml header,
825
+ \href{https://bookdown.org/yihui/rmarkdown/html-document.html}{check
826
+ here}.
827
+
828
+ \subsection{\texorpdfstring{\textbf{R Markdown}
829
+ formatting}{R Markdown formatting}}\label{r-markdown-formatting}
830
+
831
+ Document formatting can be done with simple markups such as:
832
+
833
+ \subsection{Headers}\label{headers}
834
+
835
+ \begin{verbatim}
836
+ # Header 1
837
+
838
+ ## Header 2
839
+
840
+ ### Header 3
841
+ \end{verbatim}
842
+
843
+ \subsection{Lists}\label{lists}
844
+
845
+ \begin{verbatim}
846
+ Unordered lists:
847
+
848
+ * Item 1
849
+ * Item 2
850
+ + Item 2a
851
+ + Item 2b
852
+ \end{verbatim}
853
+
854
+ \begin{verbatim}
855
+ Ordered Lists
856
+
857
+ 1. Item 1
858
+ 2. Item 2
859
+ 3. Item 3
860
+ + Item 3a
861
+ + Item 3b
862
+ \end{verbatim}
863
+
864
+ For more R markdown formatting go to
865
+ \url{https://rmarkdown.rstudio.com/authoring_basics.html}.
866
+
867
+ \subsection{R chunks}\label{r-chunks}
868
+
869
+ Running and executing Ruby and R code is actually what really interests
870
+ us in this blog.\\
871
+ Inserting a code chunk is done by adding code in a block delimited by
872
+ three back ticks followed by an open curly brace (`\{') followed with
873
+ the engine name (r, ruby, rb, include, \ldots{}), and any optional
874
+ chunk\_label and options, as shown below:
875
+
876
+ \begin{verbatim}
877
+ ```{engine_name [chunk_label], [chunk_options]}
878
+ ```
879
+ \end{verbatim}
880
+
881
+ for instance, let's add an R chunk to the document labeled
882
+ `first\_r\_chunk'. This is a very simple code just to create a variable
883
+ and print it out, as follows:
884
+
885
+ \begin{verbatim}
886
+ ```{r first_r_chunk}
887
+ vec <- c(1, 2, 3)
888
+ print(vec)
889
+ ```
890
+ \end{verbatim}
891
+
892
+ If this block is added to an \textbf{R markdown} document and gKnitted
893
+ the result will be:
894
+
895
+ \begin{Shaded}
896
+ \begin{Highlighting}[]
897
+ \NormalTok{vec <-}\StringTok{ }\KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{)}
898
+ \KeywordTok{print}\NormalTok{(vec)}
899
+ \end{Highlighting}
900
+ \end{Shaded}
901
+
902
+ \begin{verbatim}
903
+ ## [1] 1 2 3
904
+ \end{verbatim}
905
+
906
+ Now let's say that we want to do some analysis in the code, but just
907
+ print the result and not the code itself. For this, we need to add the
908
+ option `echo = FALSE'.
909
+
910
+ \begin{verbatim}
911
+ ```{r second_r_chunk, echo = FALSE}
912
+ vec2 <- c(10, 20, 30)
913
+ vec3 <- vec * vec2
914
+ print(vec3)
915
+ ```
916
+ \end{verbatim}
917
+
918
+ Here is how this block will show up in the document. Observe that the
919
+ code is not shown and we only see the execution result in a white box
920
+
921
+ \begin{verbatim}
922
+ ## [1] 10 40 90
923
+ \end{verbatim}
924
+
925
+ A description of the available chunk options can be found in
926
+ \url{https://yihui.name/knitr/}.
927
+
928
+ Let's add another R chunk with a function definition. In this example, a
929
+ vector `r\_vec' is created and a new function `reduce\_sum' is defined.
930
+ The chunk specification is
931
+
932
+ \begin{verbatim}
933
+ ```{r data_creation}
934
+ r_vec <- c(1, 2, 3, 4, 5)
935
+
936
+ reduce_sum <- function(...) {
937
+ Reduce(sum, as.list(...))
938
+ }
939
+ ```
940
+ \end{verbatim}
941
+
942
+ and this is what it will look like once executed. From now on, to be
943
+ concise in the presentation we will not show chunk definitions any
944
+ longer.
945
+
946
+ \begin{Shaded}
947
+ \begin{Highlighting}[]
948
+ \NormalTok{r_vec <-}\StringTok{ }\KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{4}\NormalTok{, }\DecValTok{5}\NormalTok{)}
949
+
950
+ \NormalTok{reduce_sum <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(...) \{}
951
+ \KeywordTok{Reduce}\NormalTok{(sum, }\KeywordTok{as.list}\NormalTok{(...))}
952
+ \NormalTok{\}}
953
+ \end{Highlighting}
954
+ \end{Shaded}
955
+
956
+ We can, possibly in another chunk, access the vector and call the
957
+ function as follows:
958
+
959
+ \begin{Shaded}
960
+ \begin{Highlighting}[]
961
+ \KeywordTok{print}\NormalTok{(r_vec)}
962
+ \end{Highlighting}
963
+ \end{Shaded}
964
+
965
+ \begin{verbatim}
966
+ ## [1] 1 2 3 4 5
967
+ \end{verbatim}
968
+
969
+ \begin{Shaded}
970
+ \begin{Highlighting}[]
971
+ \KeywordTok{print}\NormalTok{(}\KeywordTok{reduce_sum}\NormalTok{(r_vec))}
972
+ \end{Highlighting}
973
+ \end{Shaded}
974
+
975
+ \begin{verbatim}
976
+ ## [1] 15
977
+ \end{verbatim}
978
+
979
+ \subsection{R Graphics with ggplot}\label{r-graphics-with-ggplot}
980
+
981
+ In the following chunk, we create a bubble chart in R using ggplot and
982
+ include it in this document. Note that there is no directive in the code
983
+ to include the image, this occurs automatically. The `mpg' dataframe is
984
+ natively available to R and to Galaaz as well.
985
+
986
+ For the reader not knowledgeable of ggplot, ggplot is a graphics library
987
+ based on ``the grammar of graphics'' (Wilkinson 2005). The idea of the
988
+ grammar of graphics is to build a graphic by adding layers to the plot.
989
+ More information can be found in
990
+ \url{https://towardsdatascience.com/a-comprehensive-guide-to-the-grammar-of-graphics-for-effective-visualization-of-multi-dimensional-1f92b4ed4149}.
991
+
992
+ In the plot below the `mpg' dataset from the ggplot2 package is used. ``The data
993
+ concerns city-cycle fuel consumption in miles per gallon, to be
994
+ predicted in terms of 3 multivalued discrete and 5 continuous
995
+ attributes.'' (Quinlan, 1993)
996
+
997
+ First, the `mpg' dataset is filtered to extract only cars from the
998
+ following manufacturers: Audi, Ford, Honda, and Hyundai and stored in the
999
+ `mpg\_select' variable. Then, the selected dataframe is passed to the
1000
+ ggplot function specifying in the aesthetic method (aes) that
1001
+ `displacement' (displ) should be plotted on the `x' axis and `city
1002
+ mileage' should be on the `y' axis. In the `labs' layer we pass the
1003
+ `title' and `subtitle' for the plot. To the basic plot `g', geom\_jitter
1004
+ is added, which plots cars from the same manufacturer with the same color
1005
+ (col=manufacturer) and the size of the car point equal to its highway
1006
+ consumption (size = hwy). Finally, a last layer is plotted containing a
1007
+ linear regression line (method = ``lm'') for every manufacturer.
1008
+
1009
+ \begin{Shaded}
1010
+ \begin{Highlighting}[]
1011
+ \CommentTok{# load package and data}
1012
+ \KeywordTok{library}\NormalTok{(ggplot2)}
1013
+ \end{Highlighting}
1014
+ \end{Shaded}
1015
+
1016
+ \begin{verbatim}
1017
+ ## Message:
1018
+ ## Registered S3 methods overwritten by 'ggplot2':
1019
+ ## method from
1020
+ ## [.quosures rlang
1021
+ ## c.quosures rlang
1022
+ ## print.quosures rlang
1023
+ \end{verbatim}
1024
+
1025
+ \begin{Shaded}
1026
+ \begin{Highlighting}[]
1027
+ \KeywordTok{data}\NormalTok{(mpg, }\DataTypeTok{package=}\StringTok{"ggplot2"}\NormalTok{)}
1028
+
1029
+ \NormalTok{mpg_select <-}\StringTok{ }\NormalTok{mpg[mpg}\OperatorTok{$}\NormalTok{manufacturer }\OperatorTok{%in%}\StringTok{ }\KeywordTok{c}\NormalTok{(}\StringTok{"audi"}\NormalTok{, }\StringTok{"ford"}\NormalTok{, }\StringTok{"honda"}\NormalTok{, }\StringTok{"hyundai"}\NormalTok{), ]}
1030
+
1031
+ \CommentTok{# Scatterplot}
1032
+ \KeywordTok{theme_set}\NormalTok{(}\KeywordTok{theme_bw}\NormalTok{()) }\CommentTok{# pre-set the bw theme.}
1033
+ \NormalTok{g <-}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(mpg_select, }\KeywordTok{aes}\NormalTok{(displ, cty)) }\OperatorTok{+}\StringTok{ }
1034
+ \StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{subtitle=}\StringTok{"mpg: Displacement vs City Mileage"}\NormalTok{,}
1035
+ \DataTypeTok{title=}\StringTok{"Bubble chart"}\NormalTok{)}
1036
+
1037
+ \NormalTok{g }\OperatorTok{+}\StringTok{ }\KeywordTok{geom_jitter}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{col=}\NormalTok{manufacturer, }\DataTypeTok{size=}\NormalTok{hwy)) }\OperatorTok{+}\StringTok{ }
1038
+ \StringTok{ }\KeywordTok{geom_smooth}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{col=}\NormalTok{manufacturer), }\DataTypeTok{method=}\StringTok{"lm"}\NormalTok{, }\DataTypeTok{se=}\NormalTok{F)}
1039
+ \end{Highlighting}
1040
+ \end{Shaded}
1041
+
1042
+ \includegraphics{manual_files/figure-latex/bubble-1.png}
1043
+
1044
+ \subsection{Ruby chunks}\label{ruby-chunks}
1045
+
1046
+ Including a Ruby chunk is just as easy as including an R chunk in the
1047
+ document: just change the name of the engine to `ruby'. It is also
1048
+ possible to pass chunk options to the Ruby engine; however, this version
1049
+ does not accept all the options that are available to R chunks. Future
1050
+ versions will add those options.
1051
+
1052
+ \begin{verbatim}
1053
+ ```{ruby first_ruby_chunk}
1054
+ ```
1055
+ \end{verbatim}
1056
+
1057
+ In this example, the ruby chunk is called `first\_ruby\_chunk'. One
1058
+ important aspect of chunk labels is that they cannot be duplicated. If a
1059
+ chunk label is duplicated, gKnit will stop with an error.
1060
+
1061
+ In the following chunk, variables `a', `b' and `c' are standard Ruby
1062
+ variables and `vec' and `vec2' are two vectors created by calling the
1063
+ `c' method on the R module.
1064
+
1065
+ In Galaaz, the R module allows us to access R functions transparently.
1066
+ The `c' function in R, is a function that concatenates its arguments
1067
+ making a vector.
1068
+
1069
+ It should be clear that there is no requirement in gKnit to call or use
1070
+ any R functions. gKnit will knit standard Ruby code, or even general
1071
+ text without any code.
1072
+
1073
+ \begin{Shaded}
1074
+ \begin{Highlighting}[]
1075
+ \NormalTok{a = [}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{]}
1076
+ \NormalTok{b = }\StringTok{"US$ 250.000"}
1077
+ \NormalTok{c = }\StringTok{"The 'outputs' function"}
1078
+
1079
+ \NormalTok{vec = R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{)}
1080
+ \NormalTok{vec2 = R.c(}\DecValTok{10}\NormalTok{, }\DecValTok{20}\NormalTok{, }\DecValTok{30}\NormalTok{)}
1081
+ \end{Highlighting}
1082
+ \end{Shaded}
1083
+
1084
+ In the next block, variables `a', `vec' and `vec2' are used and printed.
1085
+
1086
+ \begin{Shaded}
1087
+ \begin{Highlighting}[]
1088
+ \NormalTok{puts a}
1089
+ \NormalTok{puts vec * vec2}
1090
+ \end{Highlighting}
1091
+ \end{Shaded}
1092
+
1093
+ \begin{verbatim}
1094
+ ## 1
1095
+ ## 2
1096
+ ## 3
1097
+ ## [1] 10 40 90
1098
+ \end{verbatim}
1099
+
1100
+ Note that `a' is a standard Ruby Array and `vec' and `vec2' are vectors
1101
+ that behave accordingly, where multiplication works as expected.
1102
+
1103
+ \subsection{Inline Ruby code}\label{inline-ruby-code}
1104
+
1105
+ When using a Ruby chunk, the code and the output are formatted in blocks
1106
+ as seen above. This formatting is not always desired. Sometimes, we want
1107
+ to have the results of the Ruby evaluation included in the middle of a
1108
+ phrase. gKnit allows adding inline Ruby code with the `rb' engine. The
1109
+ following chunk specification will create an inline Ruby text:
1110
+
1111
+ \begin{verbatim}
1112
+ This is some text with inline Ruby accessing variable 'b' which has value:
1113
+ ```{rb puts b}
1114
+ ```
1115
+ and is followed by some other text!
1116
+ \end{verbatim}
1117
+
1118
+ This is some text with inline Ruby accessing variable `b' which has
1119
+ value: US\$ 250.000 and is followed by some other text!
1120
+
1121
+ Note that it is important not to add any new line before or after the
1122
+ code block if we want everything to be in only one line, resulting in
1123
+ the following sentence with inline Ruby code.
1124
+
1125
+ \subsubsection{\texorpdfstring{The `outputs'
1126
+ function}{The outputs function}}\label{the-outputs-function}
1127
+
1128
+ We have previously used the standard `puts' method in Ruby chunks in
1129
+ order to produce output. The result of a `puts', as seen in all previous
1130
+ chunks that use it, is formatted inside a white box that follows the
1131
+ code block. Many times however, we would like to do some processing in
1132
+ the Ruby chunk and have the result of this processing generate an
1133
+ output that is ``included'' in the document as if we had typed it in the
1134
+ \textbf{R markdown} document.
1135
+
1136
+ For example, suppose we want to create a new heading in our document,
1137
+ but the heading phrase is the result of some code processing: maybe it's
1138
+ the first line of a file we are going to read. Method `outputs' adds its
1139
+ output as if typed in the \textbf{R markdown} document.
1140
+
1141
+ Take now a look at variable `c' (it was defined in a previous block
1142
+ above) as `c = ``The 'outputs' function''. ``The 'outputs' function'' is
1143
+ actually the name of this section and it was created using the 'outputs'
1144
+ function inside a Ruby chunk.
1145
+
1146
+ The ruby chunk to generate this heading is:
1147
+
1148
+ \begin{verbatim}
1149
+ ```{ruby heading}
1150
+ outputs "### #{c}"
1151
+ ```
1152
+ \end{verbatim}
1153
+
1154
+ The three `\#\#\#' is the way we add a Heading 3 in \textbf{R markdown}.
1155
+
1156
+ \subsubsection{HTML Output from Ruby
1157
+ Chunks}\label{html-output-from-ruby-chunks}
1158
+
1159
+ We've just seen the use of method `outputs' to add text to the
1160
+ \textbf{R markdown} document. This technique can also be used to add
1161
+ HTML code to the document. In \textbf{R markdown}, any html code typed
1162
+ directly in the document will be properly rendered.\\
1163
+ Here, for instance, is a table definition in HTML and its output in the
1164
+ document:
1165
+
1166
+ \begin{verbatim}
1167
+ <table style="width:100%">
1168
+ <tr>
1169
+ <th>Firstname</th>
1170
+ <th>Lastname</th>
1171
+ <th>Age</th>
1172
+ </tr>
1173
+ <tr>
1174
+ <td>Jill</td>
1175
+ <td>Smith</td>
1176
+ <td>50</td>
1177
+ </tr>
1178
+ <tr>
1179
+ <td>Eve</td>
1180
+ <td>Jackson</td>
1181
+ <td>94</td>
1182
+ </tr>
1183
+ </table>
1184
+ \end{verbatim}
1185
+
1186
+ \begin{verbatim}
1187
+ <th>Firstname</th>
1188
+ <th>Lastname</th>
1189
+ <th>Age</th>
1190
+ \end{verbatim}
1191
+
1192
+ \begin{verbatim}
1193
+ <td>Jill</td>
1194
+ <td>Smith</td>
1195
+ <td>50</td>
1196
+ \end{verbatim}
1197
+
1198
+ \begin{verbatim}
1199
+ <td>Eve</td>
1200
+ <td>Jackson</td>
1201
+ <td>94</td>
1202
+ \end{verbatim}
1203
+
1204
+ But manually creating HTML output is not always easy or desirable,
1205
+ especially if we intend the document to be rendered in other formats, for
1206
+ example, as Latex. Also, the above table looks ugly. The `kableExtra'
1207
+ library is a great library for creating beautiful tables. Take a look at
1208
+ \url{https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html}
1209
+
1210
+ In the next chunk, we output the `mtcars' dataframe from R in a nicely
1211
+ formatted table. Note that we retrieve the mtcars dataframe by using
1212
+ `\textasciitilde{}:mtcars'.
1213
+
1214
+ \begin{Shaded}
1215
+ \begin{Highlighting}[]
1216
+ \NormalTok{R.install_and_loads(}\StringTok{'kableExtra'}\NormalTok{)}
1217
+ \NormalTok{outputs (~}\StringTok{:mtcars}\NormalTok{).kable.kable_styling}
1218
+ \end{Highlighting}
1219
+ \end{Shaded}
1220
+
1221
+ \begin{table}[H]
1222
+ \centering
1223
+ \begin{tabular}{l|r|r|r|r|r|r|r|r|r|r|r}
1224
+ \hline
1225
+ & mpg & cyl & disp & hp & drat & wt & qsec & vs & am & gear & carb\\
1226
+ \hline
1227
+ Mazda RX4 & 21.0 & 6 & 160.0 & 110 & 3.90 & 2.620 & 16.46 & 0 & 1 & 4 & 4\\
1228
+ \hline
1229
+ Mazda RX4 Wag & 21.0 & 6 & 160.0 & 110 & 3.90 & 2.875 & 17.02 & 0 & 1 & 4 & 4\\
1230
+ \hline
1231
+ Datsun 710 & 22.8 & 4 & 108.0 & 93 & 3.85 & 2.320 & 18.61 & 1 & 1 & 4 & 1\\
1232
+ \hline
1233
+ Hornet 4 Drive & 21.4 & 6 & 258.0 & 110 & 3.08 & 3.215 & 19.44 & 1 & 0 & 3 & 1\\
1234
+ \hline
1235
+ Hornet Sportabout & 18.7 & 8 & 360.0 & 175 & 3.15 & 3.440 & 17.02 & 0 & 0 & 3 & 2\\
1236
+ \hline
1237
+ Valiant & 18.1 & 6 & 225.0 & 105 & 2.76 & 3.460 & 20.22 & 1 & 0 & 3 & 1\\
1238
+ \hline
1239
+ Duster 360 & 14.3 & 8 & 360.0 & 245 & 3.21 & 3.570 & 15.84 & 0 & 0 & 3 & 4\\
1240
+ \hline
1241
+ Merc 240D & 24.4 & 4 & 146.7 & 62 & 3.69 & 3.190 & 20.00 & 1 & 0 & 4 & 2\\
1242
+ \hline
1243
+ Merc 230 & 22.8 & 4 & 140.8 & 95 & 3.92 & 3.150 & 22.90 & 1 & 0 & 4 & 2\\
1244
+ \hline
1245
+ Merc 280 & 19.2 & 6 & 167.6 & 123 & 3.92 & 3.440 & 18.30 & 1 & 0 & 4 & 4\\
1246
+ \hline
1247
+ Merc 280C & 17.8 & 6 & 167.6 & 123 & 3.92 & 3.440 & 18.90 & 1 & 0 & 4 & 4\\
1248
+ \hline
1249
+ Merc 450SE & 16.4 & 8 & 275.8 & 180 & 3.07 & 4.070 & 17.40 & 0 & 0 & 3 & 3\\
1250
+ \hline
1251
+ Merc 450SL & 17.3 & 8 & 275.8 & 180 & 3.07 & 3.730 & 17.60 & 0 & 0 & 3 & 3\\
1252
+ \hline
1253
+ Merc 450SLC & 15.2 & 8 & 275.8 & 180 & 3.07 & 3.780 & 18.00 & 0 & 0 & 3 & 3\\
1254
+ \hline
1255
+ Cadillac Fleetwood & 10.4 & 8 & 472.0 & 205 & 2.93 & 5.250 & 17.98 & 0 & 0 & 3 & 4\\
1256
+ \hline
1257
+ Lincoln Continental & 10.4 & 8 & 460.0 & 215 & 3.00 & 5.424 & 17.82 & 0 & 0 & 3 & 4\\
1258
+ \hline
1259
+ Chrysler Imperial & 14.7 & 8 & 440.0 & 230 & 3.23 & 5.345 & 17.42 & 0 & 0 & 3 & 4\\
1260
+ \hline
1261
+ Fiat 128 & 32.4 & 4 & 78.7 & 66 & 4.08 & 2.200 & 19.47 & 1 & 1 & 4 & 1\\
1262
+ \hline
1263
+ Honda Civic & 30.4 & 4 & 75.7 & 52 & 4.93 & 1.615 & 18.52 & 1 & 1 & 4 & 2\\
1264
+ \hline
1265
+ Toyota Corolla & 33.9 & 4 & 71.1 & 65 & 4.22 & 1.835 & 19.90 & 1 & 1 & 4 & 1\\
1266
+ \hline
1267
+ Toyota Corona & 21.5 & 4 & 120.1 & 97 & 3.70 & 2.465 & 20.01 & 1 & 0 & 3 & 1\\
1268
+ \hline
1269
+ Dodge Challenger & 15.5 & 8 & 318.0 & 150 & 2.76 & 3.520 & 16.87 & 0 & 0 & 3 & 2\\
1270
+ \hline
1271
+ AMC Javelin & 15.2 & 8 & 304.0 & 150 & 3.15 & 3.435 & 17.30 & 0 & 0 & 3 & 2\\
1272
+ \hline
1273
+ Camaro Z28 & 13.3 & 8 & 350.0 & 245 & 3.73 & 3.840 & 15.41 & 0 & 0 & 3 & 4\\
1274
+ \hline
1275
+ Pontiac Firebird & 19.2 & 8 & 400.0 & 175 & 3.08 & 3.845 & 17.05 & 0 & 0 & 3 & 2\\
1276
+ \hline
1277
+ Fiat X1-9 & 27.3 & 4 & 79.0 & 66 & 4.08 & 1.935 & 18.90 & 1 & 1 & 4 & 1\\
1278
+ \hline
1279
+ Porsche 914-2 & 26.0 & 4 & 120.3 & 91 & 4.43 & 2.140 & 16.70 & 0 & 1 & 5 & 2\\
1280
+ \hline
1281
+ Lotus Europa & 30.4 & 4 & 95.1 & 113 & 3.77 & 1.513 & 16.90 & 1 & 1 & 5 & 2\\
1282
+ \hline
1283
+ Ford Pantera L & 15.8 & 8 & 351.0 & 264 & 4.22 & 3.170 & 14.50 & 0 & 1 & 5 & 4\\
1284
+ \hline
1285
+ Ferrari Dino & 19.7 & 6 & 145.0 & 175 & 3.62 & 2.770 & 15.50 & 0 & 1 & 5 & 6\\
1286
+ \hline
1287
+ Maserati Bora & 15.0 & 8 & 301.0 & 335 & 3.54 & 3.570 & 14.60 & 0 & 1 & 5 & 8\\
1288
+ \hline
1289
+ Volvo 142E & 21.4 & 4 & 121.0 & 109 & 4.11 & 2.780 & 18.60 & 1 & 1 & 4 & 2\\
1290
+ \hline
1291
+ \end{tabular}
1292
+ \end{table}
1293
+
1294
+ \subsection{Including Ruby files in a
1295
+ chunk}\label{including-ruby-files-in-a-chunk}
1296
+
1297
+ R is a language that was created to be easy and fast for statisticians
1298
+ to use. As far as I know, it was not a language to be used for
1299
+ developing large systems. Of course, there are large systems and
1300
+ libraries in R, but the focus of the language is for developing
1301
+ statistical models and distributing them to peers.
1302
+
1303
+ Ruby on the other hand, is a language for large software development.
1304
+ Systems written in Ruby will have dozens, hundreds or even thousands of
1305
+ files. To document a large system with literate programming, we cannot
1306
+ expect the developer to add all the files in a single `.Rmd' file. gKnit
1307
+ provides the `include' chunk engine to include a Ruby file as if it had
1308
+ been typed in the `.Rmd' file.
1309
+
1310
+ To include a file, the following chunk should be created, where \textless{}filename\textgreater{} is the
1311
+ name of the file to be included and where the extension, if it is `.rb',
1312
+ does not need to be added. If the `relative' option is not included,
1313
+ then it is treated as TRUE. When `relative' is true, ruby's
1314
+ `require\_relative' semantics is used to load the file, when false,
1315
+ Ruby's \$LOAD\_PATH is searched to find the file and it is 'require'd.
1316
+
1317
+ \begin{verbatim}
1318
+ ```{include <filename>, relative = <TRUE/FALSE>}
1319
+ ```
1320
+ \end{verbatim}
1321
+
1322
+ Below we include file `model.rb', which is in the same directory as
1323
+ this blog.\\
1324
+ This code uses R `caret' package to split a dataset in a train and test
1325
+ sets. The `caret' package is a very important and useful package for doing
1326
+ Data Analysis, it has hundreds of functions for all steps of the Data
1327
+ Analysis workflow. To use `caret' just to split a dataset is like using
1328
+ the proverbial cannon to kill the fly. We use it here only to show that
1329
+ integrating Ruby and R and using even a very complex package as `caret'
1330
+ is trivial with Galaaz.
1331
+
1332
+ A word of advice: the `caret' package has lots of dependencies and
1333
+ installing it in a Linux system is a time consuming operation. Method
1334
+ `R.install\_and\_loads' will install the package if it is not already
1335
+ installed and can take a while.
1336
+
1337
+ \begin{verbatim}
1338
+ ```{include model}
1339
+ ```
1340
+ \end{verbatim}
1341
+
1342
+ \begin{verbatim}
1343
+ require 'galaaz'
1344
+
1345
+ # Loads the R 'caret' package. If not present, installs it
1346
+ R.install_and_loads 'caret'
1347
+
1348
+ class Model
1349
+
1350
+ attr_reader :data
1351
+ attr_reader :test
1352
+ attr_reader :train
1353
+
1354
+ #==========================================================
1355
+ #
1356
+ #==========================================================
1357
+
1358
+ def initialize(data, percent_train:, seed: 123)
1359
+
1360
+ R.set__seed(seed)
1361
+ @data = data
1362
+ @percent_train = percent_train
1363
+ @seed = seed
1364
+
1365
+ end
1366
+
1367
+ #==========================================================
1368
+ #
1369
+ #==========================================================
1370
+
1371
+ def partition(field)
1372
+
1373
+ train_index =
1374
+ R.createDataPartition(@data.send(field), p: @percet_train,
1375
+ list: false, times: 1)
1376
+ @train = @data[train_index, :all]
1377
+ @test = @data[-train_index, :all]
1378
+
1379
+ end
1380
+
1381
+ end
1382
+ \end{verbatim}
1383
+
1384
+ \begin{Shaded}
1385
+ \begin{Highlighting}[]
1386
+ \NormalTok{mtcars = ~}\StringTok{:mtcars}
1387
+ \NormalTok{model = }\DataTypeTok{Model}\NormalTok{.new(mtcars, }\StringTok{percent_train: }\FloatTok{0.8}\NormalTok{)}
1388
+ \NormalTok{model.partition(}\StringTok{:mpg}\NormalTok{)}
1389
+ \NormalTok{puts model.train.head}
1390
+ \NormalTok{puts model.test.head}
1391
+ \end{Highlighting}
1392
+ \end{Shaded}
1393
+
1394
+ \begin{verbatim}
1395
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1396
+ ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
1397
+ ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
1398
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
1399
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
1400
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
1401
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
1402
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1403
+ ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
1404
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
1405
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
1406
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
1407
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
1408
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
1409
+ \end{verbatim}
1410
+
1411
+ \subsection{Documenting Gems}\label{documenting-gems}
1412
+
1413
+ gKnit also allows developers to document and load files that are not in
1414
+ the same directory of the `.Rmd' file.
1415
+
1416
+ Here is an example of loading the `find.rb' file from TruffleRuby. In
1417
+ this example, relative is set to FALSE, so Ruby will look for the file
1418
+ in its \$LOAD\_PATH, and the user does not need to know its directory.
1419
+
1420
+ \begin{verbatim}
1421
+ ```{include find, relative = FALSE}
1422
+ ```
1423
+ \end{verbatim}
1424
+
1425
+ \begin{verbatim}
1426
+ # frozen_string_literal: true
1427
+ #
1428
+ # find.rb: the Find module for processing all files under a given directory.
1429
+ #
1430
+
1431
+ #
1432
+ # The +Find+ module supports the top-down traversal of a set of file paths.
1433
+ #
1434
+ # For example, to total the size of all files under your home directory,
1435
+ # ignoring anything in a "dot" directory (e.g. $HOME/.ssh):
1436
+ #
1437
+ # require 'find'
1438
+ #
1439
+ # total_size = 0
1440
+ #
1441
+ # Find.find(ENV["HOME"]) do |path|
1442
+ # if FileTest.directory?(path)
1443
+ # if File.basename(path)[0] == ?.
1444
+ # Find.prune # Don't look any further into this directory.
1445
+ # else
1446
+ # next
1447
+ # end
1448
+ # else
1449
+ # total_size += FileTest.size(path)
1450
+ # end
1451
+ # end
1452
+ #
1453
+ module Find
1454
+
1455
+ #
1456
+ # Calls the associated block with the name of every file and directory listed
1457
+ # as arguments, then recursively on their subdirectories, and so on.
1458
+ #
1459
+ # Returns an enumerator if no block is given.
1460
+ #
1461
+ # See the +Find+ module documentation for an example.
1462
+ #
1463
+ def find(*paths, ignore_error: true) # :yield: path
1464
+ block_given? or return enum_for(__method__, *paths, ignore_error: ignore_error)
1465
+
1466
+ fs_encoding = Encoding.find("filesystem")
1467
+
1468
+ paths.collect!{|d| raise Errno::ENOENT, d unless File.exist?(d); d.dup}.each do |path|
1469
+ path = path.to_path if path.respond_to? :to_path
1470
+ enc = path.encoding == Encoding::US_ASCII ? fs_encoding : path.encoding
1471
+ ps = [path]
1472
+ while file = ps.shift
1473
+ catch(:prune) do
1474
+ yield file.dup.taint
1475
+ begin
1476
+ s = File.lstat(file)
1477
+ rescue Errno::ENOENT, Errno::EACCES, Errno::ENOTDIR, Errno::ELOOP, Errno::ENAMETOOLONG
1478
+ raise unless ignore_error
1479
+ next
1480
+ end
1481
+ if s.directory? then
1482
+ begin
1483
+ fs = Dir.children(file, encoding: enc)
1484
+ rescue Errno::ENOENT, Errno::EACCES, Errno::ENOTDIR, Errno::ELOOP, Errno::ENAMETOOLONG
1485
+ raise unless ignore_error
1486
+ next
1487
+ end
1488
+ fs.sort!
1489
+ fs.reverse_each {|f|
1490
+ f = File.join(file, f)
1491
+ ps.unshift f.untaint
1492
+ }
1493
+ end
1494
+ end
1495
+ end
1496
+ end
1497
+ nil
1498
+ end
1499
+
1500
+ #
1501
+ # Skips the current file or directory, restarting the loop with the next
1502
+ # entry. If the current file is a directory, that directory will not be
1503
+ # recursively entered. Meaningful only within the block associated with
1504
+ # Find::find.
1505
+ #
1506
+ # See the +Find+ module documentation for an example.
1507
+ #
1508
+ def prune
1509
+ throw :prune
1510
+ end
1511
+
1512
+ module_function :find, :prune
1513
+ end
1514
+ \end{verbatim}
1515
+
1516
+ \subsection{Converting to PDF}\label{converting-to-pdf}
1517
+
1518
+ One of the beauties of knitr is that the same input can be converted to
1519
+ many different outputs. One very useful format, is, of course, PDF. In
1520
+ order to convert an \textbf{R markdown} file to PDF it is necessary to
1521
+ have LaTeX installed on the system. We will not explain here how to
1522
+ install LaTeX as there are plenty of documents on the web showing how to
1523
+ proceed.
1524
+
1525
+ gKnit comes with a simple LaTeX style file for gknitting this blog as a
1526
+ PDF document. Here is the Yaml header to generate this blog in PDF
1527
+ format instead of HTML:
1528
+
1529
+ \begin{verbatim}
1530
+ ---
1531
+ title: "gKnit - Ruby and R Knitting with Galaaz in GraalVM"
1532
+ author: "Rodrigo Botafogo"
1533
+ tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, knitr, gknit]
1534
+ date: "29 October 2018"
1535
+ output:
1536
+ pdf\_document:
1537
+ includes:
1538
+ in\_header: ["../../sty/galaaz.sty"]
1539
+ number\_sections: yes
1540
+ ---
1541
+ \end{verbatim}
1542
+
1543
+ \subsection{Template based documents
1544
+ generation}\label{template-based-documents-generation}
1545
+
1546
+ When a document is converted to PDF it follows a certain conversion
1547
+ template. We've seen above the use of `galaaz.sty' as a basic template
1548
+ to generate a PDF document. Using the `gknit-draft' app that comes with
1549
+ Galaaz, the same .Rmd file can be compiled to different looking PDF
1550
+ documents. Galaaz automatically loads the `rticles' R package that comes
1551
+ with templates for the following journals with the respective template
1552
+ name:
1553
+
1554
+ \begin{itemize}
1555
+ \tightlist
1556
+ \item
1557
+ ACM articles: acm\_article
1558
+ \item
1559
+ ACS articles: acs\_article
1560
+ \item
1561
+ AEA journal submissions: aea\_article
1562
+ \item
1563
+ AGU journal submissions: agu\_article
1564
+ \item
1565
+ AMS articles: ams\_article
1566
+ \item
1567
+ American Statistical Association: asa\_article
1568
+ \item
1569
+ Biometrics articles: biometrics\_article
1570
+ \item
1571
+ Bulletin de l'AMQ journal submissions: amq\_article
1572
+ \item
1573
+ CTeX documents: ctex
1574
+ \item
1575
+ Elsevier journal submissions: elsevier\_article
1576
+ \item
1577
+ IEEE Transaction journal submissions: ieee\_article
1578
+ \item
1579
+ JSS articles: jss\_article
1580
+ \item
1581
+ MDPI journal submissions: mdpi\_article
1582
+ \item
1583
+ Monthly Notices of the Royal Astronomical Society articles:
1584
+ mnras\_article
1585
+ \item
1586
+ MNRAS journal submissions: mnras\_article
1587
+ \item
1588
+ PeerJ articles: peerj\_article
1589
+ \item
1590
+ Royal Society Open Science journal submissions: rsos\_article
1591
+ \item
1592
+ Royal Statistical Society: rss\_article
1593
+ \item
1594
+ Sage journal submissions: sage\_article
1595
+ \item
1596
+ Springer journal submissions: springer\_article
1597
+ \item
1598
+ Statistics in Medicine journal submissions: sim\_article
1599
+ \item
1600
+ Copernicus Publications journal submissions: copernicus\_article
1601
+ \item
1602
+ The R Journal articles: rjournal\_article
1603
+ \item
1604
+ Frontiers articles: frontiers\_article
1605
+ \item
1606
+ Taylor \& Francis articles: tf\_article
1607
+ \item
1608
+ Bulletin De L'AMQ: amq\_article
1609
+ \item
1610
+ PLOS journal: plos\_article
1611
+ \item
1612
+ Proceedings of the National Academy of Sciences of the USA:
1613
+ pnas\_article
1614
+ \end{itemize}
1615
+
1616
+ In order to create a document with one of those templates, use the
1617
+ following command:
1618
+
1619
+ \begin{verbatim}
1620
+ gknit-draft --filename <my_document> --template <template> --package <package>
1621
+ --create_dir
1622
+ \end{verbatim}
1623
+
1624
+ So, in order to create a template for writing an R Journal, use:
1625
+
1626
+ \begin{verbatim}
1627
+ gknit-draft --filename my_r_article --template rjournal_article --package rticles
1628
+ --create_dir
1629
+ \end{verbatim}
1630
+
1631
+ \section{Accessing R variables}\label{accessing-r-variables}
1632
+
1633
+ Galaaz allows Ruby to access variables created in R. For example, the
1634
+ `mtcars' data set is available in R and can be accessed from Ruby by
1635
+ using the `tilde' operator followed by the symbol for the variable, in
1636
+ this case `:mtcars'. In the code below method `outputs' is used to
1637
+ output the `mtcars' data set nicely formatted in HTML by use of the
1638
+ `kable' and `kable\_styling' functions. Method `outputs' is only
1639
+ available when used with `gknit'.
1640
+
1641
+ \begin{Shaded}
1642
+ \begin{Highlighting}[]
1643
+ \NormalTok{outputs (~}\StringTok{:mtcars}\NormalTok{).kable.kable_styling}
1644
+ \end{Highlighting}
1645
+ \end{Shaded}
1646
+
1647
+ \begin{table}[H]
1648
+ \centering
1649
+ \begin{tabular}{l|r|r|r|r|r|r|r|r|r|r|r}
1650
+ \hline
1651
+ & mpg & cyl & disp & hp & drat & wt & qsec & vs & am & gear & carb\\
1652
+ \hline
1653
+ Mazda RX4 & 21.0 & 6 & 160.0 & 110 & 3.90 & 2.620 & 16.46 & 0 & 1 & 4 & 4\\
1654
+ \hline
1655
+ Mazda RX4 Wag & 21.0 & 6 & 160.0 & 110 & 3.90 & 2.875 & 17.02 & 0 & 1 & 4 & 4\\
1656
+ \hline
1657
+ Datsun 710 & 22.8 & 4 & 108.0 & 93 & 3.85 & 2.320 & 18.61 & 1 & 1 & 4 & 1\\
1658
+ \hline
1659
+ Hornet 4 Drive & 21.4 & 6 & 258.0 & 110 & 3.08 & 3.215 & 19.44 & 1 & 0 & 3 & 1\\
1660
+ \hline
1661
+ Hornet Sportabout & 18.7 & 8 & 360.0 & 175 & 3.15 & 3.440 & 17.02 & 0 & 0 & 3 & 2\\
1662
+ \hline
1663
+ Valiant & 18.1 & 6 & 225.0 & 105 & 2.76 & 3.460 & 20.22 & 1 & 0 & 3 & 1\\
1664
+ \hline
1665
+ Duster 360 & 14.3 & 8 & 360.0 & 245 & 3.21 & 3.570 & 15.84 & 0 & 0 & 3 & 4\\
1666
+ \hline
1667
+ Merc 240D & 24.4 & 4 & 146.7 & 62 & 3.69 & 3.190 & 20.00 & 1 & 0 & 4 & 2\\
1668
+ \hline
1669
+ Merc 230 & 22.8 & 4 & 140.8 & 95 & 3.92 & 3.150 & 22.90 & 1 & 0 & 4 & 2\\
1670
+ \hline
1671
+ Merc 280 & 19.2 & 6 & 167.6 & 123 & 3.92 & 3.440 & 18.30 & 1 & 0 & 4 & 4\\
1672
+ \hline
1673
+ Merc 280C & 17.8 & 6 & 167.6 & 123 & 3.92 & 3.440 & 18.90 & 1 & 0 & 4 & 4\\
1674
+ \hline
1675
+ Merc 450SE & 16.4 & 8 & 275.8 & 180 & 3.07 & 4.070 & 17.40 & 0 & 0 & 3 & 3\\
1676
+ \hline
1677
+ Merc 450SL & 17.3 & 8 & 275.8 & 180 & 3.07 & 3.730 & 17.60 & 0 & 0 & 3 & 3\\
1678
+ \hline
1679
+ Merc 450SLC & 15.2 & 8 & 275.8 & 180 & 3.07 & 3.780 & 18.00 & 0 & 0 & 3 & 3\\
1680
+ \hline
1681
+ Cadillac Fleetwood & 10.4 & 8 & 472.0 & 205 & 2.93 & 5.250 & 17.98 & 0 & 0 & 3 & 4\\
1682
+ \hline
1683
+ Lincoln Continental & 10.4 & 8 & 460.0 & 215 & 3.00 & 5.424 & 17.82 & 0 & 0 & 3 & 4\\
1684
+ \hline
1685
+ Chrysler Imperial & 14.7 & 8 & 440.0 & 230 & 3.23 & 5.345 & 17.42 & 0 & 0 & 3 & 4\\
1686
+ \hline
1687
+ Fiat 128 & 32.4 & 4 & 78.7 & 66 & 4.08 & 2.200 & 19.47 & 1 & 1 & 4 & 1\\
1688
+ \hline
1689
+ Honda Civic & 30.4 & 4 & 75.7 & 52 & 4.93 & 1.615 & 18.52 & 1 & 1 & 4 & 2\\
1690
+ \hline
1691
+ Toyota Corolla & 33.9 & 4 & 71.1 & 65 & 4.22 & 1.835 & 19.90 & 1 & 1 & 4 & 1\\
1692
+ \hline
1693
+ Toyota Corona & 21.5 & 4 & 120.1 & 97 & 3.70 & 2.465 & 20.01 & 1 & 0 & 3 & 1\\
1694
+ \hline
1695
+ Dodge Challenger & 15.5 & 8 & 318.0 & 150 & 2.76 & 3.520 & 16.87 & 0 & 0 & 3 & 2\\
1696
+ \hline
1697
+ AMC Javelin & 15.2 & 8 & 304.0 & 150 & 3.15 & 3.435 & 17.30 & 0 & 0 & 3 & 2\\
1698
+ \hline
1699
+ Camaro Z28 & 13.3 & 8 & 350.0 & 245 & 3.73 & 3.840 & 15.41 & 0 & 0 & 3 & 4\\
1700
+ \hline
1701
+ Pontiac Firebird & 19.2 & 8 & 400.0 & 175 & 3.08 & 3.845 & 17.05 & 0 & 0 & 3 & 2\\
1702
+ \hline
1703
+ Fiat X1-9 & 27.3 & 4 & 79.0 & 66 & 4.08 & 1.935 & 18.90 & 1 & 1 & 4 & 1\\
1704
+ \hline
1705
+ Porsche 914-2 & 26.0 & 4 & 120.3 & 91 & 4.43 & 2.140 & 16.70 & 0 & 1 & 5 & 2\\
1706
+ \hline
1707
+ Lotus Europa & 30.4 & 4 & 95.1 & 113 & 3.77 & 1.513 & 16.90 & 1 & 1 & 5 & 2\\
1708
+ \hline
1709
+ Ford Pantera L & 15.8 & 8 & 351.0 & 264 & 4.22 & 3.170 & 14.50 & 0 & 1 & 5 & 4\\
1710
+ \hline
1711
+ Ferrari Dino & 19.7 & 6 & 145.0 & 175 & 3.62 & 2.770 & 15.50 & 0 & 1 & 5 & 6\\
1712
+ \hline
1713
+ Maserati Bora & 15.0 & 8 & 301.0 & 335 & 3.54 & 3.570 & 14.60 & 0 & 1 & 5 & 8\\
1714
+ \hline
1715
+ Volvo 142E & 21.4 & 4 & 121.0 & 109 & 4.11 & 2.780 & 18.60 & 1 & 1 & 4 & 2\\
1716
+ \hline
1717
+ \end{tabular}
1718
+ \end{table}
1719
+
1720
+ \section{Basic Data Types}\label{basic-data-types}
1721
+
1722
+ \subsection{Vector}\label{vector}
1723
+
1724
+ Vectors can be thought of as contiguous cells containing data. Cells are
1725
+ accessed through indexing operations such as x{[}5{]}. Galaaz has six
1726
+ basic (`atomic') vector types: logical, integer, real, complex, string
1727
+ (or character) and raw. The modes and storage modes for the different
1728
+ vector types are listed in the following table.
1729
+
1730
+ \begin{longtable}[]{@{}lcr@{}}
1731
+ \toprule
1732
+ typeof & mode & storage.mode\tabularnewline
1733
+ \midrule
1734
+ \endhead
1735
+ logical & logical & logical\tabularnewline
1736
+ integer & numeric & integer\tabularnewline
1737
+ double & numeric & double\tabularnewline
1738
+ complex & complex & complex\tabularnewline
1739
+ character & character & character\tabularnewline
1740
+ raw & raw & raw\tabularnewline
1741
+ \bottomrule
1742
+ \end{longtable}
1743
+
1744
+ Single numbers, such as 4.2, and strings, such as ``four point two'' are
1745
+ still vectors, of length 1; there are no more basic types. Vectors with
1746
+ length zero are possible (and useful). String vectors have mode and
1747
+ storage mode ``character''. A single element of a character vector is
1748
+ often referred to as a character string.
1749
+
1750
+ To create a vector the `c' (concatenate) method from the `R' module
1751
+ should be used:
1752
+
1753
+ \begin{Shaded}
1754
+ \begin{Highlighting}[]
1755
+ \NormalTok{vec = R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{)}
1756
+ \NormalTok{puts vec}
1757
+ \end{Highlighting}
1758
+ \end{Shaded}
1759
+
1760
+ \begin{verbatim}
1761
+ ## [1] 1 2 3
1762
+ \end{verbatim}
1763
+
1764
+ Let's take a look at the type, mode and storage.mode of our vector vec.
1765
+ In order to print this out, we are creating a data frame `df' and
1766
+ printing it out. A data frame, for those not familiar with it, is
1767
+ basically a table. Here we create the data frame and add the column name
1768
+ by passing named parameters for each column, such as `typeof:', `mode:'
1769
+ and `storage\_\_mode:'. You should also note here that the double
1770
+ underscore is converted to a `.'. So, when printed `storage\_\_mode' will
1771
+ actually print as `storage.mode'.
1772
+
1773
+ Data frames will later be more carefully described. In R, the method
1774
+ used to create a data frame is `data.frame', in Galaaz we use
1775
+ `data\_\_frame'.
1776
+
1777
+ \begin{Shaded}
1778
+ \begin{Highlighting}[]
1779
+ \NormalTok{df = R.data__frame(}\StringTok{typeof: }\NormalTok{vec.typeof, }\StringTok{mode: }\NormalTok{vec.mode, }\StringTok{storage__mode: }\NormalTok{vec.storage__mode)}
1780
+ \NormalTok{puts df}
1781
+ \end{Highlighting}
1782
+ \end{Shaded}
1783
+
1784
+ \begin{verbatim}
1785
+ ## typeof mode storage.mode
1786
+ ## 1 integer numeric integer
1787
+ \end{verbatim}
1788
+
1789
+ If you want to create a vector with floating point numbers, then we need
1790
+ at least one of the vector's elements to be a float, such as 1.0. R users
1791
+ should be careful, since in R a number like `1' is converted to float
1792
+ and to have an integer the R developer will use `1L'. Galaaz follows
1793
+ normal Ruby rules and the number 1 is an integer and 1.0 is a float.
1794
+
1795
+ \begin{Shaded}
1796
+ \begin{Highlighting}[]
1797
+ \NormalTok{vec = R.c(}\FloatTok{1.0}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{)}
1798
+ \NormalTok{puts vec}
1799
+ \end{Highlighting}
1800
+ \end{Shaded}
1801
+
1802
+ \begin{verbatim}
1803
+ ## [1] 1 2 3
1804
+ \end{verbatim}
1805
+
1806
+ \begin{Shaded}
1807
+ \begin{Highlighting}[]
1808
+ \NormalTok{df = R.data__frame(}\StringTok{typeof: }\NormalTok{vec.typeof, }\StringTok{mode: }\NormalTok{vec.mode, }\StringTok{storage__mode: }\NormalTok{vec.storage__mode)}
1809
+ \NormalTok{outputs df.kable.kable_styling}
1810
+ \end{Highlighting}
1811
+ \end{Shaded}
1812
+
1813
+ \begin{table}[H]
1814
+ \centering
1815
+ \begin{tabular}{l|l|l}
1816
+ \hline
1817
+ typeof & mode & storage.mode\\
1818
+ \hline
1819
+ double & numeric & double\\
1820
+ \hline
1821
+ \end{tabular}
1822
+ \end{table}
1823
+
1824
+ In this next example we try to create a vector with a variable `hello'
1825
+ that has not yet been defined. This will raise an exception that is
1826
+ printed out. We get two return blocks, the first with a message
1827
+ explaining what went wrong and the second with the full backtrace of the
1828
+ error.
1829
+
1830
+ \begin{Shaded}
1831
+ \begin{Highlighting}[]
1832
+ \NormalTok{vec = R.c(}\DecValTok{1}\NormalTok{, hello, }\DecValTok{5}\NormalTok{)}
1833
+ \end{Highlighting}
1834
+ \end{Shaded}
1835
+
1836
+ \begin{verbatim}
1837
+ ## Message:
1838
+ ## undefined local variable or method `hello' for #<RC:0x3d8 @out_list=nil>:RC
1839
+ \end{verbatim}
1840
+
1841
+ \begin{verbatim}
1842
+ ## Message:
1843
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:103:in `get_binding'
1844
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `eval'
1845
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `exec_ruby'
1846
+ ## /home/rbotafogo/desenv/galaaz/lib/gknit/knitr_engine.rb:650:in `block in initialize'
1847
+ ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
1848
+ ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
1849
+ ## (eval):3:in `function(...) {\n rb_method(...)'
1850
+ ## unknown.r:1:in `in_dir'
1851
+ ## unknown.r:1:in `block_exec'
1852
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:92:in `call_block'
1853
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:6:in `process_group.block'
1854
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:3:in `<no source>'
1855
+ ## unknown.r:1:in `withCallingHandlers'
1856
+ ## unknown.r:1:in `process_file'
1857
+ ## unknown.r:1:in `<no source>'
1858
+ ## unknown.r:1:in `<no source>'
1859
+ ## <REPL>:4:in `<repl wrapper>'
1860
+ ## <REPL>:1
1861
+ \end{verbatim}
1862
+
1863
+ Here is a vector with logical values
1864
+
1865
+ \begin{Shaded}
1866
+ \begin{Highlighting}[]
1867
+ \NormalTok{vec = R.c(}\DecValTok{true}\NormalTok{, }\DecValTok{true}\NormalTok{, }\DecValTok{false}\NormalTok{, }\DecValTok{false}\NormalTok{, }\DecValTok{true}\NormalTok{)}
1868
+ \NormalTok{puts vec}
1869
+ \end{Highlighting}
1870
+ \end{Shaded}
1871
+
1872
+ \begin{verbatim}
1873
+ ## [1] TRUE TRUE FALSE FALSE TRUE
1874
+ \end{verbatim}
1875
+
1876
+ \subsubsection{Combining Vectors}\label{combining-vectors}
1877
+
1878
+ The `c' function used to create vectors can also be used to combine two
1879
+ vectors:
1880
+
1881
+ \begin{Shaded}
1882
+ \begin{Highlighting}[]
1883
+ \NormalTok{vec1 = R.c(}\FloatTok{10.0}\NormalTok{, }\FloatTok{20.0}\NormalTok{, }\FloatTok{30.0}\NormalTok{)}
1884
+ \NormalTok{vec2 = R.c(}\FloatTok{4.0}\NormalTok{, }\FloatTok{5.0}\NormalTok{, }\FloatTok{6.0}\NormalTok{)}
1885
+ \NormalTok{vec = R.c(vec1, vec2)}
1886
+ \NormalTok{puts vec}
1887
+ \end{Highlighting}
1888
+ \end{Shaded}
1889
+
1890
+ \begin{verbatim}
1891
+ ## [1] 10 20 30 4 5 6
1892
+ \end{verbatim}
1893
+
1894
+ In Galaaz, methods can be chained (somewhat like the pipe operator in R
1895
+ \%\textgreater{}\%, but more generic). In this next example, method `c'
1896
+ is chained after `vec1'. This also looks like `c' is a method of the
1897
+ vector, but in reality, this is actually closer to the pipe operator.
1898
+ When Galaaz identifies that `c' is not a method of `vec' it actually
1899
+ tries to call `R.c' with `vec1' as the first argument concatenated with
1900
+ all the other available arguments. The code below is automatically
1901
+ converted to the code above.
1902
+
1903
+ \begin{Shaded}
1904
+ \begin{Highlighting}[]
1905
+ \NormalTok{vec = vec1.c(vec2)}
1906
+ \NormalTok{puts vec}
1907
+ \end{Highlighting}
1908
+ \end{Shaded}
1909
+
1910
+ \begin{verbatim}
1911
+ ## [1] 10 20 30 4 5 6
1912
+ \end{verbatim}
1913
+
1914
+ \subsubsection{Vector Arithmetic}\label{vector-arithmetic}
1915
+
1916
+ Arithmetic operations on vectors are performed element by element:
1917
+
1918
+ \begin{Shaded}
1919
+ \begin{Highlighting}[]
1920
+ \NormalTok{puts vec1 + vec2}
1921
+ \end{Highlighting}
1922
+ \end{Shaded}
1923
+
1924
+ \begin{verbatim}
1925
+ ## [1] 14 25 36
1926
+ \end{verbatim}
1927
+
1928
+ \begin{Shaded}
1929
+ \begin{Highlighting}[]
1930
+ \NormalTok{puts vec1 * }\DecValTok{5}
1931
+ \end{Highlighting}
1932
+ \end{Shaded}
1933
+
1934
+ \begin{verbatim}
1935
+ ## [1] 50 100 150
1936
+ \end{verbatim}
1937
+
1938
+ When vectors have different length, a recycling rule is applied to the
1939
+ shorter vector:
1940
+
1941
+ \begin{Shaded}
1942
+ \begin{Highlighting}[]
1943
+ \NormalTok{vec3 = R.c(}\FloatTok{1.0}\NormalTok{, }\FloatTok{2.0}\NormalTok{, }\FloatTok{3.0}\NormalTok{, }\FloatTok{4.0}\NormalTok{, }\FloatTok{5.0}\NormalTok{, }\FloatTok{6.0}\NormalTok{, }\FloatTok{7.0}\NormalTok{, }\FloatTok{8.0}\NormalTok{, }\FloatTok{9.0}\NormalTok{)}
1944
+ \NormalTok{puts vec4 = vec1 + vec3}
1945
+ \end{Highlighting}
1946
+ \end{Shaded}
1947
+
1948
+ \begin{verbatim}
1949
+ ## [1] 11 22 33 14 25 36 17 28 39
1950
+ \end{verbatim}
1951
+
1952
+ \subsubsection{Vector Indexing}\label{vector-indexing}
1953
+
1954
+ Vectors can be indexed by using the `{[}{]}' operator:
1955
+
1956
+ \begin{Shaded}
1957
+ \begin{Highlighting}[]
1958
+ \NormalTok{puts vec4[}\DecValTok{3}\NormalTok{]}
1959
+ \end{Highlighting}
1960
+ \end{Shaded}
1961
+
1962
+ \begin{verbatim}
1963
+ ## [1] 33
1964
+ \end{verbatim}
1965
+
1966
+ We can also index a vector with another vector. For example, in the code
1967
+ below, we take elements 1, 3, 5, and 7 from vec4:
1968
+
1969
+ \begin{Shaded}
1970
+ \begin{Highlighting}[]
1971
+ \NormalTok{puts vec4[R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{5}\NormalTok{, }\DecValTok{7}\NormalTok{)]}
1972
+ \end{Highlighting}
1973
+ \end{Shaded}
1974
+
1975
+ \begin{verbatim}
1976
+ ## [1] 11 33 25 17
1977
+ \end{verbatim}
1978
+
1979
+ Repeating an index and having indices out of order is valid code:
1980
+
1981
+ \begin{Shaded}
1982
+ \begin{Highlighting}[]
1983
+ \NormalTok{puts vec4[R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{1}\NormalTok{)]}
1984
+ \end{Highlighting}
1985
+ \end{Shaded}
1986
+
1987
+ \begin{verbatim}
1988
+ ## [1] 11 33 33 11
1989
+ \end{verbatim}
1990
+
1991
+ It is also possible to index a vector with a negative number or negative
1992
+ vector. In these cases the indexed values are not returned:
1993
+
1994
+ \begin{Shaded}
1995
+ \begin{Highlighting}[]
1996
+ \NormalTok{puts vec4[-}\DecValTok{3}\NormalTok{]}
1997
+ \NormalTok{puts vec4[-R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{5}\NormalTok{, }\DecValTok{7}\NormalTok{)]}
1998
+ \end{Highlighting}
1999
+ \end{Shaded}
2000
+
2001
+ \begin{verbatim}
2002
+ ## [1] 11 22 14 25 36 17 28 39
2003
+ ## [1] 22 14 36 28 39
2004
+ \end{verbatim}
2005
+
2006
+ If an index is out of range, a missing value (NA) will be reported.
2007
+
2008
+ \begin{Shaded}
2009
+ \begin{Highlighting}[]
2010
+ \NormalTok{puts vec4[}\DecValTok{30}\NormalTok{]}
2011
+ \end{Highlighting}
2012
+ \end{Shaded}
2013
+
2014
+ \begin{verbatim}
2015
+ ## [1] NA
2016
+ \end{verbatim}
2017
+
2018
+ It is also possible to index a vector by range:
2019
+
2020
+ \begin{Shaded}
2021
+ \begin{Highlighting}[]
2022
+ \NormalTok{puts vec4[(}\DecValTok{2}\NormalTok{..}\DecValTok{5}\NormalTok{)]}
2023
+ \end{Highlighting}
2024
+ \end{Shaded}
2025
+
2026
+ \begin{verbatim}
2027
+ ## [1] 22 33 14 25
2028
+ \end{verbatim}
2029
+
2030
+ Elements in a vector can be named using the `names' attribute of a
2031
+ vector:
2032
+
2033
+ \begin{Shaded}
2034
+ \begin{Highlighting}[]
2035
+ \NormalTok{full_name = R.c(}\StringTok{"Rodrigo"}\NormalTok{, }\StringTok{"A"}\NormalTok{, }\StringTok{"Botafogo"}\NormalTok{)}
2036
+ \NormalTok{full_name.names = R.c(}\StringTok{"First"}\NormalTok{, }\StringTok{"Middle"}\NormalTok{, }\StringTok{"Last"}\NormalTok{)}
2037
+ \NormalTok{puts full_name}
2038
+ \end{Highlighting}
2039
+ \end{Shaded}
2040
+
2041
+ \begin{verbatim}
2042
+ ## First Middle Last
2043
+ ## "Rodrigo" "A" "Botafogo"
2044
+ \end{verbatim}
2045
+
2046
+ Or it can also be named by using the `c' function with named
2047
+ parameters:
2048
+
2049
+ \begin{Shaded}
2050
+ \begin{Highlighting}[]
2051
+ \NormalTok{full_name = R.c(}\DataTypeTok{First}\NormalTok{: }\StringTok{"Rodrigo"}\NormalTok{, }\DataTypeTok{Middle}\NormalTok{: }\StringTok{"A"}\NormalTok{, }\DataTypeTok{Last}\NormalTok{: }\StringTok{"Botafogo"}\NormalTok{)}
2052
+ \NormalTok{puts full_name}
2053
+ \end{Highlighting}
2054
+ \end{Shaded}
2055
+
2056
+ \begin{verbatim}
2057
+ ## First Middle Last
2058
+ ## "Rodrigo" "A" "Botafogo"
2059
+ \end{verbatim}
2060
+
2061
+ \subsubsection{Extracting Native Ruby Types from a
2062
+ Vector}\label{extracting-native-ruby-types-from-a-vector}
2063
+
2064
+ Vectors created with `R.c' are of class R::Vector. You might have
2065
+ noticed that when indexing a vector, a new vector is returned, even if
2066
+ this vector has one single element. In order to use R::Vector with other
2067
+ ruby classes it might be necessary to extract the actual Ruby native
2068
+ type from the vector. In order to do this extraction the
2069
+ `\textgreater{}\textgreater{}' operator is used.
2070
+
2071
+ \begin{Shaded}
2072
+ \begin{Highlighting}[]
2073
+ \NormalTok{puts vec4}
2074
+ \NormalTok{puts vec4 >> }\DecValTok{0}
2075
+ \NormalTok{puts vec4 >> }\DecValTok{4}
2076
+ \end{Highlighting}
2077
+ \end{Shaded}
2078
+
2079
+ \begin{verbatim}
2080
+ ## [1] 11 22 33 14 25 36 17 28 39
2081
+ ## 11.0
2082
+ ## 25.0
2083
+ \end{verbatim}
2084
+
2085
+ Note that indexing with `\textgreater{}\textgreater{}' starts at 0 and
2086
+ not at 1, also, we cannot do negative indexing.
2087
+
2088
+ \subsection{Matrix}\label{matrix}
2089
+
2090
+ A matrix is a collection of elements organized as a two dimensional
2091
+ table. A matrix can be created by the `matrix' function:
2092
+
2093
+ \begin{Shaded}
2094
+ \begin{Highlighting}[]
2095
+ \NormalTok{mat = R.matrix(R.c(}\FloatTok{1.0}\NormalTok{, }\FloatTok{2.0}\NormalTok{, }\FloatTok{3.0}\NormalTok{, }\FloatTok{4.0}\NormalTok{, }\FloatTok{5.0}\NormalTok{, }\FloatTok{6.0}\NormalTok{, }\FloatTok{7.0}\NormalTok{, }\FloatTok{8.0}\NormalTok{, }\FloatTok{9.0}\NormalTok{),}
2096
+ \StringTok{nrow: }\DecValTok{3}\NormalTok{,}
2097
+ \StringTok{ncol: }\DecValTok{3}\NormalTok{)}
2098
+
2099
+ \NormalTok{puts mat}
2100
+ \end{Highlighting}
2101
+ \end{Shaded}
2102
+
2103
+ \begin{verbatim}
2104
+ ## [,1] [,2] [,3]
2105
+ ## [1,] 1 4 7
2106
+ ## [2,] 2 5 8
2107
+ ## [3,] 3 6 9
2108
+ \end{verbatim}
2109
+
2110
+ Note that matrices data is organized by column first. It is possible to
2111
+ organize the matrix memory by row first passing an extra argument to the
2112
+ `matrix' function:
2113
+
2114
+ \begin{Shaded}
2115
+ \begin{Highlighting}[]
2116
+ \NormalTok{mat_row = R.matrix(R.c(}\FloatTok{1.0}\NormalTok{, }\FloatTok{2.0}\NormalTok{, }\FloatTok{3.0}\NormalTok{, }\FloatTok{4.0}\NormalTok{, }\FloatTok{5.0}\NormalTok{, }\FloatTok{6.0}\NormalTok{, }\FloatTok{7.0}\NormalTok{, }\FloatTok{8.0}\NormalTok{, }\FloatTok{9.0}\NormalTok{),}
2117
+ \StringTok{nrow: }\DecValTok{3}\NormalTok{,}
2118
+ \StringTok{ncol: }\DecValTok{3}\NormalTok{,}
2119
+ \StringTok{byrow: }\DecValTok{true}\NormalTok{)}
2120
+
2121
+ \NormalTok{puts mat_row}
2122
+ \end{Highlighting}
2123
+ \end{Shaded}
2124
+
2125
+ \begin{verbatim}
2126
+ ## [,1] [,2] [,3]
2127
+ ## [1,] 1 2 3
2128
+ ## [2,] 4 5 6
2129
+ ## [3,] 7 8 9
2130
+ \end{verbatim}
2131
+
2132
+ \subsubsection{Indexing a Matrix}\label{indexing-a-matrix}
2133
+
2134
+ A matrix can be indexed by {[}row, column{]}:
2135
+
2136
+ \begin{Shaded}
2137
+ \begin{Highlighting}[]
2138
+ \NormalTok{puts mat_row[}\DecValTok{1}\NormalTok{, }\DecValTok{1}\NormalTok{]}
2139
+ \NormalTok{puts mat_row[}\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{]}
2140
+ \end{Highlighting}
2141
+ \end{Shaded}
2142
+
2143
+ \begin{verbatim}
2144
+ ## [1] 1
2145
+ ## [1] 6
2146
+ \end{verbatim}
2147
+
2148
+ It is possible to index an entire row or column with the `:all' keyword
2149
+
2150
+ \begin{Shaded}
2151
+ \begin{Highlighting}[]
2152
+ \NormalTok{puts mat_row[}\DecValTok{1}\NormalTok{, }\StringTok{:all}\NormalTok{]}
2153
+ \NormalTok{puts mat_row[}\StringTok{:all}\NormalTok{, }\DecValTok{2}\NormalTok{]}
2154
+ \end{Highlighting}
2155
+ \end{Shaded}
2156
+
2157
+ \begin{verbatim}
2158
+ ## [1] 1 2 3
2159
+ ## [1] 2 5 8
2160
+ \end{verbatim}
2161
+
2162
+ Indexing with a vector is also possible for matrices. In the following
2163
+ example we want rows 1 and 3 and columns 2 and 3 building a 2 x 2
2164
+ matrix.
2165
+
2166
+ \begin{Shaded}
2167
+ \begin{Highlighting}[]
2168
+ \NormalTok{puts mat_row[R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{3}\NormalTok{), R.c(}\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{)]}
2169
+ \end{Highlighting}
2170
+ \end{Shaded}
2171
+
2172
+ \begin{verbatim}
2173
+ ## [,1] [,2]
2174
+ ## [1,] 2 3
2175
+ ## [2,] 8 9
2176
+ \end{verbatim}
2177
+
2178
+ Matrices can be combined with functions `rbind':
2179
+
2180
+ \begin{Shaded}
2181
+ \begin{Highlighting}[]
2182
+ \NormalTok{puts mat_row.rbind(mat)}
2183
+ \end{Highlighting}
2184
+ \end{Shaded}
2185
+
2186
+ \begin{verbatim}
2187
+ ## [,1] [,2] [,3]
2188
+ ## [1,] 1 2 3
2189
+ ## [2,] 4 5 6
2190
+ ## [3,] 7 8 9
2191
+ ## [4,] 1 4 7
2192
+ ## [5,] 2 5 8
2193
+ ## [6,] 3 6 9
2194
+ \end{verbatim}
2195
+
2196
+ and `cbind':
2197
+
2198
+ \begin{Shaded}
2199
+ \begin{Highlighting}[]
2200
+ \NormalTok{puts mat_row.cbind(mat)}
2201
+ \end{Highlighting}
2202
+ \end{Shaded}
2203
+
2204
+ \begin{verbatim}
2205
+ ## [,1] [,2] [,3] [,4] [,5] [,6]
2206
+ ## [1,] 1 2 3 1 4 7
2207
+ ## [2,] 4 5 6 2 5 8
2208
+ ## [3,] 7 8 9 3 6 9
2209
+ \end{verbatim}
2210
+
2211
+ \subsection{List}\label{list}
2212
+
2213
+ A list is a data structure that can contain sublists of different types,
2214
+ while vector and matrix can only hold one type of element.
2215
+
2216
+ \begin{Shaded}
2217
+ \begin{Highlighting}[]
2218
+ \NormalTok{nums = R.c(}\FloatTok{1.0}\NormalTok{, }\FloatTok{2.0}\NormalTok{, }\FloatTok{3.0}\NormalTok{)}
2219
+ \NormalTok{strs = R.c(}\StringTok{"a"}\NormalTok{, }\StringTok{"b"}\NormalTok{, }\StringTok{"c"}\NormalTok{, }\StringTok{"d"}\NormalTok{)}
2220
+ \NormalTok{bool = R.c(}\DecValTok{true}\NormalTok{, }\DecValTok{true}\NormalTok{, }\DecValTok{false}\NormalTok{)}
2221
+ \NormalTok{lst = R.list(}\StringTok{nums: }\NormalTok{nums, }\StringTok{strs: }\NormalTok{strs, }\StringTok{bool: }\NormalTok{bool)}
2222
+ \NormalTok{puts lst}
2223
+ \end{Highlighting}
2224
+ \end{Shaded}
2225
+
2226
+ \begin{verbatim}
2227
+ ## $nums
2228
+ ## [1] 1 2 3
2229
+ ##
2230
+ ## $strs
2231
+ ## [1] "a" "b" "c" "d"
2232
+ ##
2233
+ ## $bool
2234
+ ## [1] TRUE TRUE FALSE
2235
+ \end{verbatim}
2236
+
2237
+ Note that `lst' elements are named elements.
2238
+
2239
+ \subsubsection{List Indexing}\label{list-indexing}
2240
+
2241
+ List indexing, also called slicing, is done using the `{[}{]}' operator
2242
+ and the `{[}{[}{]}{]}' operator. Let's first start with the `{[}{]}'
2243
+ operator. The list above has three sublists; indexing with `{[}{]}' will
2244
+ return one of the sublists.
2245
+
2246
+ \begin{Shaded}
2247
+ \begin{Highlighting}[]
2248
+ \NormalTok{puts lst[}\DecValTok{1}\NormalTok{]}
2249
+ \end{Highlighting}
2250
+ \end{Shaded}
2251
+
2252
+ \begin{verbatim}
2253
+ ## $nums
2254
+ ## [1] 1 2 3
2255
+ \end{verbatim}
2256
+
2257
+ Note that when using `{[}{]}' a new list is returned. When using the
2258
+ double square bracket operator the value returned is the actual element
2259
+ of the list in the given position and not a slice of the original list
2260
+
2261
+ \begin{Shaded}
2262
+ \begin{Highlighting}[]
2263
+ \NormalTok{puts lst[[}\DecValTok{1}\NormalTok{]]}
2264
+ \end{Highlighting}
2265
+ \end{Shaded}
2266
+
2267
+ \begin{verbatim}
2268
+ ## [1] 1 2 3
2269
+ \end{verbatim}
2270
+
2271
+ When elements are named, as done with lst, indexing can be done by
2272
+ name:
2273
+
2274
+ \begin{Shaded}
2275
+ \begin{Highlighting}[]
2276
+ \NormalTok{puts lst[[}\StringTok{'bool'}\NormalTok{]][[}\DecValTok{1}\NormalTok{]] >> }\DecValTok{0}
2277
+ \end{Highlighting}
2278
+ \end{Shaded}
2279
+
2280
+ \begin{verbatim}
2281
+ ## true
2282
+ \end{verbatim}
2283
+
2284
+ In this example, first the `bool' element of the list was extracted, not
2285
+ as a list, but as a vector, then the first element of the vector was
2286
+ extracted (note that vectors also accept the `{[}{[}{]}{]}' operator)
2287
+ and then the vector was indexed by its first element, extracting the
2288
+ native Ruby type.
2289
+
2290
+ \subsection{Data Frame}\label{data-frame}
2291
+
2292
+ A data frame is a table like structure in which each column has the same
2293
+ number of rows. Data frames are the basic structure for storing data for
2294
+ data analysis. We have already seen a data frame previously when we
2295
+ accessed variable `\textasciitilde{}:mtcars'. In order to create a data
2296
+ frame, function 'data\_\_frame' is used:
2297
+
2298
+ \begin{Shaded}
2299
+ \begin{Highlighting}[]
2300
+ \NormalTok{df = R.data__frame(}
2301
+ \StringTok{year: }\NormalTok{R.c(}\DecValTok{2010}\NormalTok{, }\DecValTok{2011}\NormalTok{, }\DecValTok{2012}\NormalTok{),}
2302
+ \StringTok{income: }\NormalTok{R.c(}\FloatTok{1000.0}\NormalTok{, }\FloatTok{1500.0}\NormalTok{, }\FloatTok{2000.0}\NormalTok{))}
2303
+
2304
+ \NormalTok{puts df}
2305
+ \end{Highlighting}
2306
+ \end{Shaded}
2307
+
2308
+ \begin{verbatim}
2309
+ ## year income
2310
+ ## 1 2010 1000
2311
+ ## 2 2011 1500
2312
+ ## 3 2012 2000
2313
+ \end{verbatim}
2314
+
2315
+ \subsubsection{Data Frame Indexing}\label{data-frame-indexing}
2316
+
2317
+ A data frame can be indexed the same way as a matrix, by using `{[}row,
2318
+ column{]}', where row and column can either be a numeric or the name of
2319
+ the row or column
2320
+
2321
+ \begin{Shaded}
2322
+ \begin{Highlighting}[]
2323
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{).head}
2324
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{)[}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{]}
2325
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{)[}\StringTok{'Datsun 710'}\NormalTok{, }\StringTok{'mpg'}\NormalTok{]}
2326
+ \end{Highlighting}
2327
+ \end{Shaded}
2328
+
2329
+ \begin{verbatim}
2330
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2331
+ ## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
2332
+ ## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
2333
+ ## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
2334
+ ## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
2335
+ ## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
2336
+ ## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
2337
+ ## [1] 6
2338
+ ## [1] 22.8
2339
+ \end{verbatim}
2340
+
2341
+ Extracting a column from a data frame as a vector can be done by using
2342
+ the double square bracket operator:
2343
+
2344
+ \begin{Shaded}
2345
+ \begin{Highlighting}[]
2346
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{)[[}\StringTok{'mpg'}\NormalTok{]]}
2347
+ \end{Highlighting}
2348
+ \end{Shaded}
2349
+
2350
+ \begin{verbatim}
2351
+ ## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
2352
+ ## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
2353
+ ## [29] 15.8 19.7 15.0 21.4
2354
+ \end{verbatim}
2355
+
2356
+ A data frame column can also be accessed as if it were an instance
2357
+ variable of the data frame:
2358
+
2359
+ \begin{Shaded}
2360
+ \begin{Highlighting}[]
2361
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{).mpg}
2362
+ \end{Highlighting}
2363
+ \end{Shaded}
2364
+
2365
+ \begin{verbatim}
2366
+ ## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
2367
+ ## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
2368
+ ## [29] 15.8 19.7 15.0 21.4
2369
+ \end{verbatim}
2370
+
2371
+ Slicing a data frame can be done by indexing it with a vector (we use
2372
+ `head' to reduce the output):
2373
+
2374
+ \begin{Shaded}
2375
+ \begin{Highlighting}[]
2376
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{)[R.c(}\StringTok{'mpg'}\NormalTok{, }\StringTok{'hp'}\NormalTok{)].head}
2377
+ \end{Highlighting}
2378
+ \end{Shaded}
2379
+
2380
+ \begin{verbatim}
2381
+ ## mpg hp
2382
+ ## Mazda RX4 21.0 110
2383
+ ## Mazda RX4 Wag 21.0 110
2384
+ ## Datsun 710 22.8 93
2385
+ ## Hornet 4 Drive 21.4 110
2386
+ ## Hornet Sportabout 18.7 175
2387
+ ## Valiant 18.1 105
2388
+ \end{verbatim}
2389
+
2390
+ A row slice can be obtained by indexing by row and using the `:all'
2391
+ keyword for the column:
2392
+
2393
+ \begin{Shaded}
2394
+ \begin{Highlighting}[]
2395
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{)[R.c(}\StringTok{'Datsun 710'}\NormalTok{, }\StringTok{'Camaro Z28'}\NormalTok{), }\StringTok{:all}\NormalTok{]}
2396
+ \end{Highlighting}
2397
+ \end{Shaded}
2398
+
2399
+ \begin{verbatim}
2400
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2401
+ ## Datsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1
2402
+ ## Camaro Z28 13.3 8 350 245 3.73 3.84 15.41 0 0 3 4
2403
+ \end{verbatim}
2404
+
2405
+ Finally, a data frame can also be indexed with a logical vector. In this
2406
+ next example, the `am' column of :mtcars is compared with 0 (with method
2407
+ `eq'). When `am' is equal to 0 the car is automatic. So, by doing
2408
+ `(\textasciitilde{}:mtcars).am.eq 0' a logical vector is created with
2409
+ `true' whenever `am' is 0 and `false' otherwise.
2410
+
2411
+ \begin{Shaded}
2412
+ \begin{Highlighting}[]
2413
+ \CommentTok{# obtain a vector with 'true' for cars with automatic transmission}
2414
+ \NormalTok{automatic = (~}\StringTok{:mtcars}\NormalTok{).am.eq }\DecValTok{0}
2415
+ \NormalTok{puts automatic}
2416
+ \end{Highlighting}
2417
+ \end{Shaded}
2418
+
2419
+ \begin{verbatim}
2420
+ ## [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
2421
+ ## [12] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE
2422
+ ## [23] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
2423
+ \end{verbatim}
2424
+
2425
+ Using this logical vector, the data frame is indexed, returning a new
2426
+ data frame in which all cars have automatic transmission.
2427
+
2428
+ \begin{Shaded}
2429
+ \begin{Highlighting}[]
2430
+ \CommentTok{# slice the data frame by using this vector}
2431
+ \NormalTok{puts (~}\StringTok{:mtcars}\NormalTok{)[automatic, }\StringTok{:all}\NormalTok{]}
2432
+ \end{Highlighting}
2433
+ \end{Shaded}
2434
+
2435
+ \begin{verbatim}
2436
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2437
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
2438
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
2439
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
2440
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
2441
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
2442
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
2443
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
2444
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
2445
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
2446
+ ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
2447
+ ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
2448
+ ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
2449
+ ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
2450
+ ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
2451
+ ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
2452
+ ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
2453
+ ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
2454
+ ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
2455
+ ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
2456
+ \end{verbatim}
2457
+
2458
+ \section{Writing Expressions in
2459
+ Galaaz}\label{writing-expressions-in-galaaz}
2460
+
2461
+ Galaaz extends Ruby to work with complex expressions, similar to R's
2462
+ expressions built with `quote' (base R) or `quo' (tidyverse). Let's take
2463
+ a look at some of those expressions.
2464
+
2465
+ \subsection{Expressions from
2466
+ operators}\label{expressions-from-operators}
2467
+
2468
+ The code below creates an expression summing two symbols
2469
+
2470
+ \begin{Shaded}
2471
+ \begin{Highlighting}[]
2472
+ \NormalTok{exp1 = }\StringTok{:a}\NormalTok{ + }\StringTok{:b}
2473
+ \NormalTok{puts exp1}
2474
+ \end{Highlighting}
2475
+ \end{Shaded}
2476
+
2477
+ \begin{verbatim}
2478
+ ## a + b
2479
+ \end{verbatim}
2480
+
2481
+ We can build any complex mathematical expression
2482
+
2483
+ \begin{Shaded}
2484
+ \begin{Highlighting}[]
2485
+ \NormalTok{exp2 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) * }\FloatTok{2.0}\NormalTok{ + }\StringTok{:c}\NormalTok{ ** }\DecValTok{2}\NormalTok{ / }\StringTok{:z}
2486
+ \NormalTok{puts exp2}
2487
+ \end{Highlighting}
2488
+ \end{Shaded}
2489
+
2490
+ \begin{verbatim}
2491
+ ## (a + b) * 2 + c^2L/z
2492
+ \end{verbatim}
2493
+
2494
+ It is also possible to use inequality operators in building expressions
2495
+
2496
+ \begin{Shaded}
2497
+ \begin{Highlighting}[]
2498
+ \NormalTok{exp3 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) >= }\StringTok{:z}
2499
+ \NormalTok{puts exp3}
2500
+ \end{Highlighting}
2501
+ \end{Shaded}
2502
+
2503
+ \begin{verbatim}
2504
+ ## a + b >= z
2505
+ \end{verbatim}
2506
+
2507
+ Galaaz provides both symbolic representations for operators, such as
2508
+ (\textgreater{}, \textless{}, !=) and functional notation for those
2509
+ operators such as (.gt, .ge, etc.). So the same expression written above
2510
+ can also be written as
2511
+
2512
+ \begin{Shaded}
2513
+ \begin{Highlighting}[]
2514
+ \NormalTok{exp4 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{).ge }\StringTok{:z}
2515
+ \NormalTok{puts exp4}
2516
+ \end{Highlighting}
2517
+ \end{Shaded}
2518
+
2519
+ \begin{verbatim}
2520
+ ## a + b >= z
2521
+ \end{verbatim}
2522
+
2523
+ Two types of expression can only be created with the functional
2524
+ representation of the operators, those are expressions involving `==',
2525
+ and `='. In order to write an expression involving `==' we need to use
2526
+ the method `.eq' and for `=' we need the function `.assign'
2527
+
2528
+ \begin{Shaded}
2529
+ \begin{Highlighting}[]
2530
+ \NormalTok{exp5 = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{).eq }\StringTok{:z}
2531
+ \NormalTok{puts exp5}
2532
+ \end{Highlighting}
2533
+ \end{Shaded}
2534
+
2535
+ \begin{verbatim}
2536
+ ## a + b == z
2537
+ \end{verbatim}
2538
+
2539
+ \begin{Shaded}
2540
+ \begin{Highlighting}[]
2541
+ \NormalTok{exp6 = }\StringTok{:y}\NormalTok{.assign }\StringTok{:a}\NormalTok{ + }\StringTok{:b}
2542
+ \NormalTok{puts exp6}
2543
+ \end{Highlighting}
2544
+ \end{Shaded}
2545
+
2546
+ \begin{verbatim}
2547
+ ## y <- a + b
2548
+ \end{verbatim}
2549
+
2550
+ In general we think that using the functional notation is preferable to
2551
+ using the symbolic notation as otherwise, we end up writing invalid
2552
+ expressions such as
2553
+
2554
+ \begin{Shaded}
2555
+ \begin{Highlighting}[]
2556
+ \NormalTok{exp_wrong = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) == }\StringTok{:z}
2557
+ \NormalTok{puts exp_wrong}
2558
+ \end{Highlighting}
2559
+ \end{Shaded}
2560
+
2561
+ and it might be difficult to understand what is going on here. The
2562
+ problem lies with the fact that when using `==' we are comparing
2563
+ expression (:a + :b) to expression :z with `=='. When the comparison is
2564
+ executed, the system tries to evaluate :a, :b and :z, and those symbols
2565
+ at this time are not bound to anything and we get an ``object `a' not
2566
+ found'' message. If we only use functional notation, this type of error
2567
+ will not occur.
2568
+
2569
+ \subsection{Expressions with R
2570
+ methods}\label{expressions-with-r-methods}
2571
+
2572
+ It is often necessary to create an expression that uses a method or
2573
+ function. For instance, in mathematics, it's quite natural to write an
2574
+ expression such as \(y = sin(x)\). In this case, the `sin' function is
2575
+ part of the expression and should not be immediately executed. Now, let's
2576
+ say that `x' is an angle of 45\(^\circ\) and we actually want our
2577
+ expression to be \(y = 0.850...\). When we want the function to be part
2578
+ of the expression, we call the function preceding it by the letter E,
2579
+ such as `E.sin(x)'
2580
+
2581
+ \begin{Shaded}
2582
+ \begin{Highlighting}[]
2583
+ \NormalTok{exp7 = }\StringTok{:y}\NormalTok{.assign E.sin(}\StringTok{:x}\NormalTok{)}
2584
+ \NormalTok{puts exp7}
2585
+ \end{Highlighting}
2586
+ \end{Shaded}
2587
+
2588
+ \begin{verbatim}
2589
+ ## y <- sin(x)
2590
+ \end{verbatim}
2591
+
2592
+ Expressions can also be written using `.' notation:
2593
+
2594
+ \begin{Shaded}
2595
+ \begin{Highlighting}[]
2596
+ \NormalTok{exp8 = }\StringTok{:y}\NormalTok{.assign }\StringTok{:x}\NormalTok{.sin}
2597
+ \NormalTok{puts exp8}
2598
+ \end{Highlighting}
2599
+ \end{Shaded}
2600
+
2601
+ \begin{verbatim}
2602
+ ## y <- sin(x)
2603
+ \end{verbatim}
2604
+
2605
+ When a function has multiple arguments, the first one can be used before
2606
+ the `.':
2607
+
2608
+ \begin{Shaded}
2609
+ \begin{Highlighting}[]
2610
+ \NormalTok{exp9 = }\StringTok{:x}\NormalTok{.c(}\StringTok{:y}\NormalTok{)}
2611
+ \NormalTok{puts exp9}
2612
+ \end{Highlighting}
2613
+ \end{Shaded}
2614
+
2615
+ \begin{verbatim}
2616
+ ## c(x, y)
2617
+ \end{verbatim}
2618
+
2619
+ \subsection{Evaluating an Expression}\label{evaluating-an-expression}
2620
+
2621
+ Expressions can be evaluated by calling function `eval' with a binding.
2622
+ A binding can be provided with a list:
2623
+
2624
+ \begin{Shaded}
2625
+ \begin{Highlighting}[]
2626
+ \NormalTok{exp = (}\StringTok{:a}\NormalTok{ + }\StringTok{:b}\NormalTok{) * }\FloatTok{2.0}\NormalTok{ + }\StringTok{:c}\NormalTok{ ** }\DecValTok{2}\NormalTok{ / }\StringTok{:z}
2627
+ \NormalTok{puts exp.eval(R.list(}\StringTok{a: }\DecValTok{10}\NormalTok{, }\StringTok{b: }\DecValTok{20}\NormalTok{, }\StringTok{c: }\DecValTok{30}\NormalTok{, }\StringTok{z: }\DecValTok{40}\NormalTok{))}
2628
+ \end{Highlighting}
2629
+ \end{Shaded}
2630
+
2631
+ \begin{verbatim}
2632
+ ## [1] 82.5
2633
+ \end{verbatim}
2634
+
2635
+ \ldots{} with a data frame:
2636
+
2637
+ \begin{Shaded}
2638
+ \begin{Highlighting}[]
2639
+ \NormalTok{df = R.data__frame(}
2640
+ \StringTok{a: }\NormalTok{R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{),}
2641
+ \StringTok{b: }\NormalTok{R.c(}\DecValTok{10}\NormalTok{, }\DecValTok{20}\NormalTok{, }\DecValTok{30}\NormalTok{),}
2642
+ \StringTok{c: }\NormalTok{R.c(}\DecValTok{100}\NormalTok{, }\DecValTok{200}\NormalTok{, }\DecValTok{300}\NormalTok{),}
2643
+ \StringTok{z: }\NormalTok{R.c(}\DecValTok{1000}\NormalTok{, }\DecValTok{2000}\NormalTok{, }\DecValTok{3000}\NormalTok{))}
2644
+
2645
+ \NormalTok{puts exp.eval(df)}
2646
+ \end{Highlighting}
2647
+ \end{Shaded}
2648
+
2649
+ \begin{verbatim}
2650
+ ## [1] 32 64 96
2651
+ \end{verbatim}
2652
+
2653
+ \section{Manipulating Data}\label{manipulating-data}
2654
+
2655
+ One of the major benefits of Galaaz is to bring strong data manipulation
2656
+ to Ruby. The following examples were extracted from Hadley's ``R for
2657
+ Data Science'' (\url{https://r4ds.had.co.nz/}). This is a highly
2658
+ recommended book for those not already familiar with the `tidyverse'
2659
+ style of programming in R. In the sections to follow, we will limit
2660
+ ourselves to converting the R code to Galaaz.
2661
+
2662
+ For these examples, we will investigate the nycflights13 data set
2663
+ available on the package by the same name. We use function
2664
+ `R.install\_and\_loads' that checks if the library is available locally,
2665
+ and if not, installs it. This data frame contains all 336,776 flights
2666
+ that departed from New York City in 2013. The data comes from the US
2667
+ Bureau of Transportation Statistics.
2668
+
2669
+ Dplyr uses `tibbles' in place of data frames; unfortunately, tibbles do
2670
+ not yet print properly in Galaaz due to a bug in fastR. In order to
2671
+ print a tibble we need to convert it to a data frame using the
2672
+ 'as\_\_data\_\_frame' method.
2673
+
2674
+ \begin{Shaded}
2675
+ \begin{Highlighting}[]
2676
+ \NormalTok{R.install_and_loads(}\StringTok{'nycflights13'}\NormalTok{)}
2677
+ \NormalTok{R.library(}\StringTok{'dplyr'}\NormalTok{)}
2678
+ \end{Highlighting}
2679
+ \end{Shaded}
2680
+
2681
+ \begin{Shaded}
2682
+ \begin{Highlighting}[]
2683
+ \NormalTok{flights = ~}\StringTok{:flights}
2684
+ \NormalTok{puts flights.head}
2685
+ \end{Highlighting}
2686
+ \end{Shaded}
2687
+
2688
+ \begin{verbatim}
2689
+ ## # A tibble: 6 x 19
2690
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2691
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2692
+ ## 1 2013 1 1 517 515 2 830
2693
+ ## 2 2013 1 1 533 529 4 850
2694
+ ## 3 2013 1 1 542 540 2 923
2695
+ ## 4 2013 1 1 544 545 -1 1004
2696
+ ## 5 2013 1 1 554 600 -6 812
2697
+ ## 6 2013 1 1 554 558 -4 740
2698
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2699
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2700
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2701
+ ## # time_hour <dttm>
2702
+ \end{verbatim}
2703
+
2704
+ \subsection{Filtering rows with
2705
+ Filter}\label{filtering-rows-with-filter}
2706
+
2707
+ In this example we filter the flights data set by giving to the filter
2708
+ function two expressions: the first, :month.eq 1, and the second, :day.eq 1
2709
+
2710
+ \begin{Shaded}
2711
+ \begin{Highlighting}[]
2712
+ \NormalTok{puts flights.filter((}\StringTok{:month}\NormalTok{.eq }\DecValTok{1}\NormalTok{), (}\StringTok{:day}\NormalTok{.eq }\DecValTok{1}\NormalTok{)).head}
2713
+ \end{Highlighting}
2714
+ \end{Shaded}
2715
+
2716
+ \begin{verbatim}
2717
+ ## # A tibble: 6 x 19
2718
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2719
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2720
+ ## 1 2013 1 1 517 515 2 830
2721
+ ## 2 2013 1 1 533 529 4 850
2722
+ ## 3 2013 1 1 542 540 2 923
2723
+ ## 4 2013 1 1 544 545 -1 1004
2724
+ ## 5 2013 1 1 554 600 -6 812
2725
+ ## 6 2013 1 1 554 558 -4 740
2726
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2727
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2728
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2729
+ ## # time_hour <dttm>
2730
+ \end{verbatim}
2731
+
2732
+ \subsection{Logical Operators}\label{logical-operators}
2733
+
2734
+ All flights that departed in November or December
2735
+
2736
+ \begin{Shaded}
2737
+ \begin{Highlighting}[]
2738
+ \NormalTok{puts flights.filter((}\StringTok{:month}\NormalTok{.eq }\DecValTok{11}\NormalTok{) | (}\StringTok{:month}\NormalTok{.eq }\DecValTok{12}\NormalTok{)).head}
2739
+ \end{Highlighting}
2740
+ \end{Shaded}
2741
+
2742
+ \begin{verbatim}
2743
+ ## # A tibble: 6 x 19
2744
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2745
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2746
+ ## 1 2013 11 1 5 2359 6 352
2747
+ ## 2 2013 11 1 35 2250 105 123
2748
+ ## 3 2013 11 1 455 500 -5 641
2749
+ ## 4 2013 11 1 539 545 -6 856
2750
+ ## 5 2013 11 1 542 545 -3 831
2751
+ ## 6 2013 11 1 549 600 -11 912
2752
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2753
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2754
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2755
+ ## # time_hour <dttm>
2756
+ \end{verbatim}
2757
+
2758
+ The same as above, but using the `in' operator. In R, it is possible to
2759
+ define many operators by doing \%op\%. The \%in\% operator checks if a
2760
+ value is in a vector. In order to use those operators from Galaaz the
2761
+ `.\_' method is used, where the first argument is the operator's symbol,
2761
+ in this case `:in', and the second argument is the vector:
2763
+
2764
+ \begin{Shaded}
2765
+ \begin{Highlighting}[]
2766
+ \NormalTok{puts flights.filter(}\StringTok{:month}\NormalTok{._ }\StringTok{:in}\NormalTok{, R.c(}\DecValTok{11}\NormalTok{, }\DecValTok{12}\NormalTok{)).head}
2767
+ \end{Highlighting}
2768
+ \end{Shaded}
2769
+
2770
+ \begin{verbatim}
2771
+ ## # A tibble: 6 x 19
2772
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2773
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2774
+ ## 1 2013 11 1 5 2359 6 352
2775
+ ## 2 2013 11 1 35 2250 105 123
2776
+ ## 3 2013 11 1 455 500 -5 641
2777
+ ## 4 2013 11 1 539 545 -6 856
2778
+ ## 5 2013 11 1 542 545 -3 831
2779
+ ## 6 2013 11 1 549 600 -11 912
2780
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2781
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2782
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2783
+ ## # time_hour <dttm>
2784
+ \end{verbatim}
2785
+
2786
+ \subsection{Filtering with NA (Not
2787
+ Available)}\label{filtering-with-na-not-available}
2788
+
2789
+ Let's first create a `tibble' with a Not Available value (R::NA).
2790
+ Tibbles are a modern version of a data frame and operate very similarly
2791
+ to one. They differ in how they output the values and the result of some
2792
+ subsetting operations that are more consistent than what is obtained
2793
+ from a data frame.
2794
+
2795
+ \begin{Shaded}
2796
+ \begin{Highlighting}[]
2797
+ \NormalTok{df = R.tibble(}\StringTok{x: }\NormalTok{R.c(}\DecValTok{1}\NormalTok{, R::}\DataTypeTok{NA}\NormalTok{, }\DecValTok{3}\NormalTok{))}
2798
+ \NormalTok{puts df}
2799
+ \end{Highlighting}
2800
+ \end{Shaded}
2801
+
2802
+ \begin{verbatim}
2803
+ ## # A tibble: 3 x 1
2804
+ ## x
2805
+ ## <int>
2806
+ ## 1 1
2807
+ ## 2
2808
+ ## 3 3
2809
+ \end{verbatim}
2810
+
2811
+ Now filtering by :x \textgreater{} 1 shows all lines that satisfy this
2812
+ condition, where the row with R::NA does not.
2813
+
2814
+ \begin{Shaded}
2815
+ \begin{Highlighting}[]
2816
+ \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{ > }\DecValTok{1}\NormalTok{)}
2817
+ \end{Highlighting}
2818
+ \end{Shaded}
2819
+
2820
+ \begin{verbatim}
2821
+ ## # A tibble: 1 x 1
2822
+ ## x
2823
+ ## <int>
2824
+ ## 1 3
2825
+ \end{verbatim}
2826
+
2827
+ To match an NA use method 'is\_\_na'
2828
+
2829
+ \begin{Shaded}
2830
+ \begin{Highlighting}[]
2831
+ \NormalTok{puts df.filter((}\StringTok{:x}\NormalTok{.is__na) | (}\StringTok{:x}\NormalTok{ > }\DecValTok{1}\NormalTok{))}
2832
+ \end{Highlighting}
2833
+ \end{Shaded}
2834
+
2835
+ \begin{verbatim}
2836
+ ## # A tibble: 2 x 1
2837
+ ## x
2838
+ ## <int>
2839
+ ## 1
2840
+ ## 2 3
2841
+ \end{verbatim}
2842
+
2843
+ \subsection{Arrange Rows with arrange}\label{arrange-rows-with-arrange}
2844
+
2845
+ Arrange reorders the rows of a data frame by the given arguments.
2846
+
2847
+ \begin{Shaded}
2848
+ \begin{Highlighting}[]
2849
+ \NormalTok{puts flights.arrange(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{).head}
2850
+ \end{Highlighting}
2851
+ \end{Shaded}
2852
+
2853
+ \begin{verbatim}
2854
+ ## # A tibble: 6 x 19
2855
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2856
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2857
+ ## 1 2013 1 1 517 515 2 830
2858
+ ## 2 2013 1 1 533 529 4 850
2859
+ ## 3 2013 1 1 542 540 2 923
2860
+ ## 4 2013 1 1 544 545 -1 1004
2861
+ ## 5 2013 1 1 554 600 -6 812
2862
+ ## 6 2013 1 1 554 558 -4 740
2863
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2864
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2865
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2866
+ ## # time_hour <dttm>
2867
+ \end{verbatim}
2868
+
2869
+ To arrange in descending order, use function `desc'
2870
+
2871
+ \begin{Shaded}
2872
+ \begin{Highlighting}[]
2873
+ \NormalTok{puts flights.arrange(}\StringTok{:dep_delay}\NormalTok{.desc).head}
2874
+ \end{Highlighting}
2875
+ \end{Shaded}
2876
+
2877
+ \begin{verbatim}
2878
+ ## # A tibble: 6 x 19
2879
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2880
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2881
+ ## 1 2013 1 9 641 900 1301 1242
2882
+ ## 2 2013 6 15 1432 1935 1137 1607
2883
+ ## 3 2013 1 10 1121 1635 1126 1239
2884
+ ## 4 2013 9 20 1139 1845 1014 1457
2885
+ ## 5 2013 7 22 845 1600 1005 1044
2886
+ ## 6 2013 4 10 1100 1900 960 1342
2887
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2888
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2889
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2890
+ ## # time_hour <dttm>
2891
+ \end{verbatim}
2892
+
2893
+ \subsection{Selecting columns}\label{selecting-columns}
2894
+
2895
+ To select specific columns from a dataset we use function `select':
2896
+
2897
+ \begin{Shaded}
2898
+ \begin{Highlighting}[]
2899
+ \NormalTok{puts flights.select(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{).head}
2900
+ \end{Highlighting}
2901
+ \end{Shaded}
2902
+
2903
+ \begin{verbatim}
2904
+ ## # A tibble: 6 x 3
2905
+ ## year month day
2906
+ ## <int> <int> <int>
2907
+ ## 1 2013 1 1
2908
+ ## 2 2013 1 1
2909
+ ## 3 2013 1 1
2910
+ ## 4 2013 1 1
2911
+ ## 5 2013 1 1
2912
+ ## 6 2013 1 1
2913
+ \end{verbatim}
2914
+
2915
+ It is also possible to select columns in a given range
2916
+
2917
+ \begin{Shaded}
2918
+ \begin{Highlighting}[]
2919
+ \NormalTok{puts flights.select(}\StringTok{:year}\NormalTok{.up_to }\StringTok{:day}\NormalTok{).head}
2920
+ \end{Highlighting}
2921
+ \end{Shaded}
2922
+
2923
+ \begin{verbatim}
2924
+ ## # A tibble: 6 x 3
2925
+ ## year month day
2926
+ ## <int> <int> <int>
2927
+ ## 1 2013 1 1
2928
+ ## 2 2013 1 1
2929
+ ## 3 2013 1 1
2930
+ ## 4 2013 1 1
2931
+ ## 5 2013 1 1
2932
+ ## 6 2013 1 1
2933
+ \end{verbatim}
2934
+
2935
+ Select all columns that start with a given name sequence
2936
+
2937
+ \begin{Shaded}
2938
+ \begin{Highlighting}[]
2939
+ \NormalTok{puts flights.select(E.starts_with(}\StringTok{'arr'}\NormalTok{)).head}
2940
+ \end{Highlighting}
2941
+ \end{Shaded}
2942
+
2943
+ \begin{verbatim}
2944
+ ## # A tibble: 6 x 2
2945
+ ## arr_time arr_delay
2946
+ ## <int> <dbl>
2947
+ ## 1 830 11
2948
+ ## 2 850 20
2949
+ ## 3 923 33
2950
+ ## 4 1004 -18
2951
+ ## 5 812 -25
2952
+ ## 6 740 12
2953
+ \end{verbatim}
2954
+
2955
+ Other functions that can be used:
2956
+
2957
+ \begin{itemize}
2958
+ \item
2959
+ ends\_with(``xyz''): matches names that end with ``xyz''.
2960
+ \item
2961
+ contains(``ijk''): matches names that contain ``ijk''.
2962
+ \item
2963
+ matches(``(.)\textbackslash{}1''): selects variables that match a
2964
+ regular expression. This one matches any variables that contain
2965
+ repeated characters.
2966
+ \item
2967
+ num\_range(``x'', (1..3)): matches x1, x2 and x3
2968
+ \end{itemize}
2969
+
2970
+ A helper function that comes in handy when we just want to rearrange
2971
+ column order is `everything':
2972
+
2973
+ \begin{Shaded}
2974
+ \begin{Highlighting}[]
2975
+ \NormalTok{puts flights.select(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{, E.everything).head}
2976
+ \end{Highlighting}
2977
+ \end{Shaded}
2978
+
2979
+ \begin{verbatim}
2980
+ ## # A tibble: 6 x 19
2981
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2982
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2983
+ ## 1 2013 1 1 517 515 2 830
2984
+ ## 2 2013 1 1 533 529 4 850
2985
+ ## 3 2013 1 1 542 540 2 923
2986
+ ## 4 2013 1 1 544 545 -1 1004
2987
+ ## 5 2013 1 1 554 600 -6 812
2988
+ ## 6 2013 1 1 554 558 -4 740
2989
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2990
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2991
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2992
+ ## # time_hour <dttm>
2993
+ \end{verbatim}
2994
+
2995
+ \subsection{\texorpdfstring{Add variables to a dataframe with
2996
+ `mutate'}{Add variables to a dataframe with mutate}}\label{add-variables-to-a-dataframe-with-mutate}
2997
+
2998
+ \begin{Shaded}
2999
+ \begin{Highlighting}[]
3000
+ \NormalTok{flights_sm = flights.}
3001
+ \NormalTok{ select((}\StringTok{:year}\NormalTok{.up_to }\StringTok{:day}\NormalTok{),}
3002
+ \NormalTok{ E.ends_with(}\StringTok{'delay'}\NormalTok{),}
3003
+ \StringTok{:distance}\NormalTok{,}
3004
+ \StringTok{:air_time}\NormalTok{)}
3005
+
3006
+ \NormalTok{puts flights_sm.head}
3007
+ \end{Highlighting}
3008
+ \end{Shaded}
3009
+
3010
+ \begin{verbatim}
3011
+ ## # A tibble: 6 x 7
3012
+ ## year month day dep_delay arr_delay distance air_time
3013
+ ## <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
3014
+ ## 1 2013 1 1 2 11 1400 227
3015
+ ## 2 2013 1 1 4 20 1416 227
3016
+ ## 3 2013 1 1 2 33 1089 160
3017
+ ## 4 2013 1 1 -1 -18 1576 183
3018
+ ## 5 2013 1 1 -6 -25 762 116
3019
+ ## 6 2013 1 1 -4 12 719 150
3020
+ \end{verbatim}
3021
+
3022
+ \begin{Shaded}
3023
+ \begin{Highlighting}[]
3024
+ \NormalTok{flights_sm = flights_sm.}
3025
+ \NormalTok{ mutate(}\StringTok{gain: :dep_delay}\NormalTok{ - }\StringTok{:arr_delay}\NormalTok{,}
3026
+ \StringTok{speed: :distance}\NormalTok{ / }\StringTok{:air_time}\NormalTok{ * }\DecValTok{60}\NormalTok{)}
3027
+ \NormalTok{puts flights_sm.head}
3028
+ \end{Highlighting}
3029
+ \end{Shaded}
3030
+
3031
+ \begin{verbatim}
3032
+ ## # A tibble: 6 x 9
3033
+ ## year month day dep_delay arr_delay distance air_time gain speed
3034
+ ## <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
3035
+ ## 1 2013 1 1 2 11 1400 227 -9 370.
3036
+ ## 2 2013 1 1 4 20 1416 227 -16 374.
3037
+ ## 3 2013 1 1 2 33 1089 160 -31 408.
3038
+ ## 4 2013 1 1 -1 -18 1576 183 17 517.
3039
+ ## 5 2013 1 1 -6 -25 762 116 19 394.
3040
+ ## 6 2013 1 1 -4 12 719 150 -16 288.
3041
+ \end{verbatim}
3042
+
3043
+ \subsection{Summarising data}\label{summarising-data}
3044
+
3045
+ Function `summarise' calculates summaries for the data frame. When no
3046
+ `group\_by' is used a single value is obtained from the data frame:
3047
+
3048
+ \begin{Shaded}
3049
+ \begin{Highlighting}[]
3050
+ \NormalTok{puts flights.summarise(}\StringTok{delay: }\NormalTok{E.mean(}\StringTok{:dep_delay}\NormalTok{, }\StringTok{na__rm: }\DecValTok{true}\NormalTok{))}
3051
+ \end{Highlighting}
3052
+ \end{Shaded}
3053
+
3054
+ \begin{verbatim}
3055
+ ## # A tibble: 1 x 1
3056
+ ## delay
3057
+ ## <dbl>
3058
+ ## 1 12.6
3059
+ \end{verbatim}
3060
+
3061
+ When a data frame is grouped with `group\_by' summaries apply to the
3062
+ given group:
3063
+
3064
+ \begin{Shaded}
3065
+ \begin{Highlighting}[]
3066
+ \NormalTok{by_day = flights.group_by(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{)}
3067
+ \NormalTok{puts by_day.summarise(}\StringTok{delay: :dep_delay}\NormalTok{.mean(}\StringTok{na__rm: }\DecValTok{true}\NormalTok{)).head}
3068
+ \end{Highlighting}
3069
+ \end{Shaded}
3070
+
3071
+ \begin{verbatim}
3072
+ ## # A tibble: 6 x 4
3073
+ ## # Groups: year, month [1]
3074
+ ## year month day delay
3075
+ ## * <int> <int> <int> <dbl>
3076
+ ## 1 2013 1 1 11.5
3077
+ ## 2 2013 1 2 13.9
3078
+ ## 3 2013 1 3 11.0
3079
+ ## 4 2013 1 4 8.95
3080
+ ## 5 2013 1 5 5.73
3081
+ ## 6 2013 1 6 7.15
3082
+ \end{verbatim}
3083
+
3084
+ Next we put many operations together by piping them one after the
3085
+ other:
3086
+
3087
+ \begin{Shaded}
3088
+ \begin{Highlighting}[]
3089
+ \NormalTok{delays = flights.}
3090
+ \NormalTok{ group_by(}\StringTok{:dest}\NormalTok{).}
3091
+ \NormalTok{ summarise(}
3092
+ \StringTok{count: }\NormalTok{E.n,}
3093
+ \StringTok{dist: :distance}\NormalTok{.mean(}\StringTok{na__rm: }\DecValTok{true}\NormalTok{),}
3094
+ \StringTok{delay: :arr_delay}\NormalTok{.mean(}\StringTok{na__rm: }\DecValTok{true}\NormalTok{)).}
3095
+ \NormalTok{ filter(}\StringTok{:count}\NormalTok{ > }\DecValTok{20}\NormalTok{, }\StringTok{:dest}\NormalTok{ != }\StringTok{"NHL"}\NormalTok{)}
3096
+
3097
+ \NormalTok{puts delays.head}
3098
+ \end{Highlighting}
3099
+ \end{Shaded}
3100
+
3101
+ \begin{verbatim}
3102
+ ## # A tibble: 6 x 4
3103
+ ## dest count dist delay
3104
+ ## <chr> <int> <dbl> <dbl>
3105
+ ## 1 ABQ 254 1826 4.38
3106
+ ## 2 ACK 265 199 4.85
3107
+ ## 3 ALB 439 143 14.4
3108
+ ## 4 ATL 17215 757. 11.3
3109
+ ## 5 AUS 2439 1514. 6.02
3110
+ ## 6 AVL 275 584. 8.00
3111
+ \end{verbatim}
3112
+
3113
+ \section{Using Data Table}\label{using-data-table}
3114
+
3115
+ \begin{Shaded}
3116
+ \begin{Highlighting}[]
3117
+ \NormalTok{R.library(}\StringTok{'data.table'}\NormalTok{)}
3118
+ \NormalTok{R.install_and_loads(}\StringTok{'curl'}\NormalTok{)}
3119
+
3120
+ \NormalTok{input = }\StringTok{"https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv"}
3121
+ \NormalTok{flights = R.fread(input)}
3122
+ \NormalTok{puts flights}
3123
+ \NormalTok{puts flights.dim}
3124
+ \end{Highlighting}
3125
+ \end{Shaded}
3126
+
3127
+ \begin{verbatim}
3128
+ ## year month day dep_delay arr_delay carrier origin dest air_time
3129
+ ## 1: 2014 1 1 14 13 AA JFK LAX 359
3130
+ ## 2: 2014 1 1 -3 13 AA JFK LAX 363
3131
+ ## 3: 2014 1 1 2 9 AA JFK LAX 351
3132
+ ## 4: 2014 1 1 -8 -26 AA LGA PBI 157
3133
+ ## 5: 2014 1 1 2 1 AA JFK LAX 350
3134
+ ## ---
3135
+ ## 253312: 2014 10 31 1 -30 UA LGA IAH 201
3136
+ ## 253313: 2014 10 31 -5 -14 UA EWR IAH 189
3137
+ ## 253314: 2014 10 31 -8 16 MQ LGA RDU 83
3138
+ ## 253315: 2014 10 31 -4 15 MQ LGA DTW 75
3139
+ ## 253316: 2014 10 31 -5 1 MQ LGA SDF 110
3140
+ ## distance hour
3141
+ ## 1: 2475 9
3142
+ ## 2: 2475 11
3143
+ ## 3: 2475 19
3144
+ ## 4: 1035 7
3145
+ ## 5: 2475 13
3146
+ ## ---
3147
+ ## 253312: 1416 14
3148
+ ## 253313: 1400 8
3149
+ ## 253314: 431 11
3150
+ ## 253315: 502 11
3151
+ ## 253316: 659 8
3152
+ ## [1] 253316 11
3153
+ \end{verbatim}
3154
+
3155
+ \begin{Shaded}
3156
+ \begin{Highlighting}[]
3157
+
3158
+ \NormalTok{data_table = R.data__table(}
3159
+ \DataTypeTok{ID}\NormalTok{: R.c(}\StringTok{"b"}\NormalTok{,}\StringTok{"b"}\NormalTok{,}\StringTok{"b"}\NormalTok{,}\StringTok{"a"}\NormalTok{,}\StringTok{"a"}\NormalTok{,}\StringTok{"c"}\NormalTok{),}
3160
+ \StringTok{a: }\NormalTok{(}\DecValTok{1}\NormalTok{..}\DecValTok{6}\NormalTok{),}
3161
+ \StringTok{b: }\NormalTok{(}\DecValTok{7}\NormalTok{..}\DecValTok{12}\NormalTok{),}
3162
+ \StringTok{c: }\NormalTok{(}\DecValTok{13}\NormalTok{..}\DecValTok{18}\NormalTok{)}
3163
+ \NormalTok{)}
3164
+
3165
+ \NormalTok{puts data_table}
3166
+ \NormalTok{puts data_table.}\DataTypeTok{ID}
3167
+ \end{Highlighting}
3168
+ \end{Shaded}
3169
+
3170
+ \begin{verbatim}
3171
+ ## ID a b c
3172
+ ## 1: b 1 7 13
3173
+ ## 2: b 2 8 14
3174
+ ## 3: b 3 9 15
3175
+ ## 4: a 4 10 16
3176
+ ## 5: a 5 11 17
3177
+ ## 6: c 6 12 18
3178
+ ## [1] "b" "b" "b" "a" "a" "c"
3179
+ \end{verbatim}
3180
+
3181
+ \begin{Shaded}
3182
+ \begin{Highlighting}[]
3183
+ \CommentTok{# subset rows in i}
3184
+ \NormalTok{ans = flights[(}\StringTok{:origin}\NormalTok{.eq }\StringTok{"JFK"}\NormalTok{) & (}\StringTok{:month}\NormalTok{.eq }\DecValTok{6}\NormalTok{)]}
3185
+ \NormalTok{puts ans.head}
3186
+
3187
+ \CommentTok{# Get the first two rows from flights.}
3188
+
3189
+ \NormalTok{ans = flights[(}\DecValTok{1}\NormalTok{..}\DecValTok{2}\NormalTok{)]}
3190
+ \NormalTok{puts ans}
3191
+
3192
+ \CommentTok{# Sort flights first by column origin in ascending order, and then by dest in descending order:}
3193
+
3194
+ \CommentTok{# ans = flights[E.order(:origin, -(:dest))]}
3195
+ \CommentTok{# puts ans.head}
3196
+ \end{Highlighting}
3197
+ \end{Shaded}
3198
+
3199
+ \begin{verbatim}
3200
+ ## year month day dep_delay arr_delay carrier origin dest air_time
3201
+ ## 1: 2014 6 1 -9 -5 AA JFK LAX 324
3202
+ ## 2: 2014 6 1 -10 -13 AA JFK LAX 329
3203
+ ## 3: 2014 6 1 18 -1 AA JFK LAX 326
3204
+ ## 4: 2014 6 1 -6 -16 AA JFK LAX 320
3205
+ ## 5: 2014 6 1 -4 -45 AA JFK LAX 326
3206
+ ## 6: 2014 6 1 -6 -23 AA JFK LAX 329
3207
+ ## distance hour
3208
+ ## 1: 2475 8
3209
+ ## 2: 2475 12
3210
+ ## 3: 2475 7
3211
+ ## 4: 2475 10
3212
+ ## 5: 2475 18
3213
+ ## 6: 2475 14
3214
+ ## year month day dep_delay arr_delay carrier origin dest air_time
3215
+ ## 1: 2014 1 1 14 13 AA JFK LAX 359
3216
+ ## 2: 2014 1 1 -3 13 AA JFK LAX 363
3217
+ ## distance hour
3218
+ ## 1: 2475 9
3219
+ ## 2: 2475 11
3220
+ \end{verbatim}
3221
+
3222
+ \begin{Shaded}
3223
+ \begin{Highlighting}[]
3224
+ \CommentTok{# Select column(s) in j}
3225
+ \CommentTok{# select arr_delay column, but return it as a vector.}
3226
+
3227
+ \NormalTok{ans = flights[}\StringTok{:all}\NormalTok{, }\StringTok{:arr_delay}\NormalTok{]}
3228
+ \NormalTok{puts ans.head}
3229
+
3230
+ \CommentTok{# Select arr_delay column, but return as a data.table instead.}
3231
+
3232
+ \NormalTok{ans = flights[}\StringTok{:all}\NormalTok{, }\StringTok{:arr_delay}\NormalTok{.list]}
3233
+ \NormalTok{puts ans.head}
3234
+
3235
+ \NormalTok{ans = flights[}\StringTok{:all}\NormalTok{, E.list(}\StringTok{:arr_delay}\NormalTok{, }\StringTok{:dep_delay}\NormalTok{)]}
3236
+ \end{Highlighting}
3237
+ \end{Shaded}
3238
+
3239
+ \begin{verbatim}
3240
+ ## [1] 13 13 9 -26 1 0
3241
+ ## arr_delay
3242
+ ## 1: 13
3243
+ ## 2: 13
3244
+ ## 3: 9
3245
+ ## 4: -26
3246
+ ## 5: 1
3247
+ ## 6: 0
3248
+ \end{verbatim}
3249
+
3250
+ \section{Graphics in Galaaz}\label{graphics-in-galaaz}
3251
+
3252
+ Creating graphics in Galaaz is quite easy, as it can use all the power
3253
+ of ggplot2. There are many resources on the web that teach ggplot, so
3254
+ here we give a quick example of ggplot integration with Ruby. We
3255
+ continue to use the :mtcars dataset and we will plot a diverging bar
3256
+ plot, showing cars that have `above' or `below' gas consumption. Let's
3257
+ first prepare the data frame with the necessary data:
3258
+
3259
+ \begin{Shaded}
3260
+ \begin{Highlighting}[]
3261
+ \CommentTok{# copy the R variable :mtcars to the Ruby mtcars variable}
3262
+ \NormalTok{mtcars = ~}\StringTok{:mtcars}
3263
+
3264
+ \CommentTok{# create a new column 'car_name' to store the car names so that it can be}
3265
+ \CommentTok{# used for plotting. The 'rownames' of the data frame cannot be used as}
3266
+ \CommentTok{# data for plotting}
3267
+ \NormalTok{mtcars.car_name = R.rownames(}\StringTok{:mtcars}\NormalTok{)}
3268
+
3269
+ \CommentTok{# compute normalized mpg and add it to a new column called mpg_z}
3270
+ \CommentTok{# Note that the mean value for mpg can be obtained by calling the 'mean'}
3271
+ \CommentTok{# function on the vector 'mtcars.mpg'. The same with the standard}
3272
+ \CommentTok{# deviation 'sd'. The vector is then rounded to two digits with 'round 2'}
3273
+ \NormalTok{mtcars.mpg_z = ((mtcars.mpg - mtcars.mpg.mean)/mtcars.mpg.sd).round }\DecValTok{2}
3274
+
3275
+ \CommentTok{# create a new column 'mpg_type'. Function 'ifelse' is a vectorized function}
3276
+ \CommentTok{# that looks at every element of the mpg_z vector and if the value is below}
3277
+ \CommentTok{# 0, returns 'below', otherwise returns 'above'}
3278
+ \NormalTok{mtcars.mpg_type = (mtcars.mpg_z < }\DecValTok{0}\NormalTok{).ifelse(}\StringTok{"below"}\NormalTok{, }\StringTok{"above"}\NormalTok{)}
3279
+
3280
+ \CommentTok{# order the mtcar data set by the mpg_z vector from smaler to larger values}
3281
+ \NormalTok{mtcars = mtcars[mtcars.mpg_z.order, }\StringTok{:all}\NormalTok{]}
3282
+
3283
+ \CommentTok{# convert the car_name column to a factor to retain sorted order in plot}
3284
+ \NormalTok{mtcars.car_name = mtcars.car_name.factor }\StringTok{levels: }\NormalTok{mtcars.car_name}
3285
+
3286
+ \CommentTok{# let's look at the final data frame}
3287
+ \NormalTok{puts mtcars.head}
3288
+ \end{Highlighting}
3289
+ \end{Shaded}
3290
+
3291
+ \begin{verbatim}
3292
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
3293
+ ## Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
3294
+ ## Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4
3295
+ ## Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4
3296
+ ## Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4
3297
+ ## Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
3298
+ ## Maserati Bora 15.0 8 301 335 3.54 3.570 14.60 0 1 5 8
3299
+ ## car_name mpg_z mpg_type
3300
+ ## Cadillac Fleetwood Cadillac Fleetwood -1.61 below
3301
+ ## Lincoln Continental Lincoln Continental -1.61 below
3302
+ ## Camaro Z28 Camaro Z28 -1.13 below
3303
+ ## Duster 360 Duster 360 -0.96 below
3304
+ ## Chrysler Imperial Chrysler Imperial -0.89 below
3305
+ ## Maserati Bora Maserati Bora -0.84 below
3306
+ \end{verbatim}
3307
+
3308
+ Now, let's plot the diverging bar plot. When using gKnit, there is no
3309
+ need to call `R.awt' to create a plotting device, since gKnit does take
3310
+ care of it. Galaaz provides integration with ggplot. The interested
3311
+ reader should check online for more information on ggplot, since it is
3312
+ outside the scope of this manual describing how ggplot works. We give
3313
+ here but a brief description on how this plot is generated.
3314
+
3315
+ ggplot implements the `grammar of graphics'. In this approach, plots are
3316
+ built by adding layers to the plot. On the first layer we describe what
3317
+ we want on the `x' and `y' axis of the plot. In this case, we have
3318
+ `car\_name' on the `x' axis and `mpg\_z' on the `y' axis. Then the type
3319
+ of graph is specified by adding `geom\_bar' (for a bar graph). We
3320
+ specify that our bars should be filled using `mpg\_type', which is
3321
+ either `above' or `below', giving two colours for filling. On the
3322
+ next layer we specify the labels for the graph, then we add the title
3323
+ and subtitle. Finally, in a bar chart usually bars go on the vertical
3324
+ direction, but in this graph we want the bars to be laid out horizontally
3325
+ so we add `coord\_flip'.
3326
+
3327
+ \begin{Shaded}
3328
+ \begin{Highlighting}[]
3329
+ \NormalTok{require }\StringTok{'ggplot'}
3330
+
3331
+ \NormalTok{puts mtcars.ggplot(E.aes(}\StringTok{x: :car_name}\NormalTok{, }\StringTok{y: :mpg_z}\NormalTok{, }\StringTok{label: :mpg_z}\NormalTok{)) +}
3332
+ \NormalTok{ R.geom_bar(E.aes(}\StringTok{fill: :mpg_type}\NormalTok{), }\StringTok{stat: 'identity'}\NormalTok{, }\StringTok{width: }\FloatTok{0.5}\NormalTok{) +}
3333
+ \NormalTok{ R.scale_fill_manual(}\StringTok{name: 'Mileage'}\NormalTok{,}
3334
+ \StringTok{labels: }\NormalTok{R.c(}\StringTok{'Above Average'}\NormalTok{, }\StringTok{'Below Average'}\NormalTok{),}
3335
+ \StringTok{values: }\NormalTok{R.c(}\StringTok{'above'}\NormalTok{: }\StringTok{'#00ba38'}\NormalTok{, }\StringTok{'below'}\NormalTok{: }\StringTok{'#f8766d'}\NormalTok{)) +}
3336
+ \NormalTok{ R.labs(}\StringTok{subtitle: "Normalised mileage from 'mtcars'"}\NormalTok{,}
3337
+ \StringTok{title: "Diverging Bars"}\NormalTok{) + }
3338
+ \NormalTok{ R.coord_flip}
3339
+ \end{Highlighting}
3340
+ \end{Shaded}
3341
+
3342
+ \includegraphics{manual_files/figure-latex/diverging_bar.pdf}
3343
+
3344
+ \section{Coding with Tidyverse}\label{coding-with-tidyverse}
3345
+
3346
+ In R, and when coding with `tidyverse', arguments to a function are
3347
+ usually not \emph{referentially transparent}. That is, you can't replace
3348
+ a value with a seemingly equivalent object that you've defined
3349
+ elsewhere. To see the problem, let's first define a data frame:
3350
+
3351
+ \begin{Shaded}
3352
+ \begin{Highlighting}[]
3353
+ \NormalTok{df = R.data__frame(}\StringTok{x: }\NormalTok{(}\DecValTok{1}\NormalTok{..}\DecValTok{3}\NormalTok{), }\StringTok{y: }\NormalTok{(}\DecValTok{3}\NormalTok{..}\DecValTok{1}\NormalTok{))}
3354
+ \NormalTok{puts df}
3355
+ \end{Highlighting}
3356
+ \end{Shaded}
3357
+
3358
+ \begin{verbatim}
3359
+ ## x y
3360
+ ## 1 1 3
3361
+ ## 2 2 2
3362
+ ## 3 3 1
3363
+ \end{verbatim}
3364
+
3365
+ and now, let's look at this code:
3366
+
3367
+ \begin{Shaded}
3368
+ \begin{Highlighting}[]
3369
+ \NormalTok{my_var <-}\StringTok{ }\NormalTok{x}
3370
+ \KeywordTok{filter}\NormalTok{(df, my_var }\OperatorTok{==}\StringTok{ }\DecValTok{1}\NormalTok{)}
3371
+ \end{Highlighting}
3372
+ \end{Shaded}
3373
+
3374
+ It generates the following error: ``object `x' not found''.
3375
+
3376
+ However, in Galaaz, arguments are referentially transparent as can be
3377
+ seen by the code below. Note initially that `my\_var = :x' will not give
3378
+ the error ``object `x' not found'' since `:x' is treated as an
3379
+ expression and assigned to my\_var. Then when doing (my\_var.eq 1),
3380
+ my\_var is a variable that resolves to `:x' and it becomes equivalent to
3381
+ (:x.eq 1) which is what we want.
3382
+
3383
+ \begin{Shaded}
3384
+ \begin{Highlighting}[]
3385
+ \NormalTok{my_var = }\StringTok{:x}
3386
+ \NormalTok{puts df.filter(my_var.eq }\DecValTok{1}\NormalTok{)}
3387
+ \end{Highlighting}
3388
+ \end{Shaded}
3389
+
3390
+ \begin{verbatim}
3391
+ ## x y
3392
+ ## 1 1 3
3393
+ \end{verbatim}
3394
+
3395
+ As stated by Hadley
3396
+
3397
+ \begin{quote}
3398
+ dplyr code is ambiguous. Depending on what variables are defined where,
3399
+ filter(df, x == y) could be equivalent to any of:
3400
+ \end{quote}
3401
+
3402
+ \begin{verbatim}
3403
+ df[df$x == df$y, ]
3404
+ df[df$x == y, ]
3405
+ df[x == df$y, ]
3406
+ df[x == y, ]
3407
+ \end{verbatim}
3408
+
3409
+ In galaaz this ambiguity does not exist, filter(df, x.eq y) is not a
3410
+ valid expression as expressions are built with symbols. In doing
3411
+ filter(df, :x.eq y) we are looking for elements of the `x' column that
3412
+ are equal to a previously defined y variable. Finally in filter(df,
3413
+ :x.eq :y) we are looking for elements in which the `x' column value is
3414
+ equal to the `y' column value. This can be seen in the following two
3415
+ chunks of code:
3416
+
3417
+ \begin{Shaded}
3418
+ \begin{Highlighting}[]
3419
+ \NormalTok{y = }\DecValTok{1}
3420
+ \NormalTok{x = }\DecValTok{2}
3421
+
3422
+ \CommentTok{# looking for values where the 'x' column is equal to the 'y' column}
3423
+ \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{.eq }\StringTok{:y}\NormalTok{)}
3424
+ \end{Highlighting}
3425
+ \end{Shaded}
3426
+
3427
+ \begin{verbatim}
3428
+ ## x y
3429
+ ## 1 2 2
3430
+ \end{verbatim}
3431
+
3432
+ \begin{Shaded}
3433
+ \begin{Highlighting}[]
3434
+ \CommentTok{# looking for values where the 'x' column is equal to the 'y' variable}
3435
+ \CommentTok{# in this case, the number 1}
3436
+ \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{.eq y)}
3437
+ \end{Highlighting}
3438
+ \end{Shaded}
3439
+
3440
+ \begin{verbatim}
3441
+ ## x y
3442
+ ## 1 1 3
3443
+ \end{verbatim}
3444
+
3445
+ \subsection{Writing a function that applies to different data
3446
+ sets}\label{writing-a-function-that-applies-to-different-data-sets}
3447
+
3448
+ Let's suppose that we want to write a function that receives as the
3449
+ first argument a data frame and as second argument an expression that
3450
+ adds a column to the data frame that is equal to the sum of elements in
3451
+ column `a' plus `x'.
3452
+
3453
+ Here is the intended behaviour using the `mutate' function of `dplyr':
3454
+
3455
+ \begin{verbatim}
3456
+ mutate(df1, y = a + x)
3457
+ mutate(df2, y = a + x)
3458
+ mutate(df3, y = a + x)
3459
+ mutate(df4, y = a + x)
3460
+ \end{verbatim}
3461
+
3462
+ The naive approach to writing an R function to solve this problem is:
3463
+
3464
+ \begin{verbatim}
3465
+ mutate_y <- function(df) {
3466
+ mutate(df, y = a + x)
3467
+ }
3468
+ \end{verbatim}
3469
+
3470
+ Unfortunately, in R, this function can fail silently if one of the
3471
+ variables isn't present in the data frame, but is present in the global
3472
+ environment. We will not go through here how to solve this problem in R.
3473
+
3474
+ In Galaaz the method mutate\_y below will work fine and will never fail
3475
+ silently.
3476
+
3477
+ \begin{Shaded}
3478
+ \begin{Highlighting}[]
3479
+ \KeywordTok{def}\NormalTok{ mutate_y(df)}
3480
+ \NormalTok{ df.mutate(}\StringTok{:y}\NormalTok{.assign }\StringTok{:a}\NormalTok{ + }\StringTok{:x}\NormalTok{)}
3481
+ \KeywordTok{end}
3482
+ \end{Highlighting}
3483
+ \end{Shaded}
3484
+
3485
+ Here we create a data frame that has only one column named `x':
3486
+
3487
+ \begin{Shaded}
3488
+ \begin{Highlighting}[]
3489
+ \NormalTok{df1 = R.data__frame(}\StringTok{x: }\NormalTok{(}\DecValTok{1}\NormalTok{..}\DecValTok{3}\NormalTok{))}
3490
+ \NormalTok{puts df1}
3491
+ \end{Highlighting}
3492
+ \end{Shaded}
3493
+
3494
+ \begin{verbatim}
3495
+ ## x
3496
+ ## 1 1
3497
+ ## 2 2
3498
+ ## 3 3
3499
+ \end{verbatim}
3500
+
3501
+ Note that method mutate\_y will fail independently of the fact that
3502
+ variable `a' is defined and in the scope of the method. Variable `a' has
3503
+ no relationship with the symbol `:a' used in the definition of
3504
+ `mutate\_y' above:
3505
+
3506
+ \begin{Shaded}
3507
+ \begin{Highlighting}[]
3508
+ \NormalTok{a = }\DecValTok{10}
3509
+ \NormalTok{mutate_y(df1)}
3510
+ \end{Highlighting}
3511
+ \end{Shaded}
3512
+
3513
+ \begin{verbatim}
3514
+ ## Message:
3515
+ ## Error in mutate_impl(.data, dots) :
3516
+ ## Evaluation error: object 'a' not found.
3517
+ ## In addition: Warning message:
3518
+ ## In mutate_impl(.data, dots) :
3519
+ ## mismatched protect/unprotect (unprotect with empty protect stack) (RError)
3520
+ ## Translated to internal error
3521
+ \end{verbatim}
3522
+
3523
+ \subsection{Different expressions}\label{different-expressions}
3524
+
3525
+ Let's move to the next problem as presented by Hadley where trying to
3526
+ write a function in R that will receive two arguments, the first a
3527
+ variable and the second an expression is not trivial. Below we create a
3528
+ data frame and we want to write a function that groups data by a
3529
+ variable and summarises it by an expression:
3530
+
3531
+ \begin{Shaded}
3532
+ \begin{Highlighting}[]
3533
+ \KeywordTok{set.seed}\NormalTok{(}\DecValTok{123}\NormalTok{)}
3534
+
3535
+ \NormalTok{df <-}\StringTok{ }\KeywordTok{data.frame}\NormalTok{(}
3536
+ \DataTypeTok{g1 =} \KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{, }\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{2}\NormalTok{),}
3537
+ \DataTypeTok{g2 =} \KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{1}\NormalTok{),}
3538
+ \DataTypeTok{a =} \KeywordTok{sample}\NormalTok{(}\DecValTok{5}\NormalTok{),}
3539
+ \DataTypeTok{b =} \KeywordTok{sample}\NormalTok{(}\DecValTok{5}\NormalTok{)}
3540
+ \NormalTok{)}
3541
+
3542
+ \KeywordTok{as.data.frame}\NormalTok{(df) }
3543
+ \end{Highlighting}
3544
+ \end{Shaded}
3545
+
3546
+ \begin{verbatim}
3547
+ ## g1 g2 a b
3548
+ ## 1 1 1 3 3
3549
+ ## 2 1 2 2 1
3550
+ ## 3 2 1 5 2
3551
+ ## 4 2 2 4 5
3552
+ ## 5 2 1 1 4
3553
+ \end{verbatim}
3554
+
3555
+ \begin{Shaded}
3556
+ \begin{Highlighting}[]
3557
+ \NormalTok{d2 <-}\StringTok{ }\NormalTok{df }\OperatorTok{%>%}
3558
+ \StringTok{ }\KeywordTok{group_by}\NormalTok{(g1) }\OperatorTok{%>%}
3559
+ \StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{a =} \KeywordTok{mean}\NormalTok{(a))}
3560
+
3561
+ \KeywordTok{as.data.frame}\NormalTok{(d2) }
3562
+ \end{Highlighting}
3563
+ \end{Shaded}
3564
+
3565
+ \begin{verbatim}
3566
+ ## g1 a
3567
+ ## 1 1 2.500000
3568
+ ## 2 2 3.333333
3569
+ \end{verbatim}
3570
+
3571
+ \begin{Shaded}
3572
+ \begin{Highlighting}[]
3573
+ \NormalTok{d2 <-}\StringTok{ }\NormalTok{df }\OperatorTok{%>%}
3574
+ \StringTok{ }\KeywordTok{group_by}\NormalTok{(g2) }\OperatorTok{%>%}
3575
+ \StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{a =} \KeywordTok{mean}\NormalTok{(a))}
3576
+
3577
+ \KeywordTok{as.data.frame}\NormalTok{(d2) }
3578
+ \end{Highlighting}
3579
+ \end{Shaded}
3580
+
3581
+ \begin{verbatim}
3582
+ ## g2 a
3583
+ ## 1 1 3
3584
+ ## 2 2 3
3585
+ \end{verbatim}
3586
+
3587
+ As shown by Hadley, one might expect this function to do the trick:
3588
+
3589
+ \begin{Shaded}
3590
+ \begin{Highlighting}[]
3591
+ \NormalTok{my_summarise <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(df, group_var) \{}
3592
+ \NormalTok{ df }\OperatorTok{%>%}
3593
+ \StringTok{ }\KeywordTok{group_by}\NormalTok{(group_var) }\OperatorTok{%>%}
3594
+ \StringTok{ }\KeywordTok{summarise}\NormalTok{(}\DataTypeTok{a =} \KeywordTok{mean}\NormalTok{(a))}
3595
+ \NormalTok{\}}
3596
+
3597
+ \CommentTok{# my_summarise(df, g1)}
3598
+ \CommentTok{#> Error: Column `group_var` is unknown}
3599
+ \end{Highlighting}
3600
+ \end{Shaded}
3601
+
3602
+ In order to solve this problem, coding with dplyr requires the
3603
+ introduction of many new concepts and functions such as `quo', `quos',
3604
+ `enquo', `enquos', `!!' (bang bang), `!!!' (triple bang). Again, we'll
3605
+ leave to Hadley the explanation on how to use all those functions.
3606
+
3607
+ Now, let's try to implement the same function in galaaz. The next code
3608
+ block first prints the `df' data frame defined previously in R (to
3609
+ access an R variable from Galaaz, we use the tilde operator
3610
+ `\textasciitilde{}' applied to the R variable name as symbol, i.e.,
3611
+ `:df').
3612
+
3613
+ \begin{Shaded}
3614
+ \begin{Highlighting}[]
3615
+ \NormalTok{puts ~}\StringTok{:df}
3616
+ \end{Highlighting}
3617
+ \end{Shaded}
3618
+
3619
+ \begin{verbatim}
3620
+ ## g1 g2 a b
3621
+ ## 1 1 1 3 3
3622
+ ## 2 1 2 2 1
3623
+ ## 3 2 1 5 2
3624
+ ## 4 2 2 4 5
3625
+ ## 5 2 1 1 4
3626
+ \end{verbatim}
3627
+
3628
+ We then create the `my\_summarize' method and call it passing the R data
3629
+ frame and the group by variable `:g1':
3630
+
3631
+ \begin{Shaded}
3632
+ \begin{Highlighting}[]
3633
+ \KeywordTok{def}\NormalTok{ my_summarize(df, group_var)}
3634
+ \NormalTok{ df.group_by(group_var).}
3635
+ \NormalTok{ summarize(}\StringTok{a: :a}\NormalTok{.mean)}
3636
+ \KeywordTok{end}
3637
+
3638
+ \NormalTok{puts my_summarize(}\StringTok{:df}\NormalTok{, }\StringTok{:g1}\NormalTok{)}
3639
+ \end{Highlighting}
3640
+ \end{Shaded}
3641
+
3642
+ \begin{verbatim}
3643
+ ## # A tibble: 2 x 2
3644
+ ## g1 a
3645
+ ## <dbl> <dbl>
3646
+ ## 1 1 2.5
3647
+ ## 2 2 3.33
3648
+ \end{verbatim}
3649
+
3650
+ It works!!! Well, let's make sure this was not just some coincidence
3651
+
3652
+ \begin{Shaded}
3653
+ \begin{Highlighting}[]
3654
+ \NormalTok{puts my_summarize(}\StringTok{:df}\NormalTok{, }\StringTok{:g2}\NormalTok{)}
3655
+ \end{Highlighting}
3656
+ \end{Shaded}
3657
+
3658
+ \begin{verbatim}
3659
+ ## # A tibble: 2 x 2
3660
+ ## g2 a
3661
+ ## <dbl> <dbl>
3662
+ ## 1 1 3
3663
+ ## 2 2 3
3664
+ \end{verbatim}
3665
+
3666
+ Great, everything is fine! No magic, no new functions, no complexities,
3667
+ just normal, standard Ruby code. If you've ever done NSE in R, this
3668
+ certainly feels much safer and easier to implement.
3669
+
3670
+ \subsection{Different input variables}\label{different-input-variables}
3671
+
3672
+ In the previous section we've managed to get rid of all NSE formulation
3673
+ for a simple example, but does this remain true for more complex
3674
+ examples, or will the Galaaz way prove impractical for more complex
3675
+ code?
3676
+
3677
+ In the next example Hadley proposes that we write a function that given
3678
+ an expression such as `a' or `a * b', calculates three summaries. What
3679
+ we want is a function that does the same as these R statements:
3680
+
3681
+ \begin{verbatim}
3682
+ summarise(df, mean = mean(a), sum = sum(a), n = n())
3683
+ #> # A tibble: 1 x 3
3684
+ #> mean sum n
3685
+ #> <dbl> <int> <int>
3686
+ #> 1 3 15 5
3687
+
3688
+ summarise(df, mean = mean(a * b), sum = sum(a * b), n = n())
3689
+ #> # A tibble: 1 x 3
3690
+ #> mean sum n
3691
+ #> <dbl> <int> <int>
3692
+ #> 1 9 45 5
3693
+ \end{verbatim}
3694
+
3695
+ Let's try it in galaaz:
3696
+
3697
+ \begin{Shaded}
3698
+ \begin{Highlighting}[]
3699
+ \KeywordTok{def}\NormalTok{ my_summarise2(df, expr)}
3700
+ \NormalTok{ df.summarize(}
3701
+ \StringTok{mean: }\NormalTok{E.mean(expr),}
3702
+ \StringTok{sum: }\NormalTok{E.sum(expr),}
3703
+ \StringTok{n: }\NormalTok{E.n}
3704
+ \NormalTok{ )}
3705
+ \KeywordTok{end}
3706
+
3707
+ \NormalTok{puts my_summarise2((~}\StringTok{:df}\NormalTok{), }\StringTok{:a}\NormalTok{)}
3708
+ \NormalTok{puts }\StringTok{"\textbackslash{}n"}
3709
+ \NormalTok{puts my_summarise2((~}\StringTok{:df}\NormalTok{), }\StringTok{:a}\NormalTok{ * }\StringTok{:b}\NormalTok{)}
3710
+ \end{Highlighting}
3711
+ \end{Shaded}
3712
+
3713
+ \begin{verbatim}
3714
+ ## mean sum n
3715
+ ## 1 3 15 5
3716
+ ##
3717
+ ## mean sum n
3718
+ ## 1 9 45 5
3719
+ \end{verbatim}
3720
+
3721
+ Once again, there is no need to use any special theory or functions. The
3722
+ only point to be careful about is the use of `E' to build expressions
3723
+ from functions `mean', `sum' and `n'.
3724
+
3725
+ \subsection{Different input and output
3726
+ variable}\label{different-input-and-output-variable}
3727
+
3728
+ Now the next challenge presented by Hadley is to vary the name of the
3729
+ output variables based on the received expression. So, if the input
3730
+ expression is `a', we want our data frame columns to be named `mean\_a'
3731
+ and `sum\_a'. Now, if the input expression is `b', columns should be
3732
+ named `mean\_b' and `sum\_b'.
3733
+
3734
+ \begin{verbatim}
3735
+ mutate(df, mean_a = mean(a), sum_a = sum(a))
3736
+ #> # A tibble: 5 x 6
3737
+ #> g1 g2 a b mean_a sum_a
3738
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
3739
+ #> 1 1 1 1 3 3 15
3740
+ #> 2 1 2 4 2 3 15
3741
+ #> 3 2 1 2 1 3 15
3742
+ #> 4 2 2 5 4 3 15
3743
+ #> # … with 1 more row
3744
+
3745
+ mutate(df, mean_b = mean(b), sum_b = sum(b))
3746
+ #> # A tibble: 5 x 6
3747
+ #> g1 g2 a b mean_b sum_b
3748
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
3749
+ #> 1 1 1 1 3 3 15
3750
+ #> 2 1 2 4 2 3 15
3751
+ #> 3 2 1 2 1 3 15
3752
+ #> 4 2 2 5 4 3 15
3753
+ #> # … with 1 more row
3754
+ \end{verbatim}
3755
+
3756
+ In order to solve this problem in R, Hadley needs to introduce some
3757
+ more new functions and notations: `quo\_name' and the `:=' operator from
3758
+ package `rlang'
3759
+
3760
+ Here is our Ruby code:
3761
+
3762
+ \begin{Shaded}
3763
+ \begin{Highlighting}[]
3764
+ \KeywordTok{def}\NormalTok{ my_mutate(df, expr)}
3765
+ \NormalTok{ mean_name = }\StringTok{"mean_}\OtherTok{#\{}\NormalTok{expr.to_s}\OtherTok{\}}\StringTok{"}
3766
+ \NormalTok{ sum_name = }\StringTok{"sum_}\OtherTok{#\{}\NormalTok{expr.to_s}\OtherTok{\}}\StringTok{"}
3767
+
3768
+ \NormalTok{ df.mutate(mean_name => E.mean(expr),}
3769
+ \NormalTok{ sum_name => E.sum(expr))}
3770
+ \KeywordTok{end}
3771
+
3772
+ \NormalTok{puts my_mutate((~}\StringTok{:df}\NormalTok{), }\StringTok{:a}\NormalTok{)}
3773
+ \NormalTok{puts }\StringTok{"\textbackslash{}n"}
3774
+ \NormalTok{puts my_mutate((~}\StringTok{:df}\NormalTok{), }\StringTok{:b}\NormalTok{)}
3775
+ \end{Highlighting}
3776
+ \end{Shaded}
3777
+
3778
+ \begin{verbatim}
3779
+ ## g1 g2 a b mean_a sum_a
3780
+ ## 1 1 1 3 3 3 15
3781
+ ## 2 1 2 2 1 3 15
3782
+ ## 3 2 1 5 2 3 15
3783
+ ## 4 2 2 4 5 3 15
3784
+ ## 5 2 1 1 4 3 15
3785
+ ##
3786
+ ## g1 g2 a b mean_b sum_b
3787
+ ## 1 1 1 3 3 3 15
3788
+ ## 2 1 2 2 1 3 15
3789
+ ## 3 2 1 5 2 3 15
3790
+ ## 4 2 2 4 5 3 15
3791
+ ## 5 2 1 1 4 3 15
3792
+ \end{verbatim}
3793
+
3794
+ It really seems that ``Non Standard Evaluation'' is actually quite
3795
+ standard in Galaaz! But, you might have noticed a small change in the
3796
+ way the arguments to the mutate method were called. In a previous
3797
+ example we used df.summarise(mean: E.mean(:a), \ldots{}) where the
3798
+ column name was followed by a `:' colon. In this example, we have
3799
+ df.mutate(mean\_name =\textgreater{} E.mean(expr), \ldots{}) and
3800
+ variable mean\_name is not followed by `:' but by `=\textgreater{}'.
3801
+ This is standard Ruby notation.
3802
+
3803
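+ In a Ruby hash literal, `mean: x' is shorthand for `:mean =\textgreater{} x', so the key is always the literal symbol `:mean'. When the key is computed at run time and held in a variable, as mean\_name is here, the general `key =\textgreater{} value' form must be used.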
3804
+
3805
+ \subsection{Capturing multiple
3806
+ variables}\label{capturing-multiple-variables}
3807
+
3808
+ Moving on with new complexities, Hadley proposes that we solve the
3809
+ problem in which the summarise function will receive any number of
3810
+ grouping variables.
3811
+
3812
+ This again is quite standard Ruby. In order to receive an undefined
3813
+ number of parameters, the parameter is preceded by '*':
3814
+
3815
+ \begin{Shaded}
3816
+ \begin{Highlighting}[]
3817
+ \KeywordTok{def}\NormalTok{ my_summarise3(df, *group_vars)}
3818
+ \NormalTok{ df.group_by(*group_vars).}
3819
+ \NormalTok{ summarise(}\StringTok{a: }\NormalTok{E.mean(}\StringTok{:a}\NormalTok{))}
3820
+ \KeywordTok{end}
3821
+
3822
+ \NormalTok{puts my_summarise3((~}\StringTok{:df}\NormalTok{), }\StringTok{:g1}\NormalTok{, }\StringTok{:g2}\NormalTok{)}
3823
+ \end{Highlighting}
3824
+ \end{Shaded}
3825
+
3826
+ \begin{verbatim}
3827
+ ## # A tibble: 4 x 3
3828
+ ## # Groups: g1 [?]
3829
+ ## g1 g2 a
3830
+ ## <dbl> <dbl> <dbl>
3831
+ ## 1 1 1 3
3832
+ ## 2 1 2 2
3833
+ ## 3 2 1 3
3834
+ ## 4 2 2 4
3835
+ \end{verbatim}
3836
+
3837
+ \subsection{Why does R require NSE and Galaaz does
3838
+ not?}\label{why-does-r-require-nse-and-galaaz-does-not}
3839
+
3840
+ NSE introduces a number of new concepts, such as `quoting',
3841
+ `quasiquotation', `unquoting' and `unquote-splicing', while in Galaaz
3842
+ none of those concepts are needed. What gives?
3843
+
3844
+ R is an extremely flexible language and it has lazy evaluation of
3845
+ parameters. When in R a function is called as `summarise(df, a = b)',
3846
+ the summarise function receives the literal `a = b' parameter and can
3847
+ work with this as if it were a string. In R, it is not clear what a and
3848
+ b are, they can be expressions or they can be variables, it is up to the
3849
+ function to decide what `a = b' means.
3850
+
3851
+ In Ruby, there is no lazy evaluation of parameters and `a' is always a
3852
+ variable and so is `b'. Variables assume their value as soon as they are
3853
+ used, so `x = a' is immediately evaluated and variable `x' will receive
3854
+ the value of variable `a' as soon as the Ruby statement is executed.
3855
+ Ruby also provides the notion of a symbol; `:a' is a symbol and does not
3856
+ evaluate to anything. Galaaz uses Ruby symbols to build expressions that
3857
+ are not bound to anything: `:a.eq :b' is clearly an expression and has
3858
+ no relationship whatsoever with the statement `a = b'. By using symbols,
3859
+ variables and expressions, all the possible ambiguities that are found in
3860
+ R are eliminated in Galaaz.
3861
+
3862
+ The main problem that remains is that R functions do not clearly
3863
+ document what type of input they are expecting: they might be
3864
+ expecting regular variables, or they might be expecting expressions, and
3865
+ the R function will know how to deal with an input of the form `a = b'.
3866
+ For the Ruby developer, however, it might not be immediately clear if it
3867
+ should call the function passing the value `true' if variable `a' is
3868
+ equal to variable `b' or if it should call the function passing the
3869
+ expression `:a.eq :b'.
3870
+
3871
+ \subsection{Advanced dplyr features}\label{advanced-dplyr-features}
3872
+
3873
+ In the blog: Programming with dplyr by using dplyr
3874
+ (\url{https://www.r-bloggers.com/programming-with-dplyr-by-using-dplyr/})
3875
+ Iñaki Úcar shows surprise that some R users are trying to code in dplyr
3876
+ avoiding the use of NSE. For instance he says:
3877
+
3878
+ \begin{quote}
3879
+ Take the example of seplyr. It stands for standard evaluation dplyr, and
3880
+ enables us to program over dplyr without having ``to bring in (or study)
3881
+ any deep-theory or heavy-weight tools such as rlang/tidyeval''.
3882
+ \end{quote}
3883
+
3884
+ For me, there isn't really any surprise that users are trying to avoid
3885
+ dplyr deep-theory. R users frequently are not programmers and learning
3886
+ to code is already hard business, on top of that, having to learn how to
3887
+ `quote' or `enquo' or `quos' or `enquos' is not necessarily a `piece of
3888
+ cake'. So much so, that `tidyeval' has some more advanced functions that
3889
+ instead of using quoted expressions, use strings as arguments.
3890
+
3891
+ In the following examples, we show the use of functions `group\_by\_at',
3892
+ `summarise\_at' and `rename\_at' that receive strings as argument. The
3893
+ data frame used is `starwars', which describes features of characters in
3894
+ the Starwars movies:
3895
+
3896
+ \begin{Shaded}
3897
+ \begin{Highlighting}[]
3898
+ \NormalTok{puts (~}\StringTok{:starwars}\NormalTok{).head}
3899
+ \end{Highlighting}
3900
+ \end{Shaded}
3901
+
3902
+ \begin{verbatim}
3903
+ ## # A tibble: 6 x 13
3904
+ ## name height mass hair_color skin_color eye_color birth_year gender
3905
+ ## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
3906
+ ## 1 Luke~ 172 77 blond fair blue 19 male
3907
+ ## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
3908
+ ## 3 R2-D2 96 32 <NA> white, bl~ red 33 <NA>
3909
+ ## 4 Dart~ 202 136 none white yellow 41.9 male
3910
+ ## 5 Leia~ 150 49 brown light brown 19 female
3911
+ ## 6 Owen~ 178 120 brown, gr~ light blue 52 male
3912
+ ## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
3913
+ ## # vehicles <list>, starships <list>
3914
+ \end{verbatim}
3915
+
3916
+ The grouped\_mean function below will receive a grouping variable and
3917
+ calculate summaries for the value\_variables given:
3918
+
3919
+ \begin{Shaded}
3920
+ \begin{Highlighting}[]
3921
+ \NormalTok{grouped_mean <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(data, grouping_variables, value_variables) \{}
3922
+ \NormalTok{ data }\OperatorTok{%>%}
3923
+ \StringTok{ }\KeywordTok{group_by_at}\NormalTok{(grouping_variables) }\OperatorTok{%>%}
3924
+ \StringTok{ }\KeywordTok{mutate}\NormalTok{(}\DataTypeTok{count =} \KeywordTok{n}\NormalTok{()) }\OperatorTok{%>%}
3925
+ \StringTok{ }\KeywordTok{summarise_at}\NormalTok{(}\KeywordTok{c}\NormalTok{(value_variables, }\StringTok{"count"}\NormalTok{), mean, }\DataTypeTok{na.rm =} \OtherTok{TRUE}\NormalTok{) }\OperatorTok{%>%}
3926
+ \StringTok{ }\KeywordTok{rename_at}\NormalTok{(value_variables, }\KeywordTok{funs}\NormalTok{(}\KeywordTok{paste0}\NormalTok{(}\StringTok{"mean_"}\NormalTok{, .)))}
3927
+ \NormalTok{ \}}
3928
+
3929
+ \NormalTok{gm =}\StringTok{ }\NormalTok{starwars }\OperatorTok{%>%}\StringTok{ }
3930
+ \StringTok{ }\KeywordTok{grouped_mean}\NormalTok{(}\StringTok{"eye_color"}\NormalTok{, }\KeywordTok{c}\NormalTok{(}\StringTok{"mass"}\NormalTok{, }\StringTok{"birth_year"}\NormalTok{))}
3931
+
3932
+ \KeywordTok{as.data.frame}\NormalTok{(gm) }
3933
+ \end{Highlighting}
3934
+ \end{Shaded}
3935
+
3936
+ \begin{verbatim}
3937
+ ## eye_color mean_mass mean_birth_year count
3938
+ ## 1 black 76.28571 33.00000 10
3939
+ ## 2 blue 86.51667 67.06923 19
3940
+ ## 3 blue-gray 77.00000 57.00000 1
3941
+ ## 4 brown 66.09231 108.96429 21
3942
+ ## 5 dark NaN NaN 1
3943
+ ## 6 gold NaN NaN 1
3944
+ ## 7 green, yellow 159.00000 NaN 1
3945
+ ## 8 hazel 66.00000 34.50000 3
3946
+ ## 9 orange 282.33333 231.00000 8
3947
+ ## 10 pink NaN NaN 1
3948
+ ## 11 red 81.40000 33.66667 5
3949
+ ## 12 red, blue NaN NaN 1
3950
+ ## 13 unknown 31.50000 NaN 3
3951
+ ## 14 white 48.00000 NaN 1
3952
+ ## 15 yellow 81.11111 76.38000 11
3953
+ \end{verbatim}
3954
+
3955
+ The same code with Galaaz, becomes:
3956
+
3957
+ \begin{Shaded}
3958
+ \begin{Highlighting}[]
3959
+ \KeywordTok{def}\NormalTok{ grouped_mean(data, grouping_variables, value_variables)}
3960
+ \NormalTok{ data.}
3961
+ \NormalTok{ group_by_at(grouping_variables).}
3962
+ \NormalTok{ mutate(}\StringTok{count: }\NormalTok{E.n).}
3963
+ \NormalTok{ summarise_at(E.c(value_variables, }\StringTok{"count"}\NormalTok{), ~}\StringTok{:mean}\NormalTok{, }\StringTok{na__rm: }\DecValTok{true}\NormalTok{).}
3964
+ \NormalTok{ rename_at(value_variables, E.funs(E.paste0(}\StringTok{"mean_"}\NormalTok{, value_variables)))}
3965
+ \KeywordTok{end}
3966
+
3967
+ \NormalTok{puts grouped_mean((~}\StringTok{:starwars}\NormalTok{), }\StringTok{"eye_color"}\NormalTok{, E.c(}\StringTok{"mass"}\NormalTok{, }\StringTok{"birth_year"}\NormalTok{))}
3968
+ \end{Highlighting}
3969
+ \end{Shaded}
3970
+
3971
+ \begin{verbatim}
3972
+ ## # A tibble: 15 x 4
3973
+ ## eye_color mean_mass mean_birth_year count
3974
+ ## <chr> <dbl> <dbl> <dbl>
3975
+ ## 1 black 76.3 33 10
3976
+ ## 2 blue 86.5 67.1 19
3977
+ ## 3 blue-gray 77 57 1
3978
+ ## 4 brown 66.1 109. 21
3979
+ ## 5 dark NaN NaN 1
3980
+ ## 6 gold NaN NaN 1
3981
+ ## 7 green, yellow 159 NaN 1
3982
+ ## 8 hazel 66 34.5 3
3983
+ ## 9 orange 282. 231 8
3984
+ ## 10 pink NaN NaN 1
3985
+ ## 11 red 81.4 33.7 5
3986
+ ## 12 red, blue NaN NaN 1
3987
+ ## 13 unknown 31.5 NaN 3
3988
+ ## 14 white 48 NaN 1
3989
+ ## 15 yellow 81.1 76.4 11
3990
+ \end{verbatim}
3991
+
3992
+ {[}TO BE CONTINUED\ldots{}{]}
3993
+
3994
+ \section{Contributing}\label{contributing}
3995
+
3996
+ \begin{itemize}
3997
+ \tightlist
3998
+ \item
3999
+ Fork it
4000
+ \item
4001
+ Create your feature branch (git checkout -b my-new-feature)
4002
+ \item
4003
+ Write Tests!
4004
+ \item
4005
+ Commit your changes (git commit -am `Add some feature')
4006
+ \item
4007
+ Push to the branch (git push origin my-new-feature)
4008
+ \item
4009
+ Create new Pull Request
4010
+ \end{itemize}
4011
+
4012
+ \section*{References}\label{references}
4013
+ \addcontentsline{toc}{section}{References}
4014
+
4015
+ \hypertarget{refs}{}
4016
+ \hypertarget{ref-Knuth:literate_programming}{}
4017
+ Knuth, Donald E. 1984. ``Literate Programming.'' \emph{Comput. J.} 27
4018
+ (2). Oxford, UK: Oxford University Press: 97--111.
4019
+ doi:\href{https://doi.org/10.1093/comjnl/27.2.97}{10.1093/comjnl/27.2.97}.
4020
+
4021
+ \hypertarget{ref-Wilkinson:grammar_of_graphics}{}
4022
+ Wilkinson, Leland. 2005. \emph{The Grammar of Graphics (Statistics and
4023
+ Computing)}. Berlin, Heidelberg: Springer-Verlag.
4024
+
4025
+
4026
+ \end{document}