galaaz 0.4.10 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (163) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2048 -531
  3. data/Rakefile +3 -2
  4. data/bin/gknit +152 -6
  5. data/bin/gknit-draft +105 -0
  6. data/bin/gknit-draft.rb +28 -0
  7. data/bin/gknit_Rscript +127 -0
  8. data/bin/grun +27 -1
  9. data/bin/gstudio +47 -4
  10. data/bin/{gstudio.rb → gstudio_irb.rb} +0 -0
  11. data/bin/gstudio_pry.rb +7 -0
  12. data/blogs/galaaz_ggplot/galaaz_ggplot.html +10 -195
  13. data/blogs/galaaz_ggplot/galaaz_ggplot.md +404 -0
  14. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
  15. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
  16. data/blogs/gknit/gknit.Rmd +5 -3
  17. data/blogs/gknit/gknit.pdf +0 -0
  18. data/blogs/gknit/lst.rds +0 -0
  19. data/blogs/manual/lst.rds +0 -0
  20. data/blogs/manual/manual.Rmd +826 -53
  21. data/blogs/manual/manual.html +2338 -695
  22. data/blogs/manual/manual.md +2032 -539
  23. data/blogs/manual/manual.pdf +0 -0
  24. data/blogs/manual/manual.tex +1804 -594
  25. data/blogs/manual/manual_files/figure-html/bubble-1.png +0 -0
  26. data/blogs/manual/manual_files/figure-html/diverging_bar.png +0 -0
  27. data/blogs/manual/manual_files/figure-latex/bubble-1.png +0 -0
  28. data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
  29. data/blogs/manual/model.rb +41 -0
  30. data/blogs/nse_dplyr/nse_dplyr.Rmd +226 -73
  31. data/blogs/nse_dplyr/nse_dplyr.html +254 -336
  32. data/blogs/nse_dplyr/nse_dplyr.md +353 -158
  33. data/blogs/oh_my/oh_my.html +274 -386
  34. data/blogs/oh_my/oh_my.md +208 -205
  35. data/blogs/ruby_plot/ruby_plot.html +20 -205
  36. data/blogs/ruby_plot/ruby_plot.md +14 -15
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  42. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  43. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  44. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  46. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  47. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  48. data/examples/Bibliography/master.bib +50 -0
  49. data/examples/Bibliography/stats.bib +72 -0
  50. data/examples/islr/x_y_rnorm.jpg +0 -0
  51. data/examples/latex_templates/Test-acm_article/Makefile +16 -0
  52. data/examples/latex_templates/Test-acm_article/Test-acm_article.Rmd +65 -0
  53. data/examples/latex_templates/Test-acm_article/acm_proc_article-sp.cls +1670 -0
  54. data/examples/latex_templates/Test-acm_article/sensys-abstract.cls +703 -0
  55. data/examples/latex_templates/Test-acm_article/sigproc.bib +59 -0
  56. data/examples/latex_templates/Test-acs_article/Test-acs_article.Rmd +260 -0
  57. data/examples/latex_templates/Test-acs_article/Test-acs_article.pdf +0 -0
  58. data/examples/latex_templates/Test-acs_article/acs-Test-acs_article.bib +11 -0
  59. data/examples/latex_templates/Test-acs_article/acs-my_output.bib +11 -0
  60. data/examples/latex_templates/Test-acs_article/acstest.bib +17 -0
  61. data/examples/latex_templates/Test-aea_article/AEA.cls +1414 -0
  62. data/{blogs/gknit/marshal.dump → examples/latex_templates/Test-aea_article/BibFile.bib} +0 -0
  63. data/examples/latex_templates/Test-aea_article/Test-aea_article.Rmd +108 -0
  64. data/examples/latex_templates/Test-aea_article/Test-aea_article.pdf +0 -0
  65. data/examples/latex_templates/Test-aea_article/aea.bst +1269 -0
  66. data/examples/latex_templates/Test-aea_article/multicol.sty +853 -0
  67. data/examples/latex_templates/Test-aea_article/references.bib +0 -0
  68. data/examples/latex_templates/Test-aea_article/setspace.sty +546 -0
  69. data/examples/latex_templates/Test-amq_article/Test-amq_article.Rmd +256 -0
  70. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdf +0 -0
  71. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdfsync +3397 -0
  72. data/examples/latex_templates/Test-amq_article/pics/Figure2.pdf +0 -0
  73. data/examples/latex_templates/Test-ams_article/Test-ams_article.Rmd +215 -0
  74. data/examples/latex_templates/Test-ams_article/amstest.bib +436 -0
  75. data/examples/latex_templates/Test-asa_article/Test-asa_article.Rmd +153 -0
  76. data/examples/latex_templates/Test-asa_article/Test-asa_article.pdf +0 -0
  77. data/examples/latex_templates/Test-asa_article/agsm.bst +1353 -0
  78. data/examples/latex_templates/Test-asa_article/bibliography.bib +233 -0
  79. data/examples/latex_templates/Test-ieee_article/IEEEtran.bst +2409 -0
  80. data/examples/latex_templates/Test-ieee_article/IEEEtran.cls +6346 -0
  81. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.Rmd +175 -0
  82. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.pdf +0 -0
  83. data/examples/latex_templates/Test-ieee_article/mybibfile.bib +20 -0
  84. data/examples/latex_templates/Test-rjournal_article/RJournal.sty +335 -0
  85. data/examples/latex_templates/Test-rjournal_article/RJreferences.bib +18 -0
  86. data/examples/latex_templates/Test-rjournal_article/RJwrapper.pdf +0 -0
  87. data/examples/latex_templates/Test-rjournal_article/Test-rjournal_article.Rmd +52 -0
  88. data/examples/latex_templates/Test-springer_article/Test-springer_article.Rmd +65 -0
  89. data/examples/latex_templates/Test-springer_article/Test-springer_article.pdf +0 -0
  90. data/examples/latex_templates/Test-springer_article/bibliography.bib +26 -0
  91. data/examples/latex_templates/Test-springer_article/spbasic.bst +1658 -0
  92. data/examples/latex_templates/Test-springer_article/spmpsci.bst +1512 -0
  93. data/examples/latex_templates/Test-springer_article/spphys.bst +1443 -0
  94. data/examples/latex_templates/Test-springer_article/svglov3.clo +113 -0
  95. data/examples/latex_templates/Test-springer_article/svjour3.cls +1431 -0
  96. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.Rmd +73 -0
  97. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.pdf +0 -0
  98. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.Rmd +382 -0
  99. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.pdf +0 -0
  100. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.Rmd +164 -0
  101. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.pdf +0 -0
  102. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.Rmd +92 -0
  103. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.pdf +0 -0
  104. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/attend-grade-relationships.csv +482 -0
  105. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.Rmd +280 -0
  106. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.pdf +0 -0
  107. data/examples/rmarkdown/svm-xaringan-example/svm-xaringan-example.Rmd +386 -0
  108. data/lib/R_interface/r.rb +1 -1
  109. data/lib/R_interface/r_libs.R +1 -1
  110. data/lib/R_interface/r_methods.rb +10 -0
  111. data/lib/R_interface/rpkg.rb +1 -0
  112. data/lib/R_interface/rsupport.rb +4 -6
  113. data/lib/gknit.rb +2 -0
  114. data/lib/gknit/draft.rb +105 -0
  115. data/lib/gknit/knitr_engine.rb +0 -33
  116. data/lib/util/exec_ruby.rb +1 -27
  117. data/specs/figures/bg.jpeg +0 -0
  118. data/specs/figures/bg.png +0 -0
  119. data/specs/figures/dose_len.png +0 -0
  120. data/specs/figures/no_args.jpeg +0 -0
  121. data/specs/figures/no_args.png +0 -0
  122. data/specs/figures/width_height.jpeg +0 -0
  123. data/specs/figures/width_height.png +0 -0
  124. data/specs/figures/width_height_units1.jpeg +0 -0
  125. data/specs/figures/width_height_units1.png +0 -0
  126. data/specs/figures/width_height_units2.jpeg +0 -0
  127. data/specs/figures/width_height_units2.png +0 -0
  128. data/specs/r_dataframe.spec.rb +11 -11
  129. data/specs/ruby_expression.spec.rb +1 -0
  130. data/specs/tmp.rb +41 -20
  131. data/version.rb +1 -1
  132. metadata +73 -35
  133. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +0 -41
  134. data/blogs/galaaz_ggplot/galaaz_ggplot.out +0 -10
  135. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/midwest_rb.pdf +0 -0
  136. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/scatter_plot_rb.pdf +0 -0
  137. data/blogs/gknit/gknit.md +0 -1430
  138. data/blogs/gknit/gknit.tex +0 -1358
  139. data/blogs/manual/graph.rb +0 -29
  140. data/blogs/nse_dplyr/nse_dplyr.tex +0 -1373
  141. data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +0 -662
  142. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +0 -57
  143. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +0 -106
  144. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +0 -110
  145. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +0 -174
  146. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +0 -236
  147. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +0 -296
  148. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +0 -236
  149. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +0 -218
  150. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +0 -128
  151. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +0 -150
  152. data/blogs/ruby_plot/ruby_plot_files/figure-latex/dose_len.png +0 -0
  153. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_delivery.png +0 -0
  154. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_dose.png +0 -0
  155. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color.png +0 -0
  156. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color2.png +0 -0
  157. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_decorations.png +0 -0
  158. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_jitter.png +0 -0
  159. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_points.png +0 -0
  160. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_box_plot.png +0 -0
  161. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_violin_plot.png +0 -0
  162. data/blogs/ruby_plot/ruby_plot_files/figure-latex/violin_with_jitter.png +0 -0
  163. data/examples/paper/paper.rb +0 -36
Binary file
@@ -38,37 +38,37 @@
38
38
  \usepackage{framed}
39
39
  \definecolor{shadecolor}{RGB}{248,248,248}
40
40
  \newenvironment{Shaded}{\begin{snugshade}}{\end{snugshade}}
41
- \newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{#1}}
42
- \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
43
- \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.77,0.63,0.00}{#1}}
41
+ \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
42
+ \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{#1}}
43
+ \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
44
44
  \newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
45
- \newcommand{\BuiltInTok}[1]{#1}
45
+ \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
46
+ \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
46
47
  \newcommand{\CharTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
48
+ \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
49
+ \newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
50
+ \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
51
+ \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
52
+ \newcommand{\ImportTok}[1]{#1}
47
53
  \newcommand{\CommentTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
48
- \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
49
- \newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
50
- \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
51
- \newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{#1}}
52
- \newcommand{\DecValTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
53
54
  \newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
54
- \newcommand{\ErrorTok}[1]{\textcolor[rgb]{0.64,0.00,0.00}{\textbf{#1}}}
55
- \newcommand{\ExtensionTok}[1]{#1}
56
- \newcommand{\FloatTok}[1]{\textcolor[rgb]{0.00,0.00,0.81}{#1}}
55
+ \newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
56
+ \newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
57
+ \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{#1}}
57
58
  \newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
58
- \newcommand{\ImportTok}[1]{#1}
59
- \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
60
- \newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
61
- \newcommand{\NormalTok}[1]{#1}
59
+ \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
60
+ \newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.13,0.29,0.53}{\textbf{#1}}}
62
61
  \newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.81,0.36,0.00}{\textbf{#1}}}
63
- \newcommand{\OtherTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{#1}}
62
+ \newcommand{\BuiltInTok}[1]{#1}
63
+ \newcommand{\ExtensionTok}[1]{#1}
64
64
  \newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textit{#1}}}
65
+ \newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.77,0.63,0.00}{#1}}
65
66
  \newcommand{\RegionMarkerTok}[1]{#1}
66
- \newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
67
- \newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
68
- \newcommand{\StringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
69
- \newcommand{\VariableTok}[1]{\textcolor[rgb]{0.00,0.00,0.00}{#1}}
70
- \newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.31,0.60,0.02}{#1}}
67
+ \newcommand{\InformationTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
71
68
  \newcommand{\WarningTok}[1]{\textcolor[rgb]{0.56,0.35,0.01}{\textbf{\textit{#1}}}}
69
+ \newcommand{\AlertTok}[1]{\textcolor[rgb]{0.94,0.16,0.16}{#1}}
70
+ \newcommand{\ErrorTok}[1]{\textcolor[rgb]{0.64,0.00,0.00}{\textbf{#1}}}
71
+ \newcommand{\NormalTok}[1]{#1}
72
72
  \usepackage{longtable,booktabs}
73
73
  \usepackage{graphicx,grffile}
74
74
  \makeatletter
@@ -216,11 +216,10 @@
216
216
  \maketitle
217
217
 
218
218
  {
219
- \setcounter{tocdepth}{2}
219
+ \setcounter{tocdepth}{3}
220
220
  \tableofcontents
221
221
  }
222
- \hypertarget{introduction}{%
223
- \section{Introduction}\label{introduction}}
222
+ \section{Introduction}\label{introduction}
224
223
 
225
224
  Galaaz is a system for tightly coupling Ruby and R. Ruby is a powerful
226
225
  language, with a large community, a very large set of libraries and
@@ -230,8 +229,111 @@ R is considered one of the most powerful languages for solving all of
230
229
  the above problems. Maybe the strongest competitor to R is Python with
231
230
  libraries such as NumPy, Panda, SciPy, SciKit-Learn and a couple more.
232
231
 
233
- \hypertarget{system-compatibility}{%
234
- \section{System Compatibility}\label{system-compatibility}}
232
+ With Galaaz we do not intend to re-implement any of the scientific
233
+ libraries in R, we allow for very tight coupling between the two
234
+ languages to the point that the Ruby developer does not need to know
235
+ that there is an R engine running.
236
+
237
+ According to Wikipedia ``Ruby is a dynamic, interpreted, reflective,
238
+ object-oriented, general-purpose programming language. It was designed
239
+ and developed in the mid-1990s by Yukihiro `Matz' Matsumoto in Japan.''
240
+ It reached high popularity with the development of Ruby on Rails (RoR)
241
+ by David Heinemeier Hansson. RoR is a web application framework first
242
+ released around 2005. It makes extensive use of Ruby's metaprogramming
243
+ features. With RoR, Ruby became very popular. According to
244
+ \href{https://www.tiobe.com/tiobe-index/ruby/}{Ruby's Tiobe index} it
245
+ peaked in popularity around 2008, then declined until 2015 when it
246
+ started picking up again. At the time of this writing (November 2018),
247
+ the Tiobe index puts Ruby in 16th position as most popular language.
248
+
249
+ Python, a language similar to Ruby, ranks 4th in the index. Java, C and
250
+ C++ take the first three positions. Ruby is often criticized for its
251
+ focus on web applications. But Ruby can do
252
+ \href{https://github.com/markets/awesome-ruby}{much more} than just web
253
+ applications. Yet, for scientific computing, Ruby lags way behind Python
254
+ and R. Python has Django framework for web, NumPy for numerical arrays,
255
+ Pandas for data analysis. R is a free software environment for
256
+ statistical computing and graphics with thousands of libraries for data
257
+ analysis.
258
+
259
+ Until recently, there was no real perspective for Ruby to bridge this
260
+ gap. Implementing a complete scientific computing infrastructure would
261
+ take too long. Enter \href{https://www.graalvm.org/}{Oracle's GraalVM}:
262
+
263
+ \begin{quote}
264
+ GraalVM is a universal virtual machine for running applications written
265
+ in JavaScript, Python 3, Ruby, R, JVM-based languages like Java, Scala,
266
+ Kotlin, and LLVM-based languages such as C and C++.
267
+
268
+ GraalVM removes the isolation between programming languages and enables
269
+ interoperability in a shared runtime. It can run either standalone or in
270
+ the context of OpenJDK, Node.js, Oracle Database, or MySQL.
271
+
272
+ GraalVM allows you to write polyglot applications with a seamless way to
273
+ pass values from one language to another. With GraalVM there is no
274
+ copying or marshaling necessary as it is with other polyglot systems.
275
+ This lets you achieve high performance when language boundaries are
276
+ crossed. Most of the time there is no additional cost for crossing a
277
+ language boundary at all.
278
+
279
+ Often developers have to make uncomfortable compromises that require
280
+ them to rewrite their software in other languages. For example:
281
+
282
+ \begin{itemize}
283
+ \tightlist
284
+ \item
285
+ That library is not available in my language. I need to rewrite it.
286
+ \item
287
+ That language would be the perfect fit for my problem, but we cannot
288
+ run it in our environment.
289
+ \item
290
+ That problem is already solved in my language, but the language is too
291
+ slow.
292
+ \end{itemize}
293
+
294
+ With GraalVM we aim to allow developers to freely choose the right
295
+ language for the task at hand without making compromises.
296
+ \end{quote}
297
+
298
+ As stated above, GraalVM is a \emph{universal} virtual machine that
299
+ allows Ruby and R (and other languages) to run on the same environment.
300
+ GraalVM allows polyglot applications to \emph{seamlessly} interact with
301
+ one another and pass values from one language to the other. Although a
302
+ great idea, GraalVM still requires application writers to know several
303
+ languages. To eliminate that requirement, we built Galaaz, a gem for
304
+ Ruby, to tightly couple Ruby and R and allow those languages to interact
305
+ in a way that the user will be unaware of such interaction. In other
306
+ words, a Ruby programmer will be able to use all the capabilities of R
307
+ without knowing the R syntax.
308
+
309
+ Library wrapping is a usual way of bringing features from one language
310
+ into another. To improve performance, Python often wraps more efficient
311
+ C libraries. For the Python developer, the existence of such C libraries
312
+ is hidden. The problem with library wrapping is that for any new
313
+ library, there is the need to handcraft a new wrapper.
314
+
315
+ Galaaz, instead of wrapping a single C or R library, wraps the whole R
316
+ language in Ruby. Doing so, all thousands of R libraries are available
317
+ immediately to Ruby developers without any new wrapping effort.
318
+
319
+ \subsection{What does Galaaz mean}\label{what-does-galaaz-mean}
320
+
321
+ Galaaz is the Portuguese name for ``Galahad''. From Wikipedia:
322
+
323
+ \begin{verbatim}
324
+ Sir Galahad (sometimes referred to as Galeas or Galath),
325
+ in Arthurian legend, is a knight of King Arthur's Round Table and one
326
+ of the three achievers of the Holy Grail. He is the illegitimate son
327
+ of Sir Lancelot and Elaine of Corbenic, and is renowned for his
328
+ gallantry and purity as the most perfect of all knights. Emerging quite
329
+ late in the medieval Arthurian tradition, Sir Galahad first appears in the
330
+ Lancelot–Grail cycle, and his story is taken up in later works such as
331
+ the Post-Vulgate Cycle and Sir Thomas Malory's Le Morte d'Arthur.
332
+ His name should not be mistaken with Galehaut, a different knight from
333
+ Arthurian legend.
334
+ \end{verbatim}
335
+
336
+ \section{System Compatibility}\label{system-compatibility}
235
337
 
236
338
  \begin{itemize}
237
339
  \tightlist
@@ -249,8 +351,7 @@ libraries such as NumPy, Panda, SciPy, SciKit-Learn and a couple more.
249
351
  macOS 10.13 (High Sierra)
250
352
  \end{itemize}
251
353
 
252
- \hypertarget{dependencies}{%
253
- \section{Dependencies}\label{dependencies}}
354
+ \section{Dependencies}\label{dependencies}
254
355
 
255
356
  \begin{itemize}
256
357
  \tightlist
@@ -260,100 +361,1365 @@ libraries such as NumPy, Panda, SciPy, SciKit-Learn and a couple more.
260
361
  FastR
261
362
  \end{itemize}
262
363
 
263
- \hypertarget{installation}{%
264
- \section{Installation}\label{installation}}
364
+ \section{Installation}\label{installation}
365
+
366
+ \begin{itemize}
367
+ \tightlist
368
+ \item
369
+ Install GraalVM (\url{http://www.graalvm.org/})
370
+ \item
371
+ Install Ruby (gu install Ruby)
372
+ \item
373
+ Install FastR (gu install R)
374
+ \item
375
+ Install rake if you want to run the specs and examples (gem install
376
+ rake)
377
+ \end{itemize}
378
+
379
+ \section{Usage}\label{usage}
380
+
381
+ \begin{itemize}
382
+ \tightlist
383
+ \item
384
+ Interactive shell: use `gstudio' on the command line
385
+ \end{itemize}
386
+
387
+ \begin{quote}
388
+ gstudio
389
+ \end{quote}
390
+
391
+ \begin{Shaded}
392
+ \begin{Highlighting}[]
393
+ \NormalTok{ vec = R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{4}\NormalTok{)}
394
+ \NormalTok{ puts vec}
395
+ \end{Highlighting}
396
+ \end{Shaded}
397
+
398
+ \begin{verbatim}
399
+ ## [1] 1 2 3 4
400
+ \end{verbatim}
401
+
402
+ \begin{itemize}
403
+ \tightlist
404
+ \item
405
+ Run all specs
406
+ \end{itemize}
407
+
408
+ \begin{quote}
409
+ galaaz specs:all
410
+ \end{quote}
411
+
412
+ \begin{itemize}
413
+ \tightlist
414
+ \item
415
+ Run graphics slideshow (80+ graphics)
416
+ \end{itemize}
417
+
418
+ \begin{quote}
419
+ galaaz sthda:all
420
+ \end{quote}
421
+
422
+ \begin{itemize}
423
+ \tightlist
424
+ \item
425
+ Run labs from Introduction to Statistical Learning with R
426
+ \end{itemize}
427
+
428
+ \begin{quote}
429
+ galaaz islr:all
430
+ \end{quote}
265
431
 
266
432
  \begin{itemize}
267
433
  \tightlist
268
434
  \item
269
- Install GrallVM (\url{http://www.graalvm.org/})
435
+ See all available examples
436
+ \end{itemize}
437
+
438
+ \begin{quote}
439
+ galaaz -T
440
+ \end{quote}
441
+
442
+ Shows a list with all available executable tasks. To execute a task,
443
+ substitute the `rake' word in the list with `galaaz'. For instance, the
444
+ following line shows up after `galaaz -T'
445
+
446
+ rake master\_list:scatter\_plot \# scatter\_plot from:\ldots{}.
447
+
448
+ execute
449
+
450
+ \begin{quote}
451
+ galaaz master\_list:scatter\_plot
452
+ \end{quote}
453
+
454
+ \section{Accessing R from Ruby}\label{accessing-r-from-ruby}
455
+
456
+ One of the nice aspects of Galaaz on GraalVM, is that variables and
457
+ functions defined in R, can be easily accessed from Ruby. For instance,
458
+ to access the `mtcars' data frame from R in Ruby, we use the `:mtcars'
459
+ symbol preceded by the `\textasciitilde{}' operator, thus
460
+ `\textasciitilde{}:mtcars' retrieves the value of the `mtcars' variable.
461
+
462
+ \begin{Shaded}
463
+ \begin{Highlighting}[]
464
+ \NormalTok{puts ~}\StringTok{:mtcars}
465
+ \end{Highlighting}
466
+ \end{Shaded}
467
+
468
+ \begin{verbatim}
469
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
470
+ ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
471
+ ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
472
+ ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
473
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
474
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
475
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
476
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
477
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
478
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
479
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
480
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
481
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
482
+ ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
483
+ ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
484
+ ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
485
+ ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
486
+ ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
487
+ ## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
488
+ ## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
489
+ ## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
490
+ ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
491
+ ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
492
+ ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
493
+ ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
494
+ ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
495
+ ## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
496
+ ## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
497
+ ## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
498
+ ## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
499
+ ## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
500
+ ## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
501
+ ## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
502
+ \end{verbatim}
503
+
504
+ To access an R function from Ruby, the R function needs to be preceded
505
+ by `R.' scoping. Below we see an example of creating an R::Vector by
506
+ calling the `c' R function
507
+
508
+ \begin{Shaded}
509
+ \begin{Highlighting}[]
510
+ \NormalTok{puts vec = R.c(}\FloatTok{1.0}\NormalTok{, }\FloatTok{2.0}\NormalTok{, }\FloatTok{3.0}\NormalTok{, }\FloatTok{4.0}\NormalTok{)}
511
+ \end{Highlighting}
512
+ \end{Shaded}
513
+
514
+ \begin{verbatim}
515
+ ## [1] 1 2 3 4
516
+ \end{verbatim}
517
+
518
+ Note that `vec' is an object of type R::Vector:
519
+
520
+ \begin{Shaded}
521
+ \begin{Highlighting}[]
522
+ \NormalTok{puts vec.class}
523
+ \end{Highlighting}
524
+ \end{Shaded}
525
+
526
+ \begin{verbatim}
527
+ ## R::Vector
528
+ \end{verbatim}
529
+
530
+ Every object created by a call to an R function will be of a type that
531
+ inherits from R::Object. In R, there is also a function `class'. In
532
+ order to access that function we can call method `rclass' in the
533
+ R::Object:
534
+
535
+ \begin{Shaded}
536
+ \begin{Highlighting}[]
537
+ \NormalTok{puts vec.rclass}
538
+ \end{Highlighting}
539
+ \end{Shaded}
540
+
541
+ \begin{verbatim}
542
+ ## [1] "numeric"
543
+ \end{verbatim}
544
+
545
+ When working with R::Object(s), it is possible to use the `.' operator
546
+ to pipe operations. When using `.', the object to which the `.' is
547
+ applied becomes the first argument of the corresponding R function. For
548
+ instance, function `c' in R, can be used to concatenate two or
549
+ more vectors (in R, there are no scalar values, scalars are converted to
550
+ vectors of size 1. Within Galaaz, scalar parameter is converted to a
551
+ size one vector):
552
+
553
+ \begin{Shaded}
554
+ \begin{Highlighting}[]
555
+ \NormalTok{puts R.c(vec, }\DecValTok{10}\NormalTok{, }\DecValTok{20}\NormalTok{, }\DecValTok{30}\NormalTok{)}
556
+ \end{Highlighting}
557
+ \end{Shaded}
558
+
559
+ \begin{verbatim}
560
+ ## [1] 1 2 3 4 10 20 30
561
+ \end{verbatim}
562
+
563
+ The call above to the `c' function can also be done using `.' notation:
564
+
565
+ \begin{Shaded}
566
+ \begin{Highlighting}[]
567
+ \NormalTok{puts vec.c(}\DecValTok{10}\NormalTok{, }\DecValTok{20}\NormalTok{, }\DecValTok{30}\NormalTok{)}
568
+ \end{Highlighting}
569
+ \end{Shaded}
570
+
571
+ \begin{verbatim}
572
+ ## [1] 1 2 3 4 10 20 30
573
+ \end{verbatim}
574
+
575
+ We will talk about vector indexing in a later section. But notice here
576
+ that indexing an R::Vector will return another R::Vector:
577
+
578
+ \begin{Shaded}
579
+ \begin{Highlighting}[]
580
+ \NormalTok{puts vec[}\DecValTok{1}\NormalTok{]}
581
+ \end{Highlighting}
582
+ \end{Shaded}
583
+
584
+ \begin{verbatim}
585
+ ## [1] 1
586
+ \end{verbatim}
587
+
588
+ Sometimes we want to index an R::Object and get back a Ruby object that
589
+ is not wrapped in an R::Object, but the native Ruby object. For this, we
590
+ can index the R object with the `\textgreater{}\textgreater{}' operator:
591
+
592
+ \begin{Shaded}
593
+ \begin{Highlighting}[]
594
+ \NormalTok{puts vec >> }\DecValTok{0}
595
+ \NormalTok{puts vec >> }\DecValTok{2}
596
+ \end{Highlighting}
597
+ \end{Shaded}
598
+
599
+ \begin{verbatim}
600
+ ## 1.0
601
+ ## 3.0
602
+ \end{verbatim}
603
+
604
+ It is also possible to call an R function with named arguments, by
605
+ creating the function in Galaaz with named parameters. For instance,
606
+ here is an example of creating a `list' with named elements:
607
+
608
+ \begin{Shaded}
609
+ \begin{Highlighting}[]
610
+ \NormalTok{puts R.list(}\StringTok{first_name: "Rodrigo"}\NormalTok{, }\StringTok{last_name: "Botafogo"}\NormalTok{)}
611
+ \end{Highlighting}
612
+ \end{Shaded}
613
+
614
+ \begin{verbatim}
615
+ ## $first_name
616
+ ## [1] "Rodrigo"
617
+ ##
618
+ ## $last_name
619
+ ## [1] "Botafogo"
620
+ \end{verbatim}
621
+
622
+ Many R functions receive another function as argument. For instance,
623
+ method `map' applies a function to every element of a vector. With
624
+ Galaaz, it is possible to pass a Proc, Method or Lambda in place of the
625
+ expected R function. In this next example, we will add 2 to every
626
+ element of our previously created vector:
627
+
628
+ \begin{Shaded}
629
+ \begin{Highlighting}[]
630
+ \NormalTok{puts vec.map \{ |x| x + }\DecValTok{2}\NormalTok{ \}}
631
+ \end{Highlighting}
632
+ \end{Shaded}
633
+
634
+ \begin{verbatim}
635
+ ## [1] 3
636
+ ## [1] 4
637
+ ## [1] 5
638
+ ## [1] 6
639
+ \end{verbatim}
640
+
641
+ \section{gKnitting a Document}\label{gknitting-a-document}
642
+
643
+ This manual has been formatted using gKnit. gKnit uses Knitr and R
644
+ markdown to knit a document in Ruby or R and output it in any of the
645
+ available formats for R markdown. gKnit runs atop GraalVM, and
646
+ Galaaz. In gKnit, Ruby variables are persisted between chunks, making it
647
+ an ideal solution for literate programming. Also, since it is based on
648
+ Galaaz, Ruby chunks can have access to R variables and Polyglot
649
+ Programming with Ruby and R is quite natural.
650
+
651
+ The idea of ``literate programming'' was first introduced by Donald
652
+ Knuth in the 1980's (Knuth 1984). The main intention of this approach
653
+ was to develop software interspersing macro snippets, traditional source
654
+ code, and a natural language such as English in a document that could be
655
+ compiled into executable code and at the same time easily read by a
656
+ human developer. According to Knuth ``The practitioner of literate
657
+ programming can be regarded as an essayist, whose main concern is with
658
+ exposition and excellence of style.''
659
+
660
+ The idea of literate programming evolved into the idea of reproducible
661
+ research, in which all the data, software code, documentation, graphics
662
+ etc. needed to reproduce the research and its reports could be included
663
+ in a single document or set of documents that when distributed to peers
664
+ could be rerun generating the same output and reports.
665
+
666
+ The R community has put a great deal of effort in reproducible research.
667
+ In 2002, Sweave was introduced and it allowed mixing R code with Latex
668
+ generating high quality PDF documents. A Sweave document could include
669
+ code, the results of executing the code, graphics and text such that it
670
+ contained the whole narrative to reproduce the research. In 2012, Knitr,
671
+ developed by Yihui Xie from RStudio was released to replace Sweave and
672
+ to consolidate in one single package the many extensions and add-on
673
+ packages that were necessary for Sweave.
674
+
675
+ With Knitr, \textbf{R markdown} was also developed, an extension to the
676
+ Markdown format. With \textbf{R markdown} and Knitr it is possible to
677
+ generate reports in a multitude of formats such as HTML, markdown,
678
+ Latex, PDF, dvi, etc. \textbf{R markdown} also allows the use of
679
+ multiple programming languages such as R, Ruby, Python, etc. in the same
680
+ document.
681
+
682
+ In \textbf{R markdown}, text is interspersed with code chunks that can
683
+ be executed and both the code and its results can become part of the
684
+ final report. Although \textbf{R markdown} allows multiple programming
685
+ languages in the same document, only R and Python (with the reticulate
686
+ package) can persist variables between chunks. For other languages, such
687
+ as Ruby, every chunk will start a new process and thus all data is lost
688
+ between chunks, unless it is somehow stored in a data file that is read
689
+ by the next chunk.
690
+
691
+ Being able to persist data between chunks is critical for literate
692
+ programming otherwise the flow of the narrative is lost by all the
693
+ effort of having to save data and then reload it. Although this might,
694
+ at first, seem like a small nuisance, not being able to persist data
695
+ between chunks is a major issue. For example, let's take a look at the
696
+ following simple example in which we want to show how to create a list
697
+ and then use it. Let's first assume that data cannot be persisted between
698
+ chunks. In the next chunk we create a list, then we would need to save
699
+ it to file, but to save it, we need somehow to marshal the data into a
700
+ binary format:
701
+
702
+ \begin{Shaded}
703
+ \begin{Highlighting}[]
704
+ \NormalTok{lst = R.list(}\StringTok{a: }\DecValTok{1}\NormalTok{, }\StringTok{b: }\DecValTok{2}\NormalTok{, }\StringTok{c: }\DecValTok{3}\NormalTok{)}
705
+ \NormalTok{lst.saveRDS(}\StringTok{"lst.rds"}\NormalTok{)}
706
+ \end{Highlighting}
707
+ \end{Shaded}
708
+
709
+ then, on the next chunk, where variable `lst' is used, we need to read
710
+ back its value
711
+
712
+ \begin{Shaded}
713
+ \begin{Highlighting}[]
714
+ \NormalTok{lst = R.readRDS(}\StringTok{"lst.rds"}\NormalTok{)}
715
+ \NormalTok{puts lst}
716
+ \end{Highlighting}
717
+ \end{Shaded}
718
+
719
+ \begin{verbatim}
720
+ ## $a
721
+ ## [1] 1
722
+ ##
723
+ ## $b
724
+ ## [1] 2
725
+ ##
726
+ ## $c
727
+ ## [1] 3
728
+ \end{verbatim}
729
+
730
+ Now, any single piece of code has dozens of variables that we might want to use
731
+ and reuse between chunks. Clearly, such an approach becomes quickly
732
+ unmanageable. Probably, because of this problem, it is very rare to see
733
+ any \textbf{R markdown} document in the Ruby community.
734
+
735
+ When variables can be used across chunks, then no overhead is needed:
736
+
737
+ \begin{Shaded}
738
+ \begin{Highlighting}[]
739
+ \NormalTok{lst = R.list(}\StringTok{a: }\DecValTok{1}\NormalTok{, }\StringTok{b: }\DecValTok{2}\NormalTok{, }\StringTok{c: }\DecValTok{3}\NormalTok{)}
740
+ \CommentTok{# any other code can be added here}
741
+ \end{Highlighting}
742
+ \end{Shaded}
743
+
744
+ \begin{Shaded}
745
+ \begin{Highlighting}[]
746
+ \NormalTok{puts lst}
747
+ \end{Highlighting}
748
+ \end{Shaded}
749
+
750
+ \begin{verbatim}
751
+ ## $a
752
+ ## [1] 1
753
+ ##
754
+ ## $b
755
+ ## [1] 2
756
+ ##
757
+ ## $c
758
+ ## [1] 3
759
+ \end{verbatim}
760
+
761
+ In the Python community, the same effort to have code and text in an
762
+ integrated environment started around the first decade of 2000. In 2006
763
+ iPython 0.7.2 was released. In 2014, Fernando Pérez spun off project
764
+ Jupyter from iPython creating a web-based interactive computation
765
+ environment. Jupyter can now be used with many languages, including Ruby
766
+ with the iruby gem (\url{https://github.com/SciRuby/iruby}). In order to
767
+ have multiple languages in a Jupyter notebook the SoS kernel was
768
+ developed (\url{https://vatlab.github.io/sos-docs/}).
769
+
770
+ \subsection{\texorpdfstring{gKnit and \textbf{R
771
+ markdown}}{gKnit and R markdown}}\label{gknit-and-r-markdown}
772
+
773
+ gKnit is based on knitr and \textbf{R markdown} and can knit a document
774
+ written both in Ruby and/or R and output it in any of the available
775
+ formats of \textbf{R markdown}. gKnit allows ruby developers to do
776
+ literate programming and reproducible research by allowing them to have
777
+ in a single document, text and code.
778
+
779
+ In gKnit, Ruby variables are persisted between chunks, making it an
780
+ ideal solution for literate programming in this language. Also, since it
781
+ is based on Galaaz, Ruby chunks can have access to R variables and
782
+ Polyglot Programming with Ruby and R is quite natural.
783
+
784
+ This is not a blog post on \textbf{R markdown}, and the interested user
785
+ is directed to the following links for detailed information on its
786
+ capabilities and use.
787
+
788
+ \begin{itemize}
789
+ \tightlist
790
+ \item
791
+ \url{https://rmarkdown.rstudio.com/} or
792
+ \item
793
+ \url{https://bookdown.org/yihui/rmarkdown/}
794
+ \end{itemize}
795
+
796
+ In this post, we will describe just the main aspects of \textbf{R
797
+ markdown}, so the user can start gKnitting Ruby and R documents quickly.
798
+
799
+ \subsection{The Yaml header}\label{the-yaml-header}
800
+
801
+ An \textbf{R markdown} document should start with a Yaml header and be
802
+ stored in a file with `.Rmd' extension. This document has the following
803
+ header for gKnitting an HTML document.
804
+
805
+ \begin{verbatim}
806
+ ---
807
+ title: "How to do reproducible research in Ruby with gKnit"
808
+ author:
809
+ - "Rodrigo Botafogo"
810
+ - "Daniel Mossé - University of Pittsburgh"
811
+ tags: [Tech, Data Science, Ruby, R, GraalVM]
812
+ date: "20/02/2019"
813
+ output:
814
+ html_document:
815
+ self_contained: true
816
+ keep_md: true
817
+ pdf_document:
818
+ includes:
819
+ in_header: ["../../sty/galaaz.sty"]
820
+ number_sections: yes
821
+ ---
822
+ \end{verbatim}
823
+
824
+ For more information on the options in the Yaml header,
825
+ \href{https://bookdown.org/yihui/rmarkdown/html-document.html}{check
826
+ here}.
827
+
828
+ \subsection{\texorpdfstring{\textbf{R Markdown}
829
+ formatting}{R Markdown formatting}}\label{r-markdown-formatting}
830
+
831
+ Document formatting can be done with simple markups such as:
832
+
833
+ \subsection{Headers}\label{headers}
834
+
835
+ \begin{verbatim}
836
+ # Header 1
837
+
838
+ ## Header 2
839
+
840
+ ### Header 3
841
+ \end{verbatim}
842
+
843
+ \subsection{Lists}\label{lists}
844
+
845
+ \begin{verbatim}
846
+ Unordered lists:
847
+
848
+ * Item 1
849
+ * Item 2
850
+ + Item 2a
851
+ + Item 2b
852
+ \end{verbatim}
853
+
854
+ \begin{verbatim}
855
+ Ordered Lists
856
+
857
+ 1. Item 1
858
+ 2. Item 2
859
+ 3. Item 3
860
+ + Item 3a
861
+ + Item 3b
862
+ \end{verbatim}
863
+
864
+ For more R markdown formatting go to
865
+ \url{https://rmarkdown.rstudio.com/authoring_basics.html}.
866
+
867
+ \subsection{R chunks}\label{r-chunks}
868
+
869
+ Running and executing Ruby and R code is actually what really interests
870
+ us in this blog.\\
871
+ Inserting a code chunk is done by adding code in a block delimited by
872
+ three back ticks followed by an open curly brace (`\{') followed with
873
+ the engine name (r, ruby, rb, include, \ldots{}), and any optional
874
+ chunk\_label and options, as shown below:
875
+
876
+ \begin{verbatim}
877
+ ```{engine_name [chunk_label], [chunk_options]}
878
+ ```
879
+ \end{verbatim}
880
+
881
+ for instance, let's add an R chunk to the document labeled
882
+ `first\_r\_chunk'. This is a very simple code just to create a variable
883
+ and print it out, as follows:
884
+
885
+ \begin{verbatim}
886
+ ```{r first_r_chunk}
887
+ vec <- c(1, 2, 3)
888
+ print(vec)
889
+ ```
890
+ \end{verbatim}
891
+
892
+ If this block is added to an \textbf{R markdown} document and gKnitted
893
+ the result will be:
894
+
895
+ \begin{Shaded}
896
+ \begin{Highlighting}[]
897
+ \NormalTok{vec <-}\StringTok{ }\KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{)}
898
+ \KeywordTok{print}\NormalTok{(vec)}
899
+ \end{Highlighting}
900
+ \end{Shaded}
901
+
902
+ \begin{verbatim}
903
+ ## [1] 1 2 3
904
+ \end{verbatim}
905
+
906
+ Now let's say that we want to do some analysis in the code, but just
907
+ print the result and not the code itself. For this, we need to add the
908
+ option `echo = FALSE'.
909
+
910
+ \begin{verbatim}
911
+ ```{r second_r_chunk, echo = FALSE}
912
+ vec2 <- c(10, 20, 30)
913
+ vec3 <- vec * vec2
914
+ print(vec3)
915
+ ```
916
+ \end{verbatim}
917
+
918
+ Here is how this block will show up in the document. Observe that the
919
+ code is not shown and we only see the execution result in a white box
920
+
921
+ \begin{verbatim}
922
+ ## [1] 10 40 90
923
+ \end{verbatim}
924
+
925
+ A description of the available chunk options can be found in
926
+ \url{https://yihui.name/knitr/}.
927
+
928
+ Let's add another R chunk with a function definition. In this example, a
929
+ vector `r\_vec' is created and a new function `reduce\_sum' is defined.
930
+ The chunk specification is
931
+
932
+ \begin{verbatim}
933
+ ```{r data_creation}
934
+ r_vec <- c(1, 2, 3, 4, 5)
935
+
936
+ reduce_sum <- function(...) {
937
+ Reduce(sum, as.list(...))
938
+ }
939
+ ```
940
+ \end{verbatim}
941
+
942
+ and this is what it will look like once executed. From now on, to be
943
+ concise in the presentation we will not show chunk definitions any
944
+ longer.
945
+
946
+ \begin{Shaded}
947
+ \begin{Highlighting}[]
948
+ \NormalTok{r_vec <-}\StringTok{ }\KeywordTok{c}\NormalTok{(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{4}\NormalTok{, }\DecValTok{5}\NormalTok{)}
949
+
950
+ \NormalTok{reduce_sum <-}\StringTok{ }\ControlFlowTok{function}\NormalTok{(...) \{}
951
+ \KeywordTok{Reduce}\NormalTok{(sum, }\KeywordTok{as.list}\NormalTok{(...))}
952
+ \NormalTok{\}}
953
+ \end{Highlighting}
954
+ \end{Shaded}
955
+
956
+ We can, possibly in another chunk, access the vector and call the
957
+ function as follows:
958
+
959
+ \begin{Shaded}
960
+ \begin{Highlighting}[]
961
+ \KeywordTok{print}\NormalTok{(r_vec)}
962
+ \end{Highlighting}
963
+ \end{Shaded}
964
+
965
+ \begin{verbatim}
966
+ ## [1] 1 2 3 4 5
967
+ \end{verbatim}
968
+
969
+ \begin{Shaded}
970
+ \begin{Highlighting}[]
971
+ \KeywordTok{print}\NormalTok{(}\KeywordTok{reduce_sum}\NormalTok{(r_vec))}
972
+ \end{Highlighting}
973
+ \end{Shaded}
974
+
975
+ \begin{verbatim}
976
+ ## [1] 15
977
+ \end{verbatim}
978
+
979
+ \subsection{R Graphics with ggplot}\label{r-graphics-with-ggplot}
980
+
981
+ In the following chunk, we create a bubble chart in R using ggplot and
982
+ include it in this document. Note that there is no directive in the code
983
+ to include the image, this occurs automatically. The `mpg' dataframe is
984
+ natively available to R and to Galaaz as well.
985
+
986
+ For the reader not knowledgeable of ggplot, ggplot is a graphics library
987
+ based on ``the grammar of graphics'' (Wilkinson 2005). The idea of the
988
+ grammar of graphics is to build a graphic by adding layers to the plot.
989
+ More information can be found in
990
+ \url{https://towardsdatascience.com/a-comprehensive-guide-to-the-grammar-of-graphics-for-effective-visualization-of-multi-dimensional-1f92b4ed4149}.
991
+
992
+ In the plot below the `mpg' dataset from the ggplot2 package is used. ``The data
993
+ concerns city-cycle fuel consumption in miles per gallon, to be
994
+ predicted in terms of 3 multivalued discrete and 5 continuous
995
+ attributes.'' (Quinlan, 1993)
996
+
997
+ First, the `mpg' dataset is filtered to extract only cars from the
998
+ following manufacturers: Audi, Ford, Honda, and Hyundai and stored in the
999
+ `mpg\_select' variable. Then, the selected dataframe is passed to the
1000
+ ggplot function specifying in the aesthetic method (aes) that
1001
+ `displacement' (displ) should be plotted on the `x' axis and `city
1002
+ mileage' should be on the `y' axis. In the `labs' layer we pass the
1003
+ `title' and `subtitle' for the plot. To the basic plot `g', geom\_jitter
1004
+ is added, which plots cars from the same manufacturer with the same color
1005
+ (col=manufacturer) and the size of the car point equal to its highway
1006
+ consumption (size = hwy). Finally, a last layer is plotted containing a
1007
+ linear regression line (method = ``lm'') for every manufacturer.
1008
+
1009
+ \begin{Shaded}
1010
+ \begin{Highlighting}[]
1011
+ \CommentTok{# load package and data}
1012
+ \KeywordTok{library}\NormalTok{(ggplot2)}
1013
+ \end{Highlighting}
1014
+ \end{Shaded}
1015
+
1016
+ \begin{verbatim}
1017
+ ## Message:
1018
+ ## Registered S3 methods overwritten by 'ggplot2':
1019
+ ## method from
1020
+ ## [.quosures rlang
1021
+ ## c.quosures rlang
1022
+ ## print.quosures rlang
1023
+ \end{verbatim}
1024
+
1025
+ \begin{Shaded}
1026
+ \begin{Highlighting}[]
1027
+ \KeywordTok{data}\NormalTok{(mpg, }\DataTypeTok{package=}\StringTok{"ggplot2"}\NormalTok{)}
1028
+
1029
+ \NormalTok{mpg_select <-}\StringTok{ }\NormalTok{mpg[mpg}\OperatorTok{$}\NormalTok{manufacturer }\OperatorTok{%in%}\StringTok{ }\KeywordTok{c}\NormalTok{(}\StringTok{"audi"}\NormalTok{, }\StringTok{"ford"}\NormalTok{, }\StringTok{"honda"}\NormalTok{, }\StringTok{"hyundai"}\NormalTok{), ]}
1030
+
1031
+ \CommentTok{# Scatterplot}
1032
+ \KeywordTok{theme_set}\NormalTok{(}\KeywordTok{theme_bw}\NormalTok{()) }\CommentTok{# pre-set the bw theme.}
1033
+ \NormalTok{g <-}\StringTok{ }\KeywordTok{ggplot}\NormalTok{(mpg_select, }\KeywordTok{aes}\NormalTok{(displ, cty)) }\OperatorTok{+}\StringTok{ }
1034
+ \StringTok{ }\KeywordTok{labs}\NormalTok{(}\DataTypeTok{subtitle=}\StringTok{"mpg: Displacement vs City Mileage"}\NormalTok{,}
1035
+ \DataTypeTok{title=}\StringTok{"Bubble chart"}\NormalTok{)}
1036
+
1037
+ \NormalTok{g }\OperatorTok{+}\StringTok{ }\KeywordTok{geom_jitter}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{col=}\NormalTok{manufacturer, }\DataTypeTok{size=}\NormalTok{hwy)) }\OperatorTok{+}\StringTok{ }
1038
+ \StringTok{ }\KeywordTok{geom_smooth}\NormalTok{(}\KeywordTok{aes}\NormalTok{(}\DataTypeTok{col=}\NormalTok{manufacturer), }\DataTypeTok{method=}\StringTok{"lm"}\NormalTok{, }\DataTypeTok{se=}\NormalTok{F)}
1039
+ \end{Highlighting}
1040
+ \end{Shaded}
1041
+
1042
+ \includegraphics{manual_files/figure-latex/bubble-1.png}
1043
+
1044
+ \subsection{Ruby chunks}\label{ruby-chunks}
1045
+
1046
+ Including a Ruby chunk is just as easy as including an R chunk in the
1047
+ document: just change the name of the engine to `ruby'. It is also
1048
+ possible to pass chunk options to the Ruby engine; however, this version
1049
+ does not accept all the options that are available to R chunks. Future
1050
+ versions will add those options.
1051
+
1052
+ \begin{verbatim}
1053
+ ```{ruby first_ruby_chunk}
1054
+ ```
1055
+ \end{verbatim}
1056
+
1057
+ In this example, the ruby chunk is called `first\_ruby\_chunk'. One
1058
+ important aspect of chunk labels is that they cannot be duplicated. If a
1059
+ chunk label is duplicated, gKnit will stop with an error.
1060
+
1061
+ In the following chunk, variables `a', `b' and `c' are standard Ruby
1062
+ variables and `vec' and `vec2' are two vectors created by calling the
1063
+ `c' method on the R module.
1064
+
1065
+ In Galaaz, the R module allows us to access R functions transparently.
1066
+ The `c' function in R is a function that concatenates its arguments
1067
+ making a vector.
1068
+
1069
+ It should be clear that there is no requirement in gKnit to call or use
1070
+ any R functions. gKnit will knit standard Ruby code, or even general
1071
+ text without any code.
1072
+
1073
+ \begin{Shaded}
1074
+ \begin{Highlighting}[]
1075
+ \NormalTok{a = [}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{]}
1076
+ \NormalTok{b = }\StringTok{"US$ 250.000"}
1077
+ \NormalTok{c = }\StringTok{"The 'outputs' function"}
1078
+
1079
+ \NormalTok{vec = R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{)}
1080
+ \NormalTok{vec2 = R.c(}\DecValTok{10}\NormalTok{, }\DecValTok{20}\NormalTok{, }\DecValTok{30}\NormalTok{)}
1081
+ \end{Highlighting}
1082
+ \end{Shaded}
1083
+
1084
+ In the next block, variables `a', `vec' and `vec2' are used and printed.
1085
+
1086
+ \begin{Shaded}
1087
+ \begin{Highlighting}[]
1088
+ \NormalTok{puts a}
1089
+ \NormalTok{puts vec * vec2}
1090
+ \end{Highlighting}
1091
+ \end{Shaded}
1092
+
1093
+ \begin{verbatim}
1094
+ ## 1
1095
+ ## 2
1096
+ ## 3
1097
+ ## [1] 10 40 90
1098
+ \end{verbatim}
1099
+
1100
+ Note that `a' is a standard Ruby Array and `vec' and `vec2' are vectors
1101
+ that behave accordingly, where multiplication works as expected.
1102
+
1103
+ \subsection{Inline Ruby code}\label{inline-ruby-code}
1104
+
1105
+ When using a Ruby chunk, the code and the output are formatted in blocks
1106
+ as seen above. This formatting is not always desired. Sometimes, we want
1107
+ to have the results of the Ruby evaluation included in the middle of a
1108
+ phrase. gKnit allows adding inline Ruby code with the `rb' engine. The
1109
+ following chunk specification will create an inline Ruby text:
1110
+
1111
+ \begin{verbatim}
1112
+ This is some text with inline Ruby accessing variable 'b' which has value:
1113
+ ```{rb puts b}
1114
+ ```
1115
+ and is followed by some other text!
1116
+ \end{verbatim}
1117
+
1118
+ This is some text with inline Ruby accessing variable `b' which has
1119
+ value: US\$ 250.000 and is followed by some other text!
1120
+
1121
+ Note that it is important not to add any new line before or after the
1122
+ code block if we want everything to be in only one line, resulting in
1123
+ the following sentence with inline Ruby code.
1124
+
1125
+ \subsubsection{\texorpdfstring{The `outputs'
1126
+ function}{The outputs function}}\label{the-outputs-function}
1127
+
1128
+ We have previously used the standard `puts' method in Ruby chunks in
1129
+ order to produce output. The result of a `puts', as seen in all previous
1130
+ chunks that use it, is formatted inside a white box that follows the
1131
+ code block. Many times however, we would like to do some processing in
1132
+ the Ruby chunk and have the result of this processing generate an
1133
+ output that is ``included'' in the document as if we had typed it in
1134
+ \textbf{R markdown} document.
1135
+
1136
+ For example, suppose we want to create a new heading in our document,
1137
+ but the heading phrase is the result of some code processing: maybe it's
1138
+ the first line of a file we are going to read. Method `outputs' adds its
1139
+ output as if typed in the \textbf{R markdown} document.
1140
+
1141
+ Take now a look at variable `c' (it was defined in a previous block
1142
+ above) as `c = ``The 'outputs' function''. ``The 'outputs' function'' is
1143
+ actually the name of this section and it was created using the 'outputs'
1144
+ function inside a Ruby chunk.
1145
+
1146
+ The ruby chunk to generate this heading is:
1147
+
1148
+ \begin{verbatim}
1149
+ ```{ruby heading}
1150
+ outputs "### #{c}"
1151
+ ```
1152
+ \end{verbatim}
1153
+
1154
+ The three `\#\#\#' is the way we add a Heading 3 in \textbf{R markdown}.
1155
+
1156
+ \subsubsection{HTML Output from Ruby
1157
+ Chunks}\label{html-output-from-ruby-chunks}
1158
+
1159
+ We've just seen the use of method `outputs' to add text to the
1160
+ \textbf{R markdown} document. This technique can also be used to add
1161
+ HTML code to the document. In \textbf{R markdown}, any html code typed
1162
+ directly in the document will be properly rendered.\\
1163
+ Here, for instance, is a table definition in HTML and its output in the
1164
+ document:
1165
+
1166
+ \begin{verbatim}
1167
+ <table style="width:100%">
1168
+ <tr>
1169
+ <th>Firstname</th>
1170
+ <th>Lastname</th>
1171
+ <th>Age</th>
1172
+ </tr>
1173
+ <tr>
1174
+ <td>Jill</td>
1175
+ <td>Smith</td>
1176
+ <td>50</td>
1177
+ </tr>
1178
+ <tr>
1179
+ <td>Eve</td>
1180
+ <td>Jackson</td>
1181
+ <td>94</td>
1182
+ </tr>
1183
+ </table>
1184
+ \end{verbatim}
1185
+
1186
+ \begin{verbatim}
1187
+ <th>Firstname</th>
1188
+ <th>Lastname</th>
1189
+ <th>Age</th>
1190
+ \end{verbatim}
1191
+
1192
+ \begin{verbatim}
1193
+ <td>Jill</td>
1194
+ <td>Smith</td>
1195
+ <td>50</td>
1196
+ \end{verbatim}
1197
+
1198
+ \begin{verbatim}
1199
+ <td>Eve</td>
1200
+ <td>Jackson</td>
1201
+ <td>94</td>
1202
+ \end{verbatim}
1203
+
1204
+ But manually creating HTML output is not always easy or desirable,
1205
+ especially if we intend the document to be rendered in other formats, for
1206
+ example, as Latex. Also, the above table looks ugly. The `kableExtra'
1207
+ library is a great library for creating beautiful tables. Take a look at
1208
+ \url{https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html}
1209
+
1210
+ In the next chunk, we output the `mtcars' dataframe from R in a nicely
1211
+ formatted table. Note that we retrieve the mtcars dataframe by using
1212
+ `\textasciitilde{}:mtcars'.
1213
+
1214
+ \begin{Shaded}
1215
+ \begin{Highlighting}[]
1216
+ \NormalTok{R.install_and_loads(}\StringTok{'kableExtra'}\NormalTok{)}
1217
+ \NormalTok{outputs (~}\StringTok{:mtcars}\NormalTok{).kable.kable_styling}
1218
+ \end{Highlighting}
1219
+ \end{Shaded}
1220
+
1221
+ \begin{table}[H]
1222
+ \centering
1223
+ \begin{tabular}{l|r|r|r|r|r|r|r|r|r|r|r}
1224
+ \hline
1225
+ & mpg & cyl & disp & hp & drat & wt & qsec & vs & am & gear & carb\\
1226
+ \hline
1227
+ Mazda RX4 & 21.0 & 6 & 160.0 & 110 & 3.90 & 2.620 & 16.46 & 0 & 1 & 4 & 4\\
1228
+ \hline
1229
+ Mazda RX4 Wag & 21.0 & 6 & 160.0 & 110 & 3.90 & 2.875 & 17.02 & 0 & 1 & 4 & 4\\
1230
+ \hline
1231
+ Datsun 710 & 22.8 & 4 & 108.0 & 93 & 3.85 & 2.320 & 18.61 & 1 & 1 & 4 & 1\\
1232
+ \hline
1233
+ Hornet 4 Drive & 21.4 & 6 & 258.0 & 110 & 3.08 & 3.215 & 19.44 & 1 & 0 & 3 & 1\\
1234
+ \hline
1235
+ Hornet Sportabout & 18.7 & 8 & 360.0 & 175 & 3.15 & 3.440 & 17.02 & 0 & 0 & 3 & 2\\
1236
+ \hline
1237
+ Valiant & 18.1 & 6 & 225.0 & 105 & 2.76 & 3.460 & 20.22 & 1 & 0 & 3 & 1\\
1238
+ \hline
1239
+ Duster 360 & 14.3 & 8 & 360.0 & 245 & 3.21 & 3.570 & 15.84 & 0 & 0 & 3 & 4\\
1240
+ \hline
1241
+ Merc 240D & 24.4 & 4 & 146.7 & 62 & 3.69 & 3.190 & 20.00 & 1 & 0 & 4 & 2\\
1242
+ \hline
1243
+ Merc 230 & 22.8 & 4 & 140.8 & 95 & 3.92 & 3.150 & 22.90 & 1 & 0 & 4 & 2\\
1244
+ \hline
1245
+ Merc 280 & 19.2 & 6 & 167.6 & 123 & 3.92 & 3.440 & 18.30 & 1 & 0 & 4 & 4\\
1246
+ \hline
1247
+ Merc 280C & 17.8 & 6 & 167.6 & 123 & 3.92 & 3.440 & 18.90 & 1 & 0 & 4 & 4\\
1248
+ \hline
1249
+ Merc 450SE & 16.4 & 8 & 275.8 & 180 & 3.07 & 4.070 & 17.40 & 0 & 0 & 3 & 3\\
1250
+ \hline
1251
+ Merc 450SL & 17.3 & 8 & 275.8 & 180 & 3.07 & 3.730 & 17.60 & 0 & 0 & 3 & 3\\
1252
+ \hline
1253
+ Merc 450SLC & 15.2 & 8 & 275.8 & 180 & 3.07 & 3.780 & 18.00 & 0 & 0 & 3 & 3\\
1254
+ \hline
1255
+ Cadillac Fleetwood & 10.4 & 8 & 472.0 & 205 & 2.93 & 5.250 & 17.98 & 0 & 0 & 3 & 4\\
1256
+ \hline
1257
+ Lincoln Continental & 10.4 & 8 & 460.0 & 215 & 3.00 & 5.424 & 17.82 & 0 & 0 & 3 & 4\\
1258
+ \hline
1259
+ Chrysler Imperial & 14.7 & 8 & 440.0 & 230 & 3.23 & 5.345 & 17.42 & 0 & 0 & 3 & 4\\
1260
+ \hline
1261
+ Fiat 128 & 32.4 & 4 & 78.7 & 66 & 4.08 & 2.200 & 19.47 & 1 & 1 & 4 & 1\\
1262
+ \hline
1263
+ Honda Civic & 30.4 & 4 & 75.7 & 52 & 4.93 & 1.615 & 18.52 & 1 & 1 & 4 & 2\\
1264
+ \hline
1265
+ Toyota Corolla & 33.9 & 4 & 71.1 & 65 & 4.22 & 1.835 & 19.90 & 1 & 1 & 4 & 1\\
1266
+ \hline
1267
+ Toyota Corona & 21.5 & 4 & 120.1 & 97 & 3.70 & 2.465 & 20.01 & 1 & 0 & 3 & 1\\
1268
+ \hline
1269
+ Dodge Challenger & 15.5 & 8 & 318.0 & 150 & 2.76 & 3.520 & 16.87 & 0 & 0 & 3 & 2\\
1270
+ \hline
1271
+ AMC Javelin & 15.2 & 8 & 304.0 & 150 & 3.15 & 3.435 & 17.30 & 0 & 0 & 3 & 2\\
1272
+ \hline
1273
+ Camaro Z28 & 13.3 & 8 & 350.0 & 245 & 3.73 & 3.840 & 15.41 & 0 & 0 & 3 & 4\\
1274
+ \hline
1275
+ Pontiac Firebird & 19.2 & 8 & 400.0 & 175 & 3.08 & 3.845 & 17.05 & 0 & 0 & 3 & 2\\
1276
+ \hline
1277
+ Fiat X1-9 & 27.3 & 4 & 79.0 & 66 & 4.08 & 1.935 & 18.90 & 1 & 1 & 4 & 1\\
1278
+ \hline
1279
+ Porsche 914-2 & 26.0 & 4 & 120.3 & 91 & 4.43 & 2.140 & 16.70 & 0 & 1 & 5 & 2\\
1280
+ \hline
1281
+ Lotus Europa & 30.4 & 4 & 95.1 & 113 & 3.77 & 1.513 & 16.90 & 1 & 1 & 5 & 2\\
1282
+ \hline
1283
+ Ford Pantera L & 15.8 & 8 & 351.0 & 264 & 4.22 & 3.170 & 14.50 & 0 & 1 & 5 & 4\\
1284
+ \hline
1285
+ Ferrari Dino & 19.7 & 6 & 145.0 & 175 & 3.62 & 2.770 & 15.50 & 0 & 1 & 5 & 6\\
1286
+ \hline
1287
+ Maserati Bora & 15.0 & 8 & 301.0 & 335 & 3.54 & 3.570 & 14.60 & 0 & 1 & 5 & 8\\
1288
+ \hline
1289
+ Volvo 142E & 21.4 & 4 & 121.0 & 109 & 4.11 & 2.780 & 18.60 & 1 & 1 & 4 & 2\\
1290
+ \hline
1291
+ \end{tabular}
1292
+ \end{table}
1293
+
1294
+ \subsection{Including Ruby files in a
1295
+ chunk}\label{including-ruby-files-in-a-chunk}
1296
+
1297
+ R is a language that was created to be easy and fast for statisticians
1298
+ to use. As far as I know, it was not a language to be used for
1299
+ developing large systems. Of course, there are large systems and
1300
+ libraries in R, but the focus of the language is for developing
1301
+ statistical models and distributing them to peers.
1302
+
1303
+ Ruby on the other hand, is a language for large software development.
1304
+ Systems written in Ruby will have dozens, hundreds or even thousands of
1305
+ files. To document a large system with literate programming, we cannot
1306
+ expect the developer to add all the files in a single `.Rmd' file. gKnit
1307
+ provides the `include' chunk engine to include a Ruby file as if it had
1308
+ been typed in the `.Rmd' file.
1309
+
1310
+ To include a file, the following chunk should be created, where \textless{}filename\textgreater{} is the
1311
+ name of the file to be included and where the extension, if it is `.rb',
1312
+ does not need to be added. If the `relative' option is not included,
1313
+ then it is treated as TRUE. When `relative' is true, ruby's
1314
+ `require\_relative' semantics is used to load the file, when false,
1315
+ Ruby's \$LOAD\_PATH is searched to find the file and it is 'require'd.
1316
+
1317
+ \begin{verbatim}
1318
+ ```{include <filename>, relative = <TRUE/FALSE>}
1319
+ ```
1320
+ \end{verbatim}
1321
+
1322
+ Below we include file `model.rb', which is in the same directory of
1323
+ this blog.\\
1324
+ This code uses the R `caret' package to split a dataset into train and test
1325
+ sets. The `caret' package is a very important and useful package for doing
1326
+ Data Analysis, it has hundreds of functions for all steps of the Data
1327
+ Analysis workflow. To use `caret' just to split a dataset is like using
1328
+ the proverbial cannon to kill the fly. We use it here only to show that
1329
+ integrating Ruby and R and using even a very complex package as `caret'
1330
+ is trivial with Galaaz.
1331
+
1332
+ A word of advice: the `caret' package has lots of dependencies and
1333
+ installing it in a Linux system is a time consuming operation. Method
1334
+ `R.install\_and\_loads' will install the package if it is not already
1335
+ installed and can take a while.
1336
+
1337
+ \begin{verbatim}
1338
+ ```{include model}
1339
+ ```
1340
+ \end{verbatim}
1341
+
1342
+ \begin{verbatim}
1343
+ require 'galaaz'
1344
+
1345
+ # Loads the R 'caret' package. If not present, installs it
1346
+ R.install_and_loads 'caret'
1347
+
1348
+ class Model
1349
+
1350
+ attr_reader :data
1351
+ attr_reader :test
1352
+ attr_reader :train
1353
+
1354
+ #==========================================================
1355
+ #
1356
+ #==========================================================
1357
+
1358
+ def initialize(data, percent_train:, seed: 123)
1359
+
1360
+ R.set__seed(seed)
1361
+ @data = data
1362
+ @percent_train = percent_train
1363
+ @seed = seed
1364
+
1365
+ end
1366
+
1367
+ #==========================================================
1368
+ #
1369
+ #==========================================================
1370
+
1371
+ def partition(field)
1372
+
1373
+ train_index =
1374
+ R.createDataPartition(@data.send(field), p: @percet_train,
1375
+ list: false, times: 1)
1376
+ @train = @data[train_index, :all]
1377
+ @test = @data[-train_index, :all]
1378
+
1379
+ end
1380
+
1381
+ end
1382
+ \end{verbatim}
1383
+
1384
+ \begin{Shaded}
1385
+ \begin{Highlighting}[]
1386
+ \NormalTok{mtcars = ~}\StringTok{:mtcars}
1387
+ \NormalTok{model = }\DataTypeTok{Model}\NormalTok{.new(mtcars, }\StringTok{percent_train: }\FloatTok{0.8}\NormalTok{)}
1388
+ \NormalTok{model.partition(}\StringTok{:mpg}\NormalTok{)}
1389
+ \NormalTok{puts model.train.head}
1390
+ \NormalTok{puts model.test.head}
1391
+ \end{Highlighting}
1392
+ \end{Shaded}
1393
+
1394
+ \begin{verbatim}
1395
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1396
+ ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
1397
+ ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
1398
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
1399
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
1400
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
1401
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
1402
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1403
+ ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
1404
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
1405
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
1406
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
1407
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
1408
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
1409
+ \end{verbatim}
1410
+
1411
+ \subsection{Documenting Gems}\label{documenting-gems}
1412
+
1413
+ gKnit also allows developers to document and load files that are not in
1414
+ the same directory of the `.Rmd' file.
1415
+
1416
+ Here is an example of loading the `find.rb' file from TruffleRuby. In
1417
+ this example, relative is set to FALSE, so Ruby will look for the file
1418
+ in its \$LOAD\_PATH, and the user does not need to know its directory.
1419
+
1420
+ \begin{verbatim}
1421
+ ```{include find, relative = FALSE}
1422
+ ```
1423
+ \end{verbatim}
1424
+
1425
+ \begin{verbatim}
1426
+ # frozen_string_literal: true
1427
+ #
1428
+ # find.rb: the Find module for processing all files under a given directory.
1429
+ #
1430
+
1431
+ #
1432
+ # The +Find+ module supports the top-down traversal of a set of file paths.
1433
+ #
1434
+ # For example, to total the size of all files under your home directory,
1435
+ # ignoring anything in a "dot" directory (e.g. $HOME/.ssh):
1436
+ #
1437
+ # require 'find'
1438
+ #
1439
+ # total_size = 0
1440
+ #
1441
+ # Find.find(ENV["HOME"]) do |path|
1442
+ # if FileTest.directory?(path)
1443
+ # if File.basename(path)[0] == ?.
1444
+ # Find.prune # Don't look any further into this directory.
1445
+ # else
1446
+ # next
1447
+ # end
1448
+ # else
1449
+ # total_size += FileTest.size(path)
1450
+ # end
1451
+ # end
1452
+ #
1453
+ module Find
1454
+
1455
+ #
1456
+ # Calls the associated block with the name of every file and directory listed
1457
+ # as arguments, then recursively on their subdirectories, and so on.
1458
+ #
1459
+ # Returns an enumerator if no block is given.
1460
+ #
1461
+ # See the +Find+ module documentation for an example.
1462
+ #
1463
+ def find(*paths, ignore_error: true) # :yield: path
1464
+ block_given? or return enum_for(__method__, *paths, ignore_error: ignore_error)
1465
+
1466
+ fs_encoding = Encoding.find("filesystem")
1467
+
1468
+ paths.collect!{|d| raise Errno::ENOENT, d unless File.exist?(d); d.dup}.each do |path|
1469
+ path = path.to_path if path.respond_to? :to_path
1470
+ enc = path.encoding == Encoding::US_ASCII ? fs_encoding : path.encoding
1471
+ ps = [path]
1472
+ while file = ps.shift
1473
+ catch(:prune) do
1474
+ yield file.dup.taint
1475
+ begin
1476
+ s = File.lstat(file)
1477
+ rescue Errno::ENOENT, Errno::EACCES, Errno::ENOTDIR, Errno::ELOOP, Errno::ENAMETOOLONG
1478
+ raise unless ignore_error
1479
+ next
1480
+ end
1481
+ if s.directory? then
1482
+ begin
1483
+ fs = Dir.children(file, encoding: enc)
1484
+ rescue Errno::ENOENT, Errno::EACCES, Errno::ENOTDIR, Errno::ELOOP, Errno::ENAMETOOLONG
1485
+ raise unless ignore_error
1486
+ next
1487
+ end
1488
+ fs.sort!
1489
+ fs.reverse_each {|f|
1490
+ f = File.join(file, f)
1491
+ ps.unshift f.untaint
1492
+ }
1493
+ end
1494
+ end
1495
+ end
1496
+ end
1497
+ nil
1498
+ end
1499
+
1500
+ #
1501
+ # Skips the current file or directory, restarting the loop with the next
1502
+ # entry. If the current file is a directory, that directory will not be
1503
+ # recursively entered. Meaningful only within the block associated with
1504
+ # Find::find.
1505
+ #
1506
+ # See the +Find+ module documentation for an example.
1507
+ #
1508
+ def prune
1509
+ throw :prune
1510
+ end
1511
+
1512
+ module_function :find, :prune
1513
+ end
1514
+ \end{verbatim}
1515
+
1516
+ \subsection{Converting to PDF}\label{converting-to-pdf}
1517
+
1518
+ One of the beauties of knitr is that the same input can be converted to
1519
+ many different outputs. One very useful format is, of course, PDF. In
1520
+ order to convert an \textbf{R markdown} file to PDF it is necessary to
1521
+ have LaTeX installed on the system. We will not explain here how to
1522
+ install LaTeX as there are plenty of documents on the web showing how to
1523
+ proceed.
1524
+
1525
+ gKnit comes with a simple LaTeX style file for gknitting this blog as a
1526
+ PDF document. Here is the Yaml header to generate this blog in PDF
1527
+ format instead of HTML:
1528
+
1529
+ \begin{verbatim}
1530
+ ---
1531
+ title: "gKnit - Ruby and R Knitting with Galaaz in GraalVM"
1532
+ author: "Rodrigo Botafogo"
1533
+ tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, knitr, gknit]
1534
+ date: "29 October 2018"
1535
+ output:
1536
+ pdf\_document:
1537
+ includes:
1538
+ in\_header: ["../../sty/galaaz.sty"]
1539
+ number\_sections: yes
1540
+ ---
1541
+ \end{verbatim}
1542
+
1543
+ \subsection{Template based documents
1544
+ generation}\label{template-based-documents-generation}
1545
+
1546
+ When a document is converted to PDF it follows a certain conversion
1547
+ template. We've seen above the use of `galaaz.sty' as a basic template
1548
+ to generate a PDF document. Using the `gknit-draft' app that comes with
1549
+ Galaaz, the same .Rmd file can be compiled to different looking PDF
1550
+ documents. Galaaz automatically loads the `rticles' R package that comes
1551
+ with templates for the following journals with the respective template
1552
+ name:
1553
+
1554
+ \begin{itemize}
1555
+ \tightlist
1556
+ \item
1557
+ ACM articles: acm\_article
1558
+ \item
1559
+ ACS articles: acs\_article
1560
+ \item
1561
+ AEA journal submissions: aea\_article
1562
+ \item
1563
+ AGU journal submissions: agu\_article
1564
+ \item
1565
+ AMS articles: ams\_article
1566
+ \item
1567
+ American Statistical Association: asa\_article
1568
+ \item
1569
+ Biometrics articles: biometrics\_article
1570
+ \item
1571
+ Bulletin de l'AMQ journal submissions: amq\_article
1572
+ \item
1573
+ CTeX documents: ctex
1574
+ \item
1575
+ Elsevier journal submissions: elsevier\_article
1576
+ \item
1577
+ IEEE Transaction journal submissions: ieee\_article
1578
+ \item
1579
+ JSS articles: jss\_article
1580
+ \item
1581
+ MDPI journal submissions: mdpi\_article
1582
+ \item
1583
+ Monthly Notices of the Royal Astronomical Society articles:
1584
+ mnras\_article
1585
+ \item
1586
+ NNRAS journal submissions: nmras\_article
1587
+ \item
1588
+ PeerJ articles: peerj\_article
1589
+ \item
1590
+ Royal Society Open Science journal submissions: rsos\_article
1591
+ \item
1592
+ Royal Statistical Society: rss\_article
1593
+ \item
1594
+ Sage journal submissions: sage\_article
1595
+ \item
1596
+ Springer journal submissions: springer\_article
1597
+ \item
1598
+ Statistics in Medicine journal submissions: sim\_article
270
1599
  \item
271
- Install Ruby (gu install Ruby)
1600
+ Copernicus Publications journal submissions: copernicus\_article
272
1601
  \item
273
- Install FastR (gu install R)
1602
+ The R Journal articles: rjournal\_article
274
1603
  \item
275
- Install rake if you want to run the specs and examples (gem install
276
- rake)
277
- \end{itemize}
278
-
279
- \hypertarget{usage}{%
280
- \section{Usage}\label{usage}}
281
-
282
- \begin{itemize}
1604
+ Frontiers articles: frontiers\_article
283
1605
  \item
284
- Interactive shell: use `gstudio' on the command line
285
-
286
- \begin{quote}
287
- gstudio
288
- \end{quote}
289
- \end{itemize}
290
-
291
- \begin{Shaded}
292
- \begin{Highlighting}[]
293
- \NormalTok{ vec = R.c(}\DecValTok{1}\NormalTok{, }\DecValTok{2}\NormalTok{, }\DecValTok{3}\NormalTok{, }\DecValTok{4}\NormalTok{)}
294
- \NormalTok{ puts vec}
295
- \end{Highlighting}
296
- \end{Shaded}
297
-
298
- \begin{verbatim}
299
- ## [1] 1 2 3 4
300
- \end{verbatim}
301
-
302
- \begin{itemize}
1606
+ Taylor \& Francis articles: tf\_article
303
1607
  \item
304
- Run all specs
305
-
306
- \begin{quote}
307
- galaaz specs:all
308
- \end{quote}
1608
+ Bulletin De L'AMQ: amq\_article
309
1609
  \item
310
- Run graphics slideshow (80+ graphics)
311
-
312
- \begin{quote}
313
- galaaz sthda:all
314
- \end{quote}
1610
+ PLOS journal: plos\_article
315
1611
  \item
316
- Run labs from Introduction to Statistical Learning with R
1612
+ Proceedings of the National Academy of Sciences of the USA:
1613
+ pnas\_article
1614
+ \end{itemize}
317
1615
 
318
- \begin{quote}
319
- galaaz islr:all
320
- \end{quote}
321
- \item
322
- See all available examples
1616
+ In order to create a document with one of those templates, use the
1617
+ following command:
323
1618
 
324
- \begin{quote}
325
- galaaz -T
326
- \end{quote}
1619
+ \begin{verbatim}
1620
+ gknit-draft --filename <my_document> --template <template> --package <package>
1621
+ --create_dir
1622
+ \end{verbatim}
327
1623
 
328
- Shows a list with all available executalbe tasks. To execute a task,
329
- substitute the `rake' word in the list with `galaaz'. For instance,
330
- the following line shows up after `galaaz -T'
1624
+ So, in order to create a template for writing an R Journal article, use:
331
1625
 
332
- rake master\_list:scatter\_plot \# scatter\_plot from:\ldots{}.
1626
+ \begin{verbatim}
1627
+ gknit-draft --filename my_r_article --template rjournal_article --package rticles
1628
+ --create_dir
1629
+ \end{verbatim}
333
1630
 
334
- execute
1631
+ \section{Accessing R variables}\label{accessing-r-variables}
335
1632
 
336
- \begin{quote}
337
- galaaz master\_list:scatter\_plot
338
- \end{quote}
339
- \end{itemize}
1633
+ Galaaz allows Ruby to access variables created in R. For example, the
1634
+ `mtcars' data set is available in R and can be accessed from Ruby by
1635
+ using the `tilde' operator followed by the symbol for the variable, in
1636
+ this case `:mtcars'. In the code below method `outputs' is used to
1637
+ output the `mtcars' data set nicely formatted in HTML by use of the
1638
+ `kable' and `kable\_styling' functions. Method `outputs' is only
1639
+ available when used with `gknit'.
340
1640
 
341
- \hypertarget{gknitting-a-document}{%
342
- \section{gKnitting a Document}\label{gknitting-a-document}}
1641
+ \begin{Shaded}
1642
+ \begin{Highlighting}[]
1643
+ \NormalTok{outputs (~}\StringTok{:mtcars}\NormalTok{).kable.kable_styling}
1644
+ \end{Highlighting}
1645
+ \end{Shaded}
343
1646
 
344
- This manual has been formatted usign gKnit. gKnit uses Knitr and R
345
- markdown to knit a document in Ruby or R and output it in any of the
346
- available formats for R markdown. gKnit runs atop of GraalVM, and
347
- Galaaz. In gKnit, Ruby variables are persisted between chunks, making it
348
- an ideal solution for literate programming. Also, since it is based on
349
- Galaaz, Ruby chunks can have access to R variables and Polyglot
350
- Programming with Ruby and R is quite natural.
1647
+ \begin{table}[H]
1648
+ \centering
1649
+ \begin{tabular}{l|r|r|r|r|r|r|r|r|r|r|r}
1650
+ \hline
1651
+ & mpg & cyl & disp & hp & drat & wt & qsec & vs & am & gear & carb\\
1652
+ \hline
1653
+ Mazda RX4 & 21.0 & 6 & 160.0 & 110 & 3.90 & 2.620 & 16.46 & 0 & 1 & 4 & 4\\
1654
+ \hline
1655
+ Mazda RX4 Wag & 21.0 & 6 & 160.0 & 110 & 3.90 & 2.875 & 17.02 & 0 & 1 & 4 & 4\\
1656
+ \hline
1657
+ Datsun 710 & 22.8 & 4 & 108.0 & 93 & 3.85 & 2.320 & 18.61 & 1 & 1 & 4 & 1\\
1658
+ \hline
1659
+ Hornet 4 Drive & 21.4 & 6 & 258.0 & 110 & 3.08 & 3.215 & 19.44 & 1 & 0 & 3 & 1\\
1660
+ \hline
1661
+ Hornet Sportabout & 18.7 & 8 & 360.0 & 175 & 3.15 & 3.440 & 17.02 & 0 & 0 & 3 & 2\\
1662
+ \hline
1663
+ Valiant & 18.1 & 6 & 225.0 & 105 & 2.76 & 3.460 & 20.22 & 1 & 0 & 3 & 1\\
1664
+ \hline
1665
+ Duster 360 & 14.3 & 8 & 360.0 & 245 & 3.21 & 3.570 & 15.84 & 0 & 0 & 3 & 4\\
1666
+ \hline
1667
+ Merc 240D & 24.4 & 4 & 146.7 & 62 & 3.69 & 3.190 & 20.00 & 1 & 0 & 4 & 2\\
1668
+ \hline
1669
+ Merc 230 & 22.8 & 4 & 140.8 & 95 & 3.92 & 3.150 & 22.90 & 1 & 0 & 4 & 2\\
1670
+ \hline
1671
+ Merc 280 & 19.2 & 6 & 167.6 & 123 & 3.92 & 3.440 & 18.30 & 1 & 0 & 4 & 4\\
1672
+ \hline
1673
+ Merc 280C & 17.8 & 6 & 167.6 & 123 & 3.92 & 3.440 & 18.90 & 1 & 0 & 4 & 4\\
1674
+ \hline
1675
+ Merc 450SE & 16.4 & 8 & 275.8 & 180 & 3.07 & 4.070 & 17.40 & 0 & 0 & 3 & 3\\
1676
+ \hline
1677
+ Merc 450SL & 17.3 & 8 & 275.8 & 180 & 3.07 & 3.730 & 17.60 & 0 & 0 & 3 & 3\\
1678
+ \hline
1679
+ Merc 450SLC & 15.2 & 8 & 275.8 & 180 & 3.07 & 3.780 & 18.00 & 0 & 0 & 3 & 3\\
1680
+ \hline
1681
+ Cadillac Fleetwood & 10.4 & 8 & 472.0 & 205 & 2.93 & 5.250 & 17.98 & 0 & 0 & 3 & 4\\
1682
+ \hline
1683
+ Lincoln Continental & 10.4 & 8 & 460.0 & 215 & 3.00 & 5.424 & 17.82 & 0 & 0 & 3 & 4\\
1684
+ \hline
1685
+ Chrysler Imperial & 14.7 & 8 & 440.0 & 230 & 3.23 & 5.345 & 17.42 & 0 & 0 & 3 & 4\\
1686
+ \hline
1687
+ Fiat 128 & 32.4 & 4 & 78.7 & 66 & 4.08 & 2.200 & 19.47 & 1 & 1 & 4 & 1\\
1688
+ \hline
1689
+ Honda Civic & 30.4 & 4 & 75.7 & 52 & 4.93 & 1.615 & 18.52 & 1 & 1 & 4 & 2\\
1690
+ \hline
1691
+ Toyota Corolla & 33.9 & 4 & 71.1 & 65 & 4.22 & 1.835 & 19.90 & 1 & 1 & 4 & 1\\
1692
+ \hline
1693
+ Toyota Corona & 21.5 & 4 & 120.1 & 97 & 3.70 & 2.465 & 20.01 & 1 & 0 & 3 & 1\\
1694
+ \hline
1695
+ Dodge Challenger & 15.5 & 8 & 318.0 & 150 & 2.76 & 3.520 & 16.87 & 0 & 0 & 3 & 2\\
1696
+ \hline
1697
+ AMC Javelin & 15.2 & 8 & 304.0 & 150 & 3.15 & 3.435 & 17.30 & 0 & 0 & 3 & 2\\
1698
+ \hline
1699
+ Camaro Z28 & 13.3 & 8 & 350.0 & 245 & 3.73 & 3.840 & 15.41 & 0 & 0 & 3 & 4\\
1700
+ \hline
1701
+ Pontiac Firebird & 19.2 & 8 & 400.0 & 175 & 3.08 & 3.845 & 17.05 & 0 & 0 & 3 & 2\\
1702
+ \hline
1703
+ Fiat X1-9 & 27.3 & 4 & 79.0 & 66 & 4.08 & 1.935 & 18.90 & 1 & 1 & 4 & 1\\
1704
+ \hline
1705
+ Porsche 914-2 & 26.0 & 4 & 120.3 & 91 & 4.43 & 2.140 & 16.70 & 0 & 1 & 5 & 2\\
1706
+ \hline
1707
+ Lotus Europa & 30.4 & 4 & 95.1 & 113 & 3.77 & 1.513 & 16.90 & 1 & 1 & 5 & 2\\
1708
+ \hline
1709
+ Ford Pantera L & 15.8 & 8 & 351.0 & 264 & 4.22 & 3.170 & 14.50 & 0 & 1 & 5 & 4\\
1710
+ \hline
1711
+ Ferrari Dino & 19.7 & 6 & 145.0 & 175 & 3.62 & 2.770 & 15.50 & 0 & 1 & 5 & 6\\
1712
+ \hline
1713
+ Maserati Bora & 15.0 & 8 & 301.0 & 335 & 3.54 & 3.570 & 14.60 & 0 & 1 & 5 & 8\\
1714
+ \hline
1715
+ Volvo 142E & 21.4 & 4 & 121.0 & 109 & 4.11 & 2.780 & 18.60 & 1 & 1 & 4 & 2\\
1716
+ \hline
1717
+ \end{tabular}
1718
+ \end{table}
351
1719
 
352
- \href{https://towardsdatascience.com/how-to-do-reproducible-research-in-ruby-with-gknit-c26d2684d64e}{gknit
353
- is described in more details here}
1720
+ \section{Basic Data Types}\label{basic-data-types}
354
1721
 
355
- \hypertarget{vector}{%
356
- \section{Vector}\label{vector}}
1722
+ \subsection{Vector}\label{vector}
357
1723
 
358
1724
  Vectors can be thought of as contiguous cells containing data. Cells are
359
1725
  accessed through indexing operations such as x{[}5{]}. Galaaz has six
@@ -401,7 +1767,7 @@ printing it out. A data frame, for those not familiar with it, is
401
1767
  basically a table. Here we create the data frame and add the column name
402
1768
  by passing named parameters for each column, such as `typeof:', `mode:'
403
1769
  and 'storage\_\_mode?`. You should also note here that the double
404
- underscore is converted to a'.'. So, when printed `storage\_\_mode' will
1770
+ underscore is converted to a `.'. So, when printed `storage\_\_mode' will
405
1771
  actually print as `storage.mode'.
406
1772
 
407
1773
  Data frames will later be more carefully described. In R, the method
@@ -469,7 +1835,7 @@ error.
469
1835
 
470
1836
  \begin{verbatim}
471
1837
  ## Message:
472
- ## undefined local variable or method `hello' for #<RC:0x2e0 @out_list=nil>:RC
1838
+ ## undefined local variable or method `hello' for #<RC:0x3d8 @out_list=nil>:RC
473
1839
  \end{verbatim}
474
1840
 
475
1841
  \begin{verbatim}
@@ -482,18 +1848,15 @@ error.
482
1848
  ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
483
1849
  ## (eval):3:in `function(...) {\n rb_method(...)'
484
1850
  ## unknown.r:1:in `in_dir'
485
- ## unknown.r:1:in `block_exec:BLOCK0'
486
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:102:in `block_exec'
487
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:92:in `call_block'
488
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:6:in `process_group.block'
489
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:3:in `<no source>'
1851
+ ## unknown.r:1:in `block_exec'
1852
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:92:in `call_block'
1853
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:6:in `process_group.block'
1854
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:3:in `<no source>'
490
1855
  ## unknown.r:1:in `withCallingHandlers'
491
1856
  ## unknown.r:1:in `process_file'
492
- ## unknown.r:1:in `<no source>:BLOCK1'
493
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/output.R:129:in `<no source>'
494
- ## unknown.r:1:in `<no source>:BLOCK1'
495
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/rmarkdown/R/render.R:162:in `<no source>'
496
- ## <REPL>:5:in `<repl wrapper>'
1857
+ ## unknown.r:1:in `<no source>'
1858
+ ## unknown.r:1:in `<no source>'
1859
+ ## <REPL>:4:in `<repl wrapper>'
497
1860
  ## <REPL>:1
498
1861
  \end{verbatim}
499
1862
 
@@ -510,8 +1873,7 @@ Here is a vector with logical values
510
1873
  ## [1] TRUE TRUE FALSE FALSE TRUE
511
1874
  \end{verbatim}
512
1875
 
513
- \hypertarget{combining-vectors}{%
514
- \subsection{Combining Vectors}\label{combining-vectors}}
1876
+ \subsubsection{Combining Vectors}\label{combining-vectors}
515
1877
 
516
1878
  The `c' functions used to create vectors can also be used to combine two
517
1879
  vectors:
@@ -549,8 +1911,7 @@ converted to the code above.
549
1911
  ## [1] 10 20 30 4 5 6
550
1912
  \end{verbatim}
551
1913
 
552
- \hypertarget{vector-arithmetic}{%
553
- \subsection{Vector Arithmetic}\label{vector-arithmetic}}
1914
+ \subsubsection{Vector Arithmetic}\label{vector-arithmetic}
554
1915
 
555
1916
  Arithmetic operations on vectors are performed element by element:
556
1917
 
@@ -588,8 +1949,7 @@ shorter vector:
588
1949
  ## [1] 11 22 33 14 25 36 17 28 39
589
1950
  \end{verbatim}
590
1951
 
591
- \hypertarget{vector-indexing}{%
592
- \subsection{Vector Indexing}\label{vector-indexing}}
1952
+ \subsubsection{Vector Indexing}\label{vector-indexing}
593
1953
 
594
1954
  Vectors can be indexed by using the `{[}{]}' operator:
595
1955
 
@@ -698,9 +2058,8 @@ paramenters:
698
2058
  ## "Rodrigo" "A" "Botafogo"
699
2059
  \end{verbatim}
700
2060
 
701
- \hypertarget{extracting-native-ruby-types-from-a-vector}{%
702
- \subsection{Extracting Native Ruby Types from a
703
- Vector}\label{extracting-native-ruby-types-from-a-vector}}
2061
+ \subsubsection{Extracting Native Ruby Types from a
2062
+ Vector}\label{extracting-native-ruby-types-from-a-vector}
704
2063
 
705
2064
  Vectors created with `R.c' are of class R::Vector. You might have
706
2065
  noticed that when indexing a vector, a new vector is returned, even if
@@ -726,98 +2085,7 @@ type from the vector. In order to do this extraction the
726
2085
  Note that indexing with `\textgreater{}\textgreater{}' starts at 0 and
727
2086
  not at 1, also, we cannot do negative indexing.
728
2087
 
729
- \hypertarget{accessing-r-variables}{%
730
- \section{Accessing R variables}\label{accessing-r-variables}}
731
-
732
- Galaaz allows Ruby to access variables created in R. For example, the
733
- `mtcars' data set is available in R and can be accessed from Ruby by
734
- using the `tilda' operator followed by the symbol for the variable, in
735
- this case `:mtcar'. In the code bellow method `outputs' is used to
736
- output the `mtcars' data set nicely formatted in HTML by use of the
737
- `kable' and `kable\_styling' functions. Method `outputs' is only
738
- available when used with `gknit'.
739
-
740
- \begin{Shaded}
741
- \begin{Highlighting}[]
742
- \NormalTok{outputs (~}\StringTok{:mtcars}\NormalTok{).kable.kable_styling}
743
- \end{Highlighting}
744
- \end{Shaded}
745
-
746
- \begin{table}[H]
747
- \centering
748
- \begin{tabular}{l|r|r|r|r|r|r|r|r|r|r|r}
749
- \hline
750
- & mpg & cyl & disp & hp & drat & wt & qsec & vs & am & gear & carb\\
751
- \hline
752
- Mazda RX4 & 21.0 & 6 & 160.0 & 110 & 3.90 & 2.620 & 16.46 & 0 & 1 & 4 & 4\\
753
- \hline
754
- Mazda RX4 Wag & 21.0 & 6 & 160.0 & 110 & 3.90 & 2.875 & 17.02 & 0 & 1 & 4 & 4\\
755
- \hline
756
- Datsun 710 & 22.8 & 4 & 108.0 & 93 & 3.85 & 2.320 & 18.61 & 1 & 1 & 4 & 1\\
757
- \hline
758
- Hornet 4 Drive & 21.4 & 6 & 258.0 & 110 & 3.08 & 3.215 & 19.44 & 1 & 0 & 3 & 1\\
759
- \hline
760
- Hornet Sportabout & 18.7 & 8 & 360.0 & 175 & 3.15 & 3.440 & 17.02 & 0 & 0 & 3 & 2\\
761
- \hline
762
- Valiant & 18.1 & 6 & 225.0 & 105 & 2.76 & 3.460 & 20.22 & 1 & 0 & 3 & 1\\
763
- \hline
764
- Duster 360 & 14.3 & 8 & 360.0 & 245 & 3.21 & 3.570 & 15.84 & 0 & 0 & 3 & 4\\
765
- \hline
766
- Merc 240D & 24.4 & 4 & 146.7 & 62 & 3.69 & 3.190 & 20.00 & 1 & 0 & 4 & 2\\
767
- \hline
768
- Merc 230 & 22.8 & 4 & 140.8 & 95 & 3.92 & 3.150 & 22.90 & 1 & 0 & 4 & 2\\
769
- \hline
770
- Merc 280 & 19.2 & 6 & 167.6 & 123 & 3.92 & 3.440 & 18.30 & 1 & 0 & 4 & 4\\
771
- \hline
772
- Merc 280C & 17.8 & 6 & 167.6 & 123 & 3.92 & 3.440 & 18.90 & 1 & 0 & 4 & 4\\
773
- \hline
774
- Merc 450SE & 16.4 & 8 & 275.8 & 180 & 3.07 & 4.070 & 17.40 & 0 & 0 & 3 & 3\\
775
- \hline
776
- Merc 450SL & 17.3 & 8 & 275.8 & 180 & 3.07 & 3.730 & 17.60 & 0 & 0 & 3 & 3\\
777
- \hline
778
- Merc 450SLC & 15.2 & 8 & 275.8 & 180 & 3.07 & 3.780 & 18.00 & 0 & 0 & 3 & 3\\
779
- \hline
780
- Cadillac Fleetwood & 10.4 & 8 & 472.0 & 205 & 2.93 & 5.250 & 17.98 & 0 & 0 & 3 & 4\\
781
- \hline
782
- Lincoln Continental & 10.4 & 8 & 460.0 & 215 & 3.00 & 5.424 & 17.82 & 0 & 0 & 3 & 4\\
783
- \hline
784
- Chrysler Imperial & 14.7 & 8 & 440.0 & 230 & 3.23 & 5.345 & 17.42 & 0 & 0 & 3 & 4\\
785
- \hline
786
- Fiat 128 & 32.4 & 4 & 78.7 & 66 & 4.08 & 2.200 & 19.47 & 1 & 1 & 4 & 1\\
787
- \hline
788
- Honda Civic & 30.4 & 4 & 75.7 & 52 & 4.93 & 1.615 & 18.52 & 1 & 1 & 4 & 2\\
789
- \hline
790
- Toyota Corolla & 33.9 & 4 & 71.1 & 65 & 4.22 & 1.835 & 19.90 & 1 & 1 & 4 & 1\\
791
- \hline
792
- Toyota Corona & 21.5 & 4 & 120.1 & 97 & 3.70 & 2.465 & 20.01 & 1 & 0 & 3 & 1\\
793
- \hline
794
- Dodge Challenger & 15.5 & 8 & 318.0 & 150 & 2.76 & 3.520 & 16.87 & 0 & 0 & 3 & 2\\
795
- \hline
796
- AMC Javelin & 15.2 & 8 & 304.0 & 150 & 3.15 & 3.435 & 17.30 & 0 & 0 & 3 & 2\\
797
- \hline
798
- Camaro Z28 & 13.3 & 8 & 350.0 & 245 & 3.73 & 3.840 & 15.41 & 0 & 0 & 3 & 4\\
799
- \hline
800
- Pontiac Firebird & 19.2 & 8 & 400.0 & 175 & 3.08 & 3.845 & 17.05 & 0 & 0 & 3 & 2\\
801
- \hline
802
- Fiat X1-9 & 27.3 & 4 & 79.0 & 66 & 4.08 & 1.935 & 18.90 & 1 & 1 & 4 & 1\\
803
- \hline
804
- Porsche 914-2 & 26.0 & 4 & 120.3 & 91 & 4.43 & 2.140 & 16.70 & 0 & 1 & 5 & 2\\
805
- \hline
806
- Lotus Europa & 30.4 & 4 & 95.1 & 113 & 3.77 & 1.513 & 16.90 & 1 & 1 & 5 & 2\\
807
- \hline
808
- Ford Pantera L & 15.8 & 8 & 351.0 & 264 & 4.22 & 3.170 & 14.50 & 0 & 1 & 5 & 4\\
809
- \hline
810
- Ferrari Dino & 19.7 & 6 & 145.0 & 175 & 3.62 & 2.770 & 15.50 & 0 & 1 & 5 & 6\\
811
- \hline
812
- Maserati Bora & 15.0 & 8 & 301.0 & 335 & 3.54 & 3.570 & 14.60 & 0 & 1 & 5 & 8\\
813
- \hline
814
- Volvo 142E & 21.4 & 4 & 121.0 & 109 & 4.11 & 2.780 & 18.60 & 1 & 1 & 4 & 2\\
815
- \hline
816
- \end{tabular}
817
- \end{table}
818
-
819
- \hypertarget{matrix}{%
820
- \section{Matrix}\label{matrix}}
2088
+ \subsection{Matrix}\label{matrix}
821
2089
 
822
2090
  A matrix is a collection of elements organized as a two dimensional
823
2091
  table. A matrix can be created by the `matrix' function:
@@ -861,8 +2129,7 @@ organize the matrix memory by row first passing an extra argument to the
861
2129
  ## [3,] 7 8 9
862
2130
  \end{verbatim}
863
2131
 
864
- \hypertarget{indexing-a-matrix}{%
865
- \subsection{Indexing a Matrix}\label{indexing-a-matrix}}
2132
+ \subsubsection{Indexing a Matrix}\label{indexing-a-matrix}
866
2133
 
867
2134
  A matrix can be indexed by {[}row, column{]}:
868
2135
 
@@ -941,8 +2208,7 @@ and `cbind':
941
2208
  ## [3,] 7 8 9 3 6 9
942
2209
  \end{verbatim}
943
2210
 
944
- \hypertarget{list}{%
945
- \section{List}\label{list}}
2211
+ \subsection{List}\label{list}
946
2212
 
947
2213
  A list is a data structure that can contain sublists of different types,
948
2214
  while vector and matrix can only hold one type of element.
@@ -970,8 +2236,7 @@ while vector and matrix can only hold one type of element.
970
2236
 
971
2237
  Note that `lst' elements are named elements.
972
2238
 
973
- \hypertarget{list-indexing}{%
974
- \subsection{List Indexing}\label{list-indexing}}
2239
+ \subsubsection{List Indexing}\label{list-indexing}
975
2240
 
976
2241
  List indexing, also called slicing, is done using the `{[}{]}' operator
977
2242
  and the `{[}{[}{]}{]}' operator. Let's first start with the `{[}{]}'
@@ -1022,8 +2287,7 @@ extracted (note that vectors also accept the `{[}{[}{]}{]}' operator)
1022
2287
  and then the vector was indexed by its first element, extracting the
1023
2288
  native Ruby type.
1024
2289
 
1025
- \hypertarget{data-frame}{%
1026
- \section{Data Frame}\label{data-frame}}
2290
+ \subsection{Data Frame}\label{data-frame}
1027
2291
 
1028
2292
  A data frame is a table like structure in which each column has the same
1029
2293
  number of rows. Data frames are the basic structure for storing data for
@@ -1048,8 +2312,7 @@ frame, function 'data\_\_frame' is used:
1048
2312
  ## 3 2012 2000
1049
2313
  \end{verbatim}
1050
2314
 
1051
- \hypertarget{data-frame-indexing}{%
1052
- \subsection{Data Frame Indexing}\label{data-frame-indexing}}
2315
+ \subsubsection{Data Frame Indexing}\label{data-frame-indexing}
1053
2316
 
1054
2317
  A data frame can be indexed the same way as a matrix, by using `{[}row,
1055
2318
  column{]}', where row and column can either be a numeric or the name of
@@ -1192,17 +2455,15 @@ data frame in which all cars have automatic transmission.
1192
2455
  ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
1193
2456
  \end{verbatim}
1194
2457
 
1195
- \hypertarget{writing-expressions-in-galaaz}{%
1196
2458
  \section{Writing Expressions in
1197
- Galaaz}\label{writing-expressions-in-galaaz}}
2459
+ Galaaz}\label{writing-expressions-in-galaaz}
1198
2460
 
1199
2461
  Galaaz extends Ruby to work with complex expressions, similar to R's
1200
2462
  expressions built with `quote' (base R) or `quo' (tidyverse). Let's take
1201
2463
  a look at some of those expressions.
1202
2464
 
1203
- \hypertarget{expressions-from-operators}{%
1204
2465
  \subsection{Expressions from
1205
- operators}\label{expressions-from-operators}}
2466
+ operators}\label{expressions-from-operators}
1206
2467
 
1207
2468
  The code below creates an expression summing two symbols
1208
2469
 
@@ -1297,13 +2558,6 @@ expressions such as
1297
2558
  \end{Highlighting}
1298
2559
  \end{Shaded}
1299
2560
 
1300
- \begin{verbatim}
1301
- ## Message:
1302
- ## Error in function (x, y, num.eq = TRUE, single.NA = TRUE, attrib.as.set = TRUE, :
1303
- ## object 'a' not found (RError)
1304
- ## Translated to internal error
1305
- \end{verbatim}
1306
-
1307
2561
  and it might be difficult to understand what is going on here. The
1308
2562
  problem lies with the fact that when using `==' we are comparing
1309
2563
  expression (:a + :b) to expression :z with `=='. When the comparison is
@@ -1312,9 +2566,8 @@ at this time are not bound to anything and we get a ``object `a' not
1312
2566
  found'' message. If we only use functional notation, this type of error
1313
2567
  will not occur.
1314
2568
 
1315
- \hypertarget{expressions-with-r-methods}{%
1316
2569
  \subsection{Expressions with R
1317
- methods}\label{expressions-with-r-methods}}
2570
+ methods}\label{expressions-with-r-methods}
1318
2571
 
1319
2572
  It is often necessary to create an expression that uses a method or
1320
2573
  function. For instance, in mathematics, it's quite natural to write an
@@ -1363,8 +2616,7 @@ the `.':
1363
2616
  ## c(x, y)
1364
2617
  \end{verbatim}
1365
2618
 
1366
- \hypertarget{evaluating-an-expression}{%
1367
- \subsection{Evaluating an Expression}\label{evaluating-an-expression}}
2619
+ \subsection{Evaluating an Expression}\label{evaluating-an-expression}
1368
2620
 
1369
2621
  Expressions can be evaluated by calling function `eval' with a binding.
1370
2622
  A binding can be provided with a list:
@@ -1398,8 +2650,7 @@ A binding can be provided with a list:
1398
2650
  ## [1] 32 64 96
1399
2651
  \end{verbatim}
1400
2652
 
1401
- \hypertarget{manipulating-data}{%
1402
- \section{Manipulating Data}\label{manipulating-data}}
2653
+ \section{Manipulating Data}\label{manipulating-data}
1403
2654
 
1404
2655
  One of the major benefits of Galaaz is to bring strong data manipulation
1405
2656
  to Ruby. The following examples were extracted from Hadley's ``R for
@@ -1415,6 +2666,11 @@ and if not, installs it. This data frame contains all 336,776 flights
1415
2666
  that departed from New York City in 2013. The data comes from the US
1416
2667
  Bureau of Transportation Statistics.
1417
2668
 
2669
+ Dplyr uses `tibbles' in place of data frames; unfortunately, tibbles do
2670
+ not yet print properly in Galaaz due to a bug in FastR. In order to
2671
+ print a tibble we need to convert it to a data frame using the
2672
+ 'as\_\_data\_\_frame' method.
2673
+
1418
2674
  \begin{Shaded}
1419
2675
  \begin{Highlighting}[]
1420
2676
  \NormalTok{R.install_and_loads(}\StringTok{'nycflights13'}\NormalTok{)}
@@ -1425,145 +2681,110 @@ Bureau of Transportation Statistics.
1425
2681
  \begin{Shaded}
1426
2682
  \begin{Highlighting}[]
1427
2683
  \NormalTok{flights = ~}\StringTok{:flights}
1428
- \NormalTok{puts flights.head.as__data__frame}
1429
- \end{Highlighting}
1430
- \end{Shaded}
1431
-
1432
- \begin{verbatim}
1433
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1434
- ## 1 2013 1 1 517 515 2 830 819
1435
- ## 2 2013 1 1 533 529 4 850 830
1436
- ## 3 2013 1 1 542 540 2 923 850
1437
- ## 4 2013 1 1 544 545 -1 1004 1022
1438
- ## 5 2013 1 1 554 600 -6 812 837
1439
- ## 6 2013 1 1 554 558 -4 740 728
1440
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1441
- ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
1442
- ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
1443
- ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
1444
- ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
1445
- ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
1446
- ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
1447
- ## minute time_hour
1448
- ## 1 15 2013-01-01 05:00:00
1449
- ## 2 29 2013-01-01 05:00:00
1450
- ## 3 40 2013-01-01 05:00:00
1451
- ## 4 45 2013-01-01 05:00:00
1452
- ## 5 0 2013-01-01 06:00:00
1453
- ## 6 58 2013-01-01 05:00:00
1454
- \end{verbatim}
1455
-
1456
- \hypertarget{filtering-rows-with-filter}{%
2684
+ \NormalTok{puts flights.head}
2685
+ \end{Highlighting}
2686
+ \end{Shaded}
2687
+
2688
+ \begin{verbatim}
2689
+ ## # A tibble: 6 x 19
2690
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2691
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2692
+ ## 1 2013 1 1 517 515 2 830
2693
+ ## 2 2013 1 1 533 529 4 850
2694
+ ## 3 2013 1 1 542 540 2 923
2695
+ ## 4 2013 1 1 544 545 -1 1004
2696
+ ## 5 2013 1 1 554 600 -6 812
2697
+ ## 6 2013 1 1 554 558 -4 740
2698
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2699
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2700
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2701
+ ## # time_hour <dttm>
2702
+ \end{verbatim}
2703
+
1457
2704
  \subsection{Filtering rows with
1458
- Filter}\label{filtering-rows-with-filter}}
2705
+ Filter}\label{filtering-rows-with-filter}
1459
2706
 
1460
2707
  In this example we filter the flights data set by giving to the filter
1461
2708
  function two expressions: the first :month.eq 1
1462
2709
 
1463
2710
  \begin{Shaded}
1464
2711
  \begin{Highlighting}[]
1465
- \NormalTok{puts flights.filter((}\StringTok{:month}\NormalTok{.eq }\DecValTok{1}\NormalTok{), (}\StringTok{:day}\NormalTok{.eq }\DecValTok{1}\NormalTok{)).head.as__data__frame}
2712
+ \NormalTok{puts flights.filter((}\StringTok{:month}\NormalTok{.eq }\DecValTok{1}\NormalTok{), (}\StringTok{:day}\NormalTok{.eq }\DecValTok{1}\NormalTok{)).head}
1466
2713
  \end{Highlighting}
1467
2714
  \end{Shaded}
1468
2715
 
1469
2716
  \begin{verbatim}
1470
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1471
- ## 1 2013 1 1 517 515 2 830 819
1472
- ## 2 2013 1 1 533 529 4 850 830
1473
- ## 3 2013 1 1 542 540 2 923 850
1474
- ## 4 2013 1 1 544 545 -1 1004 1022
1475
- ## 5 2013 1 1 554 600 -6 812 837
1476
- ## 6 2013 1 1 554 558 -4 740 728
1477
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1478
- ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
1479
- ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
1480
- ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
1481
- ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
1482
- ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
1483
- ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
1484
- ## minute time_hour
1485
- ## 1 15 2013-01-01 05:00:00
1486
- ## 2 29 2013-01-01 05:00:00
1487
- ## 3 40 2013-01-01 05:00:00
1488
- ## 4 45 2013-01-01 05:00:00
1489
- ## 5 0 2013-01-01 06:00:00
1490
- ## 6 58 2013-01-01 05:00:00
2717
+ ## # A tibble: 6 x 19
2718
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2719
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2720
+ ## 1 2013 1 1 517 515 2 830
2721
+ ## 2 2013 1 1 533 529 4 850
2722
+ ## 3 2013 1 1 542 540 2 923
2723
+ ## 4 2013 1 1 544 545 -1 1004
2724
+ ## 5 2013 1 1 554 600 -6 812
2725
+ ## 6 2013 1 1 554 558 -4 740
2726
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2727
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2728
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2729
+ ## # time_hour <dttm>
1491
2730
  \end{verbatim}
1492
2731
 
1493
- \hypertarget{logical-operators}{%
1494
- \subsection{Logical Operators}\label{logical-operators}}
2732
+ \subsection{Logical Operators}\label{logical-operators}
1495
2733
 
1496
2734
  All flights that departed in November or December
1497
2735
 
1498
2736
  \begin{Shaded}
1499
2737
  \begin{Highlighting}[]
1500
- \NormalTok{puts flights.filter((}\StringTok{:month}\NormalTok{.eq }\DecValTok{11}\NormalTok{) | (}\StringTok{:month}\NormalTok{.eq }\DecValTok{12}\NormalTok{)).head.as__data__frame}
2738
+ \NormalTok{puts flights.filter((}\StringTok{:month}\NormalTok{.eq }\DecValTok{11}\NormalTok{) | (}\StringTok{:month}\NormalTok{.eq }\DecValTok{12}\NormalTok{)).head}
1501
2739
  \end{Highlighting}
1502
2740
  \end{Shaded}
1503
2741
 
1504
2742
  \begin{verbatim}
1505
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1506
- ## 1 2013 11 1 5 2359 6 352 345
1507
- ## 2 2013 11 1 35 2250 105 123 2356
1508
- ## 3 2013 11 1 455 500 -5 641 651
1509
- ## 4 2013 11 1 539 545 -6 856 827
1510
- ## 5 2013 11 1 542 545 -3 831 855
1511
- ## 6 2013 11 1 549 600 -11 912 923
1512
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1513
- ## 1 7 B6 745 N568JB JFK PSE 205 1617 23
1514
- ## 2 87 B6 1816 N353JB JFK SYR 36 209 22
1515
- ## 3 -10 US 1895 N192UW EWR CLT 88 529 5
1516
- ## 4 29 UA 1714 N38727 LGA IAH 229 1416 5
1517
- ## 5 -24 AA 2243 N5CLAA JFK MIA 147 1089 5
1518
- ## 6 -11 UA 303 N595UA JFK SFO 359 2586 6
1519
- ## minute time_hour
1520
- ## 1 59 2013-11-01 23:00:00
1521
- ## 2 50 2013-11-01 22:00:00
1522
- ## 3 0 2013-11-01 05:00:00
1523
- ## 4 45 2013-11-01 05:00:00
1524
- ## 5 45 2013-11-01 05:00:00
1525
- ## 6 0 2013-11-01 06:00:00
2743
+ ## # A tibble: 6 x 19
2744
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2745
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2746
+ ## 1 2013 11 1 5 2359 6 352
2747
+ ## 2 2013 11 1 35 2250 105 123
2748
+ ## 3 2013 11 1 455 500 -5 641
2749
+ ## 4 2013 11 1 539 545 -6 856
2750
+ ## 5 2013 11 1 542 545 -3 831
2751
+ ## 6 2013 11 1 549 600 -11 912
2752
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2753
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2754
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2755
+ ## # time_hour <dttm>
1526
2756
  \end{verbatim}
1527
2757
 
1528
2758
  The same as above, but using the `in' operator. In R, it is possible to
1529
2759
  define many operators by doing \%\%. The \%in\% operator checks if a
1530
2760
  value is in a vector. In order to use those operators from Galaaz the
1531
- `.\_' method is used, where the first argument is the operator's symbol,
1532
- in this case `:in' and the second argument is the vector:
2761
+ `.\_' method is used, where the first argument is the operator's symbol,
2762
+ in this case `:in' and the second argument is the vector:
1533
2763
 
1534
2764
  \begin{Shaded}
1535
2765
  \begin{Highlighting}[]
1536
- \NormalTok{puts flights.filter(}\StringTok{:month}\NormalTok{._ }\StringTok{:in}\NormalTok{, R.c(}\DecValTok{11}\NormalTok{, }\DecValTok{12}\NormalTok{)).head.as__data__frame}
2766
+ \NormalTok{puts flights.filter(}\StringTok{:month}\NormalTok{._ }\StringTok{:in}\NormalTok{, R.c(}\DecValTok{11}\NormalTok{, }\DecValTok{12}\NormalTok{)).head}
1537
2767
  \end{Highlighting}
1538
2768
  \end{Shaded}
1539
2769
 
1540
2770
  \begin{verbatim}
1541
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1542
- ## 1 2013 11 1 5 2359 6 352 345
1543
- ## 2 2013 11 1 35 2250 105 123 2356
1544
- ## 3 2013 11 1 455 500 -5 641 651
1545
- ## 4 2013 11 1 539 545 -6 856 827
1546
- ## 5 2013 11 1 542 545 -3 831 855
1547
- ## 6 2013 11 1 549 600 -11 912 923
1548
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1549
- ## 1 7 B6 745 N568JB JFK PSE 205 1617 23
1550
- ## 2 87 B6 1816 N353JB JFK SYR 36 209 22
1551
- ## 3 -10 US 1895 N192UW EWR CLT 88 529 5
1552
- ## 4 29 UA 1714 N38727 LGA IAH 229 1416 5
1553
- ## 5 -24 AA 2243 N5CLAA JFK MIA 147 1089 5
1554
- ## 6 -11 UA 303 N595UA JFK SFO 359 2586 6
1555
- ## minute time_hour
1556
- ## 1 59 2013-11-01 23:00:00
1557
- ## 2 50 2013-11-01 22:00:00
1558
- ## 3 0 2013-11-01 05:00:00
1559
- ## 4 45 2013-11-01 05:00:00
1560
- ## 5 45 2013-11-01 05:00:00
1561
- ## 6 0 2013-11-01 06:00:00
2771
+ ## # A tibble: 6 x 19
2772
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2773
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2774
+ ## 1 2013 11 1 5 2359 6 352
2775
+ ## 2 2013 11 1 35 2250 105 123
2776
+ ## 3 2013 11 1 455 500 -5 641
2777
+ ## 4 2013 11 1 539 545 -6 856
2778
+ ## 5 2013 11 1 542 545 -3 831
2779
+ ## 6 2013 11 1 549 600 -11 912
2780
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2781
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2782
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2783
+ ## # time_hour <dttm>
1562
2784
  \end{verbatim}
1563
2785
 
1564
- \hypertarget{filtering-with-na-not-available}{%
1565
2786
  \subsection{Filtering with NA (Not
1566
- Available)}\label{filtering-with-na-not-available}}
2787
+ Available)}\label{filtering-with-na-not-available}
1567
2788
 
1568
2789
  Let's first create a `tibble' with a Not Available value (R::NA).
1569
2790
  Tibbles are a modern version of a data frame and operate very similarly
@@ -1574,15 +2795,17 @@ from data frame.
1574
2795
  \begin{Shaded}
1575
2796
  \begin{Highlighting}[]
1576
2797
  \NormalTok{df = R.tibble(}\StringTok{x: }\NormalTok{R.c(}\DecValTok{1}\NormalTok{, R::}\DataTypeTok{NA}\NormalTok{, }\DecValTok{3}\NormalTok{))}
1577
- \NormalTok{puts df.as__data__frame}
2798
+ \NormalTok{puts df}
1578
2799
  \end{Highlighting}
1579
2800
  \end{Shaded}
1580
2801
 
1581
2802
  \begin{verbatim}
1582
- ## x
1583
- ## 1 1
1584
- ## 2 NA
1585
- ## 3 3
2803
+ ## # A tibble: 3 x 1
2804
+ ## x
2805
+ ## <int>
2806
+ ## 1 1
2807
+ ## 2
2808
+ ## 3 3
1586
2809
  \end{verbatim}
1587
2810
 
1588
2811
  Now filtering by :x \textgreater{} 1 shows all lines that satisfy this
@@ -1590,145 +2813,137 @@ condition, where the row with R:NA does not.
1590
2813
 
1591
2814
  \begin{Shaded}
1592
2815
  \begin{Highlighting}[]
1593
- \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{ > }\DecValTok{1}\NormalTok{).as__data__frame}
2816
+ \NormalTok{puts df.filter(}\StringTok{:x}\NormalTok{ > }\DecValTok{1}\NormalTok{)}
1594
2817
  \end{Highlighting}
1595
2818
  \end{Shaded}
1596
2819
 
1597
2820
  \begin{verbatim}
1598
- ## x
1599
- ## 1 3
2821
+ ## # A tibble: 1 x 1
2822
+ ## x
2823
+ ## <int>
2824
+ ## 1 3
1600
2825
  \end{verbatim}
1601
2826
 
1602
2827
  To match an NA use method 'is\_\_na'
1603
2828
 
1604
2829
  \begin{Shaded}
1605
2830
  \begin{Highlighting}[]
1606
- \NormalTok{puts df.filter((}\StringTok{:x}\NormalTok{.is__na) | (}\StringTok{:x}\NormalTok{ > }\DecValTok{1}\NormalTok{)).as__data__frame}
2831
+ \NormalTok{puts df.filter((}\StringTok{:x}\NormalTok{.is__na) | (}\StringTok{:x}\NormalTok{ > }\DecValTok{1}\NormalTok{))}
1607
2832
  \end{Highlighting}
1608
2833
  \end{Shaded}
1609
2834
 
1610
2835
  \begin{verbatim}
1611
- ## x
1612
- ## 1 NA
1613
- ## 2 3
2836
+ ## # A tibble: 2 x 1
2837
+ ## x
2838
+ ## <int>
2839
+ ## 1
2840
+ ## 2 3
1614
2841
  \end{verbatim}
1615
2842
 
1616
- \hypertarget{arrange-rows-with-arrange}{%
1617
- \subsection{Arrange Rows with arrange}\label{arrange-rows-with-arrange}}
2843
+ \subsection{Arrange Rows with arrange}\label{arrange-rows-with-arrange}
1618
2844
 
1619
2845
  Arrange reorders the rows of a data frame by the given arguments.
1620
2846
 
1621
2847
  \begin{Shaded}
1622
2848
  \begin{Highlighting}[]
1623
- \NormalTok{puts flights.arrange(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{).head.as__data__frame}
2849
+ \NormalTok{puts flights.arrange(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{).head}
1624
2850
  \end{Highlighting}
1625
2851
  \end{Shaded}
1626
2852
 
1627
2853
  \begin{verbatim}
1628
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1629
- ## 1 2013 1 1 517 515 2 830 819
1630
- ## 2 2013 1 1 533 529 4 850 830
1631
- ## 3 2013 1 1 542 540 2 923 850
1632
- ## 4 2013 1 1 544 545 -1 1004 1022
1633
- ## 5 2013 1 1 554 600 -6 812 837
1634
- ## 6 2013 1 1 554 558 -4 740 728
1635
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1636
- ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
1637
- ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
1638
- ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
1639
- ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
1640
- ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
1641
- ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
1642
- ## minute time_hour
1643
- ## 1 15 2013-01-01 05:00:00
1644
- ## 2 29 2013-01-01 05:00:00
1645
- ## 3 40 2013-01-01 05:00:00
1646
- ## 4 45 2013-01-01 05:00:00
1647
- ## 5 0 2013-01-01 06:00:00
1648
- ## 6 58 2013-01-01 05:00:00
2854
+ ## # A tibble: 6 x 19
2855
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2856
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2857
+ ## 1 2013 1 1 517 515 2 830
2858
+ ## 2 2013 1 1 533 529 4 850
2859
+ ## 3 2013 1 1 542 540 2 923
2860
+ ## 4 2013 1 1 544 545 -1 1004
2861
+ ## 5 2013 1 1 554 600 -6 812
2862
+ ## 6 2013 1 1 554 558 -4 740
2863
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2864
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2865
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2866
+ ## # time_hour <dttm>
1649
2867
  \end{verbatim}
1650
2868
 
1651
2869
  To arrange in descending order, use function `desc'
1652
2870
 
1653
2871
  \begin{Shaded}
1654
2872
  \begin{Highlighting}[]
1655
- \NormalTok{puts flights.arrange(}\StringTok{:dep_delay}\NormalTok{.desc).head.as__data__frame}
2873
+ \NormalTok{puts flights.arrange(}\StringTok{:dep_delay}\NormalTok{.desc).head}
1656
2874
  \end{Highlighting}
1657
2875
  \end{Shaded}
1658
2876
 
1659
2877
  \begin{verbatim}
1660
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1661
- ## 1 2013 1 9 641 900 1301 1242 1530
1662
- ## 2 2013 6 15 1432 1935 1137 1607 2120
1663
- ## 3 2013 1 10 1121 1635 1126 1239 1810
1664
- ## 4 2013 9 20 1139 1845 1014 1457 2210
1665
- ## 5 2013 7 22 845 1600 1005 1044 1815
1666
- ## 6 2013 4 10 1100 1900 960 1342 2211
1667
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1668
- ## 1 1272 HA 51 N384HA JFK HNL 640 4983 9
1669
- ## 2 1127 MQ 3535 N504MQ JFK CMH 74 483 19
1670
- ## 3 1109 MQ 3695 N517MQ EWR ORD 111 719 16
1671
- ## 4 1007 AA 177 N338AA JFK SFO 354 2586 18
1672
- ## 5 989 MQ 3075 N665MQ JFK CVG 96 589 16
1673
- ## 6 931 DL 2391 N959DL JFK TPA 139 1005 19
1674
- ## minute time_hour
1675
- ## 1 0 2013-01-09 09:00:00
1676
- ## 2 35 2013-06-15 19:00:00
1677
- ## 3 35 2013-01-10 16:00:00
1678
- ## 4 45 2013-09-20 18:00:00
1679
- ## 5 0 2013-07-22 16:00:00
1680
- ## 6 0 2013-04-10 19:00:00
2878
+ ## # A tibble: 6 x 19
2879
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2880
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2881
+ ## 1 2013 1 9 641 900 1301 1242
2882
+ ## 2 2013 6 15 1432 1935 1137 1607
2883
+ ## 3 2013 1 10 1121 1635 1126 1239
2884
+ ## 4 2013 9 20 1139 1845 1014 1457
2885
+ ## 5 2013 7 22 845 1600 1005 1044
2886
+ ## 6 2013 4 10 1100 1900 960 1342
2887
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2888
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2889
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2890
+ ## # time_hour <dttm>
1681
2891
  \end{verbatim}
1682
2892
 
1683
- \hypertarget{selecting-columns}{%
1684
- \subsection{Selecting columns}\label{selecting-columns}}
2893
+ \subsection{Selecting columns}\label{selecting-columns}
1685
2894
 
1686
2895
  To select specific columns from a dataset we use function `select':
1687
2896
 
1688
2897
  \begin{Shaded}
1689
2898
  \begin{Highlighting}[]
1690
- \NormalTok{puts flights.select(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{).head.as__data__frame}
2899
+ \NormalTok{puts flights.select(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{).head}
1691
2900
  \end{Highlighting}
1692
2901
  \end{Shaded}
1693
2902
 
1694
2903
  \begin{verbatim}
1695
- ## year month day
1696
- ## 1 2013 1 1
1697
- ## 2 2013 1 1
1698
- ## 3 2013 1 1
1699
- ## 4 2013 1 1
1700
- ## 5 2013 1 1
1701
- ## 6 2013 1 1
2904
+ ## # A tibble: 6 x 3
2905
+ ## year month day
2906
+ ## <int> <int> <int>
2907
+ ## 1 2013 1 1
2908
+ ## 2 2013 1 1
2909
+ ## 3 2013 1 1
2910
+ ## 4 2013 1 1
2911
+ ## 5 2013 1 1
2912
+ ## 6 2013 1 1
1702
2913
  \end{verbatim}
1703
2914
 
1704
2915
  It is also possible to select columns in a given range
1705
2916
 
1706
2917
  \begin{Shaded}
1707
2918
  \begin{Highlighting}[]
1708
- \NormalTok{puts flights.select(}\StringTok{:year}\NormalTok{.up_to }\StringTok{:day}\NormalTok{).head.as__data__frame}
2919
+ \NormalTok{puts flights.select(}\StringTok{:year}\NormalTok{.up_to }\StringTok{:day}\NormalTok{).head}
1709
2920
  \end{Highlighting}
1710
2921
  \end{Shaded}
1711
2922
 
1712
2923
  \begin{verbatim}
1713
- ## year month day
1714
- ## 1 2013 1 1
1715
- ## 2 2013 1 1
1716
- ## 3 2013 1 1
1717
- ## 4 2013 1 1
1718
- ## 5 2013 1 1
1719
- ## 6 2013 1 1
2924
+ ## # A tibble: 6 x 3
2925
+ ## year month day
2926
+ ## <int> <int> <int>
2927
+ ## 1 2013 1 1
2928
+ ## 2 2013 1 1
2929
+ ## 3 2013 1 1
2930
+ ## 4 2013 1 1
2931
+ ## 5 2013 1 1
2932
+ ## 6 2013 1 1
1720
2933
  \end{verbatim}
1721
2934
 
1722
2935
  Select all columns that start with a given name sequence
1723
2936
 
1724
2937
  \begin{Shaded}
1725
2938
  \begin{Highlighting}[]
1726
- \NormalTok{puts flights.select(E.starts_with(}\StringTok{'arr'}\NormalTok{)).head.as__data__frame}
2939
+ \NormalTok{puts flights.select(E.starts_with(}\StringTok{'arr'}\NormalTok{)).head}
1727
2940
  \end{Highlighting}
1728
2941
  \end{Shaded}
1729
2942
 
1730
2943
  \begin{verbatim}
2944
+ ## # A tibble: 6 x 2
1731
2945
  ## arr_time arr_delay
2946
+ ## <int> <dbl>
1732
2947
  ## 1 830 11
1733
2948
  ## 2 850 20
1734
2949
  ## 3 923 33
@@ -1757,37 +2972,28 @@ column order is `Everything':
1757
2972
 
1758
2973
  \begin{Shaded}
1759
2974
  \begin{Highlighting}[]
1760
- \NormalTok{puts flights.select(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{, E.everything).head.as__data__frame}
2975
+ \NormalTok{puts flights.select(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{, E.everything).head}
1761
2976
  \end{Highlighting}
1762
2977
  \end{Shaded}
1763
2978
 
1764
2979
  \begin{verbatim}
1765
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1766
- ## 1 2013 1 1 517 515 2 830 819
1767
- ## 2 2013 1 1 533 529 4 850 830
1768
- ## 3 2013 1 1 542 540 2 923 850
1769
- ## 4 2013 1 1 544 545 -1 1004 1022
1770
- ## 5 2013 1 1 554 600 -6 812 837
1771
- ## 6 2013 1 1 554 558 -4 740 728
1772
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1773
- ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
1774
- ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
1775
- ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
1776
- ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
1777
- ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
1778
- ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
1779
- ## minute time_hour
1780
- ## 1 15 2013-01-01 05:00:00
1781
- ## 2 29 2013-01-01 05:00:00
1782
- ## 3 40 2013-01-01 05:00:00
1783
- ## 4 45 2013-01-01 05:00:00
1784
- ## 5 0 2013-01-01 06:00:00
1785
- ## 6 58 2013-01-01 05:00:00
2980
+ ## # A tibble: 6 x 19
2981
+ ## year month day dep_time sched_dep_time dep_delay arr_time
2982
+ ## <int> <int> <int> <int> <int> <dbl> <int>
2983
+ ## 1 2013 1 1 517 515 2 830
2984
+ ## 2 2013 1 1 533 529 4 850
2985
+ ## 3 2013 1 1 542 540 2 923
2986
+ ## 4 2013 1 1 544 545 -1 1004
2987
+ ## 5 2013 1 1 554 600 -6 812
2988
+ ## 6 2013 1 1 554 558 -4 740
2989
+ ## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
2990
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
2991
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
2992
+ ## # time_hour <dttm>
1786
2993
  \end{verbatim}
1787
2994
 
1788
- \hypertarget{add-variables-to-a-dataframe-with-mutate}{%
1789
- \subsection{Add variables to a dataframe with
1790
- `mutate'}\label{add-variables-to-a-dataframe-with-mutate}}
2995
+ \subsection{\texorpdfstring{Add variables to a dataframe with
2996
+ `mutate'}{Add variables to a dataframe with mutate}}\label{add-variables-to-a-dataframe-with-mutate}
1791
2997
 
1792
2998
  \begin{Shaded}
1793
2999
  \begin{Highlighting}[]
@@ -1797,18 +3003,20 @@ column order is `Everything':
1797
3003
  \StringTok{:distance}\NormalTok{,}
1798
3004
  \StringTok{:air_time}\NormalTok{)}
1799
3005
 
1800
- \NormalTok{puts flights_sm.head.as__data__frame}
3006
+ \NormalTok{puts flights_sm.head}
1801
3007
  \end{Highlighting}
1802
3008
  \end{Shaded}
1803
3009
 
1804
3010
  \begin{verbatim}
1805
- ## year month day dep_delay arr_delay distance air_time
1806
- ## 1 2013 1 1 2 11 1400 227
1807
- ## 2 2013 1 1 4 20 1416 227
1808
- ## 3 2013 1 1 2 33 1089 160
1809
- ## 4 2013 1 1 -1 -18 1576 183
1810
- ## 5 2013 1 1 -6 -25 762 116
1811
- ## 6 2013 1 1 -4 12 719 150
3011
+ ## # A tibble: 6 x 7
3012
+ ## year month day dep_delay arr_delay distance air_time
3013
+ ## <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
3014
+ ## 1 2013 1 1 2 11 1400 227
3015
+ ## 2 2013 1 1 4 20 1416 227
3016
+ ## 3 2013 1 1 2 33 1089 160
3017
+ ## 4 2013 1 1 -1 -18 1576 183
3018
+ ## 5 2013 1 1 -6 -25 762 116
3019
+ ## 6 2013 1 1 -4 12 719 150
1812
3020
  \end{verbatim}
1813
3021
 
1814
3022
  \begin{Shaded}
@@ -1816,55 +3024,61 @@ column order is `Everything':
1816
3024
  \NormalTok{flights_sm = flights_sm.}
1817
3025
  \NormalTok{ mutate(}\StringTok{gain: :dep_delay}\NormalTok{ - }\StringTok{:arr_delay}\NormalTok{,}
1818
3026
  \StringTok{speed: :distance}\NormalTok{ / }\StringTok{:air_time}\NormalTok{ * }\DecValTok{60}\NormalTok{)}
1819
- \NormalTok{puts flights_sm.head.as__data__frame}
3027
+ \NormalTok{puts flights_sm.head}
1820
3028
  \end{Highlighting}
1821
3029
  \end{Shaded}
1822
3030
 
1823
3031
  \begin{verbatim}
1824
- ## year month day dep_delay arr_delay distance air_time gain speed
1825
- ## 1 2013 1 1 2 11 1400 227 -9 370.0441
1826
- ## 2 2013 1 1 4 20 1416 227 -16 374.2731
1827
- ## 3 2013 1 1 2 33 1089 160 -31 408.3750
1828
- ## 4 2013 1 1 -1 -18 1576 183 17 516.7213
1829
- ## 5 2013 1 1 -6 -25 762 116 19 394.1379
1830
- ## 6 2013 1 1 -4 12 719 150 -16 287.6000
3032
+ ## # A tibble: 6 x 9
3033
+ ## year month day dep_delay arr_delay distance air_time gain speed
3034
+ ## <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
3035
+ ## 1 2013 1 1 2 11 1400 227 -9 370.
3036
+ ## 2 2013 1 1 4 20 1416 227 -16 374.
3037
+ ## 3 2013 1 1 2 33 1089 160 -31 408.
3038
+ ## 4 2013 1 1 -1 -18 1576 183 17 517.
3039
+ ## 5 2013 1 1 -6 -25 762 116 19 394.
3040
+ ## 6 2013 1 1 -4 12 719 150 -16 288.
1831
3041
  \end{verbatim}
1832
3042
 
1833
- \hypertarget{summarising-data}{%
1834
- \subsection{Summarising data}\label{summarising-data}}
3043
+ \subsection{Summarising data}\label{summarising-data}
1835
3044
 
1836
3045
  Function `summarise' calculates summaries for the data frame. When no
1837
3046
  `group\_by' is used a single value is obtained from the data frame:
1838
3047
 
1839
3048
  \begin{Shaded}
1840
3049
  \begin{Highlighting}[]
1841
- \NormalTok{puts flights.summarise(}\StringTok{delay: }\NormalTok{E.mean(}\StringTok{:dep_delay}\NormalTok{, }\StringTok{na__rm: }\DecValTok{true}\NormalTok{)).as__data__frame}
3050
+ \NormalTok{puts flights.summarise(}\StringTok{delay: }\NormalTok{E.mean(}\StringTok{:dep_delay}\NormalTok{, }\StringTok{na__rm: }\DecValTok{true}\NormalTok{))}
1842
3051
  \end{Highlighting}
1843
3052
  \end{Shaded}
1844
3053
 
1845
3054
  \begin{verbatim}
1846
- ## delay
1847
- ## 1 12.63907
3055
+ ## # A tibble: 1 x 1
3056
+ ## delay
3057
+ ## <dbl>
3058
+ ## 1 12.6
1848
3059
  \end{verbatim}
1849
3060
 
1850
- When a data frame is groupe with `group\_by' summaries apply to the
3061
+ When a data frame is grouped with `group\_by' summaries apply to the
1851
3062
  given group:
1852
3063
 
1853
3064
  \begin{Shaded}
1854
3065
  \begin{Highlighting}[]
1855
3066
  \NormalTok{by_day = flights.group_by(}\StringTok{:year}\NormalTok{, }\StringTok{:month}\NormalTok{, }\StringTok{:day}\NormalTok{)}
1856
- \NormalTok{puts by_day.summarise(}\StringTok{delay: :dep_delay}\NormalTok{.mean(}\StringTok{na__rm: }\DecValTok{true}\NormalTok{)).head.as__data__frame}
3067
+ \NormalTok{puts by_day.summarise(}\StringTok{delay: :dep_delay}\NormalTok{.mean(}\StringTok{na__rm: }\DecValTok{true}\NormalTok{)).head}
1857
3068
  \end{Highlighting}
1858
3069
  \end{Shaded}
1859
3070
 
1860
3071
  \begin{verbatim}
1861
- ## year month day delay
1862
- ## 1 2013 1 1 11.548926
1863
- ## 2 2013 1 2 13.858824
1864
- ## 3 2013 1 3 10.987832
1865
- ## 4 2013 1 4 8.951595
1866
- ## 5 2013 1 5 5.732218
1867
- ## 6 2013 1 6 7.148014
3072
+ ## # A tibble: 6 x 4
3073
+ ## # Groups: year, month [1]
3074
+ ## year month day delay
3075
+ ## * <int> <int> <int> <dbl>
3076
+ ## 1 2013 1 1 11.5
3077
+ ## 2 2013 1 2 13.9
3078
+ ## 3 2013 1 3 11.0
3079
+ ## 4 2013 1 4 8.95
3080
+ ## 5 2013 1 5 5.73
3081
+ ## 6 2013 1 6 7.15
1868
3082
  \end{verbatim}
1869
3083
 
1870
3084
  Next we put many operations together by piping them one after the
@@ -1880,22 +3094,23 @@ other:
1880
3094
  \StringTok{delay: :arr_delay}\NormalTok{.mean(}\StringTok{na__rm: }\DecValTok{true}\NormalTok{)).}
1881
3095
  \NormalTok{ filter(}\StringTok{:count}\NormalTok{ > }\DecValTok{20}\NormalTok{, }\StringTok{:dest}\NormalTok{ != }\StringTok{"NHL"}\NormalTok{)}
1882
3096
 
1883
- \NormalTok{puts delays.as__data__frame.head}
3097
+ \NormalTok{puts delays.head}
1884
3098
  \end{Highlighting}
1885
3099
  \end{Shaded}
1886
3100
 
1887
3101
  \begin{verbatim}
1888
- ## dest count dist delay
1889
- ## 1 ABQ 254 1826.0000 4.381890
1890
- ## 2 ACK 265 199.0000 4.852273
1891
- ## 3 ALB 439 143.0000 14.397129
1892
- ## 4 ATL 17215 757.1082 11.300113
1893
- ## 5 AUS 2439 1514.2530 6.019909
1894
- ## 6 AVL 275 583.5818 8.003831
3102
+ ## # A tibble: 6 x 4
3103
+ ## dest count dist delay
3104
+ ## <chr> <int> <dbl> <dbl>
3105
+ ## 1 ABQ 254 1826 4.38
3106
+ ## 2 ACK 265 199 4.85
3107
+ ## 3 ALB 439 143 14.4
3108
+ ## 4 ATL 17215 757. 11.3
3109
+ ## 5 AUS 2439 1514. 6.02
3110
+ ## 6 AVL 275 584. 8.00
1895
3111
  \end{verbatim}
1896
3112
 
1897
- \hypertarget{using-data-table}{%
1898
- \section{Using Data Table}\label{using-data-table}}
3113
+ \section{Using Data Table}\label{using-data-table}
1899
3114
 
1900
3115
  \begin{Shaded}
1901
3116
  \begin{Highlighting}[]
@@ -2032,8 +3247,7 @@ other:
2032
3247
  ## 6: 0
2033
3248
  \end{verbatim}
2034
3249
 
2035
- \hypertarget{graphics-in-galaaz}{%
2036
- \section{Graphics in Galaaz}\label{graphics-in-galaaz}}
3250
+ \section{Graphics in Galaaz}\label{graphics-in-galaaz}
2037
3251
 
2038
3252
  Creating graphics in Galaaz is quite easy, as it can use all the power
2039
3253
  of ggplot2. There are many resources on the web that teach ggplot, so
@@ -2125,10 +3339,9 @@ so we add `coord\_flip'.
2125
3339
  \end{Highlighting}
2126
3340
  \end{Shaded}
2127
3341
 
2128
- \includegraphics{/home/rbotafogo/desenv/galaaz/blogs/manual/manual_files/figure-latex/diverging_bar.pdf}
3342
+ \includegraphics{manual_files/figure-latex/diverging_bar.pdf}
2129
3343
 
2130
- \hypertarget{coding-with-tidyverse}{%
2131
- \section{Coding with Tidyverse}\label{coding-with-tidyverse}}
3344
+ \section{Coding with Tidyverse}\label{coding-with-tidyverse}
2132
3345
 
2133
3346
  In R, and when coding with `tidyverse', arguments to a function are
2134
3347
  usually not \emph{referentially transparent}. That is, you can't replace
@@ -2158,7 +3371,7 @@ and now, let's look at this code:
2158
3371
  \end{Highlighting}
2159
3372
  \end{Shaded}
2160
3373
 
2161
- It generates the following error: "object `x' not found.
3374
+ It generates the following error: ``object `x' not found''.
2162
3375
 
2163
3376
  However, in Galaaz, arguments are referentially transparent as can be
2164
3377
  seen by the code below. Note initially that `my\_var = :x' will not give
@@ -2229,9 +3442,8 @@ chunks of code:
2229
3442
  ## 1 1 3
2230
3443
  \end{verbatim}
2231
3444
 
2232
- \hypertarget{writing-a-function-that-applies-to-different-data-sets}{%
2233
3445
  \subsection{Writing a function that applies to different data
2234
- sets}\label{writing-a-function-that-applies-to-different-data-sets}}
3446
+ sets}\label{writing-a-function-that-applies-to-different-data-sets}
2235
3447
 
2236
3448
  Let's suppose that we want to write a function that receives as the
2237
3449
  first argument a data frame and as second argument an expression that
@@ -2308,8 +3520,7 @@ no relationship with the symbol `:a' used in the definition of
2308
3520
  ## Translated to internal error
2309
3521
  \end{verbatim}
2310
3522
 
2311
- \hypertarget{different-expressions}{%
2312
- \subsection{Different expressions}\label{different-expressions}}
3523
+ \subsection{Different expressions}\label{different-expressions}
2313
3524
 
2314
3525
  Let's move to the next problem as presented by Hadley where trying to
2315
3526
  write a function in R that will receive two arguments, the first a
@@ -2334,11 +3545,11 @@ variable and summarises it by an expression:
2334
3545
 
2335
3546
  \begin{verbatim}
2336
3547
  ## g1 g2 a b
2337
- ## 1 1 1 2 1
2338
- ## 2 1 2 4 3
2339
- ## 3 2 1 5 4
2340
- ## 4 2 2 3 2
2341
- ## 5 2 1 1 5
3548
+ ## 1 1 1 3 3
3549
+ ## 2 1 2 2 1
3550
+ ## 3 2 1 5 2
3551
+ ## 4 2 2 4 5
3552
+ ## 5 2 1 1 4
2342
3553
  \end{verbatim}
2343
3554
 
2344
3555
  \begin{Shaded}
@@ -2352,9 +3563,9 @@ variable and summarises it by an expression:
2352
3563
  \end{Shaded}
2353
3564
 
2354
3565
  \begin{verbatim}
2355
- ## g1 a
2356
- ## 1 1 3
2357
- ## 2 2 3
3566
+ ## g1 a
3567
+ ## 1 1 2.500000
3568
+ ## 2 2 3.333333
2358
3569
  \end{verbatim}
2359
3570
 
2360
3571
  \begin{Shaded}
@@ -2368,9 +3579,9 @@ variable and summarises it by an expression:
2368
3579
  \end{Shaded}
2369
3580
 
2370
3581
  \begin{verbatim}
2371
- ## g2 a
2372
- ## 1 1 2.666667
2373
- ## 2 2 3.500000
3582
+ ## g2 a
3583
+ ## 1 1 3
3584
+ ## 2 2 3
2374
3585
  \end{verbatim}
2375
3586
 
2376
3587
  As shown by Hadley, one might expect this function to do the trick:
@@ -2407,11 +3618,11 @@ access an R variable from Galaaz, we use the tilda operator
2407
3618
 
2408
3619
  \begin{verbatim}
2409
3620
  ## g1 g2 a b
2410
- ## 1 1 1 2 1
2411
- ## 2 1 2 4 3
2412
- ## 3 2 1 5 4
2413
- ## 4 2 2 3 2
2414
- ## 5 2 1 1 5
3621
+ ## 1 1 1 3 3
3622
+ ## 2 1 2 2 1
3623
+ ## 3 2 1 5 2
3624
+ ## 4 2 2 4 5
3625
+ ## 5 2 1 1 4
2415
3626
  \end{verbatim}
2416
3627
 
2417
3628
  We then create the `my\_summarize' method and call it passing the R data
@@ -2424,36 +3635,39 @@ frame and the group by variable `:g1':
2424
3635
  \NormalTok{ summarize(}\StringTok{a: :a}\NormalTok{.mean)}
2425
3636
  \KeywordTok{end}
2426
3637
 
2427
- \NormalTok{puts my_summarize(}\StringTok{:df}\NormalTok{, }\StringTok{:g1}\NormalTok{).as__data__frame}
3638
+ \NormalTok{puts my_summarize(}\StringTok{:df}\NormalTok{, }\StringTok{:g1}\NormalTok{)}
2428
3639
  \end{Highlighting}
2429
3640
  \end{Shaded}
2430
3641
 
2431
3642
  \begin{verbatim}
2432
- ## g1 a
2433
- ## 1 1 3
2434
- ## 2 2 3
3643
+ ## # A tibble: 2 x 2
3644
+ ## g1 a
3645
+ ## <dbl> <dbl>
3646
+ ## 1 1 2.5
3647
+ ## 2 2 3.33
2435
3648
  \end{verbatim}
2436
3649
 
2437
3650
  It works!!! Well, let's make sure this was not just some coincidence
2438
3651
 
2439
3652
  \begin{Shaded}
2440
3653
  \begin{Highlighting}[]
2441
- \NormalTok{puts my_summarize(}\StringTok{:df}\NormalTok{, }\StringTok{:g2}\NormalTok{).as__data__frame}
3654
+ \NormalTok{puts my_summarize(}\StringTok{:df}\NormalTok{, }\StringTok{:g2}\NormalTok{)}
2442
3655
  \end{Highlighting}
2443
3656
  \end{Shaded}
2444
3657
 
2445
3658
  \begin{verbatim}
2446
- ## g2 a
2447
- ## 1 1 2.666667
2448
- ## 2 2 3.500000
3659
+ ## # A tibble: 2 x 2
3660
+ ## g2 a
3661
+ ## <dbl> <dbl>
3662
+ ## 1 1 3
3663
+ ## 2 2 3
2449
3664
  \end{verbatim}
2450
3665
 
2451
3666
  Great, everything is fine! No magic, no new functions, no complexities,
2452
3667
  just normal, standard Ruby code. If you've ever done NSE in R, this
2453
3668
  certainly feels much safer and easier to implement.
2454
3669
 
2455
- \hypertarget{different-input-variables}{%
2456
- \subsection{Different input variables}\label{different-input-variables}}
3670
+ \subsection{Different input variables}\label{different-input-variables}
2457
3671
 
2458
3672
  In the previous section we've managed to get rid of all NSE formulation
2459
3673
  for a simple example, but does this remain true for more complex
@@ -2508,9 +3722,8 @@ Once again, there is no need to use any special theory or functions. The
2508
3722
  only point to be careful about is the use of `E' to build expressions
2509
3723
  from functions `mean', `sum' and `n'.
2510
3724
 
2511
- \hypertarget{different-input-and-output-variable}{%
2512
3725
  \subsection{Different input and output
2513
- variable}\label{different-input-and-output-variable}}
3726
+ variable}\label{different-input-and-output-variable}
2514
3727
 
2515
3728
  Now the next challenge presented by Hadley is to vary the name of the
2516
3729
  output variables based on the received expression. So, if the input
@@ -2564,18 +3777,18 @@ Here is our Ruby code:
2564
3777
 
2565
3778
  \begin{verbatim}
2566
3779
  ## g1 g2 a b mean_a sum_a
2567
- ## 1 1 1 2 1 3 15
2568
- ## 2 1 2 4 3 3 15
2569
- ## 3 2 1 5 4 3 15
2570
- ## 4 2 2 3 2 3 15
2571
- ## 5 2 1 1 5 3 15
3780
+ ## 1 1 1 3 3 3 15
3781
+ ## 2 1 2 2 1 3 15
3782
+ ## 3 2 1 5 2 3 15
3783
+ ## 4 2 2 4 5 3 15
3784
+ ## 5 2 1 1 4 3 15
2572
3785
  ##
2573
3786
  ## g1 g2 a b mean_b sum_b
2574
- ## 1 1 1 2 1 3 15
2575
- ## 2 1 2 4 3 3 15
2576
- ## 3 2 1 5 4 3 15
2577
- ## 4 2 2 3 2 3 15
2578
- ## 5 2 1 1 5 3 15
3787
+ ## 1 1 1 3 3 3 15
3788
+ ## 2 1 2 2 1 3 15
3789
+ ## 3 2 1 5 2 3 15
3790
+ ## 4 2 2 4 5 3 15
3791
+ ## 5 2 1 1 4 3 15
2579
3792
  \end{verbatim}
2580
3793
 
2581
3794
  It really seems that ``Non Standard Evaluation'' is actually quite
@@ -2589,9 +3802,8 @@ This is standard Ruby notation.
2589
3802
 
2590
3803
  {[}explain\ldots{}.{]}
2591
3804
 
2592
- \hypertarget{capturing-multiple-variables}{%
2593
3805
  \subsection{Capturing multiple
2594
- variables}\label{capturing-multiple-variables}}
3806
+ variables}\label{capturing-multiple-variables}
2595
3807
 
2596
3808
  Moving on with new complexities, Hadley proposes that we solve the
2597
3809
  problem in which the summarise function will receive any number of
@@ -2607,21 +3819,23 @@ number of paramenters the paramenter is preceded by '*':
2607
3819
  \NormalTok{ summarise(}\StringTok{a: }\NormalTok{E.mean(}\StringTok{:a}\NormalTok{))}
2608
3820
  \KeywordTok{end}
2609
3821
 
2610
- \NormalTok{puts my_summarise3((~}\StringTok{:df}\NormalTok{), }\StringTok{:g1}\NormalTok{, }\StringTok{:g2}\NormalTok{).as__data__frame}
3822
+ \NormalTok{puts my_summarise3((~}\StringTok{:df}\NormalTok{), }\StringTok{:g1}\NormalTok{, }\StringTok{:g2}\NormalTok{)}
2611
3823
  \end{Highlighting}
2612
3824
  \end{Shaded}
2613
3825
 
2614
3826
  \begin{verbatim}
2615
- ## g1 g2 a
2616
- ## 1 1 1 2
2617
- ## 2 1 2 4
2618
- ## 3 2 1 3
2619
- ## 4 2 2 3
3827
+ ## # A tibble: 4 x 3
3828
+ ## # Groups: g1 [?]
3829
+ ## g1 g2 a
3830
+ ## <dbl> <dbl> <dbl>
3831
+ ## 1 1 1 3
3832
+ ## 2 1 2 2
3833
+ ## 3 2 1 3
3834
+ ## 4 2 2 4
2620
3835
  \end{verbatim}
2621
3836
 
2622
- \hypertarget{why-does-r-require-nse-and-galaaz-does-not}{%
2623
3837
  \subsection{Why does R require NSE and Galaaz does
2624
- not?}\label{why-does-r-require-nse-and-galaaz-does-not}}
3838
+ not?}\label{why-does-r-require-nse-and-galaaz-does-not}
2625
3839
 
2626
3840
  NSE introduces a number of new concepts, such as `quoting',
2627
3841
  `quasiquotation', `unquoting' and `unquote-splicing', while in Galaaz
@@ -2654,8 +3868,7 @@ should call the function passing the value `true' if variable `a' is
2654
3868
  equal to variable `b' or if it should call the function passing the
2655
3869
  expression `:a.eq :b'.
2656
3870
 
2657
- \hypertarget{advanced-dplyr-features}{%
2658
- \subsection{Advanced dplyr features}\label{advanced-dplyr-features}}
3871
+ \subsection{Advanced dplyr features}\label{advanced-dplyr-features}
2659
3872
 
2660
3873
  In the blog: Programming with dplyr by using dplyr
2661
3874
  (\url{https://www.r-bloggers.com/programming-with-dplyr-by-using-dplyr/})
@@ -2682,39 +3895,22 @@ the Starwars movies:
2682
3895
 
2683
3896
  \begin{Shaded}
2684
3897
  \begin{Highlighting}[]
2685
- \NormalTok{puts (~}\StringTok{:starwars}\NormalTok{).head.as__data__frame}
3898
+ \NormalTok{puts (~}\StringTok{:starwars}\NormalTok{).head}
2686
3899
  \end{Highlighting}
2687
3900
  \end{Shaded}
2688
3901
 
2689
3902
  \begin{verbatim}
2690
- ## name height mass hair_color skin_color eye_color birth_year
2691
- ## 1 Luke Skywalker 172 77 blond fair blue 19.0
2692
- ## 2 C-3PO 167 75 <NA> gold yellow 112.0
2693
- ## 3 R2-D2 96 32 <NA> white, blue red 33.0
2694
- ## 4 Darth Vader 202 136 none white yellow 41.9
2695
- ## 5 Leia Organa 150 49 brown light brown 19.0
2696
- ## 6 Owen Lars 178 120 brown, grey light blue 52.0
2697
- ## gender homeworld species
2698
- ## 1 male Tatooine Human
2699
- ## 2 <NA> Tatooine Droid
2700
- ## 3 <NA> Naboo Droid
2701
- ## 4 male Tatooine Human
2702
- ## 5 female Alderaan Human
2703
- ## 6 male Tatooine Human
2704
- ## films
2705
- ## 1 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2706
- ## 2 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
2707
- ## 3 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2708
- ## 4 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
2709
- ## 5 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2710
- ## 6 Attack of the Clones, Revenge of the Sith, A New Hope
2711
- ## vehicles starships
2712
- ## 1 Snowspeeder, Imperial Speeder Bike X-wing, Imperial shuttle
2713
- ## 2
2714
- ## 3
2715
- ## 4 TIE Advanced x1
2716
- ## 5 Imperial Speeder Bike
2717
- ## 6
3903
+ ## # A tibble: 6 x 13
3904
+ ## name height mass hair_color skin_color eye_color birth_year gender
3905
+ ## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
3906
+ ## 1 Luke~ 172 77 blond fair blue 19 male
3907
+ ## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
3908
+ ## 3 R2-D2 96 32 <NA> white, bl~ red 33 <NA>
3909
+ ## 4 Dart~ 202 136 none white yellow 41.9 male
3910
+ ## 5 Leia~ 150 49 brown light brown 19 female
3911
+ ## 6 Owen~ 178 120 brown, gr~ light blue 52 male
3912
+ ## # ... with 5 more variables: homeworld <chr>, species <chr>, films <list>,
3913
+ ## # vehicles <list>, starships <list>
2718
3914
  \end{verbatim}
2719
3915
 
2720
3916
  The grouped\_mean function below will receive a grouping variable and
@@ -2768,33 +3964,34 @@ The same code with Galaaz, becomes:
2768
3964
  \NormalTok{ rename_at(value_variables, E.funs(E.paste0(}\StringTok{"mean_"}\NormalTok{, value_variables)))}
2769
3965
  \KeywordTok{end}
2770
3966
 
2771
- \NormalTok{puts grouped_mean((~}\StringTok{:starwars}\NormalTok{), }\StringTok{"eye_color"}\NormalTok{, E.c(}\StringTok{"mass"}\NormalTok{, }\StringTok{"birth_year"}\NormalTok{)).as__data__frame}
3967
+ \NormalTok{puts grouped_mean((~}\StringTok{:starwars}\NormalTok{), }\StringTok{"eye_color"}\NormalTok{, E.c(}\StringTok{"mass"}\NormalTok{, }\StringTok{"birth_year"}\NormalTok{))}
2772
3968
  \end{Highlighting}
2773
3969
  \end{Shaded}
2774
3970
 
2775
3971
  \begin{verbatim}
2776
- ## eye_color mean_mass mean_birth_year count
2777
- ## 1 black 76.28571 33.00000 10
2778
- ## 2 blue 86.51667 67.06923 19
2779
- ## 3 blue-gray 77.00000 57.00000 1
2780
- ## 4 brown 66.09231 108.96429 21
2781
- ## 5 dark NaN NaN 1
2782
- ## 6 gold NaN NaN 1
2783
- ## 7 green, yellow 159.00000 NaN 1
2784
- ## 8 hazel 66.00000 34.50000 3
2785
- ## 9 orange 282.33333 231.00000 8
2786
- ## 10 pink NaN NaN 1
2787
- ## 11 red 81.40000 33.66667 5
2788
- ## 12 red, blue NaN NaN 1
2789
- ## 13 unknown 31.50000 NaN 3
2790
- ## 14 white 48.00000 NaN 1
2791
- ## 15 yellow 81.11111 76.38000 11
3972
+ ## # A tibble: 15 x 4
3973
+ ## eye_color mean_mass mean_birth_year count
3974
+ ## <chr> <dbl> <dbl> <dbl>
3975
+ ## 1 black 76.3 33 10
3976
+ ## 2 blue 86.5 67.1 19
3977
+ ## 3 blue-gray 77 57 1
3978
+ ## 4 brown 66.1 109. 21
3979
+ ## 5 dark NaN NaN 1
3980
+ ## 6 gold NaN NaN 1
3981
+ ## 7 green, yellow 159 NaN 1
3982
+ ## 8 hazel 66 34.5 3
3983
+ ## 9 orange 282. 231 8
3984
+ ## 10 pink NaN NaN 1
3985
+ ## 11 red 81.4 33.7 5
3986
+ ## 12 red, blue NaN NaN 1
3987
+ ## 13 unknown 31.5 NaN 3
3988
+ ## 14 white 48 NaN 1
3989
+ ## 15 yellow 81.1 76.4 11
2792
3990
  \end{verbatim}
2793
3991
 
2794
3992
  {[}TO BE CONTINUED\ldots{}{]}
2795
3993
 
2796
- \hypertarget{contributing}{%
2797
- \section{Contributing}\label{contributing}}
3994
+ \section{Contributing}\label{contributing}
2798
3995
 
2799
3996
  \begin{itemize}
2800
3997
  \tightlist
@@ -2812,5 +4009,18 @@ The same code with Galaaz, becomes:
2812
4009
  Create new Pull Request
2813
4010
  \end{itemize}
2814
4011
 
4012
+ \section*{References}\label{references}
4013
+ \addcontentsline{toc}{section}{References}
4014
+
4015
+ \hypertarget{refs}{}
4016
+ \hypertarget{ref-Knuth:literate_programming}{}
4017
+ Knuth, Donald E. 1984. ``Literate Programming.'' \emph{Comput. J.} 27
4018
+ (2). Oxford, UK: Oxford University Press: 97--111.
4019
+ doi:\href{https://doi.org/10.1093/comjnl/27.2.97}{10.1093/comjnl/27.2.97}.
4020
+
4021
+ \hypertarget{ref-Wilkinson:grammar_of_graphics}{}
4022
+ Wilkinson, Leland. 2005. \emph{The Grammar of Graphics (Statistics and
4023
+ Computing)}. Berlin, Heidelberg: Springer-Verlag.
4024
+
2815
4025
 
2816
4026
  \end{document}