galaaz 0.4.10 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (163) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2048 -531
  3. data/Rakefile +3 -2
  4. data/bin/gknit +152 -6
  5. data/bin/gknit-draft +105 -0
  6. data/bin/gknit-draft.rb +28 -0
  7. data/bin/gknit_Rscript +127 -0
  8. data/bin/grun +27 -1
  9. data/bin/gstudio +47 -4
  10. data/bin/{gstudio.rb → gstudio_irb.rb} +0 -0
  11. data/bin/gstudio_pry.rb +7 -0
  12. data/blogs/galaaz_ggplot/galaaz_ggplot.html +10 -195
  13. data/blogs/galaaz_ggplot/galaaz_ggplot.md +404 -0
  14. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
  15. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
  16. data/blogs/gknit/gknit.Rmd +5 -3
  17. data/blogs/gknit/gknit.pdf +0 -0
  18. data/blogs/gknit/lst.rds +0 -0
  19. data/blogs/manual/lst.rds +0 -0
  20. data/blogs/manual/manual.Rmd +826 -53
  21. data/blogs/manual/manual.html +2338 -695
  22. data/blogs/manual/manual.md +2032 -539
  23. data/blogs/manual/manual.pdf +0 -0
  24. data/blogs/manual/manual.tex +1804 -594
  25. data/blogs/manual/manual_files/figure-html/bubble-1.png +0 -0
  26. data/blogs/manual/manual_files/figure-html/diverging_bar.png +0 -0
  27. data/blogs/manual/manual_files/figure-latex/bubble-1.png +0 -0
  28. data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
  29. data/blogs/manual/model.rb +41 -0
  30. data/blogs/nse_dplyr/nse_dplyr.Rmd +226 -73
  31. data/blogs/nse_dplyr/nse_dplyr.html +254 -336
  32. data/blogs/nse_dplyr/nse_dplyr.md +353 -158
  33. data/blogs/oh_my/oh_my.html +274 -386
  34. data/blogs/oh_my/oh_my.md +208 -205
  35. data/blogs/ruby_plot/ruby_plot.html +20 -205
  36. data/blogs/ruby_plot/ruby_plot.md +14 -15
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  42. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  43. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  44. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  46. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  47. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  48. data/examples/Bibliography/master.bib +50 -0
  49. data/examples/Bibliography/stats.bib +72 -0
  50. data/examples/islr/x_y_rnorm.jpg +0 -0
  51. data/examples/latex_templates/Test-acm_article/Makefile +16 -0
  52. data/examples/latex_templates/Test-acm_article/Test-acm_article.Rmd +65 -0
  53. data/examples/latex_templates/Test-acm_article/acm_proc_article-sp.cls +1670 -0
  54. data/examples/latex_templates/Test-acm_article/sensys-abstract.cls +703 -0
  55. data/examples/latex_templates/Test-acm_article/sigproc.bib +59 -0
  56. data/examples/latex_templates/Test-acs_article/Test-acs_article.Rmd +260 -0
  57. data/examples/latex_templates/Test-acs_article/Test-acs_article.pdf +0 -0
  58. data/examples/latex_templates/Test-acs_article/acs-Test-acs_article.bib +11 -0
  59. data/examples/latex_templates/Test-acs_article/acs-my_output.bib +11 -0
  60. data/examples/latex_templates/Test-acs_article/acstest.bib +17 -0
  61. data/examples/latex_templates/Test-aea_article/AEA.cls +1414 -0
  62. data/{blogs/gknit/marshal.dump → examples/latex_templates/Test-aea_article/BibFile.bib} +0 -0
  63. data/examples/latex_templates/Test-aea_article/Test-aea_article.Rmd +108 -0
  64. data/examples/latex_templates/Test-aea_article/Test-aea_article.pdf +0 -0
  65. data/examples/latex_templates/Test-aea_article/aea.bst +1269 -0
  66. data/examples/latex_templates/Test-aea_article/multicol.sty +853 -0
  67. data/examples/latex_templates/Test-aea_article/references.bib +0 -0
  68. data/examples/latex_templates/Test-aea_article/setspace.sty +546 -0
  69. data/examples/latex_templates/Test-amq_article/Test-amq_article.Rmd +256 -0
  70. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdf +0 -0
  71. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdfsync +3397 -0
  72. data/examples/latex_templates/Test-amq_article/pics/Figure2.pdf +0 -0
  73. data/examples/latex_templates/Test-ams_article/Test-ams_article.Rmd +215 -0
  74. data/examples/latex_templates/Test-ams_article/amstest.bib +436 -0
  75. data/examples/latex_templates/Test-asa_article/Test-asa_article.Rmd +153 -0
  76. data/examples/latex_templates/Test-asa_article/Test-asa_article.pdf +0 -0
  77. data/examples/latex_templates/Test-asa_article/agsm.bst +1353 -0
  78. data/examples/latex_templates/Test-asa_article/bibliography.bib +233 -0
  79. data/examples/latex_templates/Test-ieee_article/IEEEtran.bst +2409 -0
  80. data/examples/latex_templates/Test-ieee_article/IEEEtran.cls +6346 -0
  81. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.Rmd +175 -0
  82. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.pdf +0 -0
  83. data/examples/latex_templates/Test-ieee_article/mybibfile.bib +20 -0
  84. data/examples/latex_templates/Test-rjournal_article/RJournal.sty +335 -0
  85. data/examples/latex_templates/Test-rjournal_article/RJreferences.bib +18 -0
  86. data/examples/latex_templates/Test-rjournal_article/RJwrapper.pdf +0 -0
  87. data/examples/latex_templates/Test-rjournal_article/Test-rjournal_article.Rmd +52 -0
  88. data/examples/latex_templates/Test-springer_article/Test-springer_article.Rmd +65 -0
  89. data/examples/latex_templates/Test-springer_article/Test-springer_article.pdf +0 -0
  90. data/examples/latex_templates/Test-springer_article/bibliography.bib +26 -0
  91. data/examples/latex_templates/Test-springer_article/spbasic.bst +1658 -0
  92. data/examples/latex_templates/Test-springer_article/spmpsci.bst +1512 -0
  93. data/examples/latex_templates/Test-springer_article/spphys.bst +1443 -0
  94. data/examples/latex_templates/Test-springer_article/svglov3.clo +113 -0
  95. data/examples/latex_templates/Test-springer_article/svjour3.cls +1431 -0
  96. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.Rmd +73 -0
  97. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.pdf +0 -0
  98. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.Rmd +382 -0
  99. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.pdf +0 -0
  100. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.Rmd +164 -0
  101. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.pdf +0 -0
  102. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.Rmd +92 -0
  103. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.pdf +0 -0
  104. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/attend-grade-relationships.csv +482 -0
  105. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.Rmd +280 -0
  106. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.pdf +0 -0
  107. data/examples/rmarkdown/svm-xaringan-example/svm-xaringan-example.Rmd +386 -0
  108. data/lib/R_interface/r.rb +1 -1
  109. data/lib/R_interface/r_libs.R +1 -1
  110. data/lib/R_interface/r_methods.rb +10 -0
  111. data/lib/R_interface/rpkg.rb +1 -0
  112. data/lib/R_interface/rsupport.rb +4 -6
  113. data/lib/gknit.rb +2 -0
  114. data/lib/gknit/draft.rb +105 -0
  115. data/lib/gknit/knitr_engine.rb +0 -33
  116. data/lib/util/exec_ruby.rb +1 -27
  117. data/specs/figures/bg.jpeg +0 -0
  118. data/specs/figures/bg.png +0 -0
  119. data/specs/figures/dose_len.png +0 -0
  120. data/specs/figures/no_args.jpeg +0 -0
  121. data/specs/figures/no_args.png +0 -0
  122. data/specs/figures/width_height.jpeg +0 -0
  123. data/specs/figures/width_height.png +0 -0
  124. data/specs/figures/width_height_units1.jpeg +0 -0
  125. data/specs/figures/width_height_units1.png +0 -0
  126. data/specs/figures/width_height_units2.jpeg +0 -0
  127. data/specs/figures/width_height_units2.png +0 -0
  128. data/specs/r_dataframe.spec.rb +11 -11
  129. data/specs/ruby_expression.spec.rb +1 -0
  130. data/specs/tmp.rb +41 -20
  131. data/version.rb +1 -1
  132. metadata +73 -35
  133. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +0 -41
  134. data/blogs/galaaz_ggplot/galaaz_ggplot.out +0 -10
  135. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/midwest_rb.pdf +0 -0
  136. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/scatter_plot_rb.pdf +0 -0
  137. data/blogs/gknit/gknit.md +0 -1430
  138. data/blogs/gknit/gknit.tex +0 -1358
  139. data/blogs/manual/graph.rb +0 -29
  140. data/blogs/nse_dplyr/nse_dplyr.tex +0 -1373
  141. data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +0 -662
  142. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +0 -57
  143. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +0 -106
  144. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +0 -110
  145. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +0 -174
  146. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +0 -236
  147. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +0 -296
  148. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +0 -236
  149. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +0 -218
  150. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +0 -128
  151. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +0 -150
  152. data/blogs/ruby_plot/ruby_plot_files/figure-latex/dose_len.png +0 -0
  153. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_delivery.png +0 -0
  154. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_dose.png +0 -0
  155. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color.png +0 -0
  156. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color2.png +0 -0
  157. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_decorations.png +0 -0
  158. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_jitter.png +0 -0
  159. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_points.png +0 -0
  160. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_box_plot.png +0 -0
  161. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_violin_plot.png +0 -0
  162. data/blogs/ruby_plot/ruby_plot_files/figure-latex/violin_with_jitter.png +0 -0
  163. data/examples/paper/paper.rb +0 -36
@@ -7,13 +7,14 @@ tags: [Tech, Data Science, Ruby, R, GraalVM]
7
7
  date: "29/04/2019"
8
8
  bibliography: stats.bib
9
9
  output:
10
- html_document:
11
- self_contained: true
12
- keep_md: true
13
10
  pdf_document:
14
11
  includes:
15
12
  in_header: ["../../sty/galaaz.sty"]
16
13
  number_sections: yes
14
+ html_document:
15
+ self_contained: true
16
+ keep_md: true
17
+ biblio-style: apsr
17
18
  ---
18
19
 
19
20
  ```{r setup, echo=FALSE}
@@ -726,5 +727,6 @@ the gnu compiler and tools should be enough. I am not sure what is needed on th
726
727
 
727
728
  * gknit \<filename\>
728
729
 
730
+
729
731
  # References
730
732
 
Binary file
Binary file
Binary file
@@ -4,6 +4,7 @@ subtitle: "How to tightly couple Ruby and R in GraalVM"
4
4
  author: "Rodrigo Botafogo"
5
5
  tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, ggplot2]
6
6
  date: "2019"
7
+ bibliography: "/home/rbotafogo/Bibliography/stats.bib"
7
8
  output:
8
9
  pdf_document:
9
10
  includes:
@@ -11,7 +12,7 @@ output:
11
12
  keep_tex: yes
12
13
  number_sections: yes
13
14
  toc: true
14
- toc_depth: 2
15
+ toc_depth: 3
15
16
  html_document:
16
17
  self_contained: true
17
18
  keep_md: true
@@ -21,6 +22,7 @@ fontsize: 11pt
21
22
  ---
22
23
 
23
24
  ```{ruby setup, echo=FALSE}
25
+ R.options(crayon__enabled: false)
24
26
  R.install_and_loads('kableExtra')
25
27
  ```
26
28
 
@@ -33,6 +35,92 @@ other hand, R is considered one of the most powerful languages for solving all o
33
35
  problems. Maybe the strongest competitor to R is Python with libraries such as NumPy,
34
36
  Pandas, SciPy, SciKit-Learn and a couple more.
35
37
 
38
+ With Galaaz we do not intend to re-implement any of the scientific libraries in R, we allow
39
+ for very tight coupling between the two languages to the point that the Ruby developer does
40
+ not need to know that there is an R engine running.
41
+
42
+ According to Wikipedia "Ruby is a dynamic, interpreted, reflective, object-oriented,
43
+ general-purpose programming language. It was designed and developed in the mid-1990s by Yukihiro
44
+ "Matz" Matsumoto in Japan." It reached high popularity with the development of Ruby on Rails
45
+ (RoR) by David Heinemeier Hansson. RoR is a web application framework first released
46
+ around 2005. It makes extensive use of Ruby's metaprogramming features. With RoR,
47
+ Ruby became very popular. According to [Ruby's Tiobe index](https://www.tiobe.com/tiobe-index/ruby/)
48
+ it peaked in popularity around 2008, then declined until 2015 when it started picking up again.
49
+ At the time of this writing (November 2018), the Tiobe index puts Ruby in 16th position as
50
+ most popular language.
51
+
52
+ Python, a language similar to Ruby, ranks 4th in the index. Java, C and C++ take the
53
+ first three positions. Ruby is often criticized for its focus on web applications.
54
+ But Ruby can do [much more](https://github.com/markets/awesome-ruby) than just web applications.
55
+ Yet, for scientific computing, Ruby lags way behind Python and R. Python has
56
+ Django framework for web, NumPy for numerical arrays, Pandas for data analysis.
57
+ R is a free software environment for statistical computing and graphics with thousands
58
+ of libraries for data analysis.
59
+
60
+ Until recently, there was no real perspective for Ruby to bridge this gap.
61
+ Implementing a complete scientific computing infrastructure would take too long.
62
+ Enter [Oracle's GraalVM](https://www.graalvm.org/):
63
+
64
+ > GraalVM is a universal virtual machine for running applications written in
65
+ > JavaScript, Python 3, Ruby, R, JVM-based languages like Java, Scala, Kotlin,
66
+ > and LLVM-based languages such as C and C++.
67
+ >
68
+ > GraalVM removes the isolation between programming languages and enables
69
+ > interoperability in a shared runtime. It can run either standalone or in the
70
+ > context of OpenJDK, Node.js, Oracle Database, or MySQL.
71
+ >
72
+ > GraalVM allows you to write polyglot applications with a seamless way to pass
73
+ > values from one language to another. With GraalVM there is no copying or
74
+ > marshaling necessary as it is with other polyglot systems. This lets you
75
+ > achieve high performance when language boundaries are crossed. Most of the time
76
+ > there is no additional cost for crossing a language boundary at all.
77
+ >
78
+ > Often developers have to make uncomfortable compromises that require them
79
+ > to rewrite their software in other languages. For example:
80
+ >
81
+ > * That library is not available in my language. I need to rewrite it.
82
+ > * That language would be the perfect fit for my problem, but we cannot
83
+ > run it in our environment.
84
+ > * That problem is already solved in my language, but the language is
85
+ > too slow.
86
+ >
87
+ > With GraalVM we aim to allow developers to freely choose the right language for
88
+ > the task at hand without making compromises.
89
+
90
+ As stated above, GraalVM is a _universal_ virtual machine that allows Ruby and R (and other
91
+ languages) to run on the same environment. GraalVM allows polyglot applications to
92
+ _seamlessly_ interact with one another and pass values from one language to the other.
93
+ Although a great idea, GraalVM still requires application writers to know several languages.
94
+ To eliminate that requirement, we built Galaaz, a gem for Ruby, to tightly couple
95
+ Ruby and R and allow those languages to interact in a way that the user will be unaware
96
+ of such interaction. In other words, a Ruby programmer will be able to use all
97
+ the capabilities of R without knowing the R syntax.
98
+
99
+ Library wrapping is a usual way of bringing features from one language into another.
100
+ To improve performance, Python often wraps more efficient C libraries. For the
101
+ Python developer, the existence of such C libraries is hidden. The problem with
102
+ library wrapping is that for any new library, there is the need to handcraft a new
103
+ wrapper.
104
+
105
+ Galaaz, instead of wrapping a single C or R library, wraps the whole R language
106
+ in Ruby. Doing so, all thousands of R libraries are available immediately
107
+ to Ruby developers without any new wrapping effort.
108
+
109
+ ## What does Galaaz mean
110
+
111
+ Galaaz is the Portuguese name for "Galahad". From Wikipedia:
112
+
113
+ Sir Galahad (sometimes referred to as Galeas or Galath),
114
+ in Arthurian legend, is a knight of King Arthur's Round Table and one
115
+ of the three achievers of the Holy Grail. He is the illegitimate son
116
+ of Sir Lancelot and Elaine of Corbenic, and is renowned for his
117
+ gallantry and purity as the most perfect of all knights. Emerging quite
118
+ late in the medieval Arthurian tradition, Sir Galahad first appears in the
119
+ Lancelot–Grail cycle, and his story is taken up in later works such as
120
+ the Post-Vulgate Cycle and Sir Thomas Malory's Le Morte d'Arthur.
121
+ His name should not be mistaken with Galehaut, a different knight from
122
+ Arthurian legend.
123
+
36
124
  # System Compatibility
37
125
 
38
126
  * Oracle Linux 7
@@ -83,7 +171,7 @@ Panda, SciPy, SciKit-Learn and a couple more.
83
171
  > galaaz -T
84
172
 
85
173
  Shows a list with all available executable tasks. To execute a task, substitute the
86
- 'rake' word in the list with 'galaaz'. For instance, the following line shows up
174
+ 'rake' word in the list with 'galaaz'. For instance, the following line shows up
87
175
  after 'galaaz -T'
88
176
 
89
177
  rake master_list:scatter_plot # scatter_plot from:....
@@ -92,6 +180,82 @@ Panda, SciPy, SciKit-Learn and a couple more.
92
180
 
93
181
  > galaaz master_list:scatter_plot
94
182
 
183
+
184
+ # Accessing R from Ruby
185
+
186
+ One of the nice aspects of Galaaz on GraalVM, is that variables and functions defined in R, can
187
+ be easily accessed from Ruby. For instance, to access the 'mtcars' data frame from R
188
+ in Ruby, we use the ':mtcars' symbol preceded by the '~' operator, thus '~:mtcars' retrieves the
189
+ value of the 'mtcars' variable.
190
+
191
+ ```{ruby access_r}
192
+ puts ~:mtcars
193
+ ```
194
+
195
+ To access an R function from Ruby, the R function needs to be preceded by 'R.' scoping.
196
+ Below we see an example of creating an R::Vector by calling the 'c' R function:
197
+
198
+ ```{ruby call_r_func}
199
+ puts vec = R.c(1.0, 2.0, 3.0, 4.0)
200
+ ```
201
+ Note that 'vec' is an object of type R::Vector:
202
+
203
+ ```{ruby r_object}
204
+ puts vec.class
205
+ ```
206
+ Every object created by a call to an R function will be of a type that inherits from
207
+ R::Object. In R, there is also a function 'class'. In order to access that function we
208
+ can call method 'rclass' in the R::Object:
209
+
210
+ ```{ruby rclass}
211
+ puts vec.rclass
212
+ ```
213
+ When working with R::Object(s), it is possible to use the '.' operator to pipe operations.
214
+ When using '.', the object to which the '.' is applied becomes the first argument of the
215
+ corresponding R function. For instance, function 'c' in R, can be used to concatenate
216
+ two or more vectors (in R, there are no scalar values, scalars are converted to
217
+ vectors of size 1. Within Galaaz, a scalar parameter is converted to a size one vector):
218
+
219
+ ```{ruby concat}
220
+ puts R.c(vec, 10, 20, 30)
221
+ ```
222
+ The call above to the 'c' function can also be done using '.' notation:
223
+
224
+ ```{ruby concat_with_dot}
225
+ puts vec.c(10, 20, 30)
226
+ ```
227
+ We will talk about vector indexing in a later section. But notice here that indexing
228
+ an R::Vector will return another R::Vector:
229
+
230
+ ```{ruby indexing}
231
+ puts vec[1]
232
+ ```
233
+ Sometimes we want to index an R::Object and get back a Ruby object that is not wrapped
234
+ in an R::Object, but the native Ruby object. For this, we can index the R object with
235
+ the '>>' operator:
236
+
237
+ ```{ruby native_value}
238
+ puts vec >> 0
239
+ puts vec >> 2
240
+ ```
241
+
242
+ It is also possible to call an R function with named arguments, by creating the function
243
+ in Galaaz with named parameters. For instance, here is an example of creating a 'list'
244
+ with named elements:
245
+
246
+ ```{ruby named_parameters}
247
+ puts R.list(first_name: "Rodrigo", last_name: "Botafogo")
248
+ ```
249
+
250
+ Many R functions receive another function as argument. For instance, method 'map' applies
251
+ a function to every element of a vector. With Galaaz, it is possible to pass a Proc,
252
+ Method or Lambda in place of the expected R function. In this next example, we will
253
+ add 2 to every element of our previously created vector:
254
+
255
+ ```{ruby proc_as_param}
256
+ puts vec.map { |x| x + 2 }
257
+ ```
258
+
95
259
  # gKnitting a Document
96
260
 
97
261
  This manual has been formatted using gKnit. gKnit uses Knitr and R markdown to knit
@@ -101,9 +265,626 @@ chunks, making it an ideal solution for literate programming. Also, since it is
101
265
  on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming with
102
266
  Ruby and R is quite natural.
103
267
 
104
- [gknit is described in more details here](https://towardsdatascience.com/how-to-do-reproducible-research-in-ruby-with-gknit-c26d2684d64e)
268
+ The idea of "literate programming" was first introduced by Donald Knuth in the
269
+ 1980's [@Knuth:literate_programming].
270
+ The main intention of this approach was to develop software interspersing macro snippets,
271
+ traditional source code, and a natural language such as English in a document
272
+ that could be compiled into
273
+ executable code and at the same time easily read by a human developer. According to Knuth
274
+ "The practitioner of
275
+ literate programming can be regarded as an essayist, whose main concern is with exposition
276
+ and excellence of style."
277
+
278
+ The idea of literate programming evolved into the idea of reproducible research, in which
279
+ all the data, software code, documentation, graphics etc. needed to reproduce the research
280
+ and its reports could be included in a
281
+ single document or set of documents that when distributed to peers could be rerun generating
282
+ the same output and reports.
283
+
284
+ The R community has put a great deal of effort in reproducible research. In 2002, Sweave was
285
+ introduced and it allowed mixing R code with Latex generating high quality PDF documents. A
286
+ Sweave document could include code, the results of executing the code, graphics and text
287
+ such that it contained the whole narrative to reproduce the research. In
288
+ 2012, Knitr, developed by Yihui Xie from RStudio was released to replace Sweave and to
289
+ consolidate in one single package the many extensions and add-on packages that
290
+ were necessary for Sweave.
291
+
292
+ With Knitr, __R markdown__ was also developed, an extension to the
293
+ Markdown format. With __R markdown__ and Knitr it is possible to generate reports in a multitude
294
+ of formats such as HTML, markdown, Latex, PDF, dvi, etc. __R markdown__ also allows the use of
295
+ multiple programming languages such as R, Ruby, Python, etc. in the same document.
296
+
297
+ In __R markdown__, text is interspersed with
298
+ code chunks that can be executed and both the code and its results can become
299
+ part of the final report. Although __R markdown__ allows multiple programming languages in the
300
+ same document, only R and Python (with
301
+ the reticulate package) can persist variables between chunks. For other languages, such as
302
+ Ruby, every chunk will start a new process and thus all data is lost between chunks, unless it
303
+ is somehow stored in a data file that is read by the next chunk.
304
+
305
+ Being able to persist data
306
+ between chunks is critical for literate programming otherwise the flow of the narrative is lost
307
+ by all the effort of having to save data and then reload it. Although this might, at first, seem like
308
+ a small nuisance, not being able to persist data between chunks is a major issue. For example, let's
309
+ take a look at the following simple example in which we want to show how to create a list and then
310
+ use it. Let's first assume that data cannot be persisted between chunks. In the next chunk we
311
+ create a list, then we would need to save it to file, but to save it, we need somehow to marshal the
312
+ data into a binary format:
313
+
314
+ ```{ruby no_persistence}
315
+ lst = R.list(a: 1, b: 2, c: 3)
316
+ lst.saveRDS("lst.rds")
317
+ ```
318
+ then, on the next chunk, where variable 'lst' is used, we need to read its value back
319
+
320
+ ```{ruby load_persisted_data}
321
+ lst = R.readRDS("lst.rds")
322
+ puts lst
323
+ ```
324
+
325
+ Now, any single code has dozens of variables that we might want to use and reuse between chunks.
326
+ Clearly, such an approach becomes quickly unmanageable. Probably, because of
327
+ this problem, it is very rare to see any __R markdown__ document in the Ruby community.
328
+
329
+ When variables can be used across chunks, then no overhead is needed:
330
+
331
+ ```{ruby persistence}
332
+ lst = R.list(a: 1, b: 2, c: 3)
333
+ # any other code can be added here
334
+ ```
335
+
336
+ ```{ruby use_var}
337
+ puts lst
338
+ ```
339
+
340
+ In the Python community, the same effort to have code and text in an integrated environment
341
+ started around the first decade of 2000. In 2006 iPython 0.7.2 was released. In 2014,
342
+ Fernando Pérez, spun off project Jupyter from iPython creating a web-based interactive
343
+ computation environment. Jupyter can now be used with many languages, including Ruby with the
344
+ iruby gem (https://github.com/SciRuby/iruby). In order to have multiple languages in a Jupyter
345
+ notebook the SoS kernel was developed (https://vatlab.github.io/sos-docs/).
346
+
347
+ ## gKnit and __R markdown__
348
+
349
+ gKnit is based on knitr and __R markdown__ and can knit a document
350
+ written both in Ruby and/or R and output it in any of the available formats of __R markdown__. gKnit
351
+ allows ruby developers to do literate programming and reproducible research by allowing them to
352
+ have in a single document, text and code.
353
+
354
+ In gKnit, Ruby variables are persisted between
355
+ chunks, making it an ideal solution for literate programming in this language. Also,
356
+ since it is based on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming
357
+ with Ruby and R is quite natural.
358
+
359
+ This is not a blog post on __R markdown__, and the interested user is directed to the following links
360
+ for detailed information on its capabilities and use.
361
+
362
+ * https://rmarkdown.rstudio.com/ or
363
+ * https://bookdown.org/yihui/rmarkdown/
364
+
365
+ In this post, we will describe just the main aspects of __R markdown__, so the user can start
366
+ gKnitting Ruby and R documents quickly.
367
+
368
+ ## The Yaml header
369
+
370
+ An __R markdown__ document should start with a Yaml header and be stored in a file with
371
+ '.Rmd' extension. This document has the following header for gKnitting an HTML document.
372
+
373
+ ```
374
+ ---
375
+ title: "How to do reproducible research in Ruby with gKnit"
376
+ author:
377
+ - "Rodrigo Botafogo"
378
+ - "Daniel Mossé - University of Pittsburgh"
379
+ tags: [Tech, Data Science, Ruby, R, GraalVM]
380
+ date: "20/02/2019"
381
+ output:
382
+ html_document:
383
+ self_contained: true
384
+ keep_md: true
385
+ pdf_document:
386
+ includes:
387
+ in_header: ["../../sty/galaaz.sty"]
388
+ number_sections: yes
389
+ ---
390
+ ```
391
+
392
+ For more information on the options in the Yaml header, [check here](https://bookdown.org/yihui/rmarkdown/html-document.html).
393
+
394
+ ## __R Markdown__ formatting
395
+
396
+ Document formatting can be done with simple markups such as:
397
+
398
+ ## Headers
399
+
400
+ ```
401
+ # Header 1
402
+
403
+ ## Header 2
404
+
405
+ ### Header 3
406
+
407
+ ```
408
+
409
+ ## Lists
410
+
411
+ ```
412
+ Unordered lists:
413
+
414
+ * Item 1
415
+ * Item 2
416
+ + Item 2a
417
+ + Item 2b
418
+ ```
419
+
420
+ ```
421
+ Ordered Lists
422
+
423
+ 1. Item 1
424
+ 2. Item 2
425
+ 3. Item 3
426
+ + Item 3a
427
+ + Item 3b
428
+ ```
429
+
430
+ For more R markdown formatting go to https://rmarkdown.rstudio.com/authoring_basics.html.
431
+
432
+ ## R chunks
433
+
434
+ Running and executing Ruby and R code is actually what really interests us in this blog.
435
+ Inserting a code chunk is done by adding code in a block delimited by three back ticks
436
+ followed by an open
437
+ curly brace ('{') followed with the engine name (r, ruby, rb, include, ...), and
438
+ any optional chunk_label and options, as shown below:
439
+
440
+ ````
441
+ ```{engine_name [chunk_label], [chunk_options]}`r ''`
442
+ ```
443
+ ````
444
+
445
+ for instance, let's add an R chunk to the document labeled 'first_r_chunk'. This is
446
+ a very simple code just to create a variable and print it out, as follows:
447
+
448
+ ````
449
+ ```{r first_r_chunk}`r ''`
450
+ vec <- c(1, 2, 3)
451
+ print(vec)
452
+ ```
453
+ ````
454
+
455
+ If this block is added to an __R markdown__ document and gKnitted the result will be:
456
+
457
+ ```{r first_r_chunk}
458
+ vec <- c(1, 2, 3)
459
+ print(vec)
460
+ ```
461
+
462
+ Now let's say that we want to do some analysis in the code, but just print the result and not the
463
+ code itself. For this, we need to add the option 'echo = FALSE'.
464
+
465
+ ````
466
+ ```{r second_r_chunk, echo = FALSE}`r ''`
467
+ vec2 <- c(10, 20, 30)
468
+ vec3 <- vec * vec2
469
+ print(vec3)
470
+ ```
471
+ ````
472
+ Here is how this block will show up in the document. Observe that the code is not shown
473
+ and we only see the execution result in a white box
474
+
475
+ ```{r second_r_chunk, echo = FALSE}
476
+ vec2 <- c(10, 20, 30)
477
+ vec3 <- vec * vec2
478
+ print(vec3)
479
+ ```
480
+
481
+ A description of the available chunk options can be found in https://yihui.name/knitr/.
482
+
483
+ Let's add another R chunk with a function definition. In this example, a vector
484
+ 'r_vec' is created and
485
+ a new function 'reduce_sum' is defined. The chunk specification is
486
+
487
+ ````
488
+ ```{r data_creation}`r ''`
489
+ r_vec <- c(1, 2, 3, 4, 5)
490
+
491
+ reduce_sum <- function(...) {
492
+ Reduce(sum, as.list(...))
493
+ }
494
+ ```
495
+ ````
496
+
497
+ and this is what it will look like once executed. From now on, to be concise in the
498
+ presentation we will not show chunk definitions any longer.
499
+
500
+
501
+ ```{r data_creation}
502
+ r_vec <- c(1, 2, 3, 4, 5)
503
+
504
+ reduce_sum <- function(...) {
505
+ Reduce(sum, as.list(...))
506
+ }
507
+ ```
508
+
509
+ We can, possibly in another chunk, access the vector and call the function as follows:
510
+
511
+ ```{r using_previous}
512
+ print(r_vec)
513
+ print(reduce_sum(r_vec))
514
+ ```
515
+ ## R Graphics with ggplot
516
+
517
+ In the following chunk, we create a bubble chart in R using ggplot and include it in
518
+ this document. Note that there is no directive in the code to include the image, this
519
+ occurs automatically. The 'mpg' dataframe is natively available to R and to Galaaz as
520
+ well.
521
+
522
+ For the reader not knowledgeable of ggplot, ggplot is a graphics library based on "the
523
+ grammar of graphics" [@Wilkinson:grammar_of_graphics]. The idea of the grammar of graphics
524
+ is to build a graphic by adding layers to the plot. More information can be found in
525
+ https://towardsdatascience.com/a-comprehensive-guide-to-the-grammar-of-graphics-for-effective-visualization-of-multi-dimensional-1f92b4ed4149.
526
+
527
+ In the plot below the 'mpg' dataset from the ggplot2 package is used. "The data concerns city-cycle fuel
528
+ consumption in miles per gallon, to be predicted in terms of 3 multivalued discrete and 5
529
+ continuous attributes." (Quinlan, 1993)
530
+
531
+ First, the 'mpg' dataset is filtered to extract only cars from the following manufacturers: Audi, Ford,
532
+ Honda, and Hyundai and stored in the 'mpg_select' variable. Then, the selected dataframe is passed
533
+ to the ggplot function specifying in the aesthetic method (aes) that 'displacement' (displ) should
534
+ be plotted in the 'x' axis and 'city mileage' should be on the 'y' axis. In the 'labs' layer we
535
+ pass the 'title' and 'subtitle' for the plot. To the basic plot 'g', geom\_jitter is added, that
536
+ plots cars from the same manufacturer with the same color (col=manufacturer) and the size of the
537
+ car point equal to its highway consumption (size = hwy). Finally, a last layer is plotted containing
538
+ a linear regression line (method = "lm") for every manufacturer.
539
+
540
+ ```{r bubble, dev='png'}
541
+ # load package and data
542
+ library(ggplot2)
543
+ data(mpg, package="ggplot2")
544
+
545
+ mpg_select <- mpg[mpg$manufacturer %in% c("audi", "ford", "honda", "hyundai"), ]
105
546
 
106
- # Vector
547
+ # Scatterplot
548
+ theme_set(theme_bw()) # pre-set the bw theme.
549
+ g <- ggplot(mpg_select, aes(displ, cty)) +
550
+ labs(subtitle="mpg: Displacement vs City Mileage",
551
+ title="Bubble chart")
552
+
553
+ g + geom_jitter(aes(col=manufacturer, size=hwy)) +
554
+ geom_smooth(aes(col=manufacturer), method="lm", se=F)
555
+ ```
556
+
557
+ ## Ruby chunks
558
+
559
+ Including a Ruby chunk is just as easy as including an R chunk in the document: just
560
+ change the name of the engine to 'ruby'. It is also possible to pass chunk options
561
+ to the Ruby engine; however, this version does not accept all the options that are
562
+ available to R chunks. Future versions will add those options.
563
+
564
+ ````
565
+ ```{ruby first_ruby_chunk}`r ''`
566
+ ```
567
+ ````
568
+
569
+ In this example, the ruby chunk is called 'first_ruby_chunk'. One important
570
+ aspect of chunk labels is that they cannot be duplicated. If a chunk label is
571
+ duplicated, gKnit will stop with an error.
572
+
573
+ In the following chunk, variables 'a', 'b' and 'c' are standard Ruby variables
574
+ and 'vec' and 'vec2' are two vectors created by calling the 'c' method on the
575
+ R module.
576
+
577
+ In Galaaz, the R module allows us to access R functions transparently. The 'c'
578
+ function in R, is a function that concatenates its arguments making a vector.
579
+
580
+ It
581
+ should be clear that there is no requirement in gknit to call or use any R
582
+ functions. gKnit will knit standard Ruby code, or even general text without
583
+ any code.
584
+
585
+ ```{ruby split_data}
586
+ a = [1, 2, 3]
587
+ b = "US$ 250.000"
588
+ c = "The 'outputs' function"
589
+
590
+ vec = R.c(1, 2, 3)
591
+ vec2 = R.c(10, 20, 30)
592
+ ```
593
+
594
+ In the next block, variables 'a', 'vec' and 'vec2' are used and printed.
595
+
596
+ ```{ruby split2}
597
+ puts a
598
+ puts vec * vec2
599
+ ```
600
+
601
+ Note that 'a' is a standard Ruby Array and 'vec' and 'vec2' are vectors that behave accordingly,
602
+ where multiplication works as expected.
603
+
604
+ ## Inline Ruby code
605
+
606
+ When using a Ruby chunk, the code and the output are formatted in blocks as seen above.
607
+ This formatting is not always desired. Sometimes, we want to have the results of the
608
+ Ruby evaluation included in the middle of a phrase. gKnit allows adding inline Ruby code
609
+ with the 'rb' engine. The following chunk specification will
610
+ create an inline Ruby text:
611
+
612
+ ````
613
+ This is some text with inline Ruby accessing variable 'b' which has value:
614
+ ```{rb puts "```{rb puts b}\n```"}
615
+ ```
616
+ and is followed by some other text!
617
+ ````
618
+
619
+ <div style="margin-bottom:30px;">
620
+ </div>
621
+
622
+ This is some text with inline Ruby accessing variable 'b' which has value:
623
+ ```{rb puts b}
624
+ ```
625
+ and is followed by some other text!
626
+
627
+ <div style="margin-bottom:30px;">
628
+ </div>
629
+
630
+ Note that it is important not to add any new line before or after the code
631
+ block if we want everything to be in only one line, resulting in the following sentence
632
+ with inline Ruby code.
633
+
634
+
635
+ ```{ruby heading, echo = FALSE}
636
+ outputs "### #{c}"
637
+ ```
638
+
639
+ We have previously used the standard 'puts' method in Ruby chunks in order to produce
640
+ output. The result of a 'puts', as seen in all previous chunks that use it, is formatted
641
+ inside a white box that
642
+ follows the code block. Many times however, we would like to do some processing in the
643
+ Ruby chunk and have the result of this processing generate an output that is
644
+ "included" in the document as if we had typed it in the __R markdown__ document.
645
+
646
+ For example, suppose we want to create a new heading in our document, but the heading
647
+ phrase is the result of some code processing: maybe it's the first line of a file we are
648
+ going to read. Method 'outputs' adds its output as if typed in the __R markdown__ document.
649
+
650
+ Take now a look at variable 'c' (it was defined in a previous block above) as
651
+ 'c = "The 'outputs' function". "The 'outputs' function" is actually the name of this
652
+ section and it was created using the 'outputs' function inside a Ruby chunk.
653
+
654
+ The ruby chunk to generate this heading is:
655
+
656
+ ````
657
+ ```{ruby heading}`r ''`
658
+ outputs "### #{c}"
659
+ ```
660
+ ````
661
+
662
+ The three '###' is the way we add a Heading 3 in __R markdown__.
663
+
664
+
665
+ ### HTML Output from Ruby Chunks
666
+
667
+ We've just seen the use of method 'outputs' to add text to the __R markdown__
668
+ document. This technique can also be used to add HTML code to the document. In
669
+ __R markdown__, any html code typed directly in the document will be properly rendered.
670
+ Here, for instance, is a table definition in HTML and its output in the document:
671
+
672
+ ```
673
+ <table style="width:100%">
674
+ <tr>
675
+ <th>Firstname</th>
676
+ <th>Lastname</th>
677
+ <th>Age</th>
678
+ </tr>
679
+ <tr>
680
+ <td>Jill</td>
681
+ <td>Smith</td>
682
+ <td>50</td>
683
+ </tr>
684
+ <tr>
685
+ <td>Eve</td>
686
+ <td>Jackson</td>
687
+ <td>94</td>
688
+ </tr>
689
+ </table>
690
+ ```
691
+ <div style="margin-bottom:30px;">
692
+ </div>
693
+
694
+ <table style="width:100%">
695
+ <tr>
696
+ <th>Firstname</th>
697
+ <th>Lastname</th>
698
+ <th>Age</th>
699
+ </tr>
700
+ <tr>
701
+ <td>Jill</td>
702
+ <td>Smith</td>
703
+ <td>50</td>
704
+ </tr>
705
+ <tr>
706
+ <td>Eve</td>
707
+ <td>Jackson</td>
708
+ <td>94</td>
709
+ </tr>
710
+ </table>
711
+
712
+ <div style="margin-bottom:30px;">
713
+ </div>
714
+
715
+ But manually creating HTML output is not always easy or desirable, especially
716
+ if we intend the document to be rendered in other formats, for example, as Latex.
717
+ Also, the above
718
+ table looks ugly. The 'kableExtra' library is a great library for
719
+ creating beautiful tables. Take a look at https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html
720
+
721
+ In the next chunk, we output the 'mtcars' dataframe from R in a nicely formatted
722
+ table. Note that we retrieve the mtcars dataframe by using '~:mtcars'.
723
+
724
+ ```{ruby nice_table}
725
+ R.install_and_loads('kableExtra')
726
+ outputs (~:mtcars).kable.kable_styling
727
+ ```
728
+
729
+ ## Including Ruby files in a chunk
730
+
731
+ R is a language that was created to be easy and fast for statisticians to use. As far
732
+ as I know, it was not a
733
+ language to be used for developing large systems. Of course, there are large systems and
734
+ libraries in R, but the focus of the language is for developing statistical models and
735
+ distributing them to peers.
736
+
737
+ Ruby on the other hand, is a language for large software development. Systems written in
738
+ Ruby will have dozens, hundreds or even thousands of files. To document a
739
+ large system with literate programming, we cannot expect the developer to add all the
740
+ files in a single '.Rmd' file. gKnit provides the 'include' chunk engine to include
741
+ a Ruby file as if it had been typed in the '.Rmd' file.
742
+
743
+ To include a file, the following chunk should be created, where <filename> is the name of
744
+ the file to be included and where the extension, if it is '.rb', does not need to be added.
745
+ If the 'relative' option is not included, then it is treated as TRUE. When 'relative' is
746
+ true, ruby's 'require\_relative' semantics is used to load the file, when false, Ruby's
747
+ \$LOAD_PATH is searched to find the file and it is 'require'd.
748
+
749
+ ````
750
+ ```{include <filename>, relative = <TRUE/FALSE>}`r ''`
751
+ ```
752
+ ````
753
+
754
+ Below we include file 'model.rb', which is in the same directory as this blog.
755
+ This code uses R's 'caret' package to split a dataset into train and test sets.
756
+ The 'caret' package is a very important and useful package for doing Data Analysis,
757
+ it has hundreds of functions for all steps of the Data Analysis workflow. To
758
+ use 'caret' just to split a dataset is like using the proverbial cannon to
759
+ kill the fly. We use it here only to show that integrating Ruby and R and
760
+ using even a very complex package as 'caret' is trivial with Galaaz.
761
+
762
+ A word of advice: the 'caret' package has lots of dependencies and installing
763
+ it in a Linux system is a time consuming operation. Method 'R.install_and_loads'
764
+ will install the package if it is not already installed and can take a while.
765
+
766
+ ````
767
+ ```{include model}`r ''`
768
+ ```
769
+ ````
770
+
771
+ ```{include model}
772
+ ```
773
+
774
+ ```{ruby model_partition}
775
+ mtcars = ~:mtcars
776
+ model = Model.new(mtcars, percent_train: 0.8)
777
+ model.partition(:mpg)
778
+ puts model.train.head
779
+ puts model.test.head
780
+ ```
781
+
782
+ ## Documenting Gems
783
+
784
+ gKnit also allows developers to document and load files that are not in the same directory
785
+ of the '.Rmd' file.
786
+
787
+ Here is an example of loading the 'find.rb' file from TruffleRuby. In this example, relative
788
+ is set to FALSE, so Ruby will look for the file in its $LOAD\_PATH, and the user does not
789
+ need to know its directory.
790
+
791
+ ````
792
+ ```{include find, relative = FALSE}`r ''`
793
+ ```
794
+ ````
795
+
796
+ ```{include find, relative = FALSE}
797
+ ```
798
+
799
+ ## Converting to PDF
800
+
801
+ One of the beauties of knitr is that the same input can be converted to many different outputs.
802
+ One very useful format, is, of course, PDF. In order to convert an __R markdown__ file to PDF
803
+ it is necessary to have LaTeX installed on the system. We will not explain here how to
804
+ install LaTeX as there are plenty of documents on the web showing how to proceed.
805
+
806
+ gKnit comes with a simple LaTeX style file for gknitting this blog as a PDF document. Here is
807
+ the Yaml header to generate this blog in PDF format instead of HTML:
808
+
809
+ ```
810
+ ---
811
+ title: "gKnit - Ruby and R Knitting with Galaaz in GraalVM"
812
+ author: "Rodrigo Botafogo"
813
+ tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, knitr, gknit]
814
+ date: "29 October 2018"
815
+ output:
816
+ pdf\_document:
817
+ includes:
818
+ in\_header: ["../../sty/galaaz.sty"]
819
+ number\_sections: yes
820
+ ---
821
+ ```
822
+
823
+ ## Template based documents generation
824
+
825
+ When a document is converted to PDF it follows a certain conversion template. We've seen above
826
+ the use of 'galaaz.sty' as a basic template to generate a PDF document. Using the
827
+ 'gknit-draft' app that comes with Galaaz, the same .Rmd file can be compiled to different
828
+ looking PDF documents. Galaaz automatically loads the 'rticles' R package that comes with
829
+ templates for the following journals with the respective template name:
830
+
831
+ * ACM articles: acm_article
832
+ * ACS articles: acs_article
833
+ * AEA journal submissions: aea_article
834
+ * AGU journal submissions: ????
835
+ * AMS articles: ams_article
836
+ * American Statistical Association: asa_article
837
+ * Biometrics articles: biometrics_article
838
+ * Bulletin de l'AMQ journal submissions: amq_article
839
+ * CTeX documents: ctex
840
+ * Elsevier journal submissions: elsevier_article
841
+ * IEEE Transaction journal submissions: ieee_article
842
+ * JSS articles: jss_article
843
+ * MDPI journal submissions: mdpi_article
844
+ * Monthly Notices of the Royal Astronomical Society articles: mnras_article
845
+ * NNRAS journal submissions: nmras_article
846
+ * PeerJ articles: peerj_article
847
+ * Royal Society Open Science journal submissions: rsos_article
848
+ * Royal Statistical Society: rss_article
849
+ * Sage journal submissions: sage_article
850
+ * Springer journal submissions: springer_article
851
+ * Statistics in Medicine journal submissions: sim_article
852
+ * Copernicus Publications journal submissions: copernicus_article
853
+ * The R Journal articles: rjournal_article
854
+ * Frontiers articles: ???
855
+ * Taylor & Francis articles: ???
856
+ * Bulletin De L'AMQ: amq_article
857
+ * PLOS journal: plos_article
858
+ * Proceedings of the National Academy of Sciences of the USA: pnas_article
859
+
860
+ In order to create a document with one of those templates, use the following command:
861
+
862
+ ```
863
+ gknit-draft --filename <my_document> --template <template> --package <package>
864
+ --create_dir
865
+ ```
866
+ So, in order to create a template for writing an R Journal article, use:
867
+
868
+ ```
869
+ gknit-draft --filename my_r_article --template rjournal_article --package rticles
870
+ --create_dir
871
+ ```
872
+
873
+ # Accessing R variables
874
+
875
+ Galaaz allows Ruby to access variables created in R. For example, the 'mtcars' data set is
876
+ available in R and can be accessed from Ruby by using the 'tilde' operator followed by the
877
+ symbol for the variable, in this case ':mtcars'. In the code below method 'outputs' is
878
+ used to output the 'mtcars' data set nicely formatted in HTML by use of the 'kable' and
879
+ 'kable_styling' functions. Method 'outputs' is only available when used with 'gknit'.
880
+
881
+ ```{ruby view_kable}
882
+ outputs (~:mtcars).kable.kable_styling
883
+ ```
884
+
885
+ # Basic Data Types
886
+
887
+ ## Vector
107
888
 
108
889
  Vectors can be thought of as contiguous cells containing data. Cells are accessed through
109
890
  indexing operations such as x[5]. Galaaz has six basic (‘atomic’) vector types: logical,
@@ -178,7 +959,7 @@ vec = R.c(true, true, false, false, true)
178
959
  puts vec
179
960
  ```
180
961
 
181
- ## Combining Vectors
962
+ ### Combining Vectors
182
963
 
183
964
  The 'c' functions used to create vectors can also be used to combine two vectors:
184
965
 
@@ -200,7 +981,7 @@ vec = vec1.c(vec2)
200
981
  puts vec
201
982
  ```
202
983
 
203
- ## Vector Arithmetic
984
+ ### Vector Arithmetic
204
985
 
205
986
  Arithmetic operations on vectors are performed element by element:
206
987
 
@@ -219,7 +1000,7 @@ vec3 = R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
219
1000
  puts vec4 = vec1 + vec3
220
1001
  ```
221
1002
 
222
- ## Vector Indexing
1003
+ ### Vector Indexing
223
1004
 
224
1005
  Vectors can be indexed by using the '[]' operator:
225
1006
 
@@ -275,7 +1056,7 @@ full_name = R.c(First: "Rodrigo", Middle: "A", Last: "Botafogo")
275
1056
  puts full_name
276
1057
  ```
277
1058
 
278
- ## Extracting Native Ruby Types from a Vector
1059
+ ### Extracting Native Ruby Types from a Vector
279
1060
 
280
1061
  Vectors created with 'R.c' are of class R::Vector. You might have noticed that when indexing a
281
1062
  vector, a new vector is returned, even if this vector has one single element. In order to use
@@ -290,19 +1071,7 @@ puts vec4 >> 4
290
1071
 
291
1072
  Note that indexing with '>>' starts at 0 and not at 1, also, we cannot do negative indexing.
292
1073
 
293
- # Accessing R variables
294
-
295
- Galaaz allows Ruby to access variables created in R. For example, the 'mtcars' data set is
296
- available in R and can be accessed from Ruby by using the 'tilda' operator followed by the
297
- symbol for the variable, in this case ':mtcar'. In the code bellow method 'outputs' is
298
- used to output the 'mtcars' data set nicely formatted in HTML by use of the 'kable' and
299
- 'kable_styling' functions. Method 'outputs' is only available when used with 'gknit'.
300
-
301
- ```{ruby view_kable}
302
- outputs (~:mtcars).kable.kable_styling
303
- ```
304
-
305
- # Matrix
1074
+ ## Matrix
306
1075
 
307
1076
  A matrix is a collection of elements organized as a two dimensional table. A matrix can be
308
1077
  created by the 'matrix' function:
@@ -326,7 +1095,7 @@ mat_row = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
326
1095
  puts mat_row
327
1096
  ```
328
1097
 
329
- ## Indexing a Matrix
1098
+ ### Indexing a Matrix
330
1099
 
331
1100
  A matrix can be indexed by [row, column]:
332
1101
 
@@ -360,7 +1129,7 @@ and 'cbind':
360
1129
  puts mat_row.cbind(mat)
361
1130
  ```
362
1131
 
363
- # List
1132
+ ## List
364
1133
 
365
1134
  A list is a data structure that can contain sublists of different types, while vector and matrix
366
1135
  can only hold one type of element.
@@ -376,7 +1145,7 @@ puts lst
376
1145
  Note that 'lst' elements are named elements.
377
1146
 
378
1147
 
379
- ## List Indexing
1148
+ ### List Indexing
380
1149
 
381
1150
  List indexing, also called slicing, is done using the '[]' operator and the '[[]]' operator. Let's
382
1151
  first start with the '[]' operator. The list above has three sublist indexing with '[]' will
@@ -406,7 +1175,7 @@ then the first element of the vector was extracted (note that vectors also accep
406
1175
  operator) and then the vector was indexed by its first element, extracting the native Ruby type.
407
1176
 
408
1177
 
409
- # Data Frame
1178
+ ## Data Frame
410
1179
 
411
1180
  A data frame is a table like structure in which each column has the same number of
412
1181
  rows. Data frames are the basic structure for storing data for data analysis. We have already
@@ -421,7 +1190,7 @@ df = R.data__frame(
421
1190
  puts df
422
1191
  ```
423
1192
 
424
- ## Data Frame Indexing
1193
+ ### Data Frame Indexing
425
1194
 
426
1195
  A data frame can be indexed the same way as a matrix, by using '[row, column]', where row and
427
1196
  column can either be a numeric or the name of the row or column
@@ -530,7 +1299,7 @@ puts exp6
530
1299
  In general we think that using the functional notation is preferable to using the
531
1300
  symbolic notation as otherwise, we end up writing invalid expressions such as
532
1301
 
533
- ```{ruby exp_wrong, warning=FALSE}
1302
+ ```{ruby exp_wrong, warning=FALSE, eval=FALSE}
534
1303
  exp_wrong = (:a + :b) == :z
535
1304
  puts exp_wrong
536
1305
  ```
@@ -600,11 +1369,15 @@ Galaaz.
600
1369
 
601
1370
  For these
602
1371
  examples, we will investigate the nycflights13 data set available on the package by the
603
- same name. We use function 'R.install_and_loads' that checks if the library is available
1372
+ same name. We use function 'R.install\_and\_loads' that checks if the library is available
604
1373
  locally, and if not, installs it. This data frame contains all 336,776 flights that
605
1374
  departed from New York City in 2013. The data comes from the US Bureau of
606
1375
  Transportation Statistics.
607
1376
 
1377
+ Dplyr uses 'tibbles' in place of data frames; unfortunately, tibbles do not yet print properly in
1378
+ Galaaz due to a bug in fastR. In order to print a tibble we need to convert it to a data frame
1379
+ using the 'as\_\_data__frame' method.
1380
+
608
1381
  ```{ruby nycflights13}
609
1382
  R.install_and_loads('nycflights13')
610
1383
  R.library('dplyr')
@@ -612,7 +1385,7 @@ R.library('dplyr')
612
1385
 
613
1386
  ```{ruby flights}
614
1387
  flights = ~:flights
615
- puts flights.head.as__data__frame
1388
+ puts flights.head
616
1389
  ```
617
1390
 
618
1391
  ## Filtering rows with Filter
@@ -621,7 +1394,7 @@ In this example we filter the flights data set by giving to the filter function
621
1394
  the first :month.eq 1
622
1395
 
623
1396
  ```{ruby filter_rows}
624
- puts flights.filter((:month.eq 1), (:day.eq 1)).head.as__data__frame
1397
+ puts flights.filter((:month.eq 1), (:day.eq 1)).head
625
1398
  ```
626
1399
 
627
1400
  ## Logical Operators
@@ -629,7 +1402,7 @@ puts flights.filter((:month.eq 1), (:day.eq 1)).head.as__data__frame
629
1402
  All flights that departed in November of December
630
1403
 
631
1404
  ```{ruby nov_dec}
632
- puts flights.filter((:month.eq 11) | (:month.eq 12)).head.as__data__frame
1405
+ puts flights.filter((:month.eq 11) | (:month.eq 12)).head
633
1406
  ```
634
1407
 
635
1408
  The same as above, but using the 'in' operator. In R, it is possible to define many operators
@@ -638,7 +1411,7 @@ operators from Galaaz the '._' method is used, where the first argument is the o
638
1411
  symbol, in this case ':in' and the second argument is the vector:
639
1412
 
640
1413
  ```{ruby in_op}
641
- puts flights.filter(:month._ :in, R.c(11, 12)).head.as__data__frame
1414
+ puts flights.filter(:month._ :in, R.c(11, 12)).head
642
1415
  ```
643
1416
 
644
1417
  ## Filtering with NA (Not Available)
@@ -650,20 +1423,20 @@ what is obtained from data frame.
650
1423
 
651
1424
  ```{ruby na_tibble}
652
1425
  df = R.tibble(x: R.c(1, R::NA, 3))
653
- puts df.as__data__frame
1426
+ puts df
654
1427
  ```
655
1428
 
656
1429
  Now filtering by :x > 1 shows all lines that satisfy this condition, where the row with R:NA does
657
1430
  not.
658
1431
 
659
1432
  ```{ruby filter_na}
660
- puts df.filter(:x > 1).as__data__frame
1433
+ puts df.filter(:x > 1)
661
1434
  ```
662
1435
 
663
1436
  To match an NA use method 'is__na'
664
1437
 
665
1438
  ```{ruby with_na}
666
- puts df.filter((:x.is__na) | (:x > 1)).as__data__frame
1439
+ puts df.filter((:x.is__na) | (:x > 1))
667
1440
  ```
668
1441
 
669
1442
  ## Arrange Rows with arrange
@@ -671,13 +1444,13 @@ puts df.filter((:x.is__na) | (:x > 1)).as__data__frame
671
1444
  Arrange reorders the rows of a data frame by the given arguments.
672
1445
 
673
1446
  ```{ruby arrange}
674
- puts flights.arrange(:year, :month, :day).head.as__data__frame
1447
+ puts flights.arrange(:year, :month, :day).head
675
1448
  ```
676
1449
 
677
1450
  To arrange in descending order, use function 'desc'
678
1451
 
679
1452
  ```{ruby desc_arrange}
680
- puts flights.arrange(:dep_delay.desc).head.as__data__frame
1453
+ puts flights.arrange(:dep_delay.desc).head
681
1454
  ```
682
1455
 
683
1456
  ## Selecting columns
@@ -685,19 +1458,19 @@ puts flights.arrange(:dep_delay.desc).head.as__data__frame
685
1458
  To select specific columns from a dataset we use function 'select':
686
1459
 
687
1460
  ```{ruby select}
688
- puts flights.select(:year, :month, :day).head.as__data__frame
1461
+ puts flights.select(:year, :month, :day).head
689
1462
  ```
690
1463
 
691
1464
  It is also possible to select column in a given range
692
1465
 
693
1466
  ```{ruby select_range}
694
- puts flights.select(:year.up_to :day).head.as__data__frame
1467
+ puts flights.select(:year.up_to :day).head
695
1468
  ```
696
1469
 
697
1470
  Select all columns that start with a given name sequence
698
1471
 
699
1472
  ```{ruby select_starts_with}
700
- puts flights.select(E.starts_with('arr')).head.as__data__frame
1473
+ puts flights.select(E.starts_with('arr')).head
701
1474
  ```
702
1475
 
703
1476
  Other functions that can be used:
@@ -714,7 +1487,7 @@ Other functions that can be used:
714
1487
  A helper function that comes in handy when we just want to rearrange column order is 'Everything':
715
1488
 
716
1489
  ```{ruby everything}
717
- puts flights.select(:year, :month, :day, E.everything).head.as__data__frame
1490
+ puts flights.select(:year, :month, :day, E.everything).head
718
1491
  ```
719
1492
 
720
1493
  ## Add variables to a dataframe with 'mutate'
@@ -726,14 +1499,14 @@ flights_sm = flights.
726
1499
  :distance,
727
1500
  :air_time)
728
1501
 
729
- puts flights_sm.head.as__data__frame
1502
+ puts flights_sm.head
730
1503
  ```
731
1504
 
732
1505
  ```{ruby mutate}
733
1506
  flights_sm = flights_sm.
734
1507
  mutate(gain: :dep_delay - :arr_delay,
735
1508
  speed: :distance / :air_time * 60)
736
- puts flights_sm.head.as__data__frame
1509
+ puts flights_sm.head
737
1510
  ```
738
1511
 
739
1512
  ## Summarising data
@@ -742,14 +1515,14 @@ Function 'summarise' calculates summaries for the data frame. When no 'group_by'
742
1515
  a single value is obtained from the data frame:
743
1516
 
744
1517
  ```{ruby summarise}
745
- puts flights.summarise(delay: E.mean(:dep_delay, na__rm: true)).as__data__frame
1518
+ puts flights.summarise(delay: E.mean(:dep_delay, na__rm: true))
746
1519
  ```
747
1520
 
748
- When a data frame is groupe with 'group_by' summaries apply to the given group:
1521
+ When a data frame is grouped with 'group_by' summaries apply to the given group:
749
1522
 
750
1523
  ```{ruby summarise_group_by}
751
1524
  by_day = flights.group_by(:year, :month, :day)
752
- puts by_day.summarise(delay: :dep_delay.mean(na__rm: true)).head.as__data__frame
1525
+ puts by_day.summarise(delay: :dep_delay.mean(na__rm: true)).head
753
1526
  ```
754
1527
 
755
1528
  Next we put many operations together by pipping them one after the other:
@@ -763,7 +1536,7 @@ delays = flights.
763
1536
  delay: :arr_delay.mean(na__rm: true)).
764
1537
  filter(:count > 20, :dest != "NHL")
765
1538
 
766
- puts delays.as__data__frame.head
1539
+ puts delays.head
767
1540
  ```
768
1541
 
769
1542
  # Using Data Table
@@ -1061,13 +1834,13 @@ def my_summarize(df, group_var)
1061
1834
  summarize(a: :a.mean)
1062
1835
  end
1063
1836
 
1064
- puts my_summarize(:df, :g1).as__data__frame
1837
+ puts my_summarize(:df, :g1)
1065
1838
  ```
1066
1839
 
1067
1840
  It works!!! Well, let's make sure this was not just some coincidence
1068
1841
 
1069
1842
  ```{ruby group_g2}
1070
- puts my_summarize(:df, :g2).as__data__frame
1843
+ puts my_summarize(:df, :g2)
1071
1844
  ```
1072
1845
 
1073
1846
  Great, everything is fine! No magic, no new functions, no complexities, just normal, standard Ruby
@@ -1184,7 +1957,7 @@ def my_summarise3(df, *group_vars)
1184
1957
  summarise(a: E.mean(:a))
1185
1958
  end
1186
1959
 
1187
- puts my_summarise3((~:df), :g1, :g2).as__data__frame
1960
+ puts my_summarise3((~:df), :g1, :g2)
1188
1961
  ```
1189
1962
 
1190
1963
  ## Why does R require NSE and Galaaz does not?
@@ -1235,7 +2008,7 @@ In the following examples, we show the use of functions 'group\_by\_at', 'summar
1235
2008
  features of characters in the Starwars movies:
1236
2009
 
1237
2010
  ```{ruby starwars}
1238
- puts (~:starwars).head.as__data__frame
2011
+ puts (~:starwars).head
1239
2012
  ```
1240
2013
  The grouped_mean function bellow will receive a grouping variable and calculate summaries for
1241
2014
  the value\_variables given:
@@ -1266,7 +2039,7 @@ def grouped_mean(data, grouping_variables, value_variables)
1266
2039
  rename_at(value_variables, E.funs(E.paste0("mean_", value_variables)))
1267
2040
  end
1268
2041
 
1269
- puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year")).as__data__frame
2042
+ puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year"))
1270
2043
  ```
1271
2044
 
1272
2045
 
@@ -1275,7 +2048,6 @@ puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year")).as__data
1275
2048
 
1276
2049
  # Contributing
1277
2050
 
1278
-
1279
2051
  * Fork it
1280
2052
  * Create your feature branch (git checkout -b my-new-feature)
1281
2053
  * Write Tests!
@@ -1283,3 +2055,4 @@ puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year")).as__data
1283
2055
  * Push to the branch (git push origin my-new-feature)
1284
2056
  * Create new Pull Request
1285
2057
 
2058
+ # References