galaaz 0.4.6 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (181) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +3575 -118
  3. data/Rakefile +21 -4
  4. data/bin/gknit +152 -6
  5. data/bin/gknit-draft +105 -0
  6. data/bin/gknit-draft.rb +28 -0
  7. data/bin/gknit_Rscript +127 -0
  8. data/bin/grun +27 -1
  9. data/bin/gstudio +47 -4
  10. data/bin/{gstudio.rb → gstudio_irb.rb} +0 -0
  11. data/bin/gstudio_pry.rb +7 -0
  12. data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +3 -12
  13. data/blogs/galaaz_ggplot/galaaz_ggplot.html +77 -222
  14. data/blogs/galaaz_ggplot/galaaz_ggplot.md +4 -31
  15. data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
  16. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
  17. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
  18. data/blogs/galaaz_ggplot/midwest.Rmd +1 -9
  19. data/blogs/gknit/gknit.Rmd +232 -123
  20. data/blogs/{dev/dev.html → gknit/gknit.html} +1897 -33
  21. data/blogs/gknit/gknit.pdf +0 -0
  22. data/blogs/gknit/lst.rds +0 -0
  23. data/blogs/gknit/stats.bib +27 -0
  24. data/blogs/manual/lst.rds +0 -0
  25. data/blogs/manual/manual.Rmd +1893 -47
  26. data/blogs/manual/manual.html +3153 -347
  27. data/blogs/manual/manual.md +3575 -118
  28. data/blogs/manual/manual.pdf +0 -0
  29. data/blogs/manual/manual.tex +4026 -0
  30. data/blogs/manual/manual_files/figure-html/bubble-1.png +0 -0
  31. data/blogs/manual/manual_files/figure-html/diverging_bar.png +0 -0
  32. data/blogs/manual/manual_files/figure-latex/bubble-1.png +0 -0
  33. data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
  34. data/blogs/{dev → manual}/model.rb +0 -0
  35. data/blogs/nse_dplyr/nse_dplyr.Rmd +849 -0
  36. data/blogs/nse_dplyr/nse_dplyr.html +878 -0
  37. data/blogs/nse_dplyr/nse_dplyr.md +1198 -0
  38. data/blogs/nse_dplyr/nse_dplyr.pdf +0 -0
  39. data/blogs/oh_my/oh_my.html +274 -386
  40. data/blogs/oh_my/oh_my.md +208 -205
  41. data/blogs/ruby_plot/ruby_plot.Rmd +64 -84
  42. data/blogs/ruby_plot/ruby_plot.html +235 -208
  43. data/blogs/ruby_plot/ruby_plot.md +239 -34
  44. data/blogs/ruby_plot/ruby_plot.pdf +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  46. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  47. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  48. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  49. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  50. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  51. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  52. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  53. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  54. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  55. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  56. data/examples/Bibliography/master.bib +50 -0
  57. data/examples/Bibliography/stats.bib +72 -0
  58. data/examples/islr/ch2.spec.rb +1 -1
  59. data/examples/islr/ch3_boston.rb +4 -4
  60. data/examples/islr/x_y_rnorm.jpg +0 -0
  61. data/examples/latex_templates/Test-acm_article/Makefile +16 -0
  62. data/examples/latex_templates/Test-acm_article/Test-acm_article.Rmd +65 -0
  63. data/examples/latex_templates/Test-acm_article/acm_proc_article-sp.cls +1670 -0
  64. data/examples/latex_templates/Test-acm_article/sensys-abstract.cls +703 -0
  65. data/examples/latex_templates/Test-acm_article/sigproc.bib +59 -0
  66. data/examples/latex_templates/Test-acs_article/Test-acs_article.Rmd +260 -0
  67. data/examples/latex_templates/Test-acs_article/Test-acs_article.pdf +0 -0
  68. data/examples/latex_templates/Test-acs_article/acs-Test-acs_article.bib +11 -0
  69. data/examples/latex_templates/Test-acs_article/acs-my_output.bib +11 -0
  70. data/examples/latex_templates/Test-acs_article/acstest.bib +17 -0
  71. data/examples/latex_templates/Test-aea_article/AEA.cls +1414 -0
  72. data/examples/latex_templates/Test-aea_article/BibFile.bib +0 -0
  73. data/examples/latex_templates/Test-aea_article/Test-aea_article.Rmd +108 -0
  74. data/examples/latex_templates/Test-aea_article/Test-aea_article.pdf +0 -0
  75. data/examples/latex_templates/Test-aea_article/aea.bst +1269 -0
  76. data/examples/latex_templates/Test-aea_article/multicol.sty +853 -0
  77. data/examples/latex_templates/Test-aea_article/references.bib +0 -0
  78. data/examples/latex_templates/Test-aea_article/setspace.sty +546 -0
  79. data/examples/latex_templates/Test-amq_article/Test-amq_article.Rmd +256 -0
  80. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdf +0 -0
  81. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdfsync +3397 -0
  82. data/examples/latex_templates/Test-amq_article/pics/Figure2.pdf +0 -0
  83. data/examples/latex_templates/Test-ams_article/Test-ams_article.Rmd +215 -0
  84. data/examples/latex_templates/Test-ams_article/amstest.bib +436 -0
  85. data/examples/latex_templates/Test-asa_article/Test-asa_article.Rmd +153 -0
  86. data/examples/latex_templates/Test-asa_article/Test-asa_article.pdf +0 -0
  87. data/examples/latex_templates/Test-asa_article/agsm.bst +1353 -0
  88. data/examples/latex_templates/Test-asa_article/bibliography.bib +233 -0
  89. data/examples/latex_templates/Test-ieee_article/IEEEtran.bst +2409 -0
  90. data/examples/latex_templates/Test-ieee_article/IEEEtran.cls +6346 -0
  91. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.Rmd +175 -0
  92. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.pdf +0 -0
  93. data/examples/latex_templates/Test-ieee_article/mybibfile.bib +20 -0
  94. data/examples/latex_templates/Test-rjournal_article/RJournal.sty +335 -0
  95. data/examples/latex_templates/Test-rjournal_article/RJreferences.bib +18 -0
  96. data/examples/latex_templates/Test-rjournal_article/RJwrapper.pdf +0 -0
  97. data/examples/latex_templates/Test-rjournal_article/Test-rjournal_article.Rmd +52 -0
  98. data/examples/latex_templates/Test-springer_article/Test-springer_article.Rmd +65 -0
  99. data/examples/latex_templates/Test-springer_article/Test-springer_article.pdf +0 -0
  100. data/examples/latex_templates/Test-springer_article/bibliography.bib +26 -0
  101. data/examples/latex_templates/Test-springer_article/spbasic.bst +1658 -0
  102. data/examples/latex_templates/Test-springer_article/spmpsci.bst +1512 -0
  103. data/examples/latex_templates/Test-springer_article/spphys.bst +1443 -0
  104. data/examples/latex_templates/Test-springer_article/svglov3.clo +113 -0
  105. data/examples/latex_templates/Test-springer_article/svjour3.cls +1431 -0
  106. data/examples/misc/moneyball.rb +1 -1
  107. data/examples/misc/subsetting.rb +37 -37
  108. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.Rmd +73 -0
  109. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.pdf +0 -0
  110. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.Rmd +382 -0
  111. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.pdf +0 -0
  112. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.Rmd +164 -0
  113. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.pdf +0 -0
  114. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.Rmd +92 -0
  115. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.pdf +0 -0
  116. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/attend-grade-relationships.csv +482 -0
  117. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.Rmd +280 -0
  118. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.pdf +0 -0
  119. data/examples/rmarkdown/svm-xaringan-example/svm-xaringan-example.Rmd +386 -0
  120. data/lib/R_interface/r.rb +2 -2
  121. data/lib/R_interface/r_libs.R +6 -1
  122. data/lib/R_interface/r_methods.rb +12 -2
  123. data/lib/R_interface/rdata_frame.rb +8 -17
  124. data/lib/R_interface/rindexed_object.rb +1 -2
  125. data/lib/R_interface/rlist.rb +1 -0
  126. data/lib/R_interface/robject.rb +20 -23
  127. data/lib/R_interface/rpkg.rb +15 -6
  128. data/lib/R_interface/rsupport.rb +13 -19
  129. data/lib/R_interface/ruby_extensions.rb +14 -18
  130. data/lib/R_interface/rvector.rb +0 -12
  131. data/lib/gknit.rb +2 -0
  132. data/lib/gknit/draft.rb +105 -0
  133. data/lib/gknit/knitr_engine.rb +6 -37
  134. data/lib/util/exec_ruby.rb +22 -84
  135. data/lib/util/inline_file.rb +7 -3
  136. data/specs/figures/bg.jpeg +0 -0
  137. data/specs/figures/bg.png +0 -0
  138. data/specs/figures/bg.svg +2 -2
  139. data/specs/figures/dose_len.png +0 -0
  140. data/specs/figures/no_args.jpeg +0 -0
  141. data/specs/figures/no_args.png +0 -0
  142. data/specs/figures/no_args.svg +2 -2
  143. data/specs/figures/width_height.jpeg +0 -0
  144. data/specs/figures/width_height.png +0 -0
  145. data/specs/figures/width_height_units1.jpeg +0 -0
  146. data/specs/figures/width_height_units1.png +0 -0
  147. data/specs/figures/width_height_units2.jpeg +0 -0
  148. data/specs/figures/width_height_units2.png +0 -0
  149. data/specs/r_dataframe.spec.rb +184 -11
  150. data/specs/r_list.spec.rb +4 -4
  151. data/specs/r_list_apply.spec.rb +11 -10
  152. data/specs/ruby_expression.spec.rb +3 -11
  153. data/specs/tmp.rb +106 -34
  154. data/version.rb +1 -1
  155. metadata +96 -33
  156. data/bin/gknit_old_r +0 -236
  157. data/blogs/dev/dev.Rmd +0 -77
  158. data/blogs/dev/dev.md +0 -87
  159. data/blogs/dev/dev_files/figure-html/bubble-1.png +0 -0
  160. data/blogs/dev/dev_files/figure-html/diverging_bar. +0 -0
  161. data/blogs/dev/dev_files/figure-html/diverging_bar.png +0 -0
  162. data/blogs/dplyr/dplyr.rb +0 -63
  163. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +0 -43
  164. data/blogs/galaaz_ggplot/galaaz_ggplot.log +0 -640
  165. data/blogs/galaaz_ggplot/galaaz_ggplot.out +0 -10
  166. data/blogs/galaaz_ggplot/galaaz_ggplot.tex +0 -481
  167. data/blogs/galaaz_ggplot/midwest.png +0 -0
  168. data/blogs/galaaz_ggplot/scatter_plot.png +0 -0
  169. data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +0 -662
  170. data/blogs/ruby_plot/ruby_plot.tex +0 -1077
  171. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +0 -57
  172. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +0 -106
  173. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +0 -110
  174. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +0 -174
  175. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +0 -236
  176. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +0 -296
  177. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +0 -236
  178. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +0 -218
  179. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +0 -128
  180. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +0 -150
  181. data/examples/paper/paper.rb +0 -36
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 567380b5c235ab363a1b4c5848e06126a0ee635b
4
- data.tar.gz: b0d4735743f9f6f50af6e9231e9bad2001bf2e65
2
+ SHA256:
3
+ metadata.gz: 0a936fac80a3198849bf43505e3badca81025fcef2b942fabe5edc328b6d35f3
4
+ data.tar.gz: 4aa40b1d667ee45ab94ee8e9565401e718179ad261c043a2173fe50d5b97dfb2
5
5
  SHA512:
6
- metadata.gz: b6e9847e52df71021cbcc4e67e21bb5f15cde4e85bf52def332ef5acb0acab23542db97a7907bab6ec00ee5f038c22412e250e1af7c317ab99ceb6bb7007011b
7
- data.tar.gz: 1d2e3694c81ba8de5f06ded79c5d04e3d987d97680cc898d51aa5fadfc4f5ef3e45741164a106680dd6f5df9f55461d526f5d88c4ff500eb68254c0da5728eaa
6
+ metadata.gz: 34974a5d148a2f0896fa07ef26f046af1b43d1263750732d072e6614ad8f3ff32783248a02228acd9b6c0f2183ddb68c91a6dd93aebd51198c594c1f6e513298
7
+ data.tar.gz: 88ea82fcf3e298deacdae6c7305faabff38d89b41a526a8f0e528c00555190acd84006764365c0fa7e913e361f3ecaf69cdf1c00332b80d4ba7d276dad7d10fe
data/README.md CHANGED
@@ -4,6 +4,7 @@ subtitle: "How to tightly couple Ruby and R in GraalVM"
4
4
  author: "Rodrigo Botafogo"
5
5
  tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, ggplot2]
6
6
  date: "2019"
7
+ bibliography: "/home/rbotafogo/Bibliography/stats.bib"
7
8
  output:
8
9
  html_document:
9
10
  self_contained: true
@@ -16,13 +17,12 @@ output:
16
17
  keep_tex: yes
17
18
  number_sections: yes
18
19
  toc: true
19
- toc_depth: 2
20
+ toc_depth: 3
20
21
  fontsize: 11pt
21
22
  ---
22
23
 
23
24
 
24
25
 
25
-
26
26
  # Introduction
27
27
 
28
28
  Galaaz is a system for tightly coupling Ruby and R. Ruby is a powerful language, with a large
@@ -32,6 +32,92 @@ other hand, R is considered one of the most powerful languages for solving all o
32
32
  problems. Maybe the strongest competitor to R is Python with libraries such as NumPy,
33
33
  Panda, SciPy, SciKit-Learn and a couple more.
34
34
 
35
+ With Galaaz we do not intend to re-implement any of the scientific libraries in R; instead, we allow
36
+ for very tight coupling between the two languages to the point that the Ruby developer does
37
+ not need to know that there is an R engine running.
38
+
39
+ According to Wikipedia "Ruby is a dynamic, interpreted, reflective, object-oriented,
40
+ general-purpose programming language. It was designed and developed in the mid-1990s by Yukihiro
41
+ "Matz" Matsumoto in Japan." It reached high popularity with the development of Ruby on Rails
42
+ (RoR) by David Heinemeier Hansson. RoR is a web application framework first released
43
+ around 2005. It makes extensive use of Ruby's metaprogramming features. With RoR,
44
+ Ruby became very popular. According to [Ruby's Tiobe index](https://www.tiobe.com/tiobe-index/ruby/)
45
+ it peaked in popularity around 2008, then declined until 2015 when it started picking up again.
46
+ At the time of this writing (November 2018), the Tiobe index puts Ruby in 16th position as
47
+ most popular language.
48
+
49
+ Python, a language similar to Ruby, ranks 4th in the index. Java, C and C++ take the
50
+ first three positions. Ruby is often criticized for its focus on web applications.
51
+ But Ruby can do [much more](https://github.com/markets/awesome-ruby) than just web applications.
52
+ Yet, for scientific computing, Ruby lags way behind Python and R. Python has
53
+ Django framework for web, NumPy for numerical arrays, Pandas for data analysis.
54
+ R is a free software environment for statistical computing and graphics with thousands
55
+ of libraries for data analysis.
56
+
57
+ Until recently, there was no real perspective for Ruby to bridge this gap.
58
+ Implementing a complete scientific computing infrastructure would take too long.
59
+ Enter [Oracle's GraalVM](https://www.graalvm.org/):
60
+
61
+ > GraalVM is a universal virtual machine for running applications written in
62
+ > JavaScript, Python 3, Ruby, R, JVM-based languages like Java, Scala, Kotlin,
63
+ > and LLVM-based languages such as C and C++.
64
+ >
65
+ > GraalVM removes the isolation between programming languages and enables
66
+ > interoperability in a shared runtime. It can run either standalone or in the
67
+ > context of OpenJDK, Node.js, Oracle Database, or MySQL.
68
+ >
69
+ > GraalVM allows you to write polyglot applications with a seamless way to pass
70
+ > values from one language to another. With GraalVM there is no copying or
71
+ > marshaling necessary as it is with other polyglot systems. This lets you
72
+ > achieve high performance when language boundaries are crossed. Most of the time
73
+ > there is no additional cost for crossing a language boundary at all.
74
+ >
75
+ > Often developers have to make uncomfortable compromises that require them
76
+ > to rewrite their software in other languages. For example:
77
+ >
78
+ > * That library is not available in my language. I need to rewrite it.
79
+ > * That language would be the perfect fit for my problem, but we cannot
80
+ > run it in our environment.
81
+ > * That problem is already solved in my language, but the language is
82
+ > too slow.
83
+ >
84
+ > With GraalVM we aim to allow developers to freely choose the right language for
85
+ > the task at hand without making compromises.
86
+
87
+ As stated above, GraalVM is a _universal_ virtual machine that allows Ruby and R (and other
88
+ languages) to run on the same environment. GraalVM allows polyglot applications to
89
+ _seamlessly_ interact with one another and pass values from one language to the other.
90
+ Although a great idea, GraalVM still requires application writers to know several languages.
91
+ To eliminate that requirement, we built Galaaz, a gem for Ruby, to tightly couple
92
+ Ruby and R and allow those languages to interact in a way that the user will be unaware
93
+ of such interaction. In other words, a Ruby programmer will be able to use all
94
+ the capabilities of R without knowing the R syntax.
95
+
96
+ Library wrapping is a usual way of bringing features from one language into another.
97
+ To improve performance, Python often wraps more efficient C libraries. For the
98
+ Python developer, the existence of such C libraries is hidden. The problem with
99
+ library wrapping is that for any new library, there is the need to handcraft a new
100
+ wrapper.
101
+
102
+ Galaaz, instead of wrapping a single C or R library, wraps the whole R language
103
+ in Ruby. Doing so, all thousands of R libraries are available immediately
104
+ to Ruby developers without any new wrapping effort.
105
+
106
+ ## What does Galaaz mean
107
+
108
+ Galaaz is the Portuguese name for "Galahad". From Wikipedia:
109
+
110
+ Sir Galahad (sometimes referred to as Galeas or Galath),
111
+ in Arthurian legend, is a knight of King Arthur's Round Table and one
112
+ of the three achievers of the Holy Grail. He is the illegitimate son
113
+ of Sir Lancelot and Elaine of Corbenic, and is renowned for his
114
+ gallantry and purity as the most perfect of all knights. Emerging quite
115
+ late in the medieval Arthurian tradition, Sir Galahad first appears in the
116
+ Lancelot–Grail cycle, and his story is taken up in later works such as
117
+ the Post-Vulgate Cycle and Sir Thomas Malory's Le Morte d'Arthur.
118
+ His name should not be mistaken with Galehaut, a different knight from
119
+ Arthurian legend.
120
+
35
121
  # System Compatibility
36
122
 
37
123
  * Oracle Linux 7
@@ -87,7 +173,7 @@ Panda, SciPy, SciKit-Learn and a couple more.
87
173
  > galaaz -T
88
174
 
89
175
  Shows a list with all available executable tasks. To execute a task, substitute the
90
- 'rake' word in the list with 'galaaz'. For instance, the following line shows up
176
+ 'rake' word in the list with 'galaaz'. For instance, the following line shows up
91
177
  after 'galaaz -T'
92
178
 
93
179
  rake master_list:scatter_plot # scatter_plot from:....
@@ -96,147 +182,713 @@ Panda, SciPy, SciKit-Learn and a couple more.
96
182
 
97
183
  > galaaz master_list:scatter_plot
98
184
 
99
- # Basic Types
100
185
 
101
- ## Vectors
186
+ # Accessing R from Ruby
102
187
 
103
- Vectors can be thought of as contiguous cells containing data. Cells are accessed through
104
- indexing operations such as x[5]. Galaaz has six basic (‘atomic’) vector types: logical,
105
- integer, real, complex, string (or character) and raw. The modes and storage modes for the
106
- different vector types are listed in the following
107
- table.
188
+ One of the nice aspects of Galaaz on GraalVM, is that variables and functions defined in R, can
189
+ be easily accessed from Ruby. For instance, to access the 'mtcars' data frame from R
190
+ in Ruby, we use the ':mtcars' symbol preceded by the '~' operator, thus '~:mtcars' retrieves the
191
+ value of the 'mtcars' variable.
108
192
 
109
- | typeof | mode | storage.mode |
110
- |-----------|:---------:|-------------:|
111
- | logical | logical | logical |
112
- | integer | numeric | integer |
113
- | double | numeric | double |
114
- | complex | complex | comples |
115
- | character | character | character |
116
- | raw | raw | raw |
117
193
 
118
- Single numbers, such as 4.2, and strings, such as "four point two" are still vectors, of length
119
- 1; there are no more basic types. Vectors with length zero are possible (and useful).
120
- String vectors have mode and storage mode "character". A single element of a character
121
- vector is often referred to as a character string.
194
+ ```ruby
195
+ puts ~:mtcars
196
+ ```
122
197
 
123
- To create a vector the 'c' (concatenate) method from the 'R' module should be used:
198
+ ```
199
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
200
+ ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
201
+ ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
202
+ ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
203
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
204
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
205
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
206
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
207
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
208
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
209
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
210
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
211
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
212
+ ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
213
+ ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
214
+ ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
215
+ ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
216
+ ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
217
+ ## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
218
+ ## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
219
+ ## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
220
+ ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
221
+ ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
222
+ ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
223
+ ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
224
+ ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
225
+ ## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
226
+ ## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
227
+ ## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
228
+ ## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
229
+ ## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
230
+ ## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
231
+ ## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
232
+ ```
233
+
234
+ To access an R function from Ruby, the R function needs to be preceded by 'R.' scoping.
235
+ Below we see an example of creating an R::Vector by calling the 'c' R function:
124
236
 
125
237
 
126
238
  ```ruby
127
- @vec = R.c(1, 2, 3)
128
- puts @vec
239
+ puts vec = R.c(1.0, 2.0, 3.0, 4.0)
129
240
  ```
130
241
 
131
242
  ```
132
- ## [1] 1 2 3
243
+ ## [1] 1 2 3 4
133
244
  ```
245
+ Note that 'vec' is an object of type R::Vector:
134
246
 
135
- Lets take a look at the type, mode and storage.mode of our vector @vec. In order to print
136
- this out, we are creating a data frame 'df' and printing it out. A data frame, for those
137
- not familiar with it, it basically a table. Here we create the data frame and add the
138
- column name by passing named parameters for each column, such as 'typeof:', 'mode:' and
139
- 'storage__mode'. You should also note here that the double underscore is converted to a '.'.
140
247
 
141
- In R, the method used to create a data frame is 'data.frame', in Galaaz we use 'data__frame'.
248
+ ```ruby
249
+ puts vec.class
250
+ ```
251
+
252
+ ```
253
+ ## R::Vector
254
+ ```
255
+ Every object created by a call to an R function will be of a type that inherits from
256
+ R::Object. In R, there is also a function 'class'. In order to access that function we
257
+ can call method 'rclass' in the R::Object:
142
258
 
143
259
 
144
260
  ```ruby
145
- df = R.data__frame(typeof: @vec.typeof, mode: @vec.mode, storage__mode: @vec.storage__mode)
146
- puts df
261
+ puts vec.rclass
147
262
  ```
148
263
 
149
264
  ```
150
- ## typeof mode storage.mode
151
- ## 1 integer numeric integer
265
+ ## [1] "numeric"
152
266
  ```
267
+ When working with R::Object(s), it is possible to use the '.' operator to pipe operations.
268
+ When using '.', the object to which the '.' is applied becomes the first argument of the
269
+ corresponding R function. For instance, function 'c' in R, can be used to concatenate
270
+ two or more vectors (in R, there are no scalar values, scalars are converted to
271
+ vectors of size 1. Within Galaaz, a scalar parameter is converted to a size one vector):
153
272
 
154
- If you want to create a vector with floating point numbers, then we need at least one of the
155
- vector's element to be a float, such as 1.0. R users should be careful, since in R a number
156
- like '1' is converted to float and to have an integer the R developer will use '1L'. Galaaz
157
- follows normal Ruby rules and the number 1 is an integer and 1.0 is a float.
273
+
274
+ ```ruby
275
+ puts R.c(vec, 10, 20, 30)
276
+ ```
277
+
278
+ ```
279
+ ## [1] 1 2 3 4 10 20 30
280
+ ```
281
+ The call above to the 'c' function can also be done using '.' notation:
158
282
 
159
283
 
160
284
  ```ruby
161
- @vec = R.c(1.0, 2, 3)
162
- puts @vec
285
+ puts vec.c(10, 20, 30)
163
286
  ```
164
287
 
165
288
  ```
166
- ## [1] 1 2 3
289
+ ## [1] 1 2 3 4 10 20 30
167
290
  ```
291
+ We will talk about vector indexing in a later section. But notice here that indexing
292
+ an R::Vector will return another R::Vector:
168
293
 
169
294
 
170
295
  ```ruby
171
- df = R.data__frame(typeof: @vec.typeof, mode: @vec.mode, storage__mode: @vec.storage__mode)
172
- outputs df.kable.kable_styling
296
+ puts vec[1]
173
297
  ```
174
298
 
175
- <table class="table" style="margin-left: auto; margin-right: auto;">
176
- <thead>
177
- <tr>
178
- <th style="text-align:left;"> typeof </th>
179
- <th style="text-align:left;"> mode </th>
180
- <th style="text-align:left;"> storage.mode </th>
181
- </tr>
182
- </thead>
183
- <tbody>
184
- <tr>
185
- <td style="text-align:left;"> double </td>
186
- <td style="text-align:left;"> numeric </td>
187
- <td style="text-align:left;"> double </td>
188
- </tr>
189
- </tbody>
190
- </table>
299
+ ```
300
+ ## [1] 1
301
+ ```
302
+ Sometimes we want to index an R::Object and get back a Ruby object that is not wrapped
303
+ in an R::Object, but the native Ruby object. For this, we can index the R object with
304
+ the '>>' operator:
191
305
 
192
- In this next example we try to create a vector with a variable 'hello' that has not yet
193
- being defined. This will raise an exception that is printed out. We get two return blocks,
194
- the first with a message explaining what went wrong and the second with the full backtrace
195
- of the error.
306
+
307
+ ```ruby
308
+ puts vec >> 0
309
+ puts vec >> 2
310
+ ```
311
+
312
+ ```
313
+ ## 1.0
314
+ ## 3.0
315
+ ```
316
+
317
+ It is also possible to call an R function with named arguments, by creating the function
318
+ in Galaaz with named parameters. For instance, here is an example of creating a 'list'
319
+ with named elements:
196
320
 
197
321
 
198
322
  ```ruby
199
- vec = R.c(1, hello, 5)
323
+ puts R.list(first_name: "Rodrigo", last_name: "Botafogo")
200
324
  ```
201
325
 
202
326
  ```
203
- ## Message:
204
- ## undefined local variable or method `hello' for RubyChunk:Class
327
+ ## $first_name
328
+ ## [1] "Rodrigo"
329
+ ##
330
+ ## $last_name
331
+ ## [1] "Botafogo"
332
+ ```
333
+
334
+ Many R functions receive another function as argument. For instance, method 'map' applies
335
+ a function to every element of a vector. With Galaaz, it is possible to pass a Proc,
336
+ Method or Lambda in place of the expected R function. In this next example, we will
337
+ add 2 to every element of our previously created vector:
338
+
339
+
340
+ ```ruby
341
+ puts vec.map { |x| x + 2 }
342
+ ```
343
+
344
+ ```
345
+ ## [1] 3
346
+ ## [1] 4
347
+ ## [1] 5
348
+ ## [1] 6
349
+ ```
350
+
351
+ # gKnitting a Document
352
+
353
+ This manual has been formatted using gKnit. gKnit uses Knitr and R markdown to knit
354
+ a document in Ruby or R and output it in any of the available formats for R markdown.
355
+ gKnit runs atop GraalVM and Galaaz. In gKnit, Ruby variables are persisted between
356
+ chunks, making it an ideal solution for literate programming. Also, since it is based
357
+ on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming with
358
+ Ruby and R is quite natural.
359
+
360
+ The idea of "literate programming" was first introduced by Donald Knuth in the
361
+ 1980's [@Knuth:literate_programming].
362
+ The main intention of this approach was to develop software interspersing macro snippets,
363
+ traditional source code, and a natural language such as English in a document
364
+ that could be compiled into
365
+ executable code and at the same time easily read by a human developer. According to Knuth
366
+ "The practitioner of
367
+ literate programming can be regarded as an essayist, whose main concern is with exposition
368
+ and excellence of style."
369
+
370
+ The idea of literate programming evolved into the idea of reproducible research, in which
371
+ all the data, software code, documentation, graphics etc. needed to reproduce the research
372
+ and its reports could be included in a
373
+ single document or set of documents that when distributed to peers could be rerun generating
374
+ the same output and reports.
375
+
376
+ The R community has put a great deal of effort in reproducible research. In 2002, Sweave was
377
+ introduced and it allowed mixing R code with Latex generating high quality PDF documents. A
378
+ Sweave document could include code, the results of executing the code, graphics and text
379
+ such that it contained the whole narrative to reproduce the research. In
380
+ 2012, Knitr, developed by Yihui Xie from RStudio was released to replace Sweave and to
381
+ consolidate in one single package the many extensions and add-on packages that
382
+ were necessary for Sweave.
383
+
384
+ With Knitr, __R markdown__ was also developed, an extension to the
385
+ Markdown format. With __R markdown__ and Knitr it is possible to generate reports in a multitude
386
+ of formats such as HTML, markdown, Latex, PDF, dvi, etc. __R markdown__ also allows the use of
387
+ multiple programming languages such as R, Ruby, Python, etc. in the same document.
388
+
389
+ In __R markdown__, text is interspersed with
390
+ code chunks that can be executed and both the code and its results can become
391
+ part of the final report. Although __R markdown__ allows multiple programming languages in the
392
+ same document, only R and Python (with
393
+ the reticulate package) can persist variables between chunks. For other languages, such as
394
+ Ruby, every chunk will start a new process and thus all data is lost between chunks, unless it
395
+ is somehow stored in a data file that is read by the next chunk.
396
+
397
+ Being able to persist data
398
+ between chunks is critical for literate programming otherwise the flow of the narrative is lost
399
+ by all the effort of having to save data and then reload it. Although this might, at first, seem like
400
+ a small nuisance, not being able to persist data between chunks is a major issue. For example, let's
401
+ take a look at the following simple example in which we want to show how to create a list and then
402
+ use it. Let's first assume that data cannot be persisted between chunks. In the next chunk we
403
+ create a list, then we would need to save it to file, but to save it, we need somehow to marshal the
404
+ data into a binary format:
405
+
406
+
407
+ ```ruby
408
+ lst = R.list(a: 1, b: 2, c: 3)
409
+ lst.saveRDS("lst.rds")
410
+ ```
411
+ then, on the next chunk, where variable 'lst' is used, we need to read back its value
412
+
413
+
414
+ ```ruby
415
+ lst = R.readRDS("lst.rds")
416
+ puts lst
417
+ ```
418
+
419
+ ```
420
+ ## $a
421
+ ## [1] 1
422
+ ##
423
+ ## $b
424
+ ## [1] 2
425
+ ##
426
+ ## $c
427
+ ## [1] 3
428
+ ```
429
+
430
+ Now, any single code has dozens of variables that we might want to use and reuse between chunks.
431
+ Clearly, such an approach becomes quickly unmanageable. Probably, because of
432
+ this problem, it is very rare to see any __R markdown__ document in the Ruby community.
433
+
434
+ When variables can be used across chunks, then no overhead is needed:
435
+
436
+
437
+ ```ruby
438
+ lst = R.list(a: 1, b: 2, c: 3)
439
+ # any other code can be added here
440
+ ```
441
+
442
+
443
+ ```ruby
444
+ puts lst
445
+ ```
446
+
447
+ ```
448
+ ## $a
449
+ ## [1] 1
450
+ ##
451
+ ## $b
452
+ ## [1] 2
453
+ ##
454
+ ## $c
455
+ ## [1] 3
456
+ ```
457
+
458
+ In the Python community, the same effort to have code and text in an integrated environment
459
+ started around the first decade of 2000. In 2006 iPython 0.7.2 was released. In 2014,
460
+ Fernando Pérez, spun off project Jupyter from iPython creating a web-based interactive
461
+ computation environment. Jupyter can now be used with many languages, including Ruby with the
462
+ iruby gem (https://github.com/SciRuby/iruby). In order to have multiple languages in a Jupyter
463
+ notebook the SoS kernel was developed (https://vatlab.github.io/sos-docs/).
464
+
465
+ ## gKnit and __R markdown__
466
+
467
+ gKnit is based on knitr and __R markdown__ and can knit a document
468
+ written both in Ruby and/or R and output it in any of the available formats of __R markdown__. gKnit
469
+ allows ruby developers to do literate programming and reproducible research by allowing them to
470
+ have in a single document, text and code.
471
+
472
+ In gKnit, Ruby variables are persisted between
473
+ chunks, making it an ideal solution for literate programming in this language. Also,
474
+ since it is based on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming
475
+ with Ruby and R is quite natural.
476
+
477
+ This is not a blog post on __R markdown__, and the interested user is directed to the following links
478
+ for detailed information on its capabilities and use.
479
+
480
+ * https://rmarkdown.rstudio.com/ or
481
+ * https://bookdown.org/yihui/rmarkdown/
482
+
483
+ In this post, we will describe just the main aspects of __R markdown__, so the user can start
484
+ gKnitting Ruby and R documents quickly.
485
+
486
+ ## The Yaml header
487
+
488
+ An __R markdown__ document should start with a Yaml header and be stored in a file with
489
+ '.Rmd' extension. This document has the following header for gKnitting an HTML document.
490
+
491
+ ```
492
+ ---
493
+ title: "How to do reproducible research in Ruby with gKnit"
494
+ author:
495
+ - "Rodrigo Botafogo"
496
+ - "Daniel Mossé - University of Pittsburgh"
497
+ tags: [Tech, Data Science, Ruby, R, GraalVM]
498
+ date: "20/02/2019"
499
+ output:
500
+ html_document:
501
+ self_contained: true
502
+ keep_md: true
503
+ pdf_document:
504
+ includes:
505
+ in_header: ["../../sty/galaaz.sty"]
506
+ number_sections: yes
507
+ ---
508
+ ```
509
+
510
+ For more information on the options in the Yaml header, [check here](https://bookdown.org/yihui/rmarkdown/html-document.html).
511
+
512
+ ## __R Markdown__ formatting
513
+
514
+ Document formatting can be done with simple markups such as:
515
+
516
+ ## Headers
517
+
518
+ ```
519
+ # Header 1
520
+
521
+ ## Header 2
522
+
523
+ ### Header 3
524
+
525
+ ```
526
+
527
+ ## Lists
528
+
529
+ ```
530
+ Unordered lists:
531
+
532
+ * Item 1
533
+ * Item 2
534
+ + Item 2a
535
+ + Item 2b
536
+ ```
537
+
538
+ ```
539
+ Ordered Lists
540
+
541
+ 1. Item 1
542
+ 2. Item 2
543
+ 3. Item 3
544
+ + Item 3a
545
+ + Item 3b
546
+ ```
547
+
548
+ For more R markdown formatting go to https://rmarkdown.rstudio.com/authoring_basics.html.
549
+
550
+ ## R chunks
551
+
552
+ Running and executing Ruby and R code is actually what really interests us in this blog.
553
+ Inserting a code chunk is done by adding code in a block delimited by three back ticks
554
+ followed by an open
555
+ curly brace ('{') followed with the engine name (r, ruby, rb, include, ...), and
556
+ any optional chunk_label and options, as shown below:
557
+
558
+ ````
559
+ ```{engine_name [chunk_label], [chunk_options]}
560
+ ```
561
+ ````
562
+
563
+ for instance, let's add an R chunk to the document labeled 'first_r_chunk'. This is
564
+ a very simple code just to create a variable and print it out, as follows:
565
+
566
+ ````
567
+ ```{r first_r_chunk}
568
+ vec <- c(1, 2, 3)
569
+ print(vec)
570
+ ```
571
+ ````
572
+
573
+ If this block is added to an __R markdown__ document and gKnitted the result will be:
574
+
575
+
576
+ ```r
577
+ vec <- c(1, 2, 3)
578
+ print(vec)
579
+ ```
580
+
581
+ ```
582
+ ## [1] 1 2 3
583
+ ```
584
+
585
+ Now let's say that we want to do some analysis in the code, but just print the result and not the
586
+ code itself. For this, we need to add the option 'echo = FALSE'.
587
+
588
+ ````
589
+ ```{r second_r_chunk, echo = FALSE}
590
+ vec2 <- c(10, 20, 30)
591
+ vec3 <- vec * vec2
592
+ print(vec3)
593
+ ```
594
+ ````
595
+ Here is how this block will show up in the document. Observe that the code is not shown
596
+ and we only see the execution result in a white box
597
+
598
+
599
+ ```
600
+ ## [1] 10 40 90
601
+ ```
602
+
603
+ A description of the available chunk options can be found in https://yihui.name/knitr/.
604
+
605
+ Let's add another R chunk with a function definition. In this example, a vector
606
+ 'r_vec' is created and
607
+ a new function 'reduce_sum' is defined. The chunk specification is
608
+
609
+ ````
610
+ ```{r data_creation}
611
+ r_vec <- c(1, 2, 3, 4, 5)
612
+
613
+ reduce_sum <- function(...) {
614
+ Reduce(sum, as.list(...))
615
+ }
616
+ ```
617
+ ````
618
+
619
+ and this is what it will look like once executed. From now on, to be concise in the
620
+ presentation we will not show chunk definitions any longer.
621
+
622
+
623
+
624
+ ```r
625
+ r_vec <- c(1, 2, 3, 4, 5)
626
+
627
+ reduce_sum <- function(...) {
628
+ Reduce(sum, as.list(...))
629
+ }
630
+ ```
631
+
632
+ We can, possibly in another chunk, access the vector and call the function as follows:
633
+
634
+
635
+ ```r
636
+ print(r_vec)
637
+ ```
638
+
639
+ ```
640
+ ## [1] 1 2 3 4 5
641
+ ```
642
+
643
+ ```r
644
+ print(reduce_sum(r_vec))
645
+ ```
646
+
647
+ ```
648
+ ## [1] 15
649
+ ```
650
+ ## R Graphics with ggplot
651
+
652
+ In the following chunk, we create a bubble chart in R using ggplot and include it in
653
+ this document. Note that there is no directive in the code to include the image, this
654
+ occurs automatically. The 'mpg' dataframe is natively available to R and to Galaaz as
655
+ well.
656
+
657
+ For the reader not knowledgeable of ggplot, ggplot is a graphics library based on "the
658
+ grammar of graphics" [@Wilkinson:grammar_of_graphics]. The idea of the grammar of graphics
659
+ is to build a graphics by adding layers to the plot. More information can be found in
660
+ https://towardsdatascience.com/a-comprehensive-guide-to-the-grammar-of-graphics-for-effective-visualization-of-multi-dimensional-1f92b4ed4149.
661
+
662
+ In the plot below the 'mpg' dataset from the 'ggplot2' package is used. "The data concerns city-cycle fuel
663
+ consumption in miles per gallon, to be predicted in terms of 3 multivalued discrete and 5
664
+ continuous attributes." (Quinlan, 1993)
665
+
666
+ First, the 'mpg' dataset is filtered to extract only cars from the following manufacturers: Audi, Ford,
667
+ Honda, and Hyundai and stored in the 'mpg_select' variable. Then, the selected dataframe is passed
668
+ to the ggplot function specifying in the aesthetic method (aes) that 'displacement' (displ) should
669
+ be plotted in the 'x' axis and 'city mileage' should be on the 'y' axis. In the 'labs' layer we
670
+ pass the 'title' and 'subtitle' for the plot. To the basic plot 'g', geom\_jitter is added, that
671
+ plots cars from the same manufacturer with the same color (col=manufacturer) and the size of the
672
+ car point equal to its highway consumption (size = hwy). Finally, a last layer is plotted containing
673
+ a linear regression line (method = "lm") for every manufacturer.
674
+
675
+
676
+ ```r
677
+ # load package and data
678
+ library(ggplot2)
205
679
  ```
206
680
 
207
681
  ```
208
682
  ## Message:
209
- ## (eval):1:in `exec_ruby'
210
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:137:in `instance_eval'
211
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:137:in `exec_ruby'
212
- ## /home/rbotafogo/desenv/galaaz/lib/gknit/ruby_engine.rb:55:in `block in initialize'
213
- ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
214
- ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
215
- ## (eval):3:in `function(...) {\n rb_method(...)'
216
- ## unknown.r:1:in `in_dir'
217
- ## unknown.r:1:in `block_exec'
218
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc12/jre/languages/R/library/knitr/R/block.R:91:in `call_block'
219
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc12/jre/languages/R/library/knitr/R/block.R:6:in `process_group.block'
220
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc12/jre/languages/R/library/knitr/R/block.R:3:in `<no source>'
221
- ## unknown.r:1:in `withCallingHandlers'
222
- ## unknown.r:1:in `process_file'
223
- ## unknown.r:1:in `<no source>'
224
- ## unknown.r:1:in `<no source>'
225
- ## <REPL>:4:in `<repl wrapper>'
226
- ## <REPL>:1
683
+ ## Registered S3 methods overwritten by 'ggplot2':
684
+ ## method from
685
+ ## [.quosures rlang
686
+ ## c.quosures rlang
687
+ ## print.quosures rlang
688
+ ```
689
+
690
+ ```r
691
+ data(mpg, package="ggplot2")
692
+
693
+ mpg_select <- mpg[mpg$manufacturer %in% c("audi", "ford", "honda", "hyundai"), ]
694
+
695
+ # Scatterplot
696
+ theme_set(theme_bw()) # pre-set the bw theme.
697
+ g <- ggplot(mpg_select, aes(displ, cty)) +
698
+ labs(subtitle="mpg: Displacement vs City Mileage",
699
+ title="Bubble chart")
700
+
701
+ g + geom_jitter(aes(col=manufacturer, size=hwy)) +
702
+ geom_smooth(aes(col=manufacturer), method="lm", se=F)
703
+ ```
704
+
705
+ ![](manual_files/figure-html/bubble-1.png)<!-- -->
706
+
707
+ ## Ruby chunks
708
+
709
+ Including a Ruby chunk is just as easy as including an R chunk in the document: just
710
+ change the name of the engine to 'ruby'. It is also possible to pass chunk options
711
+ to the Ruby engine; however, this version does not accept all the options that are
712
+ available to R chunks. Future versions will add those options.
713
+
714
+ ````
715
+ ```{ruby first_ruby_chunk}
227
716
  ```
717
+ ````
718
+
719
+ In this example, the ruby chunk is called 'first_ruby_chunk'. One important
720
+ aspect of chunk labels is that they cannot be duplicated. If a chunk label is
721
+ duplicated, gKnit will stop with an error.
722
+
723
+ In the following chunk, variable 'a', 'b' and 'c' are standard Ruby variables
724
+ and 'vec' and 'vec2' are two vectors created by calling the 'c' method on the
725
+ R module.
726
+
727
+ In Galaaz, the R module allows us to access R functions transparently. The 'c'
728
+ function in R, is a function that concatenates its arguments making a vector.
729
+
730
+ It
731
+ should be clear that there is no requirement in gknit to call or use any R
732
+ functions. gKnit will knit standard Ruby code, or even general text without
733
+ any code.
228
734
 
229
735
 
230
736
  ```ruby
231
- outputs (~:mtcars).kable.kable_styling
737
+ a = [1, 2, 3]
738
+ b = "US$ 250.000"
739
+ c = "The 'outputs' function"
740
+
741
+ vec = R.c(1, 2, 3)
742
+ vec2 = R.c(10, 20, 30)
232
743
  ```
233
744
 
234
- <table class="table" style="margin-left: auto; margin-right: auto;">
235
- <thead>
236
- <tr>
237
- <th style="text-align:left;"> </th>
238
- <th style="text-align:right;"> mpg </th>
239
- <th style="text-align:right;"> cyl </th>
745
+ In the next block, variables 'a', 'vec' and 'vec2' are used and printed.
746
+
747
+
748
+ ```ruby
749
+ puts a
750
+ puts vec * vec2
751
+ ```
752
+
753
+ ```
754
+ ## 1
755
+ ## 2
756
+ ## 3
757
+ ## [1] 10 40 90
758
+ ```
759
+
760
+ Note that 'a' is a standard Ruby Array and 'vec' and 'vec2' are vectors that behave accordingly,
761
+ where multiplication works as expected.
762
+
763
+ ## Inline Ruby code
764
+
765
+ When using a Ruby chunk, the code and the output are formatted in blocks as seen above.
766
+ This formatting is not always desired. Sometimes, we want to have the results of the
767
+ Ruby evaluation included in the middle of a phrase. gKnit allows adding inline Ruby code
768
+ with the 'rb' engine. The following chunk specification will
769
+ create an inline Ruby text:
770
+
771
+ ````
772
+ This is some text with inline Ruby accessing variable 'b' which has value:
773
+ ```{rb puts b}
774
+ ```
775
+ and is followed by some other text!
776
+ ````
777
+
778
+ <div style="margin-bottom:30px;">
779
+ </div>
780
+
781
+ This is some text with inline Ruby accessing variable 'b' which has value:
782
+ US$ 250.000
783
+ and is followed by some other text!
784
+
785
+ <div style="margin-bottom:30px;">
786
+ </div>
787
+
788
+ Note that it is important not to add any new line before or after the code
789
+ block if we want everything to be in only one line, resulting in the following sentence
790
+ with inline Ruby code.
791
+
792
+
793
+ ### The 'outputs' function
794
+
795
+ We have previously used the standard 'puts' method in Ruby chunks in order to produce
796
+ output. The result of a 'puts', as seen in all previous chunks that use it, is formatted
797
+ inside a white box that
798
+ follows the code block. Many times however, we would like to do some processing in the
799
+ Ruby chunk and have the result of this processing generate an output that is
800
+ "included" in the document as if we had typed it in the __R markdown__ document.
801
+
802
+ For example, suppose we want to create a new heading in our document, but the heading
803
+ phrase is the result of some code processing: maybe it's the first line of a file we are
804
+ going to read. Method 'outputs' adds its output as if typed in the __R markdown__ document.
805
+
806
+ Take now a look at variable 'c' (it was defined in a previous block above) as
807
+ 'c = "The 'outputs' function". "The 'outputs' function" is actually the name of this
808
+ section and it was created using the 'outputs' function inside a Ruby chunk.
809
+
810
+ The ruby chunk to generate this heading is:
811
+
812
+ ````
813
+ ```{ruby heading}
814
+ outputs "### #{c}"
815
+ ```
816
+ ````
817
+
818
+ The three '###' is the way we add a Heading 3 in __R markdown__.
819
+
820
+
821
+ ### HTML Output from Ruby Chunks
822
+
823
+ We've just seen the use of method 'outputs' to add text to the __R markdown__
824
+ document. This technique can also be used to add HTML code to the document. In
825
+ __R markdown__, any html code typed directly in the document will be properly rendered.
826
+ Here, for instance, is a table definition in HTML and its output in the document:
827
+
828
+ ```
829
+ <table style="width:100%">
830
+ <tr>
831
+ <th>Firstname</th>
832
+ <th>Lastname</th>
833
+ <th>Age</th>
834
+ </tr>
835
+ <tr>
836
+ <td>Jill</td>
837
+ <td>Smith</td>
838
+ <td>50</td>
839
+ </tr>
840
+ <tr>
841
+ <td>Eve</td>
842
+ <td>Jackson</td>
843
+ <td>94</td>
844
+ </tr>
845
+ </table>
846
+ ```
847
+ <div style="margin-bottom:30px;">
848
+ </div>
849
+
850
+ <table style="width:100%">
851
+ <tr>
852
+ <th>Firstname</th>
853
+ <th>Lastname</th>
854
+ <th>Age</th>
855
+ </tr>
856
+ <tr>
857
+ <td>Jill</td>
858
+ <td>Smith</td>
859
+ <td>50</td>
860
+ </tr>
861
+ <tr>
862
+ <td>Eve</td>
863
+ <td>Jackson</td>
864
+ <td>94</td>
865
+ </tr>
866
+ </table>
867
+
868
+ <div style="margin-bottom:30px;">
869
+ </div>
870
+
871
+ But manually creating HTML output is not always easy or desirable, especially
872
+ if we intend the document to be rendered in other formats, for example, as Latex.
873
+ Also, the above
874
+ table looks ugly. The 'kableExtra' library is a great library for
875
+ creating beautiful tables. Take a look at https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html
876
+
877
+ In the next chunk, we output the 'mtcars' dataframe from R in a nicely formatted
878
+ table. Note that we retrieve the mtcars dataframe by using '~:mtcars'.
879
+
880
+
881
+ ```ruby
882
+ R.install_and_loads('kableExtra')
883
+ outputs (~:mtcars).kable.kable_styling
884
+ ```
885
+
886
+ <table class="table" style="margin-left: auto; margin-right: auto;">
887
+ <thead>
888
+ <tr>
889
+ <th style="text-align:left;"> </th>
890
+ <th style="text-align:right;"> mpg </th>
891
+ <th style="text-align:right;"> cyl </th>
240
892
  <th style="text-align:right;"> disp </th>
241
893
  <th style="text-align:right;"> hp </th>
242
894
  <th style="text-align:right;"> drat </th>
@@ -700,47 +1352,2851 @@ outputs (~:mtcars).kable.kable_styling
700
1352
  </tbody>
701
1353
  </table>
702
1354
 
1355
+ ## Including Ruby files in a chunk
703
1356
 
704
- ## Graphics with ggplot
1357
+ R is a language that was created to be easy and fast for statisticians to use. As far
1358
+ as I know, it was not a
1359
+ language to be used for developing large systems. Of course, there are large systems and
1360
+ libraries in R, but the focus of the language is for developing statistical models and
1361
+ distributing them to peers.
705
1362
 
1363
+ Ruby on the other hand, is a language for large software development. Systems written in
1364
+ Ruby will have dozens, hundreds or even thousands of files. To document a
1365
+ large system with literate programming, we cannot expect the developer to add all the
1366
+ files in a single '.Rmd' file. gKnit provides the 'include' chunk engine to include
1367
+ a Ruby file as if it had been typed in the '.Rmd' file.
706
1368
 
707
- ```ruby
708
- require 'ggplot'
1369
+ To include a file, the following chunk should be created, where <filename> is the name of
1370
+ the file to be included and where the extension, if it is '.rb', does not need to be added.
1371
+ If the 'relative' option is not included, then it is treated as TRUE. When 'relative' is
1372
+ true, ruby's 'require\_relative' semantics is used to load the file, when false, Ruby's
1373
+ \$LOAD_PATH is searched to find the file and it is 'require'd.
1374
+
1375
+ ````
1376
+ ```{include <filename>, relative = <TRUE/FALSE>}
1377
+ ```
1378
+ ````
1379
+
1380
+ Below we include file 'model.rb', which is in the same directory as this blog.
1381
+ This code uses R 'caret' package to split a dataset in a train and test sets.
1382
+ The 'caret' package is a very important and useful package for doing Data Analysis,
1383
+ it has hundreds of functions for all steps of the Data Analysis workflow. To
1384
+ use 'caret' just to split a dataset is like using the proverbial cannon to
1385
+ kill the fly. We use it here only to show that integrating Ruby and R and
1386
+ using even a very complex package as 'caret' is trivial with Galaaz.
1387
+
1388
+ A word of advice: the 'caret' package has lots of dependencies and installing
1389
+ it in a Linux system is a time consuming operation. Method 'R.install_and_loads'
1390
+ will install the package if it is not already installed and can take a while.
1391
+
1392
+ ````
1393
+ ```{include model}
1394
+ ```
1395
+ ````
1396
+
1397
+
1398
+ ```include
1399
+ require 'galaaz'
1400
+
1401
+ # Loads the R 'caret' package. If not present, installs it
1402
+ R.install_and_loads 'caret'
1403
+
1404
+ class Model
1405
+
1406
+ attr_reader :data
1407
+ attr_reader :test
1408
+ attr_reader :train
1409
+
1410
+ #==========================================================
1411
+ #
1412
+ #==========================================================
1413
+
1414
+ def initialize(data, percent_train:, seed: 123)
1415
+
1416
+ R.set__seed(seed)
1417
+ @data = data
1418
+ @percent_train = percent_train
1419
+ @seed = seed
1420
+
1421
+ end
1422
+
1423
+ #==========================================================
1424
+ #
1425
+ #==========================================================
1426
+
1427
+ def partition(field)
1428
+
1429
+ train_index =
1430
+ R.createDataPartition(@data.send(field), p: @percet_train,
1431
+ list: false, times: 1)
1432
+ @train = @data[train_index, :all]
1433
+ @test = @data[-train_index, :all]
1434
+
1435
+ end
1436
+
1437
+ end
1438
+
1439
+ ```
709
1440
 
710
- R.theme_set R.theme_bw
711
1441
 
712
- # Data Prep
1442
+ ```ruby
713
1443
  mtcars = ~:mtcars
714
- mtcars.car_name = R.rownames(:mtcars)
715
- # compute normalized mpg
716
- mtcars.mpg_z = ((mtcars.mpg - mtcars.mpg.mean)/mtcars.mpg.sd).round 2
717
- mtcars.mpg_type = mtcars.mpg_z < 0 ? "below" : "above"
718
- mtcars = mtcars[mtcars.mpg_z.order, :all]
719
- # convert to factor to retain sorted order in plot
720
- mtcars.car_name = mtcars.car_name.factor levels: mtcars.car_name
1444
+ model = Model.new(mtcars, percent_train: 0.8)
1445
+ model.partition(:mpg)
1446
+ puts model.train.head
1447
+ puts model.test.head
1448
+ ```
721
1449
 
722
- # Diverging Barcharts
723
- gg = mtcars.ggplot(E.aes(x: :car_name, y: :mpg_z, label: :mpg_z)) +
724
- R.geom_bar(E.aes(fill: :mpg_type), stat: 'identity', width: 0.5) +
725
- R.scale_fill_manual(name: "Mileage",
726
- labels: R.c("Above Average", "Below Average"),
727
- values: R.c("above": "#00ba38", "below": "#f8766d")) +
728
- R.labs(subtitle: "Normalised mileage from 'mtcars'",
729
- title: "Diverging Bars") +
730
- R.coord_flip()
1450
+ ```
1451
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1452
+ ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
1453
+ ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
1454
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
1455
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
1456
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
1457
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
1458
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1459
+ ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
1460
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
1461
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
1462
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
1463
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
1464
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
1465
+ ```
731
1466
 
732
- puts gg
1467
+ ## Documenting Gems
1468
+
1469
+ gKnit also allows developers to document and load files that are not in the same directory
1470
+ of the '.Rmd' file.
1471
+
1472
+ Here is an example of loading the 'find.rb' file from TruffleRuby. In this example, relative
1473
+ is set to FALSE, so Ruby will look for the file in its $LOAD\_PATH, and the user does not
1474
+ need to know its directory.
1475
+
1476
+ ````
1477
+ ```{include find, relative = FALSE}
733
1478
  ```
1479
+ ````
734
1480
 
735
1481
 
736
- ![](/home/rbotafogo/desenv/galaaz/blogs/manual/manual_files/figure-html/diverging_bar.png)<!-- -->
1482
+ ```include
1483
+ # frozen_string_literal: true
1484
+ #
1485
+ # find.rb: the Find module for processing all files under a given directory.
1486
+ #
737
1487
 
1488
+ #
1489
+ # The +Find+ module supports the top-down traversal of a set of file paths.
1490
+ #
1491
+ # For example, to total the size of all files under your home directory,
1492
+ # ignoring anything in a "dot" directory (e.g. $HOME/.ssh):
1493
+ #
1494
+ # require 'find'
1495
+ #
1496
+ # total_size = 0
1497
+ #
1498
+ # Find.find(ENV["HOME"]) do |path|
1499
+ # if FileTest.directory?(path)
1500
+ # if File.basename(path)[0] == ?.
1501
+ # Find.prune # Don't look any further into this directory.
1502
+ # else
1503
+ # next
1504
+ # end
1505
+ # else
1506
+ # total_size += FileTest.size(path)
1507
+ # end
1508
+ # end
1509
+ #
1510
+ module Find
738
1511
 
739
- [TO BE CONTINUED...]
1512
+ #
1513
+ # Calls the associated block with the name of every file and directory listed
1514
+ # as arguments, then recursively on their subdirectories, and so on.
1515
+ #
1516
+ # Returns an enumerator if no block is given.
1517
+ #
1518
+ # See the +Find+ module documentation for an example.
1519
+ #
1520
+ def find(*paths, ignore_error: true) # :yield: path
1521
+ block_given? or return enum_for(__method__, *paths, ignore_error: ignore_error)
740
1522
 
1523
+ fs_encoding = Encoding.find("filesystem")
741
1524
 
742
- # Contributing
1525
+ paths.collect!{|d| raise Errno::ENOENT, d unless File.exist?(d); d.dup}.each do |path|
1526
+ path = path.to_path if path.respond_to? :to_path
1527
+ enc = path.encoding == Encoding::US_ASCII ? fs_encoding : path.encoding
1528
+ ps = [path]
1529
+ while file = ps.shift
1530
+ catch(:prune) do
1531
+ yield file.dup.taint
1532
+ begin
1533
+ s = File.lstat(file)
1534
+ rescue Errno::ENOENT, Errno::EACCES, Errno::ENOTDIR, Errno::ELOOP, Errno::ENAMETOOLONG
1535
+ raise unless ignore_error
1536
+ next
1537
+ end
1538
+ if s.directory? then
1539
+ begin
1540
+ fs = Dir.children(file, encoding: enc)
1541
+ rescue Errno::ENOENT, Errno::EACCES, Errno::ENOTDIR, Errno::ELOOP, Errno::ENAMETOOLONG
1542
+ raise unless ignore_error
1543
+ next
1544
+ end
1545
+ fs.sort!
1546
+ fs.reverse_each {|f|
1547
+ f = File.join(file, f)
1548
+ ps.unshift f.untaint
1549
+ }
1550
+ end
1551
+ end
1552
+ end
1553
+ end
1554
+ nil
1555
+ end
1556
+
1557
+ #
1558
+ # Skips the current file or directory, restarting the loop with the next
1559
+ # entry. If the current file is a directory, that directory will not be
1560
+ # recursively entered. Meaningful only within the block associated with
1561
+ # Find::find.
1562
+ #
1563
+ # See the +Find+ module documentation for an example.
1564
+ #
1565
+ def prune
1566
+ throw :prune
1567
+ end
1568
+
1569
+ module_function :find, :prune
1570
+ end
1571
+ ```
1572
+
1573
+ ## Converting to PDF
1574
+
1575
+ One of the beauties of knitr is that the same input can be converted to many different outputs.
1576
+ One very useful format, is, of course, PDF. In order to convert an __R markdown__ file to PDF
1577
+ it is necessary to have LaTeX installed on the system. We will not explain here how to
1578
+ install LaTeX as there are plenty of documents on the web showing how to proceed.
1579
+
1580
+ gKnit comes with a simple LaTeX style file for gknitting this blog as a PDF document. Here is
1581
+ the Yaml header to generate this blog in PDF format instead of HTML:
1582
+
1583
+ ```
1584
+ ---
1585
+ title: "gKnit - Ruby and R Knitting with Galaaz in GraalVM"
1586
+ author: "Rodrigo Botafogo"
1587
+ tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, knitr, gknit]
1588
+ date: "29 October 2018"
1589
+ output:
1590
+ pdf\_document:
1591
+ includes:
1592
+ in\_header: ["../../sty/galaaz.sty"]
1593
+ number\_sections: yes
1594
+ ---
1595
+ ```
1596
+
1597
+ ## Template based documents generation
1598
+
1599
+ When a document is converted to PDF it follows a certain conversion template. We've seen above
1600
+ the use of 'galaaz.sty' as a basic template to generate a PDF document. Using the
1601
+ 'gknit-draft' app that comes with Galaaz, the same .Rmd file can be compiled to different
1602
+ looking PDF documents. Galaaz automatically loads the 'rticles' R package that comes with
1603
+ templates for the following journals with the respective template name:
1604
+
1605
+ * ACM articles: acm_article
1606
+ * ACS articles: acs_article
1607
+ * AEA journal submissions: aea_article
1608
+ * AGU journal submissions: ????
1609
+ * AMS articles: ams_article
1610
+ * American Statistical Association: asa_article
1611
+ * Biometrics articles: biometrics_article
1612
+ * Bulletin de l'AMQ journal submissions: amq_article
1613
+ * CTeX documents: ctex
1614
+ * Elsevier journal submissions: elsevier_article
1615
+ * IEEE Transaction journal submissions: ieee_article
1616
+ * JSS articles: jss_article
1617
+ * MDPI journal submissions: mdpi_article
1618
+ * Monthly Notices of the Royal Astronomical Society articles: mnras_article
1619
+ * NNRAS journal submissions: nmras_article
1620
+ * PeerJ articles: peerj_article
1621
+ * Royal Society Open Science journal submissions: rsos_article
1622
+ * Royal Statistical Society: rss_article
1623
+ * Sage journal submissions: sage_article
1624
+ * Springer journal submissions: springer_article
1625
+ * Statistics in Medicine journal submissions: sim_article
1626
+ * Copernicus Publications journal submissions: copernicus_article
1627
+ * The R Journal articles: rjournal_article
1628
+ * Frontiers articles: ???
1629
+ * Taylor & Francis articles: ???
1630
+ * Bulletin De L'AMQ: amq_article
1631
+ * PLOS journal: plos_article
1632
+ * Proceedings of the National Academy of Sciences of the USA: pnas_article
1633
+
1634
+ In order to create a document with one of those templates, use the following command:
1635
+
1636
+ ```
1637
+ gknit-draft --filename <my_document> --template <template> --package <package>
1638
+ --create_dir
1639
+ ```
1640
+ So, in order to create a template for writing an R Journal, use:
1641
+
1642
+ ```
1643
+ gknit-draft --filename my_r_article --template rjournal_article --package rticles
1644
+ --create_dir
1645
+ ```
1646
+
1647
+ # Accessing R variables
1648
+
1649
+ Galaaz allows Ruby to access variables created in R. For example, the 'mtcars' data set is
1650
+ available in R and can be accessed from Ruby by using the 'tilde' operator followed by the
1651
+ symbol for the variable, in this case ':mtcars'. In the code below method 'outputs' is
1652
+ used to output the 'mtcars' data set nicely formatted in HTML by use of the 'kable' and
1653
+ 'kable_styling' functions. Method 'outputs' is only available when used with 'gknit'.
1654
+
1655
+
1656
+ ```ruby
1657
+ outputs (~:mtcars).kable.kable_styling
1658
+ ```
743
1659
 
1660
+ <table class="table" style="margin-left: auto; margin-right: auto;">
1661
+ <thead>
1662
+ <tr>
1663
+ <th style="text-align:left;"> </th>
1664
+ <th style="text-align:right;"> mpg </th>
1665
+ <th style="text-align:right;"> cyl </th>
1666
+ <th style="text-align:right;"> disp </th>
1667
+ <th style="text-align:right;"> hp </th>
1668
+ <th style="text-align:right;"> drat </th>
1669
+ <th style="text-align:right;"> wt </th>
1670
+ <th style="text-align:right;"> qsec </th>
1671
+ <th style="text-align:right;"> vs </th>
1672
+ <th style="text-align:right;"> am </th>
1673
+ <th style="text-align:right;"> gear </th>
1674
+ <th style="text-align:right;"> carb </th>
1675
+ </tr>
1676
+ </thead>
1677
+ <tbody>
1678
+ <tr>
1679
+ <td style="text-align:left;"> Mazda RX4 </td>
1680
+ <td style="text-align:right;"> 21.0 </td>
1681
+ <td style="text-align:right;"> 6 </td>
1682
+ <td style="text-align:right;"> 160.0 </td>
1683
+ <td style="text-align:right;"> 110 </td>
1684
+ <td style="text-align:right;"> 3.90 </td>
1685
+ <td style="text-align:right;"> 2.620 </td>
1686
+ <td style="text-align:right;"> 16.46 </td>
1687
+ <td style="text-align:right;"> 0 </td>
1688
+ <td style="text-align:right;"> 1 </td>
1689
+ <td style="text-align:right;"> 4 </td>
1690
+ <td style="text-align:right;"> 4 </td>
1691
+ </tr>
1692
+ <tr>
1693
+ <td style="text-align:left;"> Mazda RX4 Wag </td>
1694
+ <td style="text-align:right;"> 21.0 </td>
1695
+ <td style="text-align:right;"> 6 </td>
1696
+ <td style="text-align:right;"> 160.0 </td>
1697
+ <td style="text-align:right;"> 110 </td>
1698
+ <td style="text-align:right;"> 3.90 </td>
1699
+ <td style="text-align:right;"> 2.875 </td>
1700
+ <td style="text-align:right;"> 17.02 </td>
1701
+ <td style="text-align:right;"> 0 </td>
1702
+ <td style="text-align:right;"> 1 </td>
1703
+ <td style="text-align:right;"> 4 </td>
1704
+ <td style="text-align:right;"> 4 </td>
1705
+ </tr>
1706
+ <tr>
1707
+ <td style="text-align:left;"> Datsun 710 </td>
1708
+ <td style="text-align:right;"> 22.8 </td>
1709
+ <td style="text-align:right;"> 4 </td>
1710
+ <td style="text-align:right;"> 108.0 </td>
1711
+ <td style="text-align:right;"> 93 </td>
1712
+ <td style="text-align:right;"> 3.85 </td>
1713
+ <td style="text-align:right;"> 2.320 </td>
1714
+ <td style="text-align:right;"> 18.61 </td>
1715
+ <td style="text-align:right;"> 1 </td>
1716
+ <td style="text-align:right;"> 1 </td>
1717
+ <td style="text-align:right;"> 4 </td>
1718
+ <td style="text-align:right;"> 1 </td>
1719
+ </tr>
1720
+ <tr>
1721
+ <td style="text-align:left;"> Hornet 4 Drive </td>
1722
+ <td style="text-align:right;"> 21.4 </td>
1723
+ <td style="text-align:right;"> 6 </td>
1724
+ <td style="text-align:right;"> 258.0 </td>
1725
+ <td style="text-align:right;"> 110 </td>
1726
+ <td style="text-align:right;"> 3.08 </td>
1727
+ <td style="text-align:right;"> 3.215 </td>
1728
+ <td style="text-align:right;"> 19.44 </td>
1729
+ <td style="text-align:right;"> 1 </td>
1730
+ <td style="text-align:right;"> 0 </td>
1731
+ <td style="text-align:right;"> 3 </td>
1732
+ <td style="text-align:right;"> 1 </td>
1733
+ </tr>
1734
+ <tr>
1735
+ <td style="text-align:left;"> Hornet Sportabout </td>
1736
+ <td style="text-align:right;"> 18.7 </td>
1737
+ <td style="text-align:right;"> 8 </td>
1738
+ <td style="text-align:right;"> 360.0 </td>
1739
+ <td style="text-align:right;"> 175 </td>
1740
+ <td style="text-align:right;"> 3.15 </td>
1741
+ <td style="text-align:right;"> 3.440 </td>
1742
+ <td style="text-align:right;"> 17.02 </td>
1743
+ <td style="text-align:right;"> 0 </td>
1744
+ <td style="text-align:right;"> 0 </td>
1745
+ <td style="text-align:right;"> 3 </td>
1746
+ <td style="text-align:right;"> 2 </td>
1747
+ </tr>
1748
+ <tr>
1749
+ <td style="text-align:left;"> Valiant </td>
1750
+ <td style="text-align:right;"> 18.1 </td>
1751
+ <td style="text-align:right;"> 6 </td>
1752
+ <td style="text-align:right;"> 225.0 </td>
1753
+ <td style="text-align:right;"> 105 </td>
1754
+ <td style="text-align:right;"> 2.76 </td>
1755
+ <td style="text-align:right;"> 3.460 </td>
1756
+ <td style="text-align:right;"> 20.22 </td>
1757
+ <td style="text-align:right;"> 1 </td>
1758
+ <td style="text-align:right;"> 0 </td>
1759
+ <td style="text-align:right;"> 3 </td>
1760
+ <td style="text-align:right;"> 1 </td>
1761
+ </tr>
1762
+ <tr>
1763
+ <td style="text-align:left;"> Duster 360 </td>
1764
+ <td style="text-align:right;"> 14.3 </td>
1765
+ <td style="text-align:right;"> 8 </td>
1766
+ <td style="text-align:right;"> 360.0 </td>
1767
+ <td style="text-align:right;"> 245 </td>
1768
+ <td style="text-align:right;"> 3.21 </td>
1769
+ <td style="text-align:right;"> 3.570 </td>
1770
+ <td style="text-align:right;"> 15.84 </td>
1771
+ <td style="text-align:right;"> 0 </td>
1772
+ <td style="text-align:right;"> 0 </td>
1773
+ <td style="text-align:right;"> 3 </td>
1774
+ <td style="text-align:right;"> 4 </td>
1775
+ </tr>
1776
+ <tr>
1777
+ <td style="text-align:left;"> Merc 240D </td>
1778
+ <td style="text-align:right;"> 24.4 </td>
1779
+ <td style="text-align:right;"> 4 </td>
1780
+ <td style="text-align:right;"> 146.7 </td>
1781
+ <td style="text-align:right;"> 62 </td>
1782
+ <td style="text-align:right;"> 3.69 </td>
1783
+ <td style="text-align:right;"> 3.190 </td>
1784
+ <td style="text-align:right;"> 20.00 </td>
1785
+ <td style="text-align:right;"> 1 </td>
1786
+ <td style="text-align:right;"> 0 </td>
1787
+ <td style="text-align:right;"> 4 </td>
1788
+ <td style="text-align:right;"> 2 </td>
1789
+ </tr>
1790
+ <tr>
1791
+ <td style="text-align:left;"> Merc 230 </td>
1792
+ <td style="text-align:right;"> 22.8 </td>
1793
+ <td style="text-align:right;"> 4 </td>
1794
+ <td style="text-align:right;"> 140.8 </td>
1795
+ <td style="text-align:right;"> 95 </td>
1796
+ <td style="text-align:right;"> 3.92 </td>
1797
+ <td style="text-align:right;"> 3.150 </td>
1798
+ <td style="text-align:right;"> 22.90 </td>
1799
+ <td style="text-align:right;"> 1 </td>
1800
+ <td style="text-align:right;"> 0 </td>
1801
+ <td style="text-align:right;"> 4 </td>
1802
+ <td style="text-align:right;"> 2 </td>
1803
+ </tr>
1804
+ <tr>
1805
+ <td style="text-align:left;"> Merc 280 </td>
1806
+ <td style="text-align:right;"> 19.2 </td>
1807
+ <td style="text-align:right;"> 6 </td>
1808
+ <td style="text-align:right;"> 167.6 </td>
1809
+ <td style="text-align:right;"> 123 </td>
1810
+ <td style="text-align:right;"> 3.92 </td>
1811
+ <td style="text-align:right;"> 3.440 </td>
1812
+ <td style="text-align:right;"> 18.30 </td>
1813
+ <td style="text-align:right;"> 1 </td>
1814
+ <td style="text-align:right;"> 0 </td>
1815
+ <td style="text-align:right;"> 4 </td>
1816
+ <td style="text-align:right;"> 4 </td>
1817
+ </tr>
1818
+ <tr>
1819
+ <td style="text-align:left;"> Merc 280C </td>
1820
+ <td style="text-align:right;"> 17.8 </td>
1821
+ <td style="text-align:right;"> 6 </td>
1822
+ <td style="text-align:right;"> 167.6 </td>
1823
+ <td style="text-align:right;"> 123 </td>
1824
+ <td style="text-align:right;"> 3.92 </td>
1825
+ <td style="text-align:right;"> 3.440 </td>
1826
+ <td style="text-align:right;"> 18.90 </td>
1827
+ <td style="text-align:right;"> 1 </td>
1828
+ <td style="text-align:right;"> 0 </td>
1829
+ <td style="text-align:right;"> 4 </td>
1830
+ <td style="text-align:right;"> 4 </td>
1831
+ </tr>
1832
+ <tr>
1833
+ <td style="text-align:left;"> Merc 450SE </td>
1834
+ <td style="text-align:right;"> 16.4 </td>
1835
+ <td style="text-align:right;"> 8 </td>
1836
+ <td style="text-align:right;"> 275.8 </td>
1837
+ <td style="text-align:right;"> 180 </td>
1838
+ <td style="text-align:right;"> 3.07 </td>
1839
+ <td style="text-align:right;"> 4.070 </td>
1840
+ <td style="text-align:right;"> 17.40 </td>
1841
+ <td style="text-align:right;"> 0 </td>
1842
+ <td style="text-align:right;"> 0 </td>
1843
+ <td style="text-align:right;"> 3 </td>
1844
+ <td style="text-align:right;"> 3 </td>
1845
+ </tr>
1846
+ <tr>
1847
+ <td style="text-align:left;"> Merc 450SL </td>
1848
+ <td style="text-align:right;"> 17.3 </td>
1849
+ <td style="text-align:right;"> 8 </td>
1850
+ <td style="text-align:right;"> 275.8 </td>
1851
+ <td style="text-align:right;"> 180 </td>
1852
+ <td style="text-align:right;"> 3.07 </td>
1853
+ <td style="text-align:right;"> 3.730 </td>
1854
+ <td style="text-align:right;"> 17.60 </td>
1855
+ <td style="text-align:right;"> 0 </td>
1856
+ <td style="text-align:right;"> 0 </td>
1857
+ <td style="text-align:right;"> 3 </td>
1858
+ <td style="text-align:right;"> 3 </td>
1859
+ </tr>
1860
+ <tr>
1861
+ <td style="text-align:left;"> Merc 450SLC </td>
1862
+ <td style="text-align:right;"> 15.2 </td>
1863
+ <td style="text-align:right;"> 8 </td>
1864
+ <td style="text-align:right;"> 275.8 </td>
1865
+ <td style="text-align:right;"> 180 </td>
1866
+ <td style="text-align:right;"> 3.07 </td>
1867
+ <td style="text-align:right;"> 3.780 </td>
1868
+ <td style="text-align:right;"> 18.00 </td>
1869
+ <td style="text-align:right;"> 0 </td>
1870
+ <td style="text-align:right;"> 0 </td>
1871
+ <td style="text-align:right;"> 3 </td>
1872
+ <td style="text-align:right;"> 3 </td>
1873
+ </tr>
1874
+ <tr>
1875
+ <td style="text-align:left;"> Cadillac Fleetwood </td>
1876
+ <td style="text-align:right;"> 10.4 </td>
1877
+ <td style="text-align:right;"> 8 </td>
1878
+ <td style="text-align:right;"> 472.0 </td>
1879
+ <td style="text-align:right;"> 205 </td>
1880
+ <td style="text-align:right;"> 2.93 </td>
1881
+ <td style="text-align:right;"> 5.250 </td>
1882
+ <td style="text-align:right;"> 17.98 </td>
1883
+ <td style="text-align:right;"> 0 </td>
1884
+ <td style="text-align:right;"> 0 </td>
1885
+ <td style="text-align:right;"> 3 </td>
1886
+ <td style="text-align:right;"> 4 </td>
1887
+ </tr>
1888
+ <tr>
1889
+ <td style="text-align:left;"> Lincoln Continental </td>
1890
+ <td style="text-align:right;"> 10.4 </td>
1891
+ <td style="text-align:right;"> 8 </td>
1892
+ <td style="text-align:right;"> 460.0 </td>
1893
+ <td style="text-align:right;"> 215 </td>
1894
+ <td style="text-align:right;"> 3.00 </td>
1895
+ <td style="text-align:right;"> 5.424 </td>
1896
+ <td style="text-align:right;"> 17.82 </td>
1897
+ <td style="text-align:right;"> 0 </td>
1898
+ <td style="text-align:right;"> 0 </td>
1899
+ <td style="text-align:right;"> 3 </td>
1900
+ <td style="text-align:right;"> 4 </td>
1901
+ </tr>
1902
+ <tr>
1903
+ <td style="text-align:left;"> Chrysler Imperial </td>
1904
+ <td style="text-align:right;"> 14.7 </td>
1905
+ <td style="text-align:right;"> 8 </td>
1906
+ <td style="text-align:right;"> 440.0 </td>
1907
+ <td style="text-align:right;"> 230 </td>
1908
+ <td style="text-align:right;"> 3.23 </td>
1909
+ <td style="text-align:right;"> 5.345 </td>
1910
+ <td style="text-align:right;"> 17.42 </td>
1911
+ <td style="text-align:right;"> 0 </td>
1912
+ <td style="text-align:right;"> 0 </td>
1913
+ <td style="text-align:right;"> 3 </td>
1914
+ <td style="text-align:right;"> 4 </td>
1915
+ </tr>
1916
+ <tr>
1917
+ <td style="text-align:left;"> Fiat 128 </td>
1918
+ <td style="text-align:right;"> 32.4 </td>
1919
+ <td style="text-align:right;"> 4 </td>
1920
+ <td style="text-align:right;"> 78.7 </td>
1921
+ <td style="text-align:right;"> 66 </td>
1922
+ <td style="text-align:right;"> 4.08 </td>
1923
+ <td style="text-align:right;"> 2.200 </td>
1924
+ <td style="text-align:right;"> 19.47 </td>
1925
+ <td style="text-align:right;"> 1 </td>
1926
+ <td style="text-align:right;"> 1 </td>
1927
+ <td style="text-align:right;"> 4 </td>
1928
+ <td style="text-align:right;"> 1 </td>
1929
+ </tr>
1930
+ <tr>
1931
+ <td style="text-align:left;"> Honda Civic </td>
1932
+ <td style="text-align:right;"> 30.4 </td>
1933
+ <td style="text-align:right;"> 4 </td>
1934
+ <td style="text-align:right;"> 75.7 </td>
1935
+ <td style="text-align:right;"> 52 </td>
1936
+ <td style="text-align:right;"> 4.93 </td>
1937
+ <td style="text-align:right;"> 1.615 </td>
1938
+ <td style="text-align:right;"> 18.52 </td>
1939
+ <td style="text-align:right;"> 1 </td>
1940
+ <td style="text-align:right;"> 1 </td>
1941
+ <td style="text-align:right;"> 4 </td>
1942
+ <td style="text-align:right;"> 2 </td>
1943
+ </tr>
1944
+ <tr>
1945
+ <td style="text-align:left;"> Toyota Corolla </td>
1946
+ <td style="text-align:right;"> 33.9 </td>
1947
+ <td style="text-align:right;"> 4 </td>
1948
+ <td style="text-align:right;"> 71.1 </td>
1949
+ <td style="text-align:right;"> 65 </td>
1950
+ <td style="text-align:right;"> 4.22 </td>
1951
+ <td style="text-align:right;"> 1.835 </td>
1952
+ <td style="text-align:right;"> 19.90 </td>
1953
+ <td style="text-align:right;"> 1 </td>
1954
+ <td style="text-align:right;"> 1 </td>
1955
+ <td style="text-align:right;"> 4 </td>
1956
+ <td style="text-align:right;"> 1 </td>
1957
+ </tr>
1958
+ <tr>
1959
+ <td style="text-align:left;"> Toyota Corona </td>
1960
+ <td style="text-align:right;"> 21.5 </td>
1961
+ <td style="text-align:right;"> 4 </td>
1962
+ <td style="text-align:right;"> 120.1 </td>
1963
+ <td style="text-align:right;"> 97 </td>
1964
+ <td style="text-align:right;"> 3.70 </td>
1965
+ <td style="text-align:right;"> 2.465 </td>
1966
+ <td style="text-align:right;"> 20.01 </td>
1967
+ <td style="text-align:right;"> 1 </td>
1968
+ <td style="text-align:right;"> 0 </td>
1969
+ <td style="text-align:right;"> 3 </td>
1970
+ <td style="text-align:right;"> 1 </td>
1971
+ </tr>
1972
+ <tr>
1973
+ <td style="text-align:left;"> Dodge Challenger </td>
1974
+ <td style="text-align:right;"> 15.5 </td>
1975
+ <td style="text-align:right;"> 8 </td>
1976
+ <td style="text-align:right;"> 318.0 </td>
1977
+ <td style="text-align:right;"> 150 </td>
1978
+ <td style="text-align:right;"> 2.76 </td>
1979
+ <td style="text-align:right;"> 3.520 </td>
1980
+ <td style="text-align:right;"> 16.87 </td>
1981
+ <td style="text-align:right;"> 0 </td>
1982
+ <td style="text-align:right;"> 0 </td>
1983
+ <td style="text-align:right;"> 3 </td>
1984
+ <td style="text-align:right;"> 2 </td>
1985
+ </tr>
1986
+ <tr>
1987
+ <td style="text-align:left;"> AMC Javelin </td>
1988
+ <td style="text-align:right;"> 15.2 </td>
1989
+ <td style="text-align:right;"> 8 </td>
1990
+ <td style="text-align:right;"> 304.0 </td>
1991
+ <td style="text-align:right;"> 150 </td>
1992
+ <td style="text-align:right;"> 3.15 </td>
1993
+ <td style="text-align:right;"> 3.435 </td>
1994
+ <td style="text-align:right;"> 17.30 </td>
1995
+ <td style="text-align:right;"> 0 </td>
1996
+ <td style="text-align:right;"> 0 </td>
1997
+ <td style="text-align:right;"> 3 </td>
1998
+ <td style="text-align:right;"> 2 </td>
1999
+ </tr>
2000
+ <tr>
2001
+ <td style="text-align:left;"> Camaro Z28 </td>
2002
+ <td style="text-align:right;"> 13.3 </td>
2003
+ <td style="text-align:right;"> 8 </td>
2004
+ <td style="text-align:right;"> 350.0 </td>
2005
+ <td style="text-align:right;"> 245 </td>
2006
+ <td style="text-align:right;"> 3.73 </td>
2007
+ <td style="text-align:right;"> 3.840 </td>
2008
+ <td style="text-align:right;"> 15.41 </td>
2009
+ <td style="text-align:right;"> 0 </td>
2010
+ <td style="text-align:right;"> 0 </td>
2011
+ <td style="text-align:right;"> 3 </td>
2012
+ <td style="text-align:right;"> 4 </td>
2013
+ </tr>
2014
+ <tr>
2015
+ <td style="text-align:left;"> Pontiac Firebird </td>
2016
+ <td style="text-align:right;"> 19.2 </td>
2017
+ <td style="text-align:right;"> 8 </td>
2018
+ <td style="text-align:right;"> 400.0 </td>
2019
+ <td style="text-align:right;"> 175 </td>
2020
+ <td style="text-align:right;"> 3.08 </td>
2021
+ <td style="text-align:right;"> 3.845 </td>
2022
+ <td style="text-align:right;"> 17.05 </td>
2023
+ <td style="text-align:right;"> 0 </td>
2024
+ <td style="text-align:right;"> 0 </td>
2025
+ <td style="text-align:right;"> 3 </td>
2026
+ <td style="text-align:right;"> 2 </td>
2027
+ </tr>
2028
+ <tr>
2029
+ <td style="text-align:left;"> Fiat X1-9 </td>
2030
+ <td style="text-align:right;"> 27.3 </td>
2031
+ <td style="text-align:right;"> 4 </td>
2032
+ <td style="text-align:right;"> 79.0 </td>
2033
+ <td style="text-align:right;"> 66 </td>
2034
+ <td style="text-align:right;"> 4.08 </td>
2035
+ <td style="text-align:right;"> 1.935 </td>
2036
+ <td style="text-align:right;"> 18.90 </td>
2037
+ <td style="text-align:right;"> 1 </td>
2038
+ <td style="text-align:right;"> 1 </td>
2039
+ <td style="text-align:right;"> 4 </td>
2040
+ <td style="text-align:right;"> 1 </td>
2041
+ </tr>
2042
+ <tr>
2043
+ <td style="text-align:left;"> Porsche 914-2 </td>
2044
+ <td style="text-align:right;"> 26.0 </td>
2045
+ <td style="text-align:right;"> 4 </td>
2046
+ <td style="text-align:right;"> 120.3 </td>
2047
+ <td style="text-align:right;"> 91 </td>
2048
+ <td style="text-align:right;"> 4.43 </td>
2049
+ <td style="text-align:right;"> 2.140 </td>
2050
+ <td style="text-align:right;"> 16.70 </td>
2051
+ <td style="text-align:right;"> 0 </td>
2052
+ <td style="text-align:right;"> 1 </td>
2053
+ <td style="text-align:right;"> 5 </td>
2054
+ <td style="text-align:right;"> 2 </td>
2055
+ </tr>
2056
+ <tr>
2057
+ <td style="text-align:left;"> Lotus Europa </td>
2058
+ <td style="text-align:right;"> 30.4 </td>
2059
+ <td style="text-align:right;"> 4 </td>
2060
+ <td style="text-align:right;"> 95.1 </td>
2061
+ <td style="text-align:right;"> 113 </td>
2062
+ <td style="text-align:right;"> 3.77 </td>
2063
+ <td style="text-align:right;"> 1.513 </td>
2064
+ <td style="text-align:right;"> 16.90 </td>
2065
+ <td style="text-align:right;"> 1 </td>
2066
+ <td style="text-align:right;"> 1 </td>
2067
+ <td style="text-align:right;"> 5 </td>
2068
+ <td style="text-align:right;"> 2 </td>
2069
+ </tr>
2070
+ <tr>
2071
+ <td style="text-align:left;"> Ford Pantera L </td>
2072
+ <td style="text-align:right;"> 15.8 </td>
2073
+ <td style="text-align:right;"> 8 </td>
2074
+ <td style="text-align:right;"> 351.0 </td>
2075
+ <td style="text-align:right;"> 264 </td>
2076
+ <td style="text-align:right;"> 4.22 </td>
2077
+ <td style="text-align:right;"> 3.170 </td>
2078
+ <td style="text-align:right;"> 14.50 </td>
2079
+ <td style="text-align:right;"> 0 </td>
2080
+ <td style="text-align:right;"> 1 </td>
2081
+ <td style="text-align:right;"> 5 </td>
2082
+ <td style="text-align:right;"> 4 </td>
2083
+ </tr>
2084
+ <tr>
2085
+ <td style="text-align:left;"> Ferrari Dino </td>
2086
+ <td style="text-align:right;"> 19.7 </td>
2087
+ <td style="text-align:right;"> 6 </td>
2088
+ <td style="text-align:right;"> 145.0 </td>
2089
+ <td style="text-align:right;"> 175 </td>
2090
+ <td style="text-align:right;"> 3.62 </td>
2091
+ <td style="text-align:right;"> 2.770 </td>
2092
+ <td style="text-align:right;"> 15.50 </td>
2093
+ <td style="text-align:right;"> 0 </td>
2094
+ <td style="text-align:right;"> 1 </td>
2095
+ <td style="text-align:right;"> 5 </td>
2096
+ <td style="text-align:right;"> 6 </td>
2097
+ </tr>
2098
+ <tr>
2099
+ <td style="text-align:left;"> Maserati Bora </td>
2100
+ <td style="text-align:right;"> 15.0 </td>
2101
+ <td style="text-align:right;"> 8 </td>
2102
+ <td style="text-align:right;"> 301.0 </td>
2103
+ <td style="text-align:right;"> 335 </td>
2104
+ <td style="text-align:right;"> 3.54 </td>
2105
+ <td style="text-align:right;"> 3.570 </td>
2106
+ <td style="text-align:right;"> 14.60 </td>
2107
+ <td style="text-align:right;"> 0 </td>
2108
+ <td style="text-align:right;"> 1 </td>
2109
+ <td style="text-align:right;"> 5 </td>
2110
+ <td style="text-align:right;"> 8 </td>
2111
+ </tr>
2112
+ <tr>
2113
+ <td style="text-align:left;"> Volvo 142E </td>
2114
+ <td style="text-align:right;"> 21.4 </td>
2115
+ <td style="text-align:right;"> 4 </td>
2116
+ <td style="text-align:right;"> 121.0 </td>
2117
+ <td style="text-align:right;"> 109 </td>
2118
+ <td style="text-align:right;"> 4.11 </td>
2119
+ <td style="text-align:right;"> 2.780 </td>
2120
+ <td style="text-align:right;"> 18.60 </td>
2121
+ <td style="text-align:right;"> 1 </td>
2122
+ <td style="text-align:right;"> 1 </td>
2123
+ <td style="text-align:right;"> 4 </td>
2124
+ <td style="text-align:right;"> 2 </td>
2125
+ </tr>
2126
+ </tbody>
2127
+ </table>
2128
+
2129
+ # Basic Data Types
2130
+
2131
+ ## Vector
2132
+
2133
+ Vectors can be thought of as contiguous cells containing data. Cells are accessed through
2134
+ indexing operations such as x[5]. Galaaz has six basic (‘atomic’) vector types: logical,
2135
+ integer, real, complex, string (or character) and raw. The modes and storage modes for the
2136
+ different vector types are listed in the following
2137
+ table.
2138
+
2139
+ | typeof | mode | storage.mode |
2140
+ |-----------|:---------:|-------------:|
2141
+ | logical | logical | logical |
2142
+ | integer | numeric | integer |
2143
+ | double | numeric | double |
2144
+ | complex | complex | complex |
2145
+ | character | character | character |
2146
+ | raw | raw | raw |
2147
+
2148
+ Single numbers, such as 4.2, and strings, such as "four point two" are still vectors, of length
2149
+ 1; there are no more basic types. Vectors with length zero are possible (and useful).
2150
+ String vectors have mode and storage mode "character". A single element of a character
2151
+ vector is often referred to as a character string.
2152
+
2153
+ To create a vector the 'c' (concatenate) method from the 'R' module should be used:
2154
+
2155
+
2156
+ ```ruby
2157
+ vec = R.c(1, 2, 3)
2158
+ puts vec
2159
+ ```
2160
+
2161
+ ```
2162
+ ## [1] 1 2 3
2163
+ ```
2164
+
2165
+ Let's take a look at the type, mode and storage.mode of our vector vec. In order to print
2166
+ this out, we are creating a data frame 'df' and printing it out. A data frame, for those
2167
+ not familiar with it, is basically a table. Here we create the data frame and add the
2168
+ column name by passing named parameters for each column, such as 'typeof:', 'mode:' and
2169
+ 'storage__mode:'. You should also note here that the double underscore is converted to a '.'.
2170
+ So, when printed 'storage\_\_mode' will actually print as 'storage.mode'.
2171
+
2172
+ Data frames will later be more carefully described. In R, the method used to create a
2173
+ data frame is 'data.frame', in Galaaz we use 'data\_\_frame'.
2174
+
2175
+
2176
+ ```ruby
2177
+ df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
2178
+ puts df
2179
+ ```
2180
+
2181
+ ```
2182
+ ## typeof mode storage.mode
2183
+ ## 1 integer numeric integer
2184
+ ```
2185
+
2186
+ If you want to create a vector with floating point numbers, then we need at least one of the
2187
+ vector's elements to be a float, such as 1.0. R users should be careful, since in R a number
2188
+ like '1' is converted to float and to have an integer the R developer will use '1L'. Galaaz
2189
+ follows normal Ruby rules and the number 1 is an integer and 1.0 is a float.
2190
+
2191
+
2192
+ ```ruby
2193
+ vec = R.c(1.0, 2, 3)
2194
+ puts vec
2195
+ ```
2196
+
2197
+ ```
2198
+ ## [1] 1 2 3
2199
+ ```
2200
+
2201
+
2202
+ ```ruby
2203
+ df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
2204
+ outputs df.kable.kable_styling
2205
+ ```
2206
+
2207
+ <table class="table" style="margin-left: auto; margin-right: auto;">
2208
+ <thead>
2209
+ <tr>
2210
+ <th style="text-align:left;"> typeof </th>
2211
+ <th style="text-align:left;"> mode </th>
2212
+ <th style="text-align:left;"> storage.mode </th>
2213
+ </tr>
2214
+ </thead>
2215
+ <tbody>
2216
+ <tr>
2217
+ <td style="text-align:left;"> double </td>
2218
+ <td style="text-align:left;"> numeric </td>
2219
+ <td style="text-align:left;"> double </td>
2220
+ </tr>
2221
+ </tbody>
2222
+ </table>
2223
+
2224
+ In this next example we try to create a vector with a variable 'hello' that has not yet
2225
+ been defined. This will raise an exception that is printed out. We get two return blocks,
2226
+ the first with a message explaining what went wrong and the second with the full backtrace
2227
+ of the error.
2228
+
2229
+
2230
+ ```ruby
2231
+ vec = R.c(1, hello, 5)
2232
+ ```
2233
+
2234
+ ```
2235
+ ## Message:
2236
+ ## undefined local variable or method `hello' for #<RC:0x3d8 @out_list=nil>:RC
2237
+ ```
2238
+
2239
+ ```
2240
+ ## Message:
2241
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:103:in `get_binding'
2242
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `eval'
2243
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `exec_ruby'
2244
+ ## /home/rbotafogo/desenv/galaaz/lib/gknit/knitr_engine.rb:650:in `block in initialize'
2245
+ ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
2246
+ ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
2247
+ ## (eval):3:in `function(...) {\n rb_method(...)'
2248
+ ## unknown.r:1:in `in_dir'
2249
+ ## unknown.r:1:in `block_exec'
2250
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:92:in `call_block'
2251
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:6:in `process_group.block'
2252
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:3:in `<no source>'
2253
+ ## unknown.r:1:in `withCallingHandlers'
2254
+ ## unknown.r:1:in `process_file'
2255
+ ## unknown.r:1:in `<no source>'
2256
+ ## unknown.r:1:in `<no source>'
2257
+ ## <REPL>:4:in `<repl wrapper>'
2258
+ ## <REPL>:1
2259
+ ```
2260
+
2261
+ Here is a vector with logical values
2262
+
2263
+
2264
+ ```ruby
2265
+ vec = R.c(true, true, false, false, true)
2266
+ puts vec
2267
+ ```
2268
+
2269
+ ```
2270
+ ## [1] TRUE TRUE FALSE FALSE TRUE
2271
+ ```
2272
+
2273
+ ### Combining Vectors
2274
+
2275
+ The 'c' function used to create vectors can also be used to combine two vectors:
2276
+
2277
+
2278
+ ```ruby
2279
+ vec1 = R.c(10.0, 20.0, 30.0)
2280
+ vec2 = R.c(4.0, 5.0, 6.0)
2281
+ vec = R.c(vec1, vec2)
2282
+ puts vec
2283
+ ```
2284
+
2285
+ ```
2286
+ ## [1] 10 20 30 4 5 6
2287
+ ```
2288
+ In galaaz, methods can be chained (somewhat like the pipe operator in R %>%, but more generic).
2289
+ In this next example, method 'c' is chained after 'vec1'. This also looks like 'c' is a
2290
+ method of the vector, but in reality, this is actually closer to the pipe operator. When
2291
+ Galaaz identifies that 'c' is not a method of 'vec1' it actually tries to call 'R.c' with
2292
+ 'vec1' as the first argument concatenated with all the other available arguments. The code
2293
+ below is automatically converted to the code above.
2294
+
2295
+
2296
+ ```ruby
2297
+ vec = vec1.c(vec2)
2298
+ puts vec
2299
+ ```
2300
+
2301
+ ```
2302
+ ## [1] 10 20 30 4 5 6
2303
+ ```
2304
+
2305
+ ### Vector Arithmetic
2306
+
2307
+ Arithmetic operations on vectors are performed element by element:
2308
+
2309
+
2310
+ ```ruby
2311
+ puts vec1 + vec2
2312
+ ```
2313
+
2314
+ ```
2315
+ ## [1] 14 25 36
2316
+ ```
2317
+
2318
+
2319
+ ```ruby
2320
+ puts vec1 * 5
2321
+ ```
2322
+
2323
+ ```
2324
+ ## [1] 50 100 150
2325
+ ```
2326
+
2327
+ When vectors have different lengths, a recycling rule is applied to the shorter vector:
2328
+
2329
+
2330
+ ```ruby
2331
+ vec3 = R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
2332
+ puts vec4 = vec1 + vec3
2333
+ ```
2334
+
2335
+ ```
2336
+ ## [1] 11 22 33 14 25 36 17 28 39
2337
+ ```
2338
+
2339
+ ### Vector Indexing
2340
+
2341
+ Vectors can be indexed by using the '[]' operator:
2342
+
2343
+
2344
+ ```ruby
2345
+ puts vec4[3]
2346
+ ```
2347
+
2348
+ ```
2349
+ ## [1] 33
2350
+ ```
2351
+
2352
+ We can also index a vector with another vector. For example, in the code below, we take elements
2353
+ 1, 3, 5, and 7 from vec4:
2354
+
2355
+
2356
+ ```ruby
2357
+ puts vec4[R.c(1, 3, 5, 7)]
2358
+ ```
2359
+
2360
+ ```
2361
+ ## [1] 11 33 25 17
2362
+ ```
2363
+
2364
+ Repeating an index and having indices out of order is valid code:
2365
+
2366
+
2367
+ ```ruby
2368
+ puts vec4[R.c(1, 3, 3, 1)]
2369
+ ```
2370
+
2371
+ ```
2372
+ ## [1] 11 33 33 11
2373
+ ```
2374
+
2375
+ It is also possible to index a vector with a negative number or negative vector. In these cases
2376
+ the indexed values are not returned:
2377
+
2378
+
2379
+ ```ruby
2380
+ puts vec4[-3]
2381
+ puts vec4[-R.c(1, 3, 5, 7)]
2382
+ ```
2383
+
2384
+ ```
2385
+ ## [1] 11 22 14 25 36 17 28 39
2386
+ ## [1] 22 14 36 28 39
2387
+ ```
2388
+
2389
+ If an index is out of range, a missing value (NA) will be reported.
2390
+
2391
+
2392
+ ```ruby
2393
+ puts vec4[30]
2394
+ ```
2395
+
2396
+ ```
2397
+ ## [1] NA
2398
+ ```
2399
+
2400
+ It is also possible to index a vector by range:
2401
+
2402
+
2403
+ ```ruby
2404
+ puts vec4[(2..5)]
2405
+ ```
2406
+
2407
+ ```
2408
+ ## [1] 22 33 14 25
2409
+ ```
2410
+
2411
+ Elements in a vector can be named using the 'names' attribute of a vector:
2412
+
2413
+
2414
+ ```ruby
2415
+ full_name = R.c("Rodrigo", "A", "Botafogo")
2416
+ full_name.names = R.c("First", "Middle", "Last")
2417
+ puts full_name
2418
+ ```
2419
+
2420
+ ```
2421
+ ## First Middle Last
2422
+ ## "Rodrigo" "A" "Botafogo"
2423
+ ```
2424
+
2425
+ Or it can also be named by using the 'c' function with named parameters:
2426
+
2427
+
2428
+ ```ruby
2429
+ full_name = R.c(First: "Rodrigo", Middle: "A", Last: "Botafogo")
2430
+ puts full_name
2431
+ ```
2432
+
2433
+ ```
2434
+ ## First Middle Last
2435
+ ## "Rodrigo" "A" "Botafogo"
2436
+ ```
2437
+
2438
+ ### Extracting Native Ruby Types from a Vector
2439
+
2440
+ Vectors created with 'R.c' are of class R::Vector. You might have noticed that when indexing a
2441
+ vector, a new vector is returned, even if this vector has one single element. In order to use
2442
+ R::Vector with other ruby classes it might be necessary to extract the actual Ruby native type
2443
+ from the vector. In order to do this extraction the '>>' operator is used.
2444
+
2445
+
2446
+ ```ruby
2447
+ puts vec4
2448
+ puts vec4 >> 0
2449
+ puts vec4 >> 4
2450
+ ```
2451
+
2452
+ ```
2453
+ ## [1] 11 22 33 14 25 36 17 28 39
2454
+ ## 11.0
2455
+ ## 25.0
2456
+ ```
2457
+
2458
+ Note that indexing with '>>' starts at 0 and not at 1, also, we cannot do negative indexing.
2459
+
2460
+ ## Matrix
2461
+
2462
+ A matrix is a collection of elements organized as a two dimensional table. A matrix can be
2463
+ created by the 'matrix' function:
2464
+
2465
+
2466
+ ```ruby
2467
+ mat = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
2468
+ nrow: 3,
2469
+ ncol: 3)
2470
+
2471
+ puts mat
2472
+ ```
2473
+
2474
+ ```
2475
+ ## [,1] [,2] [,3]
2476
+ ## [1,] 1 4 7
2477
+ ## [2,] 2 5 8
2478
+ ## [3,] 3 6 9
2479
+ ```
2480
+ Note that matrix data is organized by column first. It is possible to organize the matrix
2481
+ memory by row first passing an extra argument to the 'matrix' function:
2482
+
2483
+
2484
+ ```ruby
2485
+ mat_row = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
2486
+ nrow: 3,
2487
+ ncol: 3,
2488
+ byrow: true)
2489
+
2490
+ puts mat_row
2491
+ ```
2492
+
2493
+ ```
2494
+ ## [,1] [,2] [,3]
2495
+ ## [1,] 1 2 3
2496
+ ## [2,] 4 5 6
2497
+ ## [3,] 7 8 9
2498
+ ```
2499
+
2500
+ ### Indexing a Matrix
2501
+
2502
+ A matrix can be indexed by [row, column]:
2503
+
2504
+
2505
+ ```ruby
2506
+ puts mat_row[1, 1]
2507
+ puts mat_row[2, 3]
2508
+ ```
2509
+
2510
+ ```
2511
+ ## [1] 1
2512
+ ## [1] 6
2513
+ ```
2514
+ It is possible to index an entire row or column with the ':all' keyword
2515
+
2516
+
2517
+ ```ruby
2518
+ puts mat_row[1, :all]
2519
+ puts mat_row[:all, 2]
2520
+ ```
2521
+
2522
+ ```
2523
+ ## [1] 1 2 3
2524
+ ## [1] 2 5 8
2525
+ ```
2526
+
2527
+ Indexing with a vector is also possible for matrices. In the following example we want
2528
+ rows 1 and 3 and columns 2 and 3 building a 2 x 2 matrix.
2529
+
2530
+
2531
+ ```ruby
2532
+ puts mat_row[R.c(1, 3), R.c(2, 3)]
2533
+ ```
2534
+
2535
+ ```
2536
+ ## [,1] [,2]
2537
+ ## [1,] 2 3
2538
+ ## [2,] 8 9
2539
+ ```
2540
+
2541
+ Matrices can be combined with functions 'rbind':
2542
+
2543
+
2544
+ ```ruby
2545
+ puts mat_row.rbind(mat)
2546
+ ```
2547
+
2548
+ ```
2549
+ ## [,1] [,2] [,3]
2550
+ ## [1,] 1 2 3
2551
+ ## [2,] 4 5 6
2552
+ ## [3,] 7 8 9
2553
+ ## [4,] 1 4 7
2554
+ ## [5,] 2 5 8
2555
+ ## [6,] 3 6 9
2556
+ ```
2557
+
2558
+ and 'cbind':
2559
+
2560
+
2561
+ ```ruby
2562
+ puts mat_row.cbind(mat)
2563
+ ```
2564
+
2565
+ ```
2566
+ ## [,1] [,2] [,3] [,4] [,5] [,6]
2567
+ ## [1,] 1 2 3 1 4 7
2568
+ ## [2,] 4 5 6 2 5 8
2569
+ ## [3,] 7 8 9 3 6 9
2570
+ ```
2571
+
2572
+ ## List
2573
+
2574
+ A list is a data structure that can contain sublists of different types, while vector and matrix
2575
+ can only hold one type of element.
2576
+
2577
+
2578
+ ```ruby
2579
+ nums = R.c(1.0, 2.0, 3.0)
2580
+ strs = R.c("a", "b", "c", "d")
2581
+ bool = R.c(true, true, false)
2582
+ lst = R.list(nums: nums, strs: strs, bool: bool)
2583
+ puts lst
2584
+ ```
2585
+
2586
+ ```
2587
+ ## $nums
2588
+ ## [1] 1 2 3
2589
+ ##
2590
+ ## $strs
2591
+ ## [1] "a" "b" "c" "d"
2592
+ ##
2593
+ ## $bool
2594
+ ## [1] TRUE TRUE FALSE
2595
+ ```
2596
+
2597
+ Note that 'lst' elements are named elements.
2598
+
2599
+
2600
+ ### List Indexing
2601
+
2602
+ List indexing, also called slicing, is done using the '[]' operator and the '[[]]' operator. Let's
2603
+ first start with the '[]' operator. The list above has three sublists; indexing with '[]' will
2604
+ return one of the sublists.
2605
+
2606
+
2607
+ ```ruby
2608
+ puts lst[1]
2609
+ ```
2610
+
2611
+ ```
2612
+ ## $nums
2613
+ ## [1] 1 2 3
2614
+ ```
2615
+
2616
+ Note that when using '[]' a new list is returned. When using the double square bracket operator
2617
+ the value returned is the actual element of the list in the given position and not a slice of
2618
+ the original list
2619
+
2620
+
2621
+
2622
+ ```ruby
2623
+ puts lst[[1]]
2624
+ ```
2625
+
2626
+ ```
2627
+ ## [1] 1 2 3
2628
+ ```
2629
+
2630
+ When elements are named, as done with lst, indexing can be done by name:
2631
+
2632
+
2633
+ ```ruby
2634
+ puts lst[['bool']][[1]] >> 0
2635
+ ```
2636
+
2637
+ ```
2638
+ ## true
2639
+ ```
2640
+
2641
+ In this example, first the 'bool' element of the list was extracted, not as a list, but as a vector,
2642
+ then the first element of the vector was extracted (note that vectors also accept the '[[]]'
2643
+ operator) and then the vector was indexed by its first element, extracting the native Ruby type.
2644
+
2645
+
2646
+ ## Data Frame
2647
+
2648
+ A data frame is a table like structure in which each column has the same number of
2649
+ rows. Data frames are the basic structure for storing data for data analysis. We have already
2650
+ seen a data frame previously when we accessed variable '~:mtcars'. In order to create a
2651
+ data frame, function 'data__frame' is used:
2652
+
2653
+
2654
+ ```ruby
2655
+ df = R.data__frame(
2656
+ year: R.c(2010, 2011, 2012),
2657
+ income: R.c(1000.0, 1500.0, 2000.0))
2658
+
2659
+ puts df
2660
+ ```
2661
+
2662
+ ```
2663
+ ## year income
2664
+ ## 1 2010 1000
2665
+ ## 2 2011 1500
2666
+ ## 3 2012 2000
2667
+ ```
2668
+
2669
+ ### Data Frame Indexing
2670
+
2671
+ A data frame can be indexed the same way as a matrix, by using '[row, column]', where row and
2672
+ column can either be a numeric or the name of the row or column
2673
+
2674
+
2675
+ ```ruby
2676
+ puts (~:mtcars).head
2677
+ puts (~:mtcars)[1, 2]
2678
+ puts (~:mtcars)['Datsun 710', 'mpg']
2679
+ ```
2680
+
2681
+ ```
2682
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2683
+ ## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
2684
+ ## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
2685
+ ## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
2686
+ ## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
2687
+ ## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
2688
+ ## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
2689
+ ## [1] 6
2690
+ ## [1] 22.8
2691
+ ```
2692
+
2693
+ Extracting a column from a data frame as a vector can be done by using the double square bracket
2694
+ operator:
2695
+
2696
+
2697
+ ```ruby
2698
+ puts (~:mtcars)[['mpg']]
2699
+ ```
2700
+
2701
+ ```
2702
+ ## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
2703
+ ## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
2704
+ ## [29] 15.8 19.7 15.0 21.4
2705
+ ```
2706
+
2707
+ A data frame column can also be accessed as if it were an instance variable of the data frame:
2708
+
2709
+
2710
+ ```ruby
2711
+ puts (~:mtcars).mpg
2712
+ ```
2713
+
2714
+ ```
2715
+ ## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
2716
+ ## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
2717
+ ## [29] 15.8 19.7 15.0 21.4
2718
+ ```
2719
+
2720
+ Slicing a data frame can be done by indexing it with a vector (we use 'head' to reduce the
2721
+ output):
2722
+
2723
+
2724
+ ```ruby
2725
+ puts (~:mtcars)[R.c('mpg', 'hp')].head
2726
+ ```
2727
+
2728
+ ```
2729
+ ## mpg hp
2730
+ ## Mazda RX4 21.0 110
2731
+ ## Mazda RX4 Wag 21.0 110
2732
+ ## Datsun 710 22.8 93
2733
+ ## Hornet 4 Drive 21.4 110
2734
+ ## Hornet Sportabout 18.7 175
2735
+ ## Valiant 18.1 105
2736
+ ```
2737
+
2738
+ A row slice can be obtained by indexing by row and using the ':all' keyword for the column:
2739
+
2740
+
2741
+ ```ruby
2742
+ puts (~:mtcars)[R.c('Datsun 710', 'Camaro Z28'), :all]
2743
+ ```
2744
+
2745
+ ```
2746
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2747
+ ## Datsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1
2748
+ ## Camaro Z28 13.3 8 350 245 3.73 3.84 15.41 0 0 3 4
2749
+ ```
2750
+
2751
+ Finally, a data frame can also be indexed with a logical vector. In this next example, the
2752
+ 'am' column of :mtcars is compared with 0 (with method 'eq'). When 'am' is equal to 0 the
2753
+ car is automatic. So, by doing '(~:mtcars).am.eq 0' a logical vector is created with
2754
+ 'true' whenever 'am' is 0 and 'false' otherwise.
2755
+
2756
+
2757
+ ```ruby
2758
+ # obtain a vector with 'true' for cars with automatic transmission
2759
+ automatic = (~:mtcars).am.eq 0
2760
+ puts automatic
2761
+ ```
2762
+
2763
+ ```
2764
+ ## [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
2765
+ ## [12] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE
2766
+ ## [23] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
2767
+ ```
2768
+
2769
+ Using this logical vector, the data frame is indexed, returning a new data frame in
2770
+ which all cars have automatic transmission.
2771
+
2772
+
2773
+ ```ruby
2774
+ # slice the data frame by using this vector
2775
+ puts (~:mtcars)[automatic, :all]
2776
+ ```
2777
+
2778
+ ```
2779
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2780
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
2781
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
2782
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
2783
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
2784
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
2785
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
2786
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
2787
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
2788
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
2789
+ ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
2790
+ ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
2791
+ ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
2792
+ ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
2793
+ ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
2794
+ ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
2795
+ ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
2796
+ ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
2797
+ ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
2798
+ ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
2799
+ ```
2800
+
2801
+ # Writing Expressions in Galaaz
2802
+
2803
+ Galaaz extends Ruby to work with complex expressions, similar to R's expressions built with 'quote'
2804
+ (base R) or 'quo' (tidyverse). Let's take a look at some of those expressions.
2805
+
2806
+ ## Expressions from operators
2807
+
2808
+ The code below
2809
+ creates an expression summing two symbols
2810
+
2811
+
2812
+ ```ruby
2813
+ exp1 = :a + :b
2814
+ puts exp1
2815
+ ```
2816
+
2817
+ ```
2818
+ ## a + b
2819
+ ```
2820
+ We can build any complex mathematical expression
2821
+
2822
+
2823
+ ```ruby
2824
+ exp2 = (:a + :b) * 2.0 + :c ** 2 / :z
2825
+ puts exp2
2826
+ ```
2827
+
2828
+ ```
2829
+ ## (a + b) * 2 + c^2L/z
2830
+ ```
2831
+
2832
+ It is also possible to use inequality operators in building expressions
2833
+
2834
+
2835
+ ```ruby
2836
+ exp3 = (:a + :b) >= :z
2837
+ puts exp3
2838
+ ```
2839
+
2840
+ ```
2841
+ ## a + b >= z
2842
+ ```
2843
+
2844
+ Galaaz provides both symbolic representations for operators, such as (>, <, !=), and functional
2845
+ notation for those operators such as (.gt, .ge, etc.). So the same expression written
2846
+ above can also be written as
2847
+
2848
+
2849
+ ```ruby
2850
+ exp4 = (:a + :b).ge :z
2851
+ puts exp4
2852
+ ```
2853
+
2854
+ ```
2855
+ ## a + b >= z
2856
+ ```
2857
+
2858
+ Two types of expressions can only be created with the functional representation of the operators,
2859
+ those are expressions involving '==', and '='. In order to write an expression involving '==' we
2860
+ need to use the method '.eq' and for '=' we need the function '.assign'
2861
+
2862
+
2863
+ ```ruby
2864
+ exp5 = (:a + :b).eq :z
2865
+ puts exp5
2866
+ ```
2867
+
2868
+ ```
2869
+ ## a + b == z
2870
+ ```
2871
+
2872
+
2873
+ ```ruby
2874
+ exp6 = :y.assign :a + :b
2875
+ puts exp6
2876
+ ```
2877
+
2878
+ ```
2879
+ ## y <- a + b
2880
+ ```
2881
+ In general we think that using the functional notation is preferable to using the
2882
+ symbolic notation as otherwise, we end up writing invalid expressions such as
2883
+
2884
+
2885
+ ```ruby
2886
+ exp_wrong = (:a + :b) == :z
2887
+ puts exp_wrong
2888
+ ```
2889
+ and it might be difficult to understand what is going on here. The problem lies with the fact that
2890
+ when using '==' we are comparing expression (:a + :b) to expression :z with '=='. When the
2891
+ comparison is executed, the system tries to evaluate :a, :b and :z, and those symbols at
2892
+ this time are not bound to anything and we get an "object 'a' not found" message.
2893
+ If we only use functional notation, this type of error will not occur.
2894
+
2895
+ ## Expressions with R methods
2896
+
2897
+ It is often necessary to create an expression that uses a method or function. For instance, in
2898
+ mathematics, it's quite natural to write an expression such as $y = sin(x)$. In this case, the
2898
+ 'sin' function is part of the expression and should not be immediately executed. Now, let's say
2899
+ that 'x' is an angle of 45$^\circ$ and we actually want our expression to be $y = 0.850...$.
2900
+ When we want the function to be part of the expression, we call the function preceding it
2902
+ by the letter E, such as 'E.sin(x)'
2903
+
2904
+
2905
+ ```ruby
2906
+ exp7 = :y.assign E.sin(:x)
2907
+ puts exp7
2908
+ ```
2909
+
2910
+ ```
2911
+ ## y <- sin(x)
2912
+ ```
2913
+
2914
+ Expressions can also be written using '.' notation:
2915
+
2916
+
2917
+ ```ruby
2918
+ exp8 = :y.assign :x.sin
2919
+ puts exp8
2920
+ ```
2921
+
2922
+ ```
2923
+ ## y <- sin(x)
2924
+ ```
2925
+
2926
+ When a function has multiple arguments, the first one can be used before the '.':
2927
+
2928
+
2929
+ ```ruby
2930
+ exp9 = :x.c(:y)
2931
+ puts exp9
2932
+ ```
2933
+
2934
+ ```
2935
+ ## c(x, y)
2936
+ ```
2937
+
2938
+ ## Evaluating an Expression
2939
+
2940
+ Expressions can be evaluated by calling function 'eval' with a binding. A binding can be provided
2941
+ with a list:
2942
+
2943
+
2944
+ ```ruby
2945
+ exp = (:a + :b) * 2.0 + :c ** 2 / :z
2946
+ puts exp.eval(R.list(a: 10, b: 20, c: 30, z: 40))
2947
+ ```
2948
+
2949
+ ```
2950
+ ## [1] 82.5
2951
+ ```
2952
+
2953
+ ... with a data frame:
2954
+
2955
+
2956
+ ```ruby
2957
+ df = R.data__frame(
2958
+ a: R.c(1, 2, 3),
2959
+ b: R.c(10, 20, 30),
2960
+ c: R.c(100, 200, 300),
2961
+ z: R.c(1000, 2000, 3000))
2962
+
2963
+ puts exp.eval(df)
2964
+ ```
2965
+
2966
+ ```
2967
+ ## [1] 32 64 96
2968
+ ```
2969
+
2970
+ # Manipulating Data
2971
+
2972
+ One of the major benefits of Galaaz is to bring strong data manipulation to Ruby. The following
2973
+ examples were extracted from Hadley's "R for Data Science" (https://r4ds.had.co.nz/). This
2974
+ is a highly recommended book for those not already familiar with the 'tidyverse' style of
2975
+ programming in R. In the sections to follow, we will limit ourselves to convert the R code to
2976
+ Galaaz.
2977
+
2978
+ For these
2979
+ examples, we will investigate the nycflights13 data set available on the package by the
2980
+ same name. We use function 'R.install\_and\_loads' that checks if the library is available
2981
+ locally, and if not, installs it. This data frame contains all 336,776 flights that
2982
+ departed from New York City in 2013. The data comes from the US Bureau of
2983
+ Transportation Statistics.
2984
+
2985
+ Dplyr uses 'tibbles' in place of data frames; unfortunately, tibbles do not yet print properly in
2986
+ Galaaz due to a bug in fastR. In order to print a tibble we need to convert it to a data frame
2987
+ using the 'as\_\_data__frame' method.
2988
+
2989
+
2990
+ ```ruby
2991
+ R.install_and_loads('nycflights13')
2992
+ R.library('dplyr')
2993
+ ```
2994
+
2995
+
2996
+ ```ruby
2997
+ flights = ~:flights
2998
+ puts flights.head
2999
+ ```
3000
+
3001
+ ```
3002
+ ## # A tibble: 6 x 19
3003
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3004
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3005
+ ## 1 2013 1 1 517 515 2 830
3006
+ ## 2 2013 1 1 533 529 4 850
3007
+ ## 3 2013 1 1 542 540 2 923
3008
+ ## 4 2013 1 1 544 545 -1 1004
3009
+ ## 5 2013 1 1 554 600 -6 812
3010
+ ## 6 2013 1 1 554 558 -4 740
3011
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3012
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3013
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3014
+ ## # time_hour <dttm>
3015
+ ```
3016
+
3017
+ ## Filtering rows with Filter
3018
+
3019
+ In this example we filter the flights data set by giving to the filter function two expressions:
3020
+ the first is ':month.eq 1' and the second is ':day.eq 1'.
3021
+
3022
+
3023
+ ```ruby
3024
+ puts flights.filter((:month.eq 1), (:day.eq 1)).head
3025
+ ```
3026
+
3027
+ ```
3028
+ ## # A tibble: 6 x 19
3029
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3030
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3031
+ ## 1 2013 1 1 517 515 2 830
3032
+ ## 2 2013 1 1 533 529 4 850
3033
+ ## 3 2013 1 1 542 540 2 923
3034
+ ## 4 2013 1 1 544 545 -1 1004
3035
+ ## 5 2013 1 1 554 600 -6 812
3036
+ ## 6 2013 1 1 554 558 -4 740
3037
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3038
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3039
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3040
+ ## # time_hour <dttm>
3041
+ ```
3042
+
3043
+ ## Logical Operators
3044
+
3045
+ All flights that departed in November or December
3046
+
3047
+
3048
+ ```ruby
3049
+ puts flights.filter((:month.eq 11) | (:month.eq 12)).head
3050
+ ```
3051
+
3052
+ ```
3053
+ ## # A tibble: 6 x 19
3054
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3055
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3056
+ ## 1 2013 11 1 5 2359 6 352
3057
+ ## 2 2013 11 1 35 2250 105 123
3058
+ ## 3 2013 11 1 455 500 -5 641
3059
+ ## 4 2013 11 1 539 545 -6 856
3060
+ ## 5 2013 11 1 542 545 -3 831
3061
+ ## 6 2013 11 1 549 600 -11 912
3062
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3063
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3064
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3065
+ ## # time_hour <dttm>
3066
+ ```
3067
+
3068
+ The same as above, but using the 'in' operator. In R, it is possible to define many operators
3069
+ by doing %<op>%. The %in% operator checks if a value is in a vector. In order to use those
3070
+ operators from Galaaz the '._' method is used, where the first argument is the operator's
3071
+ symbol, in this case ':in' and the second argument is the vector:
3072
+
3073
+
3074
+ ```ruby
3075
+ puts flights.filter(:month._ :in, R.c(11, 12)).head
3076
+ ```
3077
+
3078
+ ```
3079
+ ## # A tibble: 6 x 19
3080
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3081
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3082
+ ## 1 2013 11 1 5 2359 6 352
3083
+ ## 2 2013 11 1 35 2250 105 123
3084
+ ## 3 2013 11 1 455 500 -5 641
3085
+ ## 4 2013 11 1 539 545 -6 856
3086
+ ## 5 2013 11 1 542 545 -3 831
3087
+ ## 6 2013 11 1 549 600 -11 912
3088
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3089
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3090
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3091
+ ## # time_hour <dttm>
3092
+ ```
3093
+
3094
+ ## Filtering with NA (Not Available)
3095
+
3096
+ Let's first create a 'tibble' with a Not Available value (R::NA). Tibbles are a modern
3097
+ version of a data frame and operate very similarly to one. It differs in how it outputs
3098
+ the values and the result of some subsetting operations that are more consistent than
3099
+ what is obtained from data frame.
3100
+
3101
+
3102
+ ```ruby
3103
+ df = R.tibble(x: R.c(1, R::NA, 3))
3104
+ puts df
3105
+ ```
3106
+
3107
+ ```
3108
+ ## # A tibble: 3 x 1
3109
+ ## x
3110
+ ## <int>
3111
+ ## 1 1
3112
+ ## 2
3113
+ ## 3 3
3114
+ ```
3115
+
3116
+ Now filtering by :x > 1 shows all lines that satisfy this condition, where the row with R::NA does
3117
+ not.
3118
+
3119
+
3120
+ ```ruby
3121
+ puts df.filter(:x > 1)
3122
+ ```
3123
+
3124
+ ```
3125
+ ## # A tibble: 1 x 1
3126
+ ## x
3127
+ ## <int>
3128
+ ## 1 3
3129
+ ```
3130
+
3131
+ To match an NA use method 'is__na'
3132
+
3133
+
3134
+ ```ruby
3135
+ puts df.filter((:x.is__na) | (:x > 1))
3136
+ ```
3137
+
3138
+ ```
3139
+ ## # A tibble: 2 x 1
3140
+ ## x
3141
+ ## <int>
3142
+ ## 1
3143
+ ## 2 3
3144
+ ```
3145
+
3146
+ ## Arrange Rows with arrange
3147
+
3148
+ Arrange reorders the rows of a data frame by the given arguments.
3149
+
3150
+
3151
+ ```ruby
3152
+ puts flights.arrange(:year, :month, :day).head
3153
+ ```
3154
+
3155
+ ```
3156
+ ## # A tibble: 6 x 19
3157
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3158
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3159
+ ## 1 2013 1 1 517 515 2 830
3160
+ ## 2 2013 1 1 533 529 4 850
3161
+ ## 3 2013 1 1 542 540 2 923
3162
+ ## 4 2013 1 1 544 545 -1 1004
3163
+ ## 5 2013 1 1 554 600 -6 812
3164
+ ## 6 2013 1 1 554 558 -4 740
3165
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3166
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3167
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3168
+ ## # time_hour <dttm>
3169
+ ```
3170
+
3171
+ To arrange in descending order, use function 'desc'
3172
+
3173
+
3174
+ ```ruby
3175
+ puts flights.arrange(:dep_delay.desc).head
3176
+ ```
3177
+
3178
+ ```
3179
+ ## # A tibble: 6 x 19
3180
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3181
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3182
+ ## 1 2013 1 9 641 900 1301 1242
3183
+ ## 2 2013 6 15 1432 1935 1137 1607
3184
+ ## 3 2013 1 10 1121 1635 1126 1239
3185
+ ## 4 2013 9 20 1139 1845 1014 1457
3186
+ ## 5 2013 7 22 845 1600 1005 1044
3187
+ ## 6 2013 4 10 1100 1900 960 1342
3188
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3189
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3190
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3191
+ ## # time_hour <dttm>
3192
+ ```
3193
+
3194
+ ## Selecting columns
3195
+
3196
+ To select specific columns from a dataset we use function 'select':
3197
+
3198
+
3199
+ ```ruby
3200
+ puts flights.select(:year, :month, :day).head
3201
+ ```
3202
+
3203
+ ```
3204
+ ## # A tibble: 6 x 3
3205
+ ## year month day
3206
+ ## <int> <int> <int>
3207
+ ## 1 2013 1 1
3208
+ ## 2 2013 1 1
3209
+ ## 3 2013 1 1
3210
+ ## 4 2013 1 1
3211
+ ## 5 2013 1 1
3212
+ ## 6 2013 1 1
3213
+ ```
3214
+
3215
+ It is also possible to select columns in a given range
3216
+
3217
+
3218
+ ```ruby
3219
+ puts flights.select(:year.up_to :day).head
3220
+ ```
3221
+
3222
+ ```
3223
+ ## # A tibble: 6 x 3
3224
+ ## year month day
3225
+ ## <int> <int> <int>
3226
+ ## 1 2013 1 1
3227
+ ## 2 2013 1 1
3228
+ ## 3 2013 1 1
3229
+ ## 4 2013 1 1
3230
+ ## 5 2013 1 1
3231
+ ## 6 2013 1 1
3232
+ ```
3233
+
3234
+ Select all columns that start with a given name sequence
3235
+
3236
+
3237
+ ```ruby
3238
+ puts flights.select(E.starts_with('arr')).head
3239
+ ```
3240
+
3241
+ ```
3242
+ ## # A tibble: 6 x 2
3243
+ ## arr_time arr_delay
3244
+ ## <int> <dbl>
3245
+ ## 1 830 11
3246
+ ## 2 850 20
3247
+ ## 3 923 33
3248
+ ## 4 1004 -18
3249
+ ## 5 812 -25
3250
+ ## 6 740 12
3251
+ ```
3252
+
3253
+ Other functions that can be used:
3254
+
3255
+ * ends_with("xyz"): matches names that end with “xyz”.
3256
+
3257
+ * contains("ijk"): matches names that contain “ijk”.
3258
+
3259
+ * matches("(.)\\1"): selects variables that match a regular expression. This one matches
3260
+ any variables that contain repeated characters.
3261
+
3262
+ * num_range("x", (1..3)): matches x1, x2 and x3
3263
+
3264
+ A helper function that comes in handy when we just want to rearrange column order is 'everything':
3265
+
3266
+
3267
+ ```ruby
3268
+ puts flights.select(:year, :month, :day, E.everything).head
3269
+ ```
3270
+
3271
+ ```
3272
+ ## # A tibble: 6 x 19
3273
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3274
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3275
+ ## 1 2013 1 1 517 515 2 830
3276
+ ## 2 2013 1 1 533 529 4 850
3277
+ ## 3 2013 1 1 542 540 2 923
3278
+ ## 4 2013 1 1 544 545 -1 1004
3279
+ ## 5 2013 1 1 554 600 -6 812
3280
+ ## 6 2013 1 1 554 558 -4 740
3281
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3282
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3283
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3284
+ ## # time_hour <dttm>
3285
+ ```
3286
+
3287
+ ## Add variables to a dataframe with 'mutate'
3288
+
3289
+
3290
+ ```ruby
3291
+ flights_sm = flights.
3292
+ select((:year.up_to :day),
3293
+ E.ends_with('delay'),
3294
+ :distance,
3295
+ :air_time)
3296
+
3297
+ puts flights_sm.head
3298
+ ```
3299
+
3300
+ ```
3301
+ ## # A tibble: 6 x 7
3302
+ ## year month day dep_delay arr_delay distance air_time
3303
+ ## <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
3304
+ ## 1 2013 1 1 2 11 1400 227
3305
+ ## 2 2013 1 1 4 20 1416 227
3306
+ ## 3 2013 1 1 2 33 1089 160
3307
+ ## 4 2013 1 1 -1 -18 1576 183
3308
+ ## 5 2013 1 1 -6 -25 762 116
3309
+ ## 6 2013 1 1 -4 12 719 150
3310
+ ```
3311
+
3312
+
3313
+ ```ruby
3314
+ flights_sm = flights_sm.
3315
+ mutate(gain: :dep_delay - :arr_delay,
3316
+ speed: :distance / :air_time * 60)
3317
+ puts flights_sm.head
3318
+ ```
3319
+
3320
+ ```
3321
+ ## # A tibble: 6 x 9
3322
+ ## year month day dep_delay arr_delay distance air_time gain speed
3323
+ ## <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
3324
+ ## 1 2013 1 1 2 11 1400 227 -9 370.
3325
+ ## 2 2013 1 1 4 20 1416 227 -16 374.
3326
+ ## 3 2013 1 1 2 33 1089 160 -31 408.
3327
+ ## 4 2013 1 1 -1 -18 1576 183 17 517.
3328
+ ## 5 2013 1 1 -6 -25 762 116 19 394.
3329
+ ## 6 2013 1 1 -4 12 719 150 -16 288.
3330
+ ```
3331
+
3332
+ ## Summarising data
3333
+
3334
+ Function 'summarise' calculates summaries for the data frame. When no 'group_by' is used
3335
+ a single value is obtained from the data frame:
3336
+
3337
+
3338
+ ```ruby
3339
+ puts flights.summarise(delay: E.mean(:dep_delay, na__rm: true))
3340
+ ```
3341
+
3342
+ ```
3343
+ ## # A tibble: 1 x 1
3344
+ ## delay
3345
+ ## <dbl>
3346
+ ## 1 12.6
3347
+ ```
3348
+
3349
+ When a data frame is grouped with 'group_by', summaries apply to the given group:
3350
+
3351
+
3352
+ ```ruby
3353
+ by_day = flights.group_by(:year, :month, :day)
3354
+ puts by_day.summarise(delay: :dep_delay.mean(na__rm: true)).head
3355
+ ```
3356
+
3357
+ ```
3358
+ ## # A tibble: 6 x 4
3359
+ ## # Groups: year, month [1]
3360
+ ## year month day delay
3361
+ ## * <int> <int> <int> <dbl>
3362
+ ## 1 2013 1 1 11.5
3363
+ ## 2 2013 1 2 13.9
3364
+ ## 3 2013 1 3 11.0
3365
+ ## 4 2013 1 4 8.95
3366
+ ## 5 2013 1 5 5.73
3367
+ ## 6 2013 1 6 7.15
3368
+ ```
3369
+
3370
+ Next we put many operations together by piping them one after the other:
3371
+
3372
+
3373
+ ```ruby
3374
+ delays = flights.
3375
+ group_by(:dest).
3376
+ summarise(
3377
+ count: E.n,
3378
+ dist: :distance.mean(na__rm: true),
3379
+ delay: :arr_delay.mean(na__rm: true)).
3380
+ filter(:count > 20, :dest != "NHL")
3381
+
3382
+ puts delays.head
3383
+ ```
3384
+
3385
+ ```
3386
+ ## # A tibble: 6 x 4
3387
+ ## dest count dist delay
3388
+ ## <chr> <int> <dbl> <dbl>
3389
+ ## 1 ABQ 254 1826 4.38
3390
+ ## 2 ACK 265 199 4.85
3391
+ ## 3 ALB 439 143 14.4
3392
+ ## 4 ATL 17215 757. 11.3
3393
+ ## 5 AUS 2439 1514. 6.02
3394
+ ## 6 AVL 275 584. 8.00
3395
+ ```
3396
+
3397
+ # Using Data Table
3398
+
3399
+
3400
+ ```ruby
3401
+ R.library('data.table')
3402
+ R.install_and_loads('curl')
3403
+
3404
+ input = "https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv"
3405
+ flights = R.fread(input)
3406
+ puts flights
3407
+ puts flights.dim
3408
+ ```
3409
+
3410
+ ```
3411
+ ## year month day dep_delay arr_delay carrier origin dest air_time
3412
+ ## 1: 2014 1 1 14 13 AA JFK LAX 359
3413
+ ## 2: 2014 1 1 -3 13 AA JFK LAX 363
3414
+ ## 3: 2014 1 1 2 9 AA JFK LAX 351
3415
+ ## 4: 2014 1 1 -8 -26 AA LGA PBI 157
3416
+ ## 5: 2014 1 1 2 1 AA JFK LAX 350
3417
+ ## ---
3418
+ ## 253312: 2014 10 31 1 -30 UA LGA IAH 201
3419
+ ## 253313: 2014 10 31 -5 -14 UA EWR IAH 189
3420
+ ## 253314: 2014 10 31 -8 16 MQ LGA RDU 83
3421
+ ## 253315: 2014 10 31 -4 15 MQ LGA DTW 75
3422
+ ## 253316: 2014 10 31 -5 1 MQ LGA SDF 110
3423
+ ## distance hour
3424
+ ## 1: 2475 9
3425
+ ## 2: 2475 11
3426
+ ## 3: 2475 19
3427
+ ## 4: 1035 7
3428
+ ## 5: 2475 13
3429
+ ## ---
3430
+ ## 253312: 1416 14
3431
+ ## 253313: 1400 8
3432
+ ## 253314: 431 11
3433
+ ## 253315: 502 11
3434
+ ## 253316: 659 8
3435
+ ## [1] 253316 11
3436
+ ```
3437
+
3438
+
3439
+ ```ruby
3440
+
3441
+ data_table = R.data__table(
3442
+ ID: R.c("b","b","b","a","a","c"),
3443
+ a: (1..6),
3444
+ b: (7..12),
3445
+ c: (13..18)
3446
+ )
3447
+
3448
+ puts data_table
3449
+ puts data_table.ID
3450
+ ```
3451
+
3452
+ ```
3453
+ ## ID a b c
3454
+ ## 1: b 1 7 13
3455
+ ## 2: b 2 8 14
3456
+ ## 3: b 3 9 15
3457
+ ## 4: a 4 10 16
3458
+ ## 5: a 5 11 17
3459
+ ## 6: c 6 12 18
3460
+ ## [1] "b" "b" "b" "a" "a" "c"
3461
+ ```
3462
+
3463
+
3464
+ ```ruby
3465
+ # subset rows in i
3466
+ ans = flights[(:origin.eq "JFK") & (:month.eq 6)]
3467
+ puts ans.head
3468
+
3469
+ # Get the first two rows from flights.
3470
+
3471
+ ans = flights[(1..2)]
3472
+ puts ans
3473
+
3474
+ # Sort flights first by column origin in ascending order, and then by dest in descending order:
3475
+
3476
+ # ans = flights[E.order(:origin, -(:dest))]
3477
+ # puts ans.head
3478
+ ```
3479
+
3480
+ ```
3481
+ ## year month day dep_delay arr_delay carrier origin dest air_time
3482
+ ## 1: 2014 6 1 -9 -5 AA JFK LAX 324
3483
+ ## 2: 2014 6 1 -10 -13 AA JFK LAX 329
3484
+ ## 3: 2014 6 1 18 -1 AA JFK LAX 326
3485
+ ## 4: 2014 6 1 -6 -16 AA JFK LAX 320
3486
+ ## 5: 2014 6 1 -4 -45 AA JFK LAX 326
3487
+ ## 6: 2014 6 1 -6 -23 AA JFK LAX 329
3488
+ ## distance hour
3489
+ ## 1: 2475 8
3490
+ ## 2: 2475 12
3491
+ ## 3: 2475 7
3492
+ ## 4: 2475 10
3493
+ ## 5: 2475 18
3494
+ ## 6: 2475 14
3495
+ ## year month day dep_delay arr_delay carrier origin dest air_time
3496
+ ## 1: 2014 1 1 14 13 AA JFK LAX 359
3497
+ ## 2: 2014 1 1 -3 13 AA JFK LAX 363
3498
+ ## distance hour
3499
+ ## 1: 2475 9
3500
+ ## 2: 2475 11
3501
+ ```
3502
+
3503
+
3504
+ ```ruby
3505
+ # Select column(s) in j
3506
+ # select arr_delay column, but return it as a vector.
3507
+
3508
+ ans = flights[:all, :arr_delay]
3509
+ puts ans.head
3510
+
3511
+ # Select arr_delay column, but return as a data.table instead.
3512
+
3513
+ ans = flights[:all, :arr_delay.list]
3514
+ puts ans.head
3515
+
3516
+ ans = flights[:all, E.list(:arr_delay, :dep_delay)]
3517
+ ```
3518
+
3519
+ ```
3520
+ ## [1] 13 13 9 -26 1 0
3521
+ ## arr_delay
3522
+ ## 1: 13
3523
+ ## 2: 13
3524
+ ## 3: 9
3525
+ ## 4: -26
3526
+ ## 5: 1
3527
+ ## 6: 0
3528
+ ```
3529
+
3530
+ # Graphics in Galaaz
3531
+
3532
+ Creating graphics in Galaaz is quite easy, as it can use all the power of ggplot2. There are
3533
+ many resources on the web that teach ggplot, so here we give a quick example of ggplot
3534
+ integration with Ruby. We continue to use the :mtcars dataset and we will plot a diverging
3535
+ bar plot, showing cars that have 'above' or 'below' gas consumption. Let's first prepare
3536
+ the data frame with the necessary data:
3537
+
3538
+
3539
+ ```ruby
3540
+ # copy the R variable :mtcars to the Ruby mtcars variable
3541
+ mtcars = ~:mtcars
3542
+
3543
+ # create a new column 'car_name' to store the car names so that it can be
3544
+ # used for plotting. The 'rownames' of the data frame cannot be used as
3545
+ # data for plotting
3546
+ mtcars.car_name = R.rownames(:mtcars)
3547
+
3548
+ # compute normalized mpg and add it to a new column called mpg_z
3549
+ # Note that the mean value for mpg can be obtained by calling the 'mean'
3550
+ # function on the vector 'mtcars.mpg'. The same with the standard
3551
+ # deviation 'sd'. The vector is then rounded to two digits with 'round 2'
3552
+ mtcars.mpg_z = ((mtcars.mpg - mtcars.mpg.mean)/mtcars.mpg.sd).round 2
3553
+
3554
+ # create a new column 'mpg_type'. Function 'ifelse' is a vectorized function
3555
+ # that looks at every element of the mpg_z vector and if the value is below
3556
+ # 0, returns 'below', otherwise returns 'above'
3557
+ mtcars.mpg_type = (mtcars.mpg_z < 0).ifelse("below", "above")
3558
+
3559
+ # order the mtcars data set by the mpg_z vector from smaller to larger values
3560
+ mtcars = mtcars[mtcars.mpg_z.order, :all]
3561
+
3562
+ # convert the car_name column to a factor to retain sorted order in plot
3563
+ mtcars.car_name = mtcars.car_name.factor levels: mtcars.car_name
3564
+
3565
+ # let's look at the final data frame
3566
+ puts mtcars.head
3567
+ ```
3568
+
3569
+ ```
3570
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
3571
+ ## Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
3572
+ ## Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4
3573
+ ## Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4
3574
+ ## Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4
3575
+ ## Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
3576
+ ## Maserati Bora 15.0 8 301 335 3.54 3.570 14.60 0 1 5 8
3577
+ ## car_name mpg_z mpg_type
3578
+ ## Cadillac Fleetwood Cadillac Fleetwood -1.61 below
3579
+ ## Lincoln Continental Lincoln Continental -1.61 below
3580
+ ## Camaro Z28 Camaro Z28 -1.13 below
3581
+ ## Duster 360 Duster 360 -0.96 below
3582
+ ## Chrysler Imperial Chrysler Imperial -0.89 below
3583
+ ## Maserati Bora Maserati Bora -0.84 below
3584
+ ```
3585
+ Now, let's plot the diverging bar plot. When using gKnit, there is no need to call
3586
+ 'R.awt' to create a plotting device, since gKnit does take care of it. Galaaz
3587
+ provides integration with ggplot. The interested reader should check online for more
3588
+ information on ggplot, since it is outside the scope of this manual describing
3589
+ how ggplot works. We give here but a brief description on how this plot is generated.
3590
+
3591
+ ggplot implements the 'grammar of graphics'. In this approach, plots are built by
3592
+ adding layers to the plot. On the first layer we describe what we want on the 'x'
3593
+ and 'y' axis of the plot. In this case, we have 'car_name' on the 'x' axis and
3594
+ 'mpg\_z' on the 'y' axis. Then the type of graph is specified by adding
3595
+ 'geom\_bar' (for a bar graph). We specify that our bars should be filled using
3596
+ 'mpg\_type', which is either 'above' or 'below', giving two colours for
3597
+ filling. On the next layer we specify the labels for the graph, then we add the
3598
+ title and subtitle. Finally, in a bar chart usually bars go on the vertical direction,
3599
+ but in this graph we want the bars to be laid out horizontally, so we add 'coord\_flip'.
3600
+
3601
+
3602
+ ```ruby
3603
+ require 'ggplot'
3604
+
3605
+ puts mtcars.ggplot(E.aes(x: :car_name, y: :mpg_z, label: :mpg_z)) +
3606
+ R.geom_bar(E.aes(fill: :mpg_type), stat: 'identity', width: 0.5) +
3607
+ R.scale_fill_manual(name: 'Mileage',
3608
+ labels: R.c('Above Average', 'Below Average'),
3609
+ values: R.c('above': '#00ba38', 'below': '#f8766d')) +
3610
+ R.labs(subtitle: "Normalised mileage from 'mtcars'",
3611
+ title: "Diverging Bars") +
3612
+ R.coord_flip
3613
+ ```
3614
+
3615
+
3616
+ ![](manual_files/figure-html/diverging_bar.png)<!-- -->
3617
+
3618
+ # Coding with Tidyverse
3619
+
3620
+ In R, and when coding with 'tidyverse', arguments to a function are usually not
3621
+ *referentially transparent*. That is, you can’t replace a value with a seemingly equivalent
3622
+ object that you’ve defined elsewhere. To see the problem, let's first define a data frame:
3623
+
3624
+
3625
+ ```ruby
3626
+ df = R.data__frame(x: (1..3), y: (3..1))
3627
+ puts df
3628
+ ```
3629
+
3630
+ ```
3631
+ ## x y
3632
+ ## 1 1 3
3633
+ ## 2 2 2
3634
+ ## 3 3 1
3635
+ ```
3636
+
3637
+ and now, let's look at this code:
3638
+
3639
+
3640
+ ```r
3641
+ my_var <- x
3642
+ filter(df, my_var == 1)
3643
+ ```
3644
+ It generates the following error: "object 'x' not found".
3645
+
3646
+ However, in Galaaz, arguments are referentially transparent as can be seen by the
3647
+ code below. Note initially that 'my_var = :x' will not give the error "object 'x' not found"
3648
+ since ':x' is treated as an expression and assigned to my\_var. Then when doing (my\_var.eq 1),
3649
+ my\_var is a variable that resolves to ':x' and it becomes equivalent to (:x.eq 1) which is
3650
+ what we want.
3651
+
3652
+
3653
+ ```ruby
3654
+ my_var = :x
3655
+ puts df.filter(my_var.eq 1)
3656
+ ```
3657
+
3658
+ ```
3659
+ ## x y
3660
+ ## 1 1 3
3661
+ ```
3662
+ As stated by Hadley
3663
+
3664
+ > dplyr code is ambiguous. Depending on what variables are defined where,
3665
+ > filter(df, x == y) could be equivalent to any of:
3666
+
3667
+ ```
3668
+ df[df$x == df$y, ]
3669
+ df[df$x == y, ]
3670
+ df[x == df$y, ]
3671
+ df[x == y, ]
3672
+ ```
3673
+ In galaaz this ambiguity does not exist, filter(df, x.eq y) is not a valid expression as
3674
+ expressions are built with symbols. In doing filter(df, :x.eq y) we are looking for elements
3675
+ of the 'x' column that are equal to a previously defined y variable. Finally in
3676
+ filter(df, :x.eq :y) we are looking for elements in which the 'x' column value is equal to
3677
+ the 'y' column value. This can be seen in the following two chunks of code:
3678
+
3679
+
3680
+ ```ruby
3681
+ y = 1
3682
+ x = 2
3683
+
3684
+ # looking for values where the 'x' column is equal to the 'y' column
3685
+ puts df.filter(:x.eq :y)
3686
+ ```
3687
+
3688
+ ```
3689
+ ## x y
3690
+ ## 1 2 2
3691
+ ```
3692
+
3693
+
3694
+ ```ruby
3695
+ # looking for values where the 'x' column is equal to the 'y' variable
3696
+ # in this case, the number 1
3697
+ puts df.filter(:x.eq y)
3698
+ ```
3699
+
3700
+ ```
3701
+ ## x y
3702
+ ## 1 1 3
3703
+ ```
3704
+ ## Writing a function that applies to different data sets
3705
+
3706
+ Let's suppose that we want to write a function that receives as the first argument a data frame
3707
+ and as second argument an expression that adds a column to the data frame that is equal to the
3708
+ sum of elements in column 'a' plus 'x'.
3709
+
3710
+ Here is the intended behaviour using the 'mutate' function of 'dplyr':
3711
+
3712
+ ```
3713
+ mutate(df1, y = a + x)
3714
+ mutate(df2, y = a + x)
3715
+ mutate(df3, y = a + x)
3716
+ mutate(df4, y = a + x)
3717
+ ```
3718
+ The naive approach to writing an R function to solve this problem is:
3719
+
3720
+ ```
3721
+ mutate_y <- function(df) {
3722
+ mutate(df, y = a + x)
3723
+ }
3724
+ ```
3725
+ Unfortunately, in R, this function can fail silently if one of the variables isn’t present
3726
+ in the data frame, but is present in the global environment. We will not go through here how
3727
+ to solve this problem in R.
3728
+
3729
+ In Galaaz the method mutate_y below will work fine and will never fail silently.
3730
+
3731
+
3732
+ ```ruby
3733
+ def mutate_y(df)
3734
+ df.mutate(:y.assign :a + :x)
3735
+ end
3736
+ ```
3737
+ Here we create a data frame that has only one column named 'x':
3738
+
3739
+
3740
+ ```ruby
3741
+ df1 = R.data__frame(x: (1..3))
3742
+ puts df1
3743
+ ```
3744
+
3745
+ ```
3746
+ ## x
3747
+ ## 1 1
3748
+ ## 2 2
3749
+ ## 3 3
3750
+ ```
3751
+
3752
+ Note that method mutate_y will fail independently of the fact that variable 'a' is defined and
3753
+ in the scope of the method. Variable 'a' has no relationship with the symbol ':a' used in the
3754
+ definition of 'mutate\_y' above:
3755
+
3756
+
3757
+ ```ruby
3758
+ a = 10
3759
+ mutate_y(df1)
3760
+ ```
3761
+
3762
+ ```
3763
+ ## Message:
3764
+ ## Error in mutate_impl(.data, dots) :
3765
+ ## Evaluation error: object 'a' not found.
3766
+ ## In addition: Warning message:
3767
+ ## In mutate_impl(.data, dots) :
3768
+ ## mismatched protect/unprotect (unprotect with empty protect stack) (RError)
3769
+ ## Translated to internal error
3770
+ ```
3771
+ ## Different expressions
3772
+
3773
+ Let's move to the next problem as presented by Hadley where trying to write a function in R
3774
+ that will receive two arguments, the first a variable and the second an expression is not trivial.
3775
+ Below we create a data frame and we want to write a function that groups data by a variable and
3776
+ summarises it by an expression:
3777
+
3778
+
3779
+ ```r
3780
+ set.seed(123)
3781
+
3782
+ df <- data.frame(
3783
+ g1 = c(1, 1, 2, 2, 2),
3784
+ g2 = c(1, 2, 1, 2, 1),
3785
+ a = sample(5),
3786
+ b = sample(5)
3787
+ )
3788
+
3789
+ as.data.frame(df)
3790
+ ```
3791
+
3792
+ ```
3793
+ ## g1 g2 a b
3794
+ ## 1 1 1 3 3
3795
+ ## 2 1 2 2 1
3796
+ ## 3 2 1 5 2
3797
+ ## 4 2 2 4 5
3798
+ ## 5 2 1 1 4
3799
+ ```
3800
+
3801
+ ```r
3802
+ d2 <- df %>%
3803
+ group_by(g1) %>%
3804
+ summarise(a = mean(a))
3805
+
3806
+ as.data.frame(d2)
3807
+ ```
3808
+
3809
+ ```
3810
+ ## g1 a
3811
+ ## 1 1 2.500000
3812
+ ## 2 2 3.333333
3813
+ ```
3814
+
3815
+ ```r
3816
+ d2 <- df %>%
3817
+ group_by(g2) %>%
3818
+ summarise(a = mean(a))
3819
+
3820
+ as.data.frame(d2)
3821
+ ```
3822
+
3823
+ ```
3824
+ ## g2 a
3825
+ ## 1 1 3
3826
+ ## 2 2 3
3827
+ ```
3828
+
3829
+ As shown by Hadley, one might expect this function to do the trick:
3830
+
3831
+
3832
+ ```r
3833
+ my_summarise <- function(df, group_var) {
3834
+ df %>%
3835
+ group_by(group_var) %>%
3836
+ summarise(a = mean(a))
3837
+ }
3838
+
3839
+ # my_summarise(df, g1)
3840
+ #> Error: Column `group_var` is unknown
3841
+ ```
3842
+
3843
+ In order to solve this problem, coding with dplyr requires the introduction of many new concepts
3844
+ and functions such as 'quo', 'quos', 'enquo', 'enquos', '!!' (bang bang), '!!!' (triple bang).
3845
+ Again, we'll leave to Hadley the explanation on how to use all those functions.
3846
+
3847
+ Now, let's try to implement the same function in galaaz. The next code block first prints the
3848
+ 'df' data frame defined previously in R (to access an R variable from Galaaz, we use the tilde
3849
+ operator '~' applied to the R variable name as symbol, i.e., ':df').
3850
+
3851
+
3852
+ ```ruby
3853
+ puts ~:df
3854
+ ```
3855
+
3856
+ ```
3857
+ ## g1 g2 a b
3858
+ ## 1 1 1 3 3
3859
+ ## 2 1 2 2 1
3860
+ ## 3 2 1 5 2
3861
+ ## 4 2 2 4 5
3862
+ ## 5 2 1 1 4
3863
+ ```
3864
+
3865
+ We then create the 'my_summarize' method and call it passing the R data frame and
3866
+ the group by variable ':g1':
3867
+
3868
+
3869
+ ```ruby
3870
+ def my_summarize(df, group_var)
3871
+ df.group_by(group_var).
3872
+ summarize(a: :a.mean)
3873
+ end
3874
+
3875
+ puts my_summarize(:df, :g1)
3876
+ ```
3877
+
3878
+ ```
3879
+ ## # A tibble: 2 x 2
3880
+ ## g1 a
3881
+ ## <dbl> <dbl>
3882
+ ## 1 1 2.5
3883
+ ## 2 2 3.33
3884
+ ```
3885
+
3886
+ It works!!! Well, let's make sure this was not just some coincidence
3887
+
3888
+
3889
+ ```ruby
3890
+ puts my_summarize(:df, :g2)
3891
+ ```
3892
+
3893
+ ```
3894
+ ## # A tibble: 2 x 2
3895
+ ## g2 a
3896
+ ## <dbl> <dbl>
3897
+ ## 1 1 3
3898
+ ## 2 2 3
3899
+ ```
3900
+
3901
+ Great, everything is fine! No magic, no new functions, no complexities, just normal, standard Ruby
3902
+ code. If you've ever done NSE in R, this certainly feels much safer and easy to implement.
3903
+
3904
+ ## Different input variables
3905
+
3906
+ In the previous section we've managed to get rid of all NSE formulation for a simple example, but
3907
+ does this remain true for more complex examples, or will the Galaaz way prove impractical for
3908
+ more complex code?
3909
+
3910
+ In the next example Hadley proposes that we write a function that given an expression such as 'a'
3911
+ or 'a * b', calculates three summaries. What we want is a function that does the same as these R
3912
+ statements:
3913
+
3914
+ ```
3915
+ summarise(df, mean = mean(a), sum = sum(a), n = n())
3916
+ #> # A tibble: 1 x 3
3917
+ #> mean sum n
3918
+ #> <dbl> <int> <int>
3919
+ #> 1 3 15 5
3920
+
3921
+ summarise(df, mean = mean(a * b), sum = sum(a * b), n = n())
3922
+ #> # A tibble: 1 x 3
3923
+ #> mean sum n
3924
+ #> <dbl> <int> <int>
3925
+ #> 1 9 45 5
3926
+ ```
3927
+
3928
+ Let's try it in galaaz:
3929
+
3930
+
3931
+ ```ruby
3932
+ def my_summarise2(df, expr)
3933
+ df.summarize(
3934
+ mean: E.mean(expr),
3935
+ sum: E.sum(expr),
3936
+ n: E.n
3937
+ )
3938
+ end
3939
+
3940
+ puts my_summarise2((~:df), :a)
3941
+ puts "\n"
3942
+ puts my_summarise2((~:df), :a * :b)
3943
+ ```
3944
+
3945
+ ```
3946
+ ## mean sum n
3947
+ ## 1 3 15 5
3948
+ ##
3949
+ ## mean sum n
3950
+ ## 1 9 45 5
3951
+ ```
3952
+
3953
+ Once again, there is no need to use any special theory or functions. The only point to be
3954
+ careful about is the use of 'E' to build expressions from functions 'mean', 'sum' and 'n'.
3955
+
3956
+ ## Different input and output variable
3957
+
3958
+ Now the next challenge presented by Hadley is to vary the name of the output variables based on
3959
+ the received expression. So, if the input expression is 'a', we want our data frame columns to
3960
+ be named 'mean\_a' and 'sum\_a'. Now, if the input expression is 'b', columns
3961
+ should be named 'mean\_b' and 'sum\_b'.
3962
+
3963
+ ```
3964
+ mutate(df, mean_a = mean(a), sum_a = sum(a))
3965
+ #> # A tibble: 5 x 6
3966
+ #> g1 g2 a b mean_a sum_a
3967
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
3968
+ #> 1 1 1 1 3 3 15
3969
+ #> 2 1 2 4 2 3 15
3970
+ #> 3 2 1 2 1 3 15
3971
+ #> 4 2 2 5 4 3 15
3972
+ #> # … with 1 more row
3973
+
3974
+ mutate(df, mean_b = mean(b), sum_b = sum(b))
3975
+ #> # A tibble: 5 x 6
3976
+ #> g1 g2 a b mean_b sum_b
3977
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
3978
+ #> 1 1 1 1 3 3 15
3979
+ #> 2 1 2 4 2 3 15
3980
+ #> 3 2 1 2 1 3 15
3981
+ #> 4 2 2 5 4 3 15
3982
+ #> # … with 1 more row
3983
+ ```
3984
+ In order to solve this problem in R, Hadley needs to introduce some more new functions and notations:
3985
+ 'quo_name' and the ':=' operator from package 'rlang'
3986
+
3987
+ Here is our Ruby code:
3988
+
3989
+
3990
+ ```ruby
3991
+ def my_mutate(df, expr)
3992
+ mean_name = "mean_#{expr.to_s}"
3993
+ sum_name = "sum_#{expr.to_s}"
3994
+
3995
+ df.mutate(mean_name => E.mean(expr),
3996
+ sum_name => E.sum(expr))
3997
+ end
3998
+
3999
+ puts my_mutate((~:df), :a)
4000
+ puts "\n"
4001
+ puts my_mutate((~:df), :b)
4002
+ ```
4003
+
4004
+ ```
4005
+ ## g1 g2 a b mean_a sum_a
4006
+ ## 1 1 1 3 3 3 15
4007
+ ## 2 1 2 2 1 3 15
4008
+ ## 3 2 1 5 2 3 15
4009
+ ## 4 2 2 4 5 3 15
4010
+ ## 5 2 1 1 4 3 15
4011
+ ##
4012
+ ## g1 g2 a b mean_b sum_b
4013
+ ## 1 1 1 3 3 3 15
4014
+ ## 2 1 2 2 1 3 15
4015
+ ## 3 2 1 5 2 3 15
4016
+ ## 4 2 2 4 5 3 15
4017
+ ## 5 2 1 1 4 3 15
4018
+ ```
4019
+ It really seems that "Non Standard Evaluation" is actually quite standard in Galaaz! But, you
4020
+ might have noticed a small change in the way the arguments to the mutate method were called.
4021
+ In a previous example we used df.summarise(mean: E.mean(:a), ...) where the column name was
4022
+ followed by a ':' (colon). In this example, we have df.mutate(mean_name => E.mean(expr), ...)
4023
+ and variable mean\_name is not followed by ':' but by '=>'. This is standard Ruby notation.
4024
+
4025
+ [explain....]
4026
+
4027
+ ## Capturing multiple variables
4028
+
4029
+ Moving on with new complexities, Hadley asks us to solve the problem in which the
4030
+ summarise function will receive any number of grouping variables.
4031
+
4032
+ This again is quite standard Ruby. In order to receive an undefined number of parameters
4033
+ the parameter is preceded by '*':
4034
+
4035
+
4036
+ ```ruby
4037
+ def my_summarise3(df, *group_vars)
4038
+ df.group_by(*group_vars).
4039
+ summarise(a: E.mean(:a))
4040
+ end
4041
+
4042
+ puts my_summarise3((~:df), :g1, :g2)
4043
+ ```
4044
+
4045
+ ```
4046
+ ## # A tibble: 4 x 3
4047
+ ## # Groups: g1 [?]
4048
+ ## g1 g2 a
4049
+ ## <dbl> <dbl> <dbl>
4050
+ ## 1 1 1 3
4051
+ ## 2 1 2 2
4052
+ ## 3 2 1 3
4053
+ ## 4 2 2 4
4054
+ ```
4055
+
4056
+ ## Why does R require NSE and Galaaz does not?
4057
+
4058
+ NSE introduces a number of new concepts, such as 'quoting', 'quasiquotation', 'unquoting' and
4059
+ 'unquote-splicing', while in Galaaz none of those concepts are needed. What gives?
4060
+
4061
+ R is an extremely flexible language and it has lazy evaluation of parameters. When in R a
4062
+ function is called as 'summarise(df, a = b)', the summarise function receives the literal
4063
+ 'a = b' parameter and can work with this as if it were a string. In R, it is not clear what
4064
+ a and b are, they can be expressions or they can be variables, it is up to the function to
4065
+ decide what 'a = b' means.
4066
+
4067
+ In Ruby, there is no lazy evaluation of parameters and 'a' is always a variable and so is 'b'.
4068
+ Variables assume their value as soon as they are used, so 'x = a' is immediately evaluated and
4069
+ variable 'x' will receive the value of variable 'a' as soon as the Ruby statement is executed.
4070
+ Ruby also provides the notion of a symbol; ':a' is a symbol and does not evaluate to anything.
4071
+ Galaaz uses Ruby symbols to build expressions that are not bound to anything: ':a.eq :b' is
4072
+ clearly an expression and has no relationship whatsoever with the statement 'a = b'. By using
4073
+ symbols, variables and expressions all the possible ambiguities that are found in R are
4074
+ eliminated in Galaaz.
4075
+
4076
+ The main problem that remains, is that in R, functions are not clearly documented as what type
4077
+ of input they are expecting, they might be expecting regular variables or they might be
4078
+ expecting expressions and the R function will know how to deal with an input of the form
4079
+ 'a = b', now for the Ruby developer it might not be immediately clear if it should call the
4080
+ function passing the value 'true' if variable 'a' is equal to variable 'b' or if it should
4081
+ call the function passing the expression ':a.eq :b'.
4082
+
4083
+
4084
+ ## Advanced dplyr features
4085
+
4086
+ In the blog: Programming with dplyr by using dplyr (https://www.r-bloggers.com/programming-with-dplyr-by-using-dplyr/) Iñaki Úcar shows surprise that some R users are trying to code in dplyr avoiding
4087
+ the use of NSE. For instance he says:
4088
+
4089
+ > Take the example of seplyr. It stands for standard evaluation dplyr, and enables us to
4090
+ > program over dplyr without having “to bring in (or study) any deep-theory or
4091
+ > heavy-weight tools such as rlang/tidyeval”.
4092
+
4093
+ For me, there isn't really any surprise that users are trying to avoid dplyr deep-theory. R
4094
+ users frequently are not programmers and learning to code is already hard business, on top
4095
+ of that, having to learn how to 'quote' or 'enquo' or 'quos' or 'enquos' is not necessarily
4096
+ a 'piece of cake'. So much so, that 'tidyeval' has some more advanced functions that instead
4097
+ of using quoted expressions, uses strings as arguments.
4098
+
4099
+ In the following examples, we show the use of functions 'group\_by\_at', 'summarise\_at' and
4100
+ 'rename\_at' that receive strings as argument. The data frame used is 'starwars', which describes
4101
+ features of characters in the Starwars movies:
4102
+
4103
+
4104
+ ```ruby
4105
+ puts (~:starwars).head
4106
+ ```
4107
+
4108
+ ```
4109
+ ## # A tibble: 6 x 13
4110
+ ## name height mass hair_color skin_color eye_color birth_year gender
4111
+ ## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
4112
+ ## 1 Luke… 172 77 blond fair blue 19 male
4113
+ ## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
4114
+ ## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
4115
+ ## 4 Dart… 202 136 none white yellow 41.9 male
4116
+ ## 5 Leia… 150 49 brown light brown 19 female
4117
+ ## 6 Owen… 178 120 brown, gr… light blue 52 male
4118
+ ## # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
4119
+ ## # vehicles <list>, starships <list>
4120
+ ```
4121
+ The grouped_mean function below will receive a grouping variable and calculate summaries for
4122
+ the value\_variables given:
4123
+
4124
+
4125
+ ```r
4126
+ grouped_mean <- function(data, grouping_variables, value_variables) {
4127
+ data %>%
4128
+ group_by_at(grouping_variables) %>%
4129
+ mutate(count = n()) %>%
4130
+ summarise_at(c(value_variables, "count"), mean, na.rm = TRUE) %>%
4131
+ rename_at(value_variables, funs(paste0("mean_", .)))
4132
+ }
4133
+
4134
+ gm = starwars %>%
4135
+ grouped_mean("eye_color", c("mass", "birth_year"))
4136
+
4137
+ as.data.frame(gm)
4138
+ ```
4139
+
4140
+ ```
4141
+ ## eye_color mean_mass mean_birth_year count
4142
+ ## 1 black 76.28571 33.00000 10
4143
+ ## 2 blue 86.51667 67.06923 19
4144
+ ## 3 blue-gray 77.00000 57.00000 1
4145
+ ## 4 brown 66.09231 108.96429 21
4146
+ ## 5 dark NaN NaN 1
4147
+ ## 6 gold NaN NaN 1
4148
+ ## 7 green, yellow 159.00000 NaN 1
4149
+ ## 8 hazel 66.00000 34.50000 3
4150
+ ## 9 orange 282.33333 231.00000 8
4151
+ ## 10 pink NaN NaN 1
4152
+ ## 11 red 81.40000 33.66667 5
4153
+ ## 12 red, blue NaN NaN 1
4154
+ ## 13 unknown 31.50000 NaN 3
4155
+ ## 14 white 48.00000 NaN 1
4156
+ ## 15 yellow 81.11111 76.38000 11
4157
+ ```
4158
+
4159
+ The same code with Galaaz, becomes:
4160
+
4161
+
4162
+ ```ruby
4163
+ def grouped_mean(data, grouping_variables, value_variables)
4164
+ data.
4165
+ group_by_at(grouping_variables).
4166
+ mutate(count: E.n).
4167
+ summarise_at(E.c(value_variables, "count"), ~:mean, na__rm: true).
4168
+ rename_at(value_variables, E.funs(E.paste0("mean_", value_variables)))
4169
+ end
4170
+
4171
+ puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year"))
4172
+ ```
4173
+
4174
+ ```
4175
+ ## # A tibble: 15 x 4
4176
+ ## eye_color mean_mass mean_birth_year count
4177
+ ## <chr> <dbl> <dbl> <dbl>
4178
+ ## 1 black 76.3 33 10
4179
+ ## 2 blue 86.5 67.1 19
4180
+ ## 3 blue-gray 77 57 1
4181
+ ## 4 brown 66.1 109. 21
4182
+ ## 5 dark NaN NaN 1
4183
+ ## 6 gold NaN NaN 1
4184
+ ## 7 green, yellow 159 NaN 1
4185
+ ## 8 hazel 66 34.5 3
4186
+ ## 9 orange 282. 231 8
4187
+ ## 10 pink NaN NaN 1
4188
+ ## 11 red 81.4 33.7 5
4189
+ ## 12 red, blue NaN NaN 1
4190
+ ## 13 unknown 31.5 NaN 3
4191
+ ## 14 white 48 NaN 1
4192
+ ## 15 yellow 81.1 76.4 11
4193
+ ```
4194
+
4195
+
4196
+ [TO BE CONTINUED...]
4197
+
4198
+
4199
+ # Contributing
744
4200
 
745
4201
  * Fork it
746
4202
  * Create your feature branch (git checkout -b my-new-feature)
@@ -749,3 +4205,4 @@ puts gg
749
4205
  * Push to the branch (git push origin my-new-feature)
750
4206
  * Create new Pull Request
751
4207
 
4208
+ # References