galaaz 0.4.6 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +3575 -118
  3. data/Rakefile +21 -4
  4. data/bin/gknit +152 -6
  5. data/bin/gknit-draft +105 -0
  6. data/bin/gknit-draft.rb +28 -0
  7. data/bin/gknit_Rscript +127 -0
  8. data/bin/grun +27 -1
  9. data/bin/gstudio +47 -4
  10. data/bin/{gstudio.rb → gstudio_irb.rb} +0 -0
  11. data/bin/gstudio_pry.rb +7 -0
  12. data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +3 -12
  13. data/blogs/galaaz_ggplot/galaaz_ggplot.html +77 -222
  14. data/blogs/galaaz_ggplot/galaaz_ggplot.md +4 -31
  15. data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
  16. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
  17. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
  18. data/blogs/galaaz_ggplot/midwest.Rmd +1 -9
  19. data/blogs/gknit/gknit.Rmd +232 -123
  20. data/blogs/{dev/dev.html → gknit/gknit.html} +1897 -33
  21. data/blogs/gknit/gknit.pdf +0 -0
  22. data/blogs/gknit/lst.rds +0 -0
  23. data/blogs/gknit/stats.bib +27 -0
  24. data/blogs/manual/lst.rds +0 -0
  25. data/blogs/manual/manual.Rmd +1893 -47
  26. data/blogs/manual/manual.html +3153 -347
  27. data/blogs/manual/manual.md +3575 -118
  28. data/blogs/manual/manual.pdf +0 -0
  29. data/blogs/manual/manual.tex +4026 -0
  30. data/blogs/manual/manual_files/figure-html/bubble-1.png +0 -0
  31. data/blogs/manual/manual_files/figure-html/diverging_bar.png +0 -0
  32. data/blogs/manual/manual_files/figure-latex/bubble-1.png +0 -0
  33. data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
  34. data/blogs/{dev → manual}/model.rb +0 -0
  35. data/blogs/nse_dplyr/nse_dplyr.Rmd +849 -0
  36. data/blogs/nse_dplyr/nse_dplyr.html +878 -0
  37. data/blogs/nse_dplyr/nse_dplyr.md +1198 -0
  38. data/blogs/nse_dplyr/nse_dplyr.pdf +0 -0
  39. data/blogs/oh_my/oh_my.html +274 -386
  40. data/blogs/oh_my/oh_my.md +208 -205
  41. data/blogs/ruby_plot/ruby_plot.Rmd +64 -84
  42. data/blogs/ruby_plot/ruby_plot.html +235 -208
  43. data/blogs/ruby_plot/ruby_plot.md +239 -34
  44. data/blogs/ruby_plot/ruby_plot.pdf +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  46. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  47. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  48. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  49. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  50. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  51. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  52. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  53. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  54. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  55. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  56. data/examples/Bibliography/master.bib +50 -0
  57. data/examples/Bibliography/stats.bib +72 -0
  58. data/examples/islr/ch2.spec.rb +1 -1
  59. data/examples/islr/ch3_boston.rb +4 -4
  60. data/examples/islr/x_y_rnorm.jpg +0 -0
  61. data/examples/latex_templates/Test-acm_article/Makefile +16 -0
  62. data/examples/latex_templates/Test-acm_article/Test-acm_article.Rmd +65 -0
  63. data/examples/latex_templates/Test-acm_article/acm_proc_article-sp.cls +1670 -0
  64. data/examples/latex_templates/Test-acm_article/sensys-abstract.cls +703 -0
  65. data/examples/latex_templates/Test-acm_article/sigproc.bib +59 -0
  66. data/examples/latex_templates/Test-acs_article/Test-acs_article.Rmd +260 -0
  67. data/examples/latex_templates/Test-acs_article/Test-acs_article.pdf +0 -0
  68. data/examples/latex_templates/Test-acs_article/acs-Test-acs_article.bib +11 -0
  69. data/examples/latex_templates/Test-acs_article/acs-my_output.bib +11 -0
  70. data/examples/latex_templates/Test-acs_article/acstest.bib +17 -0
  71. data/examples/latex_templates/Test-aea_article/AEA.cls +1414 -0
  72. data/examples/latex_templates/Test-aea_article/BibFile.bib +0 -0
  73. data/examples/latex_templates/Test-aea_article/Test-aea_article.Rmd +108 -0
  74. data/examples/latex_templates/Test-aea_article/Test-aea_article.pdf +0 -0
  75. data/examples/latex_templates/Test-aea_article/aea.bst +1269 -0
  76. data/examples/latex_templates/Test-aea_article/multicol.sty +853 -0
  77. data/examples/latex_templates/Test-aea_article/references.bib +0 -0
  78. data/examples/latex_templates/Test-aea_article/setspace.sty +546 -0
  79. data/examples/latex_templates/Test-amq_article/Test-amq_article.Rmd +256 -0
  80. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdf +0 -0
  81. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdfsync +3397 -0
  82. data/examples/latex_templates/Test-amq_article/pics/Figure2.pdf +0 -0
  83. data/examples/latex_templates/Test-ams_article/Test-ams_article.Rmd +215 -0
  84. data/examples/latex_templates/Test-ams_article/amstest.bib +436 -0
  85. data/examples/latex_templates/Test-asa_article/Test-asa_article.Rmd +153 -0
  86. data/examples/latex_templates/Test-asa_article/Test-asa_article.pdf +0 -0
  87. data/examples/latex_templates/Test-asa_article/agsm.bst +1353 -0
  88. data/examples/latex_templates/Test-asa_article/bibliography.bib +233 -0
  89. data/examples/latex_templates/Test-ieee_article/IEEEtran.bst +2409 -0
  90. data/examples/latex_templates/Test-ieee_article/IEEEtran.cls +6346 -0
  91. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.Rmd +175 -0
  92. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.pdf +0 -0
  93. data/examples/latex_templates/Test-ieee_article/mybibfile.bib +20 -0
  94. data/examples/latex_templates/Test-rjournal_article/RJournal.sty +335 -0
  95. data/examples/latex_templates/Test-rjournal_article/RJreferences.bib +18 -0
  96. data/examples/latex_templates/Test-rjournal_article/RJwrapper.pdf +0 -0
  97. data/examples/latex_templates/Test-rjournal_article/Test-rjournal_article.Rmd +52 -0
  98. data/examples/latex_templates/Test-springer_article/Test-springer_article.Rmd +65 -0
  99. data/examples/latex_templates/Test-springer_article/Test-springer_article.pdf +0 -0
  100. data/examples/latex_templates/Test-springer_article/bibliography.bib +26 -0
  101. data/examples/latex_templates/Test-springer_article/spbasic.bst +1658 -0
  102. data/examples/latex_templates/Test-springer_article/spmpsci.bst +1512 -0
  103. data/examples/latex_templates/Test-springer_article/spphys.bst +1443 -0
  104. data/examples/latex_templates/Test-springer_article/svglov3.clo +113 -0
  105. data/examples/latex_templates/Test-springer_article/svjour3.cls +1431 -0
  106. data/examples/misc/moneyball.rb +1 -1
  107. data/examples/misc/subsetting.rb +37 -37
  108. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.Rmd +73 -0
  109. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.pdf +0 -0
  110. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.Rmd +382 -0
  111. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.pdf +0 -0
  112. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.Rmd +164 -0
  113. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.pdf +0 -0
  114. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.Rmd +92 -0
  115. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.pdf +0 -0
  116. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/attend-grade-relationships.csv +482 -0
  117. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.Rmd +280 -0
  118. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.pdf +0 -0
  119. data/examples/rmarkdown/svm-xaringan-example/svm-xaringan-example.Rmd +386 -0
  120. data/lib/R_interface/r.rb +2 -2
  121. data/lib/R_interface/r_libs.R +6 -1
  122. data/lib/R_interface/r_methods.rb +12 -2
  123. data/lib/R_interface/rdata_frame.rb +8 -17
  124. data/lib/R_interface/rindexed_object.rb +1 -2
  125. data/lib/R_interface/rlist.rb +1 -0
  126. data/lib/R_interface/robject.rb +20 -23
  127. data/lib/R_interface/rpkg.rb +15 -6
  128. data/lib/R_interface/rsupport.rb +13 -19
  129. data/lib/R_interface/ruby_extensions.rb +14 -18
  130. data/lib/R_interface/rvector.rb +0 -12
  131. data/lib/gknit.rb +2 -0
  132. data/lib/gknit/draft.rb +105 -0
  133. data/lib/gknit/knitr_engine.rb +6 -37
  134. data/lib/util/exec_ruby.rb +22 -84
  135. data/lib/util/inline_file.rb +7 -3
  136. data/specs/figures/bg.jpeg +0 -0
  137. data/specs/figures/bg.png +0 -0
  138. data/specs/figures/bg.svg +2 -2
  139. data/specs/figures/dose_len.png +0 -0
  140. data/specs/figures/no_args.jpeg +0 -0
  141. data/specs/figures/no_args.png +0 -0
  142. data/specs/figures/no_args.svg +2 -2
  143. data/specs/figures/width_height.jpeg +0 -0
  144. data/specs/figures/width_height.png +0 -0
  145. data/specs/figures/width_height_units1.jpeg +0 -0
  146. data/specs/figures/width_height_units1.png +0 -0
  147. data/specs/figures/width_height_units2.jpeg +0 -0
  148. data/specs/figures/width_height_units2.png +0 -0
  149. data/specs/r_dataframe.spec.rb +184 -11
  150. data/specs/r_list.spec.rb +4 -4
  151. data/specs/r_list_apply.spec.rb +11 -10
  152. data/specs/ruby_expression.spec.rb +3 -11
  153. data/specs/tmp.rb +106 -34
  154. data/version.rb +1 -1
  155. metadata +96 -33
  156. data/bin/gknit_old_r +0 -236
  157. data/blogs/dev/dev.Rmd +0 -77
  158. data/blogs/dev/dev.md +0 -87
  159. data/blogs/dev/dev_files/figure-html/bubble-1.png +0 -0
  160. data/blogs/dev/dev_files/figure-html/diverging_bar. +0 -0
  161. data/blogs/dev/dev_files/figure-html/diverging_bar.png +0 -0
  162. data/blogs/dplyr/dplyr.rb +0 -63
  163. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +0 -43
  164. data/blogs/galaaz_ggplot/galaaz_ggplot.log +0 -640
  165. data/blogs/galaaz_ggplot/galaaz_ggplot.out +0 -10
  166. data/blogs/galaaz_ggplot/galaaz_ggplot.tex +0 -481
  167. data/blogs/galaaz_ggplot/midwest.png +0 -0
  168. data/blogs/galaaz_ggplot/scatter_plot.png +0 -0
  169. data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +0 -662
  170. data/blogs/ruby_plot/ruby_plot.tex +0 -1077
  171. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +0 -57
  172. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +0 -106
  173. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +0 -110
  174. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +0 -174
  175. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +0 -236
  176. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +0 -296
  177. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +0 -236
  178. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +0 -218
  179. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +0 -128
  180. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +0 -150
  181. data/examples/paper/paper.rb +0 -36
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 567380b5c235ab363a1b4c5848e06126a0ee635b
4
- data.tar.gz: b0d4735743f9f6f50af6e9231e9bad2001bf2e65
2
+ SHA256:
3
+ metadata.gz: 0a936fac80a3198849bf43505e3badca81025fcef2b942fabe5edc328b6d35f3
4
+ data.tar.gz: 4aa40b1d667ee45ab94ee8e9565401e718179ad261c043a2173fe50d5b97dfb2
5
5
  SHA512:
6
- metadata.gz: b6e9847e52df71021cbcc4e67e21bb5f15cde4e85bf52def332ef5acb0acab23542db97a7907bab6ec00ee5f038c22412e250e1af7c317ab99ceb6bb7007011b
7
- data.tar.gz: 1d2e3694c81ba8de5f06ded79c5d04e3d987d97680cc898d51aa5fadfc4f5ef3e45741164a106680dd6f5df9f55461d526f5d88c4ff500eb68254c0da5728eaa
6
+ metadata.gz: 34974a5d148a2f0896fa07ef26f046af1b43d1263750732d072e6614ad8f3ff32783248a02228acd9b6c0f2183ddb68c91a6dd93aebd51198c594c1f6e513298
7
+ data.tar.gz: 88ea82fcf3e298deacdae6c7305faabff38d89b41a526a8f0e528c00555190acd84006764365c0fa7e913e361f3ecaf69cdf1c00332b80d4ba7d276dad7d10fe
data/README.md CHANGED
@@ -4,6 +4,7 @@ subtitle: "How to tightly couple Ruby and R in GraalVM"
4
4
  author: "Rodrigo Botafogo"
5
5
  tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, ggplot2]
6
6
  date: "2019"
7
+ bibliography: "/home/rbotafogo/Bibliography/stats.bib"
7
8
  output:
8
9
  html_document:
9
10
  self_contained: true
@@ -16,13 +17,12 @@ output:
16
17
  keep_tex: yes
17
18
  number_sections: yes
18
19
  toc: true
19
- toc_depth: 2
20
+ toc_depth: 3
20
21
  fontsize: 11pt
21
22
  ---
22
23
 
23
24
 
24
25
 
25
-
26
26
  # Introduction
27
27
 
28
28
  Galaaz is a system for tightly coupling Ruby and R. Ruby is a powerful language, with a large
@@ -32,6 +32,92 @@ other hand, R is considered one of the most powerful languages for solving all o
32
32
  problems. Maybe the strongest competitor to R is Python with libraries such as NumPy,
33
33
  Panda, SciPy, SciKit-Learn and a couple more.
34
34
 
35
+ With Galaaz we do not intend to re-implement any of the scientific libraries in R, we allow
36
+ for very tight coupling between the two languages to the point that the Ruby developer does
37
+ not need to know that there is an R engine running.
38
+
39
+ According to Wikipedia "Ruby is a dynamic, interpreted, reflective, object-oriented,
40
+ general-purpose programming language. It was designed and developed in the mid-1990s by Yukihiro
41
+ "Matz" Matsumoto in Japan." It reached high popularity with the development of Ruby on Rails
42
+ (RoR) by David Heinemeier Hansson. RoR is a web application framework first released
43
+ around 2005. It makes extensive use of Ruby's metaprogramming features. With RoR,
44
+ Ruby became very popular. According to [Ruby's Tiobe index](https://www.tiobe.com/tiobe-index/ruby/)
45
+ it peaked in popularity around 2008, then declined until 2015 when it started picking up again.
46
+ At the time of this writing (November 2018), the Tiobe index puts Ruby in 16th position as
47
+ most popular language.
48
+
49
+ Python, a language similar to Ruby, ranks 4th in the index. Java, C and C++ take the
50
+ first three positions. Ruby is often criticized for its focus on web applications.
51
+ But Ruby can do [much more](https://github.com/markets/awesome-ruby) than just web applications.
52
+ Yet, for scientific computing, Ruby lags way behind Python and R. Python has
53
+ Django framework for web, NumPy for numerical arrays, Pandas for data analysis.
54
+ R is a free software environment for statistical computing and graphics with thousands
55
+ of libraries for data analysis.
56
+
57
+ Until recently, there was no real perspective for Ruby to bridge this gap.
58
+ Implementing a complete scientific computing infrastructure would take too long.
59
+ Enter [Oracle's GraalVM](https://www.graalvm.org/):
60
+
61
+ > GraalVM is a universal virtual machine for running applications written in
62
+ > JavaScript, Python 3, Ruby, R, JVM-based languages like Java, Scala, Kotlin,
63
+ > and LLVM-based languages such as C and C++.
64
+ >
65
+ > GraalVM removes the isolation between programming languages and enables
66
+ > interoperability in a shared runtime. It can run either standalone or in the
67
+ > context of OpenJDK, Node.js, Oracle Database, or MySQL.
68
+ >
69
+ > GraalVM allows you to write polyglot applications with a seamless way to pass
70
+ > values from one language to another. With GraalVM there is no copying or
71
+ > marshaling necessary as it is with other polyglot systems. This lets you
72
+ > achieve high performance when language boundaries are crossed. Most of the time
73
+ > there is no additional cost for crossing a language boundary at all.
74
+ >
75
+ > Often developers have to make uncomfortable compromises that require them
76
+ > to rewrite their software in other languages. For example:
77
+ >
78
+ > * That library is not available in my language. I need to rewrite it.
79
+ > * That language would be the perfect fit for my problem, but we cannot
80
+ > run it in our environment.
81
+ > * That problem is already solved in my language, but the language is
82
+ > too slow.
83
+ >
84
+ > With GraalVM we aim to allow developers to freely choose the right language for
85
+ > the task at hand without making compromises.
86
+
87
+ As stated above, GraalVM is a _universal_ virtual machine that allows Ruby and R (and other
88
+ languages) to run on the same environment. GraalVM allows polyglot applications to
89
+ _seamlessly_ interact with one another and pass values from one language to the other.
90
+ Although a great idea, GraalVM still requires application writers to know several languages.
91
+ To eliminate that requirement, we built Galaaz, a gem for Ruby, to tightly couple
92
+ Ruby and R and allow those languages to interact in a way that the user will be unaware
93
+ of such interaction. In other words, a Ruby programmer will be able to use all
94
+ the capabilities of R without knowing the R syntax.
95
+
96
+ Library wrapping is a usual way of bringing features from one language into another.
97
+ To improve performance, Python often wraps more efficient C libraries. For the
98
+ Python developer, the existence of such C libraries is hidden. The problem with
99
+ library wrapping is that for any new library, there is the need to handcraft a new
100
+ wrapper.
101
+
102
+ Galaaz, instead of wrapping a single C or R library, wraps the whole R language
103
+ in Ruby. Doing so, all thousands of R libraries are available immediately
104
+ to Ruby developers without any new wrapping effort.
105
+
106
+ ## What does Galaaz mean
107
+
108
+ Galaaz is the Portuguese name for "Galahad". From Wikipedia:
109
+
110
+ Sir Galahad (sometimes referred to as Galeas or Galath),
111
+ in Arthurian legend, is a knight of King Arthur's Round Table and one
112
+ of the three achievers of the Holy Grail. He is the illegitimate son
113
+ of Sir Lancelot and Elaine of Corbenic, and is renowned for his
114
+ gallantry and purity as the most perfect of all knights. Emerging quite
115
+ late in the medieval Arthurian tradition, Sir Galahad first appears in the
116
+ Lancelot–Grail cycle, and his story is taken up in later works such as
117
+ the Post-Vulgate Cycle and Sir Thomas Malory's Le Morte d'Arthur.
118
+ His name should not be mistaken with Galehaut, a different knight from
119
+ Arthurian legend.
120
+
35
121
  # System Compatibility
36
122
 
37
123
  * Oracle Linux 7
@@ -87,7 +173,7 @@ Panda, SciPy, SciKit-Learn and a couple more.
87
173
  > galaaz -T
88
174
 
89
175
  Shows a list with all available executable tasks. To execute a task, substitute the
90
- 'rake' word in the list with 'galaaz'. For instance, the following line shows up
176
+ 'rake' word in the list with 'galaaz'. For instance, the following line shows up
91
177
  after 'galaaz -T'
92
178
 
93
179
  rake master_list:scatter_plot # scatter_plot from:....
@@ -96,147 +182,713 @@ Panda, SciPy, SciKit-Learn and a couple more.
96
182
 
97
183
  > galaaz master_list:scatter_plot
98
184
 
99
- # Basic Types
100
185
 
101
- ## Vectors
186
+ # Accessing R from Ruby
102
187
 
103
- Vectors can be thought of as contiguous cells containing data. Cells are accessed through
104
- indexing operations such as x[5]. Galaaz has six basic (‘atomic’) vector types: logical,
105
- integer, real, complex, string (or character) and raw. The modes and storage modes for the
106
- different vector types are listed in the following
107
- table.
188
+ One of the nice aspects of Galaaz on GraalVM, is that variables and functions defined in R, can
189
+ be easily accessed from Ruby. For instance, to access the 'mtcars' data frame from R
190
+ in Ruby, we use the ':mtcars' symbol preceded by the '~' operator, thus '~:mtcars' retrieves the
191
+ value of the 'mtcars' variable.
108
192
 
109
- | typeof | mode | storage.mode |
110
- |-----------|:---------:|-------------:|
111
- | logical | logical | logical |
112
- | integer | numeric | integer |
113
- | double | numeric | double |
114
- | complex | complex | comples |
115
- | character | character | character |
116
- | raw | raw | raw |
117
193
 
118
- Single numbers, such as 4.2, and strings, such as "four point two" are still vectors, of length
119
- 1; there are no more basic types. Vectors with length zero are possible (and useful).
120
- String vectors have mode and storage mode "character". A single element of a character
121
- vector is often referred to as a character string.
194
+ ```ruby
195
+ puts ~:mtcars
196
+ ```
122
197
 
123
- To create a vector the 'c' (concatenate) method from the 'R' module should be used:
198
+ ```
199
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
200
+ ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
201
+ ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
202
+ ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
203
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
204
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
205
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
206
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
207
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
208
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
209
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
210
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
211
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
212
+ ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
213
+ ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
214
+ ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
215
+ ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
216
+ ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
217
+ ## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
218
+ ## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
219
+ ## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
220
+ ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
221
+ ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
222
+ ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
223
+ ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
224
+ ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
225
+ ## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
226
+ ## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
227
+ ## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
228
+ ## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
229
+ ## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
230
+ ## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
231
+ ## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
232
+ ```
233
+
234
+ To access an R function from Ruby, the R function needs to be preceded by 'R.' scoping.
235
+ Below we see an example of creating an R::Vector by calling the 'c' R function
124
236
 
125
237
 
126
238
  ```ruby
127
- @vec = R.c(1, 2, 3)
128
- puts @vec
239
+ puts vec = R.c(1.0, 2.0, 3.0, 4.0)
129
240
  ```
130
241
 
131
242
  ```
132
- ## [1] 1 2 3
243
+ ## [1] 1 2 3 4
133
244
  ```
245
+ Note that 'vec' is an object of type R::Vector:
134
246
 
135
- Lets take a look at the type, mode and storage.mode of our vector @vec. In order to print
136
- this out, we are creating a data frame 'df' and printing it out. A data frame, for those
137
- not familiar with it, it basically a table. Here we create the data frame and add the
138
- column name by passing named parameters for each column, such as 'typeof:', 'mode:' and
139
- 'storage__mode'. You should also note here that the double underscore is converted to a '.'.
140
247
 
141
- In R, the method used to create a data frame is 'data.frame', in Galaaz we use 'data__frame'.
248
+ ```ruby
249
+ puts vec.class
250
+ ```
251
+
252
+ ```
253
+ ## R::Vector
254
+ ```
255
+ Every object created by a call to an R function will be of a type that inherits from
256
+ R::Object. In R, there is also a function 'class'. In order to access that function we
257
+ can call method 'rclass' in the R::Object:
142
258
 
143
259
 
144
260
  ```ruby
145
- df = R.data__frame(typeof: @vec.typeof, mode: @vec.mode, storage__mode: @vec.storage__mode)
146
- puts df
261
+ puts vec.rclass
147
262
  ```
148
263
 
149
264
  ```
150
- ## typeof mode storage.mode
151
- ## 1 integer numeric integer
265
+ ## [1] "numeric"
152
266
  ```
267
+ When working with R::Object(s), it is possible to use the '.' operator to pipe operations.
268
+ When using '.', the object to which the '.' is applied becomes the first argument of the
269
+ corresponding R function. For instance, function 'c' in R, can be used to concatenate
270
+ two or more vectors (in R, there are no scalar values; scalars are converted to
271
+ vectors of size 1. Within Galaaz, a scalar parameter is converted to a size-one vector):
153
272
 
154
- If you want to create a vector with floating point numbers, then we need at least one of the
155
- vector's element to be a float, such as 1.0. R users should be careful, since in R a number
156
- like '1' is converted to float and to have an integer the R developer will use '1L'. Galaaz
157
- follows normal Ruby rules and the number 1 is an integer and 1.0 is a float.
273
+
274
+ ```ruby
275
+ puts R.c(vec, 10, 20, 30)
276
+ ```
277
+
278
+ ```
279
+ ## [1] 1 2 3 4 10 20 30
280
+ ```
281
+ The call above to the 'c' function can also be done using '.' notation:
158
282
 
159
283
 
160
284
  ```ruby
161
- @vec = R.c(1.0, 2, 3)
162
- puts @vec
285
+ puts vec.c(10, 20, 30)
163
286
  ```
164
287
 
165
288
  ```
166
- ## [1] 1 2 3
289
+ ## [1] 1 2 3 4 10 20 30
167
290
  ```
291
+ We will talk about vector indexing in a later section. But notice here that indexing
292
+ an R::Vector will return another R::Vector:
168
293
 
169
294
 
170
295
  ```ruby
171
- df = R.data__frame(typeof: @vec.typeof, mode: @vec.mode, storage__mode: @vec.storage__mode)
172
- outputs df.kable.kable_styling
296
+ puts vec[1]
173
297
  ```
174
298
 
175
- <table class="table" style="margin-left: auto; margin-right: auto;">
176
- <thead>
177
- <tr>
178
- <th style="text-align:left;"> typeof </th>
179
- <th style="text-align:left;"> mode </th>
180
- <th style="text-align:left;"> storage.mode </th>
181
- </tr>
182
- </thead>
183
- <tbody>
184
- <tr>
185
- <td style="text-align:left;"> double </td>
186
- <td style="text-align:left;"> numeric </td>
187
- <td style="text-align:left;"> double </td>
188
- </tr>
189
- </tbody>
190
- </table>
299
+ ```
300
+ ## [1] 1
301
+ ```
302
+ Sometimes we want to index an R::Object and get back a Ruby object that is not wrapped
303
+ in an R::Object, but the native Ruby object. For this, we can index the R object with
304
+ the '>>' operator:
191
305
 
192
- In this next example we try to create a vector with a variable 'hello' that has not yet
193
- being defined. This will raise an exception that is printed out. We get two return blocks,
194
- the first with a message explaining what went wrong and the second with the full backtrace
195
- of the error.
306
+
307
+ ```ruby
308
+ puts vec >> 0
309
+ puts vec >> 2
310
+ ```
311
+
312
+ ```
313
+ ## 1.0
314
+ ## 3.0
315
+ ```
316
+
317
+ It is also possible to call an R function with named arguments, by creating the function
318
+ in Galaaz with named parameters. For instance, here is an example of creating a 'list'
319
+ with named elements:
196
320
 
197
321
 
198
322
  ```ruby
199
- vec = R.c(1, hello, 5)
323
+ puts R.list(first_name: "Rodrigo", last_name: "Botafogo")
200
324
  ```
201
325
 
202
326
  ```
203
- ## Message:
204
- ## undefined local variable or method `hello' for RubyChunk:Class
327
+ ## $first_name
328
+ ## [1] "Rodrigo"
329
+ ##
330
+ ## $last_name
331
+ ## [1] "Botafogo"
332
+ ```
333
+
334
+ Many R functions receive another function as argument. For instance, method 'map' applies
335
+ a function to every element of a vector. With Galaaz, it is possible to pass a Proc,
336
+ Method or Lambda in place of the expected R function. In this next example, we will
337
+ add 2 to every element of our previously created vector:
338
+
339
+
340
+ ```ruby
341
+ puts vec.map { |x| x + 2 }
342
+ ```
343
+
344
+ ```
345
+ ## [1] 3
346
+ ## [1] 4
347
+ ## [1] 5
348
+ ## [1] 6
349
+ ```
350
+
351
+ # gKnitting a Document
352
+
353
+ This manual has been formatted using gKnit. gKnit uses Knitr and R markdown to knit
354
+ a document in Ruby or R and output it in any of the available formats for R markdown.
355
+ gKnit runs atop GraalVM and Galaaz. In gKnit, Ruby variables are persisted between
356
+ chunks, making it an ideal solution for literate programming. Also, since it is based
357
+ on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming with
358
+ Ruby and R is quite natural.
359
+
360
+ The idea of "literate programming" was first introduced by Donald Knuth in the
361
+ 1980's [@Knuth:literate_programming].
362
+ The main intention of this approach was to develop software interspersing macro snippets,
363
+ traditional source code, and a natural language such as English in a document
364
+ that could be compiled into
365
+ executable code and at the same time easily read by a human developer. According to Knuth
366
+ "The practitioner of
367
+ literate programming can be regarded as an essayist, whose main concern is with exposition
368
+ and excellence of style."
369
+
370
+ The idea of literate programming evolved into the idea of reproducible research, in which
371
+ all the data, software code, documentation, graphics etc. needed to reproduce the research
372
+ and its reports could be included in a
373
+ single document or set of documents that when distributed to peers could be rerun generating
374
+ the same output and reports.
375
+
376
+ The R community has put a great deal of effort in reproducible research. In 2002, Sweave was
377
+ introduced and it allowed mixing R code with Latex generating high quality PDF documents. A
378
+ Sweave document could include code, the results of executing the code, graphics and text
379
+ such that it contained the whole narrative to reproduce the research. In
380
+ 2012, Knitr, developed by Yihui Xie from RStudio was released to replace Sweave and to
381
+ consolidate in one single package the many extensions and add-on packages that
382
+ were necessary for Sweave.
383
+
384
+ With Knitr, __R markdown__ was also developed, an extension to the
385
+ Markdown format. With __R markdown__ and Knitr it is possible to generate reports in a multitude
386
+ of formats such as HTML, markdown, Latex, PDF, dvi, etc. __R markdown__ also allows the use of
387
+ multiple programming languages such as R, Ruby, Python, etc. in the same document.
388
+
389
+ In __R markdown__, text is interspersed with
390
+ code chunks that can be executed and both the code and its results can become
391
+ part of the final report. Although __R markdown__ allows multiple programming languages in the
392
+ same document, only R and Python (with
393
+ the reticulate package) can persist variables between chunks. For other languages, such as
394
+ Ruby, every chunk will start a new process and thus all data is lost between chunks, unless it
395
+ is somehow stored in a data file that is read by the next chunk.
396
+
397
+ Being able to persist data
398
+ between chunks is critical for literate programming otherwise the flow of the narrative is lost
399
+ by all the effort of having to save data and then reload it. Although this might, at first, seem like
400
+ a small nuisance, not being able to persist data between chunks is a major issue. For example, let's
401
+ take a look at the following simple example in which we want to show how to create a list and then
402
+ use it. Let's first assume that data cannot be persisted between chunks. In the next chunk we
403
+ create a list, then we would need to save it to file, but to save it, we need somehow to marshal the
404
+ data into a binary format:
405
+
406
+
407
+ ```ruby
408
+ lst = R.list(a: 1, b: 2, c: 3)
409
+ lst.saveRDS("lst.rds")
410
+ ```
411
+ then, on the next chunk, where variable 'lst' is used, we need to read back its value
412
+
413
+
414
+ ```ruby
415
+ lst = R.readRDS("lst.rds")
416
+ puts lst
417
+ ```
418
+
419
+ ```
420
+ ## $a
421
+ ## [1] 1
422
+ ##
423
+ ## $b
424
+ ## [1] 2
425
+ ##
426
+ ## $c
427
+ ## [1] 3
428
+ ```
429
+
430
+ Now, any single code has dozens of variables that we might want to use and reuse between chunks.
431
+ Clearly, such an approach becomes quickly unmanageable. Probably, because of
432
+ this problem, it is very rare to see any __R markdown__ document in the Ruby community.
433
+
434
+ When variables can be used across chunks, then no overhead is needed:
435
+
436
+
437
+ ```ruby
438
+ lst = R.list(a: 1, b: 2, c: 3)
439
+ # any other code can be added here
440
+ ```
441
+
442
+
443
+ ```ruby
444
+ puts lst
445
+ ```
446
+
447
+ ```
448
+ ## $a
449
+ ## [1] 1
450
+ ##
451
+ ## $b
452
+ ## [1] 2
453
+ ##
454
+ ## $c
455
+ ## [1] 3
456
+ ```
457
+
458
+ In the Python community, the same effort to have code and text in an integrated environment
459
+ started around the first decade of 2000. In 2006 iPython 0.7.2 was released. In 2014,
460
+ Fernando Pérez, spun off project Jupyter from iPython creating a web-based interactive
461
+ computation environment. Jupyter can now be used with many languages, including Ruby with the
462
+ iruby gem (https://github.com/SciRuby/iruby). In order to have multiple languages in a Jupyter
463
+ notebook the SoS kernel was developed (https://vatlab.github.io/sos-docs/).
464
+
465
+ ## gKnit and __R markdown__
466
+
467
+ gKnit is based on knitr and __R markdown__ and can knit a document
468
+ written both in Ruby and/or R and output it in any of the available formats of __R markdown__. gKnit
469
+ allows ruby developers to do literate programming and reproducible research by allowing them to
470
+ have in a single document, text and code.
471
+
472
+ In gKnit, Ruby variables are persisted between
473
+ chunks, making it an ideal solution for literate programming in this language. Also,
474
+ since it is based on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming
475
+ with Ruby and R is quite natural.
476
+
477
+ This is not a blog post on __R markdown__, and the interested user is directed to the following links
478
+ for detailed information on its capabilities and use.
479
+
480
+ * https://rmarkdown.rstudio.com/ or
481
+ * https://bookdown.org/yihui/rmarkdown/
482
+
483
+ In this post, we will describe just the main aspects of __R markdown__, so the user can start
484
+ gKnitting Ruby and R documents quickly.
485
+
486
+ ## The Yaml header
487
+
488
+ An __R markdown__ document should start with a Yaml header and be stored in a file with
489
+ '.Rmd' extension. This document has the following header for gKnitting an HTML document.
490
+
491
+ ```
492
+ ---
493
+ title: "How to do reproducible research in Ruby with gKnit"
494
+ author:
495
+ - "Rodrigo Botafogo"
496
+ - "Daniel Mossé - University of Pittsburgh"
497
+ tags: [Tech, Data Science, Ruby, R, GraalVM]
498
+ date: "20/02/2019"
499
+ output:
500
+ html_document:
501
+ self_contained: true
502
+ keep_md: true
503
+ pdf_document:
504
+ includes:
505
+ in_header: ["../../sty/galaaz.sty"]
506
+ number_sections: yes
507
+ ---
508
+ ```
509
+
510
+ For more information on the options in the Yaml header, [check here](https://bookdown.org/yihui/rmarkdown/html-document.html).
511
+
512
+ ## __R Markdown__ formatting
513
+
514
+ Document formatting can be done with simple markups such as:
515
+
516
+ ## Headers
517
+
518
+ ```
519
+ # Header 1
520
+
521
+ ## Header 2
522
+
523
+ ### Header 3
524
+
525
+ ```
526
+
527
+ ## Lists
528
+
529
+ ```
530
+ Unordered lists:
531
+
532
+ * Item 1
533
+ * Item 2
534
+ + Item 2a
535
+ + Item 2b
536
+ ```
537
+
538
+ ```
539
+ Ordered Lists
540
+
541
+ 1. Item 1
542
+ 2. Item 2
543
+ 3. Item 3
544
+ + Item 3a
545
+ + Item 3b
546
+ ```
547
+
548
+ For more R markdown formatting go to https://rmarkdown.rstudio.com/authoring_basics.html.
549
+
550
+ ## R chunks
551
+
552
+ Running and executing Ruby and R code is actually what really interests us in this blog.
553
+ Inserting a code chunk is done by adding code in a block delimited by three back ticks
554
+ followed by an open
555
+ curly brace ('{') followed by the engine name (r, ruby, rb, include, ...), and
556
+ any optional chunk_label and options, as shown below:
557
+
558
+ ````
559
+ ```{engine_name [chunk_label], [chunk_options]}
560
+ ```
561
+ ````
562
+
563
+ for instance, let's add an R chunk to the document labeled 'first_r_chunk'. This is
564
+ a very simple code just to create a variable and print it out, as follows:
565
+
566
+ ````
567
+ ```{r first_r_chunk}
568
+ vec <- c(1, 2, 3)
569
+ print(vec)
570
+ ```
571
+ ````
572
+
573
+ If this block is added to an __R markdown__ document and gKnitted the result will be:
574
+
575
+
576
+ ```r
577
+ vec <- c(1, 2, 3)
578
+ print(vec)
579
+ ```
580
+
581
+ ```
582
+ ## [1] 1 2 3
583
+ ```
584
+
585
+ Now let's say that we want to do some analysis in the code, but just print the result and not the
586
+ code itself. For this, we need to add the option 'echo = FALSE'.
587
+
588
+ ````
589
+ ```{r second_r_chunk, echo = FALSE}
590
+ vec2 <- c(10, 20, 30)
591
+ vec3 <- vec * vec2
592
+ print(vec3)
593
+ ```
594
+ ````
595
+ Here is how this block will show up in the document. Observe that the code is not shown
596
+ and we only see the execution result in a white box
597
+
598
+
599
+ ```
600
+ ## [1] 10 40 90
601
+ ```
602
+
603
+ A description of the available chunk options can be found in https://yihui.name/knitr/.
604
+
605
+ Let's add another R chunk with a function definition. In this example, a vector
606
+ 'r_vec' is created and
607
+ a new function 'reduce_sum' is defined. The chunk specification is
608
+
609
+ ````
610
+ ```{r data_creation}
611
+ r_vec <- c(1, 2, 3, 4, 5)
612
+
613
+ reduce_sum <- function(...) {
614
+ Reduce(sum, as.list(...))
615
+ }
616
+ ```
617
+ ````
618
+
619
+ and this is what it will look like once executed. From now on, to be concise in the
620
+ presentation we will not show chunk definitions any longer.
621
+
622
+
623
+
624
+ ```r
625
+ r_vec <- c(1, 2, 3, 4, 5)
626
+
627
+ reduce_sum <- function(...) {
628
+ Reduce(sum, as.list(...))
629
+ }
630
+ ```
631
+
632
+ We can, possibly in another chunk, access the vector and call the function as follows:
633
+
634
+
635
+ ```r
636
+ print(r_vec)
637
+ ```
638
+
639
+ ```
640
+ ## [1] 1 2 3 4 5
641
+ ```
642
+
643
+ ```r
644
+ print(reduce_sum(r_vec))
645
+ ```
646
+
647
+ ```
648
+ ## [1] 15
649
+ ```
650
+ ## R Graphics with ggplot
651
+
652
+ In the following chunk, we create a bubble chart in R using ggplot and include it in
653
+ this document. Note that there is no directive in the code to include the image, this
654
+ occurs automatically. The 'mpg' dataframe is natively available to R and to Galaaz as
655
+ well.
656
+
657
+ For the reader not knowledgeable of ggplot, ggplot is a graphics library based on "the
658
+ grammar of graphics" [@Wilkinson:grammar_of_graphics]. The idea of the grammar of graphics
659
+ is to build a graphic by adding layers to the plot. More information can be found in
660
+ https://towardsdatascience.com/a-comprehensive-guide-to-the-grammar-of-graphics-for-effective-visualization-of-multi-dimensional-1f92b4ed4149.
661
+
662
+ In the plot below the 'mpg' dataset from the 'ggplot2' package is used. "The data concerns city-cycle fuel
663
+ consumption in miles per gallon, to be predicted in terms of 3 multivalued discrete and 5
664
+ continuous attributes." (Quinlan, 1993)
665
+
666
+ First, the 'mpg' dataset is filtered to extract only cars from the following manufacturers: Audi, Ford,
667
+ Honda, and Hyundai and stored in the 'mpg_select' variable. Then, the selected dataframe is passed
668
+ to the ggplot function specifying in the aesthetic method (aes) that 'displacement' (displ) should
669
+ be plotted in the 'x' axis and 'city mileage' should be on the 'y' axis. In the 'labs' layer we
670
+ pass the 'title' and 'subtitle' for the plot. To the basic plot 'g', geom\_jitter is added, that
671
+ plots cars from the same manufacturer with the same color (col=manufacturer) and the size of the
672
+ car point equal to its highway consumption (size = hwy). Finally, a last layer is plotted containing
673
+ a linear regression line (method = "lm") for every manufacturer.
674
+
675
+
676
+ ```r
677
+ # load package and data
678
+ library(ggplot2)
205
679
  ```
206
680
 
207
681
  ```
208
682
  ## Message:
209
- ## (eval):1:in `exec_ruby'
210
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:137:in `instance_eval'
211
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:137:in `exec_ruby'
212
- ## /home/rbotafogo/desenv/galaaz/lib/gknit/ruby_engine.rb:55:in `block in initialize'
213
- ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
214
- ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
215
- ## (eval):3:in `function(...) {\n rb_method(...)'
216
- ## unknown.r:1:in `in_dir'
217
- ## unknown.r:1:in `block_exec'
218
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc12/jre/languages/R/library/knitr/R/block.R:91:in `call_block'
219
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc12/jre/languages/R/library/knitr/R/block.R:6:in `process_group.block'
220
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc12/jre/languages/R/library/knitr/R/block.R:3:in `<no source>'
221
- ## unknown.r:1:in `withCallingHandlers'
222
- ## unknown.r:1:in `process_file'
223
- ## unknown.r:1:in `<no source>'
224
- ## unknown.r:1:in `<no source>'
225
- ## <REPL>:4:in `<repl wrapper>'
226
- ## <REPL>:1
683
+ ## Registered S3 methods overwritten by 'ggplot2':
684
+ ## method from
685
+ ## [.quosures rlang
686
+ ## c.quosures rlang
687
+ ## print.quosures rlang
688
+ ```
689
+
690
+ ```r
691
+ data(mpg, package="ggplot2")
692
+
693
+ mpg_select <- mpg[mpg$manufacturer %in% c("audi", "ford", "honda", "hyundai"), ]
694
+
695
+ # Scatterplot
696
+ theme_set(theme_bw()) # pre-set the bw theme.
697
+ g <- ggplot(mpg_select, aes(displ, cty)) +
698
+ labs(subtitle="mpg: Displacement vs City Mileage",
699
+ title="Bubble chart")
700
+
701
+ g + geom_jitter(aes(col=manufacturer, size=hwy)) +
702
+ geom_smooth(aes(col=manufacturer), method="lm", se=F)
703
+ ```
704
+
705
+ ![](manual_files/figure-html/bubble-1.png)<!-- -->
706
+
707
+ ## Ruby chunks
708
+
709
+ Including a Ruby chunk is just as easy as including an R chunk in the document: just
710
+ change the name of the engine to 'ruby'. It is also possible to pass chunk options
711
+ to the Ruby engine; however, this version does not accept all the options that are
712
+ available to R chunks. Future versions will add those options.
713
+
714
+ ````
715
+ ```{ruby first_ruby_chunk}
227
716
  ```
717
+ ````
718
+
719
+ In this example, the ruby chunk is called 'first_ruby_chunk'. One important
720
+ aspect of chunk labels is that they cannot be duplicated. If a chunk label is
721
+ duplicated, gKnit will stop with an error.
722
+
723
+ In the following chunk, variables 'a', 'b' and 'c' are standard Ruby variables
724
+ and 'vec' and 'vec2' are two vectors created by calling the 'c' method on the
725
+ R module.
726
+
727
+ In Galaaz, the R module allows us to access R functions transparently. The 'c'
728
+ function in R, is a function that concatenates its arguments making a vector.
729
+
730
+ It
731
+ should be clear that there is no requirement in gknit to call or use any R
732
+ functions. gKnit will knit standard Ruby code, or even general text without
733
+ any code.
228
734
 
229
735
 
230
736
  ```ruby
231
- outputs (~:mtcars).kable.kable_styling
737
+ a = [1, 2, 3]
738
+ b = "US$ 250.000"
739
+ c = "The 'outputs' function"
740
+
741
+ vec = R.c(1, 2, 3)
742
+ vec2 = R.c(10, 20, 30)
232
743
  ```
233
744
 
234
- <table class="table" style="margin-left: auto; margin-right: auto;">
235
- <thead>
236
- <tr>
237
- <th style="text-align:left;"> </th>
238
- <th style="text-align:right;"> mpg </th>
239
- <th style="text-align:right;"> cyl </th>
745
+ In the next block, variables 'a', 'vec' and 'vec2' are used and printed.
746
+
747
+
748
+ ```ruby
749
+ puts a
750
+ puts vec * vec2
751
+ ```
752
+
753
+ ```
754
+ ## 1
755
+ ## 2
756
+ ## 3
757
+ ## [1] 10 40 90
758
+ ```
759
+
760
+ Note that 'a' is a standard Ruby Array and 'vec' and 'vec2' are vectors that behave accordingly,
761
+ where multiplication works as expected.
762
+
763
+ ## Inline Ruby code
764
+
765
+ When using a Ruby chunk, the code and the output are formatted in blocks as seen above.
766
+ This formatting is not always desired. Sometimes, we want to have the results of the
767
+ Ruby evaluation included in the middle of a phrase. gKnit allows adding inline Ruby code
768
+ with the 'rb' engine. The following chunk specification will
769
+ create an inline Ruby text:
770
+
771
+ ````
772
+ This is some text with inline Ruby accessing variable 'b' which has value:
773
+ ```{rb puts b}
774
+ ```
775
+ and is followed by some other text!
776
+ ````
777
+
778
+ <div style="margin-bottom:30px;">
779
+ </div>
780
+
781
+ This is some text with inline Ruby accessing variable 'b' which has value:
782
+ US$ 250.000
783
+ and is followed by some other text!
784
+
785
+ <div style="margin-bottom:30px;">
786
+ </div>
787
+
788
+ Note that it is important not to add any new line before or after the code
789
+ block if we want everything to be in only one line, resulting in the following sentence
790
+ with inline Ruby code.
791
+
792
+
793
+ ### The 'outputs' function
794
+
795
+ We have previously used the standard 'puts' method in Ruby chunks in order to produce
796
+ output. The result of a 'puts', as seen in all previous chunks that use it, is formatted
797
+ inside a white box that
798
+ follows the code block. Many times however, we would like to do some processing in the
799
+ Ruby chunk and have the result of this processing generate an output that is
800
+ "included" in the document as if we had typed it in the __R markdown__ document.
801
+
802
+ For example, suppose we want to create a new heading in our document, but the heading
803
+ phrase is the result of some code processing: maybe it's the first line of a file we are
804
+ going to read. Method 'outputs' adds its output as if typed in the __R markdown__ document.
805
+
806
+ Take now a look at variable 'c' (it was defined in a previous block above) as
807
+ 'c = "The 'outputs' function". "The 'outputs' function" is actually the name of this
808
+ section and it was created using the 'outputs' function inside a Ruby chunk.
809
+
810
+ The ruby chunk to generate this heading is:
811
+
812
+ ````
813
+ ```{ruby heading}
814
+ outputs "### #{c}"
815
+ ```
816
+ ````
817
+
818
+ The three '###' is the way we add a Heading 3 in __R markdown__.
819
+
820
+
821
+ ### HTML Output from Ruby Chunks
822
+
823
+ We've just seen the use of method 'outputs' to add text to the __R markdown__
824
+ document. This technique can also be used to add HTML code to the document. In
825
+ __R markdown__, any html code typed directly in the document will be properly rendered.
826
+ Here, for instance, is a table definition in HTML and its output in the document:
827
+
828
+ ```
829
+ <table style="width:100%">
830
+ <tr>
831
+ <th>Firstname</th>
832
+ <th>Lastname</th>
833
+ <th>Age</th>
834
+ </tr>
835
+ <tr>
836
+ <td>Jill</td>
837
+ <td>Smith</td>
838
+ <td>50</td>
839
+ </tr>
840
+ <tr>
841
+ <td>Eve</td>
842
+ <td>Jackson</td>
843
+ <td>94</td>
844
+ </tr>
845
+ </table>
846
+ ```
847
+ <div style="margin-bottom:30px;">
848
+ </div>
849
+
850
+ <table style="width:100%">
851
+ <tr>
852
+ <th>Firstname</th>
853
+ <th>Lastname</th>
854
+ <th>Age</th>
855
+ </tr>
856
+ <tr>
857
+ <td>Jill</td>
858
+ <td>Smith</td>
859
+ <td>50</td>
860
+ </tr>
861
+ <tr>
862
+ <td>Eve</td>
863
+ <td>Jackson</td>
864
+ <td>94</td>
865
+ </tr>
866
+ </table>
867
+
868
+ <div style="margin-bottom:30px;">
869
+ </div>
870
+
871
+ But manually creating HTML output is not always easy or desirable, especially
872
+ if we intend the document to be rendered in other formats, for example, as Latex.
873
+ Also, the above
874
+ table looks ugly. The 'kableExtra' library is a great library for
875
+ creating beautiful tables. Take a look at https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html
876
+
877
+ In the next chunk, we output the 'mtcars' dataframe from R in a nicely formatted
878
+ table. Note that we retrieve the mtcars dataframe by using '~:mtcars'.
879
+
880
+
881
+ ```ruby
882
+ R.install_and_loads('kableExtra')
883
+ outputs (~:mtcars).kable.kable_styling
884
+ ```
885
+
886
+ <table class="table" style="margin-left: auto; margin-right: auto;">
887
+ <thead>
888
+ <tr>
889
+ <th style="text-align:left;"> </th>
890
+ <th style="text-align:right;"> mpg </th>
891
+ <th style="text-align:right;"> cyl </th>
240
892
  <th style="text-align:right;"> disp </th>
241
893
  <th style="text-align:right;"> hp </th>
242
894
  <th style="text-align:right;"> drat </th>
@@ -700,47 +1352,2851 @@ outputs (~:mtcars).kable.kable_styling
700
1352
  </tbody>
701
1353
  </table>
702
1354
 
1355
+ ## Including Ruby files in a chunk
703
1356
 
704
- ## Graphics with ggplot
1357
+ R is a language that was created to be easy and fast for statisticians to use. As far
1358
+ as I know, it was not a
1359
+ language to be used for developing large systems. Of course, there are large systems and
1360
+ libraries in R, but the focus of the language is for developing statistical models and
1361
+ distributing them to peers.
705
1362
 
1363
+ Ruby on the other hand, is a language for large software development. Systems written in
1364
+ Ruby will have dozens, hundreds or even thousands of files. To document a
1365
+ large system with literate programming, we cannot expect the developer to add all the
1366
+ files in a single '.Rmd' file. gKnit provides the 'include' chunk engine to include
1367
+ a Ruby file as if it had been typed in the '.Rmd' file.
706
1368
 
707
- ```ruby
708
- require 'ggplot'
1369
+ To include a file, the following chunk should be created, where <filename> is the name of
1370
+ the file to be included and where the extension, if it is '.rb', does not need to be added.
1371
+ If the 'relative' option is not included, then it is treated as TRUE. When 'relative' is
1372
+ true, ruby's 'require\_relative' semantics is used to load the file, when false, Ruby's
1373
+ \$LOAD_PATH is searched to find the file and it is 'require'd.
1374
+
1375
+ ````
1376
+ ```{include <filename>, relative = <TRUE/FALSE>}
1377
+ ```
1378
+ ````
1379
+
1380
+ Below we include file 'model.rb', which is in the same directory as this blog.
1381
+ This code uses R 'caret' package to split a dataset in a train and test sets.
1382
+ The 'caret' package is a very important and useful package for doing Data Analysis,
1383
+ it has hundreds of functions for all steps of the Data Analysis workflow. To
1384
+ use 'caret' just to split a dataset is like using the proverbial cannon to
1385
+ kill the fly. We use it here only to show that integrating Ruby and R and
1386
+ using even a very complex package as 'caret' is trivial with Galaaz.
1387
+
1388
+ A word of advice: the 'caret' package has lots of dependencies and installing
1389
+ it in a Linux system is a time consuming operation. Method 'R.install_and_loads'
1390
+ will install the package if it is not already installed and can take a while.
1391
+
1392
+ ````
1393
+ ```{include model}
1394
+ ```
1395
+ ````
1396
+
1397
+
1398
+ ```include
1399
+ require 'galaaz'
1400
+
1401
+ # Loads the R 'caret' package. If not present, installs it
1402
+ R.install_and_loads 'caret'
1403
+
1404
+ class Model
1405
+
1406
+ attr_reader :data
1407
+ attr_reader :test
1408
+ attr_reader :train
1409
+
1410
+ #==========================================================
1411
+ #
1412
+ #==========================================================
1413
+
1414
+ def initialize(data, percent_train:, seed: 123)
1415
+
1416
+ R.set__seed(seed)
1417
+ @data = data
1418
+ @percent_train = percent_train
1419
+ @seed = seed
1420
+
1421
+ end
1422
+
1423
+ #==========================================================
1424
+ #
1425
+ #==========================================================
1426
+
1427
+ def partition(field)
1428
+
1429
+ train_index =
1430
+ R.createDataPartition(@data.send(field), p: @percet_train,
1431
+ list: false, times: 1)
1432
+ @train = @data[train_index, :all]
1433
+ @test = @data[-train_index, :all]
1434
+
1435
+ end
1436
+
1437
+ end
1438
+
1439
+ ```
709
1440
 
710
- R.theme_set R.theme_bw
711
1441
 
712
- # Data Prep
1442
+ ```ruby
713
1443
  mtcars = ~:mtcars
714
- mtcars.car_name = R.rownames(:mtcars)
715
- # compute normalized mpg
716
- mtcars.mpg_z = ((mtcars.mpg - mtcars.mpg.mean)/mtcars.mpg.sd).round 2
717
- mtcars.mpg_type = mtcars.mpg_z < 0 ? "below" : "above"
718
- mtcars = mtcars[mtcars.mpg_z.order, :all]
719
- # convert to factor to retain sorted order in plot
720
- mtcars.car_name = mtcars.car_name.factor levels: mtcars.car_name
1444
+ model = Model.new(mtcars, percent_train: 0.8)
1445
+ model.partition(:mpg)
1446
+ puts model.train.head
1447
+ puts model.test.head
1448
+ ```
721
1449
 
722
- # Diverging Barcharts
723
- gg = mtcars.ggplot(E.aes(x: :car_name, y: :mpg_z, label: :mpg_z)) +
724
- R.geom_bar(E.aes(fill: :mpg_type), stat: 'identity', width: 0.5) +
725
- R.scale_fill_manual(name: "Mileage",
726
- labels: R.c("Above Average", "Below Average"),
727
- values: R.c("above": "#00ba38", "below": "#f8766d")) +
728
- R.labs(subtitle: "Normalised mileage from 'mtcars'",
729
- title: "Diverging Bars") +
730
- R.coord_flip()
1450
+ ```
1451
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1452
+ ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
1453
+ ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
1454
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
1455
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
1456
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
1457
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
1458
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1459
+ ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
1460
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
1461
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
1462
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
1463
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
1464
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
1465
+ ```
731
1466
 
732
- puts gg
1467
+ ## Documenting Gems
1468
+
1469
+ gKnit also allows developers to document and load files that are not in the same directory
1470
+ of the '.Rmd' file.
1471
+
1472
+ Here is an example of loading the 'find.rb' file from TruffleRuby. In this example, relative
1473
+ is set to FALSE, so Ruby will look for the file in its $LOAD\_PATH, and the user does not
1474
+ need to know its directory.
1475
+
1476
+ ````
1477
+ ```{include find, relative = FALSE}
733
1478
  ```
1479
+ ````
734
1480
 
735
1481
 
736
- ![](/home/rbotafogo/desenv/galaaz/blogs/manual/manual_files/figure-html/diverging_bar.png)<!-- -->
1482
+ ```include
1483
+ # frozen_string_literal: true
1484
+ #
1485
+ # find.rb: the Find module for processing all files under a given directory.
1486
+ #
737
1487
 
1488
+ #
1489
+ # The +Find+ module supports the top-down traversal of a set of file paths.
1490
+ #
1491
+ # For example, to total the size of all files under your home directory,
1492
+ # ignoring anything in a "dot" directory (e.g. $HOME/.ssh):
1493
+ #
1494
+ # require 'find'
1495
+ #
1496
+ # total_size = 0
1497
+ #
1498
+ # Find.find(ENV["HOME"]) do |path|
1499
+ # if FileTest.directory?(path)
1500
+ # if File.basename(path)[0] == ?.
1501
+ # Find.prune # Don't look any further into this directory.
1502
+ # else
1503
+ # next
1504
+ # end
1505
+ # else
1506
+ # total_size += FileTest.size(path)
1507
+ # end
1508
+ # end
1509
+ #
1510
+ module Find
738
1511
 
739
- [TO BE CONTINUED...]
1512
+ #
1513
+ # Calls the associated block with the name of every file and directory listed
1514
+ # as arguments, then recursively on their subdirectories, and so on.
1515
+ #
1516
+ # Returns an enumerator if no block is given.
1517
+ #
1518
+ # See the +Find+ module documentation for an example.
1519
+ #
1520
+ def find(*paths, ignore_error: true) # :yield: path
1521
+ block_given? or return enum_for(__method__, *paths, ignore_error: ignore_error)
740
1522
 
1523
+ fs_encoding = Encoding.find("filesystem")
741
1524
 
742
- # Contributing
1525
+ paths.collect!{|d| raise Errno::ENOENT, d unless File.exist?(d); d.dup}.each do |path|
1526
+ path = path.to_path if path.respond_to? :to_path
1527
+ enc = path.encoding == Encoding::US_ASCII ? fs_encoding : path.encoding
1528
+ ps = [path]
1529
+ while file = ps.shift
1530
+ catch(:prune) do
1531
+ yield file.dup.taint
1532
+ begin
1533
+ s = File.lstat(file)
1534
+ rescue Errno::ENOENT, Errno::EACCES, Errno::ENOTDIR, Errno::ELOOP, Errno::ENAMETOOLONG
1535
+ raise unless ignore_error
1536
+ next
1537
+ end
1538
+ if s.directory? then
1539
+ begin
1540
+ fs = Dir.children(file, encoding: enc)
1541
+ rescue Errno::ENOENT, Errno::EACCES, Errno::ENOTDIR, Errno::ELOOP, Errno::ENAMETOOLONG
1542
+ raise unless ignore_error
1543
+ next
1544
+ end
1545
+ fs.sort!
1546
+ fs.reverse_each {|f|
1547
+ f = File.join(file, f)
1548
+ ps.unshift f.untaint
1549
+ }
1550
+ end
1551
+ end
1552
+ end
1553
+ end
1554
+ nil
1555
+ end
1556
+
1557
+ #
1558
+ # Skips the current file or directory, restarting the loop with the next
1559
+ # entry. If the current file is a directory, that directory will not be
1560
+ # recursively entered. Meaningful only within the block associated with
1561
+ # Find::find.
1562
+ #
1563
+ # See the +Find+ module documentation for an example.
1564
+ #
1565
+ def prune
1566
+ throw :prune
1567
+ end
1568
+
1569
+ module_function :find, :prune
1570
+ end
1571
+ ```
1572
+
1573
+ ## Converting to PDF
1574
+
1575
+ One of the beauties of knitr is that the same input can be converted to many different outputs.
1576
+ One very useful format is, of course, PDF. In order to convert an __R markdown__ file to PDF
1577
+ it is necessary to have LaTeX installed on the system. We will not explain here how to
1578
+ install LaTeX as there are plenty of documents on the web showing how to proceed.
1579
+
1580
+ gKnit comes with a simple LaTeX style file for gknitting this blog as a PDF document. Here is
1581
+ the Yaml header to generate this blog in PDF format instead of HTML:
1582
+
1583
+ ```
1584
+ ---
1585
+ title: "gKnit - Ruby and R Knitting with Galaaz in GraalVM"
1586
+ author: "Rodrigo Botafogo"
1587
+ tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, knitr, gknit]
1588
+ date: "29 October 2018"
1589
+ output:
1590
+ pdf\_document:
1591
+ includes:
1592
+ in\_header: ["../../sty/galaaz.sty"]
1593
+ number\_sections: yes
1594
+ ---
1595
+ ```
1596
+
1597
+ ## Template based documents generation
1598
+
1599
+ When a document is converted to PDF it follows a certain conversion template. We've seen above
1600
+ the use of 'galaaz.sty' as a basic template to generate a PDF document. Using the
1601
+ 'gknit-draft' app that comes with Galaaz, the same .Rmd file can be compiled to different
1602
+ looking PDF documents. Galaaz automatically loads the 'rticles' R package that comes with
1603
+ templates for the following journals with the respective template name:
1604
+
1605
+ * ACM articles: acm_article
1606
+ * ACS articles: acs_article
1607
+ * AEA journal submissions: aea_article
1608
+ * AGU journal submissions: ????
1609
+ * AMS articles: ams_article
1610
+ * American Statistical Association: asa_article
1611
+ * Biometrics articles: biometrics_article
1612
+ * Bulletin de l'AMQ journal submissions: amq_article
1613
+ * CTeX documents: ctex
1614
+ * Elsevier journal submissions: elsevier_article
1615
+ * IEEE Transaction journal submissions: ieee_article
1616
+ * JSS articles: jss_article
1617
+ * MDPI journal submissions: mdpi_article
1618
+ * Monthly Notices of the Royal Astronomical Society articles: mnras_article
1619
+ * NNRAS journal submissions: nmras_article
1620
+ * PeerJ articles: peerj_article
1621
+ * Royal Society Open Science journal submissions: rsos_article
1622
+ * Royal Statistical Society: rss_article
1623
+ * Sage journal submissions: sage_article
1624
+ * Springer journal submissions: springer_article
1625
+ * Statistics in Medicine journal submissions: sim_article
1626
+ * Copernicus Publications journal submissions: copernicus_article
1627
+ * The R Journal articles: rjournal_article
1628
+ * Frontiers articles: ???
1629
+ * Taylor & Francis articles: ???
1630
+ * Bulletin De L'AMQ: amq_article
1631
+ * PLOS journal: plos_article
1632
+ * Proceedings of the National Academy of Sciences of the USA: pnas_article
1633
+
1634
+ In order to create a document with one of those templates, use the following command:
1635
+
1636
+ ```
1637
+ gknit-draft --filename <my_document> --template <template> --package <package>
1638
+ --create_dir
1639
+ ```
1640
+ So, in order to create a template for writing an R Journal, use:
1641
+
1642
+ ```
1643
+ gknit-draft --filename my_r_article --template rjournal_article --package rticles
1644
+ --create_dir
1645
+ ```
1646
+
1647
+ # Accessing R variables
1648
+
1649
+ Galaaz allows Ruby to access variables created in R. For example, the 'mtcars' data set is
1650
+ available in R and can be accessed from Ruby by using the 'tilde' operator followed by the
1651
+ symbol for the variable, in this case ':mtcars'. In the code below method 'outputs' is
1652
+ used to output the 'mtcars' data set nicely formatted in HTML by use of the 'kable' and
1653
+ 'kable_styling' functions. Method 'outputs' is only available when used with 'gknit'.
1654
+
1655
+
1656
+ ```ruby
1657
+ outputs (~:mtcars).kable.kable_styling
1658
+ ```
743
1659
 
1660
+ <table class="table" style="margin-left: auto; margin-right: auto;">
1661
+ <thead>
1662
+ <tr>
1663
+ <th style="text-align:left;"> </th>
1664
+ <th style="text-align:right;"> mpg </th>
1665
+ <th style="text-align:right;"> cyl </th>
1666
+ <th style="text-align:right;"> disp </th>
1667
+ <th style="text-align:right;"> hp </th>
1668
+ <th style="text-align:right;"> drat </th>
1669
+ <th style="text-align:right;"> wt </th>
1670
+ <th style="text-align:right;"> qsec </th>
1671
+ <th style="text-align:right;"> vs </th>
1672
+ <th style="text-align:right;"> am </th>
1673
+ <th style="text-align:right;"> gear </th>
1674
+ <th style="text-align:right;"> carb </th>
1675
+ </tr>
1676
+ </thead>
1677
+ <tbody>
1678
+ <tr>
1679
+ <td style="text-align:left;"> Mazda RX4 </td>
1680
+ <td style="text-align:right;"> 21.0 </td>
1681
+ <td style="text-align:right;"> 6 </td>
1682
+ <td style="text-align:right;"> 160.0 </td>
1683
+ <td style="text-align:right;"> 110 </td>
1684
+ <td style="text-align:right;"> 3.90 </td>
1685
+ <td style="text-align:right;"> 2.620 </td>
1686
+ <td style="text-align:right;"> 16.46 </td>
1687
+ <td style="text-align:right;"> 0 </td>
1688
+ <td style="text-align:right;"> 1 </td>
1689
+ <td style="text-align:right;"> 4 </td>
1690
+ <td style="text-align:right;"> 4 </td>
1691
+ </tr>
1692
+ <tr>
1693
+ <td style="text-align:left;"> Mazda RX4 Wag </td>
1694
+ <td style="text-align:right;"> 21.0 </td>
1695
+ <td style="text-align:right;"> 6 </td>
1696
+ <td style="text-align:right;"> 160.0 </td>
1697
+ <td style="text-align:right;"> 110 </td>
1698
+ <td style="text-align:right;"> 3.90 </td>
1699
+ <td style="text-align:right;"> 2.875 </td>
1700
+ <td style="text-align:right;"> 17.02 </td>
1701
+ <td style="text-align:right;"> 0 </td>
1702
+ <td style="text-align:right;"> 1 </td>
1703
+ <td style="text-align:right;"> 4 </td>
1704
+ <td style="text-align:right;"> 4 </td>
1705
+ </tr>
1706
+ <tr>
1707
+ <td style="text-align:left;"> Datsun 710 </td>
1708
+ <td style="text-align:right;"> 22.8 </td>
1709
+ <td style="text-align:right;"> 4 </td>
1710
+ <td style="text-align:right;"> 108.0 </td>
1711
+ <td style="text-align:right;"> 93 </td>
1712
+ <td style="text-align:right;"> 3.85 </td>
1713
+ <td style="text-align:right;"> 2.320 </td>
1714
+ <td style="text-align:right;"> 18.61 </td>
1715
+ <td style="text-align:right;"> 1 </td>
1716
+ <td style="text-align:right;"> 1 </td>
1717
+ <td style="text-align:right;"> 4 </td>
1718
+ <td style="text-align:right;"> 1 </td>
1719
+ </tr>
1720
+ <tr>
1721
+ <td style="text-align:left;"> Hornet 4 Drive </td>
1722
+ <td style="text-align:right;"> 21.4 </td>
1723
+ <td style="text-align:right;"> 6 </td>
1724
+ <td style="text-align:right;"> 258.0 </td>
1725
+ <td style="text-align:right;"> 110 </td>
1726
+ <td style="text-align:right;"> 3.08 </td>
1727
+ <td style="text-align:right;"> 3.215 </td>
1728
+ <td style="text-align:right;"> 19.44 </td>
1729
+ <td style="text-align:right;"> 1 </td>
1730
+ <td style="text-align:right;"> 0 </td>
1731
+ <td style="text-align:right;"> 3 </td>
1732
+ <td style="text-align:right;"> 1 </td>
1733
+ </tr>
1734
+ <tr>
1735
+ <td style="text-align:left;"> Hornet Sportabout </td>
1736
+ <td style="text-align:right;"> 18.7 </td>
1737
+ <td style="text-align:right;"> 8 </td>
1738
+ <td style="text-align:right;"> 360.0 </td>
1739
+ <td style="text-align:right;"> 175 </td>
1740
+ <td style="text-align:right;"> 3.15 </td>
1741
+ <td style="text-align:right;"> 3.440 </td>
1742
+ <td style="text-align:right;"> 17.02 </td>
1743
+ <td style="text-align:right;"> 0 </td>
1744
+ <td style="text-align:right;"> 0 </td>
1745
+ <td style="text-align:right;"> 3 </td>
1746
+ <td style="text-align:right;"> 2 </td>
1747
+ </tr>
1748
+ <tr>
1749
+ <td style="text-align:left;"> Valiant </td>
1750
+ <td style="text-align:right;"> 18.1 </td>
1751
+ <td style="text-align:right;"> 6 </td>
1752
+ <td style="text-align:right;"> 225.0 </td>
1753
+ <td style="text-align:right;"> 105 </td>
1754
+ <td style="text-align:right;"> 2.76 </td>
1755
+ <td style="text-align:right;"> 3.460 </td>
1756
+ <td style="text-align:right;"> 20.22 </td>
1757
+ <td style="text-align:right;"> 1 </td>
1758
+ <td style="text-align:right;"> 0 </td>
1759
+ <td style="text-align:right;"> 3 </td>
1760
+ <td style="text-align:right;"> 1 </td>
1761
+ </tr>
1762
+ <tr>
1763
+ <td style="text-align:left;"> Duster 360 </td>
1764
+ <td style="text-align:right;"> 14.3 </td>
1765
+ <td style="text-align:right;"> 8 </td>
1766
+ <td style="text-align:right;"> 360.0 </td>
1767
+ <td style="text-align:right;"> 245 </td>
1768
+ <td style="text-align:right;"> 3.21 </td>
1769
+ <td style="text-align:right;"> 3.570 </td>
1770
+ <td style="text-align:right;"> 15.84 </td>
1771
+ <td style="text-align:right;"> 0 </td>
1772
+ <td style="text-align:right;"> 0 </td>
1773
+ <td style="text-align:right;"> 3 </td>
1774
+ <td style="text-align:right;"> 4 </td>
1775
+ </tr>
1776
+ <tr>
1777
+ <td style="text-align:left;"> Merc 240D </td>
1778
+ <td style="text-align:right;"> 24.4 </td>
1779
+ <td style="text-align:right;"> 4 </td>
1780
+ <td style="text-align:right;"> 146.7 </td>
1781
+ <td style="text-align:right;"> 62 </td>
1782
+ <td style="text-align:right;"> 3.69 </td>
1783
+ <td style="text-align:right;"> 3.190 </td>
1784
+ <td style="text-align:right;"> 20.00 </td>
1785
+ <td style="text-align:right;"> 1 </td>
1786
+ <td style="text-align:right;"> 0 </td>
1787
+ <td style="text-align:right;"> 4 </td>
1788
+ <td style="text-align:right;"> 2 </td>
1789
+ </tr>
1790
+ <tr>
1791
+ <td style="text-align:left;"> Merc 230 </td>
1792
+ <td style="text-align:right;"> 22.8 </td>
1793
+ <td style="text-align:right;"> 4 </td>
1794
+ <td style="text-align:right;"> 140.8 </td>
1795
+ <td style="text-align:right;"> 95 </td>
1796
+ <td style="text-align:right;"> 3.92 </td>
1797
+ <td style="text-align:right;"> 3.150 </td>
1798
+ <td style="text-align:right;"> 22.90 </td>
1799
+ <td style="text-align:right;"> 1 </td>
1800
+ <td style="text-align:right;"> 0 </td>
1801
+ <td style="text-align:right;"> 4 </td>
1802
+ <td style="text-align:right;"> 2 </td>
1803
+ </tr>
1804
+ <tr>
1805
+ <td style="text-align:left;"> Merc 280 </td>
1806
+ <td style="text-align:right;"> 19.2 </td>
1807
+ <td style="text-align:right;"> 6 </td>
1808
+ <td style="text-align:right;"> 167.6 </td>
1809
+ <td style="text-align:right;"> 123 </td>
1810
+ <td style="text-align:right;"> 3.92 </td>
1811
+ <td style="text-align:right;"> 3.440 </td>
1812
+ <td style="text-align:right;"> 18.30 </td>
1813
+ <td style="text-align:right;"> 1 </td>
1814
+ <td style="text-align:right;"> 0 </td>
1815
+ <td style="text-align:right;"> 4 </td>
1816
+ <td style="text-align:right;"> 4 </td>
1817
+ </tr>
1818
+ <tr>
1819
+ <td style="text-align:left;"> Merc 280C </td>
1820
+ <td style="text-align:right;"> 17.8 </td>
1821
+ <td style="text-align:right;"> 6 </td>
1822
+ <td style="text-align:right;"> 167.6 </td>
1823
+ <td style="text-align:right;"> 123 </td>
1824
+ <td style="text-align:right;"> 3.92 </td>
1825
+ <td style="text-align:right;"> 3.440 </td>
1826
+ <td style="text-align:right;"> 18.90 </td>
1827
+ <td style="text-align:right;"> 1 </td>
1828
+ <td style="text-align:right;"> 0 </td>
1829
+ <td style="text-align:right;"> 4 </td>
1830
+ <td style="text-align:right;"> 4 </td>
1831
+ </tr>
1832
+ <tr>
1833
+ <td style="text-align:left;"> Merc 450SE </td>
1834
+ <td style="text-align:right;"> 16.4 </td>
1835
+ <td style="text-align:right;"> 8 </td>
1836
+ <td style="text-align:right;"> 275.8 </td>
1837
+ <td style="text-align:right;"> 180 </td>
1838
+ <td style="text-align:right;"> 3.07 </td>
1839
+ <td style="text-align:right;"> 4.070 </td>
1840
+ <td style="text-align:right;"> 17.40 </td>
1841
+ <td style="text-align:right;"> 0 </td>
1842
+ <td style="text-align:right;"> 0 </td>
1843
+ <td style="text-align:right;"> 3 </td>
1844
+ <td style="text-align:right;"> 3 </td>
1845
+ </tr>
1846
+ <tr>
1847
+ <td style="text-align:left;"> Merc 450SL </td>
1848
+ <td style="text-align:right;"> 17.3 </td>
1849
+ <td style="text-align:right;"> 8 </td>
1850
+ <td style="text-align:right;"> 275.8 </td>
1851
+ <td style="text-align:right;"> 180 </td>
1852
+ <td style="text-align:right;"> 3.07 </td>
1853
+ <td style="text-align:right;"> 3.730 </td>
1854
+ <td style="text-align:right;"> 17.60 </td>
1855
+ <td style="text-align:right;"> 0 </td>
1856
+ <td style="text-align:right;"> 0 </td>
1857
+ <td style="text-align:right;"> 3 </td>
1858
+ <td style="text-align:right;"> 3 </td>
1859
+ </tr>
1860
+ <tr>
1861
+ <td style="text-align:left;"> Merc 450SLC </td>
1862
+ <td style="text-align:right;"> 15.2 </td>
1863
+ <td style="text-align:right;"> 8 </td>
1864
+ <td style="text-align:right;"> 275.8 </td>
1865
+ <td style="text-align:right;"> 180 </td>
1866
+ <td style="text-align:right;"> 3.07 </td>
1867
+ <td style="text-align:right;"> 3.780 </td>
1868
+ <td style="text-align:right;"> 18.00 </td>
1869
+ <td style="text-align:right;"> 0 </td>
1870
+ <td style="text-align:right;"> 0 </td>
1871
+ <td style="text-align:right;"> 3 </td>
1872
+ <td style="text-align:right;"> 3 </td>
1873
+ </tr>
1874
+ <tr>
1875
+ <td style="text-align:left;"> Cadillac Fleetwood </td>
1876
+ <td style="text-align:right;"> 10.4 </td>
1877
+ <td style="text-align:right;"> 8 </td>
1878
+ <td style="text-align:right;"> 472.0 </td>
1879
+ <td style="text-align:right;"> 205 </td>
1880
+ <td style="text-align:right;"> 2.93 </td>
1881
+ <td style="text-align:right;"> 5.250 </td>
1882
+ <td style="text-align:right;"> 17.98 </td>
1883
+ <td style="text-align:right;"> 0 </td>
1884
+ <td style="text-align:right;"> 0 </td>
1885
+ <td style="text-align:right;"> 3 </td>
1886
+ <td style="text-align:right;"> 4 </td>
1887
+ </tr>
1888
+ <tr>
1889
+ <td style="text-align:left;"> Lincoln Continental </td>
1890
+ <td style="text-align:right;"> 10.4 </td>
1891
+ <td style="text-align:right;"> 8 </td>
1892
+ <td style="text-align:right;"> 460.0 </td>
1893
+ <td style="text-align:right;"> 215 </td>
1894
+ <td style="text-align:right;"> 3.00 </td>
1895
+ <td style="text-align:right;"> 5.424 </td>
1896
+ <td style="text-align:right;"> 17.82 </td>
1897
+ <td style="text-align:right;"> 0 </td>
1898
+ <td style="text-align:right;"> 0 </td>
1899
+ <td style="text-align:right;"> 3 </td>
1900
+ <td style="text-align:right;"> 4 </td>
1901
+ </tr>
1902
+ <tr>
1903
+ <td style="text-align:left;"> Chrysler Imperial </td>
1904
+ <td style="text-align:right;"> 14.7 </td>
1905
+ <td style="text-align:right;"> 8 </td>
1906
+ <td style="text-align:right;"> 440.0 </td>
1907
+ <td style="text-align:right;"> 230 </td>
1908
+ <td style="text-align:right;"> 3.23 </td>
1909
+ <td style="text-align:right;"> 5.345 </td>
1910
+ <td style="text-align:right;"> 17.42 </td>
1911
+ <td style="text-align:right;"> 0 </td>
1912
+ <td style="text-align:right;"> 0 </td>
1913
+ <td style="text-align:right;"> 3 </td>
1914
+ <td style="text-align:right;"> 4 </td>
1915
+ </tr>
1916
+ <tr>
1917
+ <td style="text-align:left;"> Fiat 128 </td>
1918
+ <td style="text-align:right;"> 32.4 </td>
1919
+ <td style="text-align:right;"> 4 </td>
1920
+ <td style="text-align:right;"> 78.7 </td>
1921
+ <td style="text-align:right;"> 66 </td>
1922
+ <td style="text-align:right;"> 4.08 </td>
1923
+ <td style="text-align:right;"> 2.200 </td>
1924
+ <td style="text-align:right;"> 19.47 </td>
1925
+ <td style="text-align:right;"> 1 </td>
1926
+ <td style="text-align:right;"> 1 </td>
1927
+ <td style="text-align:right;"> 4 </td>
1928
+ <td style="text-align:right;"> 1 </td>
1929
+ </tr>
1930
+ <tr>
1931
+ <td style="text-align:left;"> Honda Civic </td>
1932
+ <td style="text-align:right;"> 30.4 </td>
1933
+ <td style="text-align:right;"> 4 </td>
1934
+ <td style="text-align:right;"> 75.7 </td>
1935
+ <td style="text-align:right;"> 52 </td>
1936
+ <td style="text-align:right;"> 4.93 </td>
1937
+ <td style="text-align:right;"> 1.615 </td>
1938
+ <td style="text-align:right;"> 18.52 </td>
1939
+ <td style="text-align:right;"> 1 </td>
1940
+ <td style="text-align:right;"> 1 </td>
1941
+ <td style="text-align:right;"> 4 </td>
1942
+ <td style="text-align:right;"> 2 </td>
1943
+ </tr>
1944
+ <tr>
1945
+ <td style="text-align:left;"> Toyota Corolla </td>
1946
+ <td style="text-align:right;"> 33.9 </td>
1947
+ <td style="text-align:right;"> 4 </td>
1948
+ <td style="text-align:right;"> 71.1 </td>
1949
+ <td style="text-align:right;"> 65 </td>
1950
+ <td style="text-align:right;"> 4.22 </td>
1951
+ <td style="text-align:right;"> 1.835 </td>
1952
+ <td style="text-align:right;"> 19.90 </td>
1953
+ <td style="text-align:right;"> 1 </td>
1954
+ <td style="text-align:right;"> 1 </td>
1955
+ <td style="text-align:right;"> 4 </td>
1956
+ <td style="text-align:right;"> 1 </td>
1957
+ </tr>
1958
+ <tr>
1959
+ <td style="text-align:left;"> Toyota Corona </td>
1960
+ <td style="text-align:right;"> 21.5 </td>
1961
+ <td style="text-align:right;"> 4 </td>
1962
+ <td style="text-align:right;"> 120.1 </td>
1963
+ <td style="text-align:right;"> 97 </td>
1964
+ <td style="text-align:right;"> 3.70 </td>
1965
+ <td style="text-align:right;"> 2.465 </td>
1966
+ <td style="text-align:right;"> 20.01 </td>
1967
+ <td style="text-align:right;"> 1 </td>
1968
+ <td style="text-align:right;"> 0 </td>
1969
+ <td style="text-align:right;"> 3 </td>
1970
+ <td style="text-align:right;"> 1 </td>
1971
+ </tr>
1972
+ <tr>
1973
+ <td style="text-align:left;"> Dodge Challenger </td>
1974
+ <td style="text-align:right;"> 15.5 </td>
1975
+ <td style="text-align:right;"> 8 </td>
1976
+ <td style="text-align:right;"> 318.0 </td>
1977
+ <td style="text-align:right;"> 150 </td>
1978
+ <td style="text-align:right;"> 2.76 </td>
1979
+ <td style="text-align:right;"> 3.520 </td>
1980
+ <td style="text-align:right;"> 16.87 </td>
1981
+ <td style="text-align:right;"> 0 </td>
1982
+ <td style="text-align:right;"> 0 </td>
1983
+ <td style="text-align:right;"> 3 </td>
1984
+ <td style="text-align:right;"> 2 </td>
1985
+ </tr>
1986
+ <tr>
1987
+ <td style="text-align:left;"> AMC Javelin </td>
1988
+ <td style="text-align:right;"> 15.2 </td>
1989
+ <td style="text-align:right;"> 8 </td>
1990
+ <td style="text-align:right;"> 304.0 </td>
1991
+ <td style="text-align:right;"> 150 </td>
1992
+ <td style="text-align:right;"> 3.15 </td>
1993
+ <td style="text-align:right;"> 3.435 </td>
1994
+ <td style="text-align:right;"> 17.30 </td>
1995
+ <td style="text-align:right;"> 0 </td>
1996
+ <td style="text-align:right;"> 0 </td>
1997
+ <td style="text-align:right;"> 3 </td>
1998
+ <td style="text-align:right;"> 2 </td>
1999
+ </tr>
2000
+ <tr>
2001
+ <td style="text-align:left;"> Camaro Z28 </td>
2002
+ <td style="text-align:right;"> 13.3 </td>
2003
+ <td style="text-align:right;"> 8 </td>
2004
+ <td style="text-align:right;"> 350.0 </td>
2005
+ <td style="text-align:right;"> 245 </td>
2006
+ <td style="text-align:right;"> 3.73 </td>
2007
+ <td style="text-align:right;"> 3.840 </td>
2008
+ <td style="text-align:right;"> 15.41 </td>
2009
+ <td style="text-align:right;"> 0 </td>
2010
+ <td style="text-align:right;"> 0 </td>
2011
+ <td style="text-align:right;"> 3 </td>
2012
+ <td style="text-align:right;"> 4 </td>
2013
+ </tr>
2014
+ <tr>
2015
+ <td style="text-align:left;"> Pontiac Firebird </td>
2016
+ <td style="text-align:right;"> 19.2 </td>
2017
+ <td style="text-align:right;"> 8 </td>
2018
+ <td style="text-align:right;"> 400.0 </td>
2019
+ <td style="text-align:right;"> 175 </td>
2020
+ <td style="text-align:right;"> 3.08 </td>
2021
+ <td style="text-align:right;"> 3.845 </td>
2022
+ <td style="text-align:right;"> 17.05 </td>
2023
+ <td style="text-align:right;"> 0 </td>
2024
+ <td style="text-align:right;"> 0 </td>
2025
+ <td style="text-align:right;"> 3 </td>
2026
+ <td style="text-align:right;"> 2 </td>
2027
+ </tr>
2028
+ <tr>
2029
+ <td style="text-align:left;"> Fiat X1-9 </td>
2030
+ <td style="text-align:right;"> 27.3 </td>
2031
+ <td style="text-align:right;"> 4 </td>
2032
+ <td style="text-align:right;"> 79.0 </td>
2033
+ <td style="text-align:right;"> 66 </td>
2034
+ <td style="text-align:right;"> 4.08 </td>
2035
+ <td style="text-align:right;"> 1.935 </td>
2036
+ <td style="text-align:right;"> 18.90 </td>
2037
+ <td style="text-align:right;"> 1 </td>
2038
+ <td style="text-align:right;"> 1 </td>
2039
+ <td style="text-align:right;"> 4 </td>
2040
+ <td style="text-align:right;"> 1 </td>
2041
+ </tr>
2042
+ <tr>
2043
+ <td style="text-align:left;"> Porsche 914-2 </td>
2044
+ <td style="text-align:right;"> 26.0 </td>
2045
+ <td style="text-align:right;"> 4 </td>
2046
+ <td style="text-align:right;"> 120.3 </td>
2047
+ <td style="text-align:right;"> 91 </td>
2048
+ <td style="text-align:right;"> 4.43 </td>
2049
+ <td style="text-align:right;"> 2.140 </td>
2050
+ <td style="text-align:right;"> 16.70 </td>
2051
+ <td style="text-align:right;"> 0 </td>
2052
+ <td style="text-align:right;"> 1 </td>
2053
+ <td style="text-align:right;"> 5 </td>
2054
+ <td style="text-align:right;"> 2 </td>
2055
+ </tr>
2056
+ <tr>
2057
+ <td style="text-align:left;"> Lotus Europa </td>
2058
+ <td style="text-align:right;"> 30.4 </td>
2059
+ <td style="text-align:right;"> 4 </td>
2060
+ <td style="text-align:right;"> 95.1 </td>
2061
+ <td style="text-align:right;"> 113 </td>
2062
+ <td style="text-align:right;"> 3.77 </td>
2063
+ <td style="text-align:right;"> 1.513 </td>
2064
+ <td style="text-align:right;"> 16.90 </td>
2065
+ <td style="text-align:right;"> 1 </td>
2066
+ <td style="text-align:right;"> 1 </td>
2067
+ <td style="text-align:right;"> 5 </td>
2068
+ <td style="text-align:right;"> 2 </td>
2069
+ </tr>
2070
+ <tr>
2071
+ <td style="text-align:left;"> Ford Pantera L </td>
2072
+ <td style="text-align:right;"> 15.8 </td>
2073
+ <td style="text-align:right;"> 8 </td>
2074
+ <td style="text-align:right;"> 351.0 </td>
2075
+ <td style="text-align:right;"> 264 </td>
2076
+ <td style="text-align:right;"> 4.22 </td>
2077
+ <td style="text-align:right;"> 3.170 </td>
2078
+ <td style="text-align:right;"> 14.50 </td>
2079
+ <td style="text-align:right;"> 0 </td>
2080
+ <td style="text-align:right;"> 1 </td>
2081
+ <td style="text-align:right;"> 5 </td>
2082
+ <td style="text-align:right;"> 4 </td>
2083
+ </tr>
2084
+ <tr>
2085
+ <td style="text-align:left;"> Ferrari Dino </td>
2086
+ <td style="text-align:right;"> 19.7 </td>
2087
+ <td style="text-align:right;"> 6 </td>
2088
+ <td style="text-align:right;"> 145.0 </td>
2089
+ <td style="text-align:right;"> 175 </td>
2090
+ <td style="text-align:right;"> 3.62 </td>
2091
+ <td style="text-align:right;"> 2.770 </td>
2092
+ <td style="text-align:right;"> 15.50 </td>
2093
+ <td style="text-align:right;"> 0 </td>
2094
+ <td style="text-align:right;"> 1 </td>
2095
+ <td style="text-align:right;"> 5 </td>
2096
+ <td style="text-align:right;"> 6 </td>
2097
+ </tr>
2098
+ <tr>
2099
+ <td style="text-align:left;"> Maserati Bora </td>
2100
+ <td style="text-align:right;"> 15.0 </td>
2101
+ <td style="text-align:right;"> 8 </td>
2102
+ <td style="text-align:right;"> 301.0 </td>
2103
+ <td style="text-align:right;"> 335 </td>
2104
+ <td style="text-align:right;"> 3.54 </td>
2105
+ <td style="text-align:right;"> 3.570 </td>
2106
+ <td style="text-align:right;"> 14.60 </td>
2107
+ <td style="text-align:right;"> 0 </td>
2108
+ <td style="text-align:right;"> 1 </td>
2109
+ <td style="text-align:right;"> 5 </td>
2110
+ <td style="text-align:right;"> 8 </td>
2111
+ </tr>
2112
+ <tr>
2113
+ <td style="text-align:left;"> Volvo 142E </td>
2114
+ <td style="text-align:right;"> 21.4 </td>
2115
+ <td style="text-align:right;"> 4 </td>
2116
+ <td style="text-align:right;"> 121.0 </td>
2117
+ <td style="text-align:right;"> 109 </td>
2118
+ <td style="text-align:right;"> 4.11 </td>
2119
+ <td style="text-align:right;"> 2.780 </td>
2120
+ <td style="text-align:right;"> 18.60 </td>
2121
+ <td style="text-align:right;"> 1 </td>
2122
+ <td style="text-align:right;"> 1 </td>
2123
+ <td style="text-align:right;"> 4 </td>
2124
+ <td style="text-align:right;"> 2 </td>
2125
+ </tr>
2126
+ </tbody>
2127
+ </table>
2128
+
2129
+ # Basic Data Types
2130
+
2131
+ ## Vector
2132
+
2133
+ Vectors can be thought of as contiguous cells containing data. Cells are accessed through
2134
+ indexing operations such as x[5]. Galaaz has six basic (‘atomic’) vector types: logical,
2135
+ integer, real, complex, string (or character) and raw. The modes and storage modes for the
2136
+ different vector types are listed in the following
2137
+ table.
2138
+
2139
+ | typeof | mode | storage.mode |
2140
+ |-----------|:---------:|-------------:|
2141
+ | logical | logical | logical |
2142
+ | integer | numeric | integer |
2143
+ | double | numeric | double |
2144
+ | complex | complex | complex |
2145
+ | character | character | character |
2146
+ | raw | raw | raw |
2147
+
2148
+ Single numbers, such as 4.2, and strings, such as "four point two" are still vectors, of length
2149
+ 1; there are no more basic types. Vectors with length zero are possible (and useful).
2150
+ String vectors have mode and storage mode "character". A single element of a character
2151
+ vector is often referred to as a character string.
2152
+
2153
+ To create a vector the 'c' (concatenate) method from the 'R' module should be used:
2154
+
2155
+
2156
+ ```ruby
2157
+ vec = R.c(1, 2, 3)
2158
+ puts vec
2159
+ ```
2160
+
2161
+ ```
2162
+ ## [1] 1 2 3
2163
+ ```
2164
+
2165
+ Let's take a look at the type, mode and storage.mode of our vector vec. In order to print
2166
+ this out, we are creating a data frame 'df' and printing it out. A data frame, for those
2167
+ not familiar with it, is basically a table. Here we create the data frame and add the
2168
+ column name by passing named parameters for each column, such as 'typeof:', 'mode:' and
2169
+ 'storage\_\_mode:'. You should also note here that the double underscore is converted to a '.'.
2170
+ So, when printed 'storage\_\_mode' will actually print as 'storage.mode'.
2171
+
2172
+ Data frames will later be more carefully described. In R, the method used to create a
2173
+ data frame is 'data.frame', in Galaaz we use 'data\_\_frame'.
2174
+
2175
+
2176
+ ```ruby
2177
+ df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
2178
+ puts df
2179
+ ```
2180
+
2181
+ ```
2182
+ ## typeof mode storage.mode
2183
+ ## 1 integer numeric integer
2184
+ ```
2185
+
2186
+ If you want to create a vector with floating point numbers, then we need at least one of the
2187
+ vector's elements to be a float, such as 1.0. R users should be careful, since in R a number
2188
+ like '1' is converted to float and to have an integer the R developer will use '1L'. Galaaz
2189
+ follows normal Ruby rules and the number 1 is an integer and 1.0 is a float.
2190
+
2191
+
2192
+ ```ruby
2193
+ vec = R.c(1.0, 2, 3)
2194
+ puts vec
2195
+ ```
2196
+
2197
+ ```
2198
+ ## [1] 1 2 3
2199
+ ```
2200
+
2201
+
2202
+ ```ruby
2203
+ df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
2204
+ outputs df.kable.kable_styling
2205
+ ```
2206
+
2207
+ <table class="table" style="margin-left: auto; margin-right: auto;">
2208
+ <thead>
2209
+ <tr>
2210
+ <th style="text-align:left;"> typeof </th>
2211
+ <th style="text-align:left;"> mode </th>
2212
+ <th style="text-align:left;"> storage.mode </th>
2213
+ </tr>
2214
+ </thead>
2215
+ <tbody>
2216
+ <tr>
2217
+ <td style="text-align:left;"> double </td>
2218
+ <td style="text-align:left;"> numeric </td>
2219
+ <td style="text-align:left;"> double </td>
2220
+ </tr>
2221
+ </tbody>
2222
+ </table>
2223
+
2224
+ In this next example we try to create a vector with a variable 'hello' that has not yet
2225
+ been defined. This will raise an exception that is printed out. We get two return blocks,
2226
+ the first with a message explaining what went wrong and the second with the full backtrace
2227
+ of the error.
2228
+
2229
+
2230
+ ```ruby
2231
+ vec = R.c(1, hello, 5)
2232
+ ```
2233
+
2234
+ ```
2235
+ ## Message:
2236
+ ## undefined local variable or method `hello' for #<RC:0x3d8 @out_list=nil>:RC
2237
+ ```
2238
+
2239
+ ```
2240
+ ## Message:
2241
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:103:in `get_binding'
2242
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `eval'
2243
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `exec_ruby'
2244
+ ## /home/rbotafogo/desenv/galaaz/lib/gknit/knitr_engine.rb:650:in `block in initialize'
2245
+ ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
2246
+ ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
2247
+ ## (eval):3:in `function(...) {\n rb_method(...)'
2248
+ ## unknown.r:1:in `in_dir'
2249
+ ## unknown.r:1:in `block_exec'
2250
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:92:in `call_block'
2251
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:6:in `process_group.block'
2252
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:3:in `<no source>'
2253
+ ## unknown.r:1:in `withCallingHandlers'
2254
+ ## unknown.r:1:in `process_file'
2255
+ ## unknown.r:1:in `<no source>'
2256
+ ## unknown.r:1:in `<no source>'
2257
+ ## <REPL>:4:in `<repl wrapper>'
2258
+ ## <REPL>:1
2259
+ ```
2260
+
2261
+ Here is a vector with logical values
2262
+
2263
+
2264
+ ```ruby
2265
+ vec = R.c(true, true, false, false, true)
2266
+ puts vec
2267
+ ```
2268
+
2269
+ ```
2270
+ ## [1] TRUE TRUE FALSE FALSE TRUE
2271
+ ```
2272
+
2273
+ ### Combining Vectors
2274
+
2275
+ The 'c' function used to create vectors can also be used to combine two vectors:
2276
+
2277
+
2278
+ ```ruby
2279
+ vec1 = R.c(10.0, 20.0, 30.0)
2280
+ vec2 = R.c(4.0, 5.0, 6.0)
2281
+ vec = R.c(vec1, vec2)
2282
+ puts vec
2283
+ ```
2284
+
2285
+ ```
2286
+ ## [1] 10 20 30 4 5 6
2287
+ ```
2288
+ In Galaaz, methods can be chained (somewhat like the pipe operator in R %>%, but more generic).
2289
+ In this next example, method 'c' is chained after 'vec1'. This also looks like 'c' is a
2290
+ method of the vector, but in reality, this is actually closer to the pipe operator. When
2291
+ Galaaz identifies that 'c' is not a method of 'vec1' it actually tries to call 'R.c' with
2292
+ 'vec1' as the first argument concatenated with all the other available arguments. The code
2293
+ below is automatically converted to the code above.
2294
+
2295
+
2296
+ ```ruby
2297
+ vec = vec1.c(vec2)
2298
+ puts vec
2299
+ ```
2300
+
2301
+ ```
2302
+ ## [1] 10 20 30 4 5 6
2303
+ ```
2304
+
2305
+ ### Vector Arithmetic
2306
+
2307
+ Arithmetic operations on vectors are performed element by element:
2308
+
2309
+
2310
+ ```ruby
2311
+ puts vec1 + vec2
2312
+ ```
2313
+
2314
+ ```
2315
+ ## [1] 14 25 36
2316
+ ```
2317
+
2318
+
2319
+ ```ruby
2320
+ puts vec1 * 5
2321
+ ```
2322
+
2323
+ ```
2324
+ ## [1] 50 100 150
2325
+ ```
2326
+
2327
+ When vectors have different lengths, a recycling rule is applied to the shorter vector:
2328
+
2329
+
2330
+ ```ruby
2331
+ vec3 = R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
2332
+ puts vec4 = vec1 + vec3
2333
+ ```
2334
+
2335
+ ```
2336
+ ## [1] 11 22 33 14 25 36 17 28 39
2337
+ ```
2338
+
2339
+ ### Vector Indexing
2340
+
2341
+ Vectors can be indexed by using the '[]' operator:
2342
+
2343
+
2344
+ ```ruby
2345
+ puts vec4[3]
2346
+ ```
2347
+
2348
+ ```
2349
+ ## [1] 33
2350
+ ```
2351
+
2352
+ We can also index a vector with another vector. For example, in the code below, we take elements
2353
+ 1, 3, 5, and 7 from vec4:
2354
+
2355
+
2356
+ ```ruby
2357
+ puts vec4[R.c(1, 3, 5, 7)]
2358
+ ```
2359
+
2360
+ ```
2361
+ ## [1] 11 33 25 17
2362
+ ```
2363
+
2364
+ Repeating an index and having indices out of order is valid code:
2365
+
2366
+
2367
+ ```ruby
2368
+ puts vec4[R.c(1, 3, 3, 1)]
2369
+ ```
2370
+
2371
+ ```
2372
+ ## [1] 11 33 33 11
2373
+ ```
2374
+
2375
+ It is also possible to index a vector with a negative number or negative vector. In these cases
2376
+ the indexed values are not returned:
2377
+
2378
+
2379
+ ```ruby
2380
+ puts vec4[-3]
2381
+ puts vec4[-R.c(1, 3, 5, 7)]
2382
+ ```
2383
+
2384
+ ```
2385
+ ## [1] 11 22 14 25 36 17 28 39
2386
+ ## [1] 22 14 36 28 39
2387
+ ```
2388
+
2389
+ If an index is out of range, a missing value (NA) will be reported.
2390
+
2391
+
2392
+ ```ruby
2393
+ puts vec4[30]
2394
+ ```
2395
+
2396
+ ```
2397
+ ## [1] NA
2398
+ ```
2399
+
2400
+ It is also possible to index a vector by range:
2401
+
2402
+
2403
+ ```ruby
2404
+ puts vec4[(2..5)]
2405
+ ```
2406
+
2407
+ ```
2408
+ ## [1] 22 33 14 25
2409
+ ```
2410
+
2411
+ Elements in a vector can be named using the 'names' attribute of a vector:
2412
+
2413
+
2414
+ ```ruby
2415
+ full_name = R.c("Rodrigo", "A", "Botafogo")
2416
+ full_name.names = R.c("First", "Middle", "Last")
2417
+ puts full_name
2418
+ ```
2419
+
2420
+ ```
2421
+ ## First Middle Last
2422
+ ## "Rodrigo" "A" "Botafogo"
2423
+ ```
2424
+
2425
+ Or it can also be named by using the 'c' function with named parameters:
2426
+
2427
+
2428
+ ```ruby
2429
+ full_name = R.c(First: "Rodrigo", Middle: "A", Last: "Botafogo")
2430
+ puts full_name
2431
+ ```
2432
+
2433
+ ```
2434
+ ## First Middle Last
2435
+ ## "Rodrigo" "A" "Botafogo"
2436
+ ```
2437
+
2438
+ ### Extracting Native Ruby Types from a Vector
2439
+
2440
+ Vectors created with 'R.c' are of class R::Vector. You might have noticed that when indexing a
2441
+ vector, a new vector is returned, even if this vector has one single element. In order to use
2442
+ R::Vector with other ruby classes it might be necessary to extract the actual Ruby native type
2443
+ from the vector. In order to do this extraction the '>>' operator is used.
2444
+
2445
+
2446
+ ```ruby
2447
+ puts vec4
2448
+ puts vec4 >> 0
2449
+ puts vec4 >> 4
2450
+ ```
2451
+
2452
+ ```
2453
+ ## [1] 11 22 33 14 25 36 17 28 39
2454
+ ## 11.0
2455
+ ## 25.0
2456
+ ```
2457
+
2458
+ Note that indexing with '>>' starts at 0 and not at 1; also, we cannot do negative indexing.
2459
+
2460
+ ## Matrix
2461
+
2462
+ A matrix is a collection of elements organized as a two dimensional table. A matrix can be
2463
+ created by the 'matrix' function:
2464
+
2465
+
2466
+ ```ruby
2467
+ mat = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
2468
+ nrow: 3,
2469
+ ncol: 3)
2470
+
2471
+ puts mat
2472
+ ```
2473
+
2474
+ ```
2475
+ ## [,1] [,2] [,3]
2476
+ ## [1,] 1 4 7
2477
+ ## [2,] 2 5 8
2478
+ ## [3,] 3 6 9
2479
+ ```
2480
+ Note that matrix data is organized by column first. It is possible to organize the matrix
2481
+ memory by row first passing an extra argument to the 'matrix' function:
2482
+
2483
+
2484
+ ```ruby
2485
+ mat_row = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
2486
+ nrow: 3,
2487
+ ncol: 3,
2488
+ byrow: true)
2489
+
2490
+ puts mat_row
2491
+ ```
2492
+
2493
+ ```
2494
+ ## [,1] [,2] [,3]
2495
+ ## [1,] 1 2 3
2496
+ ## [2,] 4 5 6
2497
+ ## [3,] 7 8 9
2498
+ ```
2499
+
2500
+ ### Indexing a Matrix
2501
+
2502
+ A matrix can be indexed by [row, column]:
2503
+
2504
+
2505
+ ```ruby
2506
+ puts mat_row[1, 1]
2507
+ puts mat_row[2, 3]
2508
+ ```
2509
+
2510
+ ```
2511
+ ## [1] 1
2512
+ ## [1] 6
2513
+ ```
2514
+ It is possible to index an entire row or column with the ':all' keyword
2515
+
2516
+
2517
+ ```ruby
2518
+ puts mat_row[1, :all]
2519
+ puts mat_row[:all, 2]
2520
+ ```
2521
+
2522
+ ```
2523
+ ## [1] 1 2 3
2524
+ ## [1] 2 5 8
2525
+ ```
2526
+
2527
+ Indexing with a vector is also possible for matrices. In the following example we want
2528
+ rows 1 and 3 and columns 2 and 3 building a 2 x 2 matrix.
2529
+
2530
+
2531
+ ```ruby
2532
+ puts mat_row[R.c(1, 3), R.c(2, 3)]
2533
+ ```
2534
+
2535
+ ```
2536
+ ## [,1] [,2]
2537
+ ## [1,] 2 3
2538
+ ## [2,] 8 9
2539
+ ```
2540
+
2541
+ Matrices can be combined with functions 'rbind':
2542
+
2543
+
2544
+ ```ruby
2545
+ puts mat_row.rbind(mat)
2546
+ ```
2547
+
2548
+ ```
2549
+ ## [,1] [,2] [,3]
2550
+ ## [1,] 1 2 3
2551
+ ## [2,] 4 5 6
2552
+ ## [3,] 7 8 9
2553
+ ## [4,] 1 4 7
2554
+ ## [5,] 2 5 8
2555
+ ## [6,] 3 6 9
2556
+ ```
2557
+
2558
+ and 'cbind':
2559
+
2560
+
2561
+ ```ruby
2562
+ puts mat_row.cbind(mat)
2563
+ ```
2564
+
2565
+ ```
2566
+ ## [,1] [,2] [,3] [,4] [,5] [,6]
2567
+ ## [1,] 1 2 3 1 4 7
2568
+ ## [2,] 4 5 6 2 5 8
2569
+ ## [3,] 7 8 9 3 6 9
2570
+ ```
2571
+
2572
+ ## List
2573
+
2574
+ A list is a data structure that can contain sublists of different types, while vector and matrix
2575
+ can only hold one type of element.
2576
+
2577
+
2578
+ ```ruby
2579
+ nums = R.c(1.0, 2.0, 3.0)
2580
+ strs = R.c("a", "b", "c", "d")
2581
+ bool = R.c(true, true, false)
2582
+ lst = R.list(nums: nums, strs: strs, bool: bool)
2583
+ puts lst
2584
+ ```
2585
+
2586
+ ```
2587
+ ## $nums
2588
+ ## [1] 1 2 3
2589
+ ##
2590
+ ## $strs
2591
+ ## [1] "a" "b" "c" "d"
2592
+ ##
2593
+ ## $bool
2594
+ ## [1] TRUE TRUE FALSE
2595
+ ```
2596
+
2597
+ Note that 'lst' elements are named elements.
2598
+
2599
+
2600
+ ### List Indexing
2601
+
2602
+ List indexing, also called slicing, is done using the '[]' operator and the '[[]]' operator. Let's
2603
+ first start with the '[]' operator. The list above has three sublists; indexing with '[]' will
2604
+ return one of the sublists.
2605
+
2606
+
2607
+ ```ruby
2608
+ puts lst[1]
2609
+ ```
2610
+
2611
+ ```
2612
+ ## $nums
2613
+ ## [1] 1 2 3
2614
+ ```
2615
+
2616
+ Note that when using '[]' a new list is returned. When using the double square bracket operator
2617
+ the value returned is the actual element of the list in the given position and not a slice of
2618
+ the original list
2619
+
2620
+
2621
+
2622
+ ```ruby
2623
+ puts lst[[1]]
2624
+ ```
2625
+
2626
+ ```
2627
+ ## [1] 1 2 3
2628
+ ```
2629
+
2630
+ When elements are named, as done with lst, indexing can be done by name:
2631
+
2632
+
2633
+ ```ruby
2634
+ puts lst[['bool']][[1]] >> 0
2635
+ ```
2636
+
2637
+ ```
2638
+ ## true
2639
+ ```
2640
+
2641
+ In this example, first the 'bool' element of the list was extracted, not as a list, but as a vector,
2642
+ then the first element of the vector was extracted (note that vectors also accept the '[[]]'
2643
+ operator) and then the vector was indexed by its first element, extracting the native Ruby type.
2644
+
2645
+
2646
+ ## Data Frame
2647
+
2648
+ A data frame is a table like structure in which each column has the same number of
2649
+ rows. Data frames are the basic structure for storing data for data analysis. We have already
2650
+ seen a data frame previously when we accessed variable '~:mtcars'. In order to create a
2651
+ data frame, function 'data__frame' is used:
2652
+
2653
+
2654
+ ```ruby
2655
+ df = R.data__frame(
2656
+ year: R.c(2010, 2011, 2012),
2657
+ income: R.c(1000.0, 1500.0, 2000.0))
2658
+
2659
+ puts df
2660
+ ```
2661
+
2662
+ ```
2663
+ ## year income
2664
+ ## 1 2010 1000
2665
+ ## 2 2011 1500
2666
+ ## 3 2012 2000
2667
+ ```
2668
+
2669
+ ### Data Frame Indexing
2670
+
2671
+ A data frame can be indexed the same way as a matrix, by using '[row, column]', where row and
2672
+ column can either be a numeric or the name of the row or column
2673
+
2674
+
2675
+ ```ruby
2676
+ puts (~:mtcars).head
2677
+ puts (~:mtcars)[1, 2]
2678
+ puts (~:mtcars)['Datsun 710', 'mpg']
2679
+ ```
2680
+
2681
+ ```
2682
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2683
+ ## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
2684
+ ## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
2685
+ ## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
2686
+ ## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
2687
+ ## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
2688
+ ## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
2689
+ ## [1] 6
2690
+ ## [1] 22.8
2691
+ ```
2692
+
2693
+ Extracting a column from a data frame as a vector can be done by using the double square bracket
2694
+ operator:
2695
+
2696
+
2697
+ ```ruby
2698
+ puts (~:mtcars)[['mpg']]
2699
+ ```
2700
+
2701
+ ```
2702
+ ## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
2703
+ ## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
2704
+ ## [29] 15.8 19.7 15.0 21.4
2705
+ ```
2706
+
2707
+ A data frame column can also be accessed as if it were an instance variable of the data frame:
2708
+
2709
+
2710
+ ```ruby
2711
+ puts (~:mtcars).mpg
2712
+ ```
2713
+
2714
+ ```
2715
+ ## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
2716
+ ## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
2717
+ ## [29] 15.8 19.7 15.0 21.4
2718
+ ```
2719
+
2720
+ Slicing a data frame can be done by indexing it with a vector (we use 'head' to reduce the
2721
+ output):
2722
+
2723
+
2724
+ ```ruby
2725
+ puts (~:mtcars)[R.c('mpg', 'hp')].head
2726
+ ```
2727
+
2728
+ ```
2729
+ ## mpg hp
2730
+ ## Mazda RX4 21.0 110
2731
+ ## Mazda RX4 Wag 21.0 110
2732
+ ## Datsun 710 22.8 93
2733
+ ## Hornet 4 Drive 21.4 110
2734
+ ## Hornet Sportabout 18.7 175
2735
+ ## Valiant 18.1 105
2736
+ ```
2737
+
2738
+ A row slice can be obtained by indexing by row and using the ':all' keyword for the column:
2739
+
2740
+
2741
+ ```ruby
2742
+ puts (~:mtcars)[R.c('Datsun 710', 'Camaro Z28'), :all]
2743
+ ```
2744
+
2745
+ ```
2746
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2747
+ ## Datsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1
2748
+ ## Camaro Z28 13.3 8 350 245 3.73 3.84 15.41 0 0 3 4
2749
+ ```
2750
+
2751
+ Finally, a data frame can also be indexed with a logical vector. In this next example, the
2752
+ 'am' column of :mtcars is compared with 0 (with method 'eq'). When 'am' is equal to 0 the
2753
+ car is automatic. So, by doing '(~:mtcars).am.eq 0' a logical vector is created with
2754
+ 'true' whenever 'am' is 0 and 'false' otherwise.
2755
+
2756
+
2757
+ ```ruby
2758
+ # obtain a vector with 'true' for cars with automatic transmission
2759
+ automatic = (~:mtcars).am.eq 0
2760
+ puts automatic
2761
+ ```
2762
+
2763
+ ```
2764
+ ## [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
2765
+ ## [12] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE
2766
+ ## [23] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
2767
+ ```
2768
+
2769
+ Using this logical vector, the data frame is indexed, returning a new data frame in
2770
+ which all cars have automatic transmission.
2771
+
2772
+
2773
+ ```ruby
2774
+ # slice the data frame by using this vector
2775
+ puts (~:mtcars)[automatic, :all]
2776
+ ```
2777
+
2778
+ ```
2779
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2780
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
2781
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
2782
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
2783
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
2784
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
2785
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
2786
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
2787
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
2788
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
2789
+ ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
2790
+ ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
2791
+ ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
2792
+ ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
2793
+ ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
2794
+ ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
2795
+ ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
2796
+ ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
2797
+ ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
2798
+ ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
2799
+ ```
2800
+
2801
+ # Writing Expressions in Galaaz
2802
+
2803
+ Galaaz extends Ruby to work with complex expressions, similar to R's expressions built with 'quote'
2804
+ (base R) or 'quo' (tidyverse). Let's take a look at some of those expressions.
2805
+
2806
+ ## Expressions from operators
2807
+
2808
+ The code below
2809
+ creates an expression summing two symbols
2810
+
2811
+
2812
+ ```ruby
2813
+ exp1 = :a + :b
2814
+ puts exp1
2815
+ ```
2816
+
2817
+ ```
2818
+ ## a + b
2819
+ ```
2820
+ We can build any complex mathematical expression
2821
+
2822
+
2823
+ ```ruby
2824
+ exp2 = (:a + :b) * 2.0 + :c ** 2 / :z
2825
+ puts exp2
2826
+ ```
2827
+
2828
+ ```
2829
+ ## (a + b) * 2 + c^2L/z
2830
+ ```
2831
+
2832
+ It is also possible to use inequality operators in building expressions
2833
+
2834
+
2835
+ ```ruby
2836
+ exp3 = (:a + :b) >= :z
2837
+ puts exp3
2838
+ ```
2839
+
2840
+ ```
2841
+ ## a + b >= z
2842
+ ```
2843
+
2844
+ Galaaz provides both symbolic representations for operators, such as (>, <, !=), and functional
2845
+ notation for those operators such as (.gt, .ge, etc.). So the same expression written
2846
+ above can also be written as
2847
+
2848
+
2849
+ ```ruby
2850
+ exp4 = (:a + :b).ge :z
2851
+ puts exp4
2852
+ ```
2853
+
2854
+ ```
2855
+ ## a + b >= z
2856
+ ```
2857
+
2858
+ Two types of expressions can only be created with the functional representation of the operators,
2859
+ those are expressions involving '==', and '='. In order to write an expression involving '==' we
2860
+ need to use the method '.eq' and for '=' we need the function '.assign'
2861
+
2862
+
2863
+ ```ruby
2864
+ exp5 = (:a + :b).eq :z
2865
+ puts exp5
2866
+ ```
2867
+
2868
+ ```
2869
+ ## a + b == z
2870
+ ```
2871
+
2872
+
2873
+ ```ruby
2874
+ exp6 = :y.assign :a + :b
2875
+ puts exp6
2876
+ ```
2877
+
2878
+ ```
2879
+ ## y <- a + b
2880
+ ```
2881
+ In general we think that using the functional notation is preferable to using the
2882
+ symbolic notation as otherwise, we end up writing invalid expressions such as
2883
+
2884
+
2885
+ ```ruby
2886
+ exp_wrong = (:a + :b) == :z
2887
+ puts exp_wrong
2888
+ ```
2889
+ and it might be difficult to understand what is going on here. The problem lies with the fact that
2890
+ when using '==' we are comparing expression (:a + :b) to expression :z with '=='. When the
2891
+ comparison is executed, the system tries to evaluate :a, :b and :z, and those symbols at
2892
+ this time are not bound to anything and we get an "object 'a' not found" message.
2893
+ If we only use functional notation, this type of error will not occur.
2894
+
2895
+ ## Expressions with R methods
2896
+
2897
+ It is often necessary to create an expression that uses a method or function. For instance, in
2898
+ mathematics, it's quite natural to write an expression such as $y = sin(x)$. In this case, the
2899
+ 'sin' function is part of the expression and should not be immediately executed. Now, let's say
2900
+ that 'x' is an angle of 45$^\circ$ and we actually want our expression to be $y = 0.850...$.
2901
+ When we want the function to be part of the expression, we call the function preceding it
2902
+ by the letter E, such as 'E.sin(x)'
2903
+
2904
+
2905
+ ```ruby
2906
+ exp7 = :y.assign E.sin(:x)
2907
+ puts exp7
2908
+ ```
2909
+
2910
+ ```
2911
+ ## y <- sin(x)
2912
+ ```
2913
+
2914
+ Expressions can also be written using '.' notation:
2915
+
2916
+
2917
+ ```ruby
2918
+ exp8 = :y.assign :x.sin
2919
+ puts exp8
2920
+ ```
2921
+
2922
+ ```
2923
+ ## y <- sin(x)
2924
+ ```
2925
+
2926
+ When a function has multiple arguments, the first one can be used before the '.':
2927
+
2928
+
2929
+ ```ruby
2930
+ exp9 = :x.c(:y)
2931
+ puts exp9
2932
+ ```
2933
+
2934
+ ```
2935
+ ## c(x, y)
2936
+ ```
2937
+
2938
+ ## Evaluating an Expression
2939
+
2940
+ Expressions can be evaluated by calling function 'eval' with a binding. A binding can be provided
2941
+ with a list:
2942
+
2943
+
2944
+ ```ruby
2945
+ exp = (:a + :b) * 2.0 + :c ** 2 / :z
2946
+ puts exp.eval(R.list(a: 10, b: 20, c: 30, z: 40))
2947
+ ```
2948
+
2949
+ ```
2950
+ ## [1] 82.5
2951
+ ```
2952
+
2953
+ ... with a data frame:
2954
+
2955
+
2956
+ ```ruby
2957
+ df = R.data__frame(
2958
+ a: R.c(1, 2, 3),
2959
+ b: R.c(10, 20, 30),
2960
+ c: R.c(100, 200, 300),
2961
+ z: R.c(1000, 2000, 3000))
2962
+
2963
+ puts exp.eval(df)
2964
+ ```
2965
+
2966
+ ```
2967
+ ## [1] 32 64 96
2968
+ ```
2969
+
2970
+ # Manipulating Data
2971
+
2972
+ One of the major benefits of Galaaz is to bring strong data manipulation to Ruby. The following
2973
+ examples were extracted from Hadley's "R for Data Science" (https://r4ds.had.co.nz/). This
2974
+ is a highly recommended book for those not already familiar with the 'tidyverse' style of
2975
+ programming in R. In the sections to follow, we will limit ourselves to convert the R code to
2976
+ Galaaz.
2977
+
2978
+ For these
2979
+ examples, we will investigate the nycflights13 data set available on the package by the
2980
+ same name. We use function 'R.install\_and\_loads' that checks if the library is available
2981
+ locally, and if not, installs it. This data frame contains all 336,776 flights that
2982
+ departed from New York City in 2013. The data comes from the US Bureau of
2983
+ Transportation Statistics.
2984
+
2985
+ Dplyr uses 'tibbles' in place of data frames; unfortunately, tibbles do not yet print properly in
2986
+ Galaaz due to a bug in fastR. In order to print a tibble we need to convert it to a data frame
2987
+ using the 'as\_\_data__frame' method.
2988
+
2989
+
2990
+ ```ruby
2991
+ R.install_and_loads('nycflights13')
2992
+ R.library('dplyr')
2993
+ ```
2994
+
2995
+
2996
+ ```ruby
2997
+ flights = ~:flights
2998
+ puts flights.head
2999
+ ```
3000
+
3001
+ ```
3002
+ ## # A tibble: 6 x 19
3003
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3004
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3005
+ ## 1 2013 1 1 517 515 2 830
3006
+ ## 2 2013 1 1 533 529 4 850
3007
+ ## 3 2013 1 1 542 540 2 923
3008
+ ## 4 2013 1 1 544 545 -1 1004
3009
+ ## 5 2013 1 1 554 600 -6 812
3010
+ ## 6 2013 1 1 554 558 -4 740
3011
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3012
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3013
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3014
+ ## # time_hour <dttm>
3015
+ ```
3016
+
3017
+ ## Filtering rows with Filter
3018
+
3019
+ In this example we filter the flights data set by giving to the filter function two expressions:
3020
+ the first ':month.eq 1' and the second ':day.eq 1'.
3021
+
3022
+
3023
+ ```ruby
3024
+ puts flights.filter((:month.eq 1), (:day.eq 1)).head
3025
+ ```
3026
+
3027
+ ```
3028
+ ## # A tibble: 6 x 19
3029
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3030
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3031
+ ## 1 2013 1 1 517 515 2 830
3032
+ ## 2 2013 1 1 533 529 4 850
3033
+ ## 3 2013 1 1 542 540 2 923
3034
+ ## 4 2013 1 1 544 545 -1 1004
3035
+ ## 5 2013 1 1 554 600 -6 812
3036
+ ## 6 2013 1 1 554 558 -4 740
3037
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3038
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3039
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3040
+ ## # time_hour <dttm>
3041
+ ```
3042
+
3043
+ ## Logical Operators
3044
+
3045
+ All flights that departed in November or December
3046
+
3047
+
3048
+ ```ruby
3049
+ puts flights.filter((:month.eq 11) | (:month.eq 12)).head
3050
+ ```
3051
+
3052
+ ```
3053
+ ## # A tibble: 6 x 19
3054
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3055
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3056
+ ## 1 2013 11 1 5 2359 6 352
3057
+ ## 2 2013 11 1 35 2250 105 123
3058
+ ## 3 2013 11 1 455 500 -5 641
3059
+ ## 4 2013 11 1 539 545 -6 856
3060
+ ## 5 2013 11 1 542 545 -3 831
3061
+ ## 6 2013 11 1 549 600 -11 912
3062
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3063
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3064
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3065
+ ## # time_hour <dttm>
3066
+ ```
3067
+
3068
+ The same as above, but using the 'in' operator. In R, it is possible to define many operators
3069
+ by doing %<op>%. The %in% operator checks if a value is in a vector. In order to use those
3070
+ operators from Galaaz the '._' method is used, where the first argument is the operator's
3071
+ symbol, in this case ':in' and the second argument is the vector:
3072
+
3073
+
3074
+ ```ruby
3075
+ puts flights.filter(:month._ :in, R.c(11, 12)).head
3076
+ ```
3077
+
3078
+ ```
3079
+ ## # A tibble: 6 x 19
3080
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3081
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3082
+ ## 1 2013 11 1 5 2359 6 352
3083
+ ## 2 2013 11 1 35 2250 105 123
3084
+ ## 3 2013 11 1 455 500 -5 641
3085
+ ## 4 2013 11 1 539 545 -6 856
3086
+ ## 5 2013 11 1 542 545 -3 831
3087
+ ## 6 2013 11 1 549 600 -11 912
3088
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3089
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3090
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3091
+ ## # time_hour <dttm>
3092
+ ```
3093
+
3094
+ ## Filtering with NA (Not Available)
3095
+
3096
+ Let's first create a 'tibble' with a Not Available value (R::NA). Tibbles are a modern
3097
+ version of a data frame and operate very similarly to one. It differs in how it outputs
3098
+ the values and the result of some subsetting operations that are more consistent than
3099
+ what is obtained from data frame.
3100
+
3101
+
3102
+ ```ruby
3103
+ df = R.tibble(x: R.c(1, R::NA, 3))
3104
+ puts df
3105
+ ```
3106
+
3107
+ ```
3108
+ ## # A tibble: 3 x 1
3109
+ ## x
3110
+ ## <int>
3111
+ ## 1 1
3112
+ ## 2
3113
+ ## 3 3
3114
+ ```
3115
+
3116
+ Now filtering by :x > 1 shows all rows that satisfy this condition; the row with R::NA does
3117
+ not.
3118
+
3119
+
3120
+ ```ruby
3121
+ puts df.filter(:x > 1)
3122
+ ```
3123
+
3124
+ ```
3125
+ ## # A tibble: 1 x 1
3126
+ ## x
3127
+ ## <int>
3128
+ ## 1 3
3129
+ ```
3130
+
3131
+ To match an NA use method 'is__na'
3132
+
3133
+
3134
+ ```ruby
3135
+ puts df.filter((:x.is__na) | (:x > 1))
3136
+ ```
3137
+
3138
+ ```
3139
+ ## # A tibble: 2 x 1
3140
+ ## x
3141
+ ## <int>
3142
+ ## 1
3143
+ ## 2 3
3144
+ ```
3145
+
3146
+ ## Arrange Rows with arrange
3147
+
3148
+ Arrange reorders the rows of a data frame by the given arguments.
3149
+
3150
+
3151
+ ```ruby
3152
+ puts flights.arrange(:year, :month, :day).head
3153
+ ```
3154
+
3155
+ ```
3156
+ ## # A tibble: 6 x 19
3157
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3158
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3159
+ ## 1 2013 1 1 517 515 2 830
3160
+ ## 2 2013 1 1 533 529 4 850
3161
+ ## 3 2013 1 1 542 540 2 923
3162
+ ## 4 2013 1 1 544 545 -1 1004
3163
+ ## 5 2013 1 1 554 600 -6 812
3164
+ ## 6 2013 1 1 554 558 -4 740
3165
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3166
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3167
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3168
+ ## # time_hour <dttm>
3169
+ ```
3170
+
3171
+ To arrange in descending order, use function 'desc'
3172
+
3173
+
3174
+ ```ruby
3175
+ puts flights.arrange(:dep_delay.desc).head
3176
+ ```
3177
+
3178
+ ```
3179
+ ## # A tibble: 6 x 19
3180
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3181
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3182
+ ## 1 2013 1 9 641 900 1301 1242
3183
+ ## 2 2013 6 15 1432 1935 1137 1607
3184
+ ## 3 2013 1 10 1121 1635 1126 1239
3185
+ ## 4 2013 9 20 1139 1845 1014 1457
3186
+ ## 5 2013 7 22 845 1600 1005 1044
3187
+ ## 6 2013 4 10 1100 1900 960 1342
3188
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3189
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3190
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3191
+ ## # time_hour <dttm>
3192
+ ```
3193
+
3194
+ ## Selecting columns
3195
+
3196
+ To select specific columns from a dataset we use function 'select':
3197
+
3198
+
3199
+ ```ruby
3200
+ puts flights.select(:year, :month, :day).head
3201
+ ```
3202
+
3203
+ ```
3204
+ ## # A tibble: 6 x 3
3205
+ ## year month day
3206
+ ## <int> <int> <int>
3207
+ ## 1 2013 1 1
3208
+ ## 2 2013 1 1
3209
+ ## 3 2013 1 1
3210
+ ## 4 2013 1 1
3211
+ ## 5 2013 1 1
3212
+ ## 6 2013 1 1
3213
+ ```
3214
+
3215
+ It is also possible to select columns in a given range
3216
+
3217
+
3218
+ ```ruby
3219
+ puts flights.select(:year.up_to :day).head
3220
+ ```
3221
+
3222
+ ```
3223
+ ## # A tibble: 6 x 3
3224
+ ## year month day
3225
+ ## <int> <int> <int>
3226
+ ## 1 2013 1 1
3227
+ ## 2 2013 1 1
3228
+ ## 3 2013 1 1
3229
+ ## 4 2013 1 1
3230
+ ## 5 2013 1 1
3231
+ ## 6 2013 1 1
3232
+ ```
3233
+
3234
+ Select all columns that start with a given name sequence
3235
+
3236
+
3237
+ ```ruby
3238
+ puts flights.select(E.starts_with('arr')).head
3239
+ ```
3240
+
3241
+ ```
3242
+ ## # A tibble: 6 x 2
3243
+ ## arr_time arr_delay
3244
+ ## <int> <dbl>
3245
+ ## 1 830 11
3246
+ ## 2 850 20
3247
+ ## 3 923 33
3248
+ ## 4 1004 -18
3249
+ ## 5 812 -25
3250
+ ## 6 740 12
3251
+ ```
3252
+
3253
+ Other functions that can be used:
3254
+
3255
+ * ends_with("xyz"): matches names that end with “xyz”.
3256
+
3257
+ * contains("ijk"): matches names that contain “ijk”.
3258
+
3259
+ * matches("(.)\\1"): selects variables that match a regular expression. This one matches
3260
+ any variables that contain repeated characters.
3261
+
3262
+ * num_range("x", (1..3)): matches x1, x2 and x3
3263
+
3264
+ A helper function that comes in handy when we just want to rearrange column order is 'everything':
3265
+
3266
+
3267
+ ```ruby
3268
+ puts flights.select(:year, :month, :day, E.everything).head
3269
+ ```
3270
+
3271
+ ```
3272
+ ## # A tibble: 6 x 19
3273
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3274
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3275
+ ## 1 2013 1 1 517 515 2 830
3276
+ ## 2 2013 1 1 533 529 4 850
3277
+ ## 3 2013 1 1 542 540 2 923
3278
+ ## 4 2013 1 1 544 545 -1 1004
3279
+ ## 5 2013 1 1 554 600 -6 812
3280
+ ## 6 2013 1 1 554 558 -4 740
3281
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3282
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3283
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3284
+ ## # time_hour <dttm>
3285
+ ```
3286
+
3287
+ ## Add variables to a dataframe with 'mutate'
3288
+
3289
+
3290
+ ```ruby
3291
+ flights_sm = flights.
3292
+ select((:year.up_to :day),
3293
+ E.ends_with('delay'),
3294
+ :distance,
3295
+ :air_time)
3296
+
3297
+ puts flights_sm.head
3298
+ ```
3299
+
3300
+ ```
3301
+ ## # A tibble: 6 x 7
3302
+ ## year month day dep_delay arr_delay distance air_time
3303
+ ## <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
3304
+ ## 1 2013 1 1 2 11 1400 227
3305
+ ## 2 2013 1 1 4 20 1416 227
3306
+ ## 3 2013 1 1 2 33 1089 160
3307
+ ## 4 2013 1 1 -1 -18 1576 183
3308
+ ## 5 2013 1 1 -6 -25 762 116
3309
+ ## 6 2013 1 1 -4 12 719 150
3310
+ ```
3311
+
3312
+
3313
+ ```ruby
3314
+ flights_sm = flights_sm.
3315
+ mutate(gain: :dep_delay - :arr_delay,
3316
+ speed: :distance / :air_time * 60)
3317
+ puts flights_sm.head
3318
+ ```
3319
+
3320
+ ```
3321
+ ## # A tibble: 6 x 9
3322
+ ## year month day dep_delay arr_delay distance air_time gain speed
3323
+ ## <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
3324
+ ## 1 2013 1 1 2 11 1400 227 -9 370.
3325
+ ## 2 2013 1 1 4 20 1416 227 -16 374.
3326
+ ## 3 2013 1 1 2 33 1089 160 -31 408.
3327
+ ## 4 2013 1 1 -1 -18 1576 183 17 517.
3328
+ ## 5 2013 1 1 -6 -25 762 116 19 394.
3329
+ ## 6 2013 1 1 -4 12 719 150 -16 288.
3330
+ ```
3331
+
3332
+ ## Summarising data
3333
+
3334
+ Function 'summarise' calculates summaries for the data frame. When no 'group_by' is used
3335
+ a single value is obtained from the data frame:
3336
+
3337
+
3338
+ ```ruby
3339
+ puts flights.summarise(delay: E.mean(:dep_delay, na__rm: true))
3340
+ ```
3341
+
3342
+ ```
3343
+ ## # A tibble: 1 x 1
3344
+ ## delay
3345
+ ## <dbl>
3346
+ ## 1 12.6
3347
+ ```
3348
+
3349
+ When a data frame is grouped with 'group_by' summaries apply to the given group:
3350
+
3351
+
3352
+ ```ruby
3353
+ by_day = flights.group_by(:year, :month, :day)
3354
+ puts by_day.summarise(delay: :dep_delay.mean(na__rm: true)).head
3355
+ ```
3356
+
3357
+ ```
3358
+ ## # A tibble: 6 x 4
3359
+ ## # Groups: year, month [1]
3360
+ ## year month day delay
3361
+ ## * <int> <int> <int> <dbl>
3362
+ ## 1 2013 1 1 11.5
3363
+ ## 2 2013 1 2 13.9
3364
+ ## 3 2013 1 3 11.0
3365
+ ## 4 2013 1 4 8.95
3366
+ ## 5 2013 1 5 5.73
3367
+ ## 6 2013 1 6 7.15
3368
+ ```
3369
+
3370
+ Next we put many operations together by piping them one after the other:
3371
+
3372
+
3373
+ ```ruby
3374
+ delays = flights.
3375
+ group_by(:dest).
3376
+ summarise(
3377
+ count: E.n,
3378
+ dist: :distance.mean(na__rm: true),
3379
+ delay: :arr_delay.mean(na__rm: true)).
3380
+ filter(:count > 20, :dest != "NHL")
3381
+
3382
+ puts delays.head
3383
+ ```
3384
+
3385
+ ```
3386
+ ## # A tibble: 6 x 4
3387
+ ## dest count dist delay
3388
+ ## <chr> <int> <dbl> <dbl>
3389
+ ## 1 ABQ 254 1826 4.38
3390
+ ## 2 ACK 265 199 4.85
3391
+ ## 3 ALB 439 143 14.4
3392
+ ## 4 ATL 17215 757. 11.3
3393
+ ## 5 AUS 2439 1514. 6.02
3394
+ ## 6 AVL 275 584. 8.00
3395
+ ```
3396
+
3397
+ # Using Data Table
3398
+
3399
+
3400
+ ```ruby
3401
+ R.library('data.table')
3402
+ R.install_and_loads('curl')
3403
+
3404
+ input = "https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv"
3405
+ flights = R.fread(input)
3406
+ puts flights
3407
+ puts flights.dim
3408
+ ```
3409
+
3410
+ ```
3411
+ ## year month day dep_delay arr_delay carrier origin dest air_time
3412
+ ## 1: 2014 1 1 14 13 AA JFK LAX 359
3413
+ ## 2: 2014 1 1 -3 13 AA JFK LAX 363
3414
+ ## 3: 2014 1 1 2 9 AA JFK LAX 351
3415
+ ## 4: 2014 1 1 -8 -26 AA LGA PBI 157
3416
+ ## 5: 2014 1 1 2 1 AA JFK LAX 350
3417
+ ## ---
3418
+ ## 253312: 2014 10 31 1 -30 UA LGA IAH 201
3419
+ ## 253313: 2014 10 31 -5 -14 UA EWR IAH 189
3420
+ ## 253314: 2014 10 31 -8 16 MQ LGA RDU 83
3421
+ ## 253315: 2014 10 31 -4 15 MQ LGA DTW 75
3422
+ ## 253316: 2014 10 31 -5 1 MQ LGA SDF 110
3423
+ ## distance hour
3424
+ ## 1: 2475 9
3425
+ ## 2: 2475 11
3426
+ ## 3: 2475 19
3427
+ ## 4: 1035 7
3428
+ ## 5: 2475 13
3429
+ ## ---
3430
+ ## 253312: 1416 14
3431
+ ## 253313: 1400 8
3432
+ ## 253314: 431 11
3433
+ ## 253315: 502 11
3434
+ ## 253316: 659 8
3435
+ ## [1] 253316 11
3436
+ ```
3437
+
3438
+
3439
+ ```ruby
3440
+
3441
+ data_table = R.data__table(
3442
+ ID: R.c("b","b","b","a","a","c"),
3443
+ a: (1..6),
3444
+ b: (7..12),
3445
+ c: (13..18)
3446
+ )
3447
+
3448
+ puts data_table
3449
+ puts data_table.ID
3450
+ ```
3451
+
3452
+ ```
3453
+ ## ID a b c
3454
+ ## 1: b 1 7 13
3455
+ ## 2: b 2 8 14
3456
+ ## 3: b 3 9 15
3457
+ ## 4: a 4 10 16
3458
+ ## 5: a 5 11 17
3459
+ ## 6: c 6 12 18
3460
+ ## [1] "b" "b" "b" "a" "a" "c"
3461
+ ```
3462
+
3463
+
3464
+ ```ruby
3465
+ # subset rows in i
3466
+ ans = flights[(:origin.eq "JFK") & (:month.eq 6)]
3467
+ puts ans.head
3468
+
3469
+ # Get the first two rows from flights.
3470
+
3471
+ ans = flights[(1..2)]
3472
+ puts ans
3473
+
3474
+ # Sort flights first by column origin in ascending order, and then by dest in descending order:
3475
+
3476
+ # ans = flights[E.order(:origin, -(:dest))]
3477
+ # puts ans.head
3478
+ ```
3479
+
3480
+ ```
3481
+ ## year month day dep_delay arr_delay carrier origin dest air_time
3482
+ ## 1: 2014 6 1 -9 -5 AA JFK LAX 324
3483
+ ## 2: 2014 6 1 -10 -13 AA JFK LAX 329
3484
+ ## 3: 2014 6 1 18 -1 AA JFK LAX 326
3485
+ ## 4: 2014 6 1 -6 -16 AA JFK LAX 320
3486
+ ## 5: 2014 6 1 -4 -45 AA JFK LAX 326
3487
+ ## 6: 2014 6 1 -6 -23 AA JFK LAX 329
3488
+ ## distance hour
3489
+ ## 1: 2475 8
3490
+ ## 2: 2475 12
3491
+ ## 3: 2475 7
3492
+ ## 4: 2475 10
3493
+ ## 5: 2475 18
3494
+ ## 6: 2475 14
3495
+ ## year month day dep_delay arr_delay carrier origin dest air_time
3496
+ ## 1: 2014 1 1 14 13 AA JFK LAX 359
3497
+ ## 2: 2014 1 1 -3 13 AA JFK LAX 363
3498
+ ## distance hour
3499
+ ## 1: 2475 9
3500
+ ## 2: 2475 11
3501
+ ```
3502
+
3503
+
3504
+ ```ruby
3505
+ # Select column(s) in j
3506
+ # select arr_delay column, but return it as a vector.
3507
+
3508
+ ans = flights[:all, :arr_delay]
3509
+ puts ans.head
3510
+
3511
+ # Select arr_delay column, but return as a data.table instead.
3512
+
3513
+ ans = flights[:all, :arr_delay.list]
3514
+ puts ans.head
3515
+
3516
+ ans = flights[:all, E.list(:arr_delay, :dep_delay)]
3517
+ ```
3518
+
3519
+ ```
3520
+ ## [1] 13 13 9 -26 1 0
3521
+ ## arr_delay
3522
+ ## 1: 13
3523
+ ## 2: 13
3524
+ ## 3: 9
3525
+ ## 4: -26
3526
+ ## 5: 1
3527
+ ## 6: 0
3528
+ ```
3529
+
3530
+ # Graphics in Galaaz
3531
+
3532
+ Creating graphics in Galaaz is quite easy, as it can use all the power of ggplot2. There are
3533
+ many resources on the web that teach ggplot, so here we give a quick example of ggplot
3534
+ integration with Ruby. We continue to use the :mtcars dataset and we will plot a diverging
3535
+ bar plot, showing cars that have 'above' or 'below' average gas consumption. Let's first prepare
3536
+ the data frame with the necessary data:
3537
+
3538
+
3539
+ ```ruby
3540
+ # copy the R variable :mtcars to the Ruby mtcars variable
3541
+ mtcars = ~:mtcars
3542
+
3543
+ # create a new column 'car_name' to store the car names so that it can be
3544
+ # used for plotting. The 'rownames' of the data frame cannot be used as
3545
+ # data for plotting
3546
+ mtcars.car_name = R.rownames(:mtcars)
3547
+
3548
+ # compute normalized mpg and add it to a new column called mpg_z
3549
+ # Note that the mean value for mpg can be obtained by calling the 'mean'
3550
+ # function on the vector 'mtcars.mpg'. The same with the standard
3551
+ # deviation 'sd'. The vector is then rounded to two digits with 'round 2'
3552
+ mtcars.mpg_z = ((mtcars.mpg - mtcars.mpg.mean)/mtcars.mpg.sd).round 2
3553
+
3554
+ # create a new column 'mpg_type'. Function 'ifelse' is a vectorized function
3555
+ # that looks at every element of the mpg_z vector and if the value is below
3556
+ # 0, returns 'below', otherwise returns 'above'
3557
+ mtcars.mpg_type = (mtcars.mpg_z < 0).ifelse("below", "above")
3558
+
3559
+ # order the mtcar data set by the mpg_z vector from smaler to larger values
3560
+ mtcars = mtcars[mtcars.mpg_z.order, :all]
3561
+
3562
+ # convert the car_name column to a factor to retain sorted order in plot
3563
+ mtcars.car_name = mtcars.car_name.factor levels: mtcars.car_name
3564
+
3565
+ # let's look at the final data frame
3566
+ puts mtcars.head
3567
+ ```
3568
+
3569
+ ```
3570
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
3571
+ ## Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
3572
+ ## Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4
3573
+ ## Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4
3574
+ ## Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4
3575
+ ## Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
3576
+ ## Maserati Bora 15.0 8 301 335 3.54 3.570 14.60 0 1 5 8
3577
+ ## car_name mpg_z mpg_type
3578
+ ## Cadillac Fleetwood Cadillac Fleetwood -1.61 below
3579
+ ## Lincoln Continental Lincoln Continental -1.61 below
3580
+ ## Camaro Z28 Camaro Z28 -1.13 below
3581
+ ## Duster 360 Duster 360 -0.96 below
3582
+ ## Chrysler Imperial Chrysler Imperial -0.89 below
3583
+ ## Maserati Bora Maserati Bora -0.84 below
3584
+ ```
3585
+ Now, let's plot the diverging bar plot. When using gKnit, there is no need to call
3586
+ 'R.awt' to create a plotting device, since gKnit does take care of it. Galaaz
3587
+ provides integration with ggplot. The interested reader should check online for more
3588
+ information on ggplot, since it is outside the scope of this manual describing
3589
+ how ggplot works. We give here but a brief description on how this plot is generated.
3590
+
3591
+ ggplot implements the 'grammar of graphics'. In this approach, plots are built by
3592
+ adding layers to the plot. On the first layer we describe what we want on the 'x'
3593
+ and 'y' axis of the plot. In this case, we have 'car_name' on the 'x' axis and
3594
+ 'mpg\_z' on the 'y' axis. Then the type of graph is specified by adding
3595
+ 'geom\_bar' (for a bar graph). We specify that our bars should be filled using
3596
+ 'mpg\_type', which is either 'above' or 'below' giving then two colours for
3597
+ filling. On the next layer we specify the labels for the graph, then we add the
3598
+ title and subtitle. Finally, in a bar chart usually bars go on the vertical direction,
3599
+ but in this graph we want the bars to be laid out horizontally, so we add 'coord\_flip'.
3600
+
3601
+
3602
+ ```ruby
3603
+ require 'ggplot'
3604
+
3605
+ puts mtcars.ggplot(E.aes(x: :car_name, y: :mpg_z, label: :mpg_z)) +
3606
+ R.geom_bar(E.aes(fill: :mpg_type), stat: 'identity', width: 0.5) +
3607
+ R.scale_fill_manual(name: 'Mileage',
3608
+ labels: R.c('Above Average', 'Below Average'),
3609
+ values: R.c('above': '#00ba38', 'below': '#f8766d')) +
3610
+ R.labs(subtitle: "Normalised mileage from 'mtcars'",
3611
+ title: "Diverging Bars") +
3612
+ R.coord_flip
3613
+ ```
3614
+
3615
+
3616
+ ![](manual_files/figure-html/diverging_bar.png)<!-- -->
3617
+
3618
+ # Coding with Tidyverse
3619
+
3620
+ In R, and when coding with 'tidyverse', arguments to a function are usually not
3621
+ *referentially transparent*. That is, you can’t replace a value with a seemingly equivalent
3622
+ object that you’ve defined elsewhere. To see the problem, let's first define a data frame:
3623
+
3624
+
3625
+ ```ruby
3626
+ df = R.data__frame(x: (1..3), y: (3..1))
3627
+ puts df
3628
+ ```
3629
+
3630
+ ```
3631
+ ## x y
3632
+ ## 1 1 3
3633
+ ## 2 2 2
3634
+ ## 3 3 1
3635
+ ```
3636
+
3637
+ and now, let's look at this code:
3638
+
3639
+
3640
+ ```r
3641
+ my_var <- x
3642
+ filter(df, my_var == 1)
3643
+ ```
3644
+ It generates the following error: "object 'x' not found".
3645
+
3646
+ However, in Galaaz, arguments are referentially transparent as can be seen by the
3647
+ code below. Note initially that 'my_var = :x' will not give the error "object 'x' not found"
3648
+ since ':x' is treated as an expression and assigned to my\_var. Then when doing (my\_var.eq 1),
3649
+ my\_var is a variable that resolves to ':x' and it becomes equivalent to (:x.eq 1) which is
3650
+ what we want.
3651
+
3652
+
3653
+ ```ruby
3654
+ my_var = :x
3655
+ puts df.filter(my_var.eq 1)
3656
+ ```
3657
+
3658
+ ```
3659
+ ## x y
3660
+ ## 1 1 3
3661
+ ```
3662
+ As stated by Hadley:
3663
+
3664
+ > dplyr code is ambiguous. Depending on what variables are defined where,
3665
+ > filter(df, x == y) could be equivalent to any of:
3666
+
3667
+ ```
3668
+ df[df$x == df$y, ]
3669
+ df[df$x == y, ]
3670
+ df[x == df$y, ]
3671
+ df[x == y, ]
3672
+ ```
3673
+ In galaaz this ambiguity does not exist, filter(df, x.eq y) is not a valid expression as
3674
+ expressions are built with symbols. In doing filter(df, :x.eq y) we are looking for elements
3675
+ of the 'x' column that are equal to a previously defined y variable. Finally in
3676
+ filter(df, :x.eq :y) we are looking for elements in which the 'x' column value is equal to
3677
+ the 'y' column value. This can be seen in the following two chunks of code:
3678
+
3679
+
3680
+ ```ruby
3681
+ y = 1
3682
+ x = 2
3683
+
3684
+ # looking for values where the 'x' column is equal to the 'y' column
3685
+ puts df.filter(:x.eq :y)
3686
+ ```
3687
+
3688
+ ```
3689
+ ## x y
3690
+ ## 1 2 2
3691
+ ```
3692
+
3693
+
3694
+ ```ruby
3695
+ # looking for values where the 'x' column is equal to the 'y' variable
3696
+ # in this case, the number 1
3697
+ puts df.filter(:x.eq y)
3698
+ ```
3699
+
3700
+ ```
3701
+ ## x y
3702
+ ## 1 1 3
3703
+ ```
3704
+ ## Writing a function that applies to different data sets
3705
+
3706
+ Let's suppose that we want to write a function that receives as the first argument a data frame
3707
+ and as second argument an expression that adds a column to the data frame that is equal to the
3708
+ sum of elements in column 'a' plus 'x'.
3709
+
3710
+ Here is the intended behaviour using the 'mutate' function of 'dplyr':
3711
+
3712
+ ```
3713
+ mutate(df1, y = a + x)
3714
+ mutate(df2, y = a + x)
3715
+ mutate(df3, y = a + x)
3716
+ mutate(df4, y = a + x)
3717
+ ```
3718
+ The naive approach to writing an R function to solve this problem is:
3719
+
3720
+ ```
3721
+ mutate_y <- function(df) {
3722
+ mutate(df, y = a + x)
3723
+ }
3724
+ ```
3725
+ Unfortunately, in R, this function can fail silently if one of the variables isn’t present
3726
+ in the data frame, but is present in the global environment. We will not go through here how
3727
+ to solve this problem in R.
3728
+
3729
+ In Galaaz the method mutate_y below will work fine and will never fail silently.
3730
+
3731
+
3732
+ ```ruby
3733
+ def mutate_y(df)
3734
+ df.mutate(:y.assign :a + :x)
3735
+ end
3736
+ ```
3737
+ Here we create a data frame that has only one column named 'x':
3738
+
3739
+
3740
+ ```ruby
3741
+ df1 = R.data__frame(x: (1..3))
3742
+ puts df1
3743
+ ```
3744
+
3745
+ ```
3746
+ ## x
3747
+ ## 1 1
3748
+ ## 2 2
3749
+ ## 3 3
3750
+ ```
3751
+
3752
+ Note that method mutate_y will fail independently of the fact that variable 'a' is defined and
3753
+ in the scope of the method. Variable 'a' has no relationship with the symbol ':a' used in the
3754
+ definition of 'mutate\_y' above:
3755
+
3756
+
3757
+ ```ruby
3758
+ a = 10
3759
+ mutate_y(df1)
3760
+ ```
3761
+
3762
+ ```
3763
+ ## Message:
3764
+ ## Error in mutate_impl(.data, dots) :
3765
+ ## Evaluation error: object 'a' not found.
3766
+ ## In addition: Warning message:
3767
+ ## In mutate_impl(.data, dots) :
3768
+ ## mismatched protect/unprotect (unprotect with empty protect stack) (RError)
3769
+ ## Translated to internal error
3770
+ ```
3771
+ ## Different expressions
3772
+
3773
+ Let's move to the next problem as presented by Hadley where trying to write a function in R
3774
+ that will receive two arguments, the first a variable and the second an expression, is not trivial.
3775
+ Below we create a data frame and we want to write a function that groups data by a variable and
3776
+ summarises it by an expression:
3777
+
3778
+
3779
+ ```r
3780
+ set.seed(123)
3781
+
3782
+ df <- data.frame(
3783
+ g1 = c(1, 1, 2, 2, 2),
3784
+ g2 = c(1, 2, 1, 2, 1),
3785
+ a = sample(5),
3786
+ b = sample(5)
3787
+ )
3788
+
3789
+ as.data.frame(df)
3790
+ ```
3791
+
3792
+ ```
3793
+ ## g1 g2 a b
3794
+ ## 1 1 1 3 3
3795
+ ## 2 1 2 2 1
3796
+ ## 3 2 1 5 2
3797
+ ## 4 2 2 4 5
3798
+ ## 5 2 1 1 4
3799
+ ```
3800
+
3801
+ ```r
3802
+ d2 <- df %>%
3803
+ group_by(g1) %>%
3804
+ summarise(a = mean(a))
3805
+
3806
+ as.data.frame(d2)
3807
+ ```
3808
+
3809
+ ```
3810
+ ## g1 a
3811
+ ## 1 1 2.500000
3812
+ ## 2 2 3.333333
3813
+ ```
3814
+
3815
+ ```r
3816
+ d2 <- df %>%
3817
+ group_by(g2) %>%
3818
+ summarise(a = mean(a))
3819
+
3820
+ as.data.frame(d2)
3821
+ ```
3822
+
3823
+ ```
3824
+ ## g2 a
3825
+ ## 1 1 3
3826
+ ## 2 2 3
3827
+ ```
3828
+
3829
+ As shown by Hadley, one might expect this function to do the trick:
3830
+
3831
+
3832
+ ```r
3833
+ my_summarise <- function(df, group_var) {
3834
+ df %>%
3835
+ group_by(group_var) %>%
3836
+ summarise(a = mean(a))
3837
+ }
3838
+
3839
+ # my_summarise(df, g1)
3840
+ #> Error: Column `group_var` is unknown
3841
+ ```
3842
+
3843
+ In order to solve this problem, coding with dplyr requires the introduction of many new concepts
3844
+ and functions such as 'quo', 'quos', 'enquo', 'enquos', '!!' (bang bang), '!!!' (triple bang).
3845
+ Again, we'll leave to Hadley the explanation on how to use all those functions.
3846
+
3847
+ Now, let's try to implement the same function in galaaz. The next code block first prints the
3848
+ 'df' data frame defined previously in R (to access an R variable from Galaaz, we use the tilde
3849
+ operator '~' applied to the R variable name as symbol, i.e., ':df').
3850
+
3851
+
3852
+ ```ruby
3853
+ puts ~:df
3854
+ ```
3855
+
3856
+ ```
3857
+ ## g1 g2 a b
3858
+ ## 1 1 1 3 3
3859
+ ## 2 1 2 2 1
3860
+ ## 3 2 1 5 2
3861
+ ## 4 2 2 4 5
3862
+ ## 5 2 1 1 4
3863
+ ```
3864
+
3865
+ We then create the 'my_summarize' method and call it passing the R data frame and
3866
+ the group by variable ':g1':
3867
+
3868
+
3869
+ ```ruby
3870
+ def my_summarize(df, group_var)
3871
+ df.group_by(group_var).
3872
+ summarize(a: :a.mean)
3873
+ end
3874
+
3875
+ puts my_summarize(:df, :g1)
3876
+ ```
3877
+
3878
+ ```
3879
+ ## # A tibble: 2 x 2
3880
+ ## g1 a
3881
+ ## <dbl> <dbl>
3882
+ ## 1 1 2.5
3883
+ ## 2 2 3.33
3884
+ ```
3885
+
3886
+ It works!!! Well, let's make sure this was not just some coincidence
3887
+
3888
+
3889
+ ```ruby
3890
+ puts my_summarize(:df, :g2)
3891
+ ```
3892
+
3893
+ ```
3894
+ ## # A tibble: 2 x 2
3895
+ ## g2 a
3896
+ ## <dbl> <dbl>
3897
+ ## 1 1 3
3898
+ ## 2 2 3
3899
+ ```
3900
+
3901
+ Great, everything is fine! No magic, no new functions, no complexities, just normal, standard Ruby
3902
+ code. If you've ever done NSE in R, this certainly feels much safer and easier to implement.
3903
+
3904
+ ## Different input variables
3905
+
3906
+ In the previous section we've managed to get rid of all NSE formulation for a simple example, but
3907
+ does this remain true for more complex examples, or will the Galaaz way prove impractical for
3908
+ more complex code?
3909
+
3910
+ In the next example Hadley proposes that we write a function that, given an expression such as 'a'
3911
+ or 'a * b', calculates three summaries. What we want is a function that does the same as these R
3912
+ statements:
3913
+
3914
+ ```
3915
+ summarise(df, mean = mean(a), sum = sum(a), n = n())
3916
+ #> # A tibble: 1 x 3
3917
+ #> mean sum n
3918
+ #> <dbl> <int> <int>
3919
+ #> 1 3 15 5
3920
+
3921
+ summarise(df, mean = mean(a * b), sum = sum(a * b), n = n())
3922
+ #> # A tibble: 1 x 3
3923
+ #> mean sum n
3924
+ #> <dbl> <int> <int>
3925
+ #> 1 9 45 5
3926
+ ```
3927
+
3928
+ Let's try it in galaaz:
3929
+
3930
+
3931
+ ```ruby
3932
+ def my_summarise2(df, expr)
3933
+ df.summarize(
3934
+ mean: E.mean(expr),
3935
+ sum: E.sum(expr),
3936
+ n: E.n
3937
+ )
3938
+ end
3939
+
3940
+ puts my_summarise2((~:df), :a)
3941
+ puts "\n"
3942
+ puts my_summarise2((~:df), :a * :b)
3943
+ ```
3944
+
3945
+ ```
3946
+ ## mean sum n
3947
+ ## 1 3 15 5
3948
+ ##
3949
+ ## mean sum n
3950
+ ## 1 9 45 5
3951
+ ```
3952
+
3953
+ Once again, there is no need to use any special theory or functions. The only point to be
3954
+ careful about is the use of 'E' to build expressions from functions 'mean', 'sum' and 'n'.
3955
+
3956
+ ## Different input and output variable
3957
+
3958
+ Now the next challenge presented by Hadley is to vary the name of the output variables based on
3959
+ the received expression. So, if the input expression is 'a', we want our data frame columns to
3960
+ be named 'mean\_a' and 'sum\_a'. Now, if the input expression is 'b', columns
3961
+ should be named 'mean\_b' and 'sum\_b'.
3962
+
3963
+ ```
3964
+ mutate(df, mean_a = mean(a), sum_a = sum(a))
3965
+ #> # A tibble: 5 x 6
3966
+ #> g1 g2 a b mean_a sum_a
3967
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
3968
+ #> 1 1 1 1 3 3 15
3969
+ #> 2 1 2 4 2 3 15
3970
+ #> 3 2 1 2 1 3 15
3971
+ #> 4 2 2 5 4 3 15
3972
+ #> # … with 1 more row
3973
+
3974
+ mutate(df, mean_b = mean(b), sum_b = sum(b))
3975
+ #> # A tibble: 5 x 6
3976
+ #> g1 g2 a b mean_b sum_b
3977
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
3978
+ #> 1 1 1 1 3 3 15
3979
+ #> 2 1 2 4 2 3 15
3980
+ #> 3 2 1 2 1 3 15
3981
+ #> 4 2 2 5 4 3 15
3982
+ #> # … with 1 more row
3983
+ ```
3984
+ In order to solve this problem in R, Hadley needs to introduce some more new functions and notations:
3985
+ 'quo_name' and the ':=' operator from package 'rlang'.
3986
+
3987
+ Here is our Ruby code:
3988
+
3989
+
3990
+ ```ruby
3991
+ def my_mutate(df, expr)
3992
+ mean_name = "mean_#{expr.to_s}"
3993
+ sum_name = "sum_#{expr.to_s}"
3994
+
3995
+ df.mutate(mean_name => E.mean(expr),
3996
+ sum_name => E.sum(expr))
3997
+ end
3998
+
3999
+ puts my_mutate((~:df), :a)
4000
+ puts "\n"
4001
+ puts my_mutate((~:df), :b)
4002
+ ```
4003
+
4004
+ ```
4005
+ ## g1 g2 a b mean_a sum_a
4006
+ ## 1 1 1 3 3 3 15
4007
+ ## 2 1 2 2 1 3 15
4008
+ ## 3 2 1 5 2 3 15
4009
+ ## 4 2 2 4 5 3 15
4010
+ ## 5 2 1 1 4 3 15
4011
+ ##
4012
+ ## g1 g2 a b mean_b sum_b
4013
+ ## 1 1 1 3 3 3 15
4014
+ ## 2 1 2 2 1 3 15
4015
+ ## 3 2 1 5 2 3 15
4016
+ ## 4 2 2 4 5 3 15
4017
+ ## 5 2 1 1 4 3 15
4018
+ ```
4019
+ It really seems that "Non Standard Evaluation" is actually quite standard in Galaaz! But, you
4020
+ might have noticed a small change in the way the arguments to the mutate method were called.
4021
+ In a previous example we used df.summarise(mean: E.mean(:a), ...) where the column name was
4022
+ followed by a ':' colon. In this example, we have df.mutate(mean_name => E.mean(expr), ...)
4023
+ and variable mean\_name is not followed by ':' but by '=>'. This is standard Ruby notation.
4024
+
4025
+ [explain....]
4026
+
4027
+ ## Capturing multiple variables
4028
+
4029
+ Moving on with new complexities, Hadley proposes that we solve the problem in which the
4030
+ summarise function will receive any number of grouping variables.
4031
+
4032
+ This again is quite standard Ruby. In order to receive an undefined number of parameters
4033
+ the parameter is preceded by '*':
4034
+
4035
+
4036
+ ```ruby
4037
+ def my_summarise3(df, *group_vars)
4038
+ df.group_by(*group_vars).
4039
+ summarise(a: E.mean(:a))
4040
+ end
4041
+
4042
+ puts my_summarise3((~:df), :g1, :g2)
4043
+ ```
4044
+
4045
+ ```
4046
+ ## # A tibble: 4 x 3
4047
+ ## # Groups: g1 [?]
4048
+ ## g1 g2 a
4049
+ ## <dbl> <dbl> <dbl>
4050
+ ## 1 1 1 3
4051
+ ## 2 1 2 2
4052
+ ## 3 2 1 3
4053
+ ## 4 2 2 4
4054
+ ```
4055
+
4056
+ ## Why does R require NSE and Galaaz does not?
4057
+
4058
+ NSE introduces a number of new concepts, such as 'quoting', 'quasiquotation', 'unquoting' and
4059
+ 'unquote-splicing', while in Galaaz none of those concepts are needed. What gives?
4060
+
4061
+ R is an extremely flexible language and it has lazy evaluation of parameters. When in R a
4062
+ function is called as 'summarise(df, a = b)', the summarise function receives the literal
4063
+ 'a = b' parameter and can work with this as if it were a string. In R, it is not clear what
4064
+ a and b are, they can be expressions or they can be variables, it is up to the function to
4065
+ decide what 'a = b' means.
4066
+
4067
+ In Ruby, there is no lazy evaluation of parameters and 'a' is always a variable and so is 'b'.
4068
+ Variables assume their value as soon as they are used, so 'x = a' is immediately evaluated and
4069
+ variable 'x' will receive the value of variable 'a' as soon as the Ruby statement is executed.
4070
+ Ruby also provides the notion of a symbol; ':a' is a symbol and does not evaluate to anything.
4071
+ Galaaz uses Ruby symbols to build expressions that are not bound to anything: ':a.eq :b' is
4072
+ clearly an expression and has no relationship whatsoever with the statement 'a = b'. By using
4073
+ symbols, variables and expressions all the possible ambiguities that are found in R are
4074
+ eliminated in Galaaz.
4075
+
4076
+ The main problem that remains, is that in R, functions are not clearly documented as what type
4077
+ of input they are expecting, they might be expecting regular variables or they might be
4078
+ expecting expressions and the R function will know how to deal with an input of the form
4079
+ 'a = b', now for the Ruby developer it might not be immediately clear if it should call the
4080
+ function passing the value 'true' if variable 'a' is equal to variable 'b' or if it should
4081
+ call the function passing the expression ':a.eq :b'.
4082
+
4083
+
4084
+ ## Advanced dplyr features
4085
+
4086
+ In the blog: Programming with dplyr by using dplyr (https://www.r-bloggers.com/programming-with-dplyr-by-using-dplyr/) Iñaki Úcar shows surprise that some R users are trying to code in dplyr avoiding
4087
+ the use of NSE. For instance he says:
4088
+
4089
+ > Take the example of seplyr. It stands for standard evaluation dplyr, and enables us to
4090
+ > program over dplyr without having “to bring in (or study) any deep-theory or
4091
+ > heavy-weight tools such as rlang/tidyeval”.
4092
+
4093
+ For me, there isn't really any surprise that users are trying to avoid dplyr deep-theory. R
4094
+ users frequently are not programmers and learning to code is already hard business, on top
4095
+ of that, having to learn how to 'quote' or 'enquo' or 'quos' or 'enquos' is not necessarily
4096
+ a 'piece of cake'. So much so, that 'tidyeval' has some more advanced functions that instead
4097
+ of using quoted expressions, use strings as arguments.
4098
+
4099
+ In the following examples, we show the use of functions 'group\_by\_at', 'summarise\_at' and
4100
+ 'rename\_at' that receive strings as arguments. The data frame used is 'starwars', which describes
4101
+ features of characters in the Star Wars movies:
4102
+
4103
+
4104
+ ```ruby
4105
+ puts (~:starwars).head
4106
+ ```
4107
+
4108
+ ```
4109
+ ## # A tibble: 6 x 13
4110
+ ## name height mass hair_color skin_color eye_color birth_year gender
4111
+ ## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
4112
+ ## 1 Luke… 172 77 blond fair blue 19 male
4113
+ ## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
4114
+ ## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
4115
+ ## 4 Dart… 202 136 none white yellow 41.9 male
4116
+ ## 5 Leia… 150 49 brown light brown 19 female
4117
+ ## 6 Owen… 178 120 brown, gr… light blue 52 male
4118
+ ## # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
4119
+ ## # vehicles <list>, starships <list>
4120
+ ```
4121
+ The grouped_mean function below will receive a grouping variable and calculate summaries for
4122
+ the value\_variables given:
4123
+
4124
+
4125
+ ```r
4126
+ grouped_mean <- function(data, grouping_variables, value_variables) {
4127
+ data %>%
4128
+ group_by_at(grouping_variables) %>%
4129
+ mutate(count = n()) %>%
4130
+ summarise_at(c(value_variables, "count"), mean, na.rm = TRUE) %>%
4131
+ rename_at(value_variables, funs(paste0("mean_", .)))
4132
+ }
4133
+
4134
+ gm = starwars %>%
4135
+ grouped_mean("eye_color", c("mass", "birth_year"))
4136
+
4137
+ as.data.frame(gm)
4138
+ ```
4139
+
4140
+ ```
4141
+ ## eye_color mean_mass mean_birth_year count
4142
+ ## 1 black 76.28571 33.00000 10
4143
+ ## 2 blue 86.51667 67.06923 19
4144
+ ## 3 blue-gray 77.00000 57.00000 1
4145
+ ## 4 brown 66.09231 108.96429 21
4146
+ ## 5 dark NaN NaN 1
4147
+ ## 6 gold NaN NaN 1
4148
+ ## 7 green, yellow 159.00000 NaN 1
4149
+ ## 8 hazel 66.00000 34.50000 3
4150
+ ## 9 orange 282.33333 231.00000 8
4151
+ ## 10 pink NaN NaN 1
4152
+ ## 11 red 81.40000 33.66667 5
4153
+ ## 12 red, blue NaN NaN 1
4154
+ ## 13 unknown 31.50000 NaN 3
4155
+ ## 14 white 48.00000 NaN 1
4156
+ ## 15 yellow 81.11111 76.38000 11
4157
+ ```
4158
+
4159
+ The same code with Galaaz, becomes:
4160
+
4161
+
4162
+ ```ruby
4163
+ def grouped_mean(data, grouping_variables, value_variables)
4164
+ data.
4165
+ group_by_at(grouping_variables).
4166
+ mutate(count: E.n).
4167
+ summarise_at(E.c(value_variables, "count"), ~:mean, na__rm: true).
4168
+ rename_at(value_variables, E.funs(E.paste0("mean_", value_variables)))
4169
+ end
4170
+
4171
+ puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year"))
4172
+ ```
4173
+
4174
+ ```
4175
+ ## # A tibble: 15 x 4
4176
+ ## eye_color mean_mass mean_birth_year count
4177
+ ## <chr> <dbl> <dbl> <dbl>
4178
+ ## 1 black 76.3 33 10
4179
+ ## 2 blue 86.5 67.1 19
4180
+ ## 3 blue-gray 77 57 1
4181
+ ## 4 brown 66.1 109. 21
4182
+ ## 5 dark NaN NaN 1
4183
+ ## 6 gold NaN NaN 1
4184
+ ## 7 green, yellow 159 NaN 1
4185
+ ## 8 hazel 66 34.5 3
4186
+ ## 9 orange 282. 231 8
4187
+ ## 10 pink NaN NaN 1
4188
+ ## 11 red 81.4 33.7 5
4189
+ ## 12 red, blue NaN NaN 1
4190
+ ## 13 unknown 31.5 NaN 3
4191
+ ## 14 white 48 NaN 1
4192
+ ## 15 yellow 81.1 76.4 11
4193
+ ```
4194
+
4195
+
4196
+ [TO BE CONTINUED...]
4197
+
4198
+
4199
+ # Contributing
744
4200
 
745
4201
  * Fork it
746
4202
  * Create your feature branch (git checkout -b my-new-feature)
@@ -749,3 +4205,4 @@ puts gg
749
4205
  * Push to the branch (git push origin my-new-feature)
750
4206
  * Create new Pull Request
751
4207
 
4208
+ # References