galaaz 0.4.10 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (163) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2048 -531
  3. data/Rakefile +3 -2
  4. data/bin/gknit +152 -6
  5. data/bin/gknit-draft +105 -0
  6. data/bin/gknit-draft.rb +28 -0
  7. data/bin/gknit_Rscript +127 -0
  8. data/bin/grun +27 -1
  9. data/bin/gstudio +47 -4
  10. data/bin/{gstudio.rb → gstudio_irb.rb} +0 -0
  11. data/bin/gstudio_pry.rb +7 -0
  12. data/blogs/galaaz_ggplot/galaaz_ggplot.html +10 -195
  13. data/blogs/galaaz_ggplot/galaaz_ggplot.md +404 -0
  14. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
  15. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
  16. data/blogs/gknit/gknit.Rmd +5 -3
  17. data/blogs/gknit/gknit.pdf +0 -0
  18. data/blogs/gknit/lst.rds +0 -0
  19. data/blogs/manual/lst.rds +0 -0
  20. data/blogs/manual/manual.Rmd +826 -53
  21. data/blogs/manual/manual.html +2338 -695
  22. data/blogs/manual/manual.md +2032 -539
  23. data/blogs/manual/manual.pdf +0 -0
  24. data/blogs/manual/manual.tex +1804 -594
  25. data/blogs/manual/manual_files/figure-html/bubble-1.png +0 -0
  26. data/blogs/manual/manual_files/figure-html/diverging_bar.png +0 -0
  27. data/blogs/manual/manual_files/figure-latex/bubble-1.png +0 -0
  28. data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
  29. data/blogs/manual/model.rb +41 -0
  30. data/blogs/nse_dplyr/nse_dplyr.Rmd +226 -73
  31. data/blogs/nse_dplyr/nse_dplyr.html +254 -336
  32. data/blogs/nse_dplyr/nse_dplyr.md +353 -158
  33. data/blogs/oh_my/oh_my.html +274 -386
  34. data/blogs/oh_my/oh_my.md +208 -205
  35. data/blogs/ruby_plot/ruby_plot.html +20 -205
  36. data/blogs/ruby_plot/ruby_plot.md +14 -15
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  42. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  43. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  44. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  46. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  47. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  48. data/examples/Bibliography/master.bib +50 -0
  49. data/examples/Bibliography/stats.bib +72 -0
  50. data/examples/islr/x_y_rnorm.jpg +0 -0
  51. data/examples/latex_templates/Test-acm_article/Makefile +16 -0
  52. data/examples/latex_templates/Test-acm_article/Test-acm_article.Rmd +65 -0
  53. data/examples/latex_templates/Test-acm_article/acm_proc_article-sp.cls +1670 -0
  54. data/examples/latex_templates/Test-acm_article/sensys-abstract.cls +703 -0
  55. data/examples/latex_templates/Test-acm_article/sigproc.bib +59 -0
  56. data/examples/latex_templates/Test-acs_article/Test-acs_article.Rmd +260 -0
  57. data/examples/latex_templates/Test-acs_article/Test-acs_article.pdf +0 -0
  58. data/examples/latex_templates/Test-acs_article/acs-Test-acs_article.bib +11 -0
  59. data/examples/latex_templates/Test-acs_article/acs-my_output.bib +11 -0
  60. data/examples/latex_templates/Test-acs_article/acstest.bib +17 -0
  61. data/examples/latex_templates/Test-aea_article/AEA.cls +1414 -0
  62. data/{blogs/gknit/marshal.dump → examples/latex_templates/Test-aea_article/BibFile.bib} +0 -0
  63. data/examples/latex_templates/Test-aea_article/Test-aea_article.Rmd +108 -0
  64. data/examples/latex_templates/Test-aea_article/Test-aea_article.pdf +0 -0
  65. data/examples/latex_templates/Test-aea_article/aea.bst +1269 -0
  66. data/examples/latex_templates/Test-aea_article/multicol.sty +853 -0
  67. data/examples/latex_templates/Test-aea_article/references.bib +0 -0
  68. data/examples/latex_templates/Test-aea_article/setspace.sty +546 -0
  69. data/examples/latex_templates/Test-amq_article/Test-amq_article.Rmd +256 -0
  70. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdf +0 -0
  71. data/examples/latex_templates/Test-amq_article/Test-amq_article.pdfsync +3397 -0
  72. data/examples/latex_templates/Test-amq_article/pics/Figure2.pdf +0 -0
  73. data/examples/latex_templates/Test-ams_article/Test-ams_article.Rmd +215 -0
  74. data/examples/latex_templates/Test-ams_article/amstest.bib +436 -0
  75. data/examples/latex_templates/Test-asa_article/Test-asa_article.Rmd +153 -0
  76. data/examples/latex_templates/Test-asa_article/Test-asa_article.pdf +0 -0
  77. data/examples/latex_templates/Test-asa_article/agsm.bst +1353 -0
  78. data/examples/latex_templates/Test-asa_article/bibliography.bib +233 -0
  79. data/examples/latex_templates/Test-ieee_article/IEEEtran.bst +2409 -0
  80. data/examples/latex_templates/Test-ieee_article/IEEEtran.cls +6346 -0
  81. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.Rmd +175 -0
  82. data/examples/latex_templates/Test-ieee_article/Test-ieee_article.pdf +0 -0
  83. data/examples/latex_templates/Test-ieee_article/mybibfile.bib +20 -0
  84. data/examples/latex_templates/Test-rjournal_article/RJournal.sty +335 -0
  85. data/examples/latex_templates/Test-rjournal_article/RJreferences.bib +18 -0
  86. data/examples/latex_templates/Test-rjournal_article/RJwrapper.pdf +0 -0
  87. data/examples/latex_templates/Test-rjournal_article/Test-rjournal_article.Rmd +52 -0
  88. data/examples/latex_templates/Test-springer_article/Test-springer_article.Rmd +65 -0
  89. data/examples/latex_templates/Test-springer_article/Test-springer_article.pdf +0 -0
  90. data/examples/latex_templates/Test-springer_article/bibliography.bib +26 -0
  91. data/examples/latex_templates/Test-springer_article/spbasic.bst +1658 -0
  92. data/examples/latex_templates/Test-springer_article/spmpsci.bst +1512 -0
  93. data/examples/latex_templates/Test-springer_article/spphys.bst +1443 -0
  94. data/examples/latex_templates/Test-springer_article/svglov3.clo +113 -0
  95. data/examples/latex_templates/Test-springer_article/svjour3.cls +1431 -0
  96. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.Rmd +73 -0
  97. data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.pdf +0 -0
  98. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.Rmd +382 -0
  99. data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.pdf +0 -0
  100. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.Rmd +164 -0
  101. data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.pdf +0 -0
  102. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.Rmd +92 -0
  103. data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.pdf +0 -0
  104. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/attend-grade-relationships.csv +482 -0
  105. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.Rmd +280 -0
  106. data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.pdf +0 -0
  107. data/examples/rmarkdown/svm-xaringan-example/svm-xaringan-example.Rmd +386 -0
  108. data/lib/R_interface/r.rb +1 -1
  109. data/lib/R_interface/r_libs.R +1 -1
  110. data/lib/R_interface/r_methods.rb +10 -0
  111. data/lib/R_interface/rpkg.rb +1 -0
  112. data/lib/R_interface/rsupport.rb +4 -6
  113. data/lib/gknit.rb +2 -0
  114. data/lib/gknit/draft.rb +105 -0
  115. data/lib/gknit/knitr_engine.rb +0 -33
  116. data/lib/util/exec_ruby.rb +1 -27
  117. data/specs/figures/bg.jpeg +0 -0
  118. data/specs/figures/bg.png +0 -0
  119. data/specs/figures/dose_len.png +0 -0
  120. data/specs/figures/no_args.jpeg +0 -0
  121. data/specs/figures/no_args.png +0 -0
  122. data/specs/figures/width_height.jpeg +0 -0
  123. data/specs/figures/width_height.png +0 -0
  124. data/specs/figures/width_height_units1.jpeg +0 -0
  125. data/specs/figures/width_height_units1.png +0 -0
  126. data/specs/figures/width_height_units2.jpeg +0 -0
  127. data/specs/figures/width_height_units2.png +0 -0
  128. data/specs/r_dataframe.spec.rb +11 -11
  129. data/specs/ruby_expression.spec.rb +1 -0
  130. data/specs/tmp.rb +41 -20
  131. data/version.rb +1 -1
  132. metadata +73 -35
  133. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +0 -41
  134. data/blogs/galaaz_ggplot/galaaz_ggplot.out +0 -10
  135. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/midwest_rb.pdf +0 -0
  136. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/scatter_plot_rb.pdf +0 -0
  137. data/blogs/gknit/gknit.md +0 -1430
  138. data/blogs/gknit/gknit.tex +0 -1358
  139. data/blogs/manual/graph.rb +0 -29
  140. data/blogs/nse_dplyr/nse_dplyr.tex +0 -1373
  141. data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +0 -662
  142. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +0 -57
  143. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +0 -106
  144. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +0 -110
  145. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +0 -174
  146. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +0 -236
  147. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +0 -296
  148. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +0 -236
  149. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +0 -218
  150. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +0 -128
  151. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +0 -150
  152. data/blogs/ruby_plot/ruby_plot_files/figure-latex/dose_len.png +0 -0
  153. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_delivery.png +0 -0
  154. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_dose.png +0 -0
  155. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color.png +0 -0
  156. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color2.png +0 -0
  157. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_decorations.png +0 -0
  158. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_jitter.png +0 -0
  159. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_points.png +0 -0
  160. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_box_plot.png +0 -0
  161. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_violin_plot.png +0 -0
  162. data/blogs/ruby_plot/ruby_plot_files/figure-latex/violin_with_jitter.png +0 -0
  163. data/examples/paper/paper.rb +0 -36
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5028519688d5197e29ea9198499c8093f96aa27e498a0eb974367187d7d151da
4
- data.tar.gz: f5bad7debd953898f0335e04e83089137025c759a3910cf6d74061b53f4eb37e
3
+ metadata.gz: 0a936fac80a3198849bf43505e3badca81025fcef2b942fabe5edc328b6d35f3
4
+ data.tar.gz: 4aa40b1d667ee45ab94ee8e9565401e718179ad261c043a2173fe50d5b97dfb2
5
5
  SHA512:
6
- metadata.gz: 5b14427f32a5db4f2c9754c1ee7fea356c939727152a626c616c3dff1372cddb4fd4d982dc761c2a2e2ca1c211b8a0215d26c2b11eb162cd2f7ab5f0c1c9344e
7
- data.tar.gz: 94c7da10fd04a9136b9a36582574ae04c9f3a4767f1a3dd04137a64f4e104cb8c3c0906752c627cef27ff81b7bbca0bde83aa58e9e5b742005079b30c46616a2
6
+ metadata.gz: 34974a5d148a2f0896fa07ef26f046af1b43d1263750732d072e6614ad8f3ff32783248a02228acd9b6c0f2183ddb68c91a6dd93aebd51198c594c1f6e513298
7
+ data.tar.gz: 88ea82fcf3e298deacdae6c7305faabff38d89b41a526a8f0e528c00555190acd84006764365c0fa7e913e361f3ecaf69cdf1c00332b80d4ba7d276dad7d10fe
data/README.md CHANGED
@@ -1,3 +1,28 @@
1
+ ---
2
+ title: "Galaaz Manual"
3
+ subtitle: "How to tightly couple Ruby and R in GraalVM"
4
+ author: "Rodrigo Botafogo"
5
+ tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, ggplot2]
6
+ date: "2019"
7
+ bibliography: "/home/rbotafogo/Bibliography/stats.bib"
8
+ output:
9
+ html_document:
10
+ self_contained: true
11
+ keep_md: true
12
+ md_document:
13
+ variant: markdown_github
14
+ pdf_document:
15
+ includes:
16
+ in_header: "../../sty/galaaz.sty"
17
+ keep_tex: yes
18
+ number_sections: yes
19
+ toc: true
20
+ toc_depth: 3
21
+ fontsize: 11pt
22
+ ---
23
+
24
+
25
+
1
26
  # Introduction
2
27
 
3
28
  Galaaz is a system for tightly coupling Ruby and R. Ruby is a powerful language, with a large
@@ -7,6 +32,92 @@ other hand, R is considered one of the most powerful languages for solving all o
7
32
  problems. Maybe the strongest competitor to R is Python with libraries such as NumPy,
8
33
  Panda, SciPy, SciKit-Learn and a couple more.
9
34
 
35
+ With Galaaz we do not intend to re-implement any of the scientific libraries in R, we allow
36
+ for very tight coupling between the two languages to the point that the Ruby developer does
37
+ not need to know that there is an R engine running.
38
+
39
+ According to Wikipedia "Ruby is a dynamic, interpreted, reflective, object-oriented,
40
+ general-purpose programming language. It was designed and developed in the mid-1990s by Yukihiro
41
+ "Matz" Matsumoto in Japan." It reached high popularity with the development of Ruby on Rails
42
+ (RoR) by David Heinemeier Hansson. RoR is a web application framework first released
43
+ around 2005. It makes extensive use of Ruby's metaprogramming features. With RoR,
44
+ Ruby became very popular. According to [Ruby's Tiobe index](https://www.tiobe.com/tiobe-index/ruby/)
45
+ it peaked in popularity around 2008, then declined until 2015 when it started picking up again.
46
+ At the time of this writing (November 2018), the Tiobe index puts Ruby in 16th position as
47
+ most popular language.
48
+
49
+ Python, a language similar to Ruby, ranks 4th in the index. Java, C and C++ take the
50
+ first three positions. Ruby is often criticized for its focus on web applications.
51
+ But Ruby can do [much more](https://github.com/markets/awesome-ruby) than just web applications.
52
+ Yet, for scientific computing, Ruby lags way behind Python and R. Python has
53
+ Django framework for web, NumPy for numerical arrays, Pandas for data analysis.
54
+ R is a free software environment for statistical computing and graphics with thousands
55
+ of libraries for data analysis.
56
+
57
+ Until recently, there was no real perspective for Ruby to bridge this gap.
58
+ Implementing a complete scientific computing infrastructure would take too long.
59
+ Enter [Oracle's GraalVM](https://www.graalvm.org/):
60
+
61
+ > GraalVM is a universal virtual machine for running applications written in
62
+ > JavaScript, Python 3, Ruby, R, JVM-based languages like Java, Scala, Kotlin,
63
+ > and LLVM-based languages such as C and C++.
64
+ >
65
+ > GraalVM removes the isolation between programming languages and enables
66
+ > interoperability in a shared runtime. It can run either standalone or in the
67
+ > context of OpenJDK, Node.js, Oracle Database, or MySQL.
68
+ >
69
+ > GraalVM allows you to write polyglot applications with a seamless way to pass
70
+ > values from one language to another. With GraalVM there is no copying or
71
+ > marshaling necessary as it is with other polyglot systems. This lets you
72
+ > achieve high performance when language boundaries are crossed. Most of the time
73
+ > there is no additional cost for crossing a language boundary at all.
74
+ >
75
+ > Often developers have to make uncomfortable compromises that require them
76
+ > to rewrite their software in other languages. For example:
77
+ >
78
+ > * That library is not available in my language. I need to rewrite it.
79
+ > * That language would be the perfect fit for my problem, but we cannot
80
+ > run it in our environment.
81
+ > * That problem is already solved in my language, but the language is
82
+ > too slow.
83
+ >
84
+ > With GraalVM we aim to allow developers to freely choose the right language for
85
+ > the task at hand without making compromises.
86
+
87
+ As stated above, GraalVM is a _universal_ virtual machine that allows Ruby and R (and other
88
+ languages) to run on the same environment. GraalVM allows polyglot applications to
89
+ _seamlessly_ interact with one another and pass values from one language to the other.
90
+ Although a great idea, GraalVM still requires application writers to know several languages.
91
+ To eliminate that requirement, we built Galaaz, a gem for Ruby, to tightly couple
92
+ Ruby and R and allow those languages to interact in a way that the user will be unaware
93
+ of such interaction. In other words, a Ruby programmer will be able to use all
94
+ the capabilities of R without knowing the R syntax.
95
+
96
+ Library wrapping is a usual way of bringing features from one language into another.
97
+ To improve performance, Python often wraps more efficient C libraries. For the
98
+ Python developer, the existence of such C libraries is hidden. The problem with
99
+ library wrapping is that for any new library, there is the need to handcraft a new
100
+ wrapper.
101
+
102
+ Galaaz, instead of wrapping a single C or R library, wraps the whole R language
103
+ in Ruby. Doing so, all thousands of R libraries are available immediately
104
+ to Ruby developers without any new wrapping effort.
105
+
106
+ ## What does Galaaz mean
107
+
108
+ Galaaz is the Portuguese name for "Galahad". From Wikipedia:
109
+
110
+ Sir Galahad (sometimes referred to as Galeas or Galath),
111
+ in Arthurian legend, is a knight of King Arthur's Round Table and one
112
+ of the three achievers of the Holy Grail. He is the illegitimate son
113
+ of Sir Lancelot and Elaine of Corbenic, and is renowned for his
114
+ gallantry and purity as the most perfect of all knights. Emerging quite
115
+ late in the medieval Arthurian tradition, Sir Galahad first appears in the
116
+ Lancelot–Grail cycle, and his story is taken up in later works such as
117
+ the Post-Vulgate Cycle and Sir Thomas Malory's Le Morte d'Arthur.
118
+ His name should not be mistaken with Galehaut, a different knight from
119
+ Arthurian legend.
120
+
10
121
  # System Compatibility
11
122
 
12
123
  * Oracle Linux 7
@@ -62,7 +173,7 @@ Panda, SciPy, SciKit-Learn and a couple more.
62
173
  > galaaz -T
63
174
 
64
175
  Shows a list with all available executable tasks. To execute a task, substitute the
65
- 'rake' word in the list with 'galaaz'. For instance, the following line shows up
176
+ 'rake' word in the list with 'galaaz'. For instance, the following line shows up
66
177
  after 'galaaz -T'
67
178
 
68
179
  rake master_list:scatter_plot # scatter_plot from:....
@@ -71,349 +182,1468 @@ Panda, SciPy, SciKit-Learn and a couple more.
71
182
 
72
183
  > galaaz master_list:scatter_plot
73
184
 
74
- # gKnitting a Document
75
-
76
- This manual has been formatted usign gKnit. gKnit uses Knitr and R markdown to knit
77
- a document in Ruby or R and output it in any of the available formats for R markdown.
78
- gKnit runs atop of GraalVM, and Galaaz. In gKnit, Ruby variables are persisted between
79
- chunks, making it an ideal solution for literate programming. Also, since it is based
80
- on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming with
81
- Ruby and R is quite natural.
82
185
 
83
- [gknit is described in more details here](https://towardsdatascience.com/how-to-do-reproducible-research-in-ruby-with-gknit-c26d2684d64e)
186
+ # Accessing R from Ruby
84
187
 
85
- # Vector
188
+ One of the nice aspects of Galaaz on GraalVM, is that variables and functions defined in R, can
189
+ be easily accessed from Ruby. For instance, to access the 'mtcars' data frame from R
190
+ in Ruby, we use the ':mtcars' symbol preceded by the '~' operator, thus '~:mtcars' retrieves the
191
+ value of the 'mtcars' variable.
86
192
 
87
- Vectors can be thought of as contiguous cells containing data. Cells are accessed through
88
- indexing operations such as x[5]. Galaaz has six basic (‘atomic’) vector types: logical,
89
- integer, real, complex, string (or character) and raw. The modes and storage modes for the
90
- different vector types are listed in the following
91
- table.
92
193
 
93
- | typeof | mode | storage.mode |
94
- |-----------|:---------:|-------------:|
95
- | logical | logical | logical |
96
- | integer | numeric | integer |
97
- | double | numeric | double |
98
- | complex | complex | comples |
99
- | character | character | character |
100
- | raw | raw | raw |
194
+ ```ruby
195
+ puts ~:mtcars
196
+ ```
101
197
 
102
- Single numbers, such as 4.2, and strings, such as "four point two" are still vectors, of length
103
- 1; there are no more basic types. Vectors with length zero are possible (and useful).
104
- String vectors have mode and storage mode "character". A single element of a character
105
- vector is often referred to as a character string.
198
+ ```
199
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
200
+ ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
201
+ ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
202
+ ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
203
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
204
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
205
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
206
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
207
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
208
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
209
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
210
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
211
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
212
+ ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
213
+ ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
214
+ ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
215
+ ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
216
+ ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
217
+ ## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
218
+ ## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
219
+ ## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
220
+ ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
221
+ ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
222
+ ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
223
+ ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
224
+ ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
225
+ ## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
226
+ ## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
227
+ ## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
228
+ ## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
229
+ ## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
230
+ ## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
231
+ ## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
232
+ ```
106
233
 
107
- To create a vector the 'c' (concatenate) method from the 'R' module should be used:
234
+ To access an R function from Ruby, the R function needs to be preceded by 'R.' scoping.
235
+ Below we see an example of creating an R::Vector by calling the 'c' R function
108
236
 
109
237
 
110
238
  ```ruby
111
- vec = R.c(1, 2, 3)
112
- puts vec
239
+ puts vec = R.c(1.0, 2.0, 3.0, 4.0)
113
240
  ```
114
241
 
115
242
  ```
116
- ## [1] 1 2 3
243
+ ## [1] 1 2 3 4
117
244
  ```
245
+ Note that 'vec' is an object of type R::Vector:
118
246
 
119
- Lets take a look at the type, mode and storage.mode of our vector vec. In order to print
120
- this out, we are creating a data frame 'df' and printing it out. A data frame, for those
121
- not familiar with it, is basically a table. Here we create the data frame and add the
122
- column name by passing named parameters for each column, such as 'typeof:', 'mode:' and
123
- 'storage__mode?'. You should also note here that the double underscore is converted to a '.'.
124
- So, when printed 'storage\_\_mode' will actually print as 'storage.mode'.
125
247
 
126
- Data frames will later be more carefully described. In R, the method used to create a
127
- data frame is 'data.frame', in Galaaz we use 'data\_\_frame'.
248
+ ```ruby
249
+ puts vec.class
250
+ ```
251
+
252
+ ```
253
+ ## R::Vector
254
+ ```
255
+ Every object created by a call to an R function will be of a type that inherits from
256
+ R::Object. In R, there is also a function 'class'. In order to access that function we
257
+ can call method 'rclass' in the R::Object:
128
258
 
129
259
 
130
260
  ```ruby
131
- df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
132
- puts df
261
+ puts vec.rclass
133
262
  ```
134
263
 
135
264
  ```
136
- ## typeof mode storage.mode
137
- ## 1 integer numeric integer
265
+ ## [1] "numeric"
138
266
  ```
139
-
140
- If you want to create a vector with floating point numbers, then we need at least one of the
141
- vector's element to be a float, such as 1.0. R users should be careful, since in R a number
142
- like '1' is converted to float and to have an integer the R developer will use '1L'. Galaaz
143
- follows normal Ruby rules and the number 1 is an integer and 1.0 is a float.
267
+ When working with R::Object(s), it is possible to use the '.' operator to pipe operations.
268
+ When using '.', the object to which the '.' is applied becomes the first argument of the
269
+ corresponding R function. For instance, function 'c' in R, can be used to concatenate
270
+ two or more vectors (in R, there are no scalar values; scalars are converted to
271
+ vectors of size 1. Within Galaaz, a scalar parameter is converted to a size-one vector):
144
272
 
145
273
 
146
274
  ```ruby
147
- vec = R.c(1.0, 2, 3)
148
- puts vec
275
+ puts R.c(vec, 10, 20, 30)
149
276
  ```
150
277
 
151
278
  ```
152
- ## [1] 1 2 3
279
+ ## [1] 1 2 3 4 10 20 30
153
280
  ```
281
+ The call above to the 'c' function can also be done using '.' notation:
154
282
 
155
283
 
156
284
  ```ruby
157
- df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
158
- outputs df.kable.kable_styling
285
+ puts vec.c(10, 20, 30)
159
286
  ```
160
287
 
161
- <table class="table" style="margin-left: auto; margin-right: auto;">
162
- <thead>
163
- <tr>
164
- <th style="text-align:left;"> typeof </th>
165
- <th style="text-align:left;"> mode </th>
166
- <th style="text-align:left;"> storage.mode </th>
167
- </tr>
168
- </thead>
169
- <tbody>
170
- <tr>
171
- <td style="text-align:left;"> double </td>
172
- <td style="text-align:left;"> numeric </td>
173
- <td style="text-align:left;"> double </td>
174
- </tr>
175
- </tbody>
176
- </table>
177
-
178
- In this next example we try to create a vector with a variable 'hello' that has not yet
179
- being defined. This will raise an exception that is printed out. We get two return blocks,
180
- the first with a message explaining what went wrong and the second with the full backtrace
181
- of the error.
288
+ ```
289
+ ## [1] 1 2 3 4 10 20 30
290
+ ```
291
+ We will talk about vector indexing in a later section. But notice here that indexing
292
+ an R::Vector will return another R::Vector:
182
293
 
183
294
 
184
295
  ```ruby
185
- vec = R.c(1, hello, 5)
296
+ puts vec[1]
186
297
  ```
187
298
 
188
299
  ```
189
- ## Message:
190
- ## undefined local variable or method `hello' for #<RC:0x2e0 @out_list=nil>:RC
300
+ ## [1] 1
301
+ ```
302
+ Sometimes we want to index an R::Object and get back a Ruby object that is not wrapped
303
+ in an R::Object, but the native Ruby object. For this, we can index the R object with
304
+ the '>>' operator:
305
+
306
+
307
+ ```ruby
308
+ puts vec >> 0
309
+ puts vec >> 2
191
310
  ```
192
311
 
193
312
  ```
194
- ## Message:
195
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:103:in `get_binding'
196
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `eval'
197
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `exec_ruby'
198
- ## /home/rbotafogo/desenv/galaaz/lib/gknit/knitr_engine.rb:650:in `block in initialize'
199
- ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
200
- ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
201
- ## (eval):3:in `function(...) {\n rb_method(...)'
202
- ## unknown.r:1:in `in_dir'
203
- ## unknown.r:1:in `block_exec:BLOCK0'
204
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:102:in `block_exec'
205
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:92:in `call_block'
206
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:6:in `process_group.block'
207
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:3:in `<no source>'
208
- ## unknown.r:1:in `withCallingHandlers'
209
- ## unknown.r:1:in `process_file'
210
- ## unknown.r:1:in `<no source>:BLOCK1'
211
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/output.R:129:in `<no source>'
212
- ## unknown.r:1:in `<no source>:BLOCK1'
213
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/rmarkdown/R/render.R:162:in `<no source>'
214
- ## <REPL>:5:in `<repl wrapper>'
215
- ## <REPL>:1
313
+ ## 1.0
314
+ ## 3.0
216
315
  ```
217
316
 
218
- Here is a vector with logical values
317
+ It is also possible to call an R function with named arguments, by creating the function
318
+ in Galaaz with named parameters. For instance, here is an example of creating a 'list'
319
+ with named elements:
219
320
 
220
321
 
221
322
  ```ruby
222
- vec = R.c(true, true, false, false, true)
223
- puts vec
323
+ puts R.list(first_name: "Rodrigo", last_name: "Botafogo")
224
324
  ```
225
325
 
226
326
  ```
227
- ## [1] TRUE TRUE FALSE FALSE TRUE
327
+ ## $first_name
328
+ ## [1] "Rodrigo"
329
+ ##
330
+ ## $last_name
331
+ ## [1] "Botafogo"
228
332
  ```
229
333
 
230
- ## Combining Vectors
231
-
232
- The 'c' functions used to create vectors can also be used to combine two vectors:
334
+ Many R functions receive another function as argument. For instance, method 'map' applies
335
+ a function to every element of a vector. With Galaaz, it is possible to pass a Proc,
336
+ Method or Lambda in place of the expected R function. In this next example, we will
337
+ add 2 to every element of our previously created vector:
233
338
 
234
339
 
235
340
  ```ruby
236
- vec1 = R.c(10.0, 20.0, 30.0)
237
- vec2 = R.c(4.0, 5.0, 6.0)
238
- vec = R.c(vec1, vec2)
239
- puts vec
341
+ puts vec.map { |x| x + 2 }
240
342
  ```
241
343
 
242
344
  ```
243
- ## [1] 10 20 30 4 5 6
345
+ ## [1] 3
346
+ ## [1] 4
347
+ ## [1] 5
348
+ ## [1] 6
244
349
  ```
245
- In galaaz, methods can be chainned (somewhat like the pipe operator in R %>%, but more generic).
246
- In this next example, method 'c' is chainned after 'vec1'. This also looks like 'c' is a
247
- method of the vector, but in reallity, this is actually closer to the pipe operator. When
248
- Galaaz identifies that 'c' is not a method of 'vec' it actually tries to call 'R.c' with
249
- 'vec1' as the first argument concatenated with all the other available arguments. The code
250
- bellow is automatically converted to the code above.
350
+
351
+ # gKnitting a Document
352
+
353
+ This manual has been formatted using gKnit. gKnit uses Knitr and R markdown to knit
354
+ a document in Ruby or R and output it in any of the available formats for R markdown.
355
+ gKnit runs atop of GraalVM, and Galaaz. In gKnit, Ruby variables are persisted between
356
+ chunks, making it an ideal solution for literate programming. Also, since it is based
357
+ on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming with
358
+ Ruby and R is quite natural.
359
+
360
+ The idea of "literate programming" was first introduced by Donald Knuth in the
361
+ 1980's [@Knuth:literate_programming].
362
+ The main intention of this approach was to develop software interspersing macro snippets,
363
+ traditional source code, and a natural language such as English in a document
364
+ that could be compiled into
365
+ executable code and at the same time easily read by a human developer. According to Knuth
366
+ "The practitioner of
367
+ literate programming can be regarded as an essayist, whose main concern is with exposition
368
+ and excellence of style."
369
+
370
+ The idea of literate programming evolved into the idea of reproducible research, in which
371
+ all the data, software code, documentation, graphics etc. needed to reproduce the research
372
+ and its reports could be included in a
373
+ single document or set of documents that when distributed to peers could be rerun generating
374
+ the same output and reports.
375
+
376
+ The R community has put a great deal of effort in reproducible research. In 2002, Sweave was
377
+ introduced and it allowed mixing R code with Latex generating high quality PDF documents. A
378
+ Sweave document could include code, the results of executing the code, graphics and text
379
+ such that it contained the whole narrative to reproduce the research. In
380
+ 2012, Knitr, developed by Yihui Xie from RStudio was released to replace Sweave and to
381
+ consolidate in one single package the many extensions and add-on packages that
382
+ were necessary for Sweave.
383
+
384
+ With Knitr, __R markdown__ was also developed, an extension to the
385
+ Markdown format. With __R markdown__ and Knitr it is possible to generate reports in a multitude
386
+ of formats such as HTML, markdown, Latex, PDF, dvi, etc. __R markdown__ also allows the use of
387
+ multiple programming languages such as R, Ruby, Python, etc. in the same document.
388
+
389
+ In __R markdown__, text is interspersed with
390
+ code chunks that can be executed and both the code and its results can become
391
+ part of the final report. Although __R markdown__ allows multiple programming languages in the
392
+ same document, only R and Python (with
393
+ the reticulate package) can persist variables between chunks. For other languages, such as
394
+ Ruby, every chunk will start a new process and thus all data is lost between chunks, unless it
395
+ is somehow stored in a data file that is read by the next chunk.
396
+
397
+ Being able to persist data
398
+ between chunks is critical for literate programming otherwise the flow of the narrative is lost
399
+ by all the effort of having to save data and then reload it. Although this might, at first, seem like
400
+ a small nuisance, not being able to persist data between chunks is a major issue. For example, let's
401
+ take a look at the following simple example in which we want to show how to create a list and then
402
+ use it. Let's first assume that data cannot be persisted between chunks. In the next chunk we
403
+ create a list, then we would need to save it to file, but to save it, we need somehow to marshal the
404
+ data into a binary format:
251
405
 
252
406
 
253
407
  ```ruby
254
- vec = vec1.c(vec2)
255
- puts vec
408
+ lst = R.list(a: 1, b: 2, c: 3)
409
+ lst.saveRDS("lst.rds")
410
+ ```
411
+ then, on the next chunk, where variable 'lst' is used, we need to read back its value
412
+
413
+
414
+ ```ruby
415
+ lst = R.readRDS("lst.rds")
416
+ puts lst
256
417
  ```
257
418
 
258
419
  ```
259
- ## [1] 10 20 30 4 5 6
420
+ ## $a
421
+ ## [1] 1
422
+ ##
423
+ ## $b
424
+ ## [1] 2
425
+ ##
426
+ ## $c
427
+ ## [1] 3
260
428
  ```
261
429
 
262
- ## Vector Arithmetic
430
+ Now, any single code has dozens of variables that we might want to use and reuse between chunks.
431
+ Clearly, such an approach becomes quickly unmanageable. Probably, because of
432
+ this problem, it is very rare to see any __R markdown__ document in the Ruby community.
263
433
 
264
- Arithmetic operations on vectors are performed element by element:
434
+ When variables can be used across chunks, then no overhead is needed:
265
435
 
266
436
 
267
437
  ```ruby
268
- puts vec1 + vec2
438
+ lst = R.list(a: 1, b: 2, c: 3)
439
+ # any other code can be added here
440
+ ```
441
+
442
+
443
+ ```ruby
444
+ puts lst
269
445
  ```
270
446
 
271
447
  ```
272
- ## [1] 14 25 36
448
+ ## $a
449
+ ## [1] 1
450
+ ##
451
+ ## $b
452
+ ## [1] 2
453
+ ##
454
+ ## $c
455
+ ## [1] 3
273
456
  ```
274
457
 
458
+ In the Python community, the same effort to have code and text in an integrated environment
459
+ started around the first decade of 2000. In 2006 iPython 0.7.2 was released. In 2014,
460
+ Fernando Pérez, spun off project Jupyter from iPython creating a web-based interactive
461
+ computation environment. Jupyter can now be used with many languages, including Ruby with the
462
+ iruby gem (https://github.com/SciRuby/iruby). In order to have multiple languages in a Jupyter
463
+ notebook the SoS kernel was developed (https://vatlab.github.io/sos-docs/).
464
+
465
+ ## gKnit and __R markdown__
466
+
467
+ gKnit is based on knitr and __R markdown__ and can knit a document
468
+ written both in Ruby and/or R and output it in any of the available formats of __R markdown__. gKnit
469
+ allows ruby developers to do literate programming and reproducible research by allowing them to
470
+ have in a single document, text and code.
471
+
472
+ In gKnit, Ruby variables are persisted between
473
+ chunks, making it an ideal solution for literate programming in this language. Also,
474
+ since it is based on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming
475
+ with Ruby and R is quite natural.
476
+
477
+ This is not a blog post on __R markdown__, and the interested user is directed to the following links
478
+ for detailed information on its capabilities and use.
479
+
480
+ * https://rmarkdown.rstudio.com/ or
481
+ * https://bookdown.org/yihui/rmarkdown/
482
+
483
+ In this post, we will describe just the main aspects of __R markdown__, so the user can start
484
+ gKnitting Ruby and R documents quickly.
485
+
486
+ ## The Yaml header
487
+
488
+ An __R markdown__ document should start with a Yaml header and be stored in a file with
489
+ '.Rmd' extension. This document has the following header for gKnitting an HTML document.
275
490
 
276
- ```ruby
277
- puts vec1 * 5
278
491
  ```
492
+ ---
493
+ title: "How to do reproducible research in Ruby with gKnit"
494
+ author:
495
+ - "Rodrigo Botafogo"
496
+ - "Daniel Mossé - University of Pittsburgh"
497
+ tags: [Tech, Data Science, Ruby, R, GraalVM]
498
+ date: "20/02/2019"
499
+ output:
500
+ html_document:
501
+ self_contained: true
502
+ keep_md: true
503
+ pdf_document:
504
+ includes:
505
+ in_header: ["../../sty/galaaz.sty"]
506
+ number_sections: yes
507
+ ---
508
+ ```
509
+
510
+ For more information on the options in the Yaml header, [check here](https://bookdown.org/yihui/rmarkdown/html-document.html).
511
+
512
+ ## __R Markdown__ formatting
513
+
514
+ Document formatting can be done with simple markups such as:
515
+
516
+ ## Headers
279
517
 
280
518
  ```
281
- ## [1] 50 100 150
519
+ # Header 1
520
+
521
+ ## Header 2
522
+
523
+ ### Header 3
524
+
282
525
  ```
283
526
 
284
- When vectors have different length, a recycling rule is applied to the shorter vector:
527
+ ## Lists
285
528
 
529
+ ```
530
+ Unordered lists:
286
531
 
287
- ```ruby
288
- vec3 = R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
289
- puts vec4 = vec1 + vec3
532
+ * Item 1
533
+ * Item 2
534
+ + Item 2a
535
+ + Item 2b
290
536
  ```
291
537
 
292
538
  ```
293
- ## [1] 11 22 33 14 25 36 17 28 39
539
+ Ordered Lists
540
+
541
+ 1. Item 1
542
+ 2. Item 2
543
+ 3. Item 3
544
+ + Item 3a
545
+ + Item 3b
294
546
  ```
295
547
 
296
- ## Vector Indexing
548
+ For more R markdown formatting go to https://rmarkdown.rstudio.com/authoring_basics.html.
297
549
 
298
- Vectors can be indexed by using the '[]' operator:
550
+ ## R chunks
299
551
 
552
+ Running and executing Ruby and R code is actually what really interests us in this blog.
553
+ Inserting a code chunk is done by adding code in a block delimited by three back ticks
554
+ followed by an open
555
+ curly brace ('{') followed with the engine name (r, ruby, rb, include, ...), and
556
+ any optional chunk_label and options, as shown below:
300
557
 
301
- ```ruby
302
- puts vec4[3]
558
+ ````
559
+ ```{engine_name [chunk_label], [chunk_options]}
303
560
  ```
561
+ ````
562
+
563
+ for instance, let's add an R chunk to the document labeled 'first_r_chunk'. This is
564
+ a very simple code just to create a variable and print it out, as follows:
304
565
 
566
+ ````
567
+ ```{r first_r_chunk}
568
+ vec <- c(1, 2, 3)
569
+ print(vec)
305
570
  ```
306
- ## [1] 33
571
+ ````
572
+
573
+ If this block is added to an __R markdown__ document and gKnitted the result will be:
574
+
575
+
576
+ ```r
577
+ vec <- c(1, 2, 3)
578
+ print(vec)
307
579
  ```
308
580
 
309
- We can also index a vector with another vector. For example, in the code bellow, we take elements
310
- 1, 3, 5, and 7 from vec3:
581
+ ```
582
+ ## [1] 1 2 3
583
+ ```
311
584
 
585
+ Now let's say that we want to do some analysis in the code, but just print the result and not the
586
+ code itself. For this, we need to add the option 'echo = FALSE'.
312
587
 
313
- ```ruby
314
- puts vec4[R.c(1, 3, 5, 7)]
588
+ ````
589
+ ```{r second_r_chunk, echo = FALSE}
590
+ vec2 <- c(10, 20, 30)
591
+ vec3 <- vec * vec2
592
+ print(vec3)
315
593
  ```
594
+ ````
595
+ Here is how this block will show up in the document. Observe that the code is not shown
596
+ and we only see the execution result in a white box
597
+
316
598
 
317
599
  ```
318
- ## [1] 11 33 25 17
600
+ ## [1] 10 40 90
319
601
  ```
320
602
 
321
- Repeating an index and having indices out of order is valid code:
603
+ A description of the available chunk options can be found in https://yihui.name/knitr/.
322
604
 
605
+ Let's add another R chunk with a function definition. In this example, a vector
606
+ 'r_vec' is created and
607
+ a new function 'reduce_sum' is defined. The chunk specification is
323
608
 
324
- ```ruby
325
- puts vec4[R.c(1, 3, 3, 1)]
609
+ ````
610
+ ```{r data_creation}
611
+ r_vec <- c(1, 2, 3, 4, 5)
612
+
613
+ reduce_sum <- function(...) {
614
+ Reduce(sum, as.list(...))
615
+ }
326
616
  ```
617
+ ````
618
+
619
+ and this is what it will look like once executed. From now on, to be concise in the
620
+ presentation we will not show chunk definitions any longer.
621
+
622
+
623
+
624
+ ```r
625
+ r_vec <- c(1, 2, 3, 4, 5)
327
626
 
627
+ reduce_sum <- function(...) {
628
+ Reduce(sum, as.list(...))
629
+ }
328
630
  ```
329
- ## [1] 11 33 33 11
631
+
632
+ We can, possibly in another chunk, access the vector and call the function as follows:
633
+
634
+
635
+ ```r
636
+ print(r_vec)
330
637
  ```
331
638
 
332
- It is also possible to index a vector with a negative number or negative vector. In these cases
333
- the indexed values are not returned:
639
+ ```
640
+ ## [1] 1 2 3 4 5
641
+ ```
334
642
 
643
+ ```r
644
+ print(reduce_sum(r_vec))
645
+ ```
335
646
 
336
- ```ruby
337
- puts vec4[-3]
338
- puts vec4[-R.c(1, 3, 5, 7)]
339
647
  ```
648
+ ## [1] 15
649
+ ```
650
+ ## R Graphics with ggplot
651
+
652
+ In the following chunk, we create a bubble chart in R using ggplot and include it in
653
+ this document. Note that there is no directive in the code to include the image, this
654
+ occurs automatically. The 'mpg' dataframe is natively available to R and to Galaaz as
655
+ well.
656
+
657
+ For the reader not knowledgeable of ggplot, ggplot is a graphics library based on "the
658
+ grammar of graphics" [@Wilkinson:grammar_of_graphics]. The idea of the grammar of graphics
659
+ is to build a graphics by adding layers to the plot. More information can be found in
660
+ https://towardsdatascience.com/a-comprehensive-guide-to-the-grammar-of-graphics-for-effective-visualization-of-multi-dimensional-1f92b4ed4149.
661
+
662
+ In the plot below the 'mpg' dataset from base R is used. "The data concerns city-cycle fuel
663
+ consumption in miles per gallon, to be predicted in terms of 3 multivalued discrete and 5
664
+ continuous attributes." (Quinlan, 1993)
665
+
666
+ First, the 'mpg' dataset is filtered to extract only cars from the following manufacturers: Audi, Ford,
667
+ Honda, and Hyundai and stored in the 'mpg_select' variable. Then, the selected dataframe is passed
668
+ to the ggplot function specifying in the aesthetic method (aes) that 'displacement' (displ) should
669
+ be plotted in the 'x' axis and 'city mileage' should be on the 'y' axis. In the 'labs' layer we
670
+ pass the 'title' and 'subtitle' for the plot. To the basic plot 'g', geom\_jitter is added, that
671
+ plots cars from the same manufacturer with the same color (col=manufacturer) and the size of the
672
+ car point equal to its highway consumption (size = hwy). Finally, a last layer is plotted containing
673
+ a linear regression line (method = "lm") for every manufacturer.
674
+
675
+
676
+ ```r
677
+ # load package and data
678
+ library(ggplot2)
679
+ ```
680
+
681
+ ```
682
+ ## Message:
683
+ ## Registered S3 methods overwritten by 'ggplot2':
684
+ ## method from
685
+ ## [.quosures rlang
686
+ ## c.quosures rlang
687
+ ## print.quosures rlang
688
+ ```
689
+
690
+ ```r
691
+ data(mpg, package="ggplot2")
692
+
693
+ mpg_select <- mpg[mpg$manufacturer %in% c("audi", "ford", "honda", "hyundai"), ]
694
+
695
+ # Scatterplot
696
+ theme_set(theme_bw()) # pre-set the bw theme.
697
+ g <- ggplot(mpg_select, aes(displ, cty)) +
698
+ labs(subtitle="mpg: Displacement vs City Mileage",
699
+ title="Bubble chart")
700
+
701
+ g + geom_jitter(aes(col=manufacturer, size=hwy)) +
702
+ geom_smooth(aes(col=manufacturer), method="lm", se=F)
703
+ ```
704
+
705
+ ![](manual_files/figure-html/bubble-1.png)<!-- -->
706
+
707
+ ## Ruby chunks
708
+
709
+ Including a Ruby chunk is just as easy as including an R chunk in the document: just
710
+ change the name of the engine to 'ruby'. It is also possible to pass chunk options
711
+ to the Ruby engine; however, this version does not accept all the options that are
712
+ available to R chunks. Future versions will add those options.
713
+
714
+ ````
715
+ ```{ruby first_ruby_chunk}
716
+ ```
717
+ ````
718
+
719
+ In this example, the ruby chunk is called 'first_ruby_chunk'. One important
720
+ aspect of chunk labels is that they cannot be duplicated. If a chunk label is
721
+ duplicated, gKnit will stop with an error.
722
+
723
+ In the following chunk, variable 'a', 'b' and 'c' are standard Ruby variables
724
+ and 'vec' and 'vec2' are two vectors created by calling the 'c' method on the
725
+ R module.
726
+
727
+ In Galaaz, the R module allows us to access R functions transparently. The 'c'
728
+ function in R, is a function that concatenates its arguments making a vector.
729
+
730
+ It
731
+ should be clear that there is no requirement in gknit to call or use any R
732
+ functions. gKnit will knit standard Ruby code, or even general text without
733
+ any code.
734
+
735
+
736
+ ```ruby
737
+ a = [1, 2, 3]
738
+ b = "US$ 250.000"
739
+ c = "The 'outputs' function"
740
+
741
+ vec = R.c(1, 2, 3)
742
+ vec2 = R.c(10, 20, 30)
743
+ ```
744
+
745
+ In the next block, variables 'a', 'vec' and 'vec2' are used and printed.
746
+
747
+
748
+ ```ruby
749
+ puts a
750
+ puts vec * vec2
751
+ ```
752
+
753
+ ```
754
+ ## 1
755
+ ## 2
756
+ ## 3
757
+ ## [1] 10 40 90
758
+ ```
759
+
760
+ Note that 'a' is a standard Ruby Array and 'vec' and 'vec2' are vectors that behave accordingly,
761
+ where multiplication works as expected.
762
+
763
+ ## Inline Ruby code
764
+
765
+ When using a Ruby chunk, the code and the output are formatted in blocks as seen above.
766
+ This formatting is not always desired. Sometimes, we want to have the results of the
767
+ Ruby evaluation included in the middle of a phrase. gKnit allows adding inline Ruby code
768
+ with the 'rb' engine. The following chunk specification will
769
+ create an inline Ruby text:
770
+
771
+ ````
772
+ This is some text with inline Ruby accessing variable 'b' which has value:
773
+ ```{rb puts b}
774
+ ```
775
+ and is followed by some other text!
776
+ ````
777
+
778
+ <div style="margin-bottom:30px;">
779
+ </div>
780
+
781
+ This is some text with inline Ruby accessing variable 'b' which has value:
782
+ US$ 250.000
783
+ and is followed by some other text!
784
+
785
+ <div style="margin-bottom:30px;">
786
+ </div>
787
+
788
+ Note that it is important not to add any new line before or after the code
789
+ block if we want everything to be in only one line, resulting in the following sentence
790
+ with inline Ruby code.
791
+
792
+
793
+ ### The 'outputs' function
794
+
795
+ We have previously used the standard 'puts' method in Ruby chunks in order to produce
796
+ output. The result of a 'puts', as seen in all previous chunks that use it, is formatted
797
+ inside a white box that
798
+ follows the code block. Many times however, we would like to do some processing in the
799
+ Ruby chunk and have the result of this processing generate an output that is
800
+ "included" in the document as if we had typed it in the __R markdown__ document.
801
+
802
+ For example, suppose we want to create a new heading in our document, but the heading
803
+ phrase is the result of some code processing: maybe it's the first line of a file we are
804
+ going to read. Method 'outputs' adds its output as if typed in the __R markdown__ document.
805
+
806
+ Take now a look at variable 'c' (it was defined in a previous block above) as
807
+ 'c = "The 'outputs' function". "The 'outputs' function" is actually the name of this
808
+ section and it was created using the 'outputs' function inside a Ruby chunk.
809
+
810
+ The ruby chunk to generate this heading is:
811
+
812
+ ````
813
+ ```{ruby heading}
814
+ outputs "### #{c}"
815
+ ```
816
+ ````
817
+
818
+ The three '###' is the way we add a Heading 3 in __R markdown__.
819
+
820
+
821
+ ### HTML Output from Ruby Chunks
822
+
823
+ We've just seen the use of method 'outputs' to add text to the __R markdown__
824
+ document. This technique can also be used to add HTML code to the document. In
825
+ __R markdown__, any html code typed directly in the document will be properly rendered.
826
+ Here, for instance, is a table definition in HTML and its output in the document:
827
+
828
+ ```
829
+ <table style="width:100%">
830
+ <tr>
831
+ <th>Firstname</th>
832
+ <th>Lastname</th>
833
+ <th>Age</th>
834
+ </tr>
835
+ <tr>
836
+ <td>Jill</td>
837
+ <td>Smith</td>
838
+ <td>50</td>
839
+ </tr>
840
+ <tr>
841
+ <td>Eve</td>
842
+ <td>Jackson</td>
843
+ <td>94</td>
844
+ </tr>
845
+ </table>
846
+ ```
847
+ <div style="margin-bottom:30px;">
848
+ </div>
849
+
850
+ <table style="width:100%">
851
+ <tr>
852
+ <th>Firstname</th>
853
+ <th>Lastname</th>
854
+ <th>Age</th>
855
+ </tr>
856
+ <tr>
857
+ <td>Jill</td>
858
+ <td>Smith</td>
859
+ <td>50</td>
860
+ </tr>
861
+ <tr>
862
+ <td>Eve</td>
863
+ <td>Jackson</td>
864
+ <td>94</td>
865
+ </tr>
866
+ </table>
867
+
868
+ <div style="margin-bottom:30px;">
869
+ </div>
870
+
871
+ But manually creating HTML output is not always easy or desirable, especially
872
+ if we intend the document to be rendered in other formats, for example, as Latex.
873
+ Also, the above
874
+ table looks ugly. The 'kableExtra' library is a great library for
875
+ creating beautiful tables. Take a look at https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html
876
+
877
+ In the next chunk, we output the 'mtcars' dataframe from R in a nicely formatted
878
+ table. Note that we retrieve the mtcars dataframe by using '~:mtcars'.
879
+
880
+
881
+ ```ruby
882
+ R.install_and_loads('kableExtra')
883
+ outputs (~:mtcars).kable.kable_styling
884
+ ```
885
+
886
+ <table class="table" style="margin-left: auto; margin-right: auto;">
887
+ <thead>
888
+ <tr>
889
+ <th style="text-align:left;"> </th>
890
+ <th style="text-align:right;"> mpg </th>
891
+ <th style="text-align:right;"> cyl </th>
892
+ <th style="text-align:right;"> disp </th>
893
+ <th style="text-align:right;"> hp </th>
894
+ <th style="text-align:right;"> drat </th>
895
+ <th style="text-align:right;"> wt </th>
896
+ <th style="text-align:right;"> qsec </th>
897
+ <th style="text-align:right;"> vs </th>
898
+ <th style="text-align:right;"> am </th>
899
+ <th style="text-align:right;"> gear </th>
900
+ <th style="text-align:right;"> carb </th>
901
+ </tr>
902
+ </thead>
903
+ <tbody>
904
+ <tr>
905
+ <td style="text-align:left;"> Mazda RX4 </td>
906
+ <td style="text-align:right;"> 21.0 </td>
907
+ <td style="text-align:right;"> 6 </td>
908
+ <td style="text-align:right;"> 160.0 </td>
909
+ <td style="text-align:right;"> 110 </td>
910
+ <td style="text-align:right;"> 3.90 </td>
911
+ <td style="text-align:right;"> 2.620 </td>
912
+ <td style="text-align:right;"> 16.46 </td>
913
+ <td style="text-align:right;"> 0 </td>
914
+ <td style="text-align:right;"> 1 </td>
915
+ <td style="text-align:right;"> 4 </td>
916
+ <td style="text-align:right;"> 4 </td>
917
+ </tr>
918
+ <tr>
919
+ <td style="text-align:left;"> Mazda RX4 Wag </td>
920
+ <td style="text-align:right;"> 21.0 </td>
921
+ <td style="text-align:right;"> 6 </td>
922
+ <td style="text-align:right;"> 160.0 </td>
923
+ <td style="text-align:right;"> 110 </td>
924
+ <td style="text-align:right;"> 3.90 </td>
925
+ <td style="text-align:right;"> 2.875 </td>
926
+ <td style="text-align:right;"> 17.02 </td>
927
+ <td style="text-align:right;"> 0 </td>
928
+ <td style="text-align:right;"> 1 </td>
929
+ <td style="text-align:right;"> 4 </td>
930
+ <td style="text-align:right;"> 4 </td>
931
+ </tr>
932
+ <tr>
933
+ <td style="text-align:left;"> Datsun 710 </td>
934
+ <td style="text-align:right;"> 22.8 </td>
935
+ <td style="text-align:right;"> 4 </td>
936
+ <td style="text-align:right;"> 108.0 </td>
937
+ <td style="text-align:right;"> 93 </td>
938
+ <td style="text-align:right;"> 3.85 </td>
939
+ <td style="text-align:right;"> 2.320 </td>
940
+ <td style="text-align:right;"> 18.61 </td>
941
+ <td style="text-align:right;"> 1 </td>
942
+ <td style="text-align:right;"> 1 </td>
943
+ <td style="text-align:right;"> 4 </td>
944
+ <td style="text-align:right;"> 1 </td>
945
+ </tr>
946
+ <tr>
947
+ <td style="text-align:left;"> Hornet 4 Drive </td>
948
+ <td style="text-align:right;"> 21.4 </td>
949
+ <td style="text-align:right;"> 6 </td>
950
+ <td style="text-align:right;"> 258.0 </td>
951
+ <td style="text-align:right;"> 110 </td>
952
+ <td style="text-align:right;"> 3.08 </td>
953
+ <td style="text-align:right;"> 3.215 </td>
954
+ <td style="text-align:right;"> 19.44 </td>
955
+ <td style="text-align:right;"> 1 </td>
956
+ <td style="text-align:right;"> 0 </td>
957
+ <td style="text-align:right;"> 3 </td>
958
+ <td style="text-align:right;"> 1 </td>
959
+ </tr>
960
+ <tr>
961
+ <td style="text-align:left;"> Hornet Sportabout </td>
962
+ <td style="text-align:right;"> 18.7 </td>
963
+ <td style="text-align:right;"> 8 </td>
964
+ <td style="text-align:right;"> 360.0 </td>
965
+ <td style="text-align:right;"> 175 </td>
966
+ <td style="text-align:right;"> 3.15 </td>
967
+ <td style="text-align:right;"> 3.440 </td>
968
+ <td style="text-align:right;"> 17.02 </td>
969
+ <td style="text-align:right;"> 0 </td>
970
+ <td style="text-align:right;"> 0 </td>
971
+ <td style="text-align:right;"> 3 </td>
972
+ <td style="text-align:right;"> 2 </td>
973
+ </tr>
974
+ <tr>
975
+ <td style="text-align:left;"> Valiant </td>
976
+ <td style="text-align:right;"> 18.1 </td>
977
+ <td style="text-align:right;"> 6 </td>
978
+ <td style="text-align:right;"> 225.0 </td>
979
+ <td style="text-align:right;"> 105 </td>
980
+ <td style="text-align:right;"> 2.76 </td>
981
+ <td style="text-align:right;"> 3.460 </td>
982
+ <td style="text-align:right;"> 20.22 </td>
983
+ <td style="text-align:right;"> 1 </td>
984
+ <td style="text-align:right;"> 0 </td>
985
+ <td style="text-align:right;"> 3 </td>
986
+ <td style="text-align:right;"> 1 </td>
987
+ </tr>
988
+ <tr>
989
+ <td style="text-align:left;"> Duster 360 </td>
990
+ <td style="text-align:right;"> 14.3 </td>
991
+ <td style="text-align:right;"> 8 </td>
992
+ <td style="text-align:right;"> 360.0 </td>
993
+ <td style="text-align:right;"> 245 </td>
994
+ <td style="text-align:right;"> 3.21 </td>
995
+ <td style="text-align:right;"> 3.570 </td>
996
+ <td style="text-align:right;"> 15.84 </td>
997
+ <td style="text-align:right;"> 0 </td>
998
+ <td style="text-align:right;"> 0 </td>
999
+ <td style="text-align:right;"> 3 </td>
1000
+ <td style="text-align:right;"> 4 </td>
1001
+ </tr>
1002
+ <tr>
1003
+ <td style="text-align:left;"> Merc 240D </td>
1004
+ <td style="text-align:right;"> 24.4 </td>
1005
+ <td style="text-align:right;"> 4 </td>
1006
+ <td style="text-align:right;"> 146.7 </td>
1007
+ <td style="text-align:right;"> 62 </td>
1008
+ <td style="text-align:right;"> 3.69 </td>
1009
+ <td style="text-align:right;"> 3.190 </td>
1010
+ <td style="text-align:right;"> 20.00 </td>
1011
+ <td style="text-align:right;"> 1 </td>
1012
+ <td style="text-align:right;"> 0 </td>
1013
+ <td style="text-align:right;"> 4 </td>
1014
+ <td style="text-align:right;"> 2 </td>
1015
+ </tr>
1016
+ <tr>
1017
+ <td style="text-align:left;"> Merc 230 </td>
1018
+ <td style="text-align:right;"> 22.8 </td>
1019
+ <td style="text-align:right;"> 4 </td>
1020
+ <td style="text-align:right;"> 140.8 </td>
1021
+ <td style="text-align:right;"> 95 </td>
1022
+ <td style="text-align:right;"> 3.92 </td>
1023
+ <td style="text-align:right;"> 3.150 </td>
1024
+ <td style="text-align:right;"> 22.90 </td>
1025
+ <td style="text-align:right;"> 1 </td>
1026
+ <td style="text-align:right;"> 0 </td>
1027
+ <td style="text-align:right;"> 4 </td>
1028
+ <td style="text-align:right;"> 2 </td>
1029
+ </tr>
1030
+ <tr>
1031
+ <td style="text-align:left;"> Merc 280 </td>
1032
+ <td style="text-align:right;"> 19.2 </td>
1033
+ <td style="text-align:right;"> 6 </td>
1034
+ <td style="text-align:right;"> 167.6 </td>
1035
+ <td style="text-align:right;"> 123 </td>
1036
+ <td style="text-align:right;"> 3.92 </td>
1037
+ <td style="text-align:right;"> 3.440 </td>
1038
+ <td style="text-align:right;"> 18.30 </td>
1039
+ <td style="text-align:right;"> 1 </td>
1040
+ <td style="text-align:right;"> 0 </td>
1041
+ <td style="text-align:right;"> 4 </td>
1042
+ <td style="text-align:right;"> 4 </td>
1043
+ </tr>
1044
+ <tr>
1045
+ <td style="text-align:left;"> Merc 280C </td>
1046
+ <td style="text-align:right;"> 17.8 </td>
1047
+ <td style="text-align:right;"> 6 </td>
1048
+ <td style="text-align:right;"> 167.6 </td>
1049
+ <td style="text-align:right;"> 123 </td>
1050
+ <td style="text-align:right;"> 3.92 </td>
1051
+ <td style="text-align:right;"> 3.440 </td>
1052
+ <td style="text-align:right;"> 18.90 </td>
1053
+ <td style="text-align:right;"> 1 </td>
1054
+ <td style="text-align:right;"> 0 </td>
1055
+ <td style="text-align:right;"> 4 </td>
1056
+ <td style="text-align:right;"> 4 </td>
1057
+ </tr>
1058
+ <tr>
1059
+ <td style="text-align:left;"> Merc 450SE </td>
1060
+ <td style="text-align:right;"> 16.4 </td>
1061
+ <td style="text-align:right;"> 8 </td>
1062
+ <td style="text-align:right;"> 275.8 </td>
1063
+ <td style="text-align:right;"> 180 </td>
1064
+ <td style="text-align:right;"> 3.07 </td>
1065
+ <td style="text-align:right;"> 4.070 </td>
1066
+ <td style="text-align:right;"> 17.40 </td>
1067
+ <td style="text-align:right;"> 0 </td>
1068
+ <td style="text-align:right;"> 0 </td>
1069
+ <td style="text-align:right;"> 3 </td>
1070
+ <td style="text-align:right;"> 3 </td>
1071
+ </tr>
1072
+ <tr>
1073
+ <td style="text-align:left;"> Merc 450SL </td>
1074
+ <td style="text-align:right;"> 17.3 </td>
1075
+ <td style="text-align:right;"> 8 </td>
1076
+ <td style="text-align:right;"> 275.8 </td>
1077
+ <td style="text-align:right;"> 180 </td>
1078
+ <td style="text-align:right;"> 3.07 </td>
1079
+ <td style="text-align:right;"> 3.730 </td>
1080
+ <td style="text-align:right;"> 17.60 </td>
1081
+ <td style="text-align:right;"> 0 </td>
1082
+ <td style="text-align:right;"> 0 </td>
1083
+ <td style="text-align:right;"> 3 </td>
1084
+ <td style="text-align:right;"> 3 </td>
1085
+ </tr>
1086
+ <tr>
1087
+ <td style="text-align:left;"> Merc 450SLC </td>
1088
+ <td style="text-align:right;"> 15.2 </td>
1089
+ <td style="text-align:right;"> 8 </td>
1090
+ <td style="text-align:right;"> 275.8 </td>
1091
+ <td style="text-align:right;"> 180 </td>
1092
+ <td style="text-align:right;"> 3.07 </td>
1093
+ <td style="text-align:right;"> 3.780 </td>
1094
+ <td style="text-align:right;"> 18.00 </td>
1095
+ <td style="text-align:right;"> 0 </td>
1096
+ <td style="text-align:right;"> 0 </td>
1097
+ <td style="text-align:right;"> 3 </td>
1098
+ <td style="text-align:right;"> 3 </td>
1099
+ </tr>
1100
+ <tr>
1101
+ <td style="text-align:left;"> Cadillac Fleetwood </td>
1102
+ <td style="text-align:right;"> 10.4 </td>
1103
+ <td style="text-align:right;"> 8 </td>
1104
+ <td style="text-align:right;"> 472.0 </td>
1105
+ <td style="text-align:right;"> 205 </td>
1106
+ <td style="text-align:right;"> 2.93 </td>
1107
+ <td style="text-align:right;"> 5.250 </td>
1108
+ <td style="text-align:right;"> 17.98 </td>
1109
+ <td style="text-align:right;"> 0 </td>
1110
+ <td style="text-align:right;"> 0 </td>
1111
+ <td style="text-align:right;"> 3 </td>
1112
+ <td style="text-align:right;"> 4 </td>
1113
+ </tr>
1114
+ <tr>
1115
+ <td style="text-align:left;"> Lincoln Continental </td>
1116
+ <td style="text-align:right;"> 10.4 </td>
1117
+ <td style="text-align:right;"> 8 </td>
1118
+ <td style="text-align:right;"> 460.0 </td>
1119
+ <td style="text-align:right;"> 215 </td>
1120
+ <td style="text-align:right;"> 3.00 </td>
1121
+ <td style="text-align:right;"> 5.424 </td>
1122
+ <td style="text-align:right;"> 17.82 </td>
1123
+ <td style="text-align:right;"> 0 </td>
1124
+ <td style="text-align:right;"> 0 </td>
1125
+ <td style="text-align:right;"> 3 </td>
1126
+ <td style="text-align:right;"> 4 </td>
1127
+ </tr>
1128
+ <tr>
1129
+ <td style="text-align:left;"> Chrysler Imperial </td>
1130
+ <td style="text-align:right;"> 14.7 </td>
1131
+ <td style="text-align:right;"> 8 </td>
1132
+ <td style="text-align:right;"> 440.0 </td>
1133
+ <td style="text-align:right;"> 230 </td>
1134
+ <td style="text-align:right;"> 3.23 </td>
1135
+ <td style="text-align:right;"> 5.345 </td>
1136
+ <td style="text-align:right;"> 17.42 </td>
1137
+ <td style="text-align:right;"> 0 </td>
1138
+ <td style="text-align:right;"> 0 </td>
1139
+ <td style="text-align:right;"> 3 </td>
1140
+ <td style="text-align:right;"> 4 </td>
1141
+ </tr>
1142
+ <tr>
1143
+ <td style="text-align:left;"> Fiat 128 </td>
1144
+ <td style="text-align:right;"> 32.4 </td>
1145
+ <td style="text-align:right;"> 4 </td>
1146
+ <td style="text-align:right;"> 78.7 </td>
1147
+ <td style="text-align:right;"> 66 </td>
1148
+ <td style="text-align:right;"> 4.08 </td>
1149
+ <td style="text-align:right;"> 2.200 </td>
1150
+ <td style="text-align:right;"> 19.47 </td>
1151
+ <td style="text-align:right;"> 1 </td>
1152
+ <td style="text-align:right;"> 1 </td>
1153
+ <td style="text-align:right;"> 4 </td>
1154
+ <td style="text-align:right;"> 1 </td>
1155
+ </tr>
1156
+ <tr>
1157
+ <td style="text-align:left;"> Honda Civic </td>
1158
+ <td style="text-align:right;"> 30.4 </td>
1159
+ <td style="text-align:right;"> 4 </td>
1160
+ <td style="text-align:right;"> 75.7 </td>
1161
+ <td style="text-align:right;"> 52 </td>
1162
+ <td style="text-align:right;"> 4.93 </td>
1163
+ <td style="text-align:right;"> 1.615 </td>
1164
+ <td style="text-align:right;"> 18.52 </td>
1165
+ <td style="text-align:right;"> 1 </td>
1166
+ <td style="text-align:right;"> 1 </td>
1167
+ <td style="text-align:right;"> 4 </td>
1168
+ <td style="text-align:right;"> 2 </td>
1169
+ </tr>
1170
+ <tr>
1171
+ <td style="text-align:left;"> Toyota Corolla </td>
1172
+ <td style="text-align:right;"> 33.9 </td>
1173
+ <td style="text-align:right;"> 4 </td>
1174
+ <td style="text-align:right;"> 71.1 </td>
1175
+ <td style="text-align:right;"> 65 </td>
1176
+ <td style="text-align:right;"> 4.22 </td>
1177
+ <td style="text-align:right;"> 1.835 </td>
1178
+ <td style="text-align:right;"> 19.90 </td>
1179
+ <td style="text-align:right;"> 1 </td>
1180
+ <td style="text-align:right;"> 1 </td>
1181
+ <td style="text-align:right;"> 4 </td>
1182
+ <td style="text-align:right;"> 1 </td>
1183
+ </tr>
1184
+ <tr>
1185
+ <td style="text-align:left;"> Toyota Corona </td>
1186
+ <td style="text-align:right;"> 21.5 </td>
1187
+ <td style="text-align:right;"> 4 </td>
1188
+ <td style="text-align:right;"> 120.1 </td>
1189
+ <td style="text-align:right;"> 97 </td>
1190
+ <td style="text-align:right;"> 3.70 </td>
1191
+ <td style="text-align:right;"> 2.465 </td>
1192
+ <td style="text-align:right;"> 20.01 </td>
1193
+ <td style="text-align:right;"> 1 </td>
1194
+ <td style="text-align:right;"> 0 </td>
1195
+ <td style="text-align:right;"> 3 </td>
1196
+ <td style="text-align:right;"> 1 </td>
1197
+ </tr>
1198
+ <tr>
1199
+ <td style="text-align:left;"> Dodge Challenger </td>
1200
+ <td style="text-align:right;"> 15.5 </td>
1201
+ <td style="text-align:right;"> 8 </td>
1202
+ <td style="text-align:right;"> 318.0 </td>
1203
+ <td style="text-align:right;"> 150 </td>
1204
+ <td style="text-align:right;"> 2.76 </td>
1205
+ <td style="text-align:right;"> 3.520 </td>
1206
+ <td style="text-align:right;"> 16.87 </td>
1207
+ <td style="text-align:right;"> 0 </td>
1208
+ <td style="text-align:right;"> 0 </td>
1209
+ <td style="text-align:right;"> 3 </td>
1210
+ <td style="text-align:right;"> 2 </td>
1211
+ </tr>
1212
+ <tr>
1213
+ <td style="text-align:left;"> AMC Javelin </td>
1214
+ <td style="text-align:right;"> 15.2 </td>
1215
+ <td style="text-align:right;"> 8 </td>
1216
+ <td style="text-align:right;"> 304.0 </td>
1217
+ <td style="text-align:right;"> 150 </td>
1218
+ <td style="text-align:right;"> 3.15 </td>
1219
+ <td style="text-align:right;"> 3.435 </td>
1220
+ <td style="text-align:right;"> 17.30 </td>
1221
+ <td style="text-align:right;"> 0 </td>
1222
+ <td style="text-align:right;"> 0 </td>
1223
+ <td style="text-align:right;"> 3 </td>
1224
+ <td style="text-align:right;"> 2 </td>
1225
+ </tr>
1226
+ <tr>
1227
+ <td style="text-align:left;"> Camaro Z28 </td>
1228
+ <td style="text-align:right;"> 13.3 </td>
1229
+ <td style="text-align:right;"> 8 </td>
1230
+ <td style="text-align:right;"> 350.0 </td>
1231
+ <td style="text-align:right;"> 245 </td>
1232
+ <td style="text-align:right;"> 3.73 </td>
1233
+ <td style="text-align:right;"> 3.840 </td>
1234
+ <td style="text-align:right;"> 15.41 </td>
1235
+ <td style="text-align:right;"> 0 </td>
1236
+ <td style="text-align:right;"> 0 </td>
1237
+ <td style="text-align:right;"> 3 </td>
1238
+ <td style="text-align:right;"> 4 </td>
1239
+ </tr>
1240
+ <tr>
1241
+ <td style="text-align:left;"> Pontiac Firebird </td>
1242
+ <td style="text-align:right;"> 19.2 </td>
1243
+ <td style="text-align:right;"> 8 </td>
1244
+ <td style="text-align:right;"> 400.0 </td>
1245
+ <td style="text-align:right;"> 175 </td>
1246
+ <td style="text-align:right;"> 3.08 </td>
1247
+ <td style="text-align:right;"> 3.845 </td>
1248
+ <td style="text-align:right;"> 17.05 </td>
1249
+ <td style="text-align:right;"> 0 </td>
1250
+ <td style="text-align:right;"> 0 </td>
1251
+ <td style="text-align:right;"> 3 </td>
1252
+ <td style="text-align:right;"> 2 </td>
1253
+ </tr>
1254
+ <tr>
1255
+ <td style="text-align:left;"> Fiat X1-9 </td>
1256
+ <td style="text-align:right;"> 27.3 </td>
1257
+ <td style="text-align:right;"> 4 </td>
1258
+ <td style="text-align:right;"> 79.0 </td>
1259
+ <td style="text-align:right;"> 66 </td>
1260
+ <td style="text-align:right;"> 4.08 </td>
1261
+ <td style="text-align:right;"> 1.935 </td>
1262
+ <td style="text-align:right;"> 18.90 </td>
1263
+ <td style="text-align:right;"> 1 </td>
1264
+ <td style="text-align:right;"> 1 </td>
1265
+ <td style="text-align:right;"> 4 </td>
1266
+ <td style="text-align:right;"> 1 </td>
1267
+ </tr>
1268
+ <tr>
1269
+ <td style="text-align:left;"> Porsche 914-2 </td>
1270
+ <td style="text-align:right;"> 26.0 </td>
1271
+ <td style="text-align:right;"> 4 </td>
1272
+ <td style="text-align:right;"> 120.3 </td>
1273
+ <td style="text-align:right;"> 91 </td>
1274
+ <td style="text-align:right;"> 4.43 </td>
1275
+ <td style="text-align:right;"> 2.140 </td>
1276
+ <td style="text-align:right;"> 16.70 </td>
1277
+ <td style="text-align:right;"> 0 </td>
1278
+ <td style="text-align:right;"> 1 </td>
1279
+ <td style="text-align:right;"> 5 </td>
1280
+ <td style="text-align:right;"> 2 </td>
1281
+ </tr>
1282
+ <tr>
1283
+ <td style="text-align:left;"> Lotus Europa </td>
1284
+ <td style="text-align:right;"> 30.4 </td>
1285
+ <td style="text-align:right;"> 4 </td>
1286
+ <td style="text-align:right;"> 95.1 </td>
1287
+ <td style="text-align:right;"> 113 </td>
1288
+ <td style="text-align:right;"> 3.77 </td>
1289
+ <td style="text-align:right;"> 1.513 </td>
1290
+ <td style="text-align:right;"> 16.90 </td>
1291
+ <td style="text-align:right;"> 1 </td>
1292
+ <td style="text-align:right;"> 1 </td>
1293
+ <td style="text-align:right;"> 5 </td>
1294
+ <td style="text-align:right;"> 2 </td>
1295
+ </tr>
1296
+ <tr>
1297
+ <td style="text-align:left;"> Ford Pantera L </td>
1298
+ <td style="text-align:right;"> 15.8 </td>
1299
+ <td style="text-align:right;"> 8 </td>
1300
+ <td style="text-align:right;"> 351.0 </td>
1301
+ <td style="text-align:right;"> 264 </td>
1302
+ <td style="text-align:right;"> 4.22 </td>
1303
+ <td style="text-align:right;"> 3.170 </td>
1304
+ <td style="text-align:right;"> 14.50 </td>
1305
+ <td style="text-align:right;"> 0 </td>
1306
+ <td style="text-align:right;"> 1 </td>
1307
+ <td style="text-align:right;"> 5 </td>
1308
+ <td style="text-align:right;"> 4 </td>
1309
+ </tr>
1310
+ <tr>
1311
+ <td style="text-align:left;"> Ferrari Dino </td>
1312
+ <td style="text-align:right;"> 19.7 </td>
1313
+ <td style="text-align:right;"> 6 </td>
1314
+ <td style="text-align:right;"> 145.0 </td>
1315
+ <td style="text-align:right;"> 175 </td>
1316
+ <td style="text-align:right;"> 3.62 </td>
1317
+ <td style="text-align:right;"> 2.770 </td>
1318
+ <td style="text-align:right;"> 15.50 </td>
1319
+ <td style="text-align:right;"> 0 </td>
1320
+ <td style="text-align:right;"> 1 </td>
1321
+ <td style="text-align:right;"> 5 </td>
1322
+ <td style="text-align:right;"> 6 </td>
1323
+ </tr>
1324
+ <tr>
1325
+ <td style="text-align:left;"> Maserati Bora </td>
1326
+ <td style="text-align:right;"> 15.0 </td>
1327
+ <td style="text-align:right;"> 8 </td>
1328
+ <td style="text-align:right;"> 301.0 </td>
1329
+ <td style="text-align:right;"> 335 </td>
1330
+ <td style="text-align:right;"> 3.54 </td>
1331
+ <td style="text-align:right;"> 3.570 </td>
1332
+ <td style="text-align:right;"> 14.60 </td>
1333
+ <td style="text-align:right;"> 0 </td>
1334
+ <td style="text-align:right;"> 1 </td>
1335
+ <td style="text-align:right;"> 5 </td>
1336
+ <td style="text-align:right;"> 8 </td>
1337
+ </tr>
1338
+ <tr>
1339
+ <td style="text-align:left;"> Volvo 142E </td>
1340
+ <td style="text-align:right;"> 21.4 </td>
1341
+ <td style="text-align:right;"> 4 </td>
1342
+ <td style="text-align:right;"> 121.0 </td>
1343
+ <td style="text-align:right;"> 109 </td>
1344
+ <td style="text-align:right;"> 4.11 </td>
1345
+ <td style="text-align:right;"> 2.780 </td>
1346
+ <td style="text-align:right;"> 18.60 </td>
1347
+ <td style="text-align:right;"> 1 </td>
1348
+ <td style="text-align:right;"> 1 </td>
1349
+ <td style="text-align:right;"> 4 </td>
1350
+ <td style="text-align:right;"> 2 </td>
1351
+ </tr>
1352
+ </tbody>
1353
+ </table>
340
1354
 
341
- ```
342
- ## [1] 11 22 14 25 36 17 28 39
343
- ## [1] 22 14 36 28 39
344
- ```
1355
+ ## Including Ruby files in a chunk
345
1356
 
346
- If an index is out of range, a missing value (NA) will be reported.
1357
+ R is a language that was created to be easy and fast for statisticians to use. As far
1358
+ as I know, it was not a
1359
+ language to be used for developing large systems. Of course, there are large systems and
1360
+ libraries in R, but the focus of the language is for developing statistical models and
1361
+ distributing them to peers.
347
1362
 
1363
+ Ruby, on the other hand, is a language for large software development. Systems written in
1364
+ Ruby will have dozens, hundreds or even thousands of files. To document a
1365
+ large system with literate programming, we cannot expect the developer to add all the
1366
+ files in a single '.Rmd' file. gKnit provides the 'include' chunk engine to include
1367
+ a Ruby file as if it had been typed in the '.Rmd' file.
348
1368
 
349
- ```ruby
350
- puts vec4[30]
351
- ```
1369
+ To include a file, the following chunk should be created, where <filename> is the name of
1370
+ the file to be included and where the extension, if it is '.rb', does not need to be added.
1371
+ If the 'relative' option is not included, then it is treated as TRUE. When 'relative' is
1372
+ true, Ruby's 'require\_relative' semantics are used to load the file; when false, Ruby's
1373
+ \$LOAD_PATH is searched to find the file and it is 'require'd.
352
1374
 
1375
+ ````
1376
+ ```{include <filename>, relative = <TRUE/FALSE>}
353
1377
  ```
354
- ## [1] NA
355
- ```
356
-
357
- It is also possible to index a vector by range:
1378
+ ````
358
1379
 
1380
+ Below we include file 'model.rb', which is in the same directory as this blog.
1381
+ This code uses R's 'caret' package to split a dataset into train and test sets.
1382
+ The 'caret' package is a very important and useful package for doing Data Analysis,
1383
+ it has hundreds of functions for all steps of the Data Analysis workflow. To
1384
+ use 'caret' just to split a dataset is like using the proverbial cannon to
1385
+ kill the fly. We use it here only to show that integrating Ruby and R and
1386
+ using even a very complex package as 'caret' is trivial with Galaaz.
359
1387
 
360
- ```ruby
361
- puts vec4[(2..5)]
362
- ```
1388
+ A word of advice: the 'caret' package has lots of dependencies and installing
1389
+ it in a Linux system is a time consuming operation. Method 'R.install_and_loads'
1390
+ will install the package if it is not already installed and can take a while.
363
1391
 
1392
+ ````
1393
+ ```{include model}
364
1394
  ```
365
- ## [1] 22 33 14 25
366
- ```
367
-
368
- Elements in a vector can be named using the 'names' attribute of a vector:
369
-
1395
+ ````
370
1396
 
371
- ```ruby
372
- full_name = R.c("Rodrigo", "A", "Botafogo")
373
- full_name.names = R.c("First", "Middle", "Last")
374
- puts full_name
375
- ```
376
1397
 
377
- ```
378
- ## First Middle Last
379
- ## "Rodrigo" "A" "Botafogo"
380
- ```
1398
+ ```include
1399
+ require 'galaaz'
381
1400
 
382
- Or it can also be named by using the 'c' function with named paramenters:
1401
+ # Loads the R 'caret' package. If not present, installs it
1402
+ R.install_and_loads 'caret'
383
1403
 
1404
+ class Model
1405
+
1406
+ attr_reader :data
1407
+ attr_reader :test
1408
+ attr_reader :train
384
1409
 
385
- ```ruby
386
- full_name = R.c(First: "Rodrigo", Middle: "A", Last: "Botafogo")
387
- puts full_name
388
- ```
1410
+ #==========================================================
1411
+ #
1412
+ #==========================================================
1413
+
1414
+ def initialize(data, percent_train:, seed: 123)
1415
+
1416
+ R.set__seed(seed)
1417
+ @data = data
1418
+ @percent_train = percent_train
1419
+ @seed = seed
1420
+
1421
+ end
1422
+
1423
+ #==========================================================
1424
+ #
1425
+ #==========================================================
1426
+
1427
+ def partition(field)
1428
+
1429
+ train_index =
1430
+ R.createDataPartition(@data.send(field), p: @percent_train,
1431
+ list: false, times: 1)
1432
+ @train = @data[train_index, :all]
1433
+ @test = @data[-train_index, :all]
1434
+
1435
+ end
1436
+
1437
+ end
389
1438
 
390
1439
  ```
391
- ## First Middle Last
392
- ## "Rodrigo" "A" "Botafogo"
393
- ```
394
-
395
- ## Extracting Native Ruby Types from a Vector
396
-
397
- Vectors created with 'R.c' are of class R::Vector. You might have noticed that when indexing a
398
- vector, a new vector is returned, even if this vector has one single element. In order to use
399
- R::Vector with other ruby classes it might be necessary to extract the actual Ruby native type
400
- from the vector. In order to do this extraction the '>>' operator is used.
401
1440
 
402
1441
 
403
1442
  ```ruby
404
- puts vec4
405
- puts vec4 >> 0
406
- puts vec4 >> 4
1443
+ mtcars = ~:mtcars
1444
+ model = Model.new(mtcars, percent_train: 0.8)
1445
+ model.partition(:mpg)
1446
+ puts model.train.head
1447
+ puts model.test.head
1448
+ ```
1449
+
1450
+ ```
1451
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1452
+ ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
1453
+ ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
1454
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
1455
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
1456
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
1457
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
1458
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
1459
+ ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
1460
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
1461
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
1462
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
1463
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
1464
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
1465
+ ```
1466
+
1467
+ ## Documenting Gems
1468
+
1469
+ gKnit also allows developers to document and load files that are not in the same directory
1470
+ of the '.Rmd' file.
1471
+
1472
+ Here is an example of loading the 'find.rb' file from TruffleRuby. In this example, relative
1473
+ is set to FALSE, so Ruby will look for the file in its $LOAD\_PATH, and the user does not
1474
+ need to know its directory.
1475
+
1476
+ ````
1477
+ ```{include find, relative = FALSE}
1478
+ ```
1479
+ ````
1480
+
1481
+
1482
+ ```include
1483
+ # frozen_string_literal: true
1484
+ #
1485
+ # find.rb: the Find module for processing all files under a given directory.
1486
+ #
1487
+
1488
+ #
1489
+ # The +Find+ module supports the top-down traversal of a set of file paths.
1490
+ #
1491
+ # For example, to total the size of all files under your home directory,
1492
+ # ignoring anything in a "dot" directory (e.g. $HOME/.ssh):
1493
+ #
1494
+ # require 'find'
1495
+ #
1496
+ # total_size = 0
1497
+ #
1498
+ # Find.find(ENV["HOME"]) do |path|
1499
+ # if FileTest.directory?(path)
1500
+ # if File.basename(path)[0] == ?.
1501
+ # Find.prune # Don't look any further into this directory.
1502
+ # else
1503
+ # next
1504
+ # end
1505
+ # else
1506
+ # total_size += FileTest.size(path)
1507
+ # end
1508
+ # end
1509
+ #
1510
+ module Find
1511
+
1512
+ #
1513
+ # Calls the associated block with the name of every file and directory listed
1514
+ # as arguments, then recursively on their subdirectories, and so on.
1515
+ #
1516
+ # Returns an enumerator if no block is given.
1517
+ #
1518
+ # See the +Find+ module documentation for an example.
1519
+ #
1520
+ def find(*paths, ignore_error: true) # :yield: path
1521
+ block_given? or return enum_for(__method__, *paths, ignore_error: ignore_error)
1522
+
1523
+ fs_encoding = Encoding.find("filesystem")
1524
+
1525
+ paths.collect!{|d| raise Errno::ENOENT, d unless File.exist?(d); d.dup}.each do |path|
1526
+ path = path.to_path if path.respond_to? :to_path
1527
+ enc = path.encoding == Encoding::US_ASCII ? fs_encoding : path.encoding
1528
+ ps = [path]
1529
+ while file = ps.shift
1530
+ catch(:prune) do
1531
+ yield file.dup.taint
1532
+ begin
1533
+ s = File.lstat(file)
1534
+ rescue Errno::ENOENT, Errno::EACCES, Errno::ENOTDIR, Errno::ELOOP, Errno::ENAMETOOLONG
1535
+ raise unless ignore_error
1536
+ next
1537
+ end
1538
+ if s.directory? then
1539
+ begin
1540
+ fs = Dir.children(file, encoding: enc)
1541
+ rescue Errno::ENOENT, Errno::EACCES, Errno::ENOTDIR, Errno::ELOOP, Errno::ENAMETOOLONG
1542
+ raise unless ignore_error
1543
+ next
1544
+ end
1545
+ fs.sort!
1546
+ fs.reverse_each {|f|
1547
+ f = File.join(file, f)
1548
+ ps.unshift f.untaint
1549
+ }
1550
+ end
1551
+ end
1552
+ end
1553
+ end
1554
+ nil
1555
+ end
1556
+
1557
+ #
1558
+ # Skips the current file or directory, restarting the loop with the next
1559
+ # entry. If the current file is a directory, that directory will not be
1560
+ # recursively entered. Meaningful only within the block associated with
1561
+ # Find::find.
1562
+ #
1563
+ # See the +Find+ module documentation for an example.
1564
+ #
1565
+ def prune
1566
+ throw :prune
1567
+ end
1568
+
1569
+ module_function :find, :prune
1570
+ end
407
1571
  ```
408
1572
 
409
- ```
410
- ## [1] 11 22 33 14 25 36 17 28 39
411
- ## 11.0
412
- ## 25.0
1573
+ ## Converting to PDF
1574
+
1575
+ One of the beauties of knitr is that the same input can be converted to many different outputs.
1576
+ One very useful format is, of course, PDF. In order to convert an __R markdown__ file to PDF
1577
+ it is necessary to have LaTeX installed on the system. We will not explain here how to
1578
+ install LaTeX as there are plenty of documents on the web showing how to proceed.
1579
+
1580
+ gKnit comes with a simple LaTeX style file for gknitting this blog as a PDF document. Here is
1581
+ the Yaml header to generate this blog in PDF format instead of HTML:
1582
+
1583
+ ```
1584
+ ---
1585
+ title: "gKnit - Ruby and R Knitting with Galaaz in GraalVM"
1586
+ author: "Rodrigo Botafogo"
1587
+ tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, knitr, gknit]
1588
+ date: "29 October 2018"
1589
+ output:
1590
+ pdf\_document:
1591
+ includes:
1592
+ in\_header: ["../../sty/galaaz.sty"]
1593
+ number\_sections: yes
1594
+ ---
1595
+ ```
1596
+
1597
+ ## Template based documents generation
1598
+
1599
+ When a document is converted to PDF it follows a certain conversion template. We've seen above
1600
+ the use of 'galaaz.sty' as a basic template to generate a PDF document. Using the
1601
+ 'gknit-draft' app that comes with Galaaz, the same .Rmd file can be compiled to different
1602
+ looking PDF documents. Galaaz automatically loads the 'rticles' R package that comes with
1603
+ templates for the following journals with the respective template name:
1604
+
1605
+ * ACM articles: acm_article
1606
+ * ACS articles: acs_article
1607
+ * AEA journal submissions: aea_article
1608
+ * AGU journal submissions: ????
1609
+ * AMS articles: ams_article
1610
+ * American Statistical Association: asa_article
1611
+ * Biometrics articles: biometrics_article
1612
+ * Bulletin de l'AMQ journal submissions: amq_article
1613
+ * CTeX documents: ctex
1614
+ * Elsevier journal submissions: elsevier_article
1615
+ * IEEE Transaction journal submissions: ieee_article
1616
+ * JSS articles: jss_article
1617
+ * MDPI journal submissions: mdpi_article
1618
+ * Monthly Notices of the Royal Astronomical Society articles: mnras_article
1619
+ * NNRAS journal submissions: nmras_article
1620
+ * PeerJ articles: peerj_article
1621
+ * Royal Society Open Science journal submissions: rsos_article
1622
+ * Royal Statistical Society: rss_article
1623
+ * Sage journal submissions: sage_article
1624
+ * Springer journal submissions: springer_article
1625
+ * Statistics in Medicine journal submissions: sim_article
1626
+ * Copernicus Publications journal submissions: copernicus_article
1627
+ * The R Journal articles: rjournal_article
1628
+ * Frontiers articles: ???
1629
+ * Taylor & Francis articles: ???
1630
+ * Bulletin De L'AMQ: amq_article
1631
+ * PLOS journal: plos_article
1632
+ * Proceedings of the National Academy of Sciences of the USA: pnas_article
1633
+
1634
+ In order to create a document with one of those templates, use the following command:
1635
+
1636
+ ```
1637
+ gknit-draft --filename <my_document> --template <template> --package <package>
1638
+ --create_dir
1639
+ ```
1640
+ So, in order to create a template for writing an R Journal article, use:
1641
+
1642
+ ```
1643
+ gknit-draft --filename my_r_article --template rjournal_article --package rticles
1644
+ --create_dir
413
1645
  ```
414
1646
 
415
- Note that indexing with '>>' starts at 0 and not at 1, also, we cannot do negative indexing.
416
-
417
1647
  # Accessing R variables
418
1648
 
419
1649
  Galaaz allows Ruby to access variables created in R. For example, the 'mtcars' data set is
@@ -896,7 +2126,338 @@ outputs (~:mtcars).kable.kable_styling
896
2126
  </tbody>
897
2127
  </table>
898
2128
 
899
- # Matrix
2129
+ # Basic Data Types
2130
+
2131
+ ## Vector
2132
+
2133
+ Vectors can be thought of as contiguous cells containing data. Cells are accessed through
2134
+ indexing operations such as x[5]. Galaaz has six basic (‘atomic’) vector types: logical,
2135
+ integer, real, complex, string (or character) and raw. The modes and storage modes for the
2136
+ different vector types are listed in the following
2137
+ table.
2138
+
2139
+ | typeof | mode | storage.mode |
2140
+ |-----------|:---------:|-------------:|
2141
+ | logical | logical | logical |
2142
+ | integer | numeric | integer |
2143
+ | double | numeric | double |
2144
+ | complex | complex | complex |
2145
+ | character | character | character |
2146
+ | raw | raw | raw |
2147
+
2148
+ Single numbers, such as 4.2, and strings, such as "four point two" are still vectors, of length
2149
+ 1; there are no more basic types. Vectors with length zero are possible (and useful).
2150
+ String vectors have mode and storage mode "character". A single element of a character
2151
+ vector is often referred to as a character string.
2152
+
2153
+ To create a vector the 'c' (concatenate) method from the 'R' module should be used:
2154
+
2155
+
2156
+ ```ruby
2157
+ vec = R.c(1, 2, 3)
2158
+ puts vec
2159
+ ```
2160
+
2161
+ ```
2162
+ ## [1] 1 2 3
2163
+ ```
2164
+
2165
+ Let's take a look at the type, mode and storage.mode of our vector vec. In order to print
2166
+ this out, we are creating a data frame 'df' and printing it out. A data frame, for those
2167
+ not familiar with it, is basically a table. Here we create the data frame and add the
2168
+ column name by passing named parameters for each column, such as 'typeof:', 'mode:' and
2169
+ 'storage\_\_mode:'. You should also note here that the double underscore is converted to a '.'.
2170
+ So, when printed 'storage\_\_mode' will actually print as 'storage.mode'.
2171
+
2172
+ Data frames will later be more carefully described. In R, the method used to create a
2173
+ data frame is 'data.frame', in Galaaz we use 'data\_\_frame'.
2174
+
2175
+
2176
+ ```ruby
2177
+ df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
2178
+ puts df
2179
+ ```
2180
+
2181
+ ```
2182
+ ## typeof mode storage.mode
2183
+ ## 1 integer numeric integer
2184
+ ```
2185
+
2186
+ If you want to create a vector with floating point numbers, then we need at least one of the
2187
+ vector's elements to be a float, such as 1.0. R users should be careful, since in R a number
2188
+ like '1' is converted to float and to have an integer the R developer will use '1L'. Galaaz
2189
+ follows normal Ruby rules and the number 1 is an integer and 1.0 is a float.
2190
+
2191
+
2192
+ ```ruby
2193
+ vec = R.c(1.0, 2, 3)
2194
+ puts vec
2195
+ ```
2196
+
2197
+ ```
2198
+ ## [1] 1 2 3
2199
+ ```
2200
+
2201
+
2202
+ ```ruby
2203
+ df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
2204
+ outputs df.kable.kable_styling
2205
+ ```
2206
+
2207
+ <table class="table" style="margin-left: auto; margin-right: auto;">
2208
+ <thead>
2209
+ <tr>
2210
+ <th style="text-align:left;"> typeof </th>
2211
+ <th style="text-align:left;"> mode </th>
2212
+ <th style="text-align:left;"> storage.mode </th>
2213
+ </tr>
2214
+ </thead>
2215
+ <tbody>
2216
+ <tr>
2217
+ <td style="text-align:left;"> double </td>
2218
+ <td style="text-align:left;"> numeric </td>
2219
+ <td style="text-align:left;"> double </td>
2220
+ </tr>
2221
+ </tbody>
2222
+ </table>
2223
+
2224
+ In this next example we try to create a vector with a variable 'hello' that has not yet
2225
+ been defined. This will raise an exception that is printed out. We get two return blocks,
2226
+ the first with a message explaining what went wrong and the second with the full backtrace
2227
+ of the error.
2228
+
2229
+
2230
+ ```ruby
2231
+ vec = R.c(1, hello, 5)
2232
+ ```
2233
+
2234
+ ```
2235
+ ## Message:
2236
+ ## undefined local variable or method `hello' for #<RC:0x3d8 @out_list=nil>:RC
2237
+ ```
2238
+
2239
+ ```
2240
+ ## Message:
2241
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:103:in `get_binding'
2242
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `eval'
2243
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `exec_ruby'
2244
+ ## /home/rbotafogo/desenv/galaaz/lib/gknit/knitr_engine.rb:650:in `block in initialize'
2245
+ ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
2246
+ ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
2247
+ ## (eval):3:in `function(...) {\n rb_method(...)'
2248
+ ## unknown.r:1:in `in_dir'
2249
+ ## unknown.r:1:in `block_exec'
2250
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:92:in `call_block'
2251
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:6:in `process_group.block'
2252
+ ## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:3:in `<no source>'
2253
+ ## unknown.r:1:in `withCallingHandlers'
2254
+ ## unknown.r:1:in `process_file'
2255
+ ## unknown.r:1:in `<no source>'
2256
+ ## unknown.r:1:in `<no source>'
2257
+ ## <REPL>:4:in `<repl wrapper>'
2258
+ ## <REPL>:1
2259
+ ```
2260
+
2261
+ Here is a vector with logical values
2262
+
2263
+
2264
+ ```ruby
2265
+ vec = R.c(true, true, false, false, true)
2266
+ puts vec
2267
+ ```
2268
+
2269
+ ```
2270
+ ## [1] TRUE TRUE FALSE FALSE TRUE
2271
+ ```
2272
+
2273
+ ### Combining Vectors
2274
+
2275
+ The 'c' function used to create vectors can also be used to combine two vectors:
2276
+
2277
+
2278
+ ```ruby
2279
+ vec1 = R.c(10.0, 20.0, 30.0)
2280
+ vec2 = R.c(4.0, 5.0, 6.0)
2281
+ vec = R.c(vec1, vec2)
2282
+ puts vec
2283
+ ```
2284
+
2285
+ ```
2286
+ ## [1] 10 20 30 4 5 6
2287
+ ```
2288
+ In Galaaz, methods can be chained (somewhat like the pipe operator in R %>%, but more generic).
2289
+ In this next example, method 'c' is chained after 'vec1'. This also looks like 'c' is a
2290
+ method of the vector, but in reality, this is actually closer to the pipe operator. When
2291
+ Galaaz identifies that 'c' is not a method of 'vec1' it actually tries to call 'R.c' with
2292
+ 'vec1' as the first argument concatenated with all the other available arguments. The code
2293
+ below is automatically converted to the code above.
2294
+
2295
+
2296
+ ```ruby
2297
+ vec = vec1.c(vec2)
2298
+ puts vec
2299
+ ```
2300
+
2301
+ ```
2302
+ ## [1] 10 20 30 4 5 6
2303
+ ```
2304
+
2305
+ ### Vector Arithmetic
2306
+
2307
+ Arithmetic operations on vectors are performed element by element:
2308
+
2309
+
2310
+ ```ruby
2311
+ puts vec1 + vec2
2312
+ ```
2313
+
2314
+ ```
2315
+ ## [1] 14 25 36
2316
+ ```
2317
+
2318
+
2319
+ ```ruby
2320
+ puts vec1 * 5
2321
+ ```
2322
+
2323
+ ```
2324
+ ## [1] 50 100 150
2325
+ ```
2326
+
2327
+ When vectors have different lengths, a recycling rule is applied to the shorter vector:
2328
+
2329
+
2330
+ ```ruby
2331
+ vec3 = R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
2332
+ puts vec4 = vec1 + vec3
2333
+ ```
2334
+
2335
+ ```
2336
+ ## [1] 11 22 33 14 25 36 17 28 39
2337
+ ```
2338
+
2339
+ ### Vector Indexing
2340
+
2341
+ Vectors can be indexed by using the '[]' operator:
2342
+
2343
+
2344
+ ```ruby
2345
+ puts vec4[3]
2346
+ ```
2347
+
2348
+ ```
2349
+ ## [1] 33
2350
+ ```
2351
+
2352
+ We can also index a vector with another vector. For example, in the code below, we take elements
2353
+ 1, 3, 5, and 7 from vec4:
2354
+
2355
+
2356
+ ```ruby
2357
+ puts vec4[R.c(1, 3, 5, 7)]
2358
+ ```
2359
+
2360
+ ```
2361
+ ## [1] 11 33 25 17
2362
+ ```
2363
+
2364
+ Repeating an index and having indices out of order is valid code:
2365
+
2366
+
2367
+ ```ruby
2368
+ puts vec4[R.c(1, 3, 3, 1)]
2369
+ ```
2370
+
2371
+ ```
2372
+ ## [1] 11 33 33 11
2373
+ ```
2374
+
2375
+ It is also possible to index a vector with a negative number or negative vector. In these cases
2376
+ the indexed values are not returned:
2377
+
2378
+
2379
+ ```ruby
2380
+ puts vec4[-3]
2381
+ puts vec4[-R.c(1, 3, 5, 7)]
2382
+ ```
2383
+
2384
+ ```
2385
+ ## [1] 11 22 14 25 36 17 28 39
2386
+ ## [1] 22 14 36 28 39
2387
+ ```
2388
+
2389
+ If an index is out of range, a missing value (NA) will be reported.
2390
+
2391
+
2392
+ ```ruby
2393
+ puts vec4[30]
2394
+ ```
2395
+
2396
+ ```
2397
+ ## [1] NA
2398
+ ```
2399
+
2400
+ It is also possible to index a vector by range:
2401
+
2402
+
2403
+ ```ruby
2404
+ puts vec4[(2..5)]
2405
+ ```
2406
+
2407
+ ```
2408
+ ## [1] 22 33 14 25
2409
+ ```
2410
+
2411
+ Elements in a vector can be named using the 'names' attribute of a vector:
2412
+
2413
+
2414
+ ```ruby
2415
+ full_name = R.c("Rodrigo", "A", "Botafogo")
2416
+ full_name.names = R.c("First", "Middle", "Last")
2417
+ puts full_name
2418
+ ```
2419
+
2420
+ ```
2421
+ ## First Middle Last
2422
+ ## "Rodrigo" "A" "Botafogo"
2423
+ ```
2424
+
2425
+ Or it can also be named by using the 'c' function with named parameters:
2426
+
2427
+
2428
+ ```ruby
2429
+ full_name = R.c(First: "Rodrigo", Middle: "A", Last: "Botafogo")
2430
+ puts full_name
2431
+ ```
2432
+
2433
+ ```
2434
+ ## First Middle Last
2435
+ ## "Rodrigo" "A" "Botafogo"
2436
+ ```
2437
+
2438
+ ### Extracting Native Ruby Types from a Vector
2439
+
2440
+ Vectors created with 'R.c' are of class R::Vector. You might have noticed that when indexing a
2441
+ vector, a new vector is returned, even if this vector has one single element. In order to use
2442
+ R::Vector with other ruby classes it might be necessary to extract the actual Ruby native type
2443
+ from the vector. In order to do this extraction the '>>' operator is used.
2444
+
2445
+
2446
+ ```ruby
2447
+ puts vec4
2448
+ puts vec4 >> 0
2449
+ puts vec4 >> 4
2450
+ ```
2451
+
2452
+ ```
2453
+ ## [1] 11 22 33 14 25 36 17 28 39
2454
+ ## 11.0
2455
+ ## 25.0
2456
+ ```
2457
+
2458
+ Note that indexing with '>>' starts at 0 and not at 1, also, we cannot do negative indexing.
2459
+
2460
+ ## Matrix
900
2461
 
901
2462
  A matrix is a collection of elements organized as a two dimensional table. A matrix can be
902
2463
  created by the 'matrix' function:
@@ -936,7 +2497,7 @@ puts mat_row
936
2497
  ## [3,] 7 8 9
937
2498
  ```
938
2499
 
939
- ## Indexing a Matrix
2500
+ ### Indexing a Matrix
940
2501
 
941
2502
  A matrix can be indexed by [row, column]:
942
2503
 
@@ -1008,7 +2569,7 @@ puts mat_row.cbind(mat)
1008
2569
  ## [3,] 7 8 9 3 6 9
1009
2570
  ```
1010
2571
 
1011
- # List
2572
+ ## List
1012
2573
 
1013
2574
  A list is a data structure that can contain sublists of different types, while vector and matrix
1014
2575
  can only hold one type of element.
@@ -1036,7 +2597,7 @@ puts lst
1036
2597
  Note that 'lst' elements are named elements.
1037
2598
 
1038
2599
 
1039
- ## List Indexing
2600
+ ### List Indexing
1040
2601
 
1041
2602
  List indexing, also called slicing, is done using the '[]' operator and the '[[]]' operator. Let's
1042
2603
  first start with the '[]' operator. The list above has three sublists; indexing with '[]' will
@@ -1082,7 +2643,7 @@ then the first element of the vector was extracted (note that vectors also accep
1082
2643
  operator) and then the vector was indexed by its first element, extracting the native Ruby type.
1083
2644
 
1084
2645
 
1085
- # Data Frame
2646
+ ## Data Frame
1086
2647
 
1087
2648
  A data frame is a table-like structure in which each column has the same number of
1088
2649
  rows. Data frames are the basic structure for storing data for data analysis. We have already
@@ -1105,7 +2666,7 @@ puts df
1105
2666
  ## 3 2012 2000
1106
2667
  ```
1107
2668
 
1108
- ## Data Frame Indexing
2669
+ ### Data Frame Indexing
1109
2670
 
1110
2671
  A data frame can be indexed the same way as a matrix, by using '[row, column]', where row and
1111
2672
  column can either be a numeric or the name of the row or column
@@ -1325,13 +2886,6 @@ symbolic notation as otherwise, we end up writing invalid expressions such as
1325
2886
  exp_wrong = (:a + :b) == :z
1326
2887
  puts exp_wrong
1327
2888
  ```
1328
-
1329
- ```
1330
- ## Message:
1331
- ## Error in function (x, y, num.eq = TRUE, single.NA = TRUE, attrib.as.set = TRUE, :
1332
- ## object 'a' not found (RError)
1333
- ## Translated to internal error
1334
- ```
1335
2889
  and it might be difficult to understand what is going on here. The problem lies with the fact that
1336
2890
  when using '==' we are comparing expression (:a + :b) to expression :z with '=='. When the
1337
2891
  comparison is executed, the system tries to evaluate :a, :b and :z, and those symbols at
@@ -1423,11 +2977,15 @@ Galaaz.
1423
2977
 
1424
2978
  For these
1425
2979
  examples, we will investigate the nycflights13 data set available on the package by the
1426
- same name. We use function 'R.install_and_loads' that checks if the library is available
2980
+ same name. We use function 'R.install\_and\_loads' that checks if the library is available
1427
2981
  locally, and if not, installs it. This data frame contains all 336,776 flights that
1428
2982
  departed from New York City in 2013. The data comes from the US Bureau of
1429
2983
  Transportation Statistics.
1430
2984
 
2985
+ Dplyr uses 'tibbles' in place of data frames; unfortunately, tibbles do not yet print properly in
2986
+ Galaaz due to a bug in fastR. In order to print a tibble we need to convert it to a data frame
2987
+ using the 'as\_\_data\_\_frame' method.
2988
+
1431
2989
 
1432
2990
  ```ruby
1433
2991
  R.install_and_loads('nycflights13')
@@ -1437,31 +2995,23 @@ R.library('dplyr')
1437
2995
 
1438
2996
  ```ruby
1439
2997
  flights = ~:flights
1440
- puts flights.head.as__data__frame
1441
- ```
1442
-
1443
- ```
1444
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1445
- ## 1 2013 1 1 517 515 2 830 819
1446
- ## 2 2013 1 1 533 529 4 850 830
1447
- ## 3 2013 1 1 542 540 2 923 850
1448
- ## 4 2013 1 1 544 545 -1 1004 1022
1449
- ## 5 2013 1 1 554 600 -6 812 837
1450
- ## 6 2013 1 1 554 558 -4 740 728
1451
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1452
- ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
1453
- ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
1454
- ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
1455
- ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
1456
- ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
1457
- ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
1458
- ## minute time_hour
1459
- ## 1 15 2013-01-01 05:00:00
1460
- ## 2 29 2013-01-01 05:00:00
1461
- ## 3 40 2013-01-01 05:00:00
1462
- ## 4 45 2013-01-01 05:00:00
1463
- ## 5 0 2013-01-01 06:00:00
1464
- ## 6 58 2013-01-01 05:00:00
2998
+ puts flights.head
2999
+ ```
3000
+
3001
+ ```
3002
+ ## # A tibble: 6 x 19
3003
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3004
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3005
+ ## 1 2013 1 1 517 515 2 830
3006
+ ## 2 2013 1 1 533 529 4 850
3007
+ ## 3 2013 1 1 542 540 2 923
3008
+ ## 4 2013 1 1 544 545 -1 1004
3009
+ ## 5 2013 1 1 554 600 -6 812
3010
+ ## 6 2013 1 1 554 558 -4 740
3011
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3012
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3013
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3014
+ ## # time_hour <dttm>
1465
3015
  ```
1466
3016
 
1467
3017
  ## Filtering rows with Filter
@@ -1471,31 +3021,23 @@ the first :month.eq 1
1471
3021
 
1472
3022
 
1473
3023
  ```ruby
1474
- puts flights.filter((:month.eq 1), (:day.eq 1)).head.as__data__frame
3024
+ puts flights.filter((:month.eq 1), (:day.eq 1)).head
1475
3025
  ```
1476
3026
 
1477
3027
  ```
1478
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1479
- ## 1 2013 1 1 517 515 2 830 819
1480
- ## 2 2013 1 1 533 529 4 850 830
1481
- ## 3 2013 1 1 542 540 2 923 850
1482
- ## 4 2013 1 1 544 545 -1 1004 1022
1483
- ## 5 2013 1 1 554 600 -6 812 837
1484
- ## 6 2013 1 1 554 558 -4 740 728
1485
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1486
- ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
1487
- ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
1488
- ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
1489
- ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
1490
- ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
1491
- ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
1492
- ## minute time_hour
1493
- ## 1 15 2013-01-01 05:00:00
1494
- ## 2 29 2013-01-01 05:00:00
1495
- ## 3 40 2013-01-01 05:00:00
1496
- ## 4 45 2013-01-01 05:00:00
1497
- ## 5 0 2013-01-01 06:00:00
1498
- ## 6 58 2013-01-01 05:00:00
3028
+ ## # A tibble: 6 x 19
3029
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3030
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3031
+ ## 1 2013 1 1 517 515 2 830
3032
+ ## 2 2013 1 1 533 529 4 850
3033
+ ## 3 2013 1 1 542 540 2 923
3034
+ ## 4 2013 1 1 544 545 -1 1004
3035
+ ## 5 2013 1 1 554 600 -6 812
3036
+ ## 6 2013 1 1 554 558 -4 740
3037
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3038
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3039
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3040
+ ## # time_hour <dttm>
1499
3041
  ```
1500
3042
 
1501
3043
  ## Logical Operators
@@ -1504,31 +3046,23 @@ All flights that departed in November of December
1504
3046
 
1505
3047
 
1506
3048
  ```ruby
1507
- puts flights.filter((:month.eq 11) | (:month.eq 12)).head.as__data__frame
3049
+ puts flights.filter((:month.eq 11) | (:month.eq 12)).head
1508
3050
  ```
1509
3051
 
1510
3052
  ```
1511
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1512
- ## 1 2013 11 1 5 2359 6 352 345
1513
- ## 2 2013 11 1 35 2250 105 123 2356
1514
- ## 3 2013 11 1 455 500 -5 641 651
1515
- ## 4 2013 11 1 539 545 -6 856 827
1516
- ## 5 2013 11 1 542 545 -3 831 855
1517
- ## 6 2013 11 1 549 600 -11 912 923
1518
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1519
- ## 1 7 B6 745 N568JB JFK PSE 205 1617 23
1520
- ## 2 87 B6 1816 N353JB JFK SYR 36 209 22
1521
- ## 3 -10 US 1895 N192UW EWR CLT 88 529 5
1522
- ## 4 29 UA 1714 N38727 LGA IAH 229 1416 5
1523
- ## 5 -24 AA 2243 N5CLAA JFK MIA 147 1089 5
1524
- ## 6 -11 UA 303 N595UA JFK SFO 359 2586 6
1525
- ## minute time_hour
1526
- ## 1 59 2013-11-01 23:00:00
1527
- ## 2 50 2013-11-01 22:00:00
1528
- ## 3 0 2013-11-01 05:00:00
1529
- ## 4 45 2013-11-01 05:00:00
1530
- ## 5 45 2013-11-01 05:00:00
1531
- ## 6 0 2013-11-01 06:00:00
3053
+ ## # A tibble: 6 x 19
3054
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3055
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3056
+ ## 1 2013 11 1 5 2359 6 352
3057
+ ## 2 2013 11 1 35 2250 105 123
3058
+ ## 3 2013 11 1 455 500 -5 641
3059
+ ## 4 2013 11 1 539 545 -6 856
3060
+ ## 5 2013 11 1 542 545 -3 831
3061
+ ## 6 2013 11 1 549 600 -11 912
3062
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3063
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3064
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3065
+ ## # time_hour <dttm>
1532
3066
  ```
1533
3067
 
1534
3068
  The same as above, but using the 'in' operator. In R, it is possible to define many operators
@@ -1538,31 +3072,23 @@ symbol, in this case ':in' and the second argument is the vector:
1538
3072
 
1539
3073
 
1540
3074
  ```ruby
1541
- puts flights.filter(:month._ :in, R.c(11, 12)).head.as__data__frame
3075
+ puts flights.filter(:month._ :in, R.c(11, 12)).head
1542
3076
  ```
1543
3077
 
1544
3078
  ```
1545
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1546
- ## 1 2013 11 1 5 2359 6 352 345
1547
- ## 2 2013 11 1 35 2250 105 123 2356
1548
- ## 3 2013 11 1 455 500 -5 641 651
1549
- ## 4 2013 11 1 539 545 -6 856 827
1550
- ## 5 2013 11 1 542 545 -3 831 855
1551
- ## 6 2013 11 1 549 600 -11 912 923
1552
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1553
- ## 1 7 B6 745 N568JB JFK PSE 205 1617 23
1554
- ## 2 87 B6 1816 N353JB JFK SYR 36 209 22
1555
- ## 3 -10 US 1895 N192UW EWR CLT 88 529 5
1556
- ## 4 29 UA 1714 N38727 LGA IAH 229 1416 5
1557
- ## 5 -24 AA 2243 N5CLAA JFK MIA 147 1089 5
1558
- ## 6 -11 UA 303 N595UA JFK SFO 359 2586 6
1559
- ## minute time_hour
1560
- ## 1 59 2013-11-01 23:00:00
1561
- ## 2 50 2013-11-01 22:00:00
1562
- ## 3 0 2013-11-01 05:00:00
1563
- ## 4 45 2013-11-01 05:00:00
1564
- ## 5 45 2013-11-01 05:00:00
1565
- ## 6 0 2013-11-01 06:00:00
3079
+ ## # A tibble: 6 x 19
3080
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3081
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3082
+ ## 1 2013 11 1 5 2359 6 352
3083
+ ## 2 2013 11 1 35 2250 105 123
3084
+ ## 3 2013 11 1 455 500 -5 641
3085
+ ## 4 2013 11 1 539 545 -6 856
3086
+ ## 5 2013 11 1 542 545 -3 831
3087
+ ## 6 2013 11 1 549 600 -11 912
3088
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3089
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3090
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3091
+ ## # time_hour <dttm>
1566
3092
  ```
1567
3093
 
1568
3094
  ## Filtering with NA (Not Available)
@@ -1575,14 +3101,16 @@ what is obtained from data frame.
1575
3101
 
1576
3102
  ```ruby
1577
3103
  df = R.tibble(x: R.c(1, R::NA, 3))
1578
- puts df.as__data__frame
3104
+ puts df
1579
3105
  ```
1580
3106
 
1581
3107
  ```
1582
- ## x
1583
- ## 1 1
1584
- ## 2 NA
1585
- ## 3 3
3108
+ ## # A tibble: 3 x 1
3109
+ ## x
3110
+ ## <int>
3111
+ ## 1 1
3112
+ ## 2
3113
+ ## 3 3
1586
3114
  ```
1587
3115
 
1588
3116
  Now filtering by :x > 1 shows all lines that satisfy this condition, where the row with R::NA does
@@ -1590,25 +3118,29 @@ not.
1590
3118
 
1591
3119
 
1592
3120
  ```ruby
1593
- puts df.filter(:x > 1).as__data__frame
3121
+ puts df.filter(:x > 1)
1594
3122
  ```
1595
3123
 
1596
3124
  ```
1597
- ## x
1598
- ## 1 3
3125
+ ## # A tibble: 1 x 1
3126
+ ## x
3127
+ ## <int>
3128
+ ## 1 3
1599
3129
  ```
1600
3130
 
1601
3131
  To match an NA use method 'is__na'
1602
3132
 
1603
3133
 
1604
3134
  ```ruby
1605
- puts df.filter((:x.is__na) | (:x > 1)).as__data__frame
3135
+ puts df.filter((:x.is__na) | (:x > 1))
1606
3136
  ```
1607
3137
 
1608
3138
  ```
1609
- ## x
1610
- ## 1 NA
1611
- ## 2 3
3139
+ ## # A tibble: 2 x 1
3140
+ ## x
3141
+ ## <int>
3142
+ ## 1
3143
+ ## 2 3
1612
3144
  ```
1613
3145
 
1614
3146
  ## Arrange Rows with arrange
@@ -1617,62 +3149,46 @@ Arrange reorders the rows of a data frame by the given arguments.
1617
3149
 
1618
3150
 
1619
3151
  ```ruby
1620
- puts flights.arrange(:year, :month, :day).head.as__data__frame
3152
+ puts flights.arrange(:year, :month, :day).head
1621
3153
  ```
1622
3154
 
1623
3155
  ```
1624
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1625
- ## 1 2013 1 1 517 515 2 830 819
1626
- ## 2 2013 1 1 533 529 4 850 830
1627
- ## 3 2013 1 1 542 540 2 923 850
1628
- ## 4 2013 1 1 544 545 -1 1004 1022
1629
- ## 5 2013 1 1 554 600 -6 812 837
1630
- ## 6 2013 1 1 554 558 -4 740 728
1631
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1632
- ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
1633
- ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
1634
- ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
1635
- ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
1636
- ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
1637
- ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
1638
- ## minute time_hour
1639
- ## 1 15 2013-01-01 05:00:00
1640
- ## 2 29 2013-01-01 05:00:00
1641
- ## 3 40 2013-01-01 05:00:00
1642
- ## 4 45 2013-01-01 05:00:00
1643
- ## 5 0 2013-01-01 06:00:00
1644
- ## 6 58 2013-01-01 05:00:00
3156
+ ## # A tibble: 6 x 19
3157
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3158
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3159
+ ## 1 2013 1 1 517 515 2 830
3160
+ ## 2 2013 1 1 533 529 4 850
3161
+ ## 3 2013 1 1 542 540 2 923
3162
+ ## 4 2013 1 1 544 545 -1 1004
3163
+ ## 5 2013 1 1 554 600 -6 812
3164
+ ## 6 2013 1 1 554 558 -4 740
3165
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3166
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3167
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3168
+ ## # time_hour <dttm>
1645
3169
  ```
1646
3170
 
1647
3171
  To arrange in descending order, use function 'desc'
1648
3172
 
1649
3173
 
1650
3174
  ```ruby
1651
- puts flights.arrange(:dep_delay.desc).head.as__data__frame
3175
+ puts flights.arrange(:dep_delay.desc).head
1652
3176
  ```
1653
3177
 
1654
3178
  ```
1655
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1656
- ## 1 2013 1 9 641 900 1301 1242 1530
1657
- ## 2 2013 6 15 1432 1935 1137 1607 2120
1658
- ## 3 2013 1 10 1121 1635 1126 1239 1810
1659
- ## 4 2013 9 20 1139 1845 1014 1457 2210
1660
- ## 5 2013 7 22 845 1600 1005 1044 1815
1661
- ## 6 2013 4 10 1100 1900 960 1342 2211
1662
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1663
- ## 1 1272 HA 51 N384HA JFK HNL 640 4983 9
1664
- ## 2 1127 MQ 3535 N504MQ JFK CMH 74 483 19
1665
- ## 3 1109 MQ 3695 N517MQ EWR ORD 111 719 16
1666
- ## 4 1007 AA 177 N338AA JFK SFO 354 2586 18
1667
- ## 5 989 MQ 3075 N665MQ JFK CVG 96 589 16
1668
- ## 6 931 DL 2391 N959DL JFK TPA 139 1005 19
1669
- ## minute time_hour
1670
- ## 1 0 2013-01-09 09:00:00
1671
- ## 2 35 2013-06-15 19:00:00
1672
- ## 3 35 2013-01-10 16:00:00
1673
- ## 4 45 2013-09-20 18:00:00
1674
- ## 5 0 2013-07-22 16:00:00
1675
- ## 6 0 2013-04-10 19:00:00
3179
+ ## # A tibble: 6 x 19
3180
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3181
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3182
+ ## 1 2013 1 9 641 900 1301 1242
3183
+ ## 2 2013 6 15 1432 1935 1137 1607
3184
+ ## 3 2013 1 10 1121 1635 1126 1239
3185
+ ## 4 2013 9 20 1139 1845 1014 1457
3186
+ ## 5 2013 7 22 845 1600 1005 1044
3187
+ ## 6 2013 4 10 1100 1900 960 1342
3188
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3189
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3190
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3191
+ ## # time_hour <dttm>
1676
3192
  ```
1677
3193
 
1678
3194
  ## Selecting columns
@@ -1681,45 +3197,51 @@ To select specific columns from a dataset we use function 'select':
1681
3197
 
1682
3198
 
1683
3199
  ```ruby
1684
- puts flights.select(:year, :month, :day).head.as__data__frame
3200
+ puts flights.select(:year, :month, :day).head
1685
3201
  ```
1686
3202
 
1687
3203
  ```
1688
- ## year month day
1689
- ## 1 2013 1 1
1690
- ## 2 2013 1 1
1691
- ## 3 2013 1 1
1692
- ## 4 2013 1 1
1693
- ## 5 2013 1 1
1694
- ## 6 2013 1 1
3204
+ ## # A tibble: 6 x 3
3205
+ ## year month day
3206
+ ## <int> <int> <int>
3207
+ ## 1 2013 1 1
3208
+ ## 2 2013 1 1
3209
+ ## 3 2013 1 1
3210
+ ## 4 2013 1 1
3211
+ ## 5 2013 1 1
3212
+ ## 6 2013 1 1
1695
3213
  ```
1696
3214
 
1697
3215
  It is also possible to select columns in a given range
1698
3216
 
1699
3217
 
1700
3218
  ```ruby
1701
- puts flights.select(:year.up_to :day).head.as__data__frame
3219
+ puts flights.select(:year.up_to :day).head
1702
3220
  ```
1703
3221
 
1704
3222
  ```
1705
- ## year month day
1706
- ## 1 2013 1 1
1707
- ## 2 2013 1 1
1708
- ## 3 2013 1 1
1709
- ## 4 2013 1 1
1710
- ## 5 2013 1 1
1711
- ## 6 2013 1 1
3223
+ ## # A tibble: 6 x 3
3224
+ ## year month day
3225
+ ## <int> <int> <int>
3226
+ ## 1 2013 1 1
3227
+ ## 2 2013 1 1
3228
+ ## 3 2013 1 1
3229
+ ## 4 2013 1 1
3230
+ ## 5 2013 1 1
3231
+ ## 6 2013 1 1
1712
3232
  ```
1713
3233
 
1714
3234
  Select all columns that start with a given name sequence
1715
3235
 
1716
3236
 
1717
3237
  ```ruby
1718
- puts flights.select(E.starts_with('arr')).head.as__data__frame
3238
+ puts flights.select(E.starts_with('arr')).head
1719
3239
  ```
1720
3240
 
1721
3241
  ```
3242
+ ## # A tibble: 6 x 2
1722
3243
  ## arr_time arr_delay
3244
+ ## <int> <dbl>
1723
3245
  ## 1 830 11
1724
3246
  ## 2 850 20
1725
3247
  ## 3 923 33
@@ -1743,31 +3265,23 @@ A helper function that comes in handy when we just want to rearrange column orde
1743
3265
 
1744
3266
 
1745
3267
  ```ruby
1746
- puts flights.select(:year, :month, :day, E.everything).head.as__data__frame
3268
+ puts flights.select(:year, :month, :day, E.everything).head
1747
3269
  ```
1748
3270
 
1749
3271
  ```
1750
- ## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
1751
- ## 1 2013 1 1 517 515 2 830 819
1752
- ## 2 2013 1 1 533 529 4 850 830
1753
- ## 3 2013 1 1 542 540 2 923 850
1754
- ## 4 2013 1 1 544 545 -1 1004 1022
1755
- ## 5 2013 1 1 554 600 -6 812 837
1756
- ## 6 2013 1 1 554 558 -4 740 728
1757
- ## arr_delay carrier flight tailnum origin dest air_time distance hour
1758
- ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
1759
- ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
1760
- ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
1761
- ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
1762
- ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
1763
- ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
1764
- ## minute time_hour
1765
- ## 1 15 2013-01-01 05:00:00
1766
- ## 2 29 2013-01-01 05:00:00
1767
- ## 3 40 2013-01-01 05:00:00
1768
- ## 4 45 2013-01-01 05:00:00
1769
- ## 5 0 2013-01-01 06:00:00
1770
- ## 6 58 2013-01-01 05:00:00
3272
+ ## # A tibble: 6 x 19
3273
+ ## year month day dep_time sched_dep_time dep_delay arr_time
3274
+ ## <int> <int> <int> <int> <int> <dbl> <int>
3275
+ ## 1 2013 1 1 517 515 2 830
3276
+ ## 2 2013 1 1 533 529 4 850
3277
+ ## 3 2013 1 1 542 540 2 923
3278
+ ## 4 2013 1 1 544 545 -1 1004
3279
+ ## 5 2013 1 1 554 600 -6 812
3280
+ ## 6 2013 1 1 554 558 -4 740
3281
+ ## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
3282
+ ## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
3283
+ ## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
3284
+ ## # time_hour <dttm>
1771
3285
  ```
1772
3286
 
1773
3287
  ## Add variables to a dataframe with 'mutate'
@@ -1780,17 +3294,19 @@ flights_sm = flights.
1780
3294
  :distance,
1781
3295
  :air_time)
1782
3296
 
1783
- puts flights_sm.head.as__data__frame
3297
+ puts flights_sm.head
1784
3298
  ```
1785
3299
 
1786
3300
  ```
1787
- ## year month day dep_delay arr_delay distance air_time
1788
- ## 1 2013 1 1 2 11 1400 227
1789
- ## 2 2013 1 1 4 20 1416 227
1790
- ## 3 2013 1 1 2 33 1089 160
1791
- ## 4 2013 1 1 -1 -18 1576 183
1792
- ## 5 2013 1 1 -6 -25 762 116
1793
- ## 6 2013 1 1 -4 12 719 150
3301
+ ## # A tibble: 6 x 7
3302
+ ## year month day dep_delay arr_delay distance air_time
3303
+ ## <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
3304
+ ## 1 2013 1 1 2 11 1400 227
3305
+ ## 2 2013 1 1 4 20 1416 227
3306
+ ## 3 2013 1 1 2 33 1089 160
3307
+ ## 4 2013 1 1 -1 -18 1576 183
3308
+ ## 5 2013 1 1 -6 -25 762 116
3309
+ ## 6 2013 1 1 -4 12 719 150
1794
3310
  ```
1795
3311
 
1796
3312
 
@@ -1798,17 +3314,19 @@ puts flights_sm.head.as__data__frame
1798
3314
  flights_sm = flights_sm.
1799
3315
  mutate(gain: :dep_delay - :arr_delay,
1800
3316
  speed: :distance / :air_time * 60)
1801
- puts flights_sm.head.as__data__frame
3317
+ puts flights_sm.head
1802
3318
  ```
1803
3319
 
1804
3320
  ```
1805
- ## year month day dep_delay arr_delay distance air_time gain speed
1806
- ## 1 2013 1 1 2 11 1400 227 -9 370.0441
1807
- ## 2 2013 1 1 4 20 1416 227 -16 374.2731
1808
- ## 3 2013 1 1 2 33 1089 160 -31 408.3750
1809
- ## 4 2013 1 1 -1 -18 1576 183 17 516.7213
1810
- ## 5 2013 1 1 -6 -25 762 116 19 394.1379
1811
- ## 6 2013 1 1 -4 12 719 150 -16 287.6000
3321
+ ## # A tibble: 6 x 9
3322
+ ## year month day dep_delay arr_delay distance air_time gain speed
3323
+ ## <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
3324
+ ## 1 2013 1 1 2 11 1400 227 -9 370.
3325
+ ## 2 2013 1 1 4 20 1416 227 -16 374.
3326
+ ## 3 2013 1 1 2 33 1089 160 -31 408.
3327
+ ## 4 2013 1 1 -1 -18 1576 183 17 517.
3328
+ ## 5 2013 1 1 -6 -25 762 116 19 394.
3329
+ ## 6 2013 1 1 -4 12 719 150 -16 288.
1812
3330
  ```
1813
3331
 
1814
3332
  ## Summarising data
@@ -1818,12 +3336,14 @@ a single value is obtained from the data frame:
1818
3336
 
1819
3337
 
1820
3338
  ```ruby
1821
- puts flights.summarise(delay: E.mean(:dep_delay, na__rm: true)).as__data__frame
3339
+ puts flights.summarise(delay: E.mean(:dep_delay, na__rm: true))
1822
3340
  ```
1823
3341
 
1824
3342
  ```
1825
- ## delay
1826
- ## 1 12.63907
3343
+ ## # A tibble: 1 x 1
3344
+ ## delay
3345
+ ## <dbl>
3346
+ ## 1 12.6
1827
3347
  ```
1828
3348
 
1829
3349
  When a data frame is grouped with 'group_by', summaries apply to the given group:
@@ -1831,17 +3351,20 @@ When a data frame is groupe with 'group_by' summaries apply to the given group:
1831
3351
 
1832
3352
  ```ruby
1833
3353
  by_day = flights.group_by(:year, :month, :day)
1834
- puts by_day.summarise(delay: :dep_delay.mean(na__rm: true)).head.as__data__frame
3354
+ puts by_day.summarise(delay: :dep_delay.mean(na__rm: true)).head
1835
3355
  ```
1836
3356
 
1837
3357
  ```
1838
- ## year month day delay
1839
- ## 1 2013 1 1 11.548926
1840
- ## 2 2013 1 2 13.858824
1841
- ## 3 2013 1 3 10.987832
1842
- ## 4 2013 1 4 8.951595
1843
- ## 5 2013 1 5 5.732218
1844
- ## 6 2013 1 6 7.148014
3358
+ ## # A tibble: 6 x 4
3359
+ ## # Groups: year, month [1]
3360
+ ## year month day delay
3361
+ ## * <int> <int> <int> <dbl>
3362
+ ## 1 2013 1 1 11.5
3363
+ ## 2 2013 1 2 13.9
3364
+ ## 3 2013 1 3 11.0
3365
+ ## 4 2013 1 4 8.95
3366
+ ## 5 2013 1 5 5.73
3367
+ ## 6 2013 1 6 7.15
1845
3368
  ```
1846
3369
 
1847
3370
  Next we put many operations together by piping them one after the other:
@@ -1856,17 +3379,19 @@ delays = flights.
1856
3379
  delay: :arr_delay.mean(na__rm: true)).
1857
3380
  filter(:count > 20, :dest != "NHL")
1858
3381
 
1859
- puts delays.as__data__frame.head
3382
+ puts delays.head
1860
3383
  ```
1861
3384
 
1862
3385
  ```
1863
- ## dest count dist delay
1864
- ## 1 ABQ 254 1826.0000 4.381890
1865
- ## 2 ACK 265 199.0000 4.852273
1866
- ## 3 ALB 439 143.0000 14.397129
1867
- ## 4 ATL 17215 757.1082 11.300113
1868
- ## 5 AUS 2439 1514.2530 6.019909
1869
- ## 6 AVL 275 583.5818 8.003831
3386
+ ## # A tibble: 6 x 4
3387
+ ## dest count dist delay
3388
+ ## <chr> <int> <dbl> <dbl>
3389
+ ## 1 ABQ 254 1826 4.38
3390
+ ## 2 ACK 265 199 4.85
3391
+ ## 3 ALB 439 143 14.4
3392
+ ## 4 ATL 17215 757. 11.3
3393
+ ## 5 AUS 2439 1514. 6.02
3394
+ ## 6 AVL 275 584. 8.00
1870
3395
  ```
1871
3396
 
1872
3397
  # Using Data Table
@@ -2088,7 +3613,7 @@ puts mtcars.ggplot(E.aes(x: :car_name, y: :mpg_z, label: :mpg_z)) +
2088
3613
  ```
2089
3614
 
2090
3615
 
2091
- ![](/home/rbotafogo/desenv/galaaz/blogs/manual/manual_files/figure-html/diverging_bar.png)<!-- -->
3616
+ ![](manual_files/figure-html/diverging_bar.png)<!-- -->
2092
3617
 
2093
3618
  # Coding with Tidyverse
2094
3619
 
@@ -2266,11 +3791,11 @@ as.data.frame(df)
2266
3791
 
2267
3792
  ```
2268
3793
  ## g1 g2 a b
2269
- ## 1 1 1 2 1
2270
- ## 2 1 2 4 3
2271
- ## 3 2 1 5 4
2272
- ## 4 2 2 3 2
2273
- ## 5 2 1 1 5
3794
+ ## 1 1 1 3 3
3795
+ ## 2 1 2 2 1
3796
+ ## 3 2 1 5 2
3797
+ ## 4 2 2 4 5
3798
+ ## 5 2 1 1 4
2274
3799
  ```
2275
3800
 
2276
3801
  ```r
@@ -2282,9 +3807,9 @@ as.data.frame(d2)
2282
3807
  ```
2283
3808
 
2284
3809
  ```
2285
- ## g1 a
2286
- ## 1 1 3
2287
- ## 2 2 3
3810
+ ## g1 a
3811
+ ## 1 1 2.500000
3812
+ ## 2 2 3.333333
2288
3813
  ```
2289
3814
 
2290
3815
  ```r
@@ -2296,9 +3821,9 @@ as.data.frame(d2)
2296
3821
  ```
2297
3822
 
2298
3823
  ```
2299
- ## g2 a
2300
- ## 1 1 2.666667
2301
- ## 2 2 3.500000
3824
+ ## g2 a
3825
+ ## 1 1 3
3826
+ ## 2 2 3
2302
3827
  ```
2303
3828
 
2304
3829
  As shown by Hadley, one might expect this function to do the trick:
@@ -2330,11 +3855,11 @@ puts ~:df
2330
3855
 
2331
3856
  ```
2332
3857
  ## g1 g2 a b
2333
- ## 1 1 1 2 1
2334
- ## 2 1 2 4 3
2335
- ## 3 2 1 5 4
2336
- ## 4 2 2 3 2
2337
- ## 5 2 1 1 5
3858
+ ## 1 1 1 3 3
3859
+ ## 2 1 2 2 1
3860
+ ## 3 2 1 5 2
3861
+ ## 4 2 2 4 5
3862
+ ## 5 2 1 1 4
2338
3863
  ```
2339
3864
 
2340
3865
  We then create the 'my_summarize' method and call it passing the R data frame and
@@ -2347,26 +3872,30 @@ def my_summarize(df, group_var)
2347
3872
  summarize(a: :a.mean)
2348
3873
  end
2349
3874
 
2350
- puts my_summarize(:df, :g1).as__data__frame
3875
+ puts my_summarize(:df, :g1)
2351
3876
  ```
2352
3877
 
2353
3878
  ```
2354
- ## g1 a
2355
- ## 1 1 3
2356
- ## 2 2 3
3879
+ ## # A tibble: 2 x 2
3880
+ ## g1 a
3881
+ ## <dbl> <dbl>
3882
+ ## 1 1 2.5
3883
+ ## 2 2 3.33
2357
3884
  ```
2358
3885
 
2359
3886
  It works!!! Well, let's make sure this was not just some coincidence
2360
3887
 
2361
3888
 
2362
3889
  ```ruby
2363
- puts my_summarize(:df, :g2).as__data__frame
3890
+ puts my_summarize(:df, :g2)
2364
3891
  ```
2365
3892
 
2366
3893
  ```
2367
- ## g2 a
2368
- ## 1 1 2.666667
2369
- ## 2 2 3.500000
3894
+ ## # A tibble: 2 x 2
3895
+ ## g2 a
3896
+ ## <dbl> <dbl>
3897
+ ## 1 1 3
3898
+ ## 2 2 3
2370
3899
  ```
2371
3900
 
2372
3901
  Great, everything is fine! No magic, no new functions, no complexities, just normal, standard Ruby
@@ -2474,18 +4003,18 @@ puts my_mutate((~:df), :b)
2474
4003
 
2475
4004
  ```
2476
4005
  ## g1 g2 a b mean_a sum_a
2477
- ## 1 1 1 2 1 3 15
2478
- ## 2 1 2 4 3 3 15
2479
- ## 3 2 1 5 4 3 15
2480
- ## 4 2 2 3 2 3 15
2481
- ## 5 2 1 1 5 3 15
4006
+ ## 1 1 1 3 3 3 15
4007
+ ## 2 1 2 2 1 3 15
4008
+ ## 3 2 1 5 2 3 15
4009
+ ## 4 2 2 4 5 3 15
4010
+ ## 5 2 1 1 4 3 15
2482
4011
  ##
2483
4012
  ## g1 g2 a b mean_b sum_b
2484
- ## 1 1 1 2 1 3 15
2485
- ## 2 1 2 4 3 3 15
2486
- ## 3 2 1 5 4 3 15
2487
- ## 4 2 2 3 2 3 15
2488
- ## 5 2 1 1 5 3 15
4013
+ ## 1 1 1 3 3 3 15
4014
+ ## 2 1 2 2 1 3 15
4015
+ ## 3 2 1 5 2 3 15
4016
+ ## 4 2 2 4 5 3 15
4017
+ ## 5 2 1 1 4 3 15
2489
4018
  ```
2490
4019
  It really seems that "Non Standard Evaluation" is actually quite standard in Galaaz! But, you
2491
4020
  might have noticed a small change in the way the arguments to the mutate method were called.
@@ -2510,15 +4039,18 @@ def my_summarise3(df, *group_vars)
2510
4039
  summarise(a: E.mean(:a))
2511
4040
  end
2512
4041
 
2513
- puts my_summarise3((~:df), :g1, :g2).as__data__frame
4042
+ puts my_summarise3((~:df), :g1, :g2)
2514
4043
  ```
2515
4044
 
2516
4045
  ```
2517
- ## g1 g2 a
2518
- ## 1 1 1 2
2519
- ## 2 1 2 4
2520
- ## 3 2 1 3
2521
- ## 4 2 2 3
4046
+ ## # A tibble: 4 x 3
4047
+ ## # Groups: g1 [?]
4048
+ ## g1 g2 a
4049
+ ## <dbl> <dbl> <dbl>
4050
+ ## 1 1 1 3
4051
+ ## 2 1 2 2
4052
+ ## 3 2 1 3
4053
+ ## 4 2 2 4
2522
4054
  ```
2523
4055
 
2524
4056
  ## Why does R require NSE and Galaaz does not?
@@ -2570,38 +4102,21 @@ features of characters in the Starwars movies:
2570
4102
 
2571
4103
 
2572
4104
  ```ruby
2573
- puts (~:starwars).head.as__data__frame
2574
- ```
2575
-
2576
- ```
2577
- ## name height mass hair_color skin_color eye_color birth_year
2578
- ## 1 Luke Skywalker 172 77 blond fair blue 19.0
2579
- ## 2 C-3PO 167 75 <NA> gold yellow 112.0
2580
- ## 3 R2-D2 96 32 <NA> white, blue red 33.0
2581
- ## 4 Darth Vader 202 136 none white yellow 41.9
2582
- ## 5 Leia Organa 150 49 brown light brown 19.0
2583
- ## 6 Owen Lars 178 120 brown, grey light blue 52.0
2584
- ## gender homeworld species
2585
- ## 1 male Tatooine Human
2586
- ## 2 <NA> Tatooine Droid
2587
- ## 3 <NA> Naboo Droid
2588
- ## 4 male Tatooine Human
2589
- ## 5 female Alderaan Human
2590
- ## 6 male Tatooine Human
2591
- ## films
2592
- ## 1 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2593
- ## 2 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
2594
- ## 3 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2595
- ## 4 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
2596
- ## 5 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2597
- ## 6 Attack of the Clones, Revenge of the Sith, A New Hope
2598
- ## vehicles starships
2599
- ## 1 Snowspeeder, Imperial Speeder Bike X-wing, Imperial shuttle
2600
- ## 2
2601
- ## 3
2602
- ## 4 TIE Advanced x1
2603
- ## 5 Imperial Speeder Bike
2604
- ## 6
4105
+ puts (~:starwars).head
4106
+ ```
4107
+
4108
+ ```
4109
+ ## # A tibble: 6 x 13
4110
+ ## name height mass hair_color skin_color eye_color birth_year gender
4111
+ ## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
4112
+ ## 1 Luke… 172 77 blond fair blue 19 male
4113
+ ## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
4114
+ ## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
4115
+ ## 4 Dart… 202 136 none white yellow 41.9 male
4116
+ ## 5 Leia… 150 49 brown light brown 19 female
4117
+ ## 6 Owen… 178 120 brown, gr… light blue 52 male
4118
+ ## # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
4119
+ ## # vehicles <list>, starships <list>
2605
4120
  ```
2606
4121
  The grouped_mean function below will receive a grouping variable and calculate summaries for
2607
4122
  the value\_variables given:
@@ -2653,26 +4168,28 @@ def grouped_mean(data, grouping_variables, value_variables)
2653
4168
  rename_at(value_variables, E.funs(E.paste0("mean_", value_variables)))
2654
4169
  end
2655
4170
 
2656
- puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year")).as__data__frame
4171
+ puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year"))
2657
4172
  ```
2658
4173
 
2659
4174
  ```
2660
- ## eye_color mean_mass mean_birth_year count
2661
- ## 1 black 76.28571 33.00000 10
2662
- ## 2 blue 86.51667 67.06923 19
2663
- ## 3 blue-gray 77.00000 57.00000 1
2664
- ## 4 brown 66.09231 108.96429 21
2665
- ## 5 dark NaN NaN 1
2666
- ## 6 gold NaN NaN 1
2667
- ## 7 green, yellow 159.00000 NaN 1
2668
- ## 8 hazel 66.00000 34.50000 3
2669
- ## 9 orange 282.33333 231.00000 8
2670
- ## 10 pink NaN NaN 1
2671
- ## 11 red 81.40000 33.66667 5
2672
- ## 12 red, blue NaN NaN 1
2673
- ## 13 unknown 31.50000 NaN 3
2674
- ## 14 white 48.00000 NaN 1
2675
- ## 15 yellow 81.11111 76.38000 11
4175
+ ## # A tibble: 15 x 4
4176
+ ## eye_color mean_mass mean_birth_year count
4177
+ ## <chr> <dbl> <dbl> <dbl>
4178
+ ## 1 black 76.3 33 10
4179
+ ## 2 blue 86.5 67.1 19
4180
+ ## 3 blue-gray 77 57 1
4181
+ ## 4 brown 66.1 109. 21
4182
+ ## 5 dark NaN NaN 1
4183
+ ## 6 gold NaN NaN 1
4184
+ ## 7 green, yellow 159 NaN 1
4185
+ ## 8 hazel 66 34.5 3
4186
+ ## 9 orange 282. 231 8
4187
+ ## 10 pink NaN NaN 1
4188
+ ## 11 red 81.4 33.7 5
4189
+ ## 12 red, blue NaN NaN 1
4190
+ ## 13 unknown 31.5 NaN 3
4191
+ ## 14 white 48 NaN 1
4192
+ ## 15 yellow 81.1 76.4 11
2676
4193
  ```
2677
4194
 
2678
4195
 
@@ -2681,7 +4198,6 @@ puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year")).as__data
2681
4198
 
2682
4199
  # Contributing
2683
4200
 
2684
-
2685
4201
  * Fork it
2686
4202
  * Create your feature branch (git checkout -b my-new-feature)
2687
4203
  * Write Tests!
@@ -2689,3 +4205,4 @@ puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year")).as__data
2689
4205
  * Push to the branch (git push origin my-new-feature)
2690
4206
  * Create new Pull Request
2691
4207
 
4208
+ # References