galaaz 0.4.10 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2048 -531
- data/Rakefile +3 -2
- data/bin/gknit +152 -6
- data/bin/gknit-draft +105 -0
- data/bin/gknit-draft.rb +28 -0
- data/bin/gknit_Rscript +127 -0
- data/bin/grun +27 -1
- data/bin/gstudio +47 -4
- data/bin/{gstudio.rb → gstudio_irb.rb} +0 -0
- data/bin/gstudio_pry.rb +7 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot.html +10 -195
- data/blogs/galaaz_ggplot/galaaz_ggplot.md +404 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
- data/blogs/gknit/gknit.Rmd +5 -3
- data/blogs/gknit/gknit.pdf +0 -0
- data/blogs/gknit/lst.rds +0 -0
- data/blogs/manual/lst.rds +0 -0
- data/blogs/manual/manual.Rmd +826 -53
- data/blogs/manual/manual.html +2338 -695
- data/blogs/manual/manual.md +2032 -539
- data/blogs/manual/manual.pdf +0 -0
- data/blogs/manual/manual.tex +1804 -594
- data/blogs/manual/manual_files/figure-html/bubble-1.png +0 -0
- data/blogs/manual/manual_files/figure-html/diverging_bar.png +0 -0
- data/blogs/manual/manual_files/figure-latex/bubble-1.png +0 -0
- data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
- data/blogs/manual/model.rb +41 -0
- data/blogs/nse_dplyr/nse_dplyr.Rmd +226 -73
- data/blogs/nse_dplyr/nse_dplyr.html +254 -336
- data/blogs/nse_dplyr/nse_dplyr.md +353 -158
- data/blogs/oh_my/oh_my.html +274 -386
- data/blogs/oh_my/oh_my.md +208 -205
- data/blogs/ruby_plot/ruby_plot.html +20 -205
- data/blogs/ruby_plot/ruby_plot.md +14 -15
- data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
- data/examples/Bibliography/master.bib +50 -0
- data/examples/Bibliography/stats.bib +72 -0
- data/examples/islr/x_y_rnorm.jpg +0 -0
- data/examples/latex_templates/Test-acm_article/Makefile +16 -0
- data/examples/latex_templates/Test-acm_article/Test-acm_article.Rmd +65 -0
- data/examples/latex_templates/Test-acm_article/acm_proc_article-sp.cls +1670 -0
- data/examples/latex_templates/Test-acm_article/sensys-abstract.cls +703 -0
- data/examples/latex_templates/Test-acm_article/sigproc.bib +59 -0
- data/examples/latex_templates/Test-acs_article/Test-acs_article.Rmd +260 -0
- data/examples/latex_templates/Test-acs_article/Test-acs_article.pdf +0 -0
- data/examples/latex_templates/Test-acs_article/acs-Test-acs_article.bib +11 -0
- data/examples/latex_templates/Test-acs_article/acs-my_output.bib +11 -0
- data/examples/latex_templates/Test-acs_article/acstest.bib +17 -0
- data/examples/latex_templates/Test-aea_article/AEA.cls +1414 -0
- data/{blogs/gknit/marshal.dump → examples/latex_templates/Test-aea_article/BibFile.bib} +0 -0
- data/examples/latex_templates/Test-aea_article/Test-aea_article.Rmd +108 -0
- data/examples/latex_templates/Test-aea_article/Test-aea_article.pdf +0 -0
- data/examples/latex_templates/Test-aea_article/aea.bst +1269 -0
- data/examples/latex_templates/Test-aea_article/multicol.sty +853 -0
- data/examples/latex_templates/Test-aea_article/references.bib +0 -0
- data/examples/latex_templates/Test-aea_article/setspace.sty +546 -0
- data/examples/latex_templates/Test-amq_article/Test-amq_article.Rmd +256 -0
- data/examples/latex_templates/Test-amq_article/Test-amq_article.pdf +0 -0
- data/examples/latex_templates/Test-amq_article/Test-amq_article.pdfsync +3397 -0
- data/examples/latex_templates/Test-amq_article/pics/Figure2.pdf +0 -0
- data/examples/latex_templates/Test-ams_article/Test-ams_article.Rmd +215 -0
- data/examples/latex_templates/Test-ams_article/amstest.bib +436 -0
- data/examples/latex_templates/Test-asa_article/Test-asa_article.Rmd +153 -0
- data/examples/latex_templates/Test-asa_article/Test-asa_article.pdf +0 -0
- data/examples/latex_templates/Test-asa_article/agsm.bst +1353 -0
- data/examples/latex_templates/Test-asa_article/bibliography.bib +233 -0
- data/examples/latex_templates/Test-ieee_article/IEEEtran.bst +2409 -0
- data/examples/latex_templates/Test-ieee_article/IEEEtran.cls +6346 -0
- data/examples/latex_templates/Test-ieee_article/Test-ieee_article.Rmd +175 -0
- data/examples/latex_templates/Test-ieee_article/Test-ieee_article.pdf +0 -0
- data/examples/latex_templates/Test-ieee_article/mybibfile.bib +20 -0
- data/examples/latex_templates/Test-rjournal_article/RJournal.sty +335 -0
- data/examples/latex_templates/Test-rjournal_article/RJreferences.bib +18 -0
- data/examples/latex_templates/Test-rjournal_article/RJwrapper.pdf +0 -0
- data/examples/latex_templates/Test-rjournal_article/Test-rjournal_article.Rmd +52 -0
- data/examples/latex_templates/Test-springer_article/Test-springer_article.Rmd +65 -0
- data/examples/latex_templates/Test-springer_article/Test-springer_article.pdf +0 -0
- data/examples/latex_templates/Test-springer_article/bibliography.bib +26 -0
- data/examples/latex_templates/Test-springer_article/spbasic.bst +1658 -0
- data/examples/latex_templates/Test-springer_article/spmpsci.bst +1512 -0
- data/examples/latex_templates/Test-springer_article/spphys.bst +1443 -0
- data/examples/latex_templates/Test-springer_article/svglov3.clo +113 -0
- data/examples/latex_templates/Test-springer_article/svjour3.cls +1431 -0
- data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.Rmd +73 -0
- data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.pdf +0 -0
- data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.Rmd +382 -0
- data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.pdf +0 -0
- data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.Rmd +164 -0
- data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.pdf +0 -0
- data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.Rmd +92 -0
- data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.pdf +0 -0
- data/examples/rmarkdown/svm-rmarkdown-syllabus-example/attend-grade-relationships.csv +482 -0
- data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.Rmd +280 -0
- data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.pdf +0 -0
- data/examples/rmarkdown/svm-xaringan-example/svm-xaringan-example.Rmd +386 -0
- data/lib/R_interface/r.rb +1 -1
- data/lib/R_interface/r_libs.R +1 -1
- data/lib/R_interface/r_methods.rb +10 -0
- data/lib/R_interface/rpkg.rb +1 -0
- data/lib/R_interface/rsupport.rb +4 -6
- data/lib/gknit.rb +2 -0
- data/lib/gknit/draft.rb +105 -0
- data/lib/gknit/knitr_engine.rb +0 -33
- data/lib/util/exec_ruby.rb +1 -27
- data/specs/figures/bg.jpeg +0 -0
- data/specs/figures/bg.png +0 -0
- data/specs/figures/dose_len.png +0 -0
- data/specs/figures/no_args.jpeg +0 -0
- data/specs/figures/no_args.png +0 -0
- data/specs/figures/width_height.jpeg +0 -0
- data/specs/figures/width_height.png +0 -0
- data/specs/figures/width_height_units1.jpeg +0 -0
- data/specs/figures/width_height_units1.png +0 -0
- data/specs/figures/width_height_units2.jpeg +0 -0
- data/specs/figures/width_height_units2.png +0 -0
- data/specs/r_dataframe.spec.rb +11 -11
- data/specs/ruby_expression.spec.rb +1 -0
- data/specs/tmp.rb +41 -20
- data/version.rb +1 -1
- metadata +73 -35
- data/blogs/galaaz_ggplot/galaaz_ggplot.aux +0 -41
- data/blogs/galaaz_ggplot/galaaz_ggplot.out +0 -10
- data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/midwest_rb.pdf +0 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/scatter_plot_rb.pdf +0 -0
- data/blogs/gknit/gknit.md +0 -1430
- data/blogs/gknit/gknit.tex +0 -1358
- data/blogs/manual/graph.rb +0 -29
- data/blogs/nse_dplyr/nse_dplyr.tex +0 -1373
- data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +0 -662
- data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +0 -57
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +0 -106
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +0 -110
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +0 -174
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +0 -236
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +0 -296
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +0 -236
- data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +0 -218
- data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +0 -128
- data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +0 -150
- data/blogs/ruby_plot/ruby_plot_files/figure-latex/dose_len.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_delivery.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_dose.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color2.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_decorations.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_jitter.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_points.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_box_plot.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_violin_plot.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-latex/violin_with_jitter.png +0 -0
- data/examples/paper/paper.rb +0 -36
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 0a936fac80a3198849bf43505e3badca81025fcef2b942fabe5edc328b6d35f3
|
|
4
|
+
data.tar.gz: 4aa40b1d667ee45ab94ee8e9565401e718179ad261c043a2173fe50d5b97dfb2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 34974a5d148a2f0896fa07ef26f046af1b43d1263750732d072e6614ad8f3ff32783248a02228acd9b6c0f2183ddb68c91a6dd93aebd51198c594c1f6e513298
|
|
7
|
+
data.tar.gz: 88ea82fcf3e298deacdae6c7305faabff38d89b41a526a8f0e528c00555190acd84006764365c0fa7e913e361f3ecaf69cdf1c00332b80d4ba7d276dad7d10fe
|
data/README.md
CHANGED
|
@@ -1,3 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: "Galaaz Manual"
|
|
3
|
+
subtitle: "How to tightly couple Ruby and R in GraalVM"
|
|
4
|
+
author: "Rodrigo Botafogo"
|
|
5
|
+
tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, ggplot2]
|
|
6
|
+
date: "2019"
|
|
7
|
+
bibliography: "/home/rbotafogo/Bibliography/stats.bib"
|
|
8
|
+
output:
|
|
9
|
+
html_document:
|
|
10
|
+
self_contained: true
|
|
11
|
+
keep_md: true
|
|
12
|
+
md_document:
|
|
13
|
+
variant: markdown_github
|
|
14
|
+
pdf_document:
|
|
15
|
+
includes:
|
|
16
|
+
in_header: "../../sty/galaaz.sty"
|
|
17
|
+
keep_tex: yes
|
|
18
|
+
number_sections: yes
|
|
19
|
+
toc: true
|
|
20
|
+
toc_depth: 3
|
|
21
|
+
fontsize: 11pt
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
1
26
|
# Introduction
|
|
2
27
|
|
|
3
28
|
Galaaz is a system for tightly coupling Ruby and R. Ruby is a powerful language, with a large
|
|
@@ -7,6 +32,92 @@ other hand, R is considered one of the most powerful languages for solving all o
|
|
|
7
32
|
problems. Maybe the strongest competitor to R is Python with libraries such as NumPy,
|
|
8
33
|
Pandas, SciPy, SciKit-Learn and a couple more.
|
|
9
34
|
|
|
35
|
+
With Galaaz we do not intend to re-implement any of the scientific libraries in R; instead, we allow
|
|
36
|
+
for very tight coupling between the two languages to the point that the Ruby developer does
|
|
37
|
+
not need to know that there is an R engine running.
|
|
38
|
+
|
|
39
|
+
According to Wikipedia "Ruby is a dynamic, interpreted, reflective, object-oriented,
|
|
40
|
+
general-purpose programming language. It was designed and developed in the mid-1990s by Yukihiro
|
|
41
|
+
"Matz" Matsumoto in Japan." It reached high popularity with the development of Ruby on Rails
|
|
42
|
+
(RoR) by David Heinemeier Hansson. RoR is a web application framework first released
|
|
43
|
+
around 2005. It makes extensive use of Ruby's metaprogramming features. With RoR,
|
|
44
|
+
Ruby became very popular. According to [Ruby's Tiobe index](https://www.tiobe.com/tiobe-index/ruby/)
|
|
45
|
+
it peaked in popularity around 2008, then declined until 2015 when it started picking up again.
|
|
46
|
+
At the time of this writing (November 2018), the Tiobe index puts Ruby in 16th position as
|
|
47
|
+
most popular language.
|
|
48
|
+
|
|
49
|
+
Python, a language similar to Ruby, ranks 4th in the index. Java, C and C++ take the
|
|
50
|
+
first three positions. Ruby is often criticized for its focus on web applications.
|
|
51
|
+
But Ruby can do [much more](https://github.com/markets/awesome-ruby) than just web applications.
|
|
52
|
+
Yet, for scientific computing, Ruby lags way behind Python and R. Python has
|
|
53
|
+
the Django framework for web, NumPy for numerical arrays, Pandas for data analysis.
|
|
54
|
+
R is a free software environment for statistical computing and graphics with thousands
|
|
55
|
+
of libraries for data analysis.
|
|
56
|
+
|
|
57
|
+
Until recently, there was no real perspective for Ruby to bridge this gap.
|
|
58
|
+
Implementing a complete scientific computing infrastructure would take too long.
|
|
59
|
+
Enter [Oracle's GraalVM](https://www.graalvm.org/):
|
|
60
|
+
|
|
61
|
+
> GraalVM is a universal virtual machine for running applications written in
|
|
62
|
+
> JavaScript, Python 3, Ruby, R, JVM-based languages like Java, Scala, Kotlin,
|
|
63
|
+
> and LLVM-based languages such as C and C++.
|
|
64
|
+
>
|
|
65
|
+
> GraalVM removes the isolation between programming languages and enables
|
|
66
|
+
> interoperability in a shared runtime. It can run either standalone or in the
|
|
67
|
+
> context of OpenJDK, Node.js, Oracle Database, or MySQL.
|
|
68
|
+
>
|
|
69
|
+
> GraalVM allows you to write polyglot applications with a seamless way to pass
|
|
70
|
+
> values from one language to another. With GraalVM there is no copying or
|
|
71
|
+
> marshaling necessary as it is with other polyglot systems. This lets you
|
|
72
|
+
> achieve high performance when language boundaries are crossed. Most of the time
|
|
73
|
+
> there is no additional cost for crossing a language boundary at all.
|
|
74
|
+
>
|
|
75
|
+
> Often developers have to make uncomfortable compromises that require them
|
|
76
|
+
> to rewrite their software in other languages. For example:
|
|
77
|
+
>
|
|
78
|
+
> * That library is not available in my language. I need to rewrite it.
|
|
79
|
+
> * That language would be the perfect fit for my problem, but we cannot
|
|
80
|
+
> run it in our environment.
|
|
81
|
+
> * That problem is already solved in my language, but the language is
|
|
82
|
+
> too slow.
|
|
83
|
+
>
|
|
84
|
+
> With GraalVM we aim to allow developers to freely choose the right language for
|
|
85
|
+
> the task at hand without making compromises.
|
|
86
|
+
|
|
87
|
+
As stated above, GraalVM is a _universal_ virtual machine that allows Ruby and R (and other
|
|
88
|
+
languages) to run on the same environment. GraalVM allows polyglot applications to
|
|
89
|
+
_seamlessly_ interact with one another and pass values from one language to the other.
|
|
90
|
+
Although a great idea, GraalVM still requires application writers to know several languages.
|
|
91
|
+
To eliminate that requirement, we built Galaaz, a gem for Ruby, to tightly couple
|
|
92
|
+
Ruby and R and allow those languages to interact in a way that the user will be unaware
|
|
93
|
+
of such interaction. In other words, a Ruby programmer will be able to use all
|
|
94
|
+
the capabilities of R without knowing the R syntax.
|
|
95
|
+
|
|
96
|
+
Library wrapping is a usual way of bringing features from one language into another.
|
|
97
|
+
To improve performance, Python often wraps more efficient C libraries. For the
|
|
98
|
+
Python developer, the existence of such C libraries is hidden. The problem with
|
|
99
|
+
library wrapping is that for any new library, there is the need to handcraft a new
|
|
100
|
+
wrapper.
|
|
101
|
+
|
|
102
|
+
Galaaz, instead of wrapping a single C or R library, wraps the whole R language
|
|
103
|
+
in Ruby. Doing so, all thousands of R libraries are available immediately
|
|
104
|
+
to Ruby developers without any new wrapping effort.
|
|
105
|
+
|
|
106
|
+
## What does Galaaz mean
|
|
107
|
+
|
|
108
|
+
Galaaz is the Portuguese name for "Galahad". From Wikipedia:
|
|
109
|
+
|
|
110
|
+
Sir Galahad (sometimes referred to as Galeas or Galath),
|
|
111
|
+
in Arthurian legend, is a knight of King Arthur's Round Table and one
|
|
112
|
+
of the three achievers of the Holy Grail. He is the illegitimate son
|
|
113
|
+
of Sir Lancelot and Elaine of Corbenic, and is renowned for his
|
|
114
|
+
gallantry and purity as the most perfect of all knights. Emerging quite
|
|
115
|
+
late in the medieval Arthurian tradition, Sir Galahad first appears in the
|
|
116
|
+
Lancelot–Grail cycle, and his story is taken up in later works such as
|
|
117
|
+
the Post-Vulgate Cycle and Sir Thomas Malory's Le Morte d'Arthur.
|
|
118
|
+
His name should not be mistaken with Galehaut, a different knight from
|
|
119
|
+
Arthurian legend.
|
|
120
|
+
|
|
10
121
|
# System Compatibility
|
|
11
122
|
|
|
12
123
|
* Oracle Linux 7
|
|
@@ -62,7 +173,7 @@ Panda, SciPy, SciKit-Learn and a couple more.
|
|
|
62
173
|
> galaaz -T
|
|
63
174
|
|
|
64
175
|
Shows a list with all available executable tasks. To execute a task, substitute the
|
|
65
|
-
|
|
176
|
+
'rake' word in the list with 'galaaz'. For instance, the following line shows up
|
|
66
177
|
after 'galaaz -T'
|
|
67
178
|
|
|
68
179
|
rake master_list:scatter_plot # scatter_plot from:....
|
|
@@ -71,349 +182,1468 @@ Panda, SciPy, SciKit-Learn and a couple more.
|
|
|
71
182
|
|
|
72
183
|
> galaaz master_list:scatter_plot
|
|
73
184
|
|
|
74
|
-
# gKnitting a Document
|
|
75
|
-
|
|
76
|
-
This manual has been formatted usign gKnit. gKnit uses Knitr and R markdown to knit
|
|
77
|
-
a document in Ruby or R and output it in any of the available formats for R markdown.
|
|
78
|
-
gKnit runs atop of GraalVM, and Galaaz. In gKnit, Ruby variables are persisted between
|
|
79
|
-
chunks, making it an ideal solution for literate programming. Also, since it is based
|
|
80
|
-
on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming with
|
|
81
|
-
Ruby and R is quite natural.
|
|
82
185
|
|
|
83
|
-
|
|
186
|
+
# Accessing R from Ruby
|
|
84
187
|
|
|
85
|
-
|
|
188
|
+
One of the nice aspects of Galaaz on GraalVM is that variables and functions defined in R can
|
|
189
|
+
be easily accessed from Ruby. For instance, to access the 'mtcars' data frame from R
|
|
190
|
+
in Ruby, we use the ':mtcars' symbol preceded by the '~' operator, thus '~:mtcars' retrieves the
|
|
191
|
+
value of the 'mtcars' variable.
|
|
86
192
|
|
|
87
|
-
Vectors can be thought of as contiguous cells containing data. Cells are accessed through
|
|
88
|
-
indexing operations such as x[5]. Galaaz has six basic (‘atomic’) vector types: logical,
|
|
89
|
-
integer, real, complex, string (or character) and raw. The modes and storage modes for the
|
|
90
|
-
different vector types are listed in the following
|
|
91
|
-
table.
|
|
92
193
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
| integer | numeric | integer |
|
|
97
|
-
| double | numeric | double |
|
|
98
|
-
| complex | complex | comples |
|
|
99
|
-
| character | character | character |
|
|
100
|
-
| raw | raw | raw |
|
|
194
|
+
```ruby
|
|
195
|
+
puts ~:mtcars
|
|
196
|
+
```
|
|
101
197
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
198
|
+
```
|
|
199
|
+
## mpg cyl disp hp drat wt qsec vs am gear carb
|
|
200
|
+
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
|
|
201
|
+
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
|
|
202
|
+
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
|
|
203
|
+
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
|
|
204
|
+
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
|
|
205
|
+
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
|
|
206
|
+
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
|
|
207
|
+
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
|
|
208
|
+
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
|
|
209
|
+
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
|
|
210
|
+
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
|
|
211
|
+
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
|
|
212
|
+
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
|
|
213
|
+
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
|
|
214
|
+
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
|
|
215
|
+
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
|
|
216
|
+
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
|
|
217
|
+
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
|
|
218
|
+
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
|
|
219
|
+
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
|
|
220
|
+
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
|
|
221
|
+
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
|
|
222
|
+
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
|
|
223
|
+
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
|
|
224
|
+
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
|
|
225
|
+
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
|
|
226
|
+
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
|
|
227
|
+
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
|
|
228
|
+
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
|
|
229
|
+
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
|
|
230
|
+
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
|
|
231
|
+
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
|
|
232
|
+
```
|
|
106
233
|
|
|
107
|
-
To
|
|
234
|
+
To access an R function from Ruby, the R function needs to be preceded by 'R.' scoping.
|
|
235
|
+
Below we see an example of creating an R::Vector by calling the 'c' R function
|
|
108
236
|
|
|
109
237
|
|
|
110
238
|
```ruby
|
|
111
|
-
vec = R.c(1, 2, 3)
|
|
112
|
-
puts vec
|
|
239
|
+
puts vec = R.c(1.0, 2.0, 3.0, 4.0)
|
|
113
240
|
```
|
|
114
241
|
|
|
115
242
|
```
|
|
116
|
-
## [1] 1 2 3
|
|
243
|
+
## [1] 1 2 3 4
|
|
117
244
|
```
|
|
245
|
+
Note that 'vec' is an object of type R::Vector:
|
|
118
246
|
|
|
119
|
-
Lets take a look at the type, mode and storage.mode of our vector vec. In order to print
|
|
120
|
-
this out, we are creating a data frame 'df' and printing it out. A data frame, for those
|
|
121
|
-
not familiar with it, is basically a table. Here we create the data frame and add the
|
|
122
|
-
column name by passing named parameters for each column, such as 'typeof:', 'mode:' and
|
|
123
|
-
'storage__mode?'. You should also note here that the double underscore is converted to a '.'.
|
|
124
|
-
So, when printed 'storage\_\_mode' will actually print as 'storage.mode'.
|
|
125
247
|
|
|
126
|
-
|
|
127
|
-
|
|
248
|
+
```ruby
|
|
249
|
+
puts vec.class
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
```
|
|
253
|
+
## R::Vector
|
|
254
|
+
```
|
|
255
|
+
Every object created by a call to an R function will be of a type that inherits from
|
|
256
|
+
R::Object. In R, there is also a function 'class'. In order to access that function we
|
|
257
|
+
can call method 'rclass' in the R::Object:
|
|
128
258
|
|
|
129
259
|
|
|
130
260
|
```ruby
|
|
131
|
-
|
|
132
|
-
puts df
|
|
261
|
+
puts vec.rclass
|
|
133
262
|
```
|
|
134
263
|
|
|
135
264
|
```
|
|
136
|
-
##
|
|
137
|
-
## 1 integer numeric integer
|
|
265
|
+
## [1] "numeric"
|
|
138
266
|
```
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
267
|
+
When working with R::Object(s), it is possible to use the '.' operator to pipe operations.
|
|
268
|
+
When using '.', the object to which the '.' is applied becomes the first argument of the
|
|
269
|
+
corresponding R function. For instance, function 'c' in R, can be used to concatenate
|
|
270
|
+
two or more vectors (in R, there are no scalar values, scalars are converted to
|
|
271
|
+
vectors of size 1. Within Galaaz, a scalar parameter is converted to a size one vector):
|
|
144
272
|
|
|
145
273
|
|
|
146
274
|
```ruby
|
|
147
|
-
|
|
148
|
-
puts vec
|
|
275
|
+
puts R.c(vec, 10, 20, 30)
|
|
149
276
|
```
|
|
150
277
|
|
|
151
278
|
```
|
|
152
|
-
## [1]
|
|
279
|
+
## [1] 1 2 3 4 10 20 30
|
|
153
280
|
```
|
|
281
|
+
The call above to the 'c' function can also be done using '.' notation:
|
|
154
282
|
|
|
155
283
|
|
|
156
284
|
```ruby
|
|
157
|
-
|
|
158
|
-
outputs df.kable.kable_styling
|
|
285
|
+
puts vec.c(10, 20, 30)
|
|
159
286
|
```
|
|
160
287
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
<th style="text-align:left;"> storage.mode </th>
|
|
167
|
-
</tr>
|
|
168
|
-
</thead>
|
|
169
|
-
<tbody>
|
|
170
|
-
<tr>
|
|
171
|
-
<td style="text-align:left;"> double </td>
|
|
172
|
-
<td style="text-align:left;"> numeric </td>
|
|
173
|
-
<td style="text-align:left;"> double </td>
|
|
174
|
-
</tr>
|
|
175
|
-
</tbody>
|
|
176
|
-
</table>
|
|
177
|
-
|
|
178
|
-
In this next example we try to create a vector with a variable 'hello' that has not yet
|
|
179
|
-
being defined. This will raise an exception that is printed out. We get two return blocks,
|
|
180
|
-
the first with a message explaining what went wrong and the second with the full backtrace
|
|
181
|
-
of the error.
|
|
288
|
+
```
|
|
289
|
+
## [1] 1 2 3 4 10 20 30
|
|
290
|
+
```
|
|
291
|
+
We will talk about vector indexing in a later section. But notice here that indexing
|
|
292
|
+
an R::Vector will return another R::Vector:
|
|
182
293
|
|
|
183
294
|
|
|
184
295
|
```ruby
|
|
185
|
-
vec
|
|
296
|
+
puts vec[1]
|
|
186
297
|
```
|
|
187
298
|
|
|
188
299
|
```
|
|
189
|
-
##
|
|
190
|
-
|
|
300
|
+
## [1] 1
|
|
301
|
+
```
|
|
302
|
+
Sometimes we want to index an R::Object and get back a Ruby object that is not wrapped
|
|
303
|
+
in an R::Object, but the native Ruby object. For this, we can index the R object with
|
|
304
|
+
the '>>' operator:
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
```ruby
|
|
308
|
+
puts vec >> 0
|
|
309
|
+
puts vec >> 2
|
|
191
310
|
```
|
|
192
311
|
|
|
193
312
|
```
|
|
194
|
-
##
|
|
195
|
-
##
|
|
196
|
-
## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `eval'
|
|
197
|
-
## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `exec_ruby'
|
|
198
|
-
## /home/rbotafogo/desenv/galaaz/lib/gknit/knitr_engine.rb:650:in `block in initialize'
|
|
199
|
-
## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
|
|
200
|
-
## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
|
|
201
|
-
## (eval):3:in `function(...) {\n rb_method(...)'
|
|
202
|
-
## unknown.r:1:in `in_dir'
|
|
203
|
-
## unknown.r:1:in `block_exec:BLOCK0'
|
|
204
|
-
## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:102:in `block_exec'
|
|
205
|
-
## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:92:in `call_block'
|
|
206
|
-
## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:6:in `process_group.block'
|
|
207
|
-
## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/block.R:3:in `<no source>'
|
|
208
|
-
## unknown.r:1:in `withCallingHandlers'
|
|
209
|
-
## unknown.r:1:in `process_file'
|
|
210
|
-
## unknown.r:1:in `<no source>:BLOCK1'
|
|
211
|
-
## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/knitr/R/output.R:129:in `<no source>'
|
|
212
|
-
## unknown.r:1:in `<no source>:BLOCK1'
|
|
213
|
-
## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc16/jre/languages/R/library/rmarkdown/R/render.R:162:in `<no source>'
|
|
214
|
-
## <REPL>:5:in `<repl wrapper>'
|
|
215
|
-
## <REPL>:1
|
|
313
|
+
## 1.0
|
|
314
|
+
## 3.0
|
|
216
315
|
```
|
|
217
316
|
|
|
218
|
-
|
|
317
|
+
It is also possible to call an R function with named arguments, by creating the function
|
|
318
|
+
in Galaaz with named parameters. For instance, here is an example of creating a 'list'
|
|
319
|
+
with named elements:
|
|
219
320
|
|
|
220
321
|
|
|
221
322
|
```ruby
|
|
222
|
-
|
|
223
|
-
puts vec
|
|
323
|
+
puts R.list(first_name: "Rodrigo", last_name: "Botafogo")
|
|
224
324
|
```
|
|
225
325
|
|
|
226
326
|
```
|
|
227
|
-
##
|
|
327
|
+
## $first_name
|
|
328
|
+
## [1] "Rodrigo"
|
|
329
|
+
##
|
|
330
|
+
## $last_name
|
|
331
|
+
## [1] "Botafogo"
|
|
228
332
|
```
|
|
229
333
|
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
334
|
+
Many R functions receive another function as argument. For instance, method 'map' applies
|
|
335
|
+
a function to every element of a vector. With Galaaz, it is possible to pass a Proc,
|
|
336
|
+
Method or Lambda in place of the expected R function. In this next example, we will
|
|
337
|
+
add 2 to every element of our previously created vector:
|
|
233
338
|
|
|
234
339
|
|
|
235
340
|
```ruby
|
|
236
|
-
|
|
237
|
-
vec2 = R.c(4.0, 5.0, 6.0)
|
|
238
|
-
vec = R.c(vec1, vec2)
|
|
239
|
-
puts vec
|
|
341
|
+
puts vec.map { |x| x + 2 }
|
|
240
342
|
```
|
|
241
343
|
|
|
242
344
|
```
|
|
243
|
-
## [1]
|
|
345
|
+
## [1] 3
|
|
346
|
+
## [1] 4
|
|
347
|
+
## [1] 5
|
|
348
|
+
## [1] 6
|
|
244
349
|
```
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
350
|
+
|
|
351
|
+
# gKnitting a Document
|
|
352
|
+
|
|
353
|
+
This manual has been formatted using gKnit. gKnit uses Knitr and R markdown to knit
|
|
354
|
+
a document in Ruby or R and output it in any of the available formats for R markdown.
|
|
355
|
+
gKnit runs atop GraalVM and Galaaz. In gKnit, Ruby variables are persisted between
|
|
356
|
+
chunks, making it an ideal solution for literate programming. Also, since it is based
|
|
357
|
+
on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming with
|
|
358
|
+
Ruby and R is quite natural.
|
|
359
|
+
|
|
360
|
+
The idea of "literate programming" was first introduced by Donald Knuth in the
|
|
361
|
+
1980's [@Knuth:literate_programming].
|
|
362
|
+
The main intention of this approach was to develop software interspersing macro snippets,
|
|
363
|
+
traditional source code, and a natural language such as English in a document
|
|
364
|
+
that could be compiled into
|
|
365
|
+
executable code and at the same time easily read by a human developer. According to Knuth
|
|
366
|
+
"The practitioner of
|
|
367
|
+
literate programming can be regarded as an essayist, whose main concern is with exposition
|
|
368
|
+
and excellence of style."
|
|
369
|
+
|
|
370
|
+
The idea of literate programming evolved into the idea of reproducible research, in which
|
|
371
|
+
all the data, software code, documentation, graphics etc. needed to reproduce the research
|
|
372
|
+
and its reports could be included in a
|
|
373
|
+
single document or set of documents that when distributed to peers could be rerun generating
|
|
374
|
+
the same output and reports.
|
|
375
|
+
|
|
376
|
+
The R community has put a great deal of effort in reproducible research. In 2002, Sweave was
|
|
377
|
+
introduced and it allowed mixing R code with Latex generating high quality PDF documents. A
|
|
378
|
+
Sweave document could include code, the results of executing the code, graphics and text
|
|
379
|
+
such that it contained the whole narrative to reproduce the research. In
|
|
380
|
+
2012, Knitr, developed by Yihui Xie from RStudio was released to replace Sweave and to
|
|
381
|
+
consolidate in one single package the many extensions and add-on packages that
|
|
382
|
+
were necessary for Sweave.
|
|
383
|
+
|
|
384
|
+
With Knitr, __R markdown__ was also developed, an extension to the
|
|
385
|
+
Markdown format. With __R markdown__ and Knitr it is possible to generate reports in a multitude
|
|
386
|
+
of formats such as HTML, markdown, Latex, PDF, dvi, etc. __R markdown__ also allows the use of
|
|
387
|
+
multiple programming languages such as R, Ruby, Python, etc. in the same document.
|
|
388
|
+
|
|
389
|
+
In __R markdown__, text is interspersed with
|
|
390
|
+
code chunks that can be executed and both the code and its results can become
|
|
391
|
+
part of the final report. Although __R markdown__ allows multiple programming languages in the
|
|
392
|
+
same document, only R and Python (with
|
|
393
|
+
the reticulate package) can persist variables between chunks. For other languages, such as
|
|
394
|
+
Ruby, every chunk will start a new process and thus all data is lost between chunks, unless it
|
|
395
|
+
is somehow stored in a data file that is read by the next chunk.
|
|
396
|
+
|
|
397
|
+
Being able to persist data
|
|
398
|
+
between chunks is critical for literate programming otherwise the flow of the narrative is lost
|
|
399
|
+
by all the effort of having to save data and then reload it. Although this might, at first, seem like
|
|
400
|
+
a small nuisance, not being able to persist data between chunks is a major issue. For example, let's
|
|
401
|
+
take a look at the following simple example in which we want to show how to create a list and then
|
|
402
|
+
use it. Let's first assume that data cannot be persisted between chunks. In the next chunk we
|
|
403
|
+
create a list, then we would need to save it to file, but to save it, we need somehow to marshal the
|
|
404
|
+
data into a binary format:
|
|
251
405
|
|
|
252
406
|
|
|
253
407
|
```ruby
|
|
254
|
-
|
|
255
|
-
|
|
408
|
+
lst = R.list(a: 1, b: 2, c: 3)
|
|
409
|
+
lst.saveRDS("lst.rds")
|
|
410
|
+
```
|
|
411
|
+
then, on the next chunk, where variable 'lst' is used, we need to read back its value
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
```ruby
|
|
415
|
+
lst = R.readRDS("lst.rds")
|
|
416
|
+
puts lst
|
|
256
417
|
```
|
|
257
418
|
|
|
258
419
|
```
|
|
259
|
-
##
|
|
420
|
+
## $a
|
|
421
|
+
## [1] 1
|
|
422
|
+
##
|
|
423
|
+
## $b
|
|
424
|
+
## [1] 2
|
|
425
|
+
##
|
|
426
|
+
## $c
|
|
427
|
+
## [1] 3
|
|
260
428
|
```
|
|
261
429
|
|
|
262
|
-
|
|
430
|
+
Now, any single code has dozens of variables that we might want to use and reuse between chunks.
|
|
431
|
+
Clearly, such an approach becomes quickly unmanageable. Probably, because of
|
|
432
|
+
this problem, it is very rare to see any __R markdown__ document in the Ruby community.
|
|
263
433
|
|
|
264
|
-
|
|
434
|
+
When variables can be used across chunks, then no overhead is needed:
|
|
265
435
|
|
|
266
436
|
|
|
267
437
|
```ruby
|
|
268
|
-
|
|
438
|
+
lst = R.list(a: 1, b: 2, c: 3)
|
|
439
|
+
# any other code can be added here
|
|
440
|
+
```
|
|
441
|
+
|
|
442
|
+
|
|
443
|
+
```ruby
|
|
444
|
+
puts lst
|
|
269
445
|
```
|
|
270
446
|
|
|
271
447
|
```
|
|
272
|
-
##
|
|
448
|
+
## $a
|
|
449
|
+
## [1] 1
|
|
450
|
+
##
|
|
451
|
+
## $b
|
|
452
|
+
## [1] 2
|
|
453
|
+
##
|
|
454
|
+
## $c
|
|
455
|
+
## [1] 3
|
|
273
456
|
```
|
|
274
457
|
|
|
458
|
+
In the Python community, the same effort to have code and text in an integrated environment
|
|
459
|
+
started around the first decade of 2000. In 2006 iPython 0.7.2 was released. In 2014,
|
|
460
|
+
Fernando Pérez, spun off project Jupyter from iPython creating a web-based interactive
|
|
461
|
+
computation environment. Jupyter can now be used with many languages, including Ruby with the
|
|
462
|
+
iruby gem (https://github.com/SciRuby/iruby). In order to have multiple languages in a Jupyter
|
|
463
|
+
notebook the SoS kernel was developed (https://vatlab.github.io/sos-docs/).
|
|
464
|
+
|
|
465
|
+
## gKnit and __R markdown__
|
|
466
|
+
|
|
467
|
+
gKnit is based on knitr and __R markdown__ and can knit a document
|
|
468
|
+
written both in Ruby and/or R and output it in any of the available formats of __R markdown__. gKnit
|
|
469
|
+
allows ruby developers to do literate programming and reproducible research by allowing them to
|
|
470
|
+
have in a single document, text and code.
|
|
471
|
+
|
|
472
|
+
In gKnit, Ruby variables are persisted between
|
|
473
|
+
chunks, making it an ideal solution for literate programming in this language. Also,
|
|
474
|
+
since it is based on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming
|
|
475
|
+
with Ruby and R is quite natural.
|
|
476
|
+
|
|
477
|
+
This is not a blog post on __R markdown__, and the interested user is directed to the following links
|
|
478
|
+
for detailed information on its capabilities and use.
|
|
479
|
+
|
|
480
|
+
* https://rmarkdown.rstudio.com/ or
|
|
481
|
+
* https://bookdown.org/yihui/rmarkdown/
|
|
482
|
+
|
|
483
|
+
In this post, we will describe just the main aspects of __R markdown__, so the user can start
|
|
484
|
+
gKnitting Ruby and R documents quickly.
|
|
485
|
+
|
|
486
|
+
## The Yaml header
|
|
487
|
+
|
|
488
|
+
An __R markdown__ document should start with a Yaml header and be stored in a file with
|
|
489
|
+
'.Rmd' extension. This document has the following header for gKnitting an HTML document.
|
|
275
490
|
|
|
276
|
-
```ruby
|
|
277
|
-
puts vec1 * 5
|
|
278
491
|
```
|
|
492
|
+
---
|
|
493
|
+
title: "How to do reproducible research in Ruby with gKnit"
|
|
494
|
+
author:
|
|
495
|
+
- "Rodrigo Botafogo"
|
|
496
|
+
- "Daniel Mossé - University of Pittsburgh"
|
|
497
|
+
tags: [Tech, Data Science, Ruby, R, GraalVM]
|
|
498
|
+
date: "20/02/2019"
|
|
499
|
+
output:
|
|
500
|
+
html_document:
|
|
501
|
+
self_contained: true
|
|
502
|
+
keep_md: true
|
|
503
|
+
pdf_document:
|
|
504
|
+
includes:
|
|
505
|
+
in_header: ["../../sty/galaaz.sty"]
|
|
506
|
+
number_sections: yes
|
|
507
|
+
---
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
For more information on the options in the Yaml header, [check here](https://bookdown.org/yihui/rmarkdown/html-document.html).
|
|
511
|
+
|
|
512
|
+
## __R Markdown__ formatting
|
|
513
|
+
|
|
514
|
+
Document formatting can be done with simple markups such as:
|
|
515
|
+
|
|
516
|
+
## Headers
|
|
279
517
|
|
|
280
518
|
```
|
|
281
|
-
|
|
519
|
+
# Header 1
|
|
520
|
+
|
|
521
|
+
## Header 2
|
|
522
|
+
|
|
523
|
+
### Header 3
|
|
524
|
+
|
|
282
525
|
```
|
|
283
526
|
|
|
284
|
-
|
|
527
|
+
## Lists
|
|
285
528
|
|
|
529
|
+
```
|
|
530
|
+
Unordered lists:
|
|
286
531
|
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
532
|
+
* Item 1
|
|
533
|
+
* Item 2
|
|
534
|
+
+ Item 2a
|
|
535
|
+
+ Item 2b
|
|
290
536
|
```
|
|
291
537
|
|
|
292
538
|
```
|
|
293
|
-
|
|
539
|
+
Ordered Lists
|
|
540
|
+
|
|
541
|
+
1. Item 1
|
|
542
|
+
2. Item 2
|
|
543
|
+
3. Item 3
|
|
544
|
+
+ Item 3a
|
|
545
|
+
+ Item 3b
|
|
294
546
|
```
|
|
295
547
|
|
|
296
|
-
|
|
548
|
+
For more R markdown formatting go to https://rmarkdown.rstudio.com/authoring_basics.html.
|
|
297
549
|
|
|
298
|
-
|
|
550
|
+
## R chunks
|
|
299
551
|
|
|
552
|
+
Running and executing Ruby and R code is actually what really interests us in this blog.
|
|
553
|
+
Inserting a code chunk is done by adding code in a block delimited by three back ticks
|
|
554
|
+
followed by an open
|
|
555
|
+
curly brace ('{') followed by the engine name (r, ruby, rb, include, ...), and
|
|
556
|
+
any optional chunk_label and options, as shown below:
|
|
300
557
|
|
|
301
|
-
|
|
302
|
-
|
|
558
|
+
````
|
|
559
|
+
```{engine_name [chunk_label], [chunk_options]}
|
|
303
560
|
```
|
|
561
|
+
````
|
|
562
|
+
|
|
563
|
+
for instance, let's add an R chunk to the document labeled 'first_r_chunk'. This is
|
|
564
|
+
a very simple code just to create a variable and print it out, as follows:
|
|
304
565
|
|
|
566
|
+
````
|
|
567
|
+
```{r first_r_chunk}
|
|
568
|
+
vec <- c(1, 2, 3)
|
|
569
|
+
print(vec)
|
|
305
570
|
```
|
|
306
|
-
|
|
571
|
+
````
|
|
572
|
+
|
|
573
|
+
If this block is added to an __R markdown__ document and gKnitted the result will be:
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
```r
|
|
577
|
+
vec <- c(1, 2, 3)
|
|
578
|
+
print(vec)
|
|
307
579
|
```
|
|
308
580
|
|
|
309
|
-
|
|
310
|
-
1
|
|
581
|
+
```
|
|
582
|
+
## [1] 1 2 3
|
|
583
|
+
```
|
|
311
584
|
|
|
585
|
+
Now let's say that we want to do some analysis in the code, but just print the result and not the
|
|
586
|
+
code itself. For this, we need to add the option 'echo = FALSE'.
|
|
312
587
|
|
|
313
|
-
|
|
314
|
-
|
|
588
|
+
````
|
|
589
|
+
```{r second_r_chunk, echo = FALSE}
|
|
590
|
+
vec2 <- c(10, 20, 30)
|
|
591
|
+
vec3 <- vec * vec2
|
|
592
|
+
print(vec3)
|
|
315
593
|
```
|
|
594
|
+
````
|
|
595
|
+
Here is how this block will show up in the document. Observe that the code is not shown
|
|
596
|
+
and we only see the execution result in a white box
|
|
597
|
+
|
|
316
598
|
|
|
317
599
|
```
|
|
318
|
-
## [1]
|
|
600
|
+
## [1] 10 40 90
|
|
319
601
|
```
|
|
320
602
|
|
|
321
|
-
|
|
603
|
+
A description of the available chunk options can be found in https://yihui.name/knitr/.
|
|
322
604
|
|
|
605
|
+
Let's add another R chunk with a function definition. In this example, a vector
|
|
606
|
+
'r_vec' is created and
|
|
607
|
+
a new function 'reduce_sum' is defined. The chunk specification is
|
|
323
608
|
|
|
324
|
-
|
|
325
|
-
|
|
609
|
+
````
|
|
610
|
+
```{r data_creation}
|
|
611
|
+
r_vec <- c(1, 2, 3, 4, 5)
|
|
612
|
+
|
|
613
|
+
reduce_sum <- function(...) {
|
|
614
|
+
Reduce(sum, as.list(...))
|
|
615
|
+
}
|
|
326
616
|
```
|
|
617
|
+
````
|
|
618
|
+
|
|
619
|
+
and this is what it will look like once executed. From now on, to be concise in the
|
|
620
|
+
presentation we will not show chunk definitions any longer.
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
|
|
624
|
+
```r
|
|
625
|
+
r_vec <- c(1, 2, 3, 4, 5)
|
|
327
626
|
|
|
627
|
+
reduce_sum <- function(...) {
|
|
628
|
+
Reduce(sum, as.list(...))
|
|
629
|
+
}
|
|
328
630
|
```
|
|
329
|
-
|
|
631
|
+
|
|
632
|
+
We can, possibly in another chunk, access the vector and call the function as follows:
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
```r
|
|
636
|
+
print(r_vec)
|
|
330
637
|
```
|
|
331
638
|
|
|
332
|
-
|
|
333
|
-
|
|
639
|
+
```
|
|
640
|
+
## [1] 1 2 3 4 5
|
|
641
|
+
```
|
|
334
642
|
|
|
643
|
+
```r
|
|
644
|
+
print(reduce_sum(r_vec))
|
|
645
|
+
```
|
|
335
646
|
|
|
336
|
-
```ruby
|
|
337
|
-
puts vec4[-3]
|
|
338
|
-
puts vec4[-R.c(1, 3, 5, 7)]
|
|
339
647
|
```
|
|
648
|
+
## [1] 15
|
|
649
|
+
```
|
|
650
|
+
## R Graphics with ggplot
|
|
651
|
+
|
|
652
|
+
In the following chunk, we create a bubble chart in R using ggplot and include it in
|
|
653
|
+
this document. Note that there is no directive in the code to include the image, this
|
|
654
|
+
occurs automatically. The 'mpg' dataframe is natively available to R and to Galaaz as
|
|
655
|
+
well.
|
|
656
|
+
|
|
657
|
+
For the reader not knowledgeable of ggplot, ggplot is a graphics library based on "the
|
|
658
|
+
grammar of graphics" [@Wilkinson:grammar_of_graphics]. The idea of the grammar of graphics
|
|
659
|
+
is to build a graphics by adding layers to the plot. More information can be found in
|
|
660
|
+
https://towardsdatascience.com/a-comprehensive-guide-to-the-grammar-of-graphics-for-effective-visualization-of-multi-dimensional-1f92b4ed4149.
|
|
661
|
+
|
|
662
|
+
In the plot below the 'mpg' dataset from the ggplot2 package is used. "The data concerns city-cycle fuel
|
|
663
|
+
consumption in miles per gallon, to be predicted in terms of 3 multivalued discrete and 5
|
|
664
|
+
continuous attributes." (Quinlan, 1993)
|
|
665
|
+
|
|
666
|
+
First, the 'mpg' dataset is filtered to extract only cars from the following manufacturers: Audi, Ford,
|
|
667
|
+
Honda, and Hyundai and stored in the 'mpg_select' variable. Then, the selected dataframe is passed
|
|
668
|
+
to the ggplot function specifying in the aesthetic method (aes) that 'displacement' (displ) should
|
|
669
|
+
be plotted in the 'x' axis and 'city mileage' should be on the 'y' axis. In the 'labs' layer we
|
|
670
|
+
pass the 'title' and 'subtitle' for the plot. To the basic plot 'g', geom\_jitter is added, that
|
|
671
|
+
plots cars from the same manufacturer with the same color (col=manufacturer) and the size of the
|
|
672
|
+
car point equal to its highway consumption (size = hwy). Finally, a last layer is plotted containing
|
|
673
|
+
a linear regression line (method = "lm") for every manufacturer.
|
|
674
|
+
|
|
675
|
+
|
|
676
|
+
```r
|
|
677
|
+
# load package and data
|
|
678
|
+
library(ggplot2)
|
|
679
|
+
```
|
|
680
|
+
|
|
681
|
+
```
|
|
682
|
+
## Message:
|
|
683
|
+
## Registered S3 methods overwritten by 'ggplot2':
|
|
684
|
+
## method from
|
|
685
|
+
## [.quosures rlang
|
|
686
|
+
## c.quosures rlang
|
|
687
|
+
## print.quosures rlang
|
|
688
|
+
```
|
|
689
|
+
|
|
690
|
+
```r
|
|
691
|
+
data(mpg, package="ggplot2")
|
|
692
|
+
|
|
693
|
+
mpg_select <- mpg[mpg$manufacturer %in% c("audi", "ford", "honda", "hyundai"), ]
|
|
694
|
+
|
|
695
|
+
# Scatterplot
|
|
696
|
+
theme_set(theme_bw()) # pre-set the bw theme.
|
|
697
|
+
g <- ggplot(mpg_select, aes(displ, cty)) +
|
|
698
|
+
labs(subtitle="mpg: Displacement vs City Mileage",
|
|
699
|
+
title="Bubble chart")
|
|
700
|
+
|
|
701
|
+
g + geom_jitter(aes(col=manufacturer, size=hwy)) +
|
|
702
|
+
geom_smooth(aes(col=manufacturer), method="lm", se=F)
|
|
703
|
+
```
|
|
704
|
+
|
|
705
|
+
<!-- -->
|
|
706
|
+
|
|
707
|
+
## Ruby chunks
|
|
708
|
+
|
|
709
|
+
Including a Ruby chunk is just as easy as including an R chunk in the document: just
|
|
710
|
+
change the name of the engine to 'ruby'. It is also possible to pass chunk options
|
|
711
|
+
to the Ruby engine; however, this version does not accept all the options that are
|
|
712
|
+
available to R chunks. Future versions will add those options.
|
|
713
|
+
|
|
714
|
+
````
|
|
715
|
+
```{ruby first_ruby_chunk}
|
|
716
|
+
```
|
|
717
|
+
````
|
|
718
|
+
|
|
719
|
+
In this example, the ruby chunk is called 'first_ruby_chunk'. One important
|
|
720
|
+
aspect of chunk labels is that they cannot be duplicated. If a chunk label is
|
|
721
|
+
duplicated, gKnit will stop with an error.
|
|
722
|
+
|
|
723
|
+
In the following chunk, variables 'a', 'b' and 'c' are standard Ruby variables
|
|
724
|
+
and 'vec' and 'vec2' are two vectors created by calling the 'c' method on the
|
|
725
|
+
R module.
|
|
726
|
+
|
|
727
|
+
In Galaaz, the R module allows us to access R functions transparently. The 'c'
|
|
728
|
+
function in R, is a function that concatenates its arguments making a vector.
|
|
729
|
+
|
|
730
|
+
It
|
|
731
|
+
should be clear that there is no requirement in gknit to call or use any R
|
|
732
|
+
functions. gKnit will knit standard Ruby code, or even general text without
|
|
733
|
+
any code.
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
```ruby
|
|
737
|
+
a = [1, 2, 3]
|
|
738
|
+
b = "US$ 250.000"
|
|
739
|
+
c = "The 'outputs' function"
|
|
740
|
+
|
|
741
|
+
vec = R.c(1, 2, 3)
|
|
742
|
+
vec2 = R.c(10, 20, 30)
|
|
743
|
+
```
|
|
744
|
+
|
|
745
|
+
In the next block, variables 'a', 'vec' and 'vec2' are used and printed.
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
```ruby
|
|
749
|
+
puts a
|
|
750
|
+
puts vec * vec2
|
|
751
|
+
```
|
|
752
|
+
|
|
753
|
+
```
|
|
754
|
+
## 1
|
|
755
|
+
## 2
|
|
756
|
+
## 3
|
|
757
|
+
## [1] 10 40 90
|
|
758
|
+
```
|
|
759
|
+
|
|
760
|
+
Note that 'a' is a standard Ruby Array and 'vec' and 'vec2' are vectors that behave accordingly,
|
|
761
|
+
where multiplication works as expected.
|
|
762
|
+
|
|
763
|
+
## Inline Ruby code
|
|
764
|
+
|
|
765
|
+
When using a Ruby chunk, the code and the output are formatted in blocks as seen above.
|
|
766
|
+
This formatting is not always desired. Sometimes, we want to have the results of the
|
|
767
|
+
Ruby evaluation included in the middle of a phrase. gKnit allows adding inline Ruby code
|
|
768
|
+
with the 'rb' engine. The following chunk specification will
|
|
769
|
+
create an inline Ruby text:
|
|
770
|
+
|
|
771
|
+
````
|
|
772
|
+
This is some text with inline Ruby accessing variable 'b' which has value:
|
|
773
|
+
```{rb puts b}
|
|
774
|
+
```
|
|
775
|
+
and is followed by some other text!
|
|
776
|
+
````
|
|
777
|
+
|
|
778
|
+
<div style="margin-bottom:30px;">
|
|
779
|
+
</div>
|
|
780
|
+
|
|
781
|
+
This is some text with inline Ruby accessing variable 'b' which has value:
|
|
782
|
+
US$ 250.000
|
|
783
|
+
and is followed by some other text!
|
|
784
|
+
|
|
785
|
+
<div style="margin-bottom:30px;">
|
|
786
|
+
</div>
|
|
787
|
+
|
|
788
|
+
Note that it is important not to add any new line before or after the code
|
|
789
|
+
block if we want everything to be in only one line, resulting in the following sentence
|
|
790
|
+
with inline Ruby code.
|
|
791
|
+
|
|
792
|
+
|
|
793
|
+
### The 'outputs' function
|
|
794
|
+
|
|
795
|
+
We have previously used the standard 'puts' method in Ruby chunks in order to produce
|
|
796
|
+
output. The result of a 'puts', as seen in all previous chunks that use it, is formatted
|
|
797
|
+
inside a white box that
|
|
798
|
+
follows the code block. Many times however, we would like to do some processing in the
|
|
799
|
+
Ruby chunk and have the result of this processing generate an output that is
|
|
800
|
+
"included" in the document as if we had typed it in the __R markdown__ document.
|
|
801
|
+
|
|
802
|
+
For example, suppose we want to create a new heading in our document, but the heading
|
|
803
|
+
phrase is the result of some code processing: maybe it's the first line of a file we are
|
|
804
|
+
going to read. Method 'outputs' adds its output as if typed in the __R markdown__ document.
|
|
805
|
+
|
|
806
|
+
Take now a look at variable 'c' (it was defined in a previous block above) as
|
|
807
|
+
'c = "The 'outputs' function". "The 'outputs' function" is actually the name of this
|
|
808
|
+
section and it was created using the 'outputs' function inside a Ruby chunk.
|
|
809
|
+
|
|
810
|
+
The ruby chunk to generate this heading is:
|
|
811
|
+
|
|
812
|
+
````
|
|
813
|
+
```{ruby heading}
|
|
814
|
+
outputs "### #{c}"
|
|
815
|
+
```
|
|
816
|
+
````
|
|
817
|
+
|
|
818
|
+
The three '###' is the way we add a Heading 3 in __R markdown__.
|
|
819
|
+
|
|
820
|
+
|
|
821
|
+
### HTML Output from Ruby Chunks
|
|
822
|
+
|
|
823
|
+
We've just seen the use of method 'outputs' to add text to the __R markdown__
|
|
824
|
+
document. This technique can also be used to add HTML code to the document. In
|
|
825
|
+
__R markdown__, any html code typed directly in the document will be properly rendered.
|
|
826
|
+
Here, for instance, is a table definition in HTML and its output in the document:
|
|
827
|
+
|
|
828
|
+
```
|
|
829
|
+
<table style="width:100%">
|
|
830
|
+
<tr>
|
|
831
|
+
<th>Firstname</th>
|
|
832
|
+
<th>Lastname</th>
|
|
833
|
+
<th>Age</th>
|
|
834
|
+
</tr>
|
|
835
|
+
<tr>
|
|
836
|
+
<td>Jill</td>
|
|
837
|
+
<td>Smith</td>
|
|
838
|
+
<td>50</td>
|
|
839
|
+
</tr>
|
|
840
|
+
<tr>
|
|
841
|
+
<td>Eve</td>
|
|
842
|
+
<td>Jackson</td>
|
|
843
|
+
<td>94</td>
|
|
844
|
+
</tr>
|
|
845
|
+
</table>
|
|
846
|
+
```
|
|
847
|
+
<div style="margin-bottom:30px;">
|
|
848
|
+
</div>
|
|
849
|
+
|
|
850
|
+
<table style="width:100%">
|
|
851
|
+
<tr>
|
|
852
|
+
<th>Firstname</th>
|
|
853
|
+
<th>Lastname</th>
|
|
854
|
+
<th>Age</th>
|
|
855
|
+
</tr>
|
|
856
|
+
<tr>
|
|
857
|
+
<td>Jill</td>
|
|
858
|
+
<td>Smith</td>
|
|
859
|
+
<td>50</td>
|
|
860
|
+
</tr>
|
|
861
|
+
<tr>
|
|
862
|
+
<td>Eve</td>
|
|
863
|
+
<td>Jackson</td>
|
|
864
|
+
<td>94</td>
|
|
865
|
+
</tr>
|
|
866
|
+
</table>
|
|
867
|
+
|
|
868
|
+
<div style="margin-bottom:30px;">
|
|
869
|
+
</div>
|
|
870
|
+
|
|
871
|
+
But manually creating HTML output is not always easy or desirable, especially
|
|
872
|
+
if we intend the document to be rendered in other formats, for example, as Latex.
|
|
873
|
+
Also, the above
|
|
874
|
+
table looks ugly. The 'kableExtra' library is a great library for
|
|
875
|
+
creating beautiful tables. Take a look at https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html
|
|
876
|
+
|
|
877
|
+
In the next chunk, we output the 'mtcars' dataframe from R in a nicely formatted
|
|
878
|
+
table. Note that we retrieve the mtcars dataframe by using '~:mtcars'.
|
|
879
|
+
|
|
880
|
+
|
|
881
|
+
```ruby
|
|
882
|
+
R.install_and_loads('kableExtra')
|
|
883
|
+
outputs (~:mtcars).kable.kable_styling
|
|
884
|
+
```
|
|
885
|
+
|
|
886
|
+
<table class="table" style="margin-left: auto; margin-right: auto;">
|
|
887
|
+
<thead>
|
|
888
|
+
<tr>
|
|
889
|
+
<th style="text-align:left;"> </th>
|
|
890
|
+
<th style="text-align:right;"> mpg </th>
|
|
891
|
+
<th style="text-align:right;"> cyl </th>
|
|
892
|
+
<th style="text-align:right;"> disp </th>
|
|
893
|
+
<th style="text-align:right;"> hp </th>
|
|
894
|
+
<th style="text-align:right;"> drat </th>
|
|
895
|
+
<th style="text-align:right;"> wt </th>
|
|
896
|
+
<th style="text-align:right;"> qsec </th>
|
|
897
|
+
<th style="text-align:right;"> vs </th>
|
|
898
|
+
<th style="text-align:right;"> am </th>
|
|
899
|
+
<th style="text-align:right;"> gear </th>
|
|
900
|
+
<th style="text-align:right;"> carb </th>
|
|
901
|
+
</tr>
|
|
902
|
+
</thead>
|
|
903
|
+
<tbody>
|
|
904
|
+
<tr>
|
|
905
|
+
<td style="text-align:left;"> Mazda RX4 </td>
|
|
906
|
+
<td style="text-align:right;"> 21.0 </td>
|
|
907
|
+
<td style="text-align:right;"> 6 </td>
|
|
908
|
+
<td style="text-align:right;"> 160.0 </td>
|
|
909
|
+
<td style="text-align:right;"> 110 </td>
|
|
910
|
+
<td style="text-align:right;"> 3.90 </td>
|
|
911
|
+
<td style="text-align:right;"> 2.620 </td>
|
|
912
|
+
<td style="text-align:right;"> 16.46 </td>
|
|
913
|
+
<td style="text-align:right;"> 0 </td>
|
|
914
|
+
<td style="text-align:right;"> 1 </td>
|
|
915
|
+
<td style="text-align:right;"> 4 </td>
|
|
916
|
+
<td style="text-align:right;"> 4 </td>
|
|
917
|
+
</tr>
|
|
918
|
+
<tr>
|
|
919
|
+
<td style="text-align:left;"> Mazda RX4 Wag </td>
|
|
920
|
+
<td style="text-align:right;"> 21.0 </td>
|
|
921
|
+
<td style="text-align:right;"> 6 </td>
|
|
922
|
+
<td style="text-align:right;"> 160.0 </td>
|
|
923
|
+
<td style="text-align:right;"> 110 </td>
|
|
924
|
+
<td style="text-align:right;"> 3.90 </td>
|
|
925
|
+
<td style="text-align:right;"> 2.875 </td>
|
|
926
|
+
<td style="text-align:right;"> 17.02 </td>
|
|
927
|
+
<td style="text-align:right;"> 0 </td>
|
|
928
|
+
<td style="text-align:right;"> 1 </td>
|
|
929
|
+
<td style="text-align:right;"> 4 </td>
|
|
930
|
+
<td style="text-align:right;"> 4 </td>
|
|
931
|
+
</tr>
|
|
932
|
+
<tr>
|
|
933
|
+
<td style="text-align:left;"> Datsun 710 </td>
|
|
934
|
+
<td style="text-align:right;"> 22.8 </td>
|
|
935
|
+
<td style="text-align:right;"> 4 </td>
|
|
936
|
+
<td style="text-align:right;"> 108.0 </td>
|
|
937
|
+
<td style="text-align:right;"> 93 </td>
|
|
938
|
+
<td style="text-align:right;"> 3.85 </td>
|
|
939
|
+
<td style="text-align:right;"> 2.320 </td>
|
|
940
|
+
<td style="text-align:right;"> 18.61 </td>
|
|
941
|
+
<td style="text-align:right;"> 1 </td>
|
|
942
|
+
<td style="text-align:right;"> 1 </td>
|
|
943
|
+
<td style="text-align:right;"> 4 </td>
|
|
944
|
+
<td style="text-align:right;"> 1 </td>
|
|
945
|
+
</tr>
|
|
946
|
+
<tr>
|
|
947
|
+
<td style="text-align:left;"> Hornet 4 Drive </td>
|
|
948
|
+
<td style="text-align:right;"> 21.4 </td>
|
|
949
|
+
<td style="text-align:right;"> 6 </td>
|
|
950
|
+
<td style="text-align:right;"> 258.0 </td>
|
|
951
|
+
<td style="text-align:right;"> 110 </td>
|
|
952
|
+
<td style="text-align:right;"> 3.08 </td>
|
|
953
|
+
<td style="text-align:right;"> 3.215 </td>
|
|
954
|
+
<td style="text-align:right;"> 19.44 </td>
|
|
955
|
+
<td style="text-align:right;"> 1 </td>
|
|
956
|
+
<td style="text-align:right;"> 0 </td>
|
|
957
|
+
<td style="text-align:right;"> 3 </td>
|
|
958
|
+
<td style="text-align:right;"> 1 </td>
|
|
959
|
+
</tr>
|
|
960
|
+
<tr>
|
|
961
|
+
<td style="text-align:left;"> Hornet Sportabout </td>
|
|
962
|
+
<td style="text-align:right;"> 18.7 </td>
|
|
963
|
+
<td style="text-align:right;"> 8 </td>
|
|
964
|
+
<td style="text-align:right;"> 360.0 </td>
|
|
965
|
+
<td style="text-align:right;"> 175 </td>
|
|
966
|
+
<td style="text-align:right;"> 3.15 </td>
|
|
967
|
+
<td style="text-align:right;"> 3.440 </td>
|
|
968
|
+
<td style="text-align:right;"> 17.02 </td>
|
|
969
|
+
<td style="text-align:right;"> 0 </td>
|
|
970
|
+
<td style="text-align:right;"> 0 </td>
|
|
971
|
+
<td style="text-align:right;"> 3 </td>
|
|
972
|
+
<td style="text-align:right;"> 2 </td>
|
|
973
|
+
</tr>
|
|
974
|
+
<tr>
|
|
975
|
+
<td style="text-align:left;"> Valiant </td>
|
|
976
|
+
<td style="text-align:right;"> 18.1 </td>
|
|
977
|
+
<td style="text-align:right;"> 6 </td>
|
|
978
|
+
<td style="text-align:right;"> 225.0 </td>
|
|
979
|
+
<td style="text-align:right;"> 105 </td>
|
|
980
|
+
<td style="text-align:right;"> 2.76 </td>
|
|
981
|
+
<td style="text-align:right;"> 3.460 </td>
|
|
982
|
+
<td style="text-align:right;"> 20.22 </td>
|
|
983
|
+
<td style="text-align:right;"> 1 </td>
|
|
984
|
+
<td style="text-align:right;"> 0 </td>
|
|
985
|
+
<td style="text-align:right;"> 3 </td>
|
|
986
|
+
<td style="text-align:right;"> 1 </td>
|
|
987
|
+
</tr>
|
|
988
|
+
<tr>
|
|
989
|
+
<td style="text-align:left;"> Duster 360 </td>
|
|
990
|
+
<td style="text-align:right;"> 14.3 </td>
|
|
991
|
+
<td style="text-align:right;"> 8 </td>
|
|
992
|
+
<td style="text-align:right;"> 360.0 </td>
|
|
993
|
+
<td style="text-align:right;"> 245 </td>
|
|
994
|
+
<td style="text-align:right;"> 3.21 </td>
|
|
995
|
+
<td style="text-align:right;"> 3.570 </td>
|
|
996
|
+
<td style="text-align:right;"> 15.84 </td>
|
|
997
|
+
<td style="text-align:right;"> 0 </td>
|
|
998
|
+
<td style="text-align:right;"> 0 </td>
|
|
999
|
+
<td style="text-align:right;"> 3 </td>
|
|
1000
|
+
<td style="text-align:right;"> 4 </td>
|
|
1001
|
+
</tr>
|
|
1002
|
+
<tr>
|
|
1003
|
+
<td style="text-align:left;"> Merc 240D </td>
|
|
1004
|
+
<td style="text-align:right;"> 24.4 </td>
|
|
1005
|
+
<td style="text-align:right;"> 4 </td>
|
|
1006
|
+
<td style="text-align:right;"> 146.7 </td>
|
|
1007
|
+
<td style="text-align:right;"> 62 </td>
|
|
1008
|
+
<td style="text-align:right;"> 3.69 </td>
|
|
1009
|
+
<td style="text-align:right;"> 3.190 </td>
|
|
1010
|
+
<td style="text-align:right;"> 20.00 </td>
|
|
1011
|
+
<td style="text-align:right;"> 1 </td>
|
|
1012
|
+
<td style="text-align:right;"> 0 </td>
|
|
1013
|
+
<td style="text-align:right;"> 4 </td>
|
|
1014
|
+
<td style="text-align:right;"> 2 </td>
|
|
1015
|
+
</tr>
|
|
1016
|
+
<tr>
|
|
1017
|
+
<td style="text-align:left;"> Merc 230 </td>
|
|
1018
|
+
<td style="text-align:right;"> 22.8 </td>
|
|
1019
|
+
<td style="text-align:right;"> 4 </td>
|
|
1020
|
+
<td style="text-align:right;"> 140.8 </td>
|
|
1021
|
+
<td style="text-align:right;"> 95 </td>
|
|
1022
|
+
<td style="text-align:right;"> 3.92 </td>
|
|
1023
|
+
<td style="text-align:right;"> 3.150 </td>
|
|
1024
|
+
<td style="text-align:right;"> 22.90 </td>
|
|
1025
|
+
<td style="text-align:right;"> 1 </td>
|
|
1026
|
+
<td style="text-align:right;"> 0 </td>
|
|
1027
|
+
<td style="text-align:right;"> 4 </td>
|
|
1028
|
+
<td style="text-align:right;"> 2 </td>
|
|
1029
|
+
</tr>
|
|
1030
|
+
<tr>
|
|
1031
|
+
<td style="text-align:left;"> Merc 280 </td>
|
|
1032
|
+
<td style="text-align:right;"> 19.2 </td>
|
|
1033
|
+
<td style="text-align:right;"> 6 </td>
|
|
1034
|
+
<td style="text-align:right;"> 167.6 </td>
|
|
1035
|
+
<td style="text-align:right;"> 123 </td>
|
|
1036
|
+
<td style="text-align:right;"> 3.92 </td>
|
|
1037
|
+
<td style="text-align:right;"> 3.440 </td>
|
|
1038
|
+
<td style="text-align:right;"> 18.30 </td>
|
|
1039
|
+
<td style="text-align:right;"> 1 </td>
|
|
1040
|
+
<td style="text-align:right;"> 0 </td>
|
|
1041
|
+
<td style="text-align:right;"> 4 </td>
|
|
1042
|
+
<td style="text-align:right;"> 4 </td>
|
|
1043
|
+
</tr>
|
|
1044
|
+
<tr>
|
|
1045
|
+
<td style="text-align:left;"> Merc 280C </td>
|
|
1046
|
+
<td style="text-align:right;"> 17.8 </td>
|
|
1047
|
+
<td style="text-align:right;"> 6 </td>
|
|
1048
|
+
<td style="text-align:right;"> 167.6 </td>
|
|
1049
|
+
<td style="text-align:right;"> 123 </td>
|
|
1050
|
+
<td style="text-align:right;"> 3.92 </td>
|
|
1051
|
+
<td style="text-align:right;"> 3.440 </td>
|
|
1052
|
+
<td style="text-align:right;"> 18.90 </td>
|
|
1053
|
+
<td style="text-align:right;"> 1 </td>
|
|
1054
|
+
<td style="text-align:right;"> 0 </td>
|
|
1055
|
+
<td style="text-align:right;"> 4 </td>
|
|
1056
|
+
<td style="text-align:right;"> 4 </td>
|
|
1057
|
+
</tr>
|
|
1058
|
+
<tr>
|
|
1059
|
+
<td style="text-align:left;"> Merc 450SE </td>
|
|
1060
|
+
<td style="text-align:right;"> 16.4 </td>
|
|
1061
|
+
<td style="text-align:right;"> 8 </td>
|
|
1062
|
+
<td style="text-align:right;"> 275.8 </td>
|
|
1063
|
+
<td style="text-align:right;"> 180 </td>
|
|
1064
|
+
<td style="text-align:right;"> 3.07 </td>
|
|
1065
|
+
<td style="text-align:right;"> 4.070 </td>
|
|
1066
|
+
<td style="text-align:right;"> 17.40 </td>
|
|
1067
|
+
<td style="text-align:right;"> 0 </td>
|
|
1068
|
+
<td style="text-align:right;"> 0 </td>
|
|
1069
|
+
<td style="text-align:right;"> 3 </td>
|
|
1070
|
+
<td style="text-align:right;"> 3 </td>
|
|
1071
|
+
</tr>
|
|
1072
|
+
<tr>
|
|
1073
|
+
<td style="text-align:left;"> Merc 450SL </td>
|
|
1074
|
+
<td style="text-align:right;"> 17.3 </td>
|
|
1075
|
+
<td style="text-align:right;"> 8 </td>
|
|
1076
|
+
<td style="text-align:right;"> 275.8 </td>
|
|
1077
|
+
<td style="text-align:right;"> 180 </td>
|
|
1078
|
+
<td style="text-align:right;"> 3.07 </td>
|
|
1079
|
+
<td style="text-align:right;"> 3.730 </td>
|
|
1080
|
+
<td style="text-align:right;"> 17.60 </td>
|
|
1081
|
+
<td style="text-align:right;"> 0 </td>
|
|
1082
|
+
<td style="text-align:right;"> 0 </td>
|
|
1083
|
+
<td style="text-align:right;"> 3 </td>
|
|
1084
|
+
<td style="text-align:right;"> 3 </td>
|
|
1085
|
+
</tr>
|
|
1086
|
+
<tr>
|
|
1087
|
+
<td style="text-align:left;"> Merc 450SLC </td>
|
|
1088
|
+
<td style="text-align:right;"> 15.2 </td>
|
|
1089
|
+
<td style="text-align:right;"> 8 </td>
|
|
1090
|
+
<td style="text-align:right;"> 275.8 </td>
|
|
1091
|
+
<td style="text-align:right;"> 180 </td>
|
|
1092
|
+
<td style="text-align:right;"> 3.07 </td>
|
|
1093
|
+
<td style="text-align:right;"> 3.780 </td>
|
|
1094
|
+
<td style="text-align:right;"> 18.00 </td>
|
|
1095
|
+
<td style="text-align:right;"> 0 </td>
|
|
1096
|
+
<td style="text-align:right;"> 0 </td>
|
|
1097
|
+
<td style="text-align:right;"> 3 </td>
|
|
1098
|
+
<td style="text-align:right;"> 3 </td>
|
|
1099
|
+
</tr>
|
|
1100
|
+
<tr>
|
|
1101
|
+
<td style="text-align:left;"> Cadillac Fleetwood </td>
|
|
1102
|
+
<td style="text-align:right;"> 10.4 </td>
|
|
1103
|
+
<td style="text-align:right;"> 8 </td>
|
|
1104
|
+
<td style="text-align:right;"> 472.0 </td>
|
|
1105
|
+
<td style="text-align:right;"> 205 </td>
|
|
1106
|
+
<td style="text-align:right;"> 2.93 </td>
|
|
1107
|
+
<td style="text-align:right;"> 5.250 </td>
|
|
1108
|
+
<td style="text-align:right;"> 17.98 </td>
|
|
1109
|
+
<td style="text-align:right;"> 0 </td>
|
|
1110
|
+
<td style="text-align:right;"> 0 </td>
|
|
1111
|
+
<td style="text-align:right;"> 3 </td>
|
|
1112
|
+
<td style="text-align:right;"> 4 </td>
|
|
1113
|
+
</tr>
|
|
1114
|
+
<tr>
|
|
1115
|
+
<td style="text-align:left;"> Lincoln Continental </td>
|
|
1116
|
+
<td style="text-align:right;"> 10.4 </td>
|
|
1117
|
+
<td style="text-align:right;"> 8 </td>
|
|
1118
|
+
<td style="text-align:right;"> 460.0 </td>
|
|
1119
|
+
<td style="text-align:right;"> 215 </td>
|
|
1120
|
+
<td style="text-align:right;"> 3.00 </td>
|
|
1121
|
+
<td style="text-align:right;"> 5.424 </td>
|
|
1122
|
+
<td style="text-align:right;"> 17.82 </td>
|
|
1123
|
+
<td style="text-align:right;"> 0 </td>
|
|
1124
|
+
<td style="text-align:right;"> 0 </td>
|
|
1125
|
+
<td style="text-align:right;"> 3 </td>
|
|
1126
|
+
<td style="text-align:right;"> 4 </td>
|
|
1127
|
+
</tr>
|
|
1128
|
+
<tr>
|
|
1129
|
+
<td style="text-align:left;"> Chrysler Imperial </td>
|
|
1130
|
+
<td style="text-align:right;"> 14.7 </td>
|
|
1131
|
+
<td style="text-align:right;"> 8 </td>
|
|
1132
|
+
<td style="text-align:right;"> 440.0 </td>
|
|
1133
|
+
<td style="text-align:right;"> 230 </td>
|
|
1134
|
+
<td style="text-align:right;"> 3.23 </td>
|
|
1135
|
+
<td style="text-align:right;"> 5.345 </td>
|
|
1136
|
+
<td style="text-align:right;"> 17.42 </td>
|
|
1137
|
+
<td style="text-align:right;"> 0 </td>
|
|
1138
|
+
<td style="text-align:right;"> 0 </td>
|
|
1139
|
+
<td style="text-align:right;"> 3 </td>
|
|
1140
|
+
<td style="text-align:right;"> 4 </td>
|
|
1141
|
+
</tr>
|
|
1142
|
+
<tr>
|
|
1143
|
+
<td style="text-align:left;"> Fiat 128 </td>
|
|
1144
|
+
<td style="text-align:right;"> 32.4 </td>
|
|
1145
|
+
<td style="text-align:right;"> 4 </td>
|
|
1146
|
+
<td style="text-align:right;"> 78.7 </td>
|
|
1147
|
+
<td style="text-align:right;"> 66 </td>
|
|
1148
|
+
<td style="text-align:right;"> 4.08 </td>
|
|
1149
|
+
<td style="text-align:right;"> 2.200 </td>
|
|
1150
|
+
<td style="text-align:right;"> 19.47 </td>
|
|
1151
|
+
<td style="text-align:right;"> 1 </td>
|
|
1152
|
+
<td style="text-align:right;"> 1 </td>
|
|
1153
|
+
<td style="text-align:right;"> 4 </td>
|
|
1154
|
+
<td style="text-align:right;"> 1 </td>
|
|
1155
|
+
</tr>
|
|
1156
|
+
<tr>
|
|
1157
|
+
<td style="text-align:left;"> Honda Civic </td>
|
|
1158
|
+
<td style="text-align:right;"> 30.4 </td>
|
|
1159
|
+
<td style="text-align:right;"> 4 </td>
|
|
1160
|
+
<td style="text-align:right;"> 75.7 </td>
|
|
1161
|
+
<td style="text-align:right;"> 52 </td>
|
|
1162
|
+
<td style="text-align:right;"> 4.93 </td>
|
|
1163
|
+
<td style="text-align:right;"> 1.615 </td>
|
|
1164
|
+
<td style="text-align:right;"> 18.52 </td>
|
|
1165
|
+
<td style="text-align:right;"> 1 </td>
|
|
1166
|
+
<td style="text-align:right;"> 1 </td>
|
|
1167
|
+
<td style="text-align:right;"> 4 </td>
|
|
1168
|
+
<td style="text-align:right;"> 2 </td>
|
|
1169
|
+
</tr>
|
|
1170
|
+
<tr>
|
|
1171
|
+
<td style="text-align:left;"> Toyota Corolla </td>
|
|
1172
|
+
<td style="text-align:right;"> 33.9 </td>
|
|
1173
|
+
<td style="text-align:right;"> 4 </td>
|
|
1174
|
+
<td style="text-align:right;"> 71.1 </td>
|
|
1175
|
+
<td style="text-align:right;"> 65 </td>
|
|
1176
|
+
<td style="text-align:right;"> 4.22 </td>
|
|
1177
|
+
<td style="text-align:right;"> 1.835 </td>
|
|
1178
|
+
<td style="text-align:right;"> 19.90 </td>
|
|
1179
|
+
<td style="text-align:right;"> 1 </td>
|
|
1180
|
+
<td style="text-align:right;"> 1 </td>
|
|
1181
|
+
<td style="text-align:right;"> 4 </td>
|
|
1182
|
+
<td style="text-align:right;"> 1 </td>
|
|
1183
|
+
</tr>
|
|
1184
|
+
<tr>
|
|
1185
|
+
<td style="text-align:left;"> Toyota Corona </td>
|
|
1186
|
+
<td style="text-align:right;"> 21.5 </td>
|
|
1187
|
+
<td style="text-align:right;"> 4 </td>
|
|
1188
|
+
<td style="text-align:right;"> 120.1 </td>
|
|
1189
|
+
<td style="text-align:right;"> 97 </td>
|
|
1190
|
+
<td style="text-align:right;"> 3.70 </td>
|
|
1191
|
+
<td style="text-align:right;"> 2.465 </td>
|
|
1192
|
+
<td style="text-align:right;"> 20.01 </td>
|
|
1193
|
+
<td style="text-align:right;"> 1 </td>
|
|
1194
|
+
<td style="text-align:right;"> 0 </td>
|
|
1195
|
+
<td style="text-align:right;"> 3 </td>
|
|
1196
|
+
<td style="text-align:right;"> 1 </td>
|
|
1197
|
+
</tr>
|
|
1198
|
+
<tr>
|
|
1199
|
+
<td style="text-align:left;"> Dodge Challenger </td>
|
|
1200
|
+
<td style="text-align:right;"> 15.5 </td>
|
|
1201
|
+
<td style="text-align:right;"> 8 </td>
|
|
1202
|
+
<td style="text-align:right;"> 318.0 </td>
|
|
1203
|
+
<td style="text-align:right;"> 150 </td>
|
|
1204
|
+
<td style="text-align:right;"> 2.76 </td>
|
|
1205
|
+
<td style="text-align:right;"> 3.520 </td>
|
|
1206
|
+
<td style="text-align:right;"> 16.87 </td>
|
|
1207
|
+
<td style="text-align:right;"> 0 </td>
|
|
1208
|
+
<td style="text-align:right;"> 0 </td>
|
|
1209
|
+
<td style="text-align:right;"> 3 </td>
|
|
1210
|
+
<td style="text-align:right;"> 2 </td>
|
|
1211
|
+
</tr>
|
|
1212
|
+
<tr>
|
|
1213
|
+
<td style="text-align:left;"> AMC Javelin </td>
|
|
1214
|
+
<td style="text-align:right;"> 15.2 </td>
|
|
1215
|
+
<td style="text-align:right;"> 8 </td>
|
|
1216
|
+
<td style="text-align:right;"> 304.0 </td>
|
|
1217
|
+
<td style="text-align:right;"> 150 </td>
|
|
1218
|
+
<td style="text-align:right;"> 3.15 </td>
|
|
1219
|
+
<td style="text-align:right;"> 3.435 </td>
|
|
1220
|
+
<td style="text-align:right;"> 17.30 </td>
|
|
1221
|
+
<td style="text-align:right;"> 0 </td>
|
|
1222
|
+
<td style="text-align:right;"> 0 </td>
|
|
1223
|
+
<td style="text-align:right;"> 3 </td>
|
|
1224
|
+
<td style="text-align:right;"> 2 </td>
|
|
1225
|
+
</tr>
|
|
1226
|
+
<tr>
|
|
1227
|
+
<td style="text-align:left;"> Camaro Z28 </td>
|
|
1228
|
+
<td style="text-align:right;"> 13.3 </td>
|
|
1229
|
+
<td style="text-align:right;"> 8 </td>
|
|
1230
|
+
<td style="text-align:right;"> 350.0 </td>
|
|
1231
|
+
<td style="text-align:right;"> 245 </td>
|
|
1232
|
+
<td style="text-align:right;"> 3.73 </td>
|
|
1233
|
+
<td style="text-align:right;"> 3.840 </td>
|
|
1234
|
+
<td style="text-align:right;"> 15.41 </td>
|
|
1235
|
+
<td style="text-align:right;"> 0 </td>
|
|
1236
|
+
<td style="text-align:right;"> 0 </td>
|
|
1237
|
+
<td style="text-align:right;"> 3 </td>
|
|
1238
|
+
<td style="text-align:right;"> 4 </td>
|
|
1239
|
+
</tr>
|
|
1240
|
+
<tr>
|
|
1241
|
+
<td style="text-align:left;"> Pontiac Firebird </td>
|
|
1242
|
+
<td style="text-align:right;"> 19.2 </td>
|
|
1243
|
+
<td style="text-align:right;"> 8 </td>
|
|
1244
|
+
<td style="text-align:right;"> 400.0 </td>
|
|
1245
|
+
<td style="text-align:right;"> 175 </td>
|
|
1246
|
+
<td style="text-align:right;"> 3.08 </td>
|
|
1247
|
+
<td style="text-align:right;"> 3.845 </td>
|
|
1248
|
+
<td style="text-align:right;"> 17.05 </td>
|
|
1249
|
+
<td style="text-align:right;"> 0 </td>
|
|
1250
|
+
<td style="text-align:right;"> 0 </td>
|
|
1251
|
+
<td style="text-align:right;"> 3 </td>
|
|
1252
|
+
<td style="text-align:right;"> 2 </td>
|
|
1253
|
+
</tr>
|
|
1254
|
+
<tr>
|
|
1255
|
+
<td style="text-align:left;"> Fiat X1-9 </td>
|
|
1256
|
+
<td style="text-align:right;"> 27.3 </td>
|
|
1257
|
+
<td style="text-align:right;"> 4 </td>
|
|
1258
|
+
<td style="text-align:right;"> 79.0 </td>
|
|
1259
|
+
<td style="text-align:right;"> 66 </td>
|
|
1260
|
+
<td style="text-align:right;"> 4.08 </td>
|
|
1261
|
+
<td style="text-align:right;"> 1.935 </td>
|
|
1262
|
+
<td style="text-align:right;"> 18.90 </td>
|
|
1263
|
+
<td style="text-align:right;"> 1 </td>
|
|
1264
|
+
<td style="text-align:right;"> 1 </td>
|
|
1265
|
+
<td style="text-align:right;"> 4 </td>
|
|
1266
|
+
<td style="text-align:right;"> 1 </td>
|
|
1267
|
+
</tr>
|
|
1268
|
+
<tr>
|
|
1269
|
+
<td style="text-align:left;"> Porsche 914-2 </td>
|
|
1270
|
+
<td style="text-align:right;"> 26.0 </td>
|
|
1271
|
+
<td style="text-align:right;"> 4 </td>
|
|
1272
|
+
<td style="text-align:right;"> 120.3 </td>
|
|
1273
|
+
<td style="text-align:right;"> 91 </td>
|
|
1274
|
+
<td style="text-align:right;"> 4.43 </td>
|
|
1275
|
+
<td style="text-align:right;"> 2.140 </td>
|
|
1276
|
+
<td style="text-align:right;"> 16.70 </td>
|
|
1277
|
+
<td style="text-align:right;"> 0 </td>
|
|
1278
|
+
<td style="text-align:right;"> 1 </td>
|
|
1279
|
+
<td style="text-align:right;"> 5 </td>
|
|
1280
|
+
<td style="text-align:right;"> 2 </td>
|
|
1281
|
+
</tr>
|
|
1282
|
+
<tr>
|
|
1283
|
+
<td style="text-align:left;"> Lotus Europa </td>
|
|
1284
|
+
<td style="text-align:right;"> 30.4 </td>
|
|
1285
|
+
<td style="text-align:right;"> 4 </td>
|
|
1286
|
+
<td style="text-align:right;"> 95.1 </td>
|
|
1287
|
+
<td style="text-align:right;"> 113 </td>
|
|
1288
|
+
<td style="text-align:right;"> 3.77 </td>
|
|
1289
|
+
<td style="text-align:right;"> 1.513 </td>
|
|
1290
|
+
<td style="text-align:right;"> 16.90 </td>
|
|
1291
|
+
<td style="text-align:right;"> 1 </td>
|
|
1292
|
+
<td style="text-align:right;"> 1 </td>
|
|
1293
|
+
<td style="text-align:right;"> 5 </td>
|
|
1294
|
+
<td style="text-align:right;"> 2 </td>
|
|
1295
|
+
</tr>
|
|
1296
|
+
<tr>
|
|
1297
|
+
<td style="text-align:left;"> Ford Pantera L </td>
|
|
1298
|
+
<td style="text-align:right;"> 15.8 </td>
|
|
1299
|
+
<td style="text-align:right;"> 8 </td>
|
|
1300
|
+
<td style="text-align:right;"> 351.0 </td>
|
|
1301
|
+
<td style="text-align:right;"> 264 </td>
|
|
1302
|
+
<td style="text-align:right;"> 4.22 </td>
|
|
1303
|
+
<td style="text-align:right;"> 3.170 </td>
|
|
1304
|
+
<td style="text-align:right;"> 14.50 </td>
|
|
1305
|
+
<td style="text-align:right;"> 0 </td>
|
|
1306
|
+
<td style="text-align:right;"> 1 </td>
|
|
1307
|
+
<td style="text-align:right;"> 5 </td>
|
|
1308
|
+
<td style="text-align:right;"> 4 </td>
|
|
1309
|
+
</tr>
|
|
1310
|
+
<tr>
|
|
1311
|
+
<td style="text-align:left;"> Ferrari Dino </td>
|
|
1312
|
+
<td style="text-align:right;"> 19.7 </td>
|
|
1313
|
+
<td style="text-align:right;"> 6 </td>
|
|
1314
|
+
<td style="text-align:right;"> 145.0 </td>
|
|
1315
|
+
<td style="text-align:right;"> 175 </td>
|
|
1316
|
+
<td style="text-align:right;"> 3.62 </td>
|
|
1317
|
+
<td style="text-align:right;"> 2.770 </td>
|
|
1318
|
+
<td style="text-align:right;"> 15.50 </td>
|
|
1319
|
+
<td style="text-align:right;"> 0 </td>
|
|
1320
|
+
<td style="text-align:right;"> 1 </td>
|
|
1321
|
+
<td style="text-align:right;"> 5 </td>
|
|
1322
|
+
<td style="text-align:right;"> 6 </td>
|
|
1323
|
+
</tr>
|
|
1324
|
+
<tr>
|
|
1325
|
+
<td style="text-align:left;"> Maserati Bora </td>
|
|
1326
|
+
<td style="text-align:right;"> 15.0 </td>
|
|
1327
|
+
<td style="text-align:right;"> 8 </td>
|
|
1328
|
+
<td style="text-align:right;"> 301.0 </td>
|
|
1329
|
+
<td style="text-align:right;"> 335 </td>
|
|
1330
|
+
<td style="text-align:right;"> 3.54 </td>
|
|
1331
|
+
<td style="text-align:right;"> 3.570 </td>
|
|
1332
|
+
<td style="text-align:right;"> 14.60 </td>
|
|
1333
|
+
<td style="text-align:right;"> 0 </td>
|
|
1334
|
+
<td style="text-align:right;"> 1 </td>
|
|
1335
|
+
<td style="text-align:right;"> 5 </td>
|
|
1336
|
+
<td style="text-align:right;"> 8 </td>
|
|
1337
|
+
</tr>
|
|
1338
|
+
<tr>
|
|
1339
|
+
<td style="text-align:left;"> Volvo 142E </td>
|
|
1340
|
+
<td style="text-align:right;"> 21.4 </td>
|
|
1341
|
+
<td style="text-align:right;"> 4 </td>
|
|
1342
|
+
<td style="text-align:right;"> 121.0 </td>
|
|
1343
|
+
<td style="text-align:right;"> 109 </td>
|
|
1344
|
+
<td style="text-align:right;"> 4.11 </td>
|
|
1345
|
+
<td style="text-align:right;"> 2.780 </td>
|
|
1346
|
+
<td style="text-align:right;"> 18.60 </td>
|
|
1347
|
+
<td style="text-align:right;"> 1 </td>
|
|
1348
|
+
<td style="text-align:right;"> 1 </td>
|
|
1349
|
+
<td style="text-align:right;"> 4 </td>
|
|
1350
|
+
<td style="text-align:right;"> 2 </td>
|
|
1351
|
+
</tr>
|
|
1352
|
+
</tbody>
|
|
1353
|
+
</table>
|
|
340
1354
|
|
|
341
|
-
|
|
342
|
-
## [1] 11 22 14 25 36 17 28 39
|
|
343
|
-
## [1] 22 14 36 28 39
|
|
344
|
-
```
|
|
1355
|
+
## Including Ruby files in a chunk
|
|
345
1356
|
|
|
346
|
-
|
|
1357
|
+
R is a language that was created to be easy and fast for statisticians to use. As far
|
|
1358
|
+
as I know, it was not a
|
|
1359
|
+
language to be used for developing large systems. Of course, there are large systems and
|
|
1360
|
+
libraries in R, but the focus of the language is for developing statistical models and
|
|
1361
|
+
distributing them to peers.
|
|
347
1362
|
|
|
1363
|
+
Ruby, on the other hand, is a language for large software development. Systems written in
|
|
1364
|
+
Ruby will have dozens, hundreds or even thousands of files. To document a
|
|
1365
|
+
large system with literate programming, we cannot expect the developer to add all the
|
|
1366
|
+
files in a single '.Rmd' file. gKnit provides the 'include' chunk engine to include
|
|
1367
|
+
a Ruby file as if it had been typed in the '.Rmd' file.
|
|
348
1368
|
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
1369
|
+
To include a file, the following chunk should be created, where <filename> is the name of
|
|
1370
|
+
the file to be included and where the extension, if it is '.rb', does not need to be added.
|
|
1371
|
+
If the 'relative' option is not included, then it is treated as TRUE. When 'relative' is
|
|
1372
|
+
true, Ruby's 'require\_relative' semantics are used to load the file; when false, Ruby's
|
|
1373
|
+
\$LOAD_PATH is searched to find the file and it is 'require'd.
|
|
352
1374
|
|
|
1375
|
+
````
|
|
1376
|
+
```{include <filename>, relative = <TRUE/FALSE>}
|
|
353
1377
|
```
|
|
354
|
-
|
|
355
|
-
```
|
|
356
|
-
|
|
357
|
-
It is also possible to index a vector by range:
|
|
1378
|
+
````
|
|
358
1379
|
|
|
1380
|
+
Below we include the file 'model.rb', which is in the same directory as this blog.
|
|
1381
|
+
This code uses R's 'caret' package to split a dataset into train and test sets.
|
|
1382
|
+
The 'caret' package is a very important and useful package for doing Data Analysis;
|
|
1383
|
+
it has hundreds of functions for all steps of the Data Analysis workflow. To
|
|
1384
|
+
use 'caret' just to split a dataset is like using the proverbial cannon to
|
|
1385
|
+
kill the fly. We use it here only to show that integrating Ruby and R and
|
|
1386
|
+
using even a very complex package such as 'caret' is trivial with Galaaz.
|
|
359
1387
|
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
1388
|
+
A word of advice: the 'caret' package has lots of dependencies and installing
|
|
1389
|
+
it on a Linux system is a time-consuming operation. Method 'R.install_and_loads'
|
|
1390
|
+
will install the package if it is not already installed and can take a while.
|
|
363
1391
|
|
|
1392
|
+
````
|
|
1393
|
+
```{include model}
|
|
364
1394
|
```
|
|
365
|
-
|
|
366
|
-
```
|
|
367
|
-
|
|
368
|
-
Elements in a vector can be named using the 'names' attribute of a vector:
|
|
369
|
-
|
|
1395
|
+
````
|
|
370
1396
|
|
|
371
|
-
```ruby
|
|
372
|
-
full_name = R.c("Rodrigo", "A", "Botafogo")
|
|
373
|
-
full_name.names = R.c("First", "Middle", "Last")
|
|
374
|
-
puts full_name
|
|
375
|
-
```
|
|
376
1397
|
|
|
377
|
-
```
|
|
378
|
-
|
|
379
|
-
## "Rodrigo" "A" "Botafogo"
|
|
380
|
-
```
|
|
1398
|
+
```include
|
|
1399
|
+
require 'galaaz'
|
|
381
1400
|
|
|
382
|
-
|
|
1401
|
+
# Loads the R 'caret' package. If not present, installs it
|
|
1402
|
+
R.install_and_loads 'caret'
|
|
383
1403
|
|
|
1404
|
+
class Model
|
|
1405
|
+
|
|
1406
|
+
attr_reader :data
|
|
1407
|
+
attr_reader :test
|
|
1408
|
+
attr_reader :train
|
|
384
1409
|
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
1410
|
+
#==========================================================
|
|
1411
|
+
#
|
|
1412
|
+
#==========================================================
|
|
1413
|
+
|
|
1414
|
+
def initialize(data, percent_train:, seed: 123)
|
|
1415
|
+
|
|
1416
|
+
R.set__seed(seed)
|
|
1417
|
+
@data = data
|
|
1418
|
+
@percent_train = percent_train
|
|
1419
|
+
@seed = seed
|
|
1420
|
+
|
|
1421
|
+
end
|
|
1422
|
+
|
|
1423
|
+
#==========================================================
|
|
1424
|
+
#
|
|
1425
|
+
#==========================================================
|
|
1426
|
+
|
|
1427
|
+
def partition(field)
|
|
1428
|
+
|
|
1429
|
+
train_index =
|
|
1430
|
+
R.createDataPartition(@data.send(field), p: @percent_train,
|
|
1431
|
+
list: false, times: 1)
|
|
1432
|
+
@train = @data[train_index, :all]
|
|
1433
|
+
@test = @data[-train_index, :all]
|
|
1434
|
+
|
|
1435
|
+
end
|
|
1436
|
+
|
|
1437
|
+
end
|
|
389
1438
|
|
|
390
1439
|
```
|
|
391
|
-
## First Middle Last
|
|
392
|
-
## "Rodrigo" "A" "Botafogo"
|
|
393
|
-
```
|
|
394
|
-
|
|
395
|
-
## Extracting Native Ruby Types from a Vector
|
|
396
|
-
|
|
397
|
-
Vectors created with 'R.c' are of class R::Vector. You might have noticed that when indexing a
|
|
398
|
-
vector, a new vector is returned, even if this vector has one single element. In order to use
|
|
399
|
-
R::Vector with other ruby classes it might be necessary to extract the actual Ruby native type
|
|
400
|
-
from the vector. In order to do this extraction the '>>' operator is used.
|
|
401
1440
|
|
|
402
1441
|
|
|
403
1442
|
```ruby
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
1443
|
+
mtcars = ~:mtcars
|
|
1444
|
+
model = Model.new(mtcars, percent_train: 0.8)
|
|
1445
|
+
model.partition(:mpg)
|
|
1446
|
+
puts model.train.head
|
|
1447
|
+
puts model.test.head
|
|
1448
|
+
```
|
|
1449
|
+
|
|
1450
|
+
```
|
|
1451
|
+
## mpg cyl disp hp drat wt qsec vs am gear carb
|
|
1452
|
+
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
|
|
1453
|
+
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
|
|
1454
|
+
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
|
|
1455
|
+
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
|
|
1456
|
+
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
|
|
1457
|
+
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
|
|
1458
|
+
## mpg cyl disp hp drat wt qsec vs am gear carb
|
|
1459
|
+
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
|
|
1460
|
+
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
|
|
1461
|
+
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
|
|
1462
|
+
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
|
|
1463
|
+
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
|
|
1464
|
+
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
|
|
1465
|
+
```
|
|
1466
|
+
|
|
1467
|
+
## Documenting Gems
|
|
1468
|
+
|
|
1469
|
+
gKnit also allows developers to document and load files that are not in the same directory
|
|
1470
|
+
as the '.Rmd' file.
|
|
1471
|
+
|
|
1472
|
+
Here is an example of loading the 'find.rb' file from TruffleRuby. In this example, relative
|
|
1473
|
+
is set to FALSE, so Ruby will look for the file in its $LOAD\_PATH, and the user does not
|
|
1474
|
+
need to know its directory.
|
|
1475
|
+
|
|
1476
|
+
````
|
|
1477
|
+
```{include find, relative = FALSE}
|
|
1478
|
+
```
|
|
1479
|
+
````
|
|
1480
|
+
|
|
1481
|
+
|
|
1482
|
+
```include
|
|
1483
|
+
# frozen_string_literal: true
|
|
1484
|
+
#
|
|
1485
|
+
# find.rb: the Find module for processing all files under a given directory.
|
|
1486
|
+
#
|
|
1487
|
+
|
|
1488
|
+
#
|
|
1489
|
+
# The +Find+ module supports the top-down traversal of a set of file paths.
|
|
1490
|
+
#
|
|
1491
|
+
# For example, to total the size of all files under your home directory,
|
|
1492
|
+
# ignoring anything in a "dot" directory (e.g. $HOME/.ssh):
|
|
1493
|
+
#
|
|
1494
|
+
# require 'find'
|
|
1495
|
+
#
|
|
1496
|
+
# total_size = 0
|
|
1497
|
+
#
|
|
1498
|
+
# Find.find(ENV["HOME"]) do |path|
|
|
1499
|
+
# if FileTest.directory?(path)
|
|
1500
|
+
# if File.basename(path)[0] == ?.
|
|
1501
|
+
# Find.prune # Don't look any further into this directory.
|
|
1502
|
+
# else
|
|
1503
|
+
# next
|
|
1504
|
+
# end
|
|
1505
|
+
# else
|
|
1506
|
+
# total_size += FileTest.size(path)
|
|
1507
|
+
# end
|
|
1508
|
+
# end
|
|
1509
|
+
#
|
|
1510
|
+
module Find
|
|
1511
|
+
|
|
1512
|
+
#
|
|
1513
|
+
# Calls the associated block with the name of every file and directory listed
|
|
1514
|
+
# as arguments, then recursively on their subdirectories, and so on.
|
|
1515
|
+
#
|
|
1516
|
+
# Returns an enumerator if no block is given.
|
|
1517
|
+
#
|
|
1518
|
+
# See the +Find+ module documentation for an example.
|
|
1519
|
+
#
|
|
1520
|
+
def find(*paths, ignore_error: true) # :yield: path
|
|
1521
|
+
block_given? or return enum_for(__method__, *paths, ignore_error: ignore_error)
|
|
1522
|
+
|
|
1523
|
+
fs_encoding = Encoding.find("filesystem")
|
|
1524
|
+
|
|
1525
|
+
paths.collect!{|d| raise Errno::ENOENT, d unless File.exist?(d); d.dup}.each do |path|
|
|
1526
|
+
path = path.to_path if path.respond_to? :to_path
|
|
1527
|
+
enc = path.encoding == Encoding::US_ASCII ? fs_encoding : path.encoding
|
|
1528
|
+
ps = [path]
|
|
1529
|
+
while file = ps.shift
|
|
1530
|
+
catch(:prune) do
|
|
1531
|
+
yield file.dup.taint
|
|
1532
|
+
begin
|
|
1533
|
+
s = File.lstat(file)
|
|
1534
|
+
rescue Errno::ENOENT, Errno::EACCES, Errno::ENOTDIR, Errno::ELOOP, Errno::ENAMETOOLONG
|
|
1535
|
+
raise unless ignore_error
|
|
1536
|
+
next
|
|
1537
|
+
end
|
|
1538
|
+
if s.directory? then
|
|
1539
|
+
begin
|
|
1540
|
+
fs = Dir.children(file, encoding: enc)
|
|
1541
|
+
rescue Errno::ENOENT, Errno::EACCES, Errno::ENOTDIR, Errno::ELOOP, Errno::ENAMETOOLONG
|
|
1542
|
+
raise unless ignore_error
|
|
1543
|
+
next
|
|
1544
|
+
end
|
|
1545
|
+
fs.sort!
|
|
1546
|
+
fs.reverse_each {|f|
|
|
1547
|
+
f = File.join(file, f)
|
|
1548
|
+
ps.unshift f.untaint
|
|
1549
|
+
}
|
|
1550
|
+
end
|
|
1551
|
+
end
|
|
1552
|
+
end
|
|
1553
|
+
end
|
|
1554
|
+
nil
|
|
1555
|
+
end
|
|
1556
|
+
|
|
1557
|
+
#
|
|
1558
|
+
# Skips the current file or directory, restarting the loop with the next
|
|
1559
|
+
# entry. If the current file is a directory, that directory will not be
|
|
1560
|
+
# recursively entered. Meaningful only within the block associated with
|
|
1561
|
+
# Find::find.
|
|
1562
|
+
#
|
|
1563
|
+
# See the +Find+ module documentation for an example.
|
|
1564
|
+
#
|
|
1565
|
+
def prune
|
|
1566
|
+
throw :prune
|
|
1567
|
+
end
|
|
1568
|
+
|
|
1569
|
+
module_function :find, :prune
|
|
1570
|
+
end
|
|
407
1571
|
```
|
|
408
1572
|
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
1573
|
+
## Converting to PDF
|
|
1574
|
+
|
|
1575
|
+
One of the beauties of knitr is that the same input can be converted to many different outputs.
|
|
1576
|
+
One very useful format is, of course, PDF. In order to convert an __R markdown__ file to PDF
|
|
1577
|
+
it is necessary to have LaTeX installed on the system. We will not explain here how to
|
|
1578
|
+
install LaTeX as there are plenty of documents on the web showing how to proceed.
|
|
1579
|
+
|
|
1580
|
+
gKnit comes with a simple LaTeX style file for gknitting this blog as a PDF document. Here is
|
|
1581
|
+
the Yaml header to generate this blog in PDF format instead of HTML:
|
|
1582
|
+
|
|
1583
|
+
```
|
|
1584
|
+
---
|
|
1585
|
+
title: "gKnit - Ruby and R Knitting with Galaaz in GraalVM"
|
|
1586
|
+
author: "Rodrigo Botafogo"
|
|
1587
|
+
tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, knitr, gknit]
|
|
1588
|
+
date: "29 October 2018"
|
|
1589
|
+
output:
|
|
1590
|
+
pdf\_document:
|
|
1591
|
+
includes:
|
|
1592
|
+
in\_header: ["../../sty/galaaz.sty"]
|
|
1593
|
+
number\_sections: yes
|
|
1594
|
+
---
|
|
1595
|
+
```
|
|
1596
|
+
|
|
1597
|
+
## Template based documents generation
|
|
1598
|
+
|
|
1599
|
+
When a document is converted to PDF it follows a certain conversion template. We've seen above
|
|
1600
|
+
the use of 'galaaz.sty' as a basic template to generate a PDF document. Using the
|
|
1601
|
+
'gknit-draft' app that comes with Galaaz, the same .Rmd file can be compiled to different
|
|
1602
|
+
looking PDF documents. Galaaz automatically loads the 'rticles' R package that comes with
|
|
1603
|
+
templates for the following journals with the respective template name:
|
|
1604
|
+
|
|
1605
|
+
* ACM articles: acm_article
|
|
1606
|
+
* ACS articles: acs_article
|
|
1607
|
+
* AEA journal submissions: aea_article
|
|
1608
|
+
* AGU journal submissions: ????
|
|
1609
|
+
* AMS articles: ams_article
|
|
1610
|
+
* American Statistical Association: asa_article
|
|
1611
|
+
* Biometrics articles: biometrics_article
|
|
1612
|
+
* Bulletin de l'AMQ journal submissions: amq_article
|
|
1613
|
+
* CTeX documents: ctex
|
|
1614
|
+
* Elsevier journal submissions: elsevier_article
|
|
1615
|
+
* IEEE Transaction journal submissions: ieee_article
|
|
1616
|
+
* JSS articles: jss_article
|
|
1617
|
+
* MDPI journal submissions: mdpi_article
|
|
1618
|
+
* Monthly Notices of the Royal Astronomical Society articles: mnras_article
|
|
1619
|
+
* MNRAS journal submissions: mnras_article
|
|
1620
|
+
* PeerJ articles: peerj_article
|
|
1621
|
+
* Royal Society Open Science journal submissions: rsos_article
|
|
1622
|
+
* Royal Statistical Society: rss_article
|
|
1623
|
+
* Sage journal submissions: sage_article
|
|
1624
|
+
* Springer journal submissions: springer_article
|
|
1625
|
+
* Statistics in Medicine journal submissions: sim_article
|
|
1626
|
+
* Copernicus Publications journal submissions: copernicus_article
|
|
1627
|
+
* The R Journal articles: rjournal_article
|
|
1628
|
+
* Frontiers articles: ???
|
|
1629
|
+
* Taylor & Francis articles: ???
|
|
1630
|
+
* Bulletin De L'AMQ: amq_article
|
|
1631
|
+
* PLOS journal: plos_article
|
|
1632
|
+
* Proceedings of the National Academy of Sciences of the USA: pnas_article
|
|
1633
|
+
|
|
1634
|
+
In order to create a document with one of those templates, use the following command:
|
|
1635
|
+
|
|
1636
|
+
```
|
|
1637
|
+
gknit-draft --filename <my_document> --template <template> --package <package>
|
|
1638
|
+
--create_dir
|
|
1639
|
+
```
|
|
1640
|
+
So, in order to create a template for writing an R Journal article, use:
|
|
1641
|
+
|
|
1642
|
+
```
|
|
1643
|
+
gknit-draft --filename my_r_article --template rjournal_article --package rticles
|
|
1644
|
+
--create_dir
|
|
413
1645
|
```
|
|
414
1646
|
|
|
415
|
-
Note that indexing with '>>' starts at 0 and not at 1, also, we cannot do negative indexing.
|
|
416
|
-
|
|
417
1647
|
# Accessing R variables
|
|
418
1648
|
|
|
419
1649
|
Galaaz allows Ruby to access variables created in R. For example, the 'mtcars' data set is
|
|
@@ -896,7 +2126,338 @@ outputs (~:mtcars).kable.kable_styling
|
|
|
896
2126
|
</tbody>
|
|
897
2127
|
</table>
|
|
898
2128
|
|
|
899
|
-
#
|
|
2129
|
+
# Basic Data Types
|
|
2130
|
+
|
|
2131
|
+
## Vector
|
|
2132
|
+
|
|
2133
|
+
Vectors can be thought of as contiguous cells containing data. Cells are accessed through
|
|
2134
|
+
indexing operations such as x[5]. Galaaz has six basic (‘atomic’) vector types: logical,
|
|
2135
|
+
integer, real, complex, string (or character) and raw. The modes and storage modes for the
|
|
2136
|
+
different vector types are listed in the following
|
|
2137
|
+
table.
|
|
2138
|
+
|
|
2139
|
+
| typeof | mode | storage.mode |
|
|
2140
|
+
|-----------|:---------:|-------------:|
|
|
2141
|
+
| logical | logical | logical |
|
|
2142
|
+
| integer | numeric | integer |
|
|
2143
|
+
| double | numeric | double |
|
|
2144
|
+
| complex | complex | complex |
|
|
2145
|
+
| character | character | character |
|
|
2146
|
+
| raw | raw | raw |
|
|
2147
|
+
|
|
2148
|
+
Single numbers, such as 4.2, and strings, such as "four point two" are still vectors, of length
|
|
2149
|
+
1; there are no more basic types. Vectors with length zero are possible (and useful).
|
|
2150
|
+
String vectors have mode and storage mode "character". A single element of a character
|
|
2151
|
+
vector is often referred to as a character string.
|
|
2152
|
+
|
|
2153
|
+
To create a vector the 'c' (concatenate) method from the 'R' module should be used:
|
|
2154
|
+
|
|
2155
|
+
|
|
2156
|
+
```ruby
|
|
2157
|
+
vec = R.c(1, 2, 3)
|
|
2158
|
+
puts vec
|
|
2159
|
+
```
|
|
2160
|
+
|
|
2161
|
+
```
|
|
2162
|
+
## [1] 1 2 3
|
|
2163
|
+
```
|
|
2164
|
+
|
|
2165
|
+
Let's take a look at the type, mode and storage.mode of our vector vec. In order to print
|
|
2166
|
+
this out, we are creating a data frame 'df' and printing it out. A data frame, for those
|
|
2167
|
+
not familiar with it, is basically a table. Here we create the data frame and add the
|
|
2168
|
+
column name by passing named parameters for each column, such as 'typeof:', 'mode:' and
|
|
2169
|
+
'storage__mode:'. You should also note here that the double underscore is converted to a '.'.
|
|
2170
|
+
So, when printed 'storage\_\_mode' will actually print as 'storage.mode'.
|
|
2171
|
+
|
|
2172
|
+
Data frames will later be more carefully described. In R, the method used to create a
|
|
2173
|
+
data frame is 'data.frame', in Galaaz we use 'data\_\_frame'.
|
|
2174
|
+
|
|
2175
|
+
|
|
2176
|
+
```ruby
|
|
2177
|
+
df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
|
|
2178
|
+
puts df
|
|
2179
|
+
```
|
|
2180
|
+
|
|
2181
|
+
```
|
|
2182
|
+
## typeof mode storage.mode
|
|
2183
|
+
## 1 integer numeric integer
|
|
2184
|
+
```
|
|
2185
|
+
|
|
2186
|
+
If you want to create a vector with floating point numbers, then we need at least one of the
|
|
2187
|
+
vector's elements to be a float, such as 1.0. R users should be careful, since in R a number
|
|
2188
|
+
like '1' is converted to float and to have an integer the R developer will use '1L'. Galaaz
|
|
2189
|
+
follows normal Ruby rules and the number 1 is an integer and 1.0 is a float.
|
|
2190
|
+
|
|
2191
|
+
|
|
2192
|
+
```ruby
|
|
2193
|
+
vec = R.c(1.0, 2, 3)
|
|
2194
|
+
puts vec
|
|
2195
|
+
```
|
|
2196
|
+
|
|
2197
|
+
```
|
|
2198
|
+
## [1] 1 2 3
|
|
2199
|
+
```
|
|
2200
|
+
|
|
2201
|
+
|
|
2202
|
+
```ruby
|
|
2203
|
+
df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
|
|
2204
|
+
outputs df.kable.kable_styling
|
|
2205
|
+
```
|
|
2206
|
+
|
|
2207
|
+
<table class="table" style="margin-left: auto; margin-right: auto;">
|
|
2208
|
+
<thead>
|
|
2209
|
+
<tr>
|
|
2210
|
+
<th style="text-align:left;"> typeof </th>
|
|
2211
|
+
<th style="text-align:left;"> mode </th>
|
|
2212
|
+
<th style="text-align:left;"> storage.mode </th>
|
|
2213
|
+
</tr>
|
|
2214
|
+
</thead>
|
|
2215
|
+
<tbody>
|
|
2216
|
+
<tr>
|
|
2217
|
+
<td style="text-align:left;"> double </td>
|
|
2218
|
+
<td style="text-align:left;"> numeric </td>
|
|
2219
|
+
<td style="text-align:left;"> double </td>
|
|
2220
|
+
</tr>
|
|
2221
|
+
</tbody>
|
|
2222
|
+
</table>
|
|
2223
|
+
|
|
2224
|
+
In this next example we try to create a vector with a variable 'hello' that has not yet
|
|
2225
|
+
been defined. This will raise an exception that is printed out. We get two return blocks,
|
|
2226
|
+
the first with a message explaining what went wrong and the second with the full backtrace
|
|
2227
|
+
of the error.
|
|
2228
|
+
|
|
2229
|
+
|
|
2230
|
+
```ruby
|
|
2231
|
+
vec = R.c(1, hello, 5)
|
|
2232
|
+
```
|
|
2233
|
+
|
|
2234
|
+
```
|
|
2235
|
+
## Message:
|
|
2236
|
+
## undefined local variable or method `hello' for #<RC:0x3d8 @out_list=nil>:RC
|
|
2237
|
+
```
|
|
2238
|
+
|
|
2239
|
+
```
|
|
2240
|
+
## Message:
|
|
2241
|
+
## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:103:in `get_binding'
|
|
2242
|
+
## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `eval'
|
|
2243
|
+
## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `exec_ruby'
|
|
2244
|
+
## /home/rbotafogo/desenv/galaaz/lib/gknit/knitr_engine.rb:650:in `block in initialize'
|
|
2245
|
+
## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
|
|
2246
|
+
## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
|
|
2247
|
+
## (eval):3:in `function(...) {\n rb_method(...)'
|
|
2248
|
+
## unknown.r:1:in `in_dir'
|
|
2249
|
+
## unknown.r:1:in `block_exec'
|
|
2250
|
+
## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:92:in `call_block'
|
|
2251
|
+
## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:6:in `process_group.block'
|
|
2252
|
+
## /usr/local/lib/graalvm-ce-java11-20.0.0/languages/R/library/knitr/R/block.R:3:in `<no source>'
|
|
2253
|
+
## unknown.r:1:in `withCallingHandlers'
|
|
2254
|
+
## unknown.r:1:in `process_file'
|
|
2255
|
+
## unknown.r:1:in `<no source>'
|
|
2256
|
+
## unknown.r:1:in `<no source>'
|
|
2257
|
+
## <REPL>:4:in `<repl wrapper>'
|
|
2258
|
+
## <REPL>:1
|
|
2259
|
+
```
|
|
2260
|
+
|
|
2261
|
+
Here is a vector with logical values
|
|
2262
|
+
|
|
2263
|
+
|
|
2264
|
+
```ruby
|
|
2265
|
+
vec = R.c(true, true, false, false, true)
|
|
2266
|
+
puts vec
|
|
2267
|
+
```
|
|
2268
|
+
|
|
2269
|
+
```
|
|
2270
|
+
## [1] TRUE TRUE FALSE FALSE TRUE
|
|
2271
|
+
```
|
|
2272
|
+
|
|
2273
|
+
### Combining Vectors
|
|
2274
|
+
|
|
2275
|
+
The 'c' function used to create vectors can also be used to combine two vectors:
|
|
2276
|
+
|
|
2277
|
+
|
|
2278
|
+
```ruby
|
|
2279
|
+
vec1 = R.c(10.0, 20.0, 30.0)
|
|
2280
|
+
vec2 = R.c(4.0, 5.0, 6.0)
|
|
2281
|
+
vec = R.c(vec1, vec2)
|
|
2282
|
+
puts vec
|
|
2283
|
+
```
|
|
2284
|
+
|
|
2285
|
+
```
|
|
2286
|
+
## [1] 10 20 30 4 5 6
|
|
2287
|
+
```
|
|
2288
|
+
In Galaaz, methods can be chained (somewhat like the pipe operator in R %>%, but more generic).
|
|
2289
|
+
In this next example, method 'c' is chained after 'vec1'. This also looks like 'c' is a
|
|
2290
|
+
method of the vector, but in reality, this is actually closer to the pipe operator. When
|
|
2291
|
+
Galaaz identifies that 'c' is not a method of 'vec1', it actually tries to call 'R.c' with
|
|
2292
|
+
'vec1' as the first argument concatenated with all the other available arguments. The code
|
|
2293
|
+
below is automatically converted to the code above.
|
|
2294
|
+
|
|
2295
|
+
|
|
2296
|
+
```ruby
|
|
2297
|
+
vec = vec1.c(vec2)
|
|
2298
|
+
puts vec
|
|
2299
|
+
```
|
|
2300
|
+
|
|
2301
|
+
```
|
|
2302
|
+
## [1] 10 20 30 4 5 6
|
|
2303
|
+
```
|
|
2304
|
+
|
|
2305
|
+
### Vector Arithmetic
|
|
2306
|
+
|
|
2307
|
+
Arithmetic operations on vectors are performed element by element:
|
|
2308
|
+
|
|
2309
|
+
|
|
2310
|
+
```ruby
|
|
2311
|
+
puts vec1 + vec2
|
|
2312
|
+
```
|
|
2313
|
+
|
|
2314
|
+
```
|
|
2315
|
+
## [1] 14 25 36
|
|
2316
|
+
```
|
|
2317
|
+
|
|
2318
|
+
|
|
2319
|
+
```ruby
|
|
2320
|
+
puts vec1 * 5
|
|
2321
|
+
```
|
|
2322
|
+
|
|
2323
|
+
```
|
|
2324
|
+
## [1] 50 100 150
|
|
2325
|
+
```
|
|
2326
|
+
|
|
2327
|
+
When vectors have different lengths, a recycling rule is applied to the shorter vector:
|
|
2328
|
+
|
|
2329
|
+
|
|
2330
|
+
```ruby
|
|
2331
|
+
vec3 = R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
|
|
2332
|
+
puts vec4 = vec1 + vec3
|
|
2333
|
+
```
|
|
2334
|
+
|
|
2335
|
+
```
|
|
2336
|
+
## [1] 11 22 33 14 25 36 17 28 39
|
|
2337
|
+
```
|
|
2338
|
+
|
|
2339
|
+
### Vector Indexing
|
|
2340
|
+
|
|
2341
|
+
Vectors can be indexed by using the '[]' operator:
|
|
2342
|
+
|
|
2343
|
+
|
|
2344
|
+
```ruby
|
|
2345
|
+
puts vec4[3]
|
|
2346
|
+
```
|
|
2347
|
+
|
|
2348
|
+
```
|
|
2349
|
+
## [1] 33
|
|
2350
|
+
```
|
|
2351
|
+
|
|
2352
|
+
We can also index a vector with another vector. For example, in the code below, we take elements
|
|
2353
|
+
1, 3, 5, and 7 from vec4:
|
|
2354
|
+
|
|
2355
|
+
|
|
2356
|
+
```ruby
|
|
2357
|
+
puts vec4[R.c(1, 3, 5, 7)]
|
|
2358
|
+
```
|
|
2359
|
+
|
|
2360
|
+
```
|
|
2361
|
+
## [1] 11 33 25 17
|
|
2362
|
+
```
|
|
2363
|
+
|
|
2364
|
+
Repeating an index and having indices out of order is valid code:
|
|
2365
|
+
|
|
2366
|
+
|
|
2367
|
+
```ruby
|
|
2368
|
+
puts vec4[R.c(1, 3, 3, 1)]
|
|
2369
|
+
```
|
|
2370
|
+
|
|
2371
|
+
```
|
|
2372
|
+
## [1] 11 33 33 11
|
|
2373
|
+
```
|
|
2374
|
+
|
|
2375
|
+
It is also possible to index a vector with a negative number or negative vector. In these cases
|
|
2376
|
+
the indexed values are not returned:
|
|
2377
|
+
|
|
2378
|
+
|
|
2379
|
+
```ruby
|
|
2380
|
+
puts vec4[-3]
|
|
2381
|
+
puts vec4[-R.c(1, 3, 5, 7)]
|
|
2382
|
+
```
|
|
2383
|
+
|
|
2384
|
+
```
|
|
2385
|
+
## [1] 11 22 14 25 36 17 28 39
|
|
2386
|
+
## [1] 22 14 36 28 39
|
|
2387
|
+
```
|
|
2388
|
+
|
|
2389
|
+
If an index is out of range, a missing value (NA) will be reported.
|
|
2390
|
+
|
|
2391
|
+
|
|
2392
|
+
```ruby
|
|
2393
|
+
puts vec4[30]
|
|
2394
|
+
```
|
|
2395
|
+
|
|
2396
|
+
```
|
|
2397
|
+
## [1] NA
|
|
2398
|
+
```
|
|
2399
|
+
|
|
2400
|
+
It is also possible to index a vector by range:
|
|
2401
|
+
|
|
2402
|
+
|
|
2403
|
+
```ruby
|
|
2404
|
+
puts vec4[(2..5)]
|
|
2405
|
+
```
|
|
2406
|
+
|
|
2407
|
+
```
|
|
2408
|
+
## [1] 22 33 14 25
|
|
2409
|
+
```
|
|
2410
|
+
|
|
2411
|
+
Elements in a vector can be named using the 'names' attribute of a vector:
|
|
2412
|
+
|
|
2413
|
+
|
|
2414
|
+
```ruby
|
|
2415
|
+
full_name = R.c("Rodrigo", "A", "Botafogo")
|
|
2416
|
+
full_name.names = R.c("First", "Middle", "Last")
|
|
2417
|
+
puts full_name
|
|
2418
|
+
```
|
|
2419
|
+
|
|
2420
|
+
```
|
|
2421
|
+
## First Middle Last
|
|
2422
|
+
## "Rodrigo" "A" "Botafogo"
|
|
2423
|
+
```
|
|
2424
|
+
|
|
2425
|
+
Or it can also be named by using the 'c' function with named parameters:
|
|
2426
|
+
|
|
2427
|
+
|
|
2428
|
+
```ruby
|
|
2429
|
+
full_name = R.c(First: "Rodrigo", Middle: "A", Last: "Botafogo")
|
|
2430
|
+
puts full_name
|
|
2431
|
+
```
|
|
2432
|
+
|
|
2433
|
+
```
|
|
2434
|
+
## First Middle Last
|
|
2435
|
+
## "Rodrigo" "A" "Botafogo"
|
|
2436
|
+
```
|
|
2437
|
+
|
|
2438
|
+
### Extracting Native Ruby Types from a Vector
|
|
2439
|
+
|
|
2440
|
+
Vectors created with 'R.c' are of class R::Vector. You might have noticed that when indexing a
|
|
2441
|
+
vector, a new vector is returned, even if this vector has one single element. In order to use
|
|
2442
|
+
R::Vector with other ruby classes it might be necessary to extract the actual Ruby native type
|
|
2443
|
+
from the vector. In order to do this extraction the '>>' operator is used.
|
|
2444
|
+
|
|
2445
|
+
|
|
2446
|
+
```ruby
|
|
2447
|
+
puts vec4
|
|
2448
|
+
puts vec4 >> 0
|
|
2449
|
+
puts vec4 >> 4
|
|
2450
|
+
```
|
|
2451
|
+
|
|
2452
|
+
```
|
|
2453
|
+
## [1] 11 22 33 14 25 36 17 28 39
|
|
2454
|
+
## 11.0
|
|
2455
|
+
## 25.0
|
|
2456
|
+
```
|
|
2457
|
+
|
|
2458
|
+
Note that indexing with '>>' starts at 0 and not at 1, also, we cannot do negative indexing.
|
|
2459
|
+
|
|
2460
|
+
## Matrix
|
|
900
2461
|
|
|
901
2462
|
A matrix is a collection of elements organized as a two dimensional table. A matrix can be
|
|
902
2463
|
created by the 'matrix' function:
|
|
@@ -936,7 +2497,7 @@ puts mat_row
|
|
|
936
2497
|
## [3,] 7 8 9
|
|
937
2498
|
```
|
|
938
2499
|
|
|
939
|
-
|
|
2500
|
+
### Indexing a Matrix
|
|
940
2501
|
|
|
941
2502
|
A matrix can be indexed by [row, column]:
|
|
942
2503
|
|
|
@@ -1008,7 +2569,7 @@ puts mat_row.cbind(mat)
|
|
|
1008
2569
|
## [3,] 7 8 9 3 6 9
|
|
1009
2570
|
```
|
|
1010
2571
|
|
|
1011
|
-
|
|
2572
|
+
## List
|
|
1012
2573
|
|
|
1013
2574
|
A list is a data structure that can contain sublists of different types, while vector and matrix
|
|
1014
2575
|
can only hold one type of element.
|
|
@@ -1036,7 +2597,7 @@ puts lst
|
|
|
1036
2597
|
Note that 'lst' elements are named elements.
|
|
1037
2598
|
|
|
1038
2599
|
|
|
1039
|
-
|
|
2600
|
+
### List Indexing
|
|
1040
2601
|
|
|
1041
2602
|
List indexing, also called slicing, is done using the '[]' operator and the '[[]]' operator. Let's
|
|
1042
2603
|
first start with the '[]' operator. The list above has three sublists; indexing with '[]' will
|
|
@@ -1082,7 +2643,7 @@ then the first element of the vector was extracted (note that vectors also accep
|
|
|
1082
2643
|
operator) and then the vector was indexed by its first element, extracting the native Ruby type.
|
|
1083
2644
|
|
|
1084
2645
|
|
|
1085
|
-
|
|
2646
|
+
## Data Frame
|
|
1086
2647
|
|
|
1087
2648
|
A data frame is a table like structure in which each column has the same number of
|
|
1088
2649
|
rows. Data frames are the basic structure for storing data for data analysis. We have already
|
|
@@ -1105,7 +2666,7 @@ puts df
|
|
|
1105
2666
|
## 3 2012 2000
|
|
1106
2667
|
```
|
|
1107
2668
|
|
|
1108
|
-
|
|
2669
|
+
### Data Frame Indexing
|
|
1109
2670
|
|
|
1110
2671
|
A data frame can be indexed the same way as a matrix, by using '[row, column]', where row and
|
|
1111
2672
|
column can either be a numeric or the name of the row or column
|
|
@@ -1325,13 +2886,6 @@ symbolic notation as otherwise, we end up writing invalid expressions such as
|
|
|
1325
2886
|
exp_wrong = (:a + :b) == :z
|
|
1326
2887
|
puts exp_wrong
|
|
1327
2888
|
```
|
|
1328
|
-
|
|
1329
|
-
```
|
|
1330
|
-
## Message:
|
|
1331
|
-
## Error in function (x, y, num.eq = TRUE, single.NA = TRUE, attrib.as.set = TRUE, :
|
|
1332
|
-
## object 'a' not found (RError)
|
|
1333
|
-
## Translated to internal error
|
|
1334
|
-
```
|
|
1335
2889
|
and it might be difficult to understand what is going on here. The problem lies with the fact that
|
|
1336
2890
|
when using '==' we are comparing expression (:a + :b) to expression :z with '=='. When the
|
|
1337
2891
|
comparison is executed, the system tries to evaluate :a, :b and :z, and those symbols at
|
|
@@ -1423,11 +2977,15 @@ Galaaz.
|
|
|
1423
2977
|
|
|
1424
2978
|
For these
|
|
1425
2979
|
examples, we will investigate the nycflights13 data set available on the package by the
|
|
1426
|
-
same name. We use function 'R.
|
|
2980
|
+
same name. We use function 'R.install\_and\_loads' that checks if the library is available
|
|
1427
2981
|
locally, and if not, installs it. This data frame contains all 336,776 flights that
|
|
1428
2982
|
departed from New York City in 2013. The data comes from the US Bureau of
|
|
1429
2983
|
Transportation Statistics.
|
|
1430
2984
|
|
|
2985
|
+
Dplyr uses 'tibbles' in place of data frames; unfortunately, tibbles do not yet print properly in
|
|
2986
|
+
Galaaz due to a bug in fastR. In order to print a tibble we need to convert it to a data frame
|
|
2987
|
+
using the 'as\_\_data\_\_frame' method.
|
|
2988
|
+
|
|
1431
2989
|
|
|
1432
2990
|
```ruby
|
|
1433
2991
|
R.install_and_loads('nycflights13')
|
|
@@ -1437,31 +2995,23 @@ R.library('dplyr')
|
|
|
1437
2995
|
|
|
1438
2996
|
```ruby
|
|
1439
2997
|
flights = ~:flights
|
|
1440
|
-
puts flights.head
|
|
1441
|
-
```
|
|
1442
|
-
|
|
1443
|
-
```
|
|
1444
|
-
##
|
|
1445
|
-
##
|
|
1446
|
-
##
|
|
1447
|
-
##
|
|
1448
|
-
##
|
|
1449
|
-
##
|
|
1450
|
-
##
|
|
1451
|
-
##
|
|
1452
|
-
## 1
|
|
1453
|
-
##
|
|
1454
|
-
##
|
|
1455
|
-
##
|
|
1456
|
-
##
|
|
1457
|
-
## 6 12 UA 1696 N39463 EWR ORD 150 719 5
|
|
1458
|
-
## minute time_hour
|
|
1459
|
-
## 1 15 2013-01-01 05:00:00
|
|
1460
|
-
## 2 29 2013-01-01 05:00:00
|
|
1461
|
-
## 3 40 2013-01-01 05:00:00
|
|
1462
|
-
## 4 45 2013-01-01 05:00:00
|
|
1463
|
-
## 5 0 2013-01-01 06:00:00
|
|
1464
|
-
## 6 58 2013-01-01 05:00:00
|
|
2998
|
+
puts flights.head
|
|
2999
|
+
```
|
|
3000
|
+
|
|
3001
|
+
```
|
|
3002
|
+
## # A tibble: 6 x 19
|
|
3003
|
+
## year month day dep_time sched_dep_time dep_delay arr_time
|
|
3004
|
+
## <int> <int> <int> <int> <int> <dbl> <int>
|
|
3005
|
+
## 1 2013 1 1 517 515 2 830
|
|
3006
|
+
## 2 2013 1 1 533 529 4 850
|
|
3007
|
+
## 3 2013 1 1 542 540 2 923
|
|
3008
|
+
## 4 2013 1 1 544 545 -1 1004
|
|
3009
|
+
## 5 2013 1 1 554 600 -6 812
|
|
3010
|
+
## 6 2013 1 1 554 558 -4 740
|
|
3011
|
+
## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
|
|
3012
|
+
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
|
|
3013
|
+
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
|
|
3014
|
+
## # time_hour <dttm>
|
|
1465
3015
|
```
|
|
1466
3016
|
|
|
1467
3017
|
## Filtering rows with Filter
|
|
@@ -1471,31 +3021,23 @@ the first :month.eq 1
|
|
|
1471
3021
|
|
|
1472
3022
|
|
|
1473
3023
|
```ruby
|
|
1474
|
-
puts flights.filter((:month.eq 1), (:day.eq 1)).head
|
|
3024
|
+
puts flights.filter((:month.eq 1), (:day.eq 1)).head
|
|
1475
3025
|
```
|
|
1476
3026
|
|
|
1477
3027
|
```
|
|
1478
|
-
##
|
|
1479
|
-
##
|
|
1480
|
-
##
|
|
1481
|
-
##
|
|
1482
|
-
##
|
|
1483
|
-
##
|
|
1484
|
-
##
|
|
1485
|
-
##
|
|
1486
|
-
## 1
|
|
1487
|
-
##
|
|
1488
|
-
##
|
|
1489
|
-
##
|
|
1490
|
-
##
|
|
1491
|
-
## 6 12 UA 1696 N39463 EWR ORD 150 719 5
|
|
1492
|
-
## minute time_hour
|
|
1493
|
-
## 1 15 2013-01-01 05:00:00
|
|
1494
|
-
## 2 29 2013-01-01 05:00:00
|
|
1495
|
-
## 3 40 2013-01-01 05:00:00
|
|
1496
|
-
## 4 45 2013-01-01 05:00:00
|
|
1497
|
-
## 5 0 2013-01-01 06:00:00
|
|
1498
|
-
## 6 58 2013-01-01 05:00:00
|
|
3028
|
+
## # A tibble: 6 x 19
|
|
3029
|
+
## year month day dep_time sched_dep_time dep_delay arr_time
|
|
3030
|
+
## <int> <int> <int> <int> <int> <dbl> <int>
|
|
3031
|
+
## 1 2013 1 1 517 515 2 830
|
|
3032
|
+
## 2 2013 1 1 533 529 4 850
|
|
3033
|
+
## 3 2013 1 1 542 540 2 923
|
|
3034
|
+
## 4 2013 1 1 544 545 -1 1004
|
|
3035
|
+
## 5 2013 1 1 554 600 -6 812
|
|
3036
|
+
## 6 2013 1 1 554 558 -4 740
|
|
3037
|
+
## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
|
|
3038
|
+
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
|
|
3039
|
+
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
|
|
3040
|
+
## # time_hour <dttm>
|
|
1499
3041
|
```
|
|
1500
3042
|
|
|
1501
3043
|
## Logical Operators
|
|
@@ -1504,31 +3046,23 @@ All flights that departed in November of December
|
|
|
1504
3046
|
|
|
1505
3047
|
|
|
1506
3048
|
```ruby
|
|
1507
|
-
puts flights.filter((:month.eq 11) | (:month.eq 12)).head
|
|
3049
|
+
puts flights.filter((:month.eq 11) | (:month.eq 12)).head
|
|
1508
3050
|
```
|
|
1509
3051
|
|
|
1510
3052
|
```
|
|
1511
|
-
##
|
|
1512
|
-
##
|
|
1513
|
-
##
|
|
1514
|
-
##
|
|
1515
|
-
##
|
|
1516
|
-
##
|
|
1517
|
-
##
|
|
1518
|
-
##
|
|
1519
|
-
##
|
|
1520
|
-
##
|
|
1521
|
-
##
|
|
1522
|
-
##
|
|
1523
|
-
##
|
|
1524
|
-
## 6 -11 UA 303 N595UA JFK SFO 359 2586 6
|
|
1525
|
-
## minute time_hour
|
|
1526
|
-
## 1 59 2013-11-01 23:00:00
|
|
1527
|
-
## 2 50 2013-11-01 22:00:00
|
|
1528
|
-
## 3 0 2013-11-01 05:00:00
|
|
1529
|
-
## 4 45 2013-11-01 05:00:00
|
|
1530
|
-
## 5 45 2013-11-01 05:00:00
|
|
1531
|
-
## 6 0 2013-11-01 06:00:00
|
|
3053
|
+
## # A tibble: 6 x 19
|
|
3054
|
+
## year month day dep_time sched_dep_time dep_delay arr_time
|
|
3055
|
+
## <int> <int> <int> <int> <int> <dbl> <int>
|
|
3056
|
+
## 1 2013 11 1 5 2359 6 352
|
|
3057
|
+
## 2 2013 11 1 35 2250 105 123
|
|
3058
|
+
## 3 2013 11 1 455 500 -5 641
|
|
3059
|
+
## 4 2013 11 1 539 545 -6 856
|
|
3060
|
+
## 5 2013 11 1 542 545 -3 831
|
|
3061
|
+
## 6 2013 11 1 549 600 -11 912
|
|
3062
|
+
## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
|
|
3063
|
+
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
|
|
3064
|
+
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
|
|
3065
|
+
## # time_hour <dttm>
|
|
1532
3066
|
```
|
|
1533
3067
|
|
|
1534
3068
|
The same as above, but using the 'in' operator. In R, it is possible to define many operators
|
|
@@ -1538,31 +3072,23 @@ symbol, in this case ':in' and the second argument is the vector:
|
|
|
1538
3072
|
|
|
1539
3073
|
|
|
1540
3074
|
```ruby
|
|
1541
|
-
puts flights.filter(:month._ :in, R.c(11, 12)).head
|
|
3075
|
+
puts flights.filter(:month._ :in, R.c(11, 12)).head
|
|
1542
3076
|
```
|
|
1543
3077
|
|
|
1544
3078
|
```
|
|
1545
|
-
##
|
|
1546
|
-
##
|
|
1547
|
-
##
|
|
1548
|
-
##
|
|
1549
|
-
##
|
|
1550
|
-
##
|
|
1551
|
-
##
|
|
1552
|
-
##
|
|
1553
|
-
##
|
|
1554
|
-
##
|
|
1555
|
-
##
|
|
1556
|
-
##
|
|
1557
|
-
##
|
|
1558
|
-
## 6 -11 UA 303 N595UA JFK SFO 359 2586 6
|
|
1559
|
-
## minute time_hour
|
|
1560
|
-
## 1 59 2013-11-01 23:00:00
|
|
1561
|
-
## 2 50 2013-11-01 22:00:00
|
|
1562
|
-
## 3 0 2013-11-01 05:00:00
|
|
1563
|
-
## 4 45 2013-11-01 05:00:00
|
|
1564
|
-
## 5 45 2013-11-01 05:00:00
|
|
1565
|
-
## 6 0 2013-11-01 06:00:00
|
|
3079
|
+
## # A tibble: 6 x 19
|
|
3080
|
+
## year month day dep_time sched_dep_time dep_delay arr_time
|
|
3081
|
+
## <int> <int> <int> <int> <int> <dbl> <int>
|
|
3082
|
+
## 1 2013 11 1 5 2359 6 352
|
|
3083
|
+
## 2 2013 11 1 35 2250 105 123
|
|
3084
|
+
## 3 2013 11 1 455 500 -5 641
|
|
3085
|
+
## 4 2013 11 1 539 545 -6 856
|
|
3086
|
+
## 5 2013 11 1 542 545 -3 831
|
|
3087
|
+
## 6 2013 11 1 549 600 -11 912
|
|
3088
|
+
## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
|
|
3089
|
+
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
|
|
3090
|
+
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
|
|
3091
|
+
## # time_hour <dttm>
|
|
1566
3092
|
```
|
|
1567
3093
|
|
|
1568
3094
|
## Filtering with NA (Not Available)
|
|
@@ -1575,14 +3101,16 @@ what is obtained from data frame.
|
|
|
1575
3101
|
|
|
1576
3102
|
```ruby
|
|
1577
3103
|
df = R.tibble(x: R.c(1, R::NA, 3))
|
|
1578
|
-
puts df
|
|
3104
|
+
puts df
|
|
1579
3105
|
```
|
|
1580
3106
|
|
|
1581
3107
|
```
|
|
1582
|
-
##
|
|
1583
|
-
##
|
|
1584
|
-
##
|
|
1585
|
-
##
|
|
3108
|
+
## # A tibble: 3 x 1
|
|
3109
|
+
## x
|
|
3110
|
+
## <int>
|
|
3111
|
+
## 1 1
|
|
3112
|
+
## 2
|
|
3113
|
+
## 3 3
|
|
1586
3114
|
```
|
|
1587
3115
|
|
|
1588
3116
|
Now filtering by :x > 1 shows all lines that satisfy this condition, where the row with R::NA does
|
|
@@ -1590,25 +3118,29 @@ not.
|
|
|
1590
3118
|
|
|
1591
3119
|
|
|
1592
3120
|
```ruby
|
|
1593
|
-
puts df.filter(:x > 1)
|
|
3121
|
+
puts df.filter(:x > 1)
|
|
1594
3122
|
```
|
|
1595
3123
|
|
|
1596
3124
|
```
|
|
1597
|
-
##
|
|
1598
|
-
##
|
|
3125
|
+
## # A tibble: 1 x 1
|
|
3126
|
+
## x
|
|
3127
|
+
## <int>
|
|
3128
|
+
## 1 3
|
|
1599
3129
|
```
|
|
1600
3130
|
|
|
1601
3131
|
To match an NA use method 'is__na'
|
|
1602
3132
|
|
|
1603
3133
|
|
|
1604
3134
|
```ruby
|
|
1605
|
-
puts df.filter((:x.is__na) | (:x > 1))
|
|
3135
|
+
puts df.filter((:x.is__na) | (:x > 1))
|
|
1606
3136
|
```
|
|
1607
3137
|
|
|
1608
3138
|
```
|
|
1609
|
-
##
|
|
1610
|
-
##
|
|
1611
|
-
##
|
|
3139
|
+
## # A tibble: 2 x 1
|
|
3140
|
+
## x
|
|
3141
|
+
## <int>
|
|
3142
|
+
## 1
|
|
3143
|
+
## 2 3
|
|
1612
3144
|
```
|
|
1613
3145
|
|
|
1614
3146
|
## Arrange Rows with arrange
|
|
@@ -1617,62 +3149,46 @@ Arrange reorders the rows of a data frame by the given arguments.
|
|
|
1617
3149
|
|
|
1618
3150
|
|
|
1619
3151
|
```ruby
|
|
1620
|
-
puts flights.arrange(:year, :month, :day).head
|
|
3152
|
+
puts flights.arrange(:year, :month, :day).head
|
|
1621
3153
|
```
|
|
1622
3154
|
|
|
1623
3155
|
```
|
|
1624
|
-
##
|
|
1625
|
-
##
|
|
1626
|
-
##
|
|
1627
|
-
##
|
|
1628
|
-
##
|
|
1629
|
-
##
|
|
1630
|
-
##
|
|
1631
|
-
##
|
|
1632
|
-
## 1
|
|
1633
|
-
##
|
|
1634
|
-
##
|
|
1635
|
-
##
|
|
1636
|
-
##
|
|
1637
|
-
## 6 12 UA 1696 N39463 EWR ORD 150 719 5
|
|
1638
|
-
## minute time_hour
|
|
1639
|
-
## 1 15 2013-01-01 05:00:00
|
|
1640
|
-
## 2 29 2013-01-01 05:00:00
|
|
1641
|
-
## 3 40 2013-01-01 05:00:00
|
|
1642
|
-
## 4 45 2013-01-01 05:00:00
|
|
1643
|
-
## 5 0 2013-01-01 06:00:00
|
|
1644
|
-
## 6 58 2013-01-01 05:00:00
|
|
3156
|
+
## # A tibble: 6 x 19
|
|
3157
|
+
## year month day dep_time sched_dep_time dep_delay arr_time
|
|
3158
|
+
## <int> <int> <int> <int> <int> <dbl> <int>
|
|
3159
|
+
## 1 2013 1 1 517 515 2 830
|
|
3160
|
+
## 2 2013 1 1 533 529 4 850
|
|
3161
|
+
## 3 2013 1 1 542 540 2 923
|
|
3162
|
+
## 4 2013 1 1 544 545 -1 1004
|
|
3163
|
+
## 5 2013 1 1 554 600 -6 812
|
|
3164
|
+
## 6 2013 1 1 554 558 -4 740
|
|
3165
|
+
## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
|
|
3166
|
+
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
|
|
3167
|
+
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
|
|
3168
|
+
## # time_hour <dttm>
|
|
1645
3169
|
```
|
|
1646
3170
|
|
|
1647
3171
|
To arrange in descending order, use function 'desc'
|
|
1648
3172
|
|
|
1649
3173
|
|
|
1650
3174
|
```ruby
|
|
1651
|
-
puts flights.arrange(:dep_delay.desc).head
|
|
3175
|
+
puts flights.arrange(:dep_delay.desc).head
|
|
1652
3176
|
```
|
|
1653
3177
|
|
|
1654
3178
|
```
|
|
1655
|
-
##
|
|
1656
|
-
##
|
|
1657
|
-
##
|
|
1658
|
-
##
|
|
1659
|
-
##
|
|
1660
|
-
##
|
|
1661
|
-
##
|
|
1662
|
-
##
|
|
1663
|
-
##
|
|
1664
|
-
##
|
|
1665
|
-
##
|
|
1666
|
-
##
|
|
1667
|
-
##
|
|
1668
|
-
## 6 931 DL 2391 N959DL JFK TPA 139 1005 19
|
|
1669
|
-
## minute time_hour
|
|
1670
|
-
## 1 0 2013-01-09 09:00:00
|
|
1671
|
-
## 2 35 2013-06-15 19:00:00
|
|
1672
|
-
## 3 35 2013-01-10 16:00:00
|
|
1673
|
-
## 4 45 2013-09-20 18:00:00
|
|
1674
|
-
## 5 0 2013-07-22 16:00:00
|
|
1675
|
-
## 6 0 2013-04-10 19:00:00
|
|
3179
|
+
## # A tibble: 6 x 19
|
|
3180
|
+
## year month day dep_time sched_dep_time dep_delay arr_time
|
|
3181
|
+
## <int> <int> <int> <int> <int> <dbl> <int>
|
|
3182
|
+
## 1 2013 1 9 641 900 1301 1242
|
|
3183
|
+
## 2 2013 6 15 1432 1935 1137 1607
|
|
3184
|
+
## 3 2013 1 10 1121 1635 1126 1239
|
|
3185
|
+
## 4 2013 9 20 1139 1845 1014 1457
|
|
3186
|
+
## 5 2013 7 22 845 1600 1005 1044
|
|
3187
|
+
## 6 2013 4 10 1100 1900 960 1342
|
|
3188
|
+
## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
|
|
3189
|
+
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
|
|
3190
|
+
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
|
|
3191
|
+
## # time_hour <dttm>
|
|
1676
3192
|
```
|
|
1677
3193
|
|
|
1678
3194
|
## Selecting columns
|
|
@@ -1681,45 +3197,51 @@ To select specific columns from a dataset we use function 'select':
|
|
|
1681
3197
|
|
|
1682
3198
|
|
|
1683
3199
|
```ruby
|
|
1684
|
-
puts flights.select(:year, :month, :day).head
|
|
3200
|
+
puts flights.select(:year, :month, :day).head
|
|
1685
3201
|
```
|
|
1686
3202
|
|
|
1687
3203
|
```
|
|
1688
|
-
##
|
|
1689
|
-
##
|
|
1690
|
-
##
|
|
1691
|
-
##
|
|
1692
|
-
##
|
|
1693
|
-
##
|
|
1694
|
-
##
|
|
3204
|
+
## # A tibble: 6 x 3
|
|
3205
|
+
## year month day
|
|
3206
|
+
## <int> <int> <int>
|
|
3207
|
+
## 1 2013 1 1
|
|
3208
|
+
## 2 2013 1 1
|
|
3209
|
+
## 3 2013 1 1
|
|
3210
|
+
## 4 2013 1 1
|
|
3211
|
+
## 5 2013 1 1
|
|
3212
|
+
## 6 2013 1 1
|
|
1695
3213
|
```
|
|
1696
3214
|
|
|
1697
3215
|
It is also possible to select columns in a given range
|
|
1698
3216
|
|
|
1699
3217
|
|
|
1700
3218
|
```ruby
|
|
1701
|
-
puts flights.select(:year.up_to :day).head
|
|
3219
|
+
puts flights.select(:year.up_to :day).head
|
|
1702
3220
|
```
|
|
1703
3221
|
|
|
1704
3222
|
```
|
|
1705
|
-
##
|
|
1706
|
-
##
|
|
1707
|
-
##
|
|
1708
|
-
##
|
|
1709
|
-
##
|
|
1710
|
-
##
|
|
1711
|
-
##
|
|
3223
|
+
## # A tibble: 6 x 3
|
|
3224
|
+
## year month day
|
|
3225
|
+
## <int> <int> <int>
|
|
3226
|
+
## 1 2013 1 1
|
|
3227
|
+
## 2 2013 1 1
|
|
3228
|
+
## 3 2013 1 1
|
|
3229
|
+
## 4 2013 1 1
|
|
3230
|
+
## 5 2013 1 1
|
|
3231
|
+
## 6 2013 1 1
|
|
1712
3232
|
```
|
|
1713
3233
|
|
|
1714
3234
|
Select all columns that start with a given name sequence
|
|
1715
3235
|
|
|
1716
3236
|
|
|
1717
3237
|
```ruby
|
|
1718
|
-
puts flights.select(E.starts_with('arr')).head
|
|
3238
|
+
puts flights.select(E.starts_with('arr')).head
|
|
1719
3239
|
```
|
|
1720
3240
|
|
|
1721
3241
|
```
|
|
3242
|
+
## # A tibble: 6 x 2
|
|
1722
3243
|
## arr_time arr_delay
|
|
3244
|
+
## <int> <dbl>
|
|
1723
3245
|
## 1 830 11
|
|
1724
3246
|
## 2 850 20
|
|
1725
3247
|
## 3 923 33
|
|
@@ -1743,31 +3265,23 @@ A helper function that comes in handy when we just want to rearrange column orde
|
|
|
1743
3265
|
|
|
1744
3266
|
|
|
1745
3267
|
```ruby
|
|
1746
|
-
puts flights.select(:year, :month, :day, E.everything).head
|
|
3268
|
+
puts flights.select(:year, :month, :day, E.everything).head
|
|
1747
3269
|
```
|
|
1748
3270
|
|
|
1749
3271
|
```
|
|
1750
|
-
##
|
|
1751
|
-
##
|
|
1752
|
-
##
|
|
1753
|
-
##
|
|
1754
|
-
##
|
|
1755
|
-
##
|
|
1756
|
-
##
|
|
1757
|
-
##
|
|
1758
|
-
## 1
|
|
1759
|
-
##
|
|
1760
|
-
##
|
|
1761
|
-
##
|
|
1762
|
-
##
|
|
1763
|
-
## 6 12 UA 1696 N39463 EWR ORD 150 719 5
|
|
1764
|
-
## minute time_hour
|
|
1765
|
-
## 1 15 2013-01-01 05:00:00
|
|
1766
|
-
## 2 29 2013-01-01 05:00:00
|
|
1767
|
-
## 3 40 2013-01-01 05:00:00
|
|
1768
|
-
## 4 45 2013-01-01 05:00:00
|
|
1769
|
-
## 5 0 2013-01-01 06:00:00
|
|
1770
|
-
## 6 58 2013-01-01 05:00:00
|
|
3272
|
+
## # A tibble: 6 x 19
|
|
3273
|
+
## year month day dep_time sched_dep_time dep_delay arr_time
|
|
3274
|
+
## <int> <int> <int> <int> <int> <dbl> <int>
|
|
3275
|
+
## 1 2013 1 1 517 515 2 830
|
|
3276
|
+
## 2 2013 1 1 533 529 4 850
|
|
3277
|
+
## 3 2013 1 1 542 540 2 923
|
|
3278
|
+
## 4 2013 1 1 544 545 -1 1004
|
|
3279
|
+
## 5 2013 1 1 554 600 -6 812
|
|
3280
|
+
## 6 2013 1 1 554 558 -4 740
|
|
3281
|
+
## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
|
|
3282
|
+
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
|
|
3283
|
+
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
|
|
3284
|
+
## # time_hour <dttm>
|
|
1771
3285
|
```
|
|
1772
3286
|
|
|
1773
3287
|
## Add variables to a dataframe with 'mutate'
|
|
@@ -1780,17 +3294,19 @@ flights_sm = flights.
|
|
|
1780
3294
|
:distance,
|
|
1781
3295
|
:air_time)
|
|
1782
3296
|
|
|
1783
|
-
puts flights_sm.head
|
|
3297
|
+
puts flights_sm.head
|
|
1784
3298
|
```
|
|
1785
3299
|
|
|
1786
3300
|
```
|
|
1787
|
-
##
|
|
1788
|
-
##
|
|
1789
|
-
##
|
|
1790
|
-
##
|
|
1791
|
-
##
|
|
1792
|
-
##
|
|
1793
|
-
##
|
|
3301
|
+
## # A tibble: 6 x 7
|
|
3302
|
+
## year month day dep_delay arr_delay distance air_time
|
|
3303
|
+
## <int> <int> <int> <dbl> <dbl> <dbl> <dbl>
|
|
3304
|
+
## 1 2013 1 1 2 11 1400 227
|
|
3305
|
+
## 2 2013 1 1 4 20 1416 227
|
|
3306
|
+
## 3 2013 1 1 2 33 1089 160
|
|
3307
|
+
## 4 2013 1 1 -1 -18 1576 183
|
|
3308
|
+
## 5 2013 1 1 -6 -25 762 116
|
|
3309
|
+
## 6 2013 1 1 -4 12 719 150
|
|
1794
3310
|
```
|
|
1795
3311
|
|
|
1796
3312
|
|
|
@@ -1798,17 +3314,19 @@ puts flights_sm.head.as__data__frame
|
|
|
1798
3314
|
flights_sm = flights_sm.
|
|
1799
3315
|
mutate(gain: :dep_delay - :arr_delay,
|
|
1800
3316
|
speed: :distance / :air_time * 60)
|
|
1801
|
-
puts flights_sm.head
|
|
3317
|
+
puts flights_sm.head
|
|
1802
3318
|
```
|
|
1803
3319
|
|
|
1804
3320
|
```
|
|
1805
|
-
##
|
|
1806
|
-
##
|
|
1807
|
-
##
|
|
1808
|
-
##
|
|
1809
|
-
##
|
|
1810
|
-
##
|
|
1811
|
-
##
|
|
3321
|
+
## # A tibble: 6 x 9
|
|
3322
|
+
## year month day dep_delay arr_delay distance air_time gain speed
|
|
3323
|
+
## <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
|
|
3324
|
+
## 1 2013 1 1 2 11 1400 227 -9 370.
|
|
3325
|
+
## 2 2013 1 1 4 20 1416 227 -16 374.
|
|
3326
|
+
## 3 2013 1 1 2 33 1089 160 -31 408.
|
|
3327
|
+
## 4 2013 1 1 -1 -18 1576 183 17 517.
|
|
3328
|
+
## 5 2013 1 1 -6 -25 762 116 19 394.
|
|
3329
|
+
## 6 2013 1 1 -4 12 719 150 -16 288.
|
|
1812
3330
|
```
|
|
1813
3331
|
|
|
1814
3332
|
## Summarising data
|
|
@@ -1818,12 +3336,14 @@ a single value is obtained from the data frame:
|
|
|
1818
3336
|
|
|
1819
3337
|
|
|
1820
3338
|
```ruby
|
|
1821
|
-
puts flights.summarise(delay: E.mean(:dep_delay, na__rm: true))
|
|
3339
|
+
puts flights.summarise(delay: E.mean(:dep_delay, na__rm: true))
|
|
1822
3340
|
```
|
|
1823
3341
|
|
|
1824
3342
|
```
|
|
1825
|
-
##
|
|
1826
|
-
##
|
|
3343
|
+
## # A tibble: 1 x 1
|
|
3344
|
+
## delay
|
|
3345
|
+
## <dbl>
|
|
3346
|
+
## 1 12.6
|
|
1827
3347
|
```
|
|
1828
3348
|
|
|
1829
3349
|
When a data frame is grouped with 'group_by', summaries apply to the given group:
|
|
@@ -1831,17 +3351,20 @@ When a data frame is groupe with 'group_by' summaries apply to the given group:
|
|
|
1831
3351
|
|
|
1832
3352
|
```ruby
|
|
1833
3353
|
by_day = flights.group_by(:year, :month, :day)
|
|
1834
|
-
puts by_day.summarise(delay: :dep_delay.mean(na__rm: true)).head
|
|
3354
|
+
puts by_day.summarise(delay: :dep_delay.mean(na__rm: true)).head
|
|
1835
3355
|
```
|
|
1836
3356
|
|
|
1837
3357
|
```
|
|
1838
|
-
##
|
|
1839
|
-
##
|
|
1840
|
-
##
|
|
1841
|
-
##
|
|
1842
|
-
##
|
|
1843
|
-
##
|
|
1844
|
-
##
|
|
3358
|
+
## # A tibble: 6 x 4
|
|
3359
|
+
## # Groups: year, month [1]
|
|
3360
|
+
## year month day delay
|
|
3361
|
+
## * <int> <int> <int> <dbl>
|
|
3362
|
+
## 1 2013 1 1 11.5
|
|
3363
|
+
## 2 2013 1 2 13.9
|
|
3364
|
+
## 3 2013 1 3 11.0
|
|
3365
|
+
## 4 2013 1 4 8.95
|
|
3366
|
+
## 5 2013 1 5 5.73
|
|
3367
|
+
## 6 2013 1 6 7.15
|
|
1845
3368
|
```
|
|
1846
3369
|
|
|
1847
3370
|
Next we put many operations together by piping them one after the other:
|
|
@@ -1856,17 +3379,19 @@ delays = flights.
|
|
|
1856
3379
|
delay: :arr_delay.mean(na__rm: true)).
|
|
1857
3380
|
filter(:count > 20, :dest != "NHL")
|
|
1858
3381
|
|
|
1859
|
-
puts delays.
|
|
3382
|
+
puts delays.head
|
|
1860
3383
|
```
|
|
1861
3384
|
|
|
1862
3385
|
```
|
|
1863
|
-
##
|
|
1864
|
-
##
|
|
1865
|
-
##
|
|
1866
|
-
##
|
|
1867
|
-
##
|
|
1868
|
-
##
|
|
1869
|
-
##
|
|
3386
|
+
## # A tibble: 6 x 4
|
|
3387
|
+
## dest count dist delay
|
|
3388
|
+
## <chr> <int> <dbl> <dbl>
|
|
3389
|
+
## 1 ABQ 254 1826 4.38
|
|
3390
|
+
## 2 ACK 265 199 4.85
|
|
3391
|
+
## 3 ALB 439 143 14.4
|
|
3392
|
+
## 4 ATL 17215 757. 11.3
|
|
3393
|
+
## 5 AUS 2439 1514. 6.02
|
|
3394
|
+
## 6 AVL 275 584. 8.00
|
|
1870
3395
|
```
|
|
1871
3396
|
|
|
1872
3397
|
# Using Data Table
|
|
@@ -2088,7 +3613,7 @@ puts mtcars.ggplot(E.aes(x: :car_name, y: :mpg_z, label: :mpg_z)) +
|
|
|
2088
3613
|
```
|
|
2089
3614
|
|
|
2090
3615
|
|
|
2091
|
-
<!-- -->
|
|
2092
3617
|
|
|
2093
3618
|
# Coding with Tidyverse
|
|
2094
3619
|
|
|
@@ -2266,11 +3791,11 @@ as.data.frame(df)
|
|
|
2266
3791
|
|
|
2267
3792
|
```
|
|
2268
3793
|
## g1 g2 a b
|
|
2269
|
-
## 1 1 1
|
|
2270
|
-
## 2 1 2
|
|
2271
|
-
## 3 2 1 5
|
|
2272
|
-
## 4 2 2
|
|
2273
|
-
## 5 2 1 1
|
|
3794
|
+
## 1 1 1 3 3
|
|
3795
|
+
## 2 1 2 2 1
|
|
3796
|
+
## 3 2 1 5 2
|
|
3797
|
+
## 4 2 2 4 5
|
|
3798
|
+
## 5 2 1 1 4
|
|
2274
3799
|
```
|
|
2275
3800
|
|
|
2276
3801
|
```r
|
|
@@ -2282,9 +3807,9 @@ as.data.frame(d2)
|
|
|
2282
3807
|
```
|
|
2283
3808
|
|
|
2284
3809
|
```
|
|
2285
|
-
## g1
|
|
2286
|
-
## 1 1
|
|
2287
|
-
## 2 2 3
|
|
3810
|
+
## g1 a
|
|
3811
|
+
## 1 1 2.500000
|
|
3812
|
+
## 2 2 3.333333
|
|
2288
3813
|
```
|
|
2289
3814
|
|
|
2290
3815
|
```r
|
|
@@ -2296,9 +3821,9 @@ as.data.frame(d2)
|
|
|
2296
3821
|
```
|
|
2297
3822
|
|
|
2298
3823
|
```
|
|
2299
|
-
## g2
|
|
2300
|
-
## 1 1
|
|
2301
|
-
## 2 2 3
|
|
3824
|
+
## g2 a
|
|
3825
|
+
## 1 1 3
|
|
3826
|
+
## 2 2 3
|
|
2302
3827
|
```
|
|
2303
3828
|
|
|
2304
3829
|
As shown by Hadley, one might expect this function to do the trick:
|
|
@@ -2330,11 +3855,11 @@ puts ~:df
|
|
|
2330
3855
|
|
|
2331
3856
|
```
|
|
2332
3857
|
## g1 g2 a b
|
|
2333
|
-
## 1 1 1
|
|
2334
|
-
## 2 1 2
|
|
2335
|
-
## 3 2 1 5
|
|
2336
|
-
## 4 2 2
|
|
2337
|
-
## 5 2 1 1
|
|
3858
|
+
## 1 1 1 3 3
|
|
3859
|
+
## 2 1 2 2 1
|
|
3860
|
+
## 3 2 1 5 2
|
|
3861
|
+
## 4 2 2 4 5
|
|
3862
|
+
## 5 2 1 1 4
|
|
2338
3863
|
```
|
|
2339
3864
|
|
|
2340
3865
|
We then create the 'my_summarize' method and call it passing the R data frame and
|
|
@@ -2347,26 +3872,30 @@ def my_summarize(df, group_var)
|
|
|
2347
3872
|
summarize(a: :a.mean)
|
|
2348
3873
|
end
|
|
2349
3874
|
|
|
2350
|
-
puts my_summarize(:df, :g1)
|
|
3875
|
+
puts my_summarize(:df, :g1)
|
|
2351
3876
|
```
|
|
2352
3877
|
|
|
2353
3878
|
```
|
|
2354
|
-
##
|
|
2355
|
-
##
|
|
2356
|
-
##
|
|
3879
|
+
## # A tibble: 2 x 2
|
|
3880
|
+
## g1 a
|
|
3881
|
+
## <dbl> <dbl>
|
|
3882
|
+
## 1 1 2.5
|
|
3883
|
+
## 2 2 3.33
|
|
2357
3884
|
```
|
|
2358
3885
|
|
|
2359
3886
|
It works!!! Well, let's make sure this was not just some coincidence
|
|
2360
3887
|
|
|
2361
3888
|
|
|
2362
3889
|
```ruby
|
|
2363
|
-
puts my_summarize(:df, :g2)
|
|
3890
|
+
puts my_summarize(:df, :g2)
|
|
2364
3891
|
```
|
|
2365
3892
|
|
|
2366
3893
|
```
|
|
2367
|
-
##
|
|
2368
|
-
##
|
|
2369
|
-
##
|
|
3894
|
+
## # A tibble: 2 x 2
|
|
3895
|
+
## g2 a
|
|
3896
|
+
## <dbl> <dbl>
|
|
3897
|
+
## 1 1 3
|
|
3898
|
+
## 2 2 3
|
|
2370
3899
|
```
|
|
2371
3900
|
|
|
2372
3901
|
Great, everything is fine! No magic, no new functions, no complexities, just normal, standard Ruby
|
|
@@ -2474,18 +4003,18 @@ puts my_mutate((~:df), :b)
|
|
|
2474
4003
|
|
|
2475
4004
|
```
|
|
2476
4005
|
## g1 g2 a b mean_a sum_a
|
|
2477
|
-
## 1 1 1
|
|
2478
|
-
## 2 1 2
|
|
2479
|
-
## 3 2 1 5
|
|
2480
|
-
## 4 2 2
|
|
2481
|
-
## 5 2 1 1
|
|
4006
|
+
## 1 1 1 3 3 3 15
|
|
4007
|
+
## 2 1 2 2 1 3 15
|
|
4008
|
+
## 3 2 1 5 2 3 15
|
|
4009
|
+
## 4 2 2 4 5 3 15
|
|
4010
|
+
## 5 2 1 1 4 3 15
|
|
2482
4011
|
##
|
|
2483
4012
|
## g1 g2 a b mean_b sum_b
|
|
2484
|
-
## 1 1 1
|
|
2485
|
-
## 2 1 2
|
|
2486
|
-
## 3 2 1 5
|
|
2487
|
-
## 4 2 2
|
|
2488
|
-
## 5 2 1 1
|
|
4013
|
+
## 1 1 1 3 3 3 15
|
|
4014
|
+
## 2 1 2 2 1 3 15
|
|
4015
|
+
## 3 2 1 5 2 3 15
|
|
4016
|
+
## 4 2 2 4 5 3 15
|
|
4017
|
+
## 5 2 1 1 4 3 15
|
|
2489
4018
|
```
|
|
2490
4019
|
It really seems that "Non Standard Evaluation" is actually quite standard in Galaaz! But, you
|
|
2491
4020
|
might have noticed a small change in the way the arguments to the mutate method were called.
|
|
@@ -2510,15 +4039,18 @@ def my_summarise3(df, *group_vars)
|
|
|
2510
4039
|
summarise(a: E.mean(:a))
|
|
2511
4040
|
end
|
|
2512
4041
|
|
|
2513
|
-
puts my_summarise3((~:df), :g1, :g2)
|
|
4042
|
+
puts my_summarise3((~:df), :g1, :g2)
|
|
2514
4043
|
```
|
|
2515
4044
|
|
|
2516
4045
|
```
|
|
2517
|
-
##
|
|
2518
|
-
##
|
|
2519
|
-
##
|
|
2520
|
-
##
|
|
2521
|
-
##
|
|
4046
|
+
## # A tibble: 4 x 3
|
|
4047
|
+
## # Groups: g1 [?]
|
|
4048
|
+
## g1 g2 a
|
|
4049
|
+
## <dbl> <dbl> <dbl>
|
|
4050
|
+
## 1 1 1 3
|
|
4051
|
+
## 2 1 2 2
|
|
4052
|
+
## 3 2 1 3
|
|
4053
|
+
## 4 2 2 4
|
|
2522
4054
|
```
|
|
2523
4055
|
|
|
2524
4056
|
## Why does R require NSE and Galaaz does not?
|
|
@@ -2570,38 +4102,21 @@ features of characters in the Starwars movies:
|
|
|
2570
4102
|
|
|
2571
4103
|
|
|
2572
4104
|
```ruby
|
|
2573
|
-
puts (~:starwars).head
|
|
2574
|
-
```
|
|
2575
|
-
|
|
2576
|
-
```
|
|
2577
|
-
##
|
|
2578
|
-
##
|
|
2579
|
-
##
|
|
2580
|
-
##
|
|
2581
|
-
##
|
|
2582
|
-
##
|
|
2583
|
-
##
|
|
2584
|
-
##
|
|
2585
|
-
##
|
|
2586
|
-
##
|
|
2587
|
-
##
|
|
2588
|
-
## 4 male Tatooine Human
|
|
2589
|
-
## 5 female Alderaan Human
|
|
2590
|
-
## 6 male Tatooine Human
|
|
2591
|
-
## films
|
|
2592
|
-
## 1 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
|
|
2593
|
-
## 2 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
|
|
2594
|
-
## 3 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
|
|
2595
|
-
## 4 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
|
|
2596
|
-
## 5 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
|
|
2597
|
-
## 6 Attack of the Clones, Revenge of the Sith, A New Hope
|
|
2598
|
-
## vehicles starships
|
|
2599
|
-
## 1 Snowspeeder, Imperial Speeder Bike X-wing, Imperial shuttle
|
|
2600
|
-
## 2
|
|
2601
|
-
## 3
|
|
2602
|
-
## 4 TIE Advanced x1
|
|
2603
|
-
## 5 Imperial Speeder Bike
|
|
2604
|
-
## 6
|
|
4105
|
+
puts (~:starwars).head
|
|
4106
|
+
```
|
|
4107
|
+
|
|
4108
|
+
```
|
|
4109
|
+
## # A tibble: 6 x 13
|
|
4110
|
+
## name height mass hair_color skin_color eye_color birth_year gender
|
|
4111
|
+
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
|
|
4112
|
+
## 1 Luke… 172 77 blond fair blue 19 male
|
|
4113
|
+
## 2 C-3PO 167 75 <NA> gold yellow 112 <NA>
|
|
4114
|
+
## 3 R2-D2 96 32 <NA> white, bl… red 33 <NA>
|
|
4115
|
+
## 4 Dart… 202 136 none white yellow 41.9 male
|
|
4116
|
+
## 5 Leia… 150 49 brown light brown 19 female
|
|
4117
|
+
## 6 Owen… 178 120 brown, gr… light blue 52 male
|
|
4118
|
+
## # … with 5 more variables: homeworld <chr>, species <chr>, films <list>,
|
|
4119
|
+
## # vehicles <list>, starships <list>
|
|
2605
4120
|
```
|
|
2606
4121
|
The grouped_mean function below will receive a grouping variable and calculate summaries for
|
|
2607
4122
|
the value\_variables given:
|
|
@@ -2653,26 +4168,28 @@ def grouped_mean(data, grouping_variables, value_variables)
|
|
|
2653
4168
|
rename_at(value_variables, E.funs(E.paste0("mean_", value_variables)))
|
|
2654
4169
|
end
|
|
2655
4170
|
|
|
2656
|
-
puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year"))
|
|
4171
|
+
puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year"))
|
|
2657
4172
|
```
|
|
2658
4173
|
|
|
2659
4174
|
```
|
|
2660
|
-
##
|
|
2661
|
-
##
|
|
2662
|
-
##
|
|
2663
|
-
## 3
|
|
2664
|
-
##
|
|
2665
|
-
##
|
|
2666
|
-
##
|
|
2667
|
-
##
|
|
2668
|
-
##
|
|
2669
|
-
##
|
|
2670
|
-
##
|
|
2671
|
-
##
|
|
2672
|
-
##
|
|
2673
|
-
##
|
|
2674
|
-
##
|
|
2675
|
-
##
|
|
4175
|
+
## # A tibble: 15 x 4
|
|
4176
|
+
## eye_color mean_mass mean_birth_year count
|
|
4177
|
+
## <chr> <dbl> <dbl> <dbl>
|
|
4178
|
+
## 1 black 76.3 33 10
|
|
4179
|
+
## 2 blue 86.5 67.1 19
|
|
4180
|
+
## 3 blue-gray 77 57 1
|
|
4181
|
+
## 4 brown 66.1 109. 21
|
|
4182
|
+
## 5 dark NaN NaN 1
|
|
4183
|
+
## 6 gold NaN NaN 1
|
|
4184
|
+
## 7 green, yellow 159 NaN 1
|
|
4185
|
+
## 8 hazel 66 34.5 3
|
|
4186
|
+
## 9 orange 282. 231 8
|
|
4187
|
+
## 10 pink NaN NaN 1
|
|
4188
|
+
## 11 red 81.4 33.7 5
|
|
4189
|
+
## 12 red, blue NaN NaN 1
|
|
4190
|
+
## 13 unknown 31.5 NaN 3
|
|
4191
|
+
## 14 white 48 NaN 1
|
|
4192
|
+
## 15 yellow 81.1 76.4 11
|
|
2676
4193
|
```
|
|
2677
4194
|
|
|
2678
4195
|
|
|
@@ -2681,7 +4198,6 @@ puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year")).as__data
|
|
|
2681
4198
|
|
|
2682
4199
|
# Contributing
|
|
2683
4200
|
|
|
2684
|
-
|
|
2685
4201
|
* Fork it
|
|
2686
4202
|
* Create your feature branch (git checkout -b my-new-feature)
|
|
2687
4203
|
* Write Tests!
|
|
@@ -2689,3 +4205,4 @@ puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year")).as__data
|
|
|
2689
4205
|
* Push to the branch (git push origin my-new-feature)
|
|
2690
4206
|
* Create new Pull Request
|
|
2691
4207
|
|
|
4208
|
+
# References
|