galaaz 0.4.2 → 0.4.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +25 -0
- data/Rakefile +8 -0
- data/bin/gknit +9 -5
- data/bin/gstudio +4 -2
- data/bin/gstudio.rb +32 -2
- data/blogs/dev/dev.html +219 -34
- data/blogs/dev/dev.md +26 -26
- data/blogs/dev/dev_files/figure-html/bubble-1.png +0 -0
- data/blogs/dev/dev_files/figure-html/diverging_bar.png +0 -0
- data/blogs/dplyr/dplyr.rb +63 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +38 -26
- data/blogs/galaaz_ggplot/galaaz_ggplot.aux +16 -17
- data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot.tex +65 -31
- data/blogs/oh_my/not_so.rb +2342 -0
- data/blogs/oh_my/oh_my.Rmd +493 -0
- data/blogs/oh_my/oh_my.html +680 -0
- data/blogs/oh_my/oh_my.md +597 -0
- data/blogs/oh_my/old.Rmd +2100 -0
- data/blogs/ruby_plot/figures/facets_with_decorations.png +0 -0
- data/blogs/ruby_plot/figures/facets_with_jitter.png +0 -0
- data/blogs/ruby_plot/figures/final_box_plot.png +0 -0
- data/blogs/ruby_plot/figures/final_violin_plot.png +0 -0
- data/blogs/ruby_plot/figures/violin_with_jitter.png +0 -0
- data/blogs/ruby_plot/ruby_plot.Rmd +147 -122
- data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +662 -0
- data/blogs/ruby_plot/ruby_plot.html +49 -54
- data/blogs/ruby_plot/ruby_plot.md +147 -122
- data/blogs/ruby_plot/ruby_plot.pdf +0 -0
- data/blogs/ruby_plot/ruby_plot.tex +776 -157
- data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +57 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +106 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +110 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +174 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +236 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +296 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +236 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +218 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +128 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +150 -0
- data/examples/islr/ch2.spec.rb +21 -18
- data/examples/islr/ch3_boston.rb +14 -5
- data/examples/islr/ch3_multiple_regression.rb +2 -3
- data/examples/islr/ch6.spec.rb +1 -1
- data/examples/islr/x_y_rnorm.jpg +0 -0
- data/lib/R_interface/r.rb +14 -10
- data/lib/R_interface/r_libs.R +9 -0
- data/lib/R_interface/r_methods.rb +77 -6
- data/lib/R_interface/{expression.rb → r_module_s.rb} +13 -14
- data/lib/R_interface/rbinary_operators.rb +58 -71
- data/lib/R_interface/rdata_frame.rb +2 -1
- data/lib/R_interface/rdevices.R +4 -0
- data/lib/R_interface/rdevices.rb +1 -1
- data/lib/R_interface/renvironment.rb +34 -1
- data/lib/R_interface/rexpression.rb +108 -2
- data/lib/R_interface/rindexed_object.rb +3 -1
- data/lib/R_interface/rlanguage.rb +18 -2
- data/lib/R_interface/rmatrix.rb +14 -0
- data/lib/R_interface/rmd_indexed_object.rb +5 -1
- data/lib/R_interface/robject.rb +61 -23
- data/lib/R_interface/rsupport.rb +111 -53
- data/lib/R_interface/rsymbol.rb +6 -5
- data/lib/R_interface/ruby_extensions.rb +130 -4
- data/lib/R_interface/runary_operators.rb +35 -3
- data/lib/R_interface/rvector.rb +1 -0
- data/lib/galaaz.rb +0 -2
- data/lib/gknit/knitr_engine.rb +58 -4
- data/lib/gknit/ruby_engine.rb +5 -6
- data/lib/util/exec_ruby.rb +55 -9
- data/specs/all.rb +13 -3
- data/specs/figures/dose_len.png +0 -0
- data/specs/r_dataframe.spec.rb +49 -26
- data/specs/r_environment.spec.rb +140 -0
- data/specs/r_eval.spec.rb +0 -15
- data/specs/r_formula.spec.rb +232 -0
- data/specs/r_function.spec.rb +7 -8
- data/specs/r_list.spec.rb +4 -0
- data/specs/r_list_apply.spec.rb +11 -11
- data/specs/r_matrix.spec.rb +3 -3
- data/specs/{r_plots.spec.rb~ → r_nse.spec.rb} +29 -6
- data/specs/r_vector_creation.spec.rb +6 -0
- data/specs/r_vector_object.spec.rb +2 -2
- data/specs/r_vector_operators.spec.rb +3 -3
- data/specs/r_vector_subsetting.spec.rb +4 -4
- data/specs/ruby_expression.spec.rb +324 -0
- data/specs/tmp.rb +12 -524
- data/sty/galaaz.sty +71 -0
- data/version.rb +1 -1
- metadata +31 -41
- data/bin/gknit2~ +0 -6
- data/bin/ogk~ +0 -4
- data/bin/prepareR.rb~ +0 -1
- data/blogs/dev/dev.Rmd~ +0 -104
- data/blogs/galaaz_ggplot/galaaz_ggplot.dvi +0 -0
- data/blogs/galaaz_ggplot/midwest_external_png~ +0 -1
- data/blogs/gknit/gknit.Rmd~ +0 -184
- data/blogs/gknit/gknit.Rnd~ +0 -17
- data/blogs/gknit/model.rb~ +0 -46
- data/blogs/ruby_plot/ruby_plot.Rmd~ +0 -215
- data/examples/islr/Figure.jpg +0 -0
- data/examples/misc/moneyball.rb~ +0 -16
- data/examples/misc/subsetting.rb~ +0 -372
- data/lib/R/eng_ruby.R~ +0 -63
- data/lib/R_interface/capture_plot.rb~ +0 -23
- data/lib/R_interface/r.rb~ +0 -121
- data/lib/R_interface/rdevices.rb~ +0 -27
- data/lib/gknit.rb~ +0 -26
- data/lib/gknit/knitr_engine.rb~ +0 -102
- data/lib/gknit/ruby_engine.rb~ +0 -72
- data/lib/util/inline_file.rb~ +0 -23
- data/r_requires/knitr.rb~ +0 -4
- data/specs/r_language.spec.rb +0 -157
data/blogs/gknit/gknit.Rnd~
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
---
|
2
|
-
title: "gKnit - Ruby Knitting with Galaaz"
|
3
|
-
subtitle: "An example of tightly coupling Ruby and R in GraalVM"
|
4
|
-
author: "Rodrigo Botafogo"
|
5
|
-
tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, ggplot2]
|
6
|
-
date: "16 October 2018"
|
7
|
-
output:
|
8
|
-
html_document:
|
9
|
-
md_document:
|
10
|
-
variant: markdown_github
|
11
|
-
---
|
12
|
-
|
13
|
-
```{r setup, echo=FALSE}
|
14
|
-
|
15
|
-
```
|
16
|
-
|
17
|
-
# Introduction
|
data/blogs/gknit/model.rb~
DELETED
@@ -1,46 +0,0 @@
|
|
1
|
-
require 'galaaz'
|
2
|
-
|
3
|
-
# Loads the R 'caret' package. If not present, installs it
|
4
|
-
R.install_and_loads 'caret'
|
5
|
-
|
6
|
-
class Model
|
7
|
-
|
8
|
-
attr_reader :data
|
9
|
-
attr_reader :test
|
10
|
-
attr_reader :train
|
11
|
-
|
12
|
-
#==========================================================
|
13
|
-
#
|
14
|
-
#==========================================================
|
15
|
-
|
16
|
-
def initialize(data, percent_train:, seed: 123)
|
17
|
-
|
18
|
-
R.set__seed(seed)
|
19
|
-
@data = data
|
20
|
-
@percent_train = percent_train
|
21
|
-
@seed = seed
|
22
|
-
|
23
|
-
end
|
24
|
-
|
25
|
-
#==========================================================
|
26
|
-
#
|
27
|
-
#==========================================================
|
28
|
-
|
29
|
-
def partition
|
30
|
-
|
31
|
-
train_index =
|
32
|
-
R.createDataPartition(@data.mpg, p: @percent_train,
|
33
|
-
list: false, times: 1)
|
34
|
-
@train = @data[train_index, :all]
|
35
|
-
@test = @data[-train_index, :all]
|
36
|
-
|
37
|
-
end
|
38
|
-
|
39
|
-
end
|
40
|
-
|
41
|
-
mtcars = ~:mtcars
|
42
|
-
model = Model.new(mtcars, percent_train: 0.8)
|
43
|
-
model.partition
|
44
|
-
puts model.train.head
|
45
|
-
puts model.test.head
|
46
|
-
|
@@ -1,215 +0,0 @@
|
|
1
|
-
---
|
2
|
-
title: "High Quality Scientific Plotting with Ruby in GraalVM"
|
3
|
-
subtitle: "Also: Allowing R to use classes, modules, blocks, etc."
|
4
|
-
author: "Rodrigo Botafogo"
|
5
|
-
tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM]
|
6
|
-
date: "19 October 2018"
|
7
|
-
output:
|
8
|
-
html_document:
|
9
|
-
self_contained: true
|
10
|
-
keep_md: true
|
11
|
-
pdf_document:
|
12
|
-
includes:
|
13
|
-
in_header: ["../../sty/galaaz.sty"]
|
14
|
-
number_sections: yes
|
15
|
-
---
|
16
|
-
|
17
|
-
```{r setup, echo=FALSE}
|
18
|
-
|
19
|
-
```
|
20
|
-
|
21
|
-
# Introduction
|
22
|
-
|
23
|
-
Ruby is a dynamic, interpreted, reflective, object-oriented, general-purpose
|
24
|
-
programming language. It was designed and developed in the mid-1990s by Yukihiro
|
25
|
-
"Matz" Matsumoto in Japan. It reached high popularity with the development of Ruby on Rails
|
26
|
-
(RoR) by David Heinemeier Hansson. RoR is a web application framework which was first released
|
27
|
-
circa 2005 and makes extensive use of Ruby's metaprogramming features. With the advent of
|
28
|
-
RoR, Ruby became extremely popular and it peaked in popularity around 2008 according to the Tiobe
|
29
|
-
index (https://www.tiobe.com/tiobe-index/ruby/). From 2008 to 2015, its popularity
|
30
|
-
declined consistently and then started picking up again during the next 3 years. At the time of
|
31
|
-
this writing (November 2018), Ruby is ranked 16th in the Tiobe index.
|
32
|
-
|
33
|
-
Python, considered a similar language to Ruby with similar features ranks 4th in the index. The
|
34
|
-
first three positions are taken by Java, C and C++. One criticism often heard about Ruby is
|
35
|
-
that it is useful only for web applications while Python, with similar features has more diverse
|
36
|
-
libraries, being useful for web applications with the Django framework, but also for
|
37
|
-
scientific applications such as statistics, data analysis, big data, biology, etc. This
|
38
|
-
criticism is by no means wrong. For scientific computing, Ruby lags way behind Python and R, the
|
39
|
-
two most prestigious languages for this subject matter, with R being preferred by statisticians
|
40
|
-
while Python is preferred by everyone else, because of its gentle learning curve and more
|
41
|
-
"natural" programming paradigm.
|
42
|
-
|
43
|
-
Comes GraalVM into the picture:
|
44
|
-
|
45
|
-
GraalVM is a universal virtual machine for running applications written in JavaScript,
|
46
|
-
Python 3, Ruby, R, JVM-based languages like Java, Scala, Kotlin, and LLVM-based languages
|
47
|
-
such as C and C++.
|
48
|
-
|
49
|
-
GraalVM removes the isolation between programming languages and enables interoperability in a
|
50
|
-
shared runtime. It can run either standalone or in the context of OpenJDK, Node.js,
|
51
|
-
Oracle Database, or MySQL.
|
52
|
-
|
53
|
-
GraalVM allows you to write polyglot applications with a seamless way to pass values from one
|
54
|
-
language to another. With GraalVM there is no copying or marshaling necessary as it is with
|
55
|
-
other polyglot systems. This lets you achieve high performance when language boundaries are
|
56
|
-
crossed. Most of the time there is no additional cost for crossing a language boundary at all.
|
57
|
-
|
58
|
-
Often developers have to make uncomfortable compromises that require them to rewrite
|
59
|
-
their software in other languages. For example:
|
60
|
-
|
61
|
-
* “That library is not available in my language. I need to rewrite it.”
|
62
|
-
* “That language would be the perfect fit for my problem, but we cannot run it in our environment.”
|
63
|
-
* “That problem is already solved in my language, but the language is too slow.”
|
64
|
-
|
65
|
-
With GraalVM we aim to allow developers to freely choose the right language for the task at
|
66
|
-
hand without making compromises.
|
67
|
-
|
68
|
-
As stated above, GraalVM is a _universal_ virtual machine that allows Ruby and R (and other
|
69
|
-
languages) to run on the same environment. GraalVM allows polyglot applications to
|
70
|
-
_seamlessly_ interact with one another and pass values from one language to the other. Based
|
71
|
-
on GraalVM, the Galaaz project was started. Galaaz intends to integrate Ruby and R and allow
|
72
|
-
those languages to _seamlessly_ interact in a way that the user will be unaware of such interaction.
|
73
|
-
|
74
|
-
Library wrapping is a usual way of bringing features from one library into another language. For
|
75
|
-
instance, whenever Python needs to perform operations efficiently, C libraries are wrapped in Python.
|
76
|
-
For the Python developer, the existence of such a C library is of no importance. The problem with
|
77
|
-
library wrapping is that for any new library of interest, there is the need to hand craft a new
|
78
|
-
wrapper. With Galaaz, the same concept of wrapping was done, but instead of wrapping a C or an R
|
79
|
-
library, Galaaz intends to wrap the whole of the R language. Doing so, all thousands of R libraries
|
80
|
-
are immediately available to Ruby developers and any new library developed in R will also become
|
81
|
-
available without requiring a new wrapping effort.
|
82
|
-
|
83
|
-
In this article, the graphing ggplot2 library from R will be accessed by Ruby transparently,
|
84
|
-
bringing to Ruby the power of high quality scientific plotting. It might seem, from
|
85
|
-
the exposed above, that Galaaz mainly benefits Ruby developers and might be of no
|
86
|
-
consequence to the R developer. This article will however show that migrating from R to
|
87
|
-
Ruby with Galaaz is a matter of small syntactic changes. Furthermore, R lacks some
|
88
|
-
fundamental constructs for code reuse and large system construction. Using Galaaz, the R
|
89
|
-
developer can easily migrate to a powerful OO language, at virtually no cost and then, as
|
90
|
-
needs require, she can add them to her toolbox.
|
91
|
-
|
92
|
-
In this article we will explore the R ToothGrowth dataset. In doing so, we will create some plots.
|
93
|
-
Furthermore we will create a "Corporate Template" for our plots ensuring that any plot of the
|
94
|
-
same type will have a consistent visualisation.
|
95
|
-
|
96
|
-
# gKnit
|
97
|
-
|
98
|
-
This document was written using rmarkdown and the corresponding HTML was generated by the gKnit
|
99
|
-
application. gKnit is a wrapper around the powerful 'knitr' application which converts
|
100
|
-
rmarkdown text to many different output formats such as HTML, Latex, docx, etc. The gKnit
|
101
|
-
tool is still under active development and will soon be released.
|
102
|
-
|
103
|
-
In rmarkdown, text and code can be part of the same document, and code blocks are marked
|
104
|
-
with a special markup. Interested readers can easily google 'knitr' and 'rmarkdown'. In
|
105
|
-
gKnit, each Ruby block is evaluated independently and 'eval' in Ruby creates a new scope, so,
|
106
|
-
in order for a variable defined in a block to be accessible in another block, it has to be
|
107
|
-
a global variable, preceded by the '$' sign.
|
108
|
-
|
109
|
-
# Exploring the Dataset
|
110
|
-
|
111
|
-
Let's start by exploring our selected dataset. In this dataset the response is the length of
|
112
|
-
odontoblasts (cells responsible for tooth growth) in 60 guinea pigs. Each animal
|
113
|
-
received one of three dose levels of vitamin C (0.5, 1, and 2 mg/day) by one of two
|
114
|
-
delivery methods, orange juice or ascorbic acid (a form of vitamin C and coded as VC).
|
115
|
-
|
116
|
-
In Galaaz, in order to have access to an R variable pointed by an R symbol we use the
|
117
|
-
corresponding Ruby symbol preceded by the tilde ('~') function.
|
118
|
-
|
119
|
-
```{ruby tooth_growth}
|
120
|
-
# Read the R ToothGrowth variable and assign it to the
|
121
|
-
# Ruby tooth_growth variable
|
122
|
-
$tooth_growth = ~:ToothGrowth
|
123
|
-
# convert the dose to a factor
|
124
|
-
$tooth_growth.dose = $tooth_growth.dose.as__factor
|
125
|
-
|
126
|
-
# print the first few elements of the dataset
|
127
|
-
puts $tooth_growth.head
|
128
|
-
```
|
129
|
-
|
130
|
-
Great! We've managed to read the ToothGrowth dataset and take a look at its elements. Observe
|
131
|
-
that we have three columns in this dataset: 'len', 'supp' and 'dose'. Accessing a column,
|
132
|
-
for example the 'len' column, is done by doing '$tooth_growth.len'.
|
133
|
-
|
134
|
-
Let's explore some more details of this dataset. In particular, let's look at its dimensions,
|
135
|
-
structure and summary statistics.
|
136
|
-
|
137
|
-
```{ruby stats}
|
138
|
-
puts $tooth_growth.dim
|
139
|
-
# check why NULL
|
140
|
-
puts R.str(:ToothGrowth)
|
141
|
-
puts $tooth_growth.summary
|
142
|
-
```
|
143
|
-
|
144
|
-
Let's now create our first plot with the given data by accessing ggplot2 from Ruby. For Rubyists
|
145
|
-
that have never seen or used ggplot2, here is the description found on ggplot home page:
|
146
|
-
|
147
|
-
```
|
148
|
-
"ggplot2 is a system for declaratively creating graphics, based on _The Grammar of Graphics_.
|
149
|
-
You provide the data, tell ggplot2 how to map variables to aesthetics, what graphical
|
150
|
-
primitives to use, and it takes care of the details."
|
151
|
-
```
|
152
|
-
|
153
|
-
This description might be a bit cryptic and it is best to see it at work to understand it.
|
154
|
-
Basically, in the _grammar of graphics_ each component of the plot such as the grid, the axis,
|
155
|
-
the data, title, subtitle, etc. is added to the plot in layers to form the final graphics.
|
156
|
-
|
157
|
-
In the plot below, the 'dose' is plotted on the 'x' axis and the tooth length on the 'y' axis. Note
|
158
|
-
the specification in the 'aes' method: 'E.aes(x: :dose, y: :len)', where ':dose' is the 'dose'
|
159
|
-
column of the dataset and ':len' the 'len' column. The 'aes' method is the _aesthetics_ for this
|
160
|
-
plot. Then, to this layer, the 'geom_boxplot' is added and the whole plot is printed.
|
161
|
-
|
162
|
-
Note also that we have a call to 'R.png' before plotting and 'R.dev__off' after the print
|
163
|
-
statement. 'R.png' opens a 'png' device for writing the plot. When 'R.dev__off' is called, the
|
164
|
-
device is closed and a 'png' file is created. If no name is given to the 'png' function, a file
|
165
|
-
named 'Rplot<nnn>' is generated, where <nnn> is the number of the plot. So, this first plot is
|
166
|
-
called 'Rplot001.png'. We can then include the generated 'png' file in
|
167
|
-
this document, by adding an rmarkdown directive.
|
168
|
-
|
169
|
-
```{ruby first_plot}
|
170
|
-
require 'ggplot'
|
171
|
-
|
172
|
-
R.png
|
173
|
-
|
174
|
-
e = $tooth_growth.ggplot(E.aes(x: :dose, y: :len))
|
175
|
-
print e + R.geom_boxplot
|
176
|
-
|
177
|
-
R.dev__off
|
178
|
-
```
|
179
|
-
|
180
|
-
![ToothGrowth](Rplot001.png)
|
181
|
-
|
182
|
-
We've just managed to generate our first plot in Ruby with only two lines of code. This plot,
|
183
|
-
however, is far from being pleasing to the eye.
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
# Conclusion
|
189
|
-
|
190
|
-
|
191
|
-
# Installing Galaaz
|
192
|
-
|
193
|
-
## Prerequisites
|
194
|
-
|
195
|
-
* GraalVM (>= rc8)
|
196
|
-
* TruffleRuby
|
197
|
-
* FastR
|
198
|
-
|
199
|
-
The following R packages will be automatically installed when necessary, but could be installed prior
|
200
|
-
to using gKnit if desired:
|
201
|
-
|
202
|
-
* ggplot2
|
203
|
-
* gridExtra
|
204
|
-
* knitr
|
205
|
-
|
206
|
-
Installation of R packages requires a development environment and can be time consuming. In Linux,
|
207
|
-
the gnu compiler and tools should be enough. I am not sure what is needed on the Mac.
|
208
|
-
|
209
|
-
## Preparation
|
210
|
-
|
211
|
-
* gem install galaaz
|
212
|
-
|
213
|
-
## Usage
|
214
|
-
|
215
|
-
* gknit <filename>
|
data/examples/islr/Figure.jpg
DELETED
Binary file
|
data/examples/misc/moneyball.rb~
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
|
3
|
-
require 'galaaz'
|
4
|
-
|
5
|
-
# This dataset comes from Baseball-Reference.com.
|
6
|
-
baseball = R.read__csv("baseball.csv")
|
7
|
-
# Let's look at the data available for Moneyball.
|
8
|
-
moneyball = baseball.subset(baseball.Year < 2002)
|
9
|
-
# Let's see if we can predict the number of wins, by looking at
|
10
|
-
# runs allowed (RA) and runs scored (RS). RD is the runs difference.
|
11
|
-
# We are making a linear model for predicting wins (W) based on RD
|
12
|
-
|
13
|
-
moneyball.RD = moneyball.RS - moneyball.RA
|
14
|
-
wins_reg = R.lm(+:W =~ +:RD, data: moneyball)
|
15
|
-
wins_reg.summary.pp
|
16
|
-
|
@@ -1,372 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
|
3
|
-
require 'galaaz'
|
4
|
-
|
5
|
-
# These examples were extracted from "Advanced R", by Hadley Wickham, available on the
|
6
|
-
# web at: http://adv-r.had.co.nz/Subsetting.html#applications
|
7
|
-
|
8
|
-
#------------------------------------------------------------------------------------------
|
9
|
-
# Lookup tables (character subsetting)
|
10
|
-
# Character matching provides a powerful way to make lookup tables.
|
11
|
-
# Say you want to convert abbreviations:
|
12
|
-
#------------------------------------------------------------------------------------------
|
13
|
-
|
14
|
-
x = R.c("m", "f", "u", "f", "f", "m", "m")
|
15
|
-
lookup = R.c(m: "Male", f: "Female", u: R::NA)
|
16
|
-
lookup[x].pp
|
17
|
-
print("\n")
|
18
|
-
|
19
|
-
# m f u f f m m
|
20
|
-
# "Male" "Female" NA "Female" "Female" "Male" "Male"
|
21
|
-
|
22
|
-
R.unname(lookup[x]).pp
|
23
|
-
print("\n")
|
24
|
-
|
25
|
-
# [1] "Male" "Female" NA "Female" "Female" "Male" "Male"
|
26
|
-
|
27
|
-
|
28
|
-
#------------------------------------------------------------------------------------------
|
29
|
-
# Matching and merging by hand (integer subsetting)
|
30
|
-
#------------------------------------------------------------------------------------------
|
31
|
-
|
32
|
-
# You may have a more complicated lookup table which has multiple columns of information.
|
33
|
-
# Suppose we have a vector of grades, and a table that describes their properties:
|
34
|
-
# In R a vector c(1, 2, 3) is a double vector, when using polyglot R.c(1, 2, 3) is an
|
35
|
-
# integer vector, the equivalent of doing c(1L, 2L, 3L) in R. Function 'match' does not
|
36
|
-
# work correctly with integer vector, it has to be a double.
|
37
|
-
grades = R.c(1.0, 2.0, 2.0, 3.0, 1.0)
|
38
|
-
|
39
|
-
info = R.data__frame(
|
40
|
-
grade: (3..1),
|
41
|
-
desc: R.c("Excellent", "Good", "Poor"),
|
42
|
-
fail: R.c(false, false, true)
|
43
|
-
)
|
44
|
-
|
45
|
-
# We want to duplicate the info table so that we have a row for each value in grades.
|
46
|
-
# We can do this in two ways, either using match() and integer subsetting,
|
47
|
-
# or rownames() and character subsetting:
|
48
|
-
|
49
|
-
# Using match
|
50
|
-
id = R.match(grades, info.grade)
|
51
|
-
info[id, :all].pp
|
52
|
-
print("\n")
|
53
|
-
|
54
|
-
# grade desc fail
|
55
|
-
# 3 1 Poor TRUE
|
56
|
-
# 2 2 Good FALSE
|
57
|
-
# 2.1 2 Good FALSE
|
58
|
-
# 1 3 Excellent FALSE
|
59
|
-
# 3.1 1 Poor TRUE
|
60
|
-
|
61
|
-
# Using rownames
|
62
|
-
info.rownames = info.grade
|
63
|
-
info[grades.as__character, :all].pp
|
64
|
-
print("\n")
|
65
|
-
|
66
|
-
# grade desc fail
|
67
|
-
# 1 3 Excellent FALSE
|
68
|
-
# 2 2 Good FALSE
|
69
|
-
# 2.1 2 Good FALSE
|
70
|
-
# 3 1 Poor TRUE
|
71
|
-
# 1.1 3 Excellent FALSE
|
72
|
-
|
73
|
-
#------------------------------------------------------------------------------------------
|
74
|
-
# Random samples/bootstrap (integer subsetting)
|
75
|
-
#------------------------------------------------------------------------------------------
|
76
|
-
|
77
|
-
# You can use integer indices to perform random sampling or bootstrapping
|
78
|
-
# of a vector or data frame. sample() generates a vector of indices, then
|
79
|
-
# subsetting to access the values:
|
80
|
-
df = R.data__frame(x: R.rep((1..3), each: 2), y: (6..1), z: R.letters[(1..6)])
|
81
|
-
|
82
|
-
# Set seed for reproducibility
|
83
|
-
R.set__seed(10)
|
84
|
-
|
85
|
-
# Randomly reorder
|
86
|
-
df[R.sample(df.nrow), :all].pp
|
87
|
-
print("\n")
|
88
|
-
|
89
|
-
# x y z
|
90
|
-
# 4 2 3 d
|
91
|
-
# 2 1 5 b
|
92
|
-
# 5 3 2 e
|
93
|
-
# 3 2 4 c
|
94
|
-
# 1 1 6 a
|
95
|
-
# 6 3 1 f
|
96
|
-
|
97
|
-
# Select 3 random rows
|
98
|
-
df[R.sample(df.nrow, 3), :all].pp
|
99
|
-
print("\n")
|
100
|
-
|
101
|
-
# x y z
|
102
|
-
# 2 1 5 b
|
103
|
-
# 6 3 1 f
|
104
|
-
# 3 2 4 c
|
105
|
-
|
106
|
-
# Select 6 bootstrap replicates
|
107
|
-
df[R.sample(df.nrow, 6, rep: true), :all].pp
|
108
|
-
print("\n")
|
109
|
-
|
110
|
-
# x y z
|
111
|
-
# 3 2 4 c
|
112
|
-
# 4 2 3 d
|
113
|
-
# 4.1 2 3 d
|
114
|
-
# 1 1 6 a
|
115
|
-
# 4.2 2 3 d
|
116
|
-
# 3.1 2 4 c
|
117
|
-
|
118
|
-
#------------------------------------------------------------------------------------------
|
119
|
-
# Ordering (integer subsetting)
|
120
|
-
#------------------------------------------------------------------------------------------
|
121
|
-
|
122
|
-
x = R.c("b", "c", "a")
|
123
|
-
x.order.pp
|
124
|
-
print("\n")
|
125
|
-
|
126
|
-
# [1] 3 1 2
|
127
|
-
|
128
|
-
x[x.order].pp
|
129
|
-
print("\n")
|
130
|
-
|
131
|
-
# [1] "a" "b" "c"
|
132
|
-
|
133
|
-
# Randomly reorder df
|
134
|
-
df2 = df[R.sample(df.nrow), (3..1)]
|
135
|
-
df2.pp
|
136
|
-
print("\n")
|
137
|
-
|
138
|
-
# z y x
|
139
|
-
# 3 c 4 2
|
140
|
-
# 1 a 6 1
|
141
|
-
# 2 b 5 1
|
142
|
-
# 4 d 3 2
|
143
|
-
# 6 f 1 3
|
144
|
-
# 5 e 2 3
|
145
|
-
|
146
|
-
df2[df2.x.order, :all].pp
|
147
|
-
print("\n")
|
148
|
-
|
149
|
-
# z y x
|
150
|
-
# 1 a 6 1
|
151
|
-
# 2 b 5 1
|
152
|
-
# 3 c 4 2
|
153
|
-
# 4 d 3 2
|
154
|
-
# 6 f 1 3
|
155
|
-
# 5 e 2 3
|
156
|
-
|
157
|
-
df2[:all, df2.names.order].pp
|
158
|
-
print("\n")
|
159
|
-
|
160
|
-
# x y z
|
161
|
-
# 3 2 4 c
|
162
|
-
# 1 1 6 a
|
163
|
-
# 2 1 5 b
|
164
|
-
# 4 2 3 d
|
165
|
-
# 6 3 1 f
|
166
|
-
# 5 3 2 e
|
167
|
-
|
168
|
-
#------------------------------------------------------------------------------------------
|
169
|
-
# Expanding aggregated counts (integer subsetting)
|
170
|
-
#
|
171
|
-
# Sometimes you get a data frame where identical rows have been collapsed into one and a
|
172
|
-
# count column has been added. rep() and integer subsetting make it easy to uncollapse
|
173
|
-
# the data by subsetting with a repeated row index:
|
174
|
-
#------------------------------------------------------------------------------------------
|
175
|
-
|
176
|
-
df = R.data__frame(x: R.c(2, 4, 1), y: R.c(9, 11, 6), n: R.c(3, 5, 1))
|
177
|
-
R.rep((1..df.nrow), df.n).pp
|
178
|
-
print("\n")
|
179
|
-
|
180
|
-
# [1] 1 1 1 2 2 2 2 2 3
|
181
|
-
|
182
|
-
df[R.rep((1..df.nrow), df.n), :all].pp
|
183
|
-
print("\n")
|
184
|
-
|
185
|
-
# x y n
|
186
|
-
# 1 2 9 3
|
187
|
-
# 1.1 2 9 3
|
188
|
-
# 1.2 2 9 3
|
189
|
-
# 2 4 11 5
|
190
|
-
# 2.1 4 11 5
|
191
|
-
# 2.2 4 11 5
|
192
|
-
# 2.3 4 11 5
|
193
|
-
# 2.4 4 11 5
|
194
|
-
# 3 1 6 1
|
195
|
-
|
196
|
-
#------------------------------------------------------------------------------------------
|
197
|
-
# Removing columns from data frames (character subsetting)
|
198
|
-
#
|
199
|
-
# There are two ways to remove columns from a data frame. You can set individual columns
|
200
|
-
# to nil:
|
201
|
-
#------------------------------------------------------------------------------------------
|
202
|
-
|
203
|
-
df = R.data__frame(x: (1..3), y: (3..1), z: R.letters[(1..3)])
|
204
|
-
# Not implemented yet
|
205
|
-
# df.z = nil
|
206
|
-
df.pp
|
207
|
-
print("\n")
|
208
|
-
|
209
|
-
df = R.data__frame(x: (1..3), y: (3..1), z: R.letters[(1..3)])
|
210
|
-
df[R.c("x", "y")].pp
|
211
|
-
print("\n")
|
212
|
-
|
213
|
-
# x y
|
214
|
-
# 1 1 3
|
215
|
-
# 2 2 2
|
216
|
-
# 3 3 1
|
217
|
-
|
218
|
-
df[df.names.setdiff("z")].pp
|
219
|
-
print("\n")
|
220
|
-
|
221
|
-
# x y
|
222
|
-
# 1 1 3
|
223
|
-
# 2 2 2
|
224
|
-
# 3 3 1
|
225
|
-
|
226
|
-
#------------------------------------------------------------------------------------------
|
227
|
-
# Selecting rows based on a condition (logical subsetting)
|
228
|
-
#
|
229
|
-
# Because it allows you to easily combine conditions from multiple columns, logical
|
230
|
-
# subsetting is probably the most commonly used technique for extracting rows out of
|
231
|
-
# a data frame.
|
232
|
-
#------------------------------------------------------------------------------------------
|
233
|
-
|
234
|
-
R.mtcars[R.mtcars.gear == 5, :all].pp
|
235
|
-
print("\n")
|
236
|
-
|
237
|
-
# mpg cyl disp hp drat wt qsec vs am gear carb
|
238
|
-
# Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2
|
239
|
-
# Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
|
240
|
-
# Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.5 0 1 5 4
|
241
|
-
# Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.5 0 1 5 6
|
242
|
-
# Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.6 0 1 5 8
|
243
|
-
|
244
|
-
R.mtcars[(R.mtcars.gear == 5) & (R.mtcars.cyl == 4), :all].pp
|
245
|
-
print("\n")
|
246
|
-
|
247
|
-
# mpg cyl disp hp drat wt qsec vs am gear carb
|
248
|
-
# Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2
|
249
|
-
# Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
|
250
|
-
|
251
|
-
|
252
|
-
#------------------------------------------------------------------------------------------
|
253
|
-
# Boolean algebra vs. sets (logical & integer subsetting)
|
254
|
-
#
|
255
|
-
# It’s useful to be aware of the natural equivalence between set operations (integer
|
256
|
-
# subsetting) and boolean algebra (logical subsetting)
|
257
|
-
#------------------------------------------------------------------------------------------
|
258
|
-
|
259
|
-
x = R.sample(10) < 4
|
260
|
-
x.which.pp
|
261
|
-
print("\n")
|
262
|
-
|
263
|
-
# [1] 3 7 10
|
264
|
-
|
265
|
-
#===
|
266
|
-
x1 = R.c((1..10)) % 2 == 0
|
267
|
-
x1.pp
|
268
|
-
print("\n")
|
269
|
-
|
270
|
-
# [1] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE
|
271
|
-
|
272
|
-
#===
|
273
|
-
x2 = x1.which
|
274
|
-
x2.pp
|
275
|
-
print("\n")
|
276
|
-
|
277
|
-
# [1] 2 4 6 8 10
|
278
|
-
|
279
|
-
#===
|
280
|
-
y1 = R.c((1..10)) % 5 == 0
|
281
|
-
y1.pp
|
282
|
-
print("\n")
|
283
|
-
|
284
|
-
# [1] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE
|
285
|
-
|
286
|
-
#===
|
287
|
-
y2 = y1.which
|
288
|
-
y2.pp
|
289
|
-
print("\n")
|
290
|
-
|
291
|
-
# [1] 5 10
|
292
|
-
|
293
|
-
#===
|
294
|
-
# X & Y <-> intersect(x, y)
|
295
|
-
(x1 & y1).pp
|
296
|
-
print("\n")
|
297
|
-
|
298
|
-
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
|
299
|
-
|
300
|
-
#===
|
301
|
-
# This example shows the problem with having R objects returning either
|
302
|
-
# vector or scalar. We don't know the type of the result of applying
|
303
|
-
# intersect. If this is a vector, then we need to print it with pp
|
304
|
-
# but if this is a scalar, we need to print it with regular Ruby 'p' or
|
305
|
-
# 'print'
|
306
|
-
p R.intersect(x2, y2)
|
307
|
-
print("\n")
|
308
|
-
|
309
|
-
# 10
|
310
|
-
|
311
|
-
p x2.intersect y2
|
312
|
-
|
313
|
-
# 10
|
314
|
-
|
315
|
-
#===
|
316
|
-
# X | Y <-> union(x, y)
|
317
|
-
(x1 | y1).pp
|
318
|
-
print("\n")
|
319
|
-
|
320
|
-
# [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE TRUE
|
321
|
-
|
322
|
-
#===
|
323
|
-
R.union(x2, y2).pp
|
324
|
-
print("\n")
|
325
|
-
|
326
|
-
# [1] 2 4 6 8 10 5
|
327
|
-
|
328
|
-
(x2.union y2).pp
|
329
|
-
|
330
|
-
# [1] 2 4 6 8 10 5
|
331
|
-
|
332
|
-
#===
|
333
|
-
# X & !Y <-> setdiff(x, y)
|
334
|
-
(x1 & !y1).pp
|
335
|
-
print("\n")
|
336
|
-
|
337
|
-
# [1] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE FALSE
|
338
|
-
|
339
|
-
#===
|
340
|
-
R.setdiff(x2, y2).pp
|
341
|
-
print("\n")
|
342
|
-
|
343
|
-
# [1] 2 4 6 8
|
344
|
-
|
345
|
-
(x2.setdiff y2).pp
|
346
|
-
|
347
|
-
# [1] 2 4 6 8
|
348
|
-
|
349
|
-
|
350
|
-
#===
|
351
|
-
# xor(X, Y) <-> setdiff(union(x, y), intersect(x, y))
|
352
|
-
R.xor(x1, y1).pp
|
353
|
-
print("\n")
|
354
|
-
|
355
|
-
# [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE
|
356
|
-
|
357
|
-
# Writing the same as the last example in a Ruby style
|
358
|
-
(x1.xor y1).pp
|
359
|
-
|
360
|
-
# [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE
|
361
|
-
|
362
|
-
#===
|
363
|
-
R.setdiff(R.union(x2, y2), R.intersect(x2, y2)).pp
|
364
|
-
print("\n")
|
365
|
-
|
366
|
-
# [1] 2 4 6 8 5
|
367
|
-
|
368
|
-
# Writing the same as the last example in a Ruby style
|
369
|
-
((x2.union y2).setdiff (x2.intersect y2)).pp
|
370
|
-
print("\n")
|
371
|
-
|
372
|
-
# [1] 2 4 6 8 5
|