galaaz 0.4.6 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +3575 -118
- data/Rakefile +21 -4
- data/bin/gknit +152 -6
- data/bin/gknit-draft +105 -0
- data/bin/gknit-draft.rb +28 -0
- data/bin/gknit_Rscript +127 -0
- data/bin/grun +27 -1
- data/bin/gstudio +47 -4
- data/bin/{gstudio.rb → gstudio_irb.rb} +0 -0
- data/bin/gstudio_pry.rb +7 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +3 -12
- data/blogs/galaaz_ggplot/galaaz_ggplot.html +77 -222
- data/blogs/galaaz_ggplot/galaaz_ggplot.md +4 -31
- data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
- data/blogs/galaaz_ggplot/midwest.Rmd +1 -9
- data/blogs/gknit/gknit.Rmd +232 -123
- data/blogs/{dev/dev.html → gknit/gknit.html} +1897 -33
- data/blogs/gknit/gknit.pdf +0 -0
- data/blogs/gknit/lst.rds +0 -0
- data/blogs/gknit/stats.bib +27 -0
- data/blogs/manual/lst.rds +0 -0
- data/blogs/manual/manual.Rmd +1893 -47
- data/blogs/manual/manual.html +3153 -347
- data/blogs/manual/manual.md +3575 -118
- data/blogs/manual/manual.pdf +0 -0
- data/blogs/manual/manual.tex +4026 -0
- data/blogs/manual/manual_files/figure-html/bubble-1.png +0 -0
- data/blogs/manual/manual_files/figure-html/diverging_bar.png +0 -0
- data/blogs/manual/manual_files/figure-latex/bubble-1.png +0 -0
- data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
- data/blogs/{dev → manual}/model.rb +0 -0
- data/blogs/nse_dplyr/nse_dplyr.Rmd +849 -0
- data/blogs/nse_dplyr/nse_dplyr.html +878 -0
- data/blogs/nse_dplyr/nse_dplyr.md +1198 -0
- data/blogs/nse_dplyr/nse_dplyr.pdf +0 -0
- data/blogs/oh_my/oh_my.html +274 -386
- data/blogs/oh_my/oh_my.md +208 -205
- data/blogs/ruby_plot/ruby_plot.Rmd +64 -84
- data/blogs/ruby_plot/ruby_plot.html +235 -208
- data/blogs/ruby_plot/ruby_plot.md +239 -34
- data/blogs/ruby_plot/ruby_plot.pdf +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
- data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
- data/examples/Bibliography/master.bib +50 -0
- data/examples/Bibliography/stats.bib +72 -0
- data/examples/islr/ch2.spec.rb +1 -1
- data/examples/islr/ch3_boston.rb +4 -4
- data/examples/islr/x_y_rnorm.jpg +0 -0
- data/examples/latex_templates/Test-acm_article/Makefile +16 -0
- data/examples/latex_templates/Test-acm_article/Test-acm_article.Rmd +65 -0
- data/examples/latex_templates/Test-acm_article/acm_proc_article-sp.cls +1670 -0
- data/examples/latex_templates/Test-acm_article/sensys-abstract.cls +703 -0
- data/examples/latex_templates/Test-acm_article/sigproc.bib +59 -0
- data/examples/latex_templates/Test-acs_article/Test-acs_article.Rmd +260 -0
- data/examples/latex_templates/Test-acs_article/Test-acs_article.pdf +0 -0
- data/examples/latex_templates/Test-acs_article/acs-Test-acs_article.bib +11 -0
- data/examples/latex_templates/Test-acs_article/acs-my_output.bib +11 -0
- data/examples/latex_templates/Test-acs_article/acstest.bib +17 -0
- data/examples/latex_templates/Test-aea_article/AEA.cls +1414 -0
- data/examples/latex_templates/Test-aea_article/BibFile.bib +0 -0
- data/examples/latex_templates/Test-aea_article/Test-aea_article.Rmd +108 -0
- data/examples/latex_templates/Test-aea_article/Test-aea_article.pdf +0 -0
- data/examples/latex_templates/Test-aea_article/aea.bst +1269 -0
- data/examples/latex_templates/Test-aea_article/multicol.sty +853 -0
- data/examples/latex_templates/Test-aea_article/references.bib +0 -0
- data/examples/latex_templates/Test-aea_article/setspace.sty +546 -0
- data/examples/latex_templates/Test-amq_article/Test-amq_article.Rmd +256 -0
- data/examples/latex_templates/Test-amq_article/Test-amq_article.pdf +0 -0
- data/examples/latex_templates/Test-amq_article/Test-amq_article.pdfsync +3397 -0
- data/examples/latex_templates/Test-amq_article/pics/Figure2.pdf +0 -0
- data/examples/latex_templates/Test-ams_article/Test-ams_article.Rmd +215 -0
- data/examples/latex_templates/Test-ams_article/amstest.bib +436 -0
- data/examples/latex_templates/Test-asa_article/Test-asa_article.Rmd +153 -0
- data/examples/latex_templates/Test-asa_article/Test-asa_article.pdf +0 -0
- data/examples/latex_templates/Test-asa_article/agsm.bst +1353 -0
- data/examples/latex_templates/Test-asa_article/bibliography.bib +233 -0
- data/examples/latex_templates/Test-ieee_article/IEEEtran.bst +2409 -0
- data/examples/latex_templates/Test-ieee_article/IEEEtran.cls +6346 -0
- data/examples/latex_templates/Test-ieee_article/Test-ieee_article.Rmd +175 -0
- data/examples/latex_templates/Test-ieee_article/Test-ieee_article.pdf +0 -0
- data/examples/latex_templates/Test-ieee_article/mybibfile.bib +20 -0
- data/examples/latex_templates/Test-rjournal_article/RJournal.sty +335 -0
- data/examples/latex_templates/Test-rjournal_article/RJreferences.bib +18 -0
- data/examples/latex_templates/Test-rjournal_article/RJwrapper.pdf +0 -0
- data/examples/latex_templates/Test-rjournal_article/Test-rjournal_article.Rmd +52 -0
- data/examples/latex_templates/Test-springer_article/Test-springer_article.Rmd +65 -0
- data/examples/latex_templates/Test-springer_article/Test-springer_article.pdf +0 -0
- data/examples/latex_templates/Test-springer_article/bibliography.bib +26 -0
- data/examples/latex_templates/Test-springer_article/spbasic.bst +1658 -0
- data/examples/latex_templates/Test-springer_article/spmpsci.bst +1512 -0
- data/examples/latex_templates/Test-springer_article/spphys.bst +1443 -0
- data/examples/latex_templates/Test-springer_article/svglov3.clo +113 -0
- data/examples/latex_templates/Test-springer_article/svjour3.cls +1431 -0
- data/examples/misc/moneyball.rb +1 -1
- data/examples/misc/subsetting.rb +37 -37
- data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.Rmd +73 -0
- data/examples/rmarkdown/svm-rmarkdown-anon-ms-example/svm-rmarkdown-anon-ms-example.pdf +0 -0
- data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.Rmd +382 -0
- data/examples/rmarkdown/svm-rmarkdown-article-example/svm-rmarkdown-article-example.pdf +0 -0
- data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.Rmd +164 -0
- data/examples/rmarkdown/svm-rmarkdown-beamer-example/svm-rmarkdown-beamer-example.pdf +0 -0
- data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.Rmd +92 -0
- data/examples/rmarkdown/svm-rmarkdown-cv/svm-rmarkdown-cv.pdf +0 -0
- data/examples/rmarkdown/svm-rmarkdown-syllabus-example/attend-grade-relationships.csv +482 -0
- data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.Rmd +280 -0
- data/examples/rmarkdown/svm-rmarkdown-syllabus-example/svm-rmarkdown-syllabus-example.pdf +0 -0
- data/examples/rmarkdown/svm-xaringan-example/svm-xaringan-example.Rmd +386 -0
- data/lib/R_interface/r.rb +2 -2
- data/lib/R_interface/r_libs.R +6 -1
- data/lib/R_interface/r_methods.rb +12 -2
- data/lib/R_interface/rdata_frame.rb +8 -17
- data/lib/R_interface/rindexed_object.rb +1 -2
- data/lib/R_interface/rlist.rb +1 -0
- data/lib/R_interface/robject.rb +20 -23
- data/lib/R_interface/rpkg.rb +15 -6
- data/lib/R_interface/rsupport.rb +13 -19
- data/lib/R_interface/ruby_extensions.rb +14 -18
- data/lib/R_interface/rvector.rb +0 -12
- data/lib/gknit.rb +2 -0
- data/lib/gknit/draft.rb +105 -0
- data/lib/gknit/knitr_engine.rb +6 -37
- data/lib/util/exec_ruby.rb +22 -84
- data/lib/util/inline_file.rb +7 -3
- data/specs/figures/bg.jpeg +0 -0
- data/specs/figures/bg.png +0 -0
- data/specs/figures/bg.svg +2 -2
- data/specs/figures/dose_len.png +0 -0
- data/specs/figures/no_args.jpeg +0 -0
- data/specs/figures/no_args.png +0 -0
- data/specs/figures/no_args.svg +2 -2
- data/specs/figures/width_height.jpeg +0 -0
- data/specs/figures/width_height.png +0 -0
- data/specs/figures/width_height_units1.jpeg +0 -0
- data/specs/figures/width_height_units1.png +0 -0
- data/specs/figures/width_height_units2.jpeg +0 -0
- data/specs/figures/width_height_units2.png +0 -0
- data/specs/r_dataframe.spec.rb +184 -11
- data/specs/r_list.spec.rb +4 -4
- data/specs/r_list_apply.spec.rb +11 -10
- data/specs/ruby_expression.spec.rb +3 -11
- data/specs/tmp.rb +106 -34
- data/version.rb +1 -1
- metadata +96 -33
- data/bin/gknit_old_r +0 -236
- data/blogs/dev/dev.Rmd +0 -77
- data/blogs/dev/dev.md +0 -87
- data/blogs/dev/dev_files/figure-html/bubble-1.png +0 -0
- data/blogs/dev/dev_files/figure-html/diverging_bar. +0 -0
- data/blogs/dev/dev_files/figure-html/diverging_bar.png +0 -0
- data/blogs/dplyr/dplyr.rb +0 -63
- data/blogs/galaaz_ggplot/galaaz_ggplot.aux +0 -43
- data/blogs/galaaz_ggplot/galaaz_ggplot.log +0 -640
- data/blogs/galaaz_ggplot/galaaz_ggplot.out +0 -10
- data/blogs/galaaz_ggplot/galaaz_ggplot.tex +0 -481
- data/blogs/galaaz_ggplot/midwest.png +0 -0
- data/blogs/galaaz_ggplot/scatter_plot.png +0 -0
- data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +0 -662
- data/blogs/ruby_plot/ruby_plot.tex +0 -1077
- data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +0 -57
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +0 -106
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +0 -110
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +0 -174
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +0 -236
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +0 -296
- data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +0 -236
- data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +0 -218
- data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +0 -128
- data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +0 -150
- data/examples/paper/paper.rb +0 -36
@@ -121,10 +121,7 @@ Ruby and dozens of other languages. Yet, while R and Python chunks can share da
|
|
121
121
|
languages, chunks are independent. This means that a variable defined in one chunk
|
122
122
|
cannot be used in another chunk.
|
123
123
|
|
124
|
-
With _gKnit_ Ruby code chunks can share data.
|
125
|
-
Ruby chunk executes in its own scope and thus, local variable defined in a chunk are
|
126
|
-
not accessible by other chunks. Yet, All chunks execute in the scope of a 'chunk'
|
127
|
-
class and instance variables ('@'), are available in all chunks.
|
124
|
+
With _gKnit_ Ruby code chunks can share data.
|
128
125
|
|
129
126
|
# Exploring the Dataset
|
130
127
|
|
@@ -139,10 +136,28 @@ in 60 guinea pigs, where each animal received one of three dose levels of Vitami
|
|
139
136
|
The ToothGrowth dataset contains three columns: 'len', 'supp' and 'dose'. Let's
|
140
137
|
take a look at a few rows of this dataset. In Galaaz, R variables are accessed
|
141
138
|
by using the corresponding Ruby symbol preceded by the tilde ('~') function. Note in the
|
142
|
-
following chunk that 'ToothGrowth' is the R variable and Ruby's '
|
139
|
+
following chunk that 'ToothGrowth' is the R variable and Ruby's 'tooth_growth' is
|
143
140
|
assigned the value of '~:ToothGrowth'.
|
144
141
|
|
145
142
|
|
143
|
+
```ruby
|
144
|
+
# Read the R ToothGrowth variable and assign it to the
|
145
|
+
# Ruby variable tooth_growth that will be
|
146
|
+
# available to all Ruby chunks in this document.
|
147
|
+
tooth_growth = ~:ToothGrowth
|
148
|
+
# print the first few elements of the dataset
|
149
|
+
puts tooth_growth.head
|
150
|
+
```
|
151
|
+
|
152
|
+
```
|
153
|
+
## len supp dose
|
154
|
+
## 1 4.2 VC 0.5
|
155
|
+
## 2 11.5 VC 0.5
|
156
|
+
## 3 7.3 VC 0.5
|
157
|
+
## 4 5.8 VC 0.5
|
158
|
+
## 5 6.4 VC 0.5
|
159
|
+
## 6 10.0 VC 0.5
|
160
|
+
```
|
146
161
|
|
147
162
|
Great! We've managed to read the ToothGrowth dataset and take a look at its elements.
|
148
163
|
We see here the first 6 rows of the dataset. To access a column, follow the dataset name
|
@@ -150,6 +165,15 @@ with a dot ('.') and the name of the column. Also use dot notation to chain meth
|
|
150
165
|
in usual Ruby style.
|
151
166
|
|
152
167
|
|
168
|
+
```ruby
|
169
|
+
# Access the tooth_growth 'len' column and print the first few
|
170
|
+
# elements of this column with the 'head' method.
|
171
|
+
puts tooth_growth.len.head
|
172
|
+
```
|
173
|
+
|
174
|
+
```
|
175
|
+
## [1] 4.2 11.5 7.3 5.8 6.4 10.0
|
176
|
+
```
|
153
177
|
|
154
178
|
The 'dose' column contains a numeric value of either 0.5, 1 or 2, although the
|
155
179
|
first 6 rows as seen above only contain the 0.5 values. Even though those are
|
@@ -159,11 +183,22 @@ function from Galaaz the dot ('.') in the function name is substituted by '__' (
|
|
159
183
|
The function 'as.factor' becomes 'R.as__factor' or just 'as__factor' when chaining.
|
160
184
|
|
161
185
|
|
186
|
+
```ruby
|
187
|
+
# convert the dose to a factor
|
188
|
+
tooth_growth.dose = tooth_growth.dose.as__factor
|
189
|
+
```
|
162
190
|
|
163
191
|
Let's explore some more details of this dataset. In particular, let's look at its dimensions,
|
164
192
|
structure and summary statistics.
|
165
193
|
|
166
194
|
|
195
|
+
```ruby
|
196
|
+
puts tooth_growth.dim
|
197
|
+
```
|
198
|
+
|
199
|
+
```
|
200
|
+
## [1] 60 3
|
201
|
+
```
|
167
202
|
|
168
203
|
This dataset has 60 rows, one for each subject and 3 columns, as we have already seen.
|
169
204
|
|
@@ -172,12 +207,35 @@ functions does not return anything and prints the structure of the dataset
|
|
172
207
|
as a side effect.
|
173
208
|
|
174
209
|
|
210
|
+
```ruby
|
211
|
+
tooth_growth.str
|
212
|
+
```
|
213
|
+
|
214
|
+
```
|
215
|
+
## 'data.frame': 60 obs. of 3 variables:
|
216
|
+
## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
|
217
|
+
## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
|
218
|
+
## $ dose: Factor w/ 3 levels "0.5","1","2": 1 1 1 1 1 1 1 1 1 1 ...
|
219
|
+
```
|
175
220
|
Observe that both variables 'supp' and 'dose' are factors. The system made variable 'supp'
|
176
221
|
a factor automatically, since it contains two strings OJ and VC.
|
177
222
|
|
178
223
|
Finally, using the summary method, we get the statistical summary for the dataset
|
179
224
|
|
180
225
|
|
226
|
+
```ruby
|
227
|
+
puts tooth_growth.summary
|
228
|
+
```
|
229
|
+
|
230
|
+
```
|
231
|
+
## len supp dose
|
232
|
+
## Min. : 4.20 OJ:30 0.5:20
|
233
|
+
## 1st Qu.:13.07 VC:30 1 :20
|
234
|
+
## Median :19.25 2 :20
|
235
|
+
## Mean :18.81
|
236
|
+
## 3rd Qu.:25.27
|
237
|
+
## Max. :33.90
|
238
|
+
```
|
181
239
|
|
182
240
|
# Doing the Data Analysis
|
183
241
|
|
@@ -198,27 +256,22 @@ to form the final graphics.
|
|
198
256
|
|
199
257
|
In order to make a plot, we apply the 'ggplot' function to the dataset. In R, this would be
|
200
258
|
written as ```ggplot(<dataset>, ...)```. Galaaz gives you the flexibility to use
|
201
|
-
either ```R.ggplot(<dataset>, ...)``` or ```<dataset>.ggplot(...)```. In the graph
|
202
|
-
|
203
|
-
|
259
|
+
either ```R.ggplot(<dataset>, ...)``` or ```<dataset>.ggplot(...)```. In the graph
|
260
|
+
specification below, we use the second notation that looks more like Ruby.
|
261
|
+
ggplot uses the ‘aes’ method to specify
|
204
262
|
x and y axes; in this case, the 'dose' on the $x$ axis and the 'length' on
|
205
263
|
the $y$ axis: 'E.aes(x: :dose, y: :len)'. To specify the type of plot add a geom to
|
206
264
|
the plot. For a boxplot, the geom is R.geom_boxplot.
|
207
265
|
|
208
|
-
Note also that we have a call to 'R.png' before plotting and 'R.dev__off' after the print
|
209
|
-
statement. 'R.png' opens a 'png device' for outputting the plot. If we do no pass a
|
210
|
-
name to the 'png' function, the
|
211
|
-
image gets a default name of 'Rplot\<nnn\>' where \<nnn\> is the number of the plot.
|
212
|
-
'R.dev__off'
|
213
|
-
closes the device and creates the 'png' file. We can
|
214
|
-
then include the generated 'png' file in the document by adding an rmarkdown directive.
|
215
266
|
|
216
|
-
|
267
|
+
```ruby
|
268
|
+
require 'ggplot'
|
217
269
|
|
218
|
-
|
219
|
-
|
220
|
-
|
270
|
+
e = tooth_growth.ggplot(E.aes(x: :dose, y: :len))
|
271
|
+
print e + R.geom_boxplot
|
272
|
+
```
|
221
273
|
|
274
|
+
![](ruby_plot_files/figure-html/dose_len.png)<!-- -->
|
222
275
|
|
223
276
|
Great! We've just managed to create and save our first plot in Ruby with only
|
224
277
|
four lines of code. We can now easily see with this plot a clear trend: as the
|
@@ -239,24 +292,32 @@ automatically creates the facets based on the delivery method factors. The para
|
|
239
292
|
the 'facet_grid' method is a [_formula_](https://thomasleeper.com/Rcourse/Tutorials/formulae.html).
|
240
293
|
|
241
294
|
In Galaaz we give programmers the flexibility to use two different ways to write formulas.
|
242
|
-
In the first way,
|
243
|
-
in
|
295
|
+
In the first way, we use Ruby expressions and the '.til' function. The formula 'x ~ y' becomes
|
296
|
+
':x.til :y'. More information on expressions can be found in [Galaaz Manual](https://www.rubydoc.info/gems/galaaz/).
|
244
297
|
|
245
|
-
* R symbols are represented by the same Ruby symbol prefixed with the '+' method. The
|
246
|
-
symbol ```x``` in R becomes ```+:x``` in Ruby;
|
247
|
-
* The '~' operator in R becomes '=~' in Ruby. The formula ```x ~ y``` in R is written as
|
248
|
-
```+:x =~ +:y``` in Ruby;
|
249
|
-
* The '.' symbol in R becomes '+:all'
|
250
298
|
|
251
299
|
Another way of writing a formula is to use the 'formula' function with the actual formula as
|
252
300
|
a string. The formula ```x ~ y``` in R can be written as ```R.formula("x ~ y")```. For more
|
253
301
|
complex formulas, the use of the 'formula' function is preferred.
|
254
302
|
|
255
|
-
The formula
|
303
|
+
The formula ```:all.til :supp``` indicates to the 'facet_grid' function that it needs to
|
256
304
|
facet the plot based on the ```supp``` variable and split the plot vertically. Changing
|
257
|
-
the formula to
|
305
|
+
the formula to ```:supp.til :all``` would split the plot horizontally.
|
306
|
+
|
258
307
|
|
308
|
+
```ruby
|
259
309
|
|
310
|
+
base_tooth = tooth_growth.ggplot(E.aes(x: :dose, y: :len, group: :dose))
|
311
|
+
|
312
|
+
bp = base_tooth + R.geom_boxplot +
|
313
|
+
# Split in vertical direction
|
314
|
+
R.facet_grid(:all.til :supp)
|
315
|
+
|
316
|
+
puts bp
|
317
|
+
```
|
318
|
+
|
319
|
+
|
320
|
+
![](ruby_plot_files/figure-html/facet_by_delivery.png)<!-- -->
|
260
321
|
|
261
322
|
It now becomes clear that although both methods of delivery have a direct
|
262
323
|
impact on tooth growth, method (OJ) is non-linear having a higher impact with smaller
|
@@ -273,6 +334,13 @@ enough to add ```fill: :dose``` to the aesthetic of boxplot. With this command
|
|
273
334
|
factor gets its own color.
|
274
335
|
|
275
336
|
|
337
|
+
```ruby
|
338
|
+
bp = bp + R.geom_boxplot(E.aes(fill: :dose))
|
339
|
+
puts bp
|
340
|
+
```
|
341
|
+
|
342
|
+
|
343
|
+
![](ruby_plot_files/figure-html/facets_by_delivery_color.png)<!-- -->
|
276
344
|
|
277
345
|
Facetting helps us compare the general trends for each delivery method.
|
278
346
|
Adding color allows us to compare specifically how each dosage impacts the tooth growth.
|
@@ -304,6 +372,15 @@ each of the 60 pigs in the experiment. For that, add the function 'R.geom_point
|
|
304
372
|
plot.
|
305
373
|
|
306
374
|
|
375
|
+
```ruby
|
376
|
+
# Add the individual data points on top of the boxplot
|
377
|
+
bp = bp + R.geom_point
|
378
|
+
|
379
|
+
puts bp
|
380
|
+
```
|
381
|
+
|
382
|
+
|
383
|
+
![](ruby_plot_files/figure-html/facets_with_points.png)<!-- -->
|
307
384
|
|
308
385
|
Now we can see the actual distribution of all the 60 subjects. Actually, this is not
|
309
386
|
totally true. We have a hard time seeing all 60 subjects. It seems that some points
|
@@ -317,6 +394,13 @@ prevents data hiding. We also add
|
|
317
394
|
color and change the shape of the points, making them even easier to see.
|
318
395
|
|
319
396
|
|
397
|
+
```ruby
|
398
|
+
# Add jittered points so that overlapping observations become visible
|
399
|
+
puts bp + R.geom_jitter(shape: 23, color: "cyan3", size: 1)
|
400
|
+
```
|
401
|
+
|
402
|
+
|
403
|
+
![](ruby_plot_files/figure-html/facets_with_jitter.png)<!-- -->
|
320
404
|
|
321
405
|
Now we can see all 60 points in the graph. We have here a much higher information density
|
322
406
|
and we can see outliers and subjects distribution.
|
@@ -352,6 +436,16 @@ This ordering seems more natural and
|
|
352
436
|
matches with the actual order of the colors in the plot.
|
353
437
|
|
354
438
|
|
439
|
+
```ruby
|
440
|
+
bp = bp +
|
441
|
+
R.scale_fill_manual(values: R.c("cyan", "deepskyblue", "deepskyblue4"),
|
442
|
+
breaks: R.c("2","1","0.5"))
|
443
|
+
|
444
|
+
puts bp
|
445
|
+
```
|
446
|
+
|
447
|
+
|
448
|
+
![](ruby_plot_files/figure-html/facets_by_delivery_color2.png)<!-- -->
|
355
449
|
|
356
450
|
## Violin Plot and Jitter
|
357
451
|
|
@@ -371,6 +465,18 @@ a boxplot known as a _violin plot_ with jittered data.
|
|
371
465
|
> The central dot represents the median average value.
|
372
466
|
|
373
467
|
|
468
|
+
```ruby
|
469
|
+
violin = base_tooth + R.geom_violin(E.aes(fill: :dose)) +
|
470
|
+
R.facet_grid(:all.til :supp) +
|
471
|
+
R.geom_jitter(shape: 23, color: "cyan3", size: 1) +
|
472
|
+
R.scale_fill_manual(values: R.c("cyan", "deepskyblue", "deepskyblue4"),
|
473
|
+
breaks: R.c("2","1","0.5"))
|
474
|
+
|
475
|
+
puts violin
|
476
|
+
```
|
477
|
+
|
478
|
+
|
479
|
+
![](ruby_plot_files/figure-html/violin_with_jitter.png)<!-- -->
|
374
480
|
|
375
481
|
This plot is an alternative to the original boxplot. For the final presentation, it is
|
376
482
|
important to think which graphics will be best understood by our audience. A violin plot
|
@@ -391,6 +497,23 @@ for information about the plot (for clarity, we defined a caption variable using
|
|
391
497
|
Here Doc style).
|
392
498
|
|
393
499
|
|
500
|
+
```ruby
|
501
|
+
caption = <<-EOT
|
502
|
+
Length of odontoblasts in 60 guinea pigs.
|
503
|
+
Each animal received one of three dose levels of vitamin C.
|
504
|
+
EOT
|
505
|
+
|
506
|
+
decorations =
|
507
|
+
R.labs(title: "Tooth Growth: Length vs Vitamin C Dose",
|
508
|
+
subtitle: "Faceted by delivery method, OJ or VC",
|
509
|
+
x: "Dose (mg)", y: "Teeth length",
|
510
|
+
caption: caption)
|
511
|
+
|
512
|
+
puts bp + decorations
|
513
|
+
```
|
514
|
+
|
515
|
+
|
516
|
+
![](ruby_plot_files/figure-html/facets_with_decorations.png)<!-- -->
|
394
517
|
|
395
518
|
## The Corp Theme
|
396
519
|
|
@@ -408,6 +531,57 @@ a shade o blue (color: '#00080'). Axis labels are moved near the end of the axi
|
|
408
531
|
written in 'bold'.
|
409
532
|
|
410
533
|
|
534
|
+
```ruby
|
535
|
+
module CorpTheme
|
536
|
+
|
537
|
+
R.install_and_loads 'RColorBrewer'
|
538
|
+
|
539
|
+
#---------------------------------------------------------------------------------
|
540
|
+
# face can be (1=plain, 2=bold, 3=italic, 4=bold-italic)
|
541
|
+
#---------------------------------------------------------------------------------
|
542
|
+
|
543
|
+
def self.text_element(size, face: "plain", hjust: nil)
|
544
|
+
E.element_text(color: "#000080",
|
545
|
+
face: face,
|
546
|
+
size: size,
|
547
|
+
hjust: hjust)
|
548
|
+
end
|
549
|
+
|
550
|
+
#---------------------------------------------------------------------------------
|
551
|
+
# Defines the plot theme (visualization). In this theme we remove major and minor
|
552
|
+
# grids, borders and background. We also turn-off scientific notation.
|
553
|
+
#---------------------------------------------------------------------------------
|
554
|
+
|
555
|
+
def self.global_theme(faceted = false)
|
556
|
+
|
557
|
+
R.options(scipen: 999) # turn-off scientific notation like 1e+48
|
558
|
+
# R.theme_set(R.theme_bw)
|
559
|
+
|
560
|
+
# remove major grids
|
561
|
+
gb = R.theme(panel__grid__major: E.element_blank())
|
562
|
+
# remove minor grids
|
563
|
+
gb = gb + R.theme(panel__grid__minor: E.element_blank)
|
564
|
+
# gb = R.theme(panel__grid__minor: E.element_blank)
|
565
|
+
# remove border
|
566
|
+
gb = gb + R.theme(panel__border: E.element_blank)
|
567
|
+
# remove background. When working with faceted graphs, the background makes
|
568
|
+
# it easier to see each facet, so leave it
|
569
|
+
gb = gb + R.theme(panel__background: E.element_blank) if !faceted
|
570
|
+
# Change axis font
|
571
|
+
gb = gb + R.theme(axis__text: text_element(8))
|
572
|
+
# change axis title font
|
573
|
+
gb = gb + R.theme(axis__title: text_element(10, face: "bold", hjust: 1))
|
574
|
+
# change font of title
|
575
|
+
gb = gb + R.theme(title: text_element(12, face: "bold"))
|
576
|
+
# change font of subtitle
|
577
|
+
gb = gb + R.theme(plot__subtitle: text_element(9))
|
578
|
+
# change font of captions
|
579
|
+
gb = gb + R.theme(plot__caption: text_element(8))
|
580
|
+
|
581
|
+
end
|
582
|
+
|
583
|
+
end
|
584
|
+
```
|
411
585
|
|
412
586
|
## Final Box Plot
|
413
587
|
|
@@ -415,17 +589,29 @@ We can now easily make our final boxplot and violin plot. All the layers for th
|
|
415
589
|
added in order to expose our understanding of the data and the need to present the result
|
416
590
|
to our audience.
|
417
591
|
|
418
|
-
The final specification is just the addition of all layers build up to this point (
|
419
|
-
the decorations (
|
592
|
+
The final specification is just the addition of all layers built up to this point ('bp'), plus
|
593
|
+
the decorations ('decorations'), plus the corporate theme.
|
420
594
|
|
421
595
|
Here is our final boxplot, without jitter.
|
422
596
|
|
423
597
|
|
598
|
+
```ruby
|
599
|
+
puts bp + decorations + CorpTheme.global_theme(faceted: true)
|
600
|
+
```
|
601
|
+
|
602
|
+
|
603
|
+
![](ruby_plot_files/figure-html/final_box_plot.png)<!-- -->
|
424
604
|
|
425
605
|
And here is the final violin plot, with jitter and the same look and feel of the corporate
|
426
606
|
boxplot.
|
427
607
|
|
428
608
|
|
609
|
+
```ruby
|
610
|
+
puts violin + decorations + CorpTheme.global_theme(faceted: true)
|
611
|
+
```
|
612
|
+
|
613
|
+
|
614
|
+
![](ruby_plot_files/figure-html/final_violin_plot.png)<!-- -->
|
429
615
|
|
430
616
|
## Another View
|
431
617
|
|
@@ -434,6 +620,26 @@ dose and not by supplement. This shows how easy it is to create new plots by ju
|
|
434
620
|
changing a small statement in the _grammar of graphics_.
|
435
621
|
|
436
622
|
|
623
|
+
```ruby
|
624
|
+
caption = <<-EOT
|
625
|
+
Length of odontoblasts in 60 guinea pigs.
|
626
|
+
Each animal received one of three dose levels of vitamin C.
|
627
|
+
EOT
|
628
|
+
|
629
|
+
bp = tooth_growth.ggplot(E.aes(x: :supp, y: :len, group: :supp)) +
|
630
|
+
R.geom_boxplot(E.aes(fill: :supp)) + R.facet_grid(:all.til :dose) +
|
631
|
+
R.scale_fill_manual(values: R.c("cyan", "deepskyblue4")) +
|
632
|
+
R.labs(title: "Tooth Growth: Length by Dose",
|
633
|
+
subtitle: "Faceted by dose",
|
634
|
+
x: "Delivery method", y: "Teeth length",
|
635
|
+
caption: caption) +
|
636
|
+
CorpTheme.global_theme(faceted: true)
|
637
|
+
|
638
|
+
puts bp
|
639
|
+
```
|
640
|
+
|
641
|
+
|
642
|
+
![](ruby_plot_files/figure-html/facet_by_dose.png)<!-- -->
|
437
643
|
|
438
644
|
# Conclusion
|
439
645
|
|
@@ -449,10 +655,9 @@ Trying to bring to Ruby the power of R starting from scratch is an enourmous end
|
|
449
655
|
and would probably never be accomplished. Today's data scientists would certainly
|
450
656
|
stick with either Python or R. Now, both the Ruby and R communities can benefit
|
451
657
|
from this marriage, provided by Galaaz on top of GraalVM and Truffle's
|
452
|
-
polyglot environment. We presented
|
453
|
-
|
454
|
-
|
455
|
-
be extremely relevant.
|
658
|
+
polyglot environment. We presented the process to couple Ruby and R, but this
|
659
|
+
process can also be done to couple Ruby and JavaScript or Ruby and Python.
|
660
|
+
In a polyglot world a *uniglot* language might be extremely relevant.
|
456
661
|
|
457
662
|
From the perspective of performance, GraalVM and Truffle promise improvements that could
|
458
663
|
reach over 10 times, both for [FastR](https://medium.com/graalvm/faster-r-with-fastr-4b8db0e0dceb)
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,50 @@
|
|
1
|
+
% This file was created with JabRef 2.10.
|
2
|
+
% Encoding: UTF-8
|
3
|
+
|
4
|
+
@Book{vasquez2009twp,
|
5
|
+
Title = {The War Puzzle Revisited},
|
6
|
+
Author = {Vasquez, John A},
|
7
|
+
Publisher = {New York, NY: Cambridge University Press},
|
8
|
+
Year = {2009}
|
9
|
+
}
|
10
|
+
|
11
|
+
@Book{wagner2007ws,
|
12
|
+
Title = {War and the State: The Theory of International Politics},
|
13
|
+
Author = {R. Harrison Wagner},
|
14
|
+
Publisher = {Ann Arbor, MI: The University of Michigan Press},
|
15
|
+
Year = {2007},
|
16
|
+
Owner = {steve},
|
17
|
+
Timestamp = {2016.02.12}
|
18
|
+
}
|
19
|
+
|
20
|
+
@Book{xie2013ddrk,
|
21
|
+
title = {Dynamic Documents with {R} and knitr},
|
22
|
+
author = {Yihui Xie},
|
23
|
+
publisher = {Chapman and Hall/CRC},
|
24
|
+
address = {Boca Raton, Florida},
|
25
|
+
year = {2015},
|
26
|
+
edition = {2nd},
|
27
|
+
note = {ISBN 978-1498716963},
|
28
|
+
url = {https://yihui.name/knitr/},
|
29
|
+
}
|
30
|
+
|
31
|
+
@article{miller2013tdpi,
|
32
|
+
author = {Miller, Steven V.},
|
33
|
+
year = 2013,
|
34
|
+
title = {Territorial Disputes and the Politics of Individual Well-Being.},
|
35
|
+
journal = {Journal of Peace Research.},
|
36
|
+
volume = 50,
|
37
|
+
number = 6,
|
38
|
+
pages = {677-690}
|
39
|
+
}
|
40
|
+
|
41
|
+
@article{miller2016ieea,
|
42
|
+
author = {Gibler, Douglas M. and Steven V. Miller and Erin K. Little},
|
43
|
+
year = 2016,
|
44
|
+
title = {An Analysis of the Militarized Interstate Dispute (MID) Dataset, 1816-2001},
|
45
|
+
journal = {International Studies Quarterly.},
|
46
|
+
volume = 60,
|
47
|
+
number = 4,
|
48
|
+
pages = {719-730}
|
49
|
+
}
|
50
|
+
|