galaaz 0.4.7 → 0.4.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1179 -39
  3. data/Rakefile +4 -2
  4. data/bin/grun +1 -1
  5. data/bin/gstudio +1 -1
  6. data/blogs/dev/dev.Rmd +2 -56
  7. data/blogs/dev/dev.md +32 -61
  8. data/blogs/dev/dev2.Rmd +65 -0
  9. data/blogs/dplyr/dplyr.Rmd +29 -0
  10. data/blogs/{dev/dev.html → dplyr/dplyr.html} +88 -57
  11. data/blogs/dplyr/dplyr.md +58 -0
  12. data/blogs/gknit/gknit.html +1262 -25
  13. data/blogs/gknit/gknit.md +471 -27
  14. data/blogs/gknit/gknit_files/figure-html/bubble-1.png +0 -0
  15. data/blogs/manual/graph.rb +29 -0
  16. data/blogs/manual/manual.Rmd +567 -29
  17. data/blogs/manual/manual.html +743 -46
  18. data/blogs/manual/manual.md +1179 -39
  19. data/blogs/nse_dplyr/nse_dplyr.Rmd +466 -11
  20. data/blogs/nse_dplyr/nse_dplyr.html +472 -37
  21. data/blogs/nse_dplyr/nse_dplyr.md +645 -32
  22. data/blogs/ruby_plot/ruby_plot.Rmd +4 -4
  23. data/blogs/ruby_plot/ruby_plot.html +217 -2
  24. data/blogs/ruby_plot/ruby_plot.md +226 -1
  25. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  26. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +2 -2
  27. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  28. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +70 -70
  29. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  30. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +72 -72
  31. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  32. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +116 -116
  33. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  34. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +176 -176
  35. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  36. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +236 -236
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +176 -176
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +160 -160
  42. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  43. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +105 -105
  44. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +121 -121
  46. data/examples/islr/ch2.spec.rb +1 -1
  47. data/examples/islr/ch3_boston.rb +4 -4
  48. data/examples/islr/x_y_rnorm.jpg +0 -0
  49. data/lib/R_interface/r.rb +1 -1
  50. data/lib/R_interface/r_methods.rb +2 -2
  51. data/lib/R_interface/rdata_frame.rb +8 -5
  52. data/lib/R_interface/rindexed_object.rb +1 -2
  53. data/lib/R_interface/rlist.rb +1 -0
  54. data/lib/R_interface/robject.rb +0 -1
  55. data/lib/R_interface/rpkg.rb +14 -6
  56. data/lib/R_interface/rsupport.rb +7 -9
  57. data/lib/R_interface/ruby_extensions.rb +17 -5
  58. data/lib/gknit/knitr_engine.rb +9 -2
  59. data/lib/util/exec_ruby.rb +2 -2
  60. data/specs/r_dataframe.spec.rb +173 -0
  61. data/specs/r_list.spec.rb +4 -4
  62. data/specs/ruby_expression.spec.rb +2 -11
  63. data/specs/tmp.rb +76 -34
  64. data/version.rb +1 -1
  65. metadata +17 -6
  66. data/blogs/dev/dev_files/figure-html/bubble-1.png +0 -0
  67. data/blogs/dev/dev_files/figure-html/diverging_bar. +0 -0
  68. data/blogs/dev/dev_files/figure-html/diverging_bar.png +0 -0
@@ -23,7 +23,7 @@ fontsize: 11pt
23
23
  # set global chunk options. We want all figures to be 'svg'
24
24
  # out.width will control the width of the output figure, in this case, we want it
25
25
  # to be 50% of the width
26
- knitr::opts_chunk$set(fig.width=1, fig.height=7, dev="svg", out.width = '50%')
26
+ # knitr::opts_chunk$set(fig.width=1, fig.height=7, dev="svg", out.width = '50%')
27
27
  ```
28
28
 
29
29
  According to Wikipedia "Ruby is a dynamic, interpreted, reflective, object-oriented,
@@ -291,7 +291,7 @@ the formula to ```+:supp =~ +:all``` would split the plot horizontally.
291
291
 
292
292
  @bp = @base_tooth + R.geom_boxplot +
293
293
  # Split in vertical direction
294
- R.facet_grid(+:all =~ +:supp)
294
+ R.facet_grid(:all.til :supp)
295
295
 
296
296
  puts @bp
297
297
  ```
@@ -427,7 +427,7 @@ a boxplot known as a _violin plot_ with jittered data.
427
427
 
428
428
  ```{ruby violin_with_jitter}
429
429
  @violin = @base_tooth + R.geom_violin(E.aes(fill: :dose)) +
430
- R.facet_grid(+:all =~ +:supp) +
430
+ R.facet_grid(:all.til :supp) +
431
431
  R.geom_jitter(shape: 23, color: "cyan3", size: 1) +
432
432
  R.scale_fill_manual(values: R.c("cyan", "deepskyblue", "deepskyblue4"),
433
433
  breaks: R.c("2","1","0.5"))
@@ -570,7 +570,7 @@ Each animal received one of three dose levels of vitamin C.
570
570
  EOT
571
571
 
572
572
  @bp = @tooth_growth.ggplot(E.aes(x: :supp, y: :len, group: :supp)) +
573
- R.geom_boxplot(E.aes(fill: :supp)) + R.facet_grid(+:all =~ +:dose) +
573
+ R.geom_boxplot(E.aes(fill: :supp)) + R.facet_grid(:all.til :dose) +
574
574
  R.scale_fill_manual(values: R.c("cyan", "deepskyblue4")) +
575
575
  R.labs(title: "Tooth Growth: Length by Dose",
576
576
  subtitle: "Faceted by dose",
@@ -283,6 +283,9 @@ img {
283
283
  button.code-folding-btn:focus {
284
284
  outline: none;
285
285
  }
286
+ summary {
287
+ display: list-item;
288
+ }
286
289
  </style>
287
290
 
288
291
 
@@ -290,10 +293,71 @@ button.code-folding-btn:focus {
290
293
  <div class="container-fluid main-container">
291
294
 
292
295
  <!-- tabsets -->
296
+
297
+ <style type="text/css">
298
+ .tabset-dropdown > .nav-tabs {
299
+ display: inline-table;
300
+ max-height: 500px;
301
+ min-height: 44px;
302
+ overflow-y: auto;
303
+ background: white;
304
+ border: 1px solid #ddd;
305
+ border-radius: 4px;
306
+ }
307
+
308
+ .tabset-dropdown > .nav-tabs > li.active:before {
309
+ content: "";
310
+ font-family: 'Glyphicons Halflings';
311
+ display: inline-block;
312
+ padding: 10px;
313
+ border-right: 1px solid #ddd;
314
+ }
315
+
316
+ .tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before {
317
+ content: "";
318
+ border: none;
319
+ }
320
+
321
+ .tabset-dropdown > .nav-tabs.nav-tabs-open:before {
322
+ content: "";
323
+ font-family: 'Glyphicons Halflings';
324
+ display: inline-block;
325
+ padding: 10px;
326
+ border-right: 1px solid #ddd;
327
+ }
328
+
329
+ .tabset-dropdown > .nav-tabs > li.active {
330
+ display: block;
331
+ }
332
+
333
+ .tabset-dropdown > .nav-tabs > li > a,
334
+ .tabset-dropdown > .nav-tabs > li > a:focus,
335
+ .tabset-dropdown > .nav-tabs > li > a:hover {
336
+ border: none;
337
+ display: inline-block;
338
+ border-radius: 4px;
339
+ }
340
+
341
+ .tabset-dropdown > .nav-tabs.nav-tabs-open > li {
342
+ display: block;
343
+ float: none;
344
+ }
345
+
346
+ .tabset-dropdown > .nav-tabs > li {
347
+ display: none;
348
+ }
349
+ </style>
350
+
293
351
  <script>
294
352
  $(document).ready(function () {
295
353
  window.buildTabsets("TOC");
296
354
  });
355
+
356
+ $(document).ready(function () {
357
+ $('.tabset-dropdown > .nav-tabs > li').click(function () {
358
+ $(this).parent().toggleClass('nav-tabs-open')
359
+ });
360
+ });
297
361
  </script>
298
362
 
299
363
  <!-- code folding -->
@@ -302,7 +366,6 @@ $(document).ready(function () {
302
366
 
303
367
 
304
368
 
305
-
306
369
  <div class="fluid-row" id="header">
307
370
 
308
371
 
@@ -347,13 +410,47 @@ $(document).ready(function () {
347
410
  <h1>Exploring the Dataset</h1>
348
411
  <p>Let’s start by exploring our selected dataset. ToothGrowth is an R dataset. A dataset is like a simple excel spreadsheet, in which each column has only one type of data. For instance one column can have float, the other integer, and a third strings. This dataset analyzes the length of odontoblasts (cells responsible for tooth growth) in 60 guinea pigs, where each animal received one of three dose levels of Vitamin C (0.5, 1, and 2 mg/day) by one of two delivery methods, orange juice OJ or ascorbic acid (a form of vitamin C and coded as VC).</p>
349
412
  <p>The ToothGrowth dataset contains three columns: ‘len’, ‘supp’ and ‘dose’. Let’s take a look at a few rows of this dataset. In Galaaz, R variables are accessed by using the corresponding Ruby symbol preceded by the tilde (‘~’) function. Note in the following chunk that ‘ToothGrowth’ is the R variable and Ruby’s ‘<span class="citation">@tooth_growth</span>’ is assigned the value of ‘~:ToothGrowth’.</p>
413
+ <pre class="ruby"><code># Read the R ToothGrowth variable and assign it to the
414
+ # Ruby instance variable @tooth_growth that will be
415
+ # available to all Ruby chunks in this document.
416
+ @tooth_growth = ~:ToothGrowth
417
+ # print the first few elements of the dataset
418
+ puts @tooth_growth.head</code></pre>
419
+ <pre><code>## len supp dose
420
+ ## 1 4.2 VC 0.5
421
+ ## 2 11.5 VC 0.5
422
+ ## 3 7.3 VC 0.5
423
+ ## 4 5.8 VC 0.5
424
+ ## 5 6.4 VC 0.5
425
+ ## 6 10.0 VC 0.5</code></pre>
350
426
  <p>Great! We’ve managed to read the ToothGrowth dataset and take a look at its elements. We see here the first 6 rows of the dataset. To access a column, follow the dataset name with a dot (‘.’) and the name of the column. Also use dot notation to chain methods in usual Ruby style.</p>
427
+ <pre class="ruby"><code># Access the tooth_growth 'len' column and print the first few
428
+ # elements of this column with the 'head' method.
429
+ puts @tooth_growth.len.head</code></pre>
430
+ <pre><code>## [1] 4.2 11.5 7.3 5.8 6.4 10.0</code></pre>
351
431
  <p>The ‘dose’ column contains a numeric value with either, 0.5, 1 or 2, although the first 6 rows as seen above only contain the 0.5 values. Even though those are numbers, they are better interpreted as a <a href="https://swcarpentry.github.io/r-novice-inflammation/12-supp-factors/">factor or category</a>. So, let’s convert our ‘dose’ column from numeric to ‘factor’. In R, the function ‘as.factor’ is used to convert data in a vector to factors. To use this function from Galaaz the dot (‘.’) in the function name is substituted by ’__’ (double underline). The function ‘as.factor’ becomes ’R.as__factor’ or just ’as__factor’ when chaining.</p>
432
+ <pre class="ruby"><code># convert the dose to a factor
433
+ @tooth_growth.dose = @tooth_growth.dose.as__factor</code></pre>
352
434
  <p>Let’s explore some more details of this dataset. In particular, let’s look at its dimensions, structure and summary statistics.</p>
435
+ <pre class="ruby"><code>puts @tooth_growth.dim</code></pre>
436
+ <pre><code>## [1] 60 3</code></pre>
353
437
  <p>This dataset has 60 rows, one for each subject and 3 columns, as we have already seen.</p>
354
438
  <p>Note that we do not need to call ‘puts’ when using the ‘str’ function. This function does not return anything and prints the structure of the dataset as a side effect.</p>
439
+ <pre class="ruby"><code>@tooth_growth.str</code></pre>
440
+ <pre><code>## 'data.frame': 60 obs. of 3 variables:
441
+ ## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
442
+ ## $ supp: Factor w/ 2 levels &quot;OJ&quot;,&quot;VC&quot;: 2 2 2 2 2 2 2 2 2 2 ...
443
+ ## $ dose: Factor w/ 3 levels &quot;0.5&quot;,&quot;1&quot;,&quot;2&quot;: 1 1 1 1 1 1 1 1 1 1 ...</code></pre>
355
444
  <p>Observe that both variables ‘supp’ and ‘dose’ are factors. The system made variable ‘supp’ a factor automatically, since it contains two strings OJ and VC.</p>
356
445
  <p>Finally, using the summary method, we get the statistical summary for the dataset</p>
446
+ <pre class="ruby"><code>puts @tooth_growth.summary</code></pre>
447
+ <pre><code>## len supp dose
448
+ ## Min. : 4.20 OJ:30 0.5:20
449
+ ## 1st Qu.:13.07 VC:30 1 :20
450
+ ## Median :19.25 2 :20
451
+ ## Mean :18.81
452
+ ## 3rd Qu.:25.27
453
+ ## Max. :33.90</code></pre>
357
454
  </div>
358
455
  <div id="doing-the-data-analysis" class="section level1">
359
456
  <h1>Doing the Data Analysis</h1>
@@ -366,7 +463,11 @@ $(document).ready(function () {
366
463
  <p>This description might be a bit cryptic and it is best to see it at work to understand it. Basically, in the <em>grammar of graphics</em> developers add layers of components such as grid, axis, data, title, subtitle and also graphical primitives such as <em>bar plot</em>, <em>box plot</em>, to form the final graphics.</p>
367
464
  <p>In order to make a plot, we apply the ‘ggplot’ function to the dataset. In R, this would be written as <code>ggplot(&lt;dataset&gt;, ...)</code>. Galaaz gives you the flexibility to use either <code>R.ggplot(&lt;dataset&gt;, ...)</code> or <code>&lt;dataset&gt;.ggplot(...)</code>. In the graph specification below, we use the second notation that looks more like Ruby. ggplot uses the ‘aes’ method to specify x and y axes; in this case, the ‘dose’ on the <span class="math inline">\(x\)</span> axis and the ‘length’ on the <span class="math inline">\(y\)</span> axis: ‘E.aes(x: :dose, y: :len)’. To specify the type of plot add a geom to the plot. For a boxplot, the geom is R.geom_boxplot.</p>
368
465
  <p>Note also that we have a call to ‘R.png’ before plotting and ’R.dev__off’ after the print statement. ‘R.png’ opens a ‘png device’ for outputting the plot. If we do not pass a name to the ‘png’ function, the image gets a default name of ‘Rplot&lt;nnn&gt;’ where &lt;nnn&gt; is the number of the plot. ’R.dev__off’ closes the device and creates the ‘png’ file. We can then include the generated ‘png’ file in the document by adding an rmarkdown directive.</p>
369
- <p><img src="" width="50%" /></p>
466
+ <pre class="ruby"><code>require 'ggplot'
467
+
468
+ e = @tooth_growth.ggplot(E.aes(x: :dose, y: :len))
469
+ print e + R.geom_boxplot</code></pre>
470
+ <p><img src="" /><!-- --></p>
370
471
  <p>Great! We’ve just managed to create and save our first plot in Ruby with only four lines of code. We can now easily see with this plot a clear trend: as the dose of the supplement is increased, so is the length of teeth.</p>
371
472
  </div>
372
473
  <div id="facetting-the-plot" class="section level2">
@@ -380,11 +481,22 @@ $(document).ready(function () {
380
481
  </ul>
381
482
  <p>Another way of writing a formula is to use the ‘formula’ function with the actual formula as a string. The formula <code>x ~ y</code> in R can be written as <code>R.formula(&quot;x ~ y&quot;)</code>. For more complex formulas, the use of the ‘formula’ function is preferred.</p>
382
483
  <p>The formula <code>+:all =~ +:supp</code> indicates to the ‘facet_grid’ function that it needs to facet the plot based on the <code>supp</code> variable and split the plot vertically. Changing the formula to <code>+:supp =~ +:all</code> would split the plot horizontally.</p>
484
+ <pre class="ruby"><code>@base_tooth = @tooth_growth.ggplot(E.aes(x: :dose, y: :len, group: :dose))
485
+
486
+ @bp = @base_tooth + R.geom_boxplot +
487
+ # Split in vertical direction
488
+ R.facet_grid(:all.til :supp)
489
+
490
+ puts @bp</code></pre>
491
+ <p><img src="" /><!-- --></p>
383
492
  <p>It now becomes clear that although both methods of delivery have a direct impact on tooth growth, method (OJ) is non-linear having a higher impact with smaller doses of ascorbic acid and reducing its impact as the dose increases. With the (VC) approach, the impact seems to be more linear.</p>
384
493
  </div>
385
494
  <div id="adding-color" class="section level2">
386
495
  <h2>Adding Color</h2>
387
496
  <p>If we were writing about data analysis, we would make a better analysis of the trends and improve the statistical analysis. But here we are interested in working with ggplot in Ruby. So, let’s add some color to this plot to make the trend and comparison more visible. In the following plot, the boxes are color coded by dose. To add color, it is enough to add <code>fill: :dose</code> to the aesthetic of boxplot. With this command each ‘dose’ factor gets its own color.</p>
497
+ <pre class="ruby"><code>@bp = @bp + R.geom_boxplot(E.aes(fill: :dose))
498
+ puts @bp</code></pre>
499
+ <p><img src="" /><!-- --></p>
388
500
  <p>Facetting helps us compare the general trends for each delivery method. Adding color allows us to compare specifically how each dosage impacts the tooth growth. It is possible to observe that with smaller doses, up to 1mg, OJ performs better than VC (red color). For 2mg, both OJ and VC have the same median, but OJ is less dispersed (blue color). For 1mg (green color), OJ is significantly better than VC. By this very quick visual analysis, it seems that OJ is a better delivery method than VC.</p>
389
501
  </div>
390
502
  <div id="clarifying-the-data" class="section level2">
@@ -398,8 +510,16 @@ $(document).ready(function () {
398
510
  <p>A most unconventional design strategy is revealed: <em>to clarify, add detail.</em></p>
399
511
  </blockquote>
400
512
  <p>Let’s use this wisdom and add yet another layer of data to our plot, so that we clarify it with detail and do not leave large empty boxes. In this next plot, we add data points for each of the 60 pigs in the experiment. For that, add the function ‘R.geom_point’ to the plot.</p>
513
+ <pre class="ruby"><code># Split in vertical direction
514
+ @bp = @bp + R.geom_point
515
+
516
+ puts @bp</code></pre>
517
+ <p><img src="" /><!-- --></p>
401
518
  <p>Now we can see the actual distribution of all the 60 subjects. Actually, this is not totally true. We have a hard time seeing all 60 subjects. It seems that some points might be placed one over the other hiding useful information.</p>
402
519
  <p>But no sweat! Another layer might solve the problem. In the following plot a new layer called ‘geom_jitter’ is added to the plot. Jitter adds a small amount of random variation to the location of each point, and is a useful way of handling overplotting caused by discreteness in smaller datasets. This makes it easier to see all of the points and prevents data hiding. We also add color and change the shape of the points, making them even easier to see.</p>
520
+ <pre class="ruby"><code># Split in vertical direction
521
+ puts @bp + R.geom_jitter(shape: 23, color: &quot;cyan3&quot;, size: 1)</code></pre>
522
+ <p><img src="" /><!-- --></p>
403
523
  <p>Now we can see all 60 points in the graph. We have here a much higher information density and we can see outliers and subjects distribution.</p>
404
524
  </div>
405
525
  </div>
@@ -411,6 +531,12 @@ $(document).ready(function () {
411
531
  <h2>Improving Colors</h2>
412
532
  <p>Let’s start by trying to improve colors. For now, we will not use the jitter layer. The previous plot has three bright colors that have no relationship between them. Is there any obvious, or non-obvious for that matter, interpretation for the colors? Clearly, they are just random colors selected automatically by our software. Although those colors helped us understand the data, for a final presentation random colors can distract the viewer.</p>
413
533
  <p>In the following plot we use the function ‘scale_fill_manual’ to change the colors of the boxes and order of labels. For colors, we use shades of blue for each dosage, with light blue (‘cyan’) representing the lower dose and deep blue (‘deepskyblue4’) the higher dose. Also, the legend could be improved: we use the ‘breaks’ parameter to put the smaller value (0.5) at the bottom of the labels and the largest (2) at the top. This ordering seems more natural and matches with the actual order of the colors in the plot.</p>
534
+ <pre class="ruby"><code>@bp = @bp +
535
+ R.scale_fill_manual(values: R.c(&quot;cyan&quot;, &quot;deepskyblue&quot;, &quot;deepskyblue4&quot;),
536
+ breaks: R.c(&quot;2&quot;,&quot;1&quot;,&quot;0.5&quot;))
537
+
538
+ puts @bp</code></pre>
539
+ <p><img src="" /><!-- --></p>
414
540
  </div>
415
541
  <div id="violin-plot-and-jitter" class="section level2">
416
542
  <h2>Violin Plot and Jitter</h2>
@@ -420,29 +546,118 @@ $(document).ready(function () {
420
546
  <p>A violin plot is a method of plotting numeric data. It is similar to a box plot with a rotated kernel density plot on each side.</p>
421
547
  <p>A violin plot has four layers. The outer shape represents all possible results, with thickness indicating how common. (Thus the thickest section represents the mode average.) The next layer inside represents the values that occur 95% of the time. The next layer (if it exists) inside represents the values that occur 50% of the time. The central dot represents the median average value.</p>
422
548
  </blockquote>
549
+ <pre class="ruby"><code>@violin = @base_tooth + R.geom_violin(E.aes(fill: :dose)) +
550
+ R.facet_grid(:all.til :supp) +
551
+ R.geom_jitter(shape: 23, color: &quot;cyan3&quot;, size: 1) +
552
+ R.scale_fill_manual(values: R.c(&quot;cyan&quot;, &quot;deepskyblue&quot;, &quot;deepskyblue4&quot;),
553
+ breaks: R.c(&quot;2&quot;,&quot;1&quot;,&quot;0.5&quot;))
554
+
555
+ puts @violin</code></pre>
556
+ <p><img src="" /><!-- --></p>
423
557
  <p>This plot is an alternative to the original boxplot. For the final presentation, it is important to think which graphics will be best understood by our audience. A violin plot is a less known plot and could add mental overhead, yet, in my opinion, it does look a little bit better than the boxplot and provides even more information than the boxplot with jitter.</p>
424
558
  </div>
425
559
  <div id="adding-decoration" class="section level2">
426
560
  <h2>Adding Decoration</h2>
427
561
  <p>Our final plot is starting to take shape, but a presentation plot should have at least a title, labels on the axes and maybe some other decorations. Let’s start adding those. Since decoration requires more graph area, this new plot has a ‘width’ and ‘height’ specification. When there is no specification, the default values from R for width and height are 480.</p>
428
562
  <p>The ‘labs’ function adds the required decoration. In this example we use ‘title’, ‘subtitle’, ‘x’ for the <span class="math inline">\(x\)</span> axis label and ‘y’, for the <span class="math inline">\(y\)</span> axis label, and ‘caption’ for information about the plot (for clarity, we defined a caption variable using Ruby’s Here Doc style).</p>
563
+ <pre class="ruby"><code>caption = &lt;&lt;-EOT
564
+ Length of odontoblasts in 60 guinea pigs.
565
+ Each animal received one of three dose levels of vitamin C.
566
+ EOT
567
+
568
+ @decorations =
569
+ R.labs(title: &quot;Tooth Growth: Length vs Vitamin C Dose&quot;,
570
+ subtitle: &quot;Faceted by delivery method, OJ or VC&quot;,
571
+ x: &quot;Dose (mg)&quot;, y: &quot;Teeth length&quot;,
572
+ caption: caption)
573
+
574
+ puts @bp + @decorations</code></pre>
575
+ <p><img src="" /><!-- --></p>
429
576
  </div>
430
577
  <div id="the-corp-theme" class="section level2">
431
578
  <h2>The Corp Theme</h2>
432
579
  <p>We are almost done. But the default plot configuration does not yet look nice to the eye. We are still distracted by many aspects of the graph. First, the back font color does not look good. Then plot background, borders, grids all add clutter to the plot.</p>
433
580
  <p>We will now define our corporate theme in a module that can be used/loaded for all plots, similar to CSS or any other style definition.</p>
434
581
  <p>In this theme, we remove borders and grids. The background is left for faceted plots but removed for non-faceted plots. Font colors are a shade of blue (color: ‘#000080’). Axis labels are moved near the end of the axis and written in ‘bold’.</p>
582
+ <pre class="ruby"><code>module CorpTheme
583
+
584
+ R.install_and_loads 'RColorBrewer'
585
+
586
+ #---------------------------------------------------------------------------------
587
+ # face can be (1=plain, 2=bold, 3=italic, 4=bold-italic)
588
+ #---------------------------------------------------------------------------------
589
+
590
+ def self.text_element(size, face: &quot;plain&quot;, hjust: nil)
591
+ E.element_text(color: &quot;#000080&quot;,
592
+ face: face,
593
+ size: size,
594
+ hjust: hjust)
595
+ end
596
+
597
+ #---------------------------------------------------------------------------------
598
+ # Defines the plot theme (visualization). In this theme we remove major and minor
599
+ # grids, borders and background. We also turn-off scientific notation.
600
+ #---------------------------------------------------------------------------------
601
+
602
+ def self.global_theme(faceted = false)
603
+
604
+ R.options(scipen: 999) # turn-off scientific notation like 1e+48
605
+ # R.theme_set(R.theme_bw)
606
+
607
+ # remove major grids
608
+ gb = R.theme(panel__grid__major: E.element_blank())
609
+ # remove minor grids
610
+ gb = gb + R.theme(panel__grid__minor: E.element_blank)
611
+ # gb = R.theme(panel__grid__minor: E.element_blank)
612
+ # remove border
613
+ gb = gb + R.theme(panel__border: E.element_blank)
614
+ # remove background. When working with faceted graphs, the background makes
615
+ # it easier to see each facet, so leave it
616
+ gb = gb + R.theme(panel__background: E.element_blank) if !faceted
617
+ # Change axis font
618
+ gb = gb + R.theme(axis__text: text_element(8))
619
+ # change axis title font
620
+ gb = gb + R.theme(axis__title: text_element(10, face: &quot;bold&quot;, hjust: 1))
621
+ # change font of title
622
+ gb = gb + R.theme(title: text_element(12, face: &quot;bold&quot;))
623
+ # change font of subtitle
624
+ gb = gb + R.theme(plot__subtitle: text_element(9))
625
+ # change font of captions
626
+ gb = gb + R.theme(plot__caption: text_element(8))
627
+
628
+ end
629
+
630
+ end</code></pre>
435
631
  </div>
436
632
  <div id="final-box-plot" class="section level2">
437
633
  <h2>Final Box Plot</h2>
438
634
  <p>We can now easily make our final boxplot and violin plot. All the layers for the plot were added in order to expose our understanding of the data and the need to present the result to our audience.</p>
439
635
  <p>The final specification is just the addition of all layers built up to this point (<span class="citation">@bp</span>), plus the decorations (<span class="citation">@decorations</span>), plus the corporate theme.</p>
440
636
  <p>Here is our final boxplot, without jitter.</p>
637
+ <pre class="ruby"><code>puts @bp + @decorations + CorpTheme.global_theme(faceted: true)</code></pre>
638
+ <p><img src="" /><!-- --></p>
441
639
  <p>And here is the final violin plot, with jitter and the same look and feel of the corporate boxplot.</p>
640
+ <pre class="ruby"><code>puts @violin + @decorations + CorpTheme.global_theme(faceted: true)</code></pre>
641
+ <p><img src="" /><!-- --></p>
442
642
  </div>
443
643
  <div id="another-view" class="section level2">
444
644
  <h2>Another View</h2>
445
645
  <p>We now make another plot, with the same look and feel as before but facetted by dose and not by supplement. This shows how easy it is to create new plots by just changing small statement on the <em>grammar of graphics</em>.</p>
646
+ <pre class="ruby"><code>caption = &lt;&lt;-EOT
647
+ Length of odontoblasts in 60 guinea pigs.
648
+ Each animal received one of three dose levels of vitamin C.
649
+ EOT
650
+
651
+ @bp = @tooth_growth.ggplot(E.aes(x: :supp, y: :len, group: :supp)) +
652
+ R.geom_boxplot(E.aes(fill: :supp)) + R.facet_grid(:all.til :dose) +
653
+ R.scale_fill_manual(values: R.c(&quot;cyan&quot;, &quot;deepskyblue4&quot;)) +
654
+ R.labs(title: &quot;Tooth Growth: Length by Dose&quot;,
655
+ subtitle: &quot;Faceted by dose&quot;,
656
+ x: &quot;Delivery method&quot;, y: &quot;Teeth length&quot;,
657
+ caption: caption) +
658
+ CorpTheme.global_theme(faceted: true)
659
+ puts @bp</code></pre>
660
+ <p><img src="" /><!-- --></p>
446
661
  </div>
447
662
  </div>
448
663
  <div id="conclusion" class="section level1">
@@ -143,6 +143,24 @@ following chunk that 'ToothGrowth' is the R variable and Ruby's '@tooth_growth'
143
143
  assigned the value of '~:ToothGrowth'.
144
144
 
145
145
 
146
+ ```ruby
147
+ # Read the R ToothGrowth variable and assign it to the
148
+ # Ruby instance variable @tooth_growth that will be
149
+ # available to all Ruby chunks in this document.
150
+ @tooth_growth = ~:ToothGrowth
151
+ # print the first few elements of the dataset
152
+ puts @tooth_growth.head
153
+ ```
154
+
155
+ ```
156
+ ## len supp dose
157
+ ## 1 4.2 VC 0.5
158
+ ## 2 11.5 VC 0.5
159
+ ## 3 7.3 VC 0.5
160
+ ## 4 5.8 VC 0.5
161
+ ## 5 6.4 VC 0.5
162
+ ## 6 10.0 VC 0.5
163
+ ```
146
164
 
147
165
  Great! We've managed to read the ToothGrowth dataset and take a look at its elements.
148
166
  We see here the first 6 rows of the dataset. To access a column, follow the dataset name
@@ -150,6 +168,15 @@ with a dot ('.') and the name of the column. Also use dot notation to chain meth
150
168
  in usual Ruby style.
151
169
 
152
170
 
171
+ ```ruby
172
+ # Access the tooth_growth 'len' column and print the first few
173
+ # elements of this column with the 'head' method.
174
+ puts @tooth_growth.len.head
175
+ ```
176
+
177
+ ```
178
+ ## [1] 4.2 11.5 7.3 5.8 6.4 10.0
179
+ ```
153
180
 
154
181
  The 'dose' column contains a numeric value with either, 0.5, 1 or 2, although the
155
182
  first 6 rows as seen above only contain the 0.5 values. Even though those are
@@ -159,11 +186,22 @@ function from Galaaz the dot ('.') in the function name is substituted by '__' (
159
186
  The function 'as.factor' becomes 'R.as__factor' or just 'as__factor' when chaining.
160
187
 
161
188
 
189
+ ```ruby
190
+ # convert the dose to a factor
191
+ @tooth_growth.dose = @tooth_growth.dose.as__factor
192
+ ```
162
193
 
163
194
  Let's explore some more details of this dataset. In particular, let's look at its dimensions,
164
195
  structure and summary statistics.
165
196
 
166
197
 
198
+ ```ruby
199
+ puts @tooth_growth.dim
200
+ ```
201
+
202
+ ```
203
+ ## [1] 60 3
204
+ ```
167
205
 
168
206
  This dataset has 60 rows, one for each subject and 3 columns, as we have already seen.
169
207
 
@@ -172,12 +210,35 @@ functions does not return anything and prints the structure of the dataset
172
210
  as a side effect.
173
211
 
174
212
 
213
+ ```ruby
214
+ @tooth_growth.str
215
+ ```
216
+
217
+ ```
218
+ ## 'data.frame': 60 obs. of 3 variables:
219
+ ## $ len : num 4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
220
+ ## $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
221
+ ## $ dose: Factor w/ 3 levels "0.5","1","2": 1 1 1 1 1 1 1 1 1 1 ...
222
+ ```
175
223
  Observe that both variables 'supp' and 'dose' are factors. The system made variable 'supp'
176
224
  a factor automatically, since it contains two strings OJ and VC.
177
225
 
178
226
  Finally, using the summary method, we get the statistical summary for the dataset
179
227
 
180
228
 
229
+ ```ruby
230
+ puts @tooth_growth.summary
231
+ ```
232
+
233
+ ```
234
+ ## len supp dose
235
+ ## Min. : 4.20 OJ:30 0.5:20
236
+ ## 1st Qu.:13.07 VC:30 1 :20
237
+ ## Median :19.25 2 :20
238
+ ## Mean :18.81
239
+ ## 3rd Qu.:25.27
240
+ ## Max. :33.90
241
+ ```
181
242
 
182
243
  # Doing the Data Analysis
183
244
 
@@ -213,7 +274,15 @@ image gets a default name of 'Rplot\<nnn\>' where \<nnn\> is the number of the p
213
274
  closes the device and creates the 'png' file. We can
214
275
  then include the generated 'png' file in the document by adding an rmarkdown directive.
215
276
 
216
- <img src="/home/rbotafogo/desenv/galaaz/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg" width="50%" />
277
+
278
+ ```ruby
279
+ require 'ggplot'
280
+
281
+ e = @tooth_growth.ggplot(E.aes(x: :dose, y: :len))
282
+ print e + R.geom_boxplot
283
+ ```
284
+
285
+ ![](/home/rbotafogo/desenv/galaaz/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png)<!-- -->
217
286
 
218
287
  [//]: # (Including the 'png' file generated above. In future releases)
219
288
  [//]: # (of gKnit, the figures should be automatically saved and the name)
@@ -257,6 +326,18 @@ facet the plot based on the ```supp``` variable and split the plot vertically.
257
326
  the formula to ```+:supp =~ +:all``` would split the plot horizontally.
258
327
 
259
328
 
329
+ ```ruby
330
+ @base_tooth = @tooth_growth.ggplot(E.aes(x: :dose, y: :len, group: :dose))
331
+
332
+ @bp = @base_tooth + R.geom_boxplot +
333
+ # Split in vertical direction
334
+ R.facet_grid(:all.til :supp)
335
+
336
+ puts @bp
337
+ ```
338
+
339
+
340
+ ![](/home/rbotafogo/desenv/galaaz/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png)<!-- -->
260
341
 
261
342
  It now becomes clear that although both methods of delivery have a direct
262
343
  impact on tooth growth, method (OJ) is non-linear having a higher impact with smaller
@@ -273,6 +354,13 @@ enough to add ```fill: :dose``` to the aesthetic of boxplot. With this command
273
354
  factor gets its own color.
274
355
 
275
356
 
357
+ ```ruby
358
+ @bp = @bp + R.geom_boxplot(E.aes(fill: :dose))
359
+ puts @bp
360
+ ```
361
+
362
+
363
+ ![](/home/rbotafogo/desenv/galaaz/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png)<!-- -->
276
364
 
277
365
  Facetting helps us compare the general trends for each delivery method.
278
366
  Adding color allows us to compare specifically how each dosage impacts the tooth growth.
@@ -304,6 +392,15 @@ each of the 60 pigs in the experiment. For that, add the function 'R.geom_point
304
392
  plot.
305
393
 
306
394
 
395
+ ```ruby
396
+ # Add one point per subject on top of the boxplots
397
+ @bp = @bp + R.geom_point
398
+
399
+ puts @bp
400
+ ```
401
+
402
+
403
+ ![](/home/rbotafogo/desenv/galaaz/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png)<!-- -->
307
404
 
308
405
  Now we can see the actual distribution of all the 60 subjects. Actually, this is not
309
406
  totally true. We have a hard time seeing all 60 subjects. It seems that some points
@@ -317,6 +414,13 @@ prevents data hiding. We also add
317
414
  color and change the shape of the points, making them even easier to see.
318
415
 
319
416
 
417
+ ```ruby
418
+ # Jitter the points so that overlapping subjects are not hidden
419
+ puts @bp + R.geom_jitter(shape: 23, color: "cyan3", size: 1)
420
+ ```
421
+
422
+
423
+ ![](/home/rbotafogo/desenv/galaaz/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png)<!-- -->
320
424
 
321
425
  Now we can see all 60 points in the graph. We have here a much higher information density
322
426
  and we can see outliers and the distribution of subjects.
@@ -352,6 +456,16 @@ This ordering seems more natural and
352
456
  matches with the actual order of the colors in the plot.
353
457
 
354
458
 
459
+ ```ruby
460
+ @bp = @bp +
461
+ R.scale_fill_manual(values: R.c("cyan", "deepskyblue", "deepskyblue4"),
462
+ breaks: R.c("2","1","0.5"))
463
+
464
+ puts @bp
465
+ ```
466
+
467
+
468
+ ![](/home/rbotafogo/desenv/galaaz/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png)<!-- -->
355
469
 
356
470
  ## Violin Plot and Jitter
357
471
 
@@ -371,6 +485,18 @@ a boxplot known as a _violin plot_ with jittered data.
371
485
  > The central dot represents the median average value.
372
486
 
373
487
 
488
+ ```ruby
489
+ @violin = @base_tooth + R.geom_violin(E.aes(fill: :dose)) +
490
+ R.facet_grid(:all.til :supp) +
491
+ R.geom_jitter(shape: 23, color: "cyan3", size: 1) +
492
+ R.scale_fill_manual(values: R.c("cyan", "deepskyblue", "deepskyblue4"),
493
+ breaks: R.c("2","1","0.5"))
494
+
495
+ puts @violin
496
+ ```
497
+
498
+
499
+ ![](/home/rbotafogo/desenv/galaaz/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png)<!-- -->
374
500
 
375
501
  This plot is an alternative to the original boxplot. For the final presentation, it is
376
502
  important to think which graphics will be best understood by our audience. A violin plot
@@ -391,6 +517,23 @@ for information about the plot (for clarity, we defined a caption variable using
391
517
  Here Doc style).
392
518
 
393
519
 
520
+ ```ruby
521
+ caption = <<-EOT
522
+ Length of odontoblasts in 60 guinea pigs.
523
+ Each animal received one of three dose levels of vitamin C.
524
+ EOT
525
+
526
+ @decorations =
527
+ R.labs(title: "Tooth Growth: Length vs Vitamin C Dose",
528
+ subtitle: "Faceted by delivery method, OJ or VC",
529
+ x: "Dose (mg)", y: "Teeth length",
530
+ caption: caption)
531
+
532
+ puts @bp + @decorations
533
+ ```
534
+
535
+
536
+ ![](/home/rbotafogo/desenv/galaaz/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png)<!-- -->
394
537
 
395
538
  ## The Corp Theme
396
539
 
@@ -408,6 +551,57 @@ a shade of blue (color: '#000080'). Axis labels are moved near the end of the axi
408
551
  written in 'bold'.
409
552
 
410
553
 
554
+ ```ruby
555
+ module CorpTheme
556
+
557
+ R.install_and_loads 'RColorBrewer'
558
+
559
+ #---------------------------------------------------------------------------------
560
+ # face can be (1=plain, 2=bold, 3=italic, 4=bold-italic)
561
+ #---------------------------------------------------------------------------------
562
+
563
+ def self.text_element(size, face: "plain", hjust: nil)
564
+ E.element_text(color: "#000080",
565
+ face: face,
566
+ size: size,
567
+ hjust: hjust)
568
+ end
569
+
570
+ #---------------------------------------------------------------------------------
571
+ # Defines the plot theme (visualization). In this theme we remove major and minor
572
+ # grids, borders and background. We also turn-off scientific notation.
573
+ #---------------------------------------------------------------------------------
574
+
575
+ def self.global_theme(faceted = false)
576
+
577
+ R.options(scipen: 999) # turn-off scientific notation like 1e+48
578
+ # R.theme_set(R.theme_bw)
579
+
580
+ # remove major grids
581
+ gb = R.theme(panel__grid__major: E.element_blank())
582
+ # remove minor grids
583
+ gb = gb + R.theme(panel__grid__minor: E.element_blank)
584
+ # gb = R.theme(panel__grid__minor: E.element_blank)
585
+ # remove border
586
+ gb = gb + R.theme(panel__border: E.element_blank)
587
+ # remove background. When working with faceted graphs, the background makes
588
+ # it easier to see each facet, so leave it
589
+ gb = gb + R.theme(panel__background: E.element_blank) if !faceted
590
+ # Change axis font
591
+ gb = gb + R.theme(axis__text: text_element(8))
592
+ # change axis title font
593
+ gb = gb + R.theme(axis__title: text_element(10, face: "bold", hjust: 1))
594
+ # change font of title
595
+ gb = gb + R.theme(title: text_element(12, face: "bold"))
596
+ # change font of subtitle
597
+ gb = gb + R.theme(plot__subtitle: text_element(9))
598
+ # change font of captions
599
+ gb = gb + R.theme(plot__caption: text_element(8))
600
+
601
+ end
602
+
603
+ end
604
+ ```
411
605
 
412
606
  ## Final Box Plot
413
607
 
@@ -421,11 +615,23 @@ the decorations (@decorations), plus the corporate theme.
421
615
  Here is our final boxplot, without jitter.
422
616
 
423
617
 
618
+ ```ruby
619
+ puts @bp + @decorations + CorpTheme.global_theme(faceted: true)
620
+ ```
621
+
622
+
623
+ ![](/home/rbotafogo/desenv/galaaz/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png)<!-- -->
424
624
 
425
625
  And here is the final violin plot, with jitter and the same look and feel of the corporate
426
626
  boxplot.
427
627
 
428
628
 
629
+ ```ruby
630
+ puts @violin + @decorations + CorpTheme.global_theme(faceted: true)
631
+ ```
632
+
633
+
634
+ ![](/home/rbotafogo/desenv/galaaz/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png)<!-- -->
429
635
 
430
636
  ## Another View
431
637
 
@@ -434,6 +640,25 @@ dose and not by supplement. This shows how easy it is to create new plots by ju
434
640
  changing a small statement in the _grammar of graphics_.
435
641
 
436
642
 
643
+ ```ruby
644
+ caption = <<-EOT
645
+ Length of odontoblasts in 60 guinea pigs.
646
+ Each animal received one of three dose levels of vitamin C.
647
+ EOT
648
+
649
+ @bp = @tooth_growth.ggplot(E.aes(x: :supp, y: :len, group: :supp)) +
650
+ R.geom_boxplot(E.aes(fill: :supp)) + R.facet_grid(:all.til :dose) +
651
+ R.scale_fill_manual(values: R.c("cyan", "deepskyblue4")) +
652
+ R.labs(title: "Tooth Growth: Length by Dose",
653
+ subtitle: "Faceted by dose",
654
+ x: "Delivery method", y: "Teeth length",
655
+ caption: caption) +
656
+ CorpTheme.global_theme(faceted: true)
657
+ puts @bp
658
+ ```
659
+
660
+
661
+ ![](/home/rbotafogo/desenv/galaaz/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png)<!-- -->
437
662
 
438
663
  # Conclusion
439
664