galaaz 0.4.7 → 0.4.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1179 -39
  3. data/Rakefile +4 -2
  4. data/bin/grun +1 -1
  5. data/bin/gstudio +1 -1
  6. data/blogs/dev/dev.Rmd +2 -56
  7. data/blogs/dev/dev.md +32 -61
  8. data/blogs/dev/dev2.Rmd +65 -0
  9. data/blogs/dplyr/dplyr.Rmd +29 -0
  10. data/blogs/{dev/dev.html → dplyr/dplyr.html} +88 -57
  11. data/blogs/dplyr/dplyr.md +58 -0
  12. data/blogs/gknit/gknit.html +1262 -25
  13. data/blogs/gknit/gknit.md +471 -27
  14. data/blogs/gknit/gknit_files/figure-html/bubble-1.png +0 -0
  15. data/blogs/manual/graph.rb +29 -0
  16. data/blogs/manual/manual.Rmd +567 -29
  17. data/blogs/manual/manual.html +743 -46
  18. data/blogs/manual/manual.md +1179 -39
  19. data/blogs/nse_dplyr/nse_dplyr.Rmd +466 -11
  20. data/blogs/nse_dplyr/nse_dplyr.html +472 -37
  21. data/blogs/nse_dplyr/nse_dplyr.md +645 -32
  22. data/blogs/ruby_plot/ruby_plot.Rmd +4 -4
  23. data/blogs/ruby_plot/ruby_plot.html +217 -2
  24. data/blogs/ruby_plot/ruby_plot.md +226 -1
  25. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  26. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +2 -2
  27. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  28. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +70 -70
  29. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  30. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +72 -72
  31. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  32. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +116 -116
  33. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  34. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +176 -176
  35. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  36. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +236 -236
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +176 -176
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +160 -160
  42. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  43. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +105 -105
  44. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +121 -121
  46. data/examples/islr/ch2.spec.rb +1 -1
  47. data/examples/islr/ch3_boston.rb +4 -4
  48. data/examples/islr/x_y_rnorm.jpg +0 -0
  49. data/lib/R_interface/r.rb +1 -1
  50. data/lib/R_interface/r_methods.rb +2 -2
  51. data/lib/R_interface/rdata_frame.rb +8 -5
  52. data/lib/R_interface/rindexed_object.rb +1 -2
  53. data/lib/R_interface/rlist.rb +1 -0
  54. data/lib/R_interface/robject.rb +0 -1
  55. data/lib/R_interface/rpkg.rb +14 -6
  56. data/lib/R_interface/rsupport.rb +7 -9
  57. data/lib/R_interface/ruby_extensions.rb +17 -5
  58. data/lib/gknit/knitr_engine.rb +9 -2
  59. data/lib/util/exec_ruby.rb +2 -2
  60. data/specs/r_dataframe.spec.rb +173 -0
  61. data/specs/r_list.spec.rb +4 -4
  62. data/specs/ruby_expression.spec.rb +2 -11
  63. data/specs/tmp.rb +76 -34
  64. data/version.rb +1 -1
  65. metadata +17 -6
  66. data/blogs/dev/dev_files/figure-html/bubble-1.png +0 -0
  67. data/blogs/dev/dev_files/figure-html/diverging_bar. +0 -0
  68. data/blogs/dev/dev_files/figure-html/diverging_bar.png +0 -0
@@ -283,6 +283,9 @@ img {
283
283
  button.code-folding-btn:focus {
284
284
  outline: none;
285
285
  }
286
+ summary {
287
+ display: list-item;
288
+ }
286
289
  </style>
287
290
 
288
291
 
@@ -290,10 +293,71 @@ button.code-folding-btn:focus {
290
293
  <div class="container-fluid main-container">
291
294
 
292
295
  <!-- tabsets -->
296
+
297
+ <style type="text/css">
298
+ .tabset-dropdown > .nav-tabs {
299
+ display: inline-table;
300
+ max-height: 500px;
301
+ min-height: 44px;
302
+ overflow-y: auto;
303
+ background: white;
304
+ border: 1px solid #ddd;
305
+ border-radius: 4px;
306
+ }
307
+
308
+ .tabset-dropdown > .nav-tabs > li.active:before {
309
+ content: "";
310
+ font-family: 'Glyphicons Halflings';
311
+ display: inline-block;
312
+ padding: 10px;
313
+ border-right: 1px solid #ddd;
314
+ }
315
+
316
+ .tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before {
317
+ content: "";
318
+ border: none;
319
+ }
320
+
321
+ .tabset-dropdown > .nav-tabs.nav-tabs-open:before {
322
+ content: "";
323
+ font-family: 'Glyphicons Halflings';
324
+ display: inline-block;
325
+ padding: 10px;
326
+ border-right: 1px solid #ddd;
327
+ }
328
+
329
+ .tabset-dropdown > .nav-tabs > li.active {
330
+ display: block;
331
+ }
332
+
333
+ .tabset-dropdown > .nav-tabs > li > a,
334
+ .tabset-dropdown > .nav-tabs > li > a:focus,
335
+ .tabset-dropdown > .nav-tabs > li > a:hover {
336
+ border: none;
337
+ display: inline-block;
338
+ border-radius: 4px;
339
+ }
340
+
341
+ .tabset-dropdown > .nav-tabs.nav-tabs-open > li {
342
+ display: block;
343
+ float: none;
344
+ }
345
+
346
+ .tabset-dropdown > .nav-tabs > li {
347
+ display: none;
348
+ }
349
+ </style>
350
+
293
351
  <script>
294
352
  $(document).ready(function () {
295
353
  window.buildTabsets("TOC");
296
354
  });
355
+
356
+ $(document).ready(function () {
357
+ $('.tabset-dropdown > .nav-tabs > li').click(function () {
358
+ $(this).parent().toggleClass('nav-tabs-open')
359
+ });
360
+ });
297
361
  </script>
298
362
 
299
363
  <!-- code folding -->
@@ -302,7 +366,6 @@ $(document).ready(function () {
302
366
 
303
367
 
304
368
 
305
-
306
369
  <div class="fluid-row" id="header">
307
370
 
308
371
 
@@ -382,10 +445,18 @@ $(document).ready(function () {
382
445
  </blockquote></li>
383
446
  </ul>
384
447
  </div>
385
- <div id="basic-types" class="section level1">
386
- <h1>Basic Types</h1>
387
- <div id="vectors" class="section level2">
388
- <h2>Vectors</h2>
448
+ <div id="gknitting-a-document" class="section level1">
449
+ <h1>gKnitting a Document</h1>
450
+ <p>This manual has been formatted using gKnit. gKnit uses Knitr and R markdown to knit a document in Ruby or R and output it in any of the available formats for R markdown.<br />
451
+ gKnit runs atop of GraalVM, and Galaaz. In gKnit, Ruby variables are persisted between chunks, making it an ideal solution for literate programming.<br />
452
+ Also, since it is based on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming with Ruby and R is quite natural.</p>
453
+ <p>gKnit was described in more depth in:</p>
454
+ <ul>
455
+ <li>xxx.xxxx.xxx</li>
456
+ </ul>
457
+ </div>
458
+ <div id="vector" class="section level1">
459
+ <h1>Vector</h1>
389
460
  <p>Vectors can be thought of as contiguous cells containing data. Cells are accessed through indexing operations such as x[5]. Galaaz has six basic (‘atomic’) vector types: logical, integer, real, complex, string (or character) and raw. The modes and storage modes for the different vector types are listed in the following table.</p>
390
461
  <table>
391
462
  <thead>
@@ -433,8 +504,8 @@ $(document).ready(function () {
433
504
  <pre class="ruby"><code>@vec = R.c(1, 2, 3)
434
505
  puts @vec</code></pre>
435
506
  <pre><code>## [1] 1 2 3</code></pre>
436
- <p>Lets take a look at the type, mode and storage.mode of our vector <span class="citation">@vec</span>. In order to print this out, we are creating a data frame ‘df’ and printing it out. A data frame, for those not familiar with it, it basically a table. Here we create the data frame and add the column name by passing named parameters for each column, such as ‘typeof:’, ‘mode:’ and ’storage__mode’. You should also note here that the double underscore is converted to a ‘.’.</p>
437
- <p>In R, the method used to create a data frame is ‘data.frame’, in Galaaz we use data__frame’.</p>
507
+ <p>Let’s take a look at the type, mode and storage.mode of our vector <span class="citation">@vec</span>. In order to print this out, we are creating a data frame ‘df’ and printing it out. A data frame, for those not familiar with it, is basically a table. Here we create the data frame and add the column names by passing named parameters for each column, such as ‘typeof:’, ‘mode:’ and ‘storage__mode:’. You should also note here that the double underscore is converted to a ‘.’. So, when printed, ‘storage__mode’ will actually print as ‘storage.mode’.</p>
508
+ <p>Data frames will be described more carefully later. In R, the method used to create a data frame is ‘data.frame’; in Galaaz we use ‘data__frame’.</p>
438
509
  <pre class="ruby"><code>df = R.data__frame(typeof: @vec.typeof, mode: @vec.mode, storage__mode: @vec.storage__mode)
439
510
  puts df</code></pre>
440
511
  <pre><code>## typeof mode storage.mode
@@ -479,23 +550,104 @@ double
479
550
  ## undefined local variable or method `hello' for RubyChunk:Class</code></pre>
480
551
  <pre><code>## Message:
481
552
  ## (eval):1:in `exec_ruby'
482
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:137:in `instance_eval'
483
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:137:in `exec_ruby'
484
- ## /home/rbotafogo/desenv/galaaz/lib/gknit/ruby_engine.rb:55:in `block in initialize'
553
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:141:in `instance_eval'
554
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:141:in `exec_ruby'
555
+ ## /home/rbotafogo/desenv/galaaz/lib/gknit/knitr_engine.rb:657:in `block in initialize'
485
556
  ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
486
557
  ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
487
558
  ## (eval):3:in `function(...) {\n rb_method(...)'
488
559
  ## unknown.r:1:in `in_dir'
489
- ## unknown.r:1:in `block_exec'
490
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc12/jre/languages/R/library/knitr/R/block.R:91:in `call_block'
491
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc12/jre/languages/R/library/knitr/R/block.R:6:in `process_group.block'
492
- ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc12/jre/languages/R/library/knitr/R/block.R:3:in `&lt;no source&gt;'
560
+ ## unknown.r:1:in `block_exec:BLOCK0'
561
+ ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc15/jre/languages/R/library/knitr/R/block.R:102:in `block_exec'
562
+ ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc15/jre/languages/R/library/knitr/R/block.R:92:in `call_block'
563
+ ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc15/jre/languages/R/library/knitr/R/block.R:6:in `process_group.block'
564
+ ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc15/jre/languages/R/library/knitr/R/block.R:3:in `&lt;no source&gt;'
493
565
  ## unknown.r:1:in `withCallingHandlers'
494
566
  ## unknown.r:1:in `process_file'
495
- ## unknown.r:1:in `&lt;no source&gt;'
496
- ## unknown.r:1:in `&lt;no source&gt;'
497
- ## &lt;REPL&gt;:4:in `&lt;repl wrapper&gt;'
567
+ ## unknown.r:1:in `&lt;no source&gt;:BLOCK1'
568
+ ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc15/jre/languages/R/library/knitr/R/output.R:129:in `&lt;no source&gt;'
569
+ ## unknown.r:1:in `&lt;no source&gt;:BLOCK1'
570
+ ## /home/rbotafogo/lib/graalvm-ce-1.0.0-rc15/jre/languages/R/library/rmarkdown/R/render.R:162:in `&lt;no source&gt;'
571
+ ## &lt;REPL&gt;:5:in `&lt;repl wrapper&gt;'
498
572
  ## &lt;REPL&gt;:1</code></pre>
573
+ <p>Here is a vector with logical values</p>
574
+ <pre class="ruby"><code>@vec = R.c(true, true, false, false, true)
575
+ puts @vec</code></pre>
576
+ <pre><code>## [1] TRUE TRUE FALSE FALSE TRUE</code></pre>
577
+ <div id="combining-vectors" class="section level2">
578
+ <h2>Combining Vectors</h2>
579
+ <p>The ‘c’ functions used to create vectors can also be used to combine two vectors:</p>
580
+ <pre class="ruby"><code>@vec1 = R.c(10.0, 20.0, 30.0)
581
+ @vec2 = R.c(4.0, 5.0, 6.0)
582
+ @vec = R.c(@vec1, @vec2)
583
+ puts @vec</code></pre>
584
+ <pre><code>## [1] 10 20 30 4 5 6</code></pre>
585
+ <p>In Galaaz, methods can be chained (somewhat like the pipe operator in R %&gt;%, but more generic). In this next example, method ‘c’ is chained after ‘<span class="citation">@vec1</span>’. This also looks like ‘c’ is a method of the vector, but in reality, this is actually closer to the pipe operator. When Galaaz identifies that ‘c’ is not a method of ‘vec’ it actually tries to call ‘R.c’ with ‘<span class="citation">@vec1</span>’ as the first argument concatenated with all the other available arguments. The code below is automatically converted to the code above.</p>
586
+ <pre class="ruby"><code>@vec = @vec1.c(@vec2)
587
+ puts @vec</code></pre>
588
+ <pre><code>## [1] 10 20 30 4 5 6</code></pre>
589
+ </div>
590
+ <div id="vector-arithmetic" class="section level2">
591
+ <h2>Vector Arithmetic</h2>
592
+ <p>Arithmetic operations on vectors are performed element by element:</p>
593
+ <pre class="ruby"><code>puts @vec1 + @vec2</code></pre>
594
+ <pre><code>## [1] 14 25 36</code></pre>
595
+ <pre class="ruby"><code>puts @vec1 * 5</code></pre>
596
+ <pre><code>## [1] 50 100 150</code></pre>
597
+ <p>When vectors have different length, a recycling rule is applied to the shorter vector:</p>
598
+ <pre class="ruby"><code>@vec3 = R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
599
+ puts @vec4 = @vec1 + @vec3</code></pre>
600
+ <pre><code>## [1] 11 22 33 14 25 36 17 28 39</code></pre>
601
+ </div>
602
+ <div id="vector-indexing" class="section level2">
603
+ <h2>Vector Indexing</h2>
604
+ <p>Vectors can be indexed by using the ‘[]’ operator:</p>
605
+ <pre class="ruby"><code>puts @vec4[3]</code></pre>
606
+ <pre><code>## [1] 33</code></pre>
607
+ <p>We can also index a vector with another vector. For example, in the code below, we take elements 1, 3, 5, and 7 from <span class="citation">@vec4</span>:</p>
608
+ <pre class="ruby"><code>puts @vec4[R.c(1, 3, 5, 7)]</code></pre>
609
+ <pre><code>## [1] 11 33 25 17</code></pre>
610
+ <p>Repeating an index and having indices out of order is valid code:</p>
611
+ <pre class="ruby"><code>puts @vec4[R.c(1, 3, 3, 1)]</code></pre>
612
+ <pre><code>## [1] 11 33 33 11</code></pre>
613
+ <p>It is also possible to index a vector with a negative number or negative vector. In these cases the indexed values are not returned:</p>
614
+ <pre class="ruby"><code>puts @vec4[-3]
615
+ puts @vec4[-R.c(1, 3, 5, 7)]</code></pre>
616
+ <pre><code>## [1] 11 22 14 25 36 17 28 39
617
+ ## [1] 22 14 36 28 39</code></pre>
618
+ <p>If an index is out of range, a missing value (NA) will be reported.</p>
619
+ <pre class="ruby"><code>puts @vec4[30]</code></pre>
620
+ <pre><code>## [1] NA</code></pre>
621
+ <p>It is also possible to index a vector by range:</p>
622
+ <pre class="ruby"><code>puts @vec4[(2..5)]</code></pre>
623
+ <pre><code>## [1] 22 33 14 25</code></pre>
624
+ <p>Elements in a vector can be named using the ‘names’ attribute of a vector:</p>
625
+ <pre class="ruby"><code>full_name = R.c(&quot;Rodrigo&quot;, &quot;A&quot;, &quot;Botafogo&quot;)
626
+ full_name.names = R.c(&quot;First&quot;, &quot;Middle&quot;, &quot;Last&quot;)
627
+ puts full_name</code></pre>
628
+ <pre><code>## First Middle Last
629
+ ## &quot;Rodrigo&quot; &quot;A&quot; &quot;Botafogo&quot;</code></pre>
630
+ <p>Or it can also be named by using the ‘c’ function with named parameters:</p>
631
+ <pre class="ruby"><code>full_name = R.c(First: &quot;Rodrigo&quot;, Middle: &quot;A&quot;, Last: &quot;Botafogo&quot;)
632
+ puts full_name</code></pre>
633
+ <pre><code>## First Middle Last
634
+ ## &quot;Rodrigo&quot; &quot;A&quot; &quot;Botafogo&quot;</code></pre>
635
+ </div>
636
+ <div id="extracting-native-ruby-types-from-a-vector" class="section level2">
637
+ <h2>Extracting Native Ruby Types from a Vector</h2>
638
+ <p>Vectors created with ‘R.c’ are of class R::Vector. You might have noticed that when indexing a vector, a new vector is returned, even if this vector has one single element. In order to use R::Vector with other ruby classes it might be necessary to extract the actual Ruby native type from the vector. In order to do this extraction the ‘&gt;&gt;’ operator is used.</p>
639
+ <pre class="ruby"><code>puts @vec4
640
+ puts @vec4 &gt;&gt; 0
641
+ puts @vec4 &gt;&gt; 4</code></pre>
642
+ <pre><code>## [1] 11 22 33 14 25 36 17 28 39
643
+ ## 11.0
644
+ ## 25.0</code></pre>
645
+ <p>Note that indexing with ‘&gt;&gt;’ starts at 0 and not at 1, also, we cannot do negative indexing.</p>
646
+ </div>
647
+ </div>
648
+ <div id="accessing-r-variables" class="section level1">
649
+ <h1>Accessing R variables</h1>
650
+ <p>Galaaz allows Ruby to access variables created in R. For example, the ‘mtcars’ data set is available in R and can be accessed from Ruby by using the ‘tilde’ operator followed by the symbol for the variable, in this case ‘:mtcars’. In the code below method ‘outputs’ is used to output the ‘mtcars’ data set nicely formatted in HTML by use of the ‘kable’ and ‘kable_styling’ functions. Method ‘outputs’ is only available when used with ‘gknit’.</p>
499
651
  <pre class="ruby"><code>outputs (~:mtcars).kable.kable_styling</code></pre>
500
652
  <table class="table" style="margin-left: auto; margin-right: auto;">
501
653
  <thead>
@@ -1757,36 +1909,581 @@ Volvo 142E
1757
1909
  </tbody>
1758
1910
  </table>
1759
1911
  </div>
1760
- <div id="graphics-with-ggplot" class="section level2">
1761
- <h2>Graphics with ggplot</h2>
1762
- <pre class="ruby"><code>require 'ggplot'
1763
-
1764
- R.theme_set R.theme_bw
1765
-
1766
- # Data Prep
1767
- mtcars = ~:mtcars
1768
- mtcars.car_name = R.rownames(:mtcars)
1769
- # compute normalized mpg
1770
- mtcars.mpg_z = ((mtcars.mpg - mtcars.mpg.mean)/mtcars.mpg.sd).round 2
1771
- mtcars.mpg_type = mtcars.mpg_z &lt; 0 ? &quot;below&quot; : &quot;above&quot;
1772
- mtcars = mtcars[mtcars.mpg_z.order, :all]
1773
- # convert to factor to retain sorted order in plot
1774
- mtcars.car_name = mtcars.car_name.factor levels: mtcars.car_name
1775
-
1776
- # Diverging Barcharts
1777
- gg = mtcars.ggplot(E.aes(x: :car_name, y: :mpg_z, label: :mpg_z)) +
1778
- R.geom_bar(E.aes(fill: :mpg_type), stat: 'identity', width: 0.5) +
1779
- R.scale_fill_manual(name: &quot;Mileage&quot;,
1780
- labels: R.c(&quot;Above Average&quot;, &quot;Below Average&quot;),
1781
- values: R.c(&quot;above&quot;: &quot;#00ba38&quot;, &quot;below&quot;: &quot;#f8766d&quot;)) +
1782
- R.labs(subtitle: &quot;Normalised mileage from 'mtcars'&quot;,
1783
- title: &quot;Diverging Bars&quot;) +
1784
- R.coord_flip()
1785
-
1786
- puts gg</code></pre>
1787
- <p><img src="" /><!-- --></p>
1788
- <p>[TO BE CONTINUED…]</p>
1912
+ <div id="matrix" class="section level1">
1913
+ <h1>Matrix</h1>
1914
+ <p>A matrix is a collection of elements organized as a two dimensional table. A matrix can be created by the ‘matrix’ function:</p>
1915
+ <pre class="ruby"><code>@mat = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
1916
+ nrow: 3,
1917
+ ncol: 3)
1918
+
1919
+ puts @mat</code></pre>
1920
+ <pre><code>## [,1] [,2] [,3]
1921
+ ## [1,] 1 4 7
1922
+ ## [2,] 2 5 8
1923
+ ## [3,] 3 6 9</code></pre>
1924
+ <p>Note that matrix data is organized by column first. It is possible to organize the matrix memory by row first by passing an extra argument to the ‘matrix’ function:</p>
1925
+ <pre class="ruby"><code>@mat_row = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
1926
+ nrow: 3,
1927
+ ncol: 3,
1928
+ byrow: true)
1929
+
1930
+ puts @mat_row</code></pre>
1931
+ <pre><code>## [,1] [,2] [,3]
1932
+ ## [1,] 1 2 3
1933
+ ## [2,] 4 5 6
1934
+ ## [3,] 7 8 9</code></pre>
1935
+ <div id="indexing-a-matrix" class="section level2">
1936
+ <h2>Indexing a Matrix</h2>
1937
+ <p>A matrix can be indexed by [row, column]:</p>
1938
+ <pre class="ruby"><code>puts @mat_row[1, 1]
1939
+ puts @mat_row[2, 3]</code></pre>
1940
+ <pre><code>## [1] 1
1941
+ ## [1] 6</code></pre>
1942
+ <p>It is possible to index an entire row or column with the ‘:all’ keyword</p>
1943
+ <pre class="ruby"><code>puts @mat_row[1, :all]
1944
+ puts @mat_row[:all, 2]</code></pre>
1945
+ <pre><code>## [1] 1 2 3
1946
+ ## [1] 2 5 8</code></pre>
1947
+ <p>Indexing with a vector is also possible for matrices. In the following example we want rows 1 and 3 and columns 2 and 3 building a 2 x 2 matrix.</p>
1948
+ <pre class="ruby"><code>puts @mat_row[R.c(1, 3), R.c(2, 3)]</code></pre>
1949
+ <pre><code>## [,1] [,2]
1950
+ ## [1,] 2 3
1951
+ ## [2,] 8 9</code></pre>
1952
+ <p>Matrices can be combined with functions ‘rbind’ and ‘cbind’</p>
1953
+ <pre class="ruby"><code>puts @mat_row.rbind(@mat)
1954
+ puts @mat_row.cbind(@mat)</code></pre>
1955
+ <pre><code>## [,1] [,2] [,3]
1956
+ ## [1,] 1 2 3
1957
+ ## [2,] 4 5 6
1958
+ ## [3,] 7 8 9
1959
+ ## [4,] 1 4 7
1960
+ ## [5,] 2 5 8
1961
+ ## [6,] 3 6 9
1962
+ ## [,1] [,2] [,3] [,4] [,5] [,6]
1963
+ ## [1,] 1 2 3 1 4 7
1964
+ ## [2,] 4 5 6 2 5 8
1965
+ ## [3,] 7 8 9 3 6 9</code></pre>
1966
+ </div>
1967
+ </div>
1968
+ <div id="list" class="section level1">
1969
+ <h1>List</h1>
1970
+ <p>A list is a data structure that can contain sublists of different types, while vector and matrix can only hold one type of element.</p>
1971
+ <pre class="ruby"><code>nums = R.c(1.0, 2.0, 3.0)
1972
+ strs = R.c(&quot;a&quot;, &quot;b&quot;, &quot;c&quot;, &quot;d&quot;)
1973
+ bool = R.c(true, true, false)
1974
+ @lst = R.list(nums: nums, strs: strs, bool: bool)
1975
+ puts @lst</code></pre>
1976
+ <pre><code>## $nums
1977
+ ## [1] 1 2 3
1978
+ ##
1979
+ ## $strs
1980
+ ## [1] &quot;a&quot; &quot;b&quot; &quot;c&quot; &quot;d&quot;
1981
+ ##
1982
+ ## $bool
1983
+ ## [1] TRUE TRUE FALSE</code></pre>
1984
+ <p>Note that ‘<span class="citation">@lst</span>’ elements are named elements.</p>
1985
+ <div id="list-indexing" class="section level2">
1986
+ <h2>List Indexing</h2>
1987
+ <p>List indexing, also called slicing, is done using the ‘[]’ operator and the ‘[[]]’ operator. Let’s first start with the ‘[]’ operator. The list above has three sublists; indexing with ‘[]’ will return one of the sublists.</p>
1988
+ <pre class="ruby"><code>puts @lst[1]</code></pre>
1989
+ <pre><code>## $nums
1990
+ ## [1] 1 2 3</code></pre>
1991
+ <p>Note that when using ‘[]’ a new list is returned. When using the double square bracket operator the value returned is the actual element of the list in the given position and not a slice of the original list</p>
1992
+ <pre class="ruby"><code>puts @lst[[1]]</code></pre>
1993
+ <pre><code>## [1] 1 2 3</code></pre>
1994
+ <p>When elements are named, as done with <span class="citation">@lst</span>, indexing can be done by name:</p>
1995
+ <pre class="ruby"><code>puts @lst[['bool']][[1]] &gt;&gt; 0</code></pre>
1996
+ <pre><code>## true</code></pre>
1997
+ <p>In this example, first the ‘bool’ element of the list was extracted, not as a list, but as a vector, then the first element of the vector was extracted (note that vectors also accept the ‘[[]]’ operator) and then the vector was indexed by its first element, extracting the native Ruby type.</p>
1998
+ </div>
1999
+ </div>
2000
+ <div id="data-frame" class="section level1">
2001
+ <h1>Data Frame</h1>
2002
+ <p>A data frame is a table like structure in which each column has the same number of rows. Data frames are the basic structure for storing data for data analysis. We have already seen a data frame previously when we accessed variable ‘~:mtcars’. In order to create a data frame, function ’data__frame’ is used:</p>
2003
+ <pre class="ruby"><code>df = R.data__frame(
2004
+ year: R.c(2010, 2011, 2012),
2005
+ income: R.c(1000.0, 1500.0, 2000.0))
2006
+
2007
+ puts df</code></pre>
2008
+ <pre><code>## year income
2009
+ ## 1 2010 1000
2010
+ ## 2 2011 1500
2011
+ ## 3 2012 2000</code></pre>
2012
+ <div id="data-frame-indexing" class="section level2">
2013
+ <h2>Data Frame Indexing</h2>
2014
+ <p>A data frame can be indexed the same way as a matrix, by using ‘[row, column]’, where row and column can either be a numeric or the name of the row or column</p>
2015
+ <pre class="ruby"><code>puts (~:mtcars).head
2016
+ puts (~:mtcars)[1, 2]
2017
+ puts (~:mtcars)['Datsun 710', 'mpg']</code></pre>
2018
+ <pre><code>## mpg cyl disp hp drat wt qsec vs am gear carb
2019
+ ## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
2020
+ ## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
2021
+ ## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
2022
+ ## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
2023
+ ## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
2024
+ ## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
2025
+ ## [1] 6
2026
+ ## [1] 22.8</code></pre>
2027
+ <p>Extracting a column from a data frame as a vector can be done by using the double square bracket operator:</p>
2028
+ <pre class="ruby"><code>puts (~:mtcars)[['mpg']]</code></pre>
2029
+ <pre><code>## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
2030
+ ## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
2031
+ ## [29] 15.8 19.7 15.0 21.4</code></pre>
2032
+ <p>A data frame column can also be accessed as if it were an instance variable of the data frame:</p>
2033
+ <pre class="ruby"><code>puts (~:mtcars).mpg</code></pre>
2034
+ <pre><code>## [1] 21.0 21.0 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 17.8 16.4 17.3 15.2
2035
+ ## [15] 10.4 10.4 14.7 32.4 30.4 33.9 21.5 15.5 15.2 13.3 19.2 27.3 26.0 30.4
2036
+ ## [29] 15.8 19.7 15.0 21.4</code></pre>
2037
+ <p>Slicing a data frame can be done by indexing it with a vector (we use ‘head’ to reduce the output):</p>
2038
+ <pre class="ruby"><code>puts (~:mtcars)[R.c('mpg', 'hp')].head</code></pre>
2039
+ <pre><code>## mpg hp
2040
+ ## Mazda RX4 21.0 110
2041
+ ## Mazda RX4 Wag 21.0 110
2042
+ ## Datsun 710 22.8 93
2043
+ ## Hornet 4 Drive 21.4 110
2044
+ ## Hornet Sportabout 18.7 175
2045
+ ## Valiant 18.1 105</code></pre>
2046
+ <p>A row slice can be obtained by indexing by row and using the ‘:all’ keyword for the column:</p>
2047
+ <pre class="ruby"><code>puts (~:mtcars)[R.c('Datsun 710', 'Camaro Z28'), :all]</code></pre>
2048
+ <pre><code>## mpg cyl disp hp drat wt qsec vs am gear carb
2049
+ ## Datsun 710 22.8 4 108 93 3.85 2.32 18.61 1 1 4 1
2050
+ ## Camaro Z28 13.3 8 350 245 3.73 3.84 15.41 0 0 3 4</code></pre>
2051
+ <p>Finally, a data frame can also be indexed with a logical vector. In this next example, the ‘am’ column of :mtcars is compared with 0 (with method ‘eq’). When ‘am’ is equal to 0 the car is automatic. So, by doing ‘(~:mtcars).am.eq 0’ a logical vector is created with ‘true’ whenever ‘am’ is 0 and ‘false’ otherwise. Using this logical vector, the data frame is indexed, returning a new data frame in which all cars have automatic transmission.</p>
2052
+ <pre class="ruby"><code># obtain a vector with 'true' for cars with automatic transmission
2053
+ automatic = (~:mtcars).am.eq 0
2054
+ puts automatic
2055
+
2056
+ # slice the data frame by using this vector
2057
+ puts (~:mtcars)[automatic, :all]</code></pre>
2058
+ <pre><code>## [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
2059
+ ## [12] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE
2060
+ ## [23] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
2061
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2062
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
2063
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
2064
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
2065
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
2066
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
2067
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
2068
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
2069
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
2070
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
2071
+ ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
2072
+ ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
2073
+ ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
2074
+ ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
2075
+ ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
2076
+ ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
2077
+ ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
2078
+ ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
2079
+ ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
2080
+ ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2</code></pre>
2081
+ </div>
1789
2082
  </div>
2083
+ <div id="writing-expressions-in-galaaz" class="section level1">
2084
+ <h1>Writing Expressions in Galaaz</h1>
2085
+ <p>Galaaz extends Ruby to work with complex expressions, similar to R’s expressions built with ‘quote’ (base R) or ‘quo’ (tidyverse). Let’s take a look at some of those expressions.</p>
2086
+ <div id="expressions-from-operators" class="section level2">
2087
+ <h2>Expressions from operators</h2>
2088
+ <p>The code below creates an expression summing two symbols</p>
2089
+ <pre class="ruby"><code>exp1 = :a + :b
2090
+ puts exp1</code></pre>
2091
+ <pre><code>## a + b</code></pre>
2092
+ <p>We can build any complex mathematical expression</p>
2093
+ <pre class="ruby"><code>exp2 = (:a + :b) * 2.0 + :c ** 2 / :z
2094
+ puts exp2</code></pre>
2095
+ <pre><code>## (a + b) * 2 + c^2L/z</code></pre>
2096
+ <p>It is also possible to use inequality operators in building expressions</p>
2097
+ <pre class="ruby"><code>exp3 = (:a + :b) &gt;= :z
2098
+ puts exp3</code></pre>
2099
+ <pre><code>## a + b &gt;= z</code></pre>
2100
+ <p>Galaaz provides both symbolic representations for operators, such as (&gt;, &lt;, !=), and functional notation for those operators, such as (.gt, .ge, etc.). So the same expression written above can also be written as</p>
2101
+ <pre class="ruby"><code>exp4 = (:a + :b).ge :z
2102
+ puts exp4</code></pre>
2103
+ <pre><code>## a + b &gt;= z</code></pre>
2104
+ <p>Two types of expressions can only be created with the functional representation of the operators: those involving ‘==’ and ‘=’. In order to write an expression involving ‘==’ we need to use the method ‘.eq’ and for ‘=’ we need the function ‘.assign’</p>
2105
+ <pre class="ruby"><code>exp5 = (:a + :b).eq :z
2106
+ puts exp5</code></pre>
2107
+ <pre><code>## a + b == z</code></pre>
2108
+ <pre class="ruby"><code>exp6 = :y.assign :a + :b
2109
+ puts exp6</code></pre>
2110
+ <pre><code>## y &lt;- a + b</code></pre>
2111
+ <p>In general we think that using the functional notation is preferable to using the symbolic notation as otherwise, we end up writing invalid expressions such as</p>
2112
+ <pre class="ruby"><code>exp_wrong = (:a + :b) == :z
2113
+ puts exp_wrong</code></pre>
2114
+ <pre><code>## Message:
2115
+ ## Error in function (x, y, num.eq = TRUE, single.NA = TRUE, attrib.as.set = TRUE, :
2116
+ ## object 'a' not found (RError)
2117
+ ## Translated to internal error</code></pre>
2118
+ <p>and it might be difficult to understand what is going on here. The problem lies with the fact that when using ‘==’ we are comparing expression (:a + :b) to expression :z with ‘==’. When the comparison is executed, the system tries to evaluate :a, :b and :z, and those symbols at this time are not bound to anything and we get a “object ‘a’ not found” message. If we only use functional notation, this type of error will not occur.</p>
2119
+ </div>
2120
+ <div id="expressions-with-r-methods" class="section level2">
2121
+ <h2>Expressions with R methods</h2>
2122
+ <p>It is often necessary to create an expression that uses a method or function. For instance, in mathematics, it’s quite natural to write an expression such as <span class="math inline">\(y = sin(x)\)</span>. In this case, the ‘sin’ function is part of the expression and should not be immediately executed. Now, let’s say that ‘x’ is an angle of 45<span class="math inline">\(^\circ\)</span> and we actually want our expression to be <span class="math inline">\(y = 0.850...\)</span>. When we want the function to be part of the expression, we call the function preceding it with the letter E, such as ‘E.sin(x)’</p>
2123
+ <pre class="ruby"><code>exp7 = :y.assign E.sin(:x)
2124
+ puts exp7</code></pre>
2125
+ <pre><code>## y &lt;- sin(x)</code></pre>
2126
+ </div>
2127
+ </div>
2128
+ <div id="manipulating-data" class="section level1">
2129
+ <h1>Manipulating Data</h1>
2130
+ <p>One of the major benefits of Galaaz is to bring strong data manipulation to Ruby. The following examples were extracted from Hadley’s “R for Data Science” (<a href="https://r4ds.had.co.nz/" class="uri">https://r4ds.had.co.nz/</a>). This is a highly recommended book for those not already familiar with the ‘tidyverse’ style of programming in R. In the sections to follow, we will limit ourselves to converting the R code to Galaaz.</p>
2131
+ <p>For these examples, we will investigate the nycflights13 data set available on the package by the same name. We use function ‘R.install_and_loads’ that checks if the library is available locally, and if not, installs it. This data frame contains all 336,776 flights that departed from New York City in 2013. The data comes from the US Bureau of Transportation Statistics.</p>
2132
+ <pre class="ruby"><code>R.install_and_loads('nycflights13')
2133
+ R.library('dplyr')</code></pre>
2134
+ <pre class="ruby"><code>@flights = ~:flights
2135
+ puts @flights.head.as__data__frame</code></pre>
2136
+ <pre><code>## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
2137
+ ## 1 2013 1 1 517 515 2 830 819
2138
+ ## 2 2013 1 1 533 529 4 850 830
2139
+ ## 3 2013 1 1 542 540 2 923 850
2140
+ ## 4 2013 1 1 544 545 -1 1004 1022
2141
+ ## 5 2013 1 1 554 600 -6 812 837
2142
+ ## 6 2013 1 1 554 558 -4 740 728
2143
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
2144
+ ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
2145
+ ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
2146
+ ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
2147
+ ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
2148
+ ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
2149
+ ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
2150
+ ## minute time_hour
2151
+ ## 1 15 2013-01-01 05:00:00
2152
+ ## 2 29 2013-01-01 05:00:00
2153
+ ## 3 40 2013-01-01 05:00:00
2154
+ ## 4 45 2013-01-01 05:00:00
2155
+ ## 5 0 2013-01-01 06:00:00
2156
+ ## 6 58 2013-01-01 05:00:00</code></pre>
2157
+ <div id="filtering-rows-with-filter" class="section level2">
2158
+ <h2>Filtering rows with Filter</h2>
2159
+ <p>In this example we filter the flights data set by giving the filter function two expressions: the first is ‘:month.eq 1’ and the second is ‘:day.eq 1’</p>
2160
+ <pre class="ruby"><code>puts @flights.filter((:month.eq 1), (:day.eq 1)).head.as__data__frame</code></pre>
2161
+ <pre><code>## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
2162
+ ## 1 2013 1 1 517 515 2 830 819
2163
+ ## 2 2013 1 1 533 529 4 850 830
2164
+ ## 3 2013 1 1 542 540 2 923 850
2165
+ ## 4 2013 1 1 544 545 -1 1004 1022
2166
+ ## 5 2013 1 1 554 600 -6 812 837
2167
+ ## 6 2013 1 1 554 558 -4 740 728
2168
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
2169
+ ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
2170
+ ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
2171
+ ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
2172
+ ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
2173
+ ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
2174
+ ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
2175
+ ## minute time_hour
2176
+ ## 1 15 2013-01-01 05:00:00
2177
+ ## 2 29 2013-01-01 05:00:00
2178
+ ## 3 40 2013-01-01 05:00:00
2179
+ ## 4 45 2013-01-01 05:00:00
2180
+ ## 5 0 2013-01-01 06:00:00
2181
+ ## 6 58 2013-01-01 05:00:00</code></pre>
2182
+ </div>
2183
+ <div id="logical-operators" class="section level2">
2184
+ <h2>Logical Operators</h2>
2185
+ <p>All flights that departed in November or December</p>
2186
+ <pre class="ruby"><code>puts @flights.filter((:month.eq 11) | (:month.eq 12)).head.as__data__frame</code></pre>
2187
+ <pre><code>## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
2188
+ ## 1 2013 11 1 5 2359 6 352 345
2189
+ ## 2 2013 11 1 35 2250 105 123 2356
2190
+ ## 3 2013 11 1 455 500 -5 641 651
2191
+ ## 4 2013 11 1 539 545 -6 856 827
2192
+ ## 5 2013 11 1 542 545 -3 831 855
2193
+ ## 6 2013 11 1 549 600 -11 912 923
2194
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
2195
+ ## 1 7 B6 745 N568JB JFK PSE 205 1617 23
2196
+ ## 2 87 B6 1816 N353JB JFK SYR 36 209 22
2197
+ ## 3 -10 US 1895 N192UW EWR CLT 88 529 5
2198
+ ## 4 29 UA 1714 N38727 LGA IAH 229 1416 5
2199
+ ## 5 -24 AA 2243 N5CLAA JFK MIA 147 1089 5
2200
+ ## 6 -11 UA 303 N595UA JFK SFO 359 2586 6
2201
+ ## minute time_hour
2202
+ ## 1 59 2013-11-01 23:00:00
2203
+ ## 2 50 2013-11-01 22:00:00
2204
+ ## 3 0 2013-11-01 05:00:00
2205
+ ## 4 45 2013-11-01 05:00:00
2206
+ ## 5 45 2013-11-01 05:00:00
2207
+ ## 6 0 2013-11-01 06:00:00</code></pre>
2208
+ <p>The same as above, but using the ‘in’ operator. In R, it is possible to define many operators by doing %&lt;op&gt;%. The %in% operator checks if a value is in a vector. In order to use those operators from Galaaz the ‘._’ method is used, where the first argument is the operator’s symbol, in this case ‘:in’ and the second argument is the vector:</p>
2209
+ <pre class="ruby"><code>puts @flights.filter(:month._ :in, R.c(11, 12)).head.as__data__frame</code></pre>
2210
+ <pre><code>## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
2211
+ ## 1 2013 11 1 5 2359 6 352 345
2212
+ ## 2 2013 11 1 35 2250 105 123 2356
2213
+ ## 3 2013 11 1 455 500 -5 641 651
2214
+ ## 4 2013 11 1 539 545 -6 856 827
2215
+ ## 5 2013 11 1 542 545 -3 831 855
2216
+ ## 6 2013 11 1 549 600 -11 912 923
2217
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
2218
+ ## 1 7 B6 745 N568JB JFK PSE 205 1617 23
2219
+ ## 2 87 B6 1816 N353JB JFK SYR 36 209 22
2220
+ ## 3 -10 US 1895 N192UW EWR CLT 88 529 5
2221
+ ## 4 29 UA 1714 N38727 LGA IAH 229 1416 5
2222
+ ## 5 -24 AA 2243 N5CLAA JFK MIA 147 1089 5
2223
+ ## 6 -11 UA 303 N595UA JFK SFO 359 2586 6
2224
+ ## minute time_hour
2225
+ ## 1 59 2013-11-01 23:00:00
2226
+ ## 2 50 2013-11-01 22:00:00
2227
+ ## 3 0 2013-11-01 05:00:00
2228
+ ## 4 45 2013-11-01 05:00:00
2229
+ ## 5 45 2013-11-01 05:00:00
2230
+ ## 6 0 2013-11-01 06:00:00</code></pre>
2231
+ </div>
2232
+ <div id="filtering-with-na-not-available" class="section level2">
2233
+ <h2>Filtering with NA (Not Available)</h2>
2234
+ <p>Let’s first create a ‘tibble’ with a Not Available value (R::NA). Tibbles are a modern version of a data frame and operate very similarly to one. They differ in how they output values and in the results of some subsetting operations, which are more consistent than what is obtained from a data frame.</p>
2235
+ <pre class="ruby"><code>@df = R.tibble(x: R.c(1, R::NA, 3))
2236
+ puts @df.as__data__frame</code></pre>
2237
+ <pre><code>## x
2238
+ ## 1 1
2239
+ ## 2 NA
2240
+ ## 3 3</code></pre>
2241
+ <p>Now filtering by :x &gt; 1 shows all rows that satisfy this condition; the row with R::NA does not.</p>
2242
+ <pre class="ruby"><code>puts @df.filter(:x &gt; 1).as__data__frame</code></pre>
2243
+ <pre><code>## x
2244
+ ## 1 3</code></pre>
2245
+ <p>To match an NA use method ‘is__na’</p>
2246
+ <pre class="ruby"><code>puts @df.filter((:x.is__na) | (:x &gt; 1)).as__data__frame</code></pre>
2247
+ <pre><code>## x
2248
+ ## 1 NA
2249
+ ## 2 3</code></pre>
2250
+ </div>
2251
+ <div id="arrange-rows-with-arrange" class="section level2">
2252
+ <h2>Arrange Rows with arrange</h2>
2253
+ <p>Arrange reorders the rows of a data frame by the given arguments.</p>
2254
+ <pre class="ruby"><code>puts @flights.arrange(:year, :month, :day).head.as__data__frame</code></pre>
2255
+ <pre><code>## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
2256
+ ## 1 2013 1 1 517 515 2 830 819
2257
+ ## 2 2013 1 1 533 529 4 850 830
2258
+ ## 3 2013 1 1 542 540 2 923 850
2259
+ ## 4 2013 1 1 544 545 -1 1004 1022
2260
+ ## 5 2013 1 1 554 600 -6 812 837
2261
+ ## 6 2013 1 1 554 558 -4 740 728
2262
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
2263
+ ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
2264
+ ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
2265
+ ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
2266
+ ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
2267
+ ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
2268
+ ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
2269
+ ## minute time_hour
2270
+ ## 1 15 2013-01-01 05:00:00
2271
+ ## 2 29 2013-01-01 05:00:00
2272
+ ## 3 40 2013-01-01 05:00:00
2273
+ ## 4 45 2013-01-01 05:00:00
2274
+ ## 5 0 2013-01-01 06:00:00
2275
+ ## 6 58 2013-01-01 05:00:00</code></pre>
2276
+ <p>To arrange in descending order, use function ‘desc’</p>
2277
+ <pre class="ruby"><code>puts @flights.arrange(:dep_delay.desc).head.as__data__frame</code></pre>
2278
+ <pre><code>## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
2279
+ ## 1 2013 1 9 641 900 1301 1242 1530
2280
+ ## 2 2013 6 15 1432 1935 1137 1607 2120
2281
+ ## 3 2013 1 10 1121 1635 1126 1239 1810
2282
+ ## 4 2013 9 20 1139 1845 1014 1457 2210
2283
+ ## 5 2013 7 22 845 1600 1005 1044 1815
2284
+ ## 6 2013 4 10 1100 1900 960 1342 2211
2285
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
2286
+ ## 1 1272 HA 51 N384HA JFK HNL 640 4983 9
2287
+ ## 2 1127 MQ 3535 N504MQ JFK CMH 74 483 19
2288
+ ## 3 1109 MQ 3695 N517MQ EWR ORD 111 719 16
2289
+ ## 4 1007 AA 177 N338AA JFK SFO 354 2586 18
2290
+ ## 5 989 MQ 3075 N665MQ JFK CVG 96 589 16
2291
+ ## 6 931 DL 2391 N959DL JFK TPA 139 1005 19
2292
+ ## minute time_hour
2293
+ ## 1 0 2013-01-09 09:00:00
2294
+ ## 2 35 2013-06-15 19:00:00
2295
+ ## 3 35 2013-01-10 16:00:00
2296
+ ## 4 45 2013-09-20 18:00:00
2297
+ ## 5 0 2013-07-22 16:00:00
2298
+ ## 6 0 2013-04-10 19:00:00</code></pre>
2299
+ </div>
2300
+ <div id="selecting-columns" class="section level2">
2301
+ <h2>Selecting columns</h2>
2302
+ <p>To select specific columns from a dataset we use function ‘select’:</p>
2303
+ <pre class="ruby"><code>puts @flights.select(:year, :month, :day).head.as__data__frame</code></pre>
2304
+ <pre><code>## year month day
2305
+ ## 1 2013 1 1
2306
+ ## 2 2013 1 1
2307
+ ## 3 2013 1 1
2308
+ ## 4 2013 1 1
2309
+ ## 5 2013 1 1
2310
+ ## 6 2013 1 1</code></pre>
2311
+ <p>It is also possible to select columns in a given range</p>
2312
+ <pre class="ruby"><code>puts @flights.select(:year.up_to :day).head.as__data__frame</code></pre>
2313
+ <pre><code>## year month day
2314
+ ## 1 2013 1 1
2315
+ ## 2 2013 1 1
2316
+ ## 3 2013 1 1
2317
+ ## 4 2013 1 1
2318
+ ## 5 2013 1 1
2319
+ ## 6 2013 1 1</code></pre>
2320
+ <p>Select all columns that start with a given name sequence</p>
2321
+ <pre class="ruby"><code>puts @flights.select(E.starts_with('arr')).head.as__data__frame</code></pre>
2322
+ <pre><code>## arr_time arr_delay
2323
+ ## 1 830 11
2324
+ ## 2 850 20
2325
+ ## 3 923 33
2326
+ ## 4 1004 -18
2327
+ ## 5 812 -25
2328
+ ## 6 740 12</code></pre>
2329
+ <p>Other functions that can be used:</p>
2330
+ <ul>
2331
+ <li><p>ends_with(“xyz”): matches names that end with “xyz”.</p></li>
2332
+ <li><p>contains(“ijk”): matches names that contain “ijk”.</p></li>
2333
+ <li><p>matches(“(.)\1”): selects variables that match a regular expression. This one matches any variables that contain repeated characters.</p></li>
2334
+ <li><p>num_range(“x”, (1..3)): matches x1, x2 and x3</p></li>
2335
+ </ul>
2336
+ <p>A helper function that comes in handy when we just want to rearrange column order is ‘everything’:</p>
2337
+ <pre class="ruby"><code>puts @flights.select(:year, :month, :day, E.everything).head.as__data__frame</code></pre>
2338
+ <pre><code>## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
2339
+ ## 1 2013 1 1 517 515 2 830 819
2340
+ ## 2 2013 1 1 533 529 4 850 830
2341
+ ## 3 2013 1 1 542 540 2 923 850
2342
+ ## 4 2013 1 1 544 545 -1 1004 1022
2343
+ ## 5 2013 1 1 554 600 -6 812 837
2344
+ ## 6 2013 1 1 554 558 -4 740 728
2345
+ ## arr_delay carrier flight tailnum origin dest air_time distance hour
2346
+ ## 1 11 UA 1545 N14228 EWR IAH 227 1400 5
2347
+ ## 2 20 UA 1714 N24211 LGA IAH 227 1416 5
2348
+ ## 3 33 AA 1141 N619AA JFK MIA 160 1089 5
2349
+ ## 4 -18 B6 725 N804JB JFK BQN 183 1576 5
2350
+ ## 5 -25 DL 461 N668DN LGA ATL 116 762 6
2351
+ ## 6 12 UA 1696 N39463 EWR ORD 150 719 5
2352
+ ## minute time_hour
2353
+ ## 1 15 2013-01-01 05:00:00
2354
+ ## 2 29 2013-01-01 05:00:00
2355
+ ## 3 40 2013-01-01 05:00:00
2356
+ ## 4 45 2013-01-01 05:00:00
2357
+ ## 5 0 2013-01-01 06:00:00
2358
+ ## 6 58 2013-01-01 05:00:00</code></pre>
2359
+ </div>
2360
+ <div id="add-variables-to-a-dataframe-with-mutate" class="section level2">
2361
+ <h2>Add variables to a dataframe with ‘mutate’</h2>
2362
+ <pre class="ruby"><code>@flights_sm = @flights.
2363
+ select((:year.up_to :day),
2364
+ E.ends_with('delay'),
2365
+ :distance,
2366
+ :air_time)
2367
+
2368
+ puts @flights_sm.head.as__data__frame</code></pre>
2369
+ <pre><code>## year month day dep_delay arr_delay distance air_time
2370
+ ## 1 2013 1 1 2 11 1400 227
2371
+ ## 2 2013 1 1 4 20 1416 227
2372
+ ## 3 2013 1 1 2 33 1089 160
2373
+ ## 4 2013 1 1 -1 -18 1576 183
2374
+ ## 5 2013 1 1 -6 -25 762 116
2375
+ ## 6 2013 1 1 -4 12 719 150</code></pre>
2376
+ <pre class="ruby"><code>@flights_sm = @flights_sm.
2377
+ mutate(gain: :dep_delay - :arr_delay,
2378
+ speed: :distance / :air_time * 60)
2379
+ puts @flights_sm.head.as__data__frame</code></pre>
2380
+ <pre><code>## year month day dep_delay arr_delay distance air_time gain speed
2381
+ ## 1 2013 1 1 2 11 1400 227 -9 370.0441
2382
+ ## 2 2013 1 1 4 20 1416 227 -16 374.2731
2383
+ ## 3 2013 1 1 2 33 1089 160 -31 408.3750
2384
+ ## 4 2013 1 1 -1 -18 1576 183 17 516.7213
2385
+ ## 5 2013 1 1 -6 -25 762 116 19 394.1379
2386
+ ## 6 2013 1 1 -4 12 719 150 -16 287.6000</code></pre>
2387
+ </div>
2388
+ </div>
2389
+ <div id="graphics-in-galaaz" class="section level1">
2390
+ <h1>Graphics in Galaaz</h1>
2391
+ <p>Creating graphics in Galaaz is quite easy, as it can use all the power of ggplot2. There are many resources on the web that teach ggplot, so here we give a quick example of ggplot integration with Ruby. We continue to use the :mtcars dataset and we will plot a diverging bar plot, showing cars that have ‘above’ or ‘below’ gas consumption. Let’s first prepare the data frame with the necessary data:</p>
2392
+ <pre class="ruby"><code># copy the R variable :mtcars to the Ruby mtcars variable
2393
+ @mtcars = ~:mtcars
2394
+
2395
+ # create a new column 'car_name' to store the car names so that it can be
2396
+ # used for plotting. The 'rownames' of the data frame cannot be used as
2397
+ # data for plotting
2398
+ @mtcars.car_name = R.rownames(:mtcars)
2399
+
2400
+ # compute normalized mpg and add it to a new column called mpg_z
2401
+ # Note that the mean value for mpg can be obtained by calling the 'mean'
2402
+ # function on the vector 'mtcars.mpg'. The same with the standard
2403
+ # deviation 'sd'. The vector is then rounded to two digits with 'round 2'
2404
+ @mtcars.mpg_z = ((@mtcars.mpg - @mtcars.mpg.mean)/@mtcars.mpg.sd).round 2
2405
+
2406
+ # create a new column 'mpg_type'. Function 'ifelse' is a vectorized function
2407
+ # that looks at every element of the mpg_z vector and if the value is below
2408
+ # 0, returns 'below', otherwise returns 'above'
2409
+ @mtcars.mpg_type = (@mtcars.mpg_z &lt; 0).ifelse(&quot;below&quot;, &quot;above&quot;)
2410
+
2411
+ # order the mtcars data set by the mpg_z vector from smaller to larger values
2412
+ @mtcars = @mtcars[@mtcars.mpg_z.order, :all]
2413
+
2414
+ # convert the car_name column to a factor to retain sorted order in plot
2415
+ @mtcars.car_name = @mtcars.car_name.factor levels: @mtcars.car_name
2416
+
2417
+ # let's look at the final data frame
2418
+ puts @mtcars</code></pre>
2419
+ <pre><code>## mpg cyl disp hp drat wt qsec vs am gear carb
2420
+ ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
2421
+ ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
2422
+ ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
2423
+ ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
2424
+ ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
2425
+ ## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
2426
+ ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
2427
+ ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
2428
+ ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
2429
+ ## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
2430
+ ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
2431
+ ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
2432
+ ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
2433
+ ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
2434
+ ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
2435
+ ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
2436
+ ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
2437
+ ## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
2438
+ ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
2439
+ ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
2440
+ ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
2441
+ ## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
2442
+ ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
2443
+ ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
2444
+ ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
2445
+ ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
2446
+ ## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
2447
+ ## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
2448
+ ## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
2449
+ ## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
2450
+ ## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
2451
+ ## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
2452
+ ## car_name mpg_z mpg_type
2453
+ ## Cadillac Fleetwood Cadillac Fleetwood -1.61 below
2454
+ ## Lincoln Continental Lincoln Continental -1.61 below
2455
+ ## Camaro Z28 Camaro Z28 -1.13 below
2456
+ ## Duster 360 Duster 360 -0.96 below
2457
+ ## Chrysler Imperial Chrysler Imperial -0.89 below
2458
+ ## Maserati Bora Maserati Bora -0.84 below
2459
+ ## Merc 450SLC Merc 450SLC -0.81 below
2460
+ ## AMC Javelin AMC Javelin -0.81 below
2461
+ ## Dodge Challenger Dodge Challenger -0.76 below
2462
+ ## Ford Pantera L Ford Pantera L -0.71 below
2463
+ ## Merc 450SE Merc 450SE -0.61 below
2464
+ ## Merc 450SL Merc 450SL -0.46 below
2465
+ ## Merc 280C Merc 280C -0.38 below
2466
+ ## Valiant Valiant -0.33 below
2467
+ ## Hornet Sportabout Hornet Sportabout -0.23 below
2468
+ ## Merc 280 Merc 280 -0.15 below
2469
+ ## Pontiac Firebird Pontiac Firebird -0.15 below
2470
+ ## Ferrari Dino Ferrari Dino -0.06 below
2471
+ ## Mazda RX4 Mazda RX4 0.15 above
2472
+ ## Mazda RX4 Wag Mazda RX4 Wag 0.15 above
2473
+ ## Hornet 4 Drive Hornet 4 Drive 0.22 above
2474
+ ## Volvo 142E Volvo 142E 0.22 above
2475
+ ## Toyota Corona Toyota Corona 0.23 above
2476
+ ## Datsun 710 Datsun 710 0.45 above
2477
+ ## Merc 230 Merc 230 0.45 above
2478
+ ## Merc 240D Merc 240D 0.72 above
2479
+ ## Porsche 914-2 Porsche 914-2 0.98 above
2480
+ ## Fiat X1-9 Fiat X1-9 1.20 above
2481
+ ## Honda Civic Honda Civic 1.71 above
2482
+ ## Lotus Europa Lotus Europa 1.71 above
2483
+ ## Fiat 128 Fiat 128 2.04 above
2484
+ ## Toyota Corolla Toyota Corolla 2.29 above</code></pre>
2485
+ <p>Now, let’s plot the diverging bar plot. When using gKnit, there is no need to call ‘R.awt’ to create a plotting device, since gKnit does take care of it:</p>
2486
+ <p>[TO BE CONTINUED…]</p>
1790
2487
  </div>
1791
2488
  <div id="contributing" class="section level1">
1792
2489
  <h1>Contributing</h1>