galaaz 0.4.9 → 0.4.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +798 -285
  3. data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +3 -12
  4. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +5 -7
  5. data/blogs/galaaz_ggplot/galaaz_ggplot.html +69 -29
  6. data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
  7. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
  8. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
  9. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/midwest_rb.pdf +0 -0
  10. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/scatter_plot_rb.pdf +0 -0
  11. data/blogs/galaaz_ggplot/midwest.Rmd +1 -9
  12. data/blogs/gknit/gknit.Rmd +37 -40
  13. data/blogs/gknit/gknit.html +32 -30
  14. data/blogs/gknit/gknit.md +36 -37
  15. data/blogs/gknit/gknit.pdf +0 -0
  16. data/blogs/gknit/gknit.tex +35 -37
  17. data/blogs/manual/manual.Rmd +548 -125
  18. data/blogs/manual/manual.html +509 -286
  19. data/blogs/manual/manual.md +798 -285
  20. data/blogs/manual/manual.pdf +0 -0
  21. data/blogs/manual/manual.tex +2816 -0
  22. data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
  23. data/blogs/nse_dplyr/nse_dplyr.Rmd +240 -74
  24. data/blogs/nse_dplyr/nse_dplyr.html +191 -87
  25. data/blogs/nse_dplyr/nse_dplyr.md +361 -107
  26. data/blogs/nse_dplyr/nse_dplyr.pdf +0 -0
  27. data/blogs/nse_dplyr/nse_dplyr.tex +1373 -0
  28. data/blogs/ruby_plot/ruby_plot.Rmd +61 -81
  29. data/blogs/ruby_plot/ruby_plot.html +54 -57
  30. data/blogs/ruby_plot/ruby_plot.md +48 -67
  31. data/blogs/ruby_plot/ruby_plot.pdf +0 -0
  32. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  33. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  34. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  35. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  36. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  42. data/blogs/ruby_plot/ruby_plot_files/figure-latex/dose_len.png +0 -0
  43. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_delivery.png +0 -0
  44. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_dose.png +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color.png +0 -0
  46. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color2.png +0 -0
  47. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_decorations.png +0 -0
  48. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_jitter.png +0 -0
  49. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_points.png +0 -0
  50. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_box_plot.png +0 -0
  51. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_violin_plot.png +0 -0
  52. data/blogs/ruby_plot/ruby_plot_files/figure-latex/violin_with_jitter.png +0 -0
  53. data/lib/R_interface/rdata_frame.rb +0 -12
  54. data/lib/R_interface/robject.rb +14 -14
  55. data/lib/R_interface/ruby_extensions.rb +3 -31
  56. data/lib/R_interface/rvector.rb +0 -12
  57. data/lib/gknit/knitr_engine.rb +5 -3
  58. data/lib/util/exec_ruby.rb +22 -61
  59. data/specs/tmp.rb +26 -12
  60. data/version.rb +1 -1
  61. metadata +22 -17
  62. data/bin/gknit_old_r +0 -236
  63. data/blogs/dev/dev.Rmd +0 -23
  64. data/blogs/dev/dev.md +0 -58
  65. data/blogs/dev/dev2.Rmd +0 -65
  66. data/blogs/dev/model.rb +0 -41
  67. data/blogs/dplyr/dplyr.Rmd +0 -29
  68. data/blogs/dplyr/dplyr.html +0 -433
  69. data/blogs/dplyr/dplyr.md +0 -58
  70. data/blogs/dplyr/dplyr.rb +0 -63
  71. data/blogs/galaaz_ggplot/galaaz_ggplot.log +0 -640
  72. data/blogs/galaaz_ggplot/galaaz_ggplot.md +0 -431
  73. data/blogs/galaaz_ggplot/galaaz_ggplot.tex +0 -481
  74. data/blogs/galaaz_ggplot/midwest.png +0 -0
  75. data/blogs/galaaz_ggplot/scatter_plot.png +0 -0
  76. data/blogs/ruby_plot/ruby_plot.tex +0 -1077
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f328b999e2b5b132053b133a1f1fcc3ddd6f5a295cc040629a6989150a765775
4
- data.tar.gz: f6bce01abc3f189f1e16f8cf2691a7bd14d2962f295dccda5a00e564e4470cc5
3
+ metadata.gz: 5028519688d5197e29ea9198499c8093f96aa27e498a0eb974367187d7d151da
4
+ data.tar.gz: f5bad7debd953898f0335e04e83089137025c759a3910cf6d74061b53f4eb37e
5
5
  SHA512:
6
- metadata.gz: 5b287e9d5883723a8e378d88c5df28d7097ca3ca14b6648641415c2734d4204473f71ad173d45b0946e5e02e67d9c1da168a06aeaf2472c388232c7acf44cdc5
7
- data.tar.gz: 57a0c432b785e89ee1df8c38f2710736e2150b2024e11b1ed1e0437e1903a96934b6be135fe1ffe54d9fb891527d15275f1c119cdf6f1a453d47c991b268d273
6
+ metadata.gz: 5b14427f32a5db4f2c9754c1ee7fea356c939727152a626c616c3dff1372cddb4fd4d982dc761c2a2e2ca1c211b8a0215d26c2b11eb162cd2f7ab5f0c1c9344e
7
+ data.tar.gz: 94c7da10fd04a9136b9a36582574ae04c9f3a4767f1a3dd04137a64f4e104cb8c3c0906752c627cef27ff81b7bbca0bde83aa58e9e5b742005079b30c46616a2
data/README.md CHANGED
@@ -74,15 +74,13 @@ Panda, SciPy, SciKit-Learn and a couple more.
74
74
  # gKnitting a Document
75
75
 
76
76
  This manual has been formatted using gKnit. gKnit uses Knitr and R markdown to knit
77
- a document in Ruby or R and output it in any of the available formats for R markdown.
77
+ a document in Ruby or R and output it in any of the available formats for R markdown.
78
78
  gKnit runs atop GraalVM and Galaaz. In gKnit, Ruby variables are persisted between
79
- chunks, making it an ideal solution for literate programming.
80
- Also, since it is based on Galaaz, Ruby chunks can have access to R variables and Polyglot
81
- Programming with Ruby and R is quite natural.
79
+ chunks, making it an ideal solution for literate programming. Also, since it is based
80
+ on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming with
81
+ Ruby and R is quite natural.
82
82
 
83
- gknit was describe in more depth in:
84
-
85
- * xxx.xxxx.xxx
83
+ [gknit is described in more detail here](https://towardsdatascience.com/how-to-do-reproducible-research-in-ruby-with-gknit-c26d2684d64e)
86
84
 
87
85
  # Vector
88
86
 
@@ -110,15 +108,15 @@ To create a vector the 'c' (concatenate) method from the 'R' module should be us
110
108
 
111
109
 
112
110
  ```ruby
113
- @vec = R.c(1, 2, 3)
114
- puts @vec
111
+ vec = R.c(1, 2, 3)
112
+ puts vec
115
113
  ```
116
114
 
117
115
  ```
118
116
  ## [1] 1 2 3
119
117
  ```
120
118
 
121
- Lets take a look at the type, mode and storage.mode of our vector @vec. In order to print
119
+ Let's take a look at the type, mode and storage.mode of our vector vec. In order to print
122
120
  this out, we are creating a data frame 'df' and printing it out. A data frame, for those
123
121
  not familiar with it, is basically a table. Here we create the data frame and add the
124
122
  column name by passing named parameters for each column, such as 'typeof:', 'mode:' and
@@ -130,7 +128,7 @@ data frame is 'data.frame', in Galaaz we use 'data\_\_frame'.
130
128
 
131
129
 
132
130
  ```ruby
133
- df = R.data__frame(typeof: @vec.typeof, mode: @vec.mode, storage__mode: @vec.storage__mode)
131
+ df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
134
132
  puts df
135
133
  ```
136
134
 
@@ -146,8 +144,8 @@ follows normal Ruby rules and the number 1 is an integer and 1.0 is a float.
146
144
 
147
145
 
148
146
  ```ruby
149
- @vec = R.c(1.0, 2, 3)
150
- puts @vec
147
+ vec = R.c(1.0, 2, 3)
148
+ puts vec
151
149
  ```
152
150
 
153
151
  ```
@@ -156,7 +154,7 @@ puts @vec
156
154
 
157
155
 
158
156
  ```ruby
159
- df = R.data__frame(typeof: @vec.typeof, mode: @vec.mode, storage__mode: @vec.storage__mode)
157
+ df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
160
158
  outputs df.kable.kable_styling
161
159
  ```
162
160
 
@@ -189,14 +187,14 @@ vec = R.c(1, hello, 5)
189
187
 
190
188
  ```
191
189
  ## Message:
192
- ## undefined local variable or method `hello' for RubyChunk:Class
190
+ ## undefined local variable or method `hello' for #<RC:0x2e0 @out_list=nil>:RC
193
191
  ```
194
192
 
195
193
  ```
196
194
  ## Message:
197
- ## (eval):1:in `exec_ruby'
198
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:141:in `instance_eval'
199
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:141:in `exec_ruby'
195
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:103:in `get_binding'
196
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `eval'
197
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `exec_ruby'
200
198
  ## /home/rbotafogo/desenv/galaaz/lib/gknit/knitr_engine.rb:650:in `block in initialize'
201
199
  ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
202
200
  ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
@@ -221,8 +219,8 @@ Here is a vector with logical values
221
219
 
222
220
 
223
221
  ```ruby
224
- @vec = R.c(true, true, false, false, true)
225
- puts @vec
222
+ vec = R.c(true, true, false, false, true)
223
+ puts vec
226
224
  ```
227
225
 
228
226
  ```
@@ -235,26 +233,26 @@ The 'c' functions used to create vectors can also be used to combine two vectors
235
233
 
236
234
 
237
235
  ```ruby
238
- @vec1 = R.c(10.0, 20.0, 30.0)
239
- @vec2 = R.c(4.0, 5.0, 6.0)
240
- @vec = R.c(@vec1, @vec2)
241
- puts @vec
236
+ vec1 = R.c(10.0, 20.0, 30.0)
237
+ vec2 = R.c(4.0, 5.0, 6.0)
238
+ vec = R.c(vec1, vec2)
239
+ puts vec
242
240
  ```
243
241
 
244
242
  ```
245
243
  ## [1] 10 20 30 4 5 6
246
244
  ```
247
245
  In galaaz, methods can be chained (somewhat like the pipe operator in R %>%, but more generic).
248
- In this next example, method 'c' is chainned after '@vec1'. This also looks like 'c' is a
246
+ In this next example, method 'c' is chained after 'vec1'. This also looks like 'c' is a
249
247
  method of the vector, but in reality, this is actually closer to the pipe operator. When
250
248
  Galaaz identifies that 'c' is not a method of 'vec' it actually tries to call 'R.c' with
251
- '@vec1' as the first argument concatenated with all the other available arguments. The code
249
+ 'vec1' as the first argument concatenated with all the other available arguments. The code
252
250
  below is automatically converted to the code above.
253
251
 
254
252
 
255
253
  ```ruby
256
- @vec = @vec1.c(@vec2)
257
- puts @vec
254
+ vec = vec1.c(vec2)
255
+ puts vec
258
256
  ```
259
257
 
260
258
  ```
@@ -267,7 +265,7 @@ Arithmetic operations on vectors are performed element by element:
267
265
 
268
266
 
269
267
  ```ruby
270
- puts @vec1 + @vec2
268
+ puts vec1 + vec2
271
269
  ```
272
270
 
273
271
  ```
@@ -276,7 +274,7 @@ puts @vec1 + @vec2
276
274
 
277
275
 
278
276
  ```ruby
279
- puts @vec1 * 5
277
+ puts vec1 * 5
280
278
  ```
281
279
 
282
280
  ```
@@ -287,8 +285,8 @@ When vectors have different length, a recycling rule is applied to the shorter v
287
285
 
288
286
 
289
287
  ```ruby
290
- @vec3 = R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
291
- puts @vec4 = @vec1 + @vec3
288
+ vec3 = R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
289
+ puts vec4 = vec1 + vec3
292
290
  ```
293
291
 
294
292
  ```
@@ -301,7 +299,7 @@ Vectors can be indexed by using the '[]' operator:
301
299
 
302
300
 
303
301
  ```ruby
304
- puts @vec4[3]
302
+ puts vec4[3]
305
303
  ```
306
304
 
307
305
  ```
@@ -309,11 +307,11 @@ puts @vec4[3]
309
307
  ```
310
308
 
311
309
  We can also index a vector with another vector. For example, in the code below, we take elements
312
- 1, 3, 5, and 7 from @vec3:
310
+ 1, 3, 5, and 7 from vec4:
313
311
 
314
312
 
315
313
  ```ruby
316
- puts @vec4[R.c(1, 3, 5, 7)]
314
+ puts vec4[R.c(1, 3, 5, 7)]
317
315
  ```
318
316
 
319
317
  ```
@@ -324,7 +322,7 @@ Repeating an index and having indices out of order is valid code:
324
322
 
325
323
 
326
324
  ```ruby
327
- puts @vec4[R.c(1, 3, 3, 1)]
325
+ puts vec4[R.c(1, 3, 3, 1)]
328
326
  ```
329
327
 
330
328
  ```
@@ -336,8 +334,8 @@ the indexed values are not returned:
336
334
 
337
335
 
338
336
  ```ruby
339
- puts @vec4[-3]
340
- puts @vec4[-R.c(1, 3, 5, 7)]
337
+ puts vec4[-3]
338
+ puts vec4[-R.c(1, 3, 5, 7)]
341
339
  ```
342
340
 
343
341
  ```
@@ -349,7 +347,7 @@ If an index is out of range, a missing value (NA) will be reported.
349
347
 
350
348
 
351
349
  ```ruby
352
- puts @vec4[30]
350
+ puts vec4[30]
353
351
  ```
354
352
 
355
353
  ```
@@ -360,7 +358,7 @@ It is also possible to index a vector by range:
360
358
 
361
359
 
362
360
  ```ruby
363
- puts @vec4[(2..5)]
361
+ puts vec4[(2..5)]
364
362
  ```
365
363
 
366
364
  ```
@@ -403,9 +401,9 @@ from the vector. In order to do this extraction the '>>' operator is used.
403
401
 
404
402
 
405
403
  ```ruby
406
- puts @vec4
407
- puts @vec4 >> 0
408
- puts @vec4 >> 4
404
+ puts vec4
405
+ puts vec4 >> 0
406
+ puts vec4 >> 4
409
407
  ```
410
408
 
411
409
  ```
@@ -905,11 +903,11 @@ created by the 'matrix' function:
905
903
 
906
904
 
907
905
  ```ruby
908
- @mat = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
909
- nrow: 3,
910
- ncol: 3)
906
+ mat = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
907
+ nrow: 3,
908
+ ncol: 3)
911
909
 
912
- puts @mat
910
+ puts mat
913
911
  ```
914
912
 
915
913
  ```
@@ -923,12 +921,12 @@ memory by row first passing an extra argument to the 'matrix' function:
923
921
 
924
922
 
925
923
  ```ruby
926
- @mat_row = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
927
- nrow: 3,
928
- ncol: 3,
929
- byrow: true)
924
+ mat_row = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
925
+ nrow: 3,
926
+ ncol: 3,
927
+ byrow: true)
930
928
 
931
- puts @mat_row
929
+ puts mat_row
932
930
  ```
933
931
 
934
932
  ```
@@ -944,8 +942,8 @@ A matrix can be indexed by [row, column]:
944
942
 
945
943
 
946
944
  ```ruby
947
- puts @mat_row[1, 1]
948
- puts @mat_row[2, 3]
945
+ puts mat_row[1, 1]
946
+ puts mat_row[2, 3]
949
947
  ```
950
948
 
951
949
  ```
@@ -956,8 +954,8 @@ It is possible to index an entire row or column with the ':all' keyword
956
954
 
957
955
 
958
956
  ```ruby
959
- puts @mat_row[1, :all]
960
- puts @mat_row[:all, 2]
957
+ puts mat_row[1, :all]
958
+ puts mat_row[:all, 2]
961
959
  ```
962
960
 
963
961
  ```
@@ -970,7 +968,7 @@ rows 1 and 3 and columns 2 and 3 building a 2 x 2 matrix.
970
968
 
971
969
 
972
970
  ```ruby
973
- puts @mat_row[R.c(1, 3), R.c(2, 3)]
971
+ puts mat_row[R.c(1, 3), R.c(2, 3)]
974
972
  ```
975
973
 
976
974
  ```
@@ -979,12 +977,11 @@ puts @mat_row[R.c(1, 3), R.c(2, 3)]
979
977
  ## [2,] 8 9
980
978
  ```
981
979
 
982
- Matrices can be combined with functions 'rbind' and 'cbind'
980
+ Matrices can be combined with function 'rbind':
983
981
 
984
982
 
985
983
  ```ruby
986
- puts @mat_row.rbind(@mat)
987
- puts @mat_row.cbind(@mat)
984
+ puts mat_row.rbind(mat)
988
985
  ```
989
986
 
990
987
  ```
@@ -995,6 +992,16 @@ puts @mat_row.cbind(@mat)
995
992
  ## [4,] 1 4 7
996
993
  ## [5,] 2 5 8
997
994
  ## [6,] 3 6 9
995
+ ```
996
+
997
+ and 'cbind':
998
+
999
+
1000
+ ```ruby
1001
+ puts mat_row.cbind(mat)
1002
+ ```
1003
+
1004
+ ```
998
1005
  ## [,1] [,2] [,3] [,4] [,5] [,6]
999
1006
  ## [1,] 1 2 3 1 4 7
1000
1007
  ## [2,] 4 5 6 2 5 8
@@ -1011,8 +1018,8 @@ can only hold one type of element.
1011
1018
  nums = R.c(1.0, 2.0, 3.0)
1012
1019
  strs = R.c("a", "b", "c", "d")
1013
1020
  bool = R.c(true, true, false)
1014
- @lst = R.list(nums: nums, strs: strs, bool: bool)
1015
- puts @lst
1021
+ lst = R.list(nums: nums, strs: strs, bool: bool)
1022
+ puts lst
1016
1023
  ```
1017
1024
 
1018
1025
  ```
@@ -1026,7 +1033,7 @@ puts @lst
1026
1033
  ## [1] TRUE TRUE FALSE
1027
1034
  ```
1028
1035
 
1029
- Note that '@lst' elements are named elements.
1036
+ Note that 'lst' elements are named elements.
1030
1037
 
1031
1038
 
1032
1039
  ## List Indexing
@@ -1037,7 +1044,7 @@ return one of the sublists.
1037
1044
 
1038
1045
 
1039
1046
  ```ruby
1040
- puts @lst[1]
1047
+ puts lst[1]
1041
1048
  ```
1042
1049
 
1043
1050
  ```
@@ -1052,18 +1059,18 @@ the original list
1052
1059
 
1053
1060
 
1054
1061
  ```ruby
1055
- puts @lst[[1]]
1062
+ puts lst[[1]]
1056
1063
  ```
1057
1064
 
1058
1065
  ```
1059
1066
  ## [1] 1 2 3
1060
1067
  ```
1061
1068
 
1062
- When elements are named, as dones with @lst, indexing can be done by name:
1069
+ When elements are named, as done with lst, indexing can be done by name:
1063
1070
 
1064
1071
 
1065
1072
  ```ruby
1066
- puts @lst[['bool']][[1]] >> 0
1073
+ puts lst[['bool']][[1]] >> 0
1067
1074
  ```
1068
1075
 
1069
1076
  ```
@@ -1183,23 +1190,31 @@ puts (~:mtcars)[R.c('Datsun 710', 'Camaro Z28'), :all]
1183
1190
  Finally, a data frame can also be indexed with a logical vector. In this next example, the
1184
1191
  'am' column of :mtcars is compared with 0 (with method 'eq'). When 'am' is equal to 0 the
1185
1192
  car is automatic. So, by doing '(~:mtcars).am.eq 0' a logical vector is created with
1186
- 'true' whenever 'am' is 0 and 'false' otherwise. Using this logical vector, the data frame
1187
- is indexed, returning a new data frame in which all cars have automatic transmission.
1193
+ 'true' whenever 'am' is 0 and 'false' otherwise.
1188
1194
 
1189
1195
 
1190
1196
  ```ruby
1191
1197
  # obtain a vector with 'true' for cars with automatic transmission
1192
1198
  automatic = (~:mtcars).am.eq 0
1193
1199
  puts automatic
1194
-
1195
- # slice the data frame by using this vector
1196
- puts (~:mtcars)[automatic, :all]
1197
1200
  ```
1198
1201
 
1199
1202
  ```
1200
1203
  ## [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
1201
1204
  ## [12] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE
1202
1205
  ## [23] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
1206
+ ```
1207
+
1208
+ Using this logical vector, the data frame is indexed, returning a new data frame in
1209
+ which all cars have automatic transmission.
1210
+
1211
+
1212
+ ```ruby
1213
+ # slice the data frame by using this vector
1214
+ puts (~:mtcars)[automatic, :all]
1215
+ ```
1216
+
1217
+ ```
1203
1218
  ## mpg cyl disp hp drat wt qsec vs am gear carb
1204
1219
  ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
1205
1220
  ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
@@ -1342,6 +1357,62 @@ puts exp7
1342
1357
  ## y <- sin(x)
1343
1358
  ```
1344
1359
 
1360
+ Expressions can also be written using '.' notation:
1361
+
1362
+
1363
+ ```ruby
1364
+ exp8 = :y.assign :x.sin
1365
+ puts exp8
1366
+ ```
1367
+
1368
+ ```
1369
+ ## y <- sin(x)
1370
+ ```
1371
+
1372
+ When a function has multiple arguments, the first one can be used before the '.':
1373
+
1374
+
1375
+ ```ruby
1376
+ exp9 = :x.c(:y)
1377
+ puts exp9
1378
+ ```
1379
+
1380
+ ```
1381
+ ## c(x, y)
1382
+ ```
1383
+
1384
+ ## Evaluating an Expression
1385
+
1386
+ Expressions can be evaluated by calling function 'eval' with a binding. A binding can be provided
1387
+ with a list:
1388
+
1389
+
1390
+ ```ruby
1391
+ exp = (:a + :b) * 2.0 + :c ** 2 / :z
1392
+ puts exp.eval(R.list(a: 10, b: 20, c: 30, z: 40))
1393
+ ```
1394
+
1395
+ ```
1396
+ ## [1] 82.5
1397
+ ```
1398
+
1399
+ ... with a data frame:
1400
+
1401
+
1402
+ ```ruby
1403
+ df = R.data__frame(
1404
+ a: R.c(1, 2, 3),
1405
+ b: R.c(10, 20, 30),
1406
+ c: R.c(100, 200, 300),
1407
+ z: R.c(1000, 2000, 3000))
1408
+
1409
+ puts exp.eval(df)
1410
+ ```
1411
+
1412
+ ```
1413
+ ## [1] 32 64 96
1414
+ ```
1415
+
1345
1416
  # Manipulating Data
1346
1417
 
1347
1418
  One of the major benefits of Galaaz is to bring strong data manipulation to Ruby. The following
@@ -1365,8 +1436,8 @@ R.library('dplyr')
1365
1436
 
1366
1437
 
1367
1438
  ```ruby
1368
- @flights = ~:flights
1369
- puts @flights.head.as__data__frame
1439
+ flights = ~:flights
1440
+ puts flights.head.as__data__frame
1370
1441
  ```
1371
1442
 
1372
1443
  ```
@@ -1400,7 +1471,7 @@ the first :month.eq 1
1400
1471
 
1401
1472
 
1402
1473
  ```ruby
1403
- puts @flights.filter((:month.eq 1), (:day.eq 1)).head.as__data__frame
1474
+ puts flights.filter((:month.eq 1), (:day.eq 1)).head.as__data__frame
1404
1475
  ```
1405
1476
 
1406
1477
  ```
@@ -1433,7 +1504,7 @@ All flights that departed in November of December
1433
1504
 
1434
1505
 
1435
1506
  ```ruby
1436
- puts @flights.filter((:month.eq 11) | (:month.eq 12)).head.as__data__frame
1507
+ puts flights.filter((:month.eq 11) | (:month.eq 12)).head.as__data__frame
1437
1508
  ```
1438
1509
 
1439
1510
  ```
@@ -1467,7 +1538,7 @@ symbol, in this case ':in' and the second argument is the vector:
1467
1538
 
1468
1539
 
1469
1540
  ```ruby
1470
- puts @flights.filter(:month._ :in, R.c(11, 12)).head.as__data__frame
1541
+ puts flights.filter(:month._ :in, R.c(11, 12)).head.as__data__frame
1471
1542
  ```
1472
1543
 
1473
1544
  ```
@@ -1503,8 +1574,8 @@ what is obtained from data frame.
1503
1574
 
1504
1575
 
1505
1576
  ```ruby
1506
- @df = R.tibble(x: R.c(1, R::NA, 3))
1507
- puts @df.as__data__frame
1577
+ df = R.tibble(x: R.c(1, R::NA, 3))
1578
+ puts df.as__data__frame
1508
1579
  ```
1509
1580
 
1510
1581
  ```
@@ -1519,7 +1590,7 @@ not.
1519
1590
 
1520
1591
 
1521
1592
  ```ruby
1522
- puts @df.filter(:x > 1).as__data__frame
1593
+ puts df.filter(:x > 1).as__data__frame
1523
1594
  ```
1524
1595
 
1525
1596
  ```
@@ -1531,7 +1602,7 @@ To match an NA use method 'is__na'
1531
1602
 
1532
1603
 
1533
1604
  ```ruby
1534
- puts @df.filter((:x.is__na) | (:x > 1)).as__data__frame
1605
+ puts df.filter((:x.is__na) | (:x > 1)).as__data__frame
1535
1606
  ```
1536
1607
 
1537
1608
  ```
@@ -1546,7 +1617,7 @@ Arrange reorders the rows of a data frame by the given arguments.
1546
1617
 
1547
1618
 
1548
1619
  ```ruby
1549
- puts @flights.arrange(:year, :month, :day).head.as__data__frame
1620
+ puts flights.arrange(:year, :month, :day).head.as__data__frame
1550
1621
  ```
1551
1622
 
1552
1623
  ```
@@ -1577,7 +1648,7 @@ To arrange in descending order, use function 'desc'
1577
1648
 
1578
1649
 
1579
1650
  ```ruby
1580
- puts @flights.arrange(:dep_delay.desc).head.as__data__frame
1651
+ puts flights.arrange(:dep_delay.desc).head.as__data__frame
1581
1652
  ```
1582
1653
 
1583
1654
  ```
@@ -1610,7 +1681,7 @@ To select specific columns from a dataset we use function 'select':
1610
1681
 
1611
1682
 
1612
1683
  ```ruby
1613
- puts @flights.select(:year, :month, :day).head.as__data__frame
1684
+ puts flights.select(:year, :month, :day).head.as__data__frame
1614
1685
  ```
1615
1686
 
1616
1687
  ```
@@ -1627,7 +1698,7 @@ It is also possible to select column in a given range
1627
1698
 
1628
1699
 
1629
1700
  ```ruby
1630
- puts @flights.select(:year.up_to :day).head.as__data__frame
1701
+ puts flights.select(:year.up_to :day).head.as__data__frame
1631
1702
  ```
1632
1703
 
1633
1704
  ```
@@ -1644,7 +1715,7 @@ Select all columns that start with a given name sequence
1644
1715
 
1645
1716
 
1646
1717
  ```ruby
1647
- puts @flights.select(E.starts_with('arr')).head.as__data__frame
1718
+ puts flights.select(E.starts_with('arr')).head.as__data__frame
1648
1719
  ```
1649
1720
 
1650
1721
  ```
@@ -1672,7 +1743,7 @@ A helper function that comes in handy when we just want to rearrange column orde
1672
1743
 
1673
1744
 
1674
1745
  ```ruby
1675
- puts @flights.select(:year, :month, :day, E.everything).head.as__data__frame
1746
+ puts flights.select(:year, :month, :day, E.everything).head.as__data__frame
1676
1747
  ```
1677
1748
 
1678
1749
  ```
@@ -1703,13 +1774,13 @@ puts @flights.select(:year, :month, :day, E.everything).head.as__data__frame
1703
1774
 
1704
1775
 
1705
1776
  ```ruby
1706
- @flights_sm = @flights.
1707
- select((:year.up_to :day),
1708
- E.ends_with('delay'),
1709
- :distance,
1710
- :air_time)
1777
+ flights_sm = flights.
1778
+ select((:year.up_to :day),
1779
+ E.ends_with('delay'),
1780
+ :distance,
1781
+ :air_time)
1711
1782
 
1712
- puts @flights_sm.head.as__data__frame
1783
+ puts flights_sm.head.as__data__frame
1713
1784
  ```
1714
1785
 
1715
1786
  ```
@@ -1724,10 +1795,10 @@ puts @flights_sm.head.as__data__frame
1724
1795
 
1725
1796
 
1726
1797
  ```ruby
1727
- @flights_sm = @flights_sm.
1728
- mutate(gain: :dep_delay - :arr_delay,
1729
- speed: :distance / :air_time * 60)
1730
- puts @flights_sm.head.as__data__frame
1798
+ flights_sm = flights_sm.
1799
+ mutate(gain: :dep_delay - :arr_delay,
1800
+ speed: :distance / :air_time * 60)
1801
+ puts flights_sm.head.as__data__frame
1731
1802
  ```
1732
1803
 
1733
1804
  ```
@@ -1747,7 +1818,7 @@ a single value is obtained from the data frame:
1747
1818
 
1748
1819
 
1749
1820
  ```ruby
1750
- puts @flights.summarise(delay: E.mean(:dep_delay, na__rm: true)).as__data__frame
1821
+ puts flights.summarise(delay: E.mean(:dep_delay, na__rm: true)).as__data__frame
1751
1822
  ```
1752
1823
 
1753
1824
  ```
@@ -1759,7 +1830,7 @@ When a data frame is groupe with 'group_by' summaries apply to the given group:
1759
1830
 
1760
1831
 
1761
1832
  ```ruby
1762
- by_day = @flights.group_by(:year, :month, :day)
1833
+ by_day = flights.group_by(:year, :month, :day)
1763
1834
  puts by_day.summarise(delay: :dep_delay.mean(na__rm: true)).head.as__data__frame
1764
1835
  ```
1765
1836
 
@@ -1777,7 +1848,7 @@ Next we put many operations together by pipping them one after the other:
1777
1848
 
1778
1849
 
1779
1850
  ```ruby
1780
- delays = @flights.
1851
+ delays = flights.
1781
1852
  group_by(:dest).
1782
1853
  summarise(
1783
1854
  count: E.n,
@@ -1785,108 +1856,17 @@ delays = @flights.
1785
1856
  delay: :arr_delay.mean(na__rm: true)).
1786
1857
  filter(:count > 20, :dest != "NHL")
1787
1858
 
1788
- puts delays.as__data__frame
1789
- ```
1790
-
1791
- ```
1792
- ## dest count dist delay
1793
- ## 1 ABQ 254 1826.00000 4.38188976
1794
- ## 2 ACK 265 199.00000 4.85227273
1795
- ## 3 ALB 439 143.00000 14.39712919
1796
- ## 4 ATL 17215 757.10822 11.30011285
1797
- ## 5 AUS 2439 1514.25297 6.01990875
1798
- ## 6 AVL 275 583.58182 8.00383142
1799
- ## 7 BDL 443 116.00000 7.04854369
1800
- ## 8 BGR 375 378.00000 8.02793296
1801
- ## 9 BHM 297 865.99663 16.87732342
1802
- ## 10 BNA 6333 758.21348 11.81245891
1803
- ## 11 BOS 15508 190.63696 2.91439222
1804
- ## 12 BQN 896 1578.98326 8.24549550
1805
- ## 13 BTV 2589 265.09154 8.95099602
1806
- ## 14 BUF 4681 296.80837 8.94595186
1807
- ## 15 BUR 371 2465.00000 8.17567568
1808
- ## 16 BWI 1781 179.41830 10.72673385
1809
- ## 17 BZN 36 1882.00000 7.60000000
1810
- ## 18 CAE 116 603.55172 41.76415094
1811
- ## 19 CAK 864 397.00000 19.69833729
1812
- ## 20 CHO 52 305.00000 9.50000000
1813
- ## 21 CHS 2884 632.91678 10.59296847
1814
- ## 22 CLE 4573 414.17428 9.18161129
1815
- ## 23 CLT 14064 538.02730 7.36031885
1816
- ## 24 CMH 3524 476.55505 10.60132291
1817
- ## 25 CRW 138 444.00000 14.67164179
1818
- ## 26 CVG 3941 575.15986 15.36456376
1819
- ## 27 DAY 1525 537.10230 12.68048606
1820
- ## 28 DCA 9705 211.00618 9.06695204
1821
- ## 29 DEN 7266 1614.67836 8.60650021
1822
- ## 30 DFW 8738 1383.04303 0.32212685
1823
- ## 31 DSM 569 1020.88752 19.00573614
1824
- ## 32 DTW 9384 498.12852 5.42996346
1825
- ## 33 EGE 213 1735.70892 6.30434783
1826
- ## 34 FLL 12055 1070.06877 8.08212154
1827
- ## 35 GRR 765 605.78170 18.18956044
1828
- ## 36 GSO 1606 449.84184 14.11260054
1829
- ## 37 GSP 849 595.95995 15.93544304
1830
- ## 38 HNL 707 4972.67468 -1.36519258
1831
- ## 39 HOU 2115 1420.15508 7.17618819
1832
- ## 40 IAD 5700 224.84684 13.86420212
1833
- ## 41 IAH 7198 1407.20672 4.24079040
1834
- ## 42 ILM 110 500.00000 4.63551402
1835
- ## 43 IND 2077 652.26288 9.94043412
1836
- ## 44 JAC 25 1875.60000 28.09523810
1837
- ## 45 JAX 2720 824.67610 11.84483416
1838
- ## 46 LAS 5997 2240.96148 0.25772849
1839
- ## 47 LAX 16174 2468.62236 0.54711094
1840
- ## 48 LGB 668 2465.00000 -0.06202723
1841
- ## 49 MCI 2008 1097.69522 14.51405836
1842
- ## 50 MCO 14082 943.11057 5.45464309
1843
- ## 51 MDW 4113 718.04595 12.36422360
1844
- ## 52 MEM 1789 954.20123 10.64531435
1845
- ## 53 MHT 1009 207.02973 14.78755365
1846
- ## 54 MIA 11728 1091.55244 0.29905978
1847
- ## 55 MKE 2802 733.38151 14.16722038
1848
- ## 56 MSN 572 803.95455 20.19604317
1849
- ## 57 MSP 7185 1017.40167 7.27016886
1850
- ## 58 MSY 3799 1177.70571 6.49017497
1851
- ## 59 MVY 221 173.00000 -0.28571429
1852
- ## 60 MYR 59 550.66102 4.60344828
1853
- ## 61 OAK 312 2576.00000 3.07766990
1854
- ## 62 OKC 346 1325.00000 30.61904762
1855
- ## 63 OMA 849 1135.56655 14.69889841
1856
- ## 64 ORD 17283 729.00081 5.87661475
1857
- ## 65 ORF 1536 288.52344 10.94909344
1858
- ## 66 PBI 6554 1028.83811 8.56297210
1859
- ## 67 PDX 1354 2445.56573 5.14157973
1860
- ## 68 PHL 1632 94.32353 10.12719014
1861
- ## 69 PHX 4656 2141.30326 2.09704733
1862
- ## 70 PIT 2875 334.06122 7.68099053
1863
- ## 71 PSE 365 1617.00000 7.87150838
1864
- ## 72 PVD 376 160.00000 16.23463687
1865
- ## 73 PWM 2352 276.12840 11.66040210
1866
- ## 74 RDU 8163 426.75769 10.05238095
1867
- ## 75 RIC 2454 281.40465 20.11125320
1868
- ## 76 ROC 2416 259.25083 11.56064461
1869
- ## 77 RSW 3537 1072.85327 3.23814963
1870
- ## 78 SAN 2737 2437.29923 3.13916574
1871
- ## 79 SAT 686 1578.34111 6.94537178
1872
- ## 80 SAV 804 709.18408 15.12950601
1873
- ## 81 SDF 1157 645.98358 12.66938406
1874
- ## 82 SEA 3923 2412.66531 -1.09909910
1875
- ## 83 SFO 13331 2577.92356 2.67289152
1876
- ## 84 SJC 329 2569.00000 3.44817073
1877
- ## 85 SJU 5819 1599.83365 2.52052659
1878
- ## 86 SLC 2467 1986.98662 0.17625459
1879
- ## 87 SMF 284 2521.00000 12.10992908
1880
- ## 88 SNA 825 2434.00000 -7.86822660
1881
- ## 89 SRQ 1211 1044.65153 3.08243131
1882
- ## 90 STL 4339 878.72321 11.07846451
1883
- ## 91 STT 522 1626.98276 -3.83590734
1884
- ## 92 SYR 1761 205.92164 8.90392501
1885
- ## 93 TPA 7466 1003.93557 7.40852503
1886
- ## 94 TUL 315 1215.00000 33.65986395
1887
- ## 95 TVC 101 652.38614 12.96842105
1888
- ## 96 TYS 631 638.80983 24.06920415
1889
- ## 97 XNA 1036 1142.50579 7.46572581
1859
+ puts delays.as__data__frame.head
1860
+ ```
1861
+
1862
+ ```
1863
+ ## dest count dist delay
1864
+ ## 1 ABQ 254 1826.0000 4.381890
1865
+ ## 2 ACK 265 199.0000 4.852273
1866
+ ## 3 ALB 439 143.0000 14.397129
1867
+ ## 4 ATL 17215 757.1082 11.300113
1868
+ ## 5 AUS 2439 1514.2530 6.019909
1869
+ ## 6 AVL 275 583.5818 8.003831
1890
1870
  ```
1891
1871
 
1892
1872
  # Using Data Table
@@ -1897,9 +1877,9 @@ R.library('data.table')
1897
1877
  R.install_and_loads('curl')
1898
1878
 
1899
1879
  input = "https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv"
1900
- @flights = R.fread(input)
1901
- puts @flights
1902
- puts @flights.dim
1880
+ flights = R.fread(input)
1881
+ puts flights
1882
+ puts flights.dim
1903
1883
  ```
1904
1884
 
1905
1885
  ```
@@ -1958,17 +1938,17 @@ puts data_table.ID
1958
1938
 
1959
1939
  ```ruby
1960
1940
  # subset rows in i
1961
- ans = @flights[(:origin.eq "JFK") & (:month.eq 6)]
1941
+ ans = flights[(:origin.eq "JFK") & (:month.eq 6)]
1962
1942
  puts ans.head
1963
1943
 
1964
1944
  # Get the first two rows from flights.
1965
1945
 
1966
- ans = @flights[(1..2)]
1946
+ ans = flights[(1..2)]
1967
1947
  puts ans
1968
1948
 
1969
1949
  # Sort flights first by column origin in ascending order, and then by dest in descending order:
1970
1950
 
1971
- # ans = @flights[E.order(:origin, -(:dest))]
1951
+ # ans = flights[E.order(:origin, -(:dest))]
1972
1952
  # puts ans.head
1973
1953
  ```
1974
1954
 
@@ -2000,15 +1980,15 @@ puts ans
2000
1980
  # Select column(s) in j
2001
1981
  # select arr_delay column, but return it as a vector.
2002
1982
 
2003
- ans = @flights[:all, :arr_delay]
1983
+ ans = flights[:all, :arr_delay]
2004
1984
  puts ans.head
2005
1985
 
2006
1986
  # Select arr_delay column, but return as a data.table instead.
2007
1987
 
2008
- ans = @flights[:all, :arr_delay.list]
1988
+ ans = flights[:all, :arr_delay.list]
2009
1989
  puts ans.head
2010
1990
 
2011
- ans = @flights[:all, E.list(:arr_delay, :dep_delay)]
1991
+ ans = flights[:all, E.list(:arr_delay, :dep_delay)]
2012
1992
  ```
2013
1993
 
2014
1994
  ```
@@ -2033,68 +2013,42 @@ the data frame with the necessary data:
2033
2013
 
2034
2014
  ```ruby
2035
2015
  # copy the R variable :mtcars to the Ruby mtcars variable
2036
- @mtcars = ~:mtcars
2016
+ mtcars = ~:mtcars
2037
2017
 
2038
2018
  # create a new column 'car_name' to store the car names so that it can be
2039
2019
  # used for plotting. The 'rownames' of the data frame cannot be used as
2040
2020
  # data for plotting
2041
- @mtcars.car_name = R.rownames(:mtcars)
2021
+ mtcars.car_name = R.rownames(:mtcars)
2042
2022
 
2043
2023
  # compute normalized mpg and add it to a new column called mpg_z
2044
2024
  # Note that the mean value for mpg can be obtained by calling the 'mean'
2045
2025
  # function on the vector 'mtcars.mpg'. The same with the standard
2046
2026
  # deviation 'sd'. The vector is then rounded to two digits with 'round 2'
2047
- @mtcars.mpg_z = ((@mtcars.mpg - @mtcars.mpg.mean)/@mtcars.mpg.sd).round 2
2027
+ mtcars.mpg_z = ((mtcars.mpg - mtcars.mpg.mean)/mtcars.mpg.sd).round 2
2048
2028
 
2049
2029
  # create a new column 'mpg_type'. Function 'ifelse' is a vectorized function
2050
2030
  # that looks at every element of the mpg_z vector and if the value is below
2051
2031
  # 0, returns 'below', otherwise returns 'above'
2052
- @mtcars.mpg_type = (@mtcars.mpg_z < 0).ifelse("below", "above")
2032
+ mtcars.mpg_type = (mtcars.mpg_z < 0).ifelse("below", "above")
2053
2033
 
2054
2034
  # order the mtcar data set by the mpg_z vector from smaler to larger values
2055
- @mtcars = @mtcars[@mtcars.mpg_z.order, :all]
2035
+ mtcars = mtcars[mtcars.mpg_z.order, :all]
2056
2036
 
2057
2037
  # convert the car_name column to a factor to retain sorted order in plot
2058
- @mtcars.car_name = @mtcars.car_name.factor levels: @mtcars.car_name
2038
+ mtcars.car_name = mtcars.car_name.factor levels: mtcars.car_name
2059
2039
 
2060
2040
  # let's look at the final data frame
2061
- puts @mtcars
2041
+ puts mtcars.head
2062
2042
  ```
2063
2043
 
2064
2044
  ```
2065
- ## mpg cyl disp hp drat wt qsec vs am gear carb
2066
- ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
2067
- ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
2068
- ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
2069
- ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
2070
- ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
2071
- ## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
2072
- ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
2073
- ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
2074
- ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
2075
- ## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
2076
- ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
2077
- ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
2078
- ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
2079
- ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
2080
- ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
2081
- ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
2082
- ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
2083
- ## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
2084
- ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
2085
- ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
2086
- ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
2087
- ## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
2088
- ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
2089
- ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
2090
- ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
2091
- ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
2092
- ## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
2093
- ## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
2094
- ## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
2095
- ## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
2096
- ## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
2097
- ## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
2045
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2046
+ ## Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
2047
+ ## Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4
2048
+ ## Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4
2049
+ ## Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4
2050
+ ## Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
2051
+ ## Maserati Bora 15.0 8 301 335 3.54 3.570 14.60 0 1 5 8
2098
2052
  ## car_name mpg_z mpg_type
2099
2053
  ## Cadillac Fleetwood Cadillac Fleetwood -1.61 below
2100
2054
  ## Lincoln Continental Lincoln Continental -1.61 below
@@ -2102,32 +2056,6 @@ puts @mtcars
2102
2056
  ## Duster 360 Duster 360 -0.96 below
2103
2057
  ## Chrysler Imperial Chrysler Imperial -0.89 below
2104
2058
  ## Maserati Bora Maserati Bora -0.84 below
2105
- ## Merc 450SLC Merc 450SLC -0.81 below
2106
- ## AMC Javelin AMC Javelin -0.81 below
2107
- ## Dodge Challenger Dodge Challenger -0.76 below
2108
- ## Ford Pantera L Ford Pantera L -0.71 below
2109
- ## Merc 450SE Merc 450SE -0.61 below
2110
- ## Merc 450SL Merc 450SL -0.46 below
2111
- ## Merc 280C Merc 280C -0.38 below
2112
- ## Valiant Valiant -0.33 below
2113
- ## Hornet Sportabout Hornet Sportabout -0.23 below
2114
- ## Merc 280 Merc 280 -0.15 below
2115
- ## Pontiac Firebird Pontiac Firebird -0.15 below
2116
- ## Ferrari Dino Ferrari Dino -0.06 below
2117
- ## Mazda RX4 Mazda RX4 0.15 above
2118
- ## Mazda RX4 Wag Mazda RX4 Wag 0.15 above
2119
- ## Hornet 4 Drive Hornet 4 Drive 0.22 above
2120
- ## Volvo 142E Volvo 142E 0.22 above
2121
- ## Toyota Corona Toyota Corona 0.23 above
2122
- ## Datsun 710 Datsun 710 0.45 above
2123
- ## Merc 230 Merc 230 0.45 above
2124
- ## Merc 240D Merc 240D 0.72 above
2125
- ## Porsche 914-2 Porsche 914-2 0.98 above
2126
- ## Fiat X1-9 Fiat X1-9 1.20 above
2127
- ## Honda Civic Honda Civic 1.71 above
2128
- ## Lotus Europa Lotus Europa 1.71 above
2129
- ## Fiat 128 Fiat 128 2.04 above
2130
- ## Toyota Corolla Toyota Corolla 2.29 above
2131
2059
  ```
2132
2060
  Now, lets plot the diverging bar plot. When using gKnit, there is no need to call
2133
2061
  'R.awt' to create a plotting device, since gKnit does take care of it. Galaaz
@@ -2149,19 +2077,604 @@ but in this graph we want the bars to be horizontally layed so we add 'coord\_fl
2149
2077
  ```ruby
2150
2078
  require 'ggplot'
2151
2079
 
2152
- puts @mtcars.ggplot(E.aes(x: :car_name, y: :mpg_z, label: :mpg_z)) +
2153
- R.geom_bar(E.aes(fill: :mpg_type), stat: 'identity', width: 0.5) +
2154
- R.scale_fill_manual(name: 'Mileage',
2155
- labels: R.c('Above Average', 'Below Average'),
2156
- values: R.c('above': '#00ba38', 'below': '#f8766d')) +
2157
- R.labs(subtitle: "Normalised mileage from 'mtcars'",
2158
- title: "Diverging Bars") +
2159
- R.coord_flip
2080
+ puts mtcars.ggplot(E.aes(x: :car_name, y: :mpg_z, label: :mpg_z)) +
2081
+ R.geom_bar(E.aes(fill: :mpg_type), stat: 'identity', width: 0.5) +
2082
+ R.scale_fill_manual(name: 'Mileage',
2083
+ labels: R.c('Above Average', 'Below Average'),
2084
+ values: R.c('above': '#00ba38', 'below': '#f8766d')) +
2085
+ R.labs(subtitle: "Normalised mileage from 'mtcars'",
2086
+ title: "Diverging Bars") +
2087
+ R.coord_flip
2160
2088
  ```
2161
2089
 
2162
2090
 
2163
2091
  ![](/home/rbotafogo/desenv/galaaz/blogs/manual/manual_files/figure-html/diverging_bar.png)<!-- -->
2164
2092
 
2093
+ # Coding with Tidyverse
2094
+
2095
+ In R, and when coding with 'tidyverse', arguments to a function are usually not
2096
+ *referentially transparent*. That is, you can’t replace a value with a seemingly equivalent
2097
+ object that you’ve defined elsewhere. To see the problem, let's first define a data frame:
2098
+
2099
+
2100
+ ```ruby
2101
+ df = R.data__frame(x: (1..3), y: (3..1))
2102
+ puts df
2103
+ ```
2104
+
2105
+ ```
2106
+ ## x y
2107
+ ## 1 1 3
2108
+ ## 2 2 2
2109
+ ## 3 3 1
2110
+ ```
2111
+
2112
+ and now, let's look at this code:
2113
+
2114
+
2115
+ ```r
2116
+ my_var <- x
2117
+ filter(df, my_var == 1)
2118
+ ```
2119
+ It generates the following error: "object 'x' not found".
2120
+
2121
+ However, in Galaaz, arguments are referentially transparent as can be seen by the
2122
+ code below. Note initially that 'my_var = :x' will not give the error "object 'x' not found"
2123
+ since ':x' is treated as an expression and assigned to my\_var. Then when doing (my\_var.eq 1),
2124
+ my\_var is a variable that resolves to ':x' and it becomes equivalent to (:x.eq 1) which is
2125
+ what we want.
2126
+
2127
+
2128
+ ```ruby
2129
+ my_var = :x
2130
+ puts df.filter(my_var.eq 1)
2131
+ ```
2132
+
2133
+ ```
2134
+ ## x y
2135
+ ## 1 1 3
2136
+ ```
2137
+ As stated by Hadley
2138
+
2139
+ > dplyr code is ambiguous. Depending on what variables are defined where,
2140
+ > filter(df, x == y) could be equivalent to any of:
2141
+
2142
+ ```
2143
+ df[df$x == df$y, ]
2144
+ df[df$x == y, ]
2145
+ df[x == df$y, ]
2146
+ df[x == y, ]
2147
+ ```
2148
+ In galaaz this ambiguity does not exist, filter(df, x.eq y) is not a valid expression as
2149
+ expressions are built with symbols. In doing filter(df, :x.eq y) we are looking for elements
2150
+ of the 'x' column that are equal to a previously defined y variable. Finally in
2151
+ filter(df, :x.eq :y) we are looking for elements in which the 'x' column value is equal to
2152
+ the 'y' column value. This can be seen in the following two chunks of code:
2153
+
2154
+
2155
+ ```ruby
2156
+ y = 1
2157
+ x = 2
2158
+
2159
+ # looking for values where the 'x' column is equal to the 'y' column
2160
+ puts df.filter(:x.eq :y)
2161
+ ```
2162
+
2163
+ ```
2164
+ ## x y
2165
+ ## 1 2 2
2166
+ ```
2167
+
2168
+
2169
+ ```ruby
2170
+ # looking for values where the 'x' column is equal to the 'y' variable
2171
+ # in this case, the number 1
2172
+ puts df.filter(:x.eq y)
2173
+ ```
2174
+
2175
+ ```
2176
+ ## x y
2177
+ ## 1 1 3
2178
+ ```
2179
+ ## Writing a function that applies to different data sets
2180
+
2181
+ Let's suppose that we want to write a function that receives as the first argument a data frame
2182
+ and as second argument an expression that adds a column to the data frame that is equal to the
2183
+ sum of elements in column 'a' plus 'x'.
2184
+
2185
+ Here is the intended behaviour using the 'mutate' function of 'dplyr':
2186
+
2187
+ ```
2188
+ mutate(df1, y = a + x)
2189
+ mutate(df2, y = a + x)
2190
+ mutate(df3, y = a + x)
2191
+ mutate(df4, y = a + x)
2192
+ ```
2193
+ The naive approach to writing an R function to solve this problem is:
2194
+
2195
+ ```
2196
+ mutate_y <- function(df) {
2197
+ mutate(df, y = a + x)
2198
+ }
2199
+ ```
2200
+ Unfortunately, in R, this function can fail silently if one of the variables isn’t present
2201
+ in the data frame, but is present in the global environment. We will not go through here how
2202
+ to solve this problem in R.
2203
+
2204
+ In Galaaz the method mutate_y below will work fine and will never fail silently.
2205
+
2206
+
2207
+ ```ruby
2208
+ def mutate_y(df)
2209
+ df.mutate(:y.assign :a + :x)
2210
+ end
2211
+ ```
2212
+ Here we create a data frame that has only one column named 'x':
2213
+
2214
+
2215
+ ```ruby
2216
+ df1 = R.data__frame(x: (1..3))
2217
+ puts df1
2218
+ ```
2219
+
2220
+ ```
2221
+ ## x
2222
+ ## 1 1
2223
+ ## 2 2
2224
+ ## 3 3
2225
+ ```
2226
+
2227
+ Note that method mutate_y will fail independently of the fact that variable 'a' is defined and
2228
+ in the scope of the method. Variable 'a' has no relationship with the symbol ':a' used in the
2229
+ definition of 'mutate\_y' above:
2230
+
2231
+
2232
+ ```ruby
2233
+ a = 10
2234
+ mutate_y(df1)
2235
+ ```
2236
+
2237
+ ```
2238
+ ## Message:
2239
+ ## Error in mutate_impl(.data, dots) :
2240
+ ## Evaluation error: object 'a' not found.
2241
+ ## In addition: Warning message:
2242
+ ## In mutate_impl(.data, dots) :
2243
+ ## mismatched protect/unprotect (unprotect with empty protect stack) (RError)
2244
+ ## Translated to internal error
2245
+ ```
2246
+ ## Different expressions
2247
+
2248
+ Let's move to the next problem as presented by Hadley where trying to write a function in R
2249
+ that will receive two arguments, the first a variable and the second an expression, is not trivial.
2250
+ Below we create a data frame and we want to write a function that groups data by a variable and
2251
+ summarises it by an expression:
2252
+
2253
+
2254
+ ```r
2255
+ set.seed(123)
2256
+
2257
+ df <- data.frame(
2258
+ g1 = c(1, 1, 2, 2, 2),
2259
+ g2 = c(1, 2, 1, 2, 1),
2260
+ a = sample(5),
2261
+ b = sample(5)
2262
+ )
2263
+
2264
+ as.data.frame(df)
2265
+ ```
2266
+
2267
+ ```
2268
+ ## g1 g2 a b
2269
+ ## 1 1 1 2 1
2270
+ ## 2 1 2 4 3
2271
+ ## 3 2 1 5 4
2272
+ ## 4 2 2 3 2
2273
+ ## 5 2 1 1 5
2274
+ ```
2275
+
2276
+ ```r
2277
+ d2 <- df %>%
2278
+ group_by(g1) %>%
2279
+ summarise(a = mean(a))
2280
+
2281
+ as.data.frame(d2)
2282
+ ```
2283
+
2284
+ ```
2285
+ ## g1 a
2286
+ ## 1 1 3
2287
+ ## 2 2 3
2288
+ ```
2289
+
2290
+ ```r
2291
+ d2 <- df %>%
2292
+ group_by(g2) %>%
2293
+ summarise(a = mean(a))
2294
+
2295
+ as.data.frame(d2)
2296
+ ```
2297
+
2298
+ ```
2299
+ ## g2 a
2300
+ ## 1 1 2.666667
2301
+ ## 2 2 3.500000
2302
+ ```
2303
+
2304
+ As shown by Hadley, one might expect this function to do the trick:
2305
+
2306
+
2307
+ ```r
2308
+ my_summarise <- function(df, group_var) {
2309
+ df %>%
2310
+ group_by(group_var) %>%
2311
+ summarise(a = mean(a))
2312
+ }
2313
+
2314
+ # my_summarise(df, g1)
2315
+ #> Error: Column `group_var` is unknown
2316
+ ```
2317
+
2318
+ In order to solve this problem, coding with dplyr requires the introduction of many new concepts
2319
+ and functions such as 'quo', 'quos', 'enquo', 'enquos', '!!' (bang bang), '!!!' (triple bang).
2320
+ Again, we'll leave to Hadley the explanation on how to use all those functions.
2321
+
2322
+ Now, let's try to implement the same function in galaaz. The next code block first prints the
2323
+ 'df' data frame defined previously in R (to access an R variable from Galaaz, we use the tilde
2324
+ operator '~' applied to the R variable name as symbol, i.e., ':df').
2325
+
2326
+
2327
+ ```ruby
2328
+ puts ~:df
2329
+ ```
2330
+
2331
+ ```
2332
+ ## g1 g2 a b
2333
+ ## 1 1 1 2 1
2334
+ ## 2 1 2 4 3
2335
+ ## 3 2 1 5 4
2336
+ ## 4 2 2 3 2
2337
+ ## 5 2 1 1 5
2338
+ ```
2339
+
2340
+ We then create the 'my_summarize' method and call it passing the R data frame and
2341
+ the group by variable ':g1':
2342
+
2343
+
2344
+ ```ruby
2345
+ def my_summarize(df, group_var)
2346
+ df.group_by(group_var).
2347
+ summarize(a: :a.mean)
2348
+ end
2349
+
2350
+ puts my_summarize(:df, :g1).as__data__frame
2351
+ ```
2352
+
2353
+ ```
2354
+ ## g1 a
2355
+ ## 1 1 3
2356
+ ## 2 2 3
2357
+ ```
2358
+
2359
+ It works!!! Well, let's make sure this was not just some coincidence
2360
+
2361
+
2362
+ ```ruby
2363
+ puts my_summarize(:df, :g2).as__data__frame
2364
+ ```
2365
+
2366
+ ```
2367
+ ## g2 a
2368
+ ## 1 1 2.666667
2369
+ ## 2 2 3.500000
2370
+ ```
2371
+
2372
+ Great, everything is fine! No magic, no new functions, no complexities, just normal, standard Ruby
2373
+ code. If you've ever done NSE in R, this certainly feels much safer and easier to implement.
2374
+
2375
+ ## Different input variables
2376
+
2377
+ In the previous section we've managed to get rid of all NSE formulation for a simple example, but
2378
+ does this remain true for more complex examples, or will the Galaaz way prove impractical for
2379
+ more complex code?
2380
+
2381
+ In the next example Hadley proposes that we write a function that given an expression such as 'a'
2382
+ or 'a * b', calculates three summaries. What we want is a function that does the same as these R
2383
+ statements:
2384
+
2385
+ ```
2386
+ summarise(df, mean = mean(a), sum = sum(a), n = n())
2387
+ #> # A tibble: 1 x 3
2388
+ #> mean sum n
2389
+ #> <dbl> <int> <int>
2390
+ #> 1 3 15 5
2391
+
2392
+ summarise(df, mean = mean(a * b), sum = sum(a * b), n = n())
2393
+ #> # A tibble: 1 x 3
2394
+ #> mean sum n
2395
+ #> <dbl> <int> <int>
2396
+ #> 1 9 45 5
2397
+ ```
2398
+
2399
+ Let's try it in galaaz:
2400
+
2401
+
2402
+ ```ruby
2403
+ def my_summarise2(df, expr)
2404
+ df.summarize(
2405
+ mean: E.mean(expr),
2406
+ sum: E.sum(expr),
2407
+ n: E.n
2408
+ )
2409
+ end
2410
+
2411
+ puts my_summarise2((~:df), :a)
2412
+ puts "\n"
2413
+ puts my_summarise2((~:df), :a * :b)
2414
+ ```
2415
+
2416
+ ```
2417
+ ## mean sum n
2418
+ ## 1 3 15 5
2419
+ ##
2420
+ ## mean sum n
2421
+ ## 1 9 45 5
2422
+ ```
2423
+
2424
+ Once again, there is no need to use any special theory or functions. The only point to be
2425
+ careful about is the use of 'E' to build expressions from functions 'mean', 'sum' and 'n'.
2426
+
2427
+ ## Different input and output variable
2428
+
2429
+ Now the next challenge presented by Hadley is to vary the name of the output variables based on
2430
+ the received expression. So, if the input expression is 'a', we want our data frame columns to
2431
+ be named 'mean\_a' and 'sum\_a'. Now, if the input expression is 'b', columns
2432
+ should be named 'mean\_b' and 'sum\_b'.
2433
+
2434
+ ```
2435
+ mutate(df, mean_a = mean(a), sum_a = sum(a))
2436
+ #> # A tibble: 5 x 6
2437
+ #> g1 g2 a b mean_a sum_a
2438
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
2439
+ #> 1 1 1 1 3 3 15
2440
+ #> 2 1 2 4 2 3 15
2441
+ #> 3 2 1 2 1 3 15
2442
+ #> 4 2 2 5 4 3 15
2443
+ #> # … with 1 more row
2444
+
2445
+ mutate(df, mean_b = mean(b), sum_b = sum(b))
2446
+ #> # A tibble: 5 x 6
2447
+ #> g1 g2 a b mean_b sum_b
2448
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
2449
+ #> 1 1 1 1 3 3 15
2450
+ #> 2 1 2 4 2 3 15
2451
+ #> 3 2 1 2 1 3 15
2452
+ #> 4 2 2 5 4 3 15
2453
+ #> # … with 1 more row
2454
+ ```
2455
+ In order to solve this problem in R, Hadley needs to introduce some more new functions and notations:
2456
+ 'quo_name' and the ':=' operator from package 'rlang'
2457
+
2458
+ Here is our Ruby code:
2459
+
2460
+
2461
+ ```ruby
2462
+ def my_mutate(df, expr)
2463
+ mean_name = "mean_#{expr.to_s}"
2464
+ sum_name = "sum_#{expr.to_s}"
2465
+
2466
+ df.mutate(mean_name => E.mean(expr),
2467
+ sum_name => E.sum(expr))
2468
+ end
2469
+
2470
+ puts my_mutate((~:df), :a)
2471
+ puts "\n"
2472
+ puts my_mutate((~:df), :b)
2473
+ ```
2474
+
2475
+ ```
2476
+ ## g1 g2 a b mean_a sum_a
2477
+ ## 1 1 1 2 1 3 15
2478
+ ## 2 1 2 4 3 3 15
2479
+ ## 3 2 1 5 4 3 15
2480
+ ## 4 2 2 3 2 3 15
2481
+ ## 5 2 1 1 5 3 15
2482
+ ##
2483
+ ## g1 g2 a b mean_b sum_b
2484
+ ## 1 1 1 2 1 3 15
2485
+ ## 2 1 2 4 3 3 15
2486
+ ## 3 2 1 5 4 3 15
2487
+ ## 4 2 2 3 2 3 15
2488
+ ## 5 2 1 1 5 3 15
2489
+ ```
2490
+ It really seems that "Non Standard Evaluation" is actually quite standard in Galaaz! But, you
2491
+ might have noticed a small change in the way the arguments to the mutate method were called.
2492
+ In a previous example we used df.summarise(mean: E.mean(:a), ...) where the column name was
2493
+ followed by a ':' colon. In this example, we have df.mutate(mean_name => E.mean(expr), ...)
2494
+ and variable mean\_name is not followed by ':' but by '=>'. This is standard Ruby notation.
2495
+
2496
+ [explain....]
2497
+
2498
+ ## Capturing multiple variables
2499
+
2500
+ Moving on with new complexities, Hadley proposes that we solve the problem in which the
2501
+ summarise function will receive any number of grouping variables.
2502
+
2503
+ This again is quite standard Ruby. In order to receive an undefined number of parameters
2504
+ the parameter is preceded by '*':
2505
+
2506
+
2507
+ ```ruby
2508
+ def my_summarise3(df, *group_vars)
2509
+ df.group_by(*group_vars).
2510
+ summarise(a: E.mean(:a))
2511
+ end
2512
+
2513
+ puts my_summarise3((~:df), :g1, :g2).as__data__frame
2514
+ ```
2515
+
2516
+ ```
2517
+ ## g1 g2 a
2518
+ ## 1 1 1 2
2519
+ ## 2 1 2 4
2520
+ ## 3 2 1 3
2521
+ ## 4 2 2 3
2522
+ ```
2523
+
2524
+ ## Why does R require NSE and Galaaz does not?
2525
+
2526
+ NSE introduces a number of new concepts, such as 'quoting', 'quasiquotation', 'unquoting' and
2527
+ 'unquote-splicing', while in Galaaz none of those concepts are needed. What gives?
2528
+
2529
+ R is an extremely flexible language and it has lazy evaluation of parameters. When in R a
2530
+ function is called as 'summarise(df, a = b)', the summarise function receives the literal
2531
+ 'a = b' parameter and can work with this as if it were a string. In R, it is not clear what
2532
+ a and b are, they can be expressions or they can be variables, it is up to the function to
2533
+ decide what 'a = b' means.
2534
+
2535
+ In Ruby, there is no lazy evaluation of parameters and 'a' is always a variable and so is 'b'.
2536
+ Variables assume their value as soon as they are used, so 'x = a' is immediately evaluated and
2537
+ variable 'x' will receive the value of variable 'a' as soon as the Ruby statement is executed.
2538
+ Ruby also provides the notion of a symbol; ':a' is a symbol and does not evaluate to anything.
2539
+ Galaaz uses Ruby symbols to build expressions that are not bound to anything: ':a.eq :b' is
2540
+ clearly an expression and has no relationship whatsoever with the statement 'a = b'. By using
2541
+ symbols, variables and expressions all the possible ambiguities that are found in R are
2542
+ eliminated in Galaaz.
2543
+
2544
+ The main problem that remains, is that in R, functions are not clearly documented as what type
2545
+ of input they are expecting, they might be expecting regular variables or they might be
2546
+ expecting expressions and the R function will know how to deal with an input of the form
2547
+ 'a = b', now for the Ruby developer it might not be immediately clear if it should call the
2548
+ function passing the value 'true' if variable 'a' is equal to variable 'b' or if it should
2549
+ call the function passing the expression ':a.eq :b'.
2550
+
2551
+
2552
+ ## Advanced dplyr features
2553
+
2554
+ In the blog: Programming with dplyr by using dplyr (https://www.r-bloggers.com/programming-with-dplyr-by-using-dplyr/) Iñaki Úcar shows surprise that some R users are trying to code in dplyr avoiding
2555
+ the use of NSE. For instance he says:
2556
+
2557
+ > Take the example of seplyr. It stands for standard evaluation dplyr, and enables us to
2558
+ > program over dplyr without having “to bring in (or study) any deep-theory or
2559
+ > heavy-weight tools such as rlang/tidyeval”.
2560
+
2561
+ For me, there isn't really any surprise that users are trying to avoid dplyr deep-theory. R
2562
+ users frequently are not programmers and learning to code is already hard business, on top
2563
+ of that, having to learn how to 'quote' or 'enquo' or 'quos' or 'enquos' is not necessarily
2564
+ a 'piece of cake'. So much so, that 'tidyeval' has some more advanced functions that instead
2565
+ of using quoted expressions, uses strings as arguments.
2566
+
2567
+ In the following examples, we show the use of functions 'group\_by\_at', 'summarise\_at' and
2568
+ 'rename\_at' that receive strings as argument. The data frame used is 'starwars', which describes
2569
+ features of characters in the Starwars movies:
2570
+
2571
+
2572
+ ```ruby
2573
+ puts (~:starwars).head.as__data__frame
2574
+ ```
2575
+
2576
+ ```
2577
+ ## name height mass hair_color skin_color eye_color birth_year
2578
+ ## 1 Luke Skywalker 172 77 blond fair blue 19.0
2579
+ ## 2 C-3PO 167 75 <NA> gold yellow 112.0
2580
+ ## 3 R2-D2 96 32 <NA> white, blue red 33.0
2581
+ ## 4 Darth Vader 202 136 none white yellow 41.9
2582
+ ## 5 Leia Organa 150 49 brown light brown 19.0
2583
+ ## 6 Owen Lars 178 120 brown, grey light blue 52.0
2584
+ ## gender homeworld species
2585
+ ## 1 male Tatooine Human
2586
+ ## 2 <NA> Tatooine Droid
2587
+ ## 3 <NA> Naboo Droid
2588
+ ## 4 male Tatooine Human
2589
+ ## 5 female Alderaan Human
2590
+ ## 6 male Tatooine Human
2591
+ ## films
2592
+ ## 1 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2593
+ ## 2 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
2594
+ ## 3 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2595
+ ## 4 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
2596
+ ## 5 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2597
+ ## 6 Attack of the Clones, Revenge of the Sith, A New Hope
2598
+ ## vehicles starships
2599
+ ## 1 Snowspeeder, Imperial Speeder Bike X-wing, Imperial shuttle
2600
+ ## 2
2601
+ ## 3
2602
+ ## 4 TIE Advanced x1
2603
+ ## 5 Imperial Speeder Bike
2604
+ ## 6
2605
+ ```
2606
+ The grouped_mean function below will receive a grouping variable and calculate summaries for
2607
+ the value\_variables given:
2608
+
2609
+
2610
+ ```r
2611
+ grouped_mean <- function(data, grouping_variables, value_variables) {
2612
+ data %>%
2613
+ group_by_at(grouping_variables) %>%
2614
+ mutate(count = n()) %>%
2615
+ summarise_at(c(value_variables, "count"), mean, na.rm = TRUE) %>%
2616
+ rename_at(value_variables, funs(paste0("mean_", .)))
2617
+ }
2618
+
2619
+ gm = starwars %>%
2620
+ grouped_mean("eye_color", c("mass", "birth_year"))
2621
+
2622
+ as.data.frame(gm)
2623
+ ```
2624
+
2625
+ ```
2626
+ ## eye_color mean_mass mean_birth_year count
2627
+ ## 1 black 76.28571 33.00000 10
2628
+ ## 2 blue 86.51667 67.06923 19
2629
+ ## 3 blue-gray 77.00000 57.00000 1
2630
+ ## 4 brown 66.09231 108.96429 21
2631
+ ## 5 dark NaN NaN 1
2632
+ ## 6 gold NaN NaN 1
2633
+ ## 7 green, yellow 159.00000 NaN 1
2634
+ ## 8 hazel 66.00000 34.50000 3
2635
+ ## 9 orange 282.33333 231.00000 8
2636
+ ## 10 pink NaN NaN 1
2637
+ ## 11 red 81.40000 33.66667 5
2638
+ ## 12 red, blue NaN NaN 1
2639
+ ## 13 unknown 31.50000 NaN 3
2640
+ ## 14 white 48.00000 NaN 1
2641
+ ## 15 yellow 81.11111 76.38000 11
2642
+ ```
2643
+
2644
+ The same code with Galaaz, becomes:
2645
+
2646
+
2647
+ ```ruby
2648
+ def grouped_mean(data, grouping_variables, value_variables)
2649
+ data.
2650
+ group_by_at(grouping_variables).
2651
+ mutate(count: E.n).
2652
+ summarise_at(E.c(value_variables, "count"), ~:mean, na__rm: true).
2653
+ rename_at(value_variables, E.funs(E.paste0("mean_", value_variables)))
2654
+ end
2655
+
2656
+ puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year")).as__data__frame
2657
+ ```
2658
+
2659
+ ```
2660
+ ## eye_color mean_mass mean_birth_year count
2661
+ ## 1 black 76.28571 33.00000 10
2662
+ ## 2 blue 86.51667 67.06923 19
2663
+ ## 3 blue-gray 77.00000 57.00000 1
2664
+ ## 4 brown 66.09231 108.96429 21
2665
+ ## 5 dark NaN NaN 1
2666
+ ## 6 gold NaN NaN 1
2667
+ ## 7 green, yellow 159.00000 NaN 1
2668
+ ## 8 hazel 66.00000 34.50000 3
2669
+ ## 9 orange 282.33333 231.00000 8
2670
+ ## 10 pink NaN NaN 1
2671
+ ## 11 red 81.40000 33.66667 5
2672
+ ## 12 red, blue NaN NaN 1
2673
+ ## 13 unknown 31.50000 NaN 3
2674
+ ## 14 white 48.00000 NaN 1
2675
+ ## 15 yellow 81.11111 76.38000 11
2676
+ ```
2677
+
2165
2678
 
2166
2679
  [TO BE CONTINUED...]
2167
2680