galaaz 0.4.9 → 0.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +798 -285
  3. data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +3 -12
  4. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +5 -7
  5. data/blogs/galaaz_ggplot/galaaz_ggplot.html +69 -29
  6. data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
  7. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/midwest_rb.png +0 -0
  8. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-html/scatter_plot_rb.png +0 -0
  9. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/midwest_rb.pdf +0 -0
  10. data/blogs/galaaz_ggplot/galaaz_ggplot_files/figure-latex/scatter_plot_rb.pdf +0 -0
  11. data/blogs/galaaz_ggplot/midwest.Rmd +1 -9
  12. data/blogs/gknit/gknit.Rmd +37 -40
  13. data/blogs/gknit/gknit.html +32 -30
  14. data/blogs/gknit/gknit.md +36 -37
  15. data/blogs/gknit/gknit.pdf +0 -0
  16. data/blogs/gknit/gknit.tex +35 -37
  17. data/blogs/manual/manual.Rmd +548 -125
  18. data/blogs/manual/manual.html +509 -286
  19. data/blogs/manual/manual.md +798 -285
  20. data/blogs/manual/manual.pdf +0 -0
  21. data/blogs/manual/manual.tex +2816 -0
  22. data/blogs/manual/manual_files/figure-latex/diverging_bar.pdf +0 -0
  23. data/blogs/nse_dplyr/nse_dplyr.Rmd +240 -74
  24. data/blogs/nse_dplyr/nse_dplyr.html +191 -87
  25. data/blogs/nse_dplyr/nse_dplyr.md +361 -107
  26. data/blogs/nse_dplyr/nse_dplyr.pdf +0 -0
  27. data/blogs/nse_dplyr/nse_dplyr.tex +1373 -0
  28. data/blogs/ruby_plot/ruby_plot.Rmd +61 -81
  29. data/blogs/ruby_plot/ruby_plot.html +54 -57
  30. data/blogs/ruby_plot/ruby_plot.md +48 -67
  31. data/blogs/ruby_plot/ruby_plot.pdf +0 -0
  32. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.png +0 -0
  33. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.png +0 -0
  34. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.png +0 -0
  35. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.png +0 -0
  36. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.png +0 -0
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.png +0 -0
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.png +0 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.png +0 -0
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.png +0 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.png +0 -0
  42. data/blogs/ruby_plot/ruby_plot_files/figure-latex/dose_len.png +0 -0
  43. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_delivery.png +0 -0
  44. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facet_by_dose.png +0 -0
  45. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color.png +0 -0
  46. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_by_delivery_color2.png +0 -0
  47. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_decorations.png +0 -0
  48. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_jitter.png +0 -0
  49. data/blogs/ruby_plot/ruby_plot_files/figure-latex/facets_with_points.png +0 -0
  50. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_box_plot.png +0 -0
  51. data/blogs/ruby_plot/ruby_plot_files/figure-latex/final_violin_plot.png +0 -0
  52. data/blogs/ruby_plot/ruby_plot_files/figure-latex/violin_with_jitter.png +0 -0
  53. data/lib/R_interface/rdata_frame.rb +0 -12
  54. data/lib/R_interface/robject.rb +14 -14
  55. data/lib/R_interface/ruby_extensions.rb +3 -31
  56. data/lib/R_interface/rvector.rb +0 -12
  57. data/lib/gknit/knitr_engine.rb +5 -3
  58. data/lib/util/exec_ruby.rb +22 -61
  59. data/specs/tmp.rb +26 -12
  60. data/version.rb +1 -1
  61. metadata +22 -17
  62. data/bin/gknit_old_r +0 -236
  63. data/blogs/dev/dev.Rmd +0 -23
  64. data/blogs/dev/dev.md +0 -58
  65. data/blogs/dev/dev2.Rmd +0 -65
  66. data/blogs/dev/model.rb +0 -41
  67. data/blogs/dplyr/dplyr.Rmd +0 -29
  68. data/blogs/dplyr/dplyr.html +0 -433
  69. data/blogs/dplyr/dplyr.md +0 -58
  70. data/blogs/dplyr/dplyr.rb +0 -63
  71. data/blogs/galaaz_ggplot/galaaz_ggplot.log +0 -640
  72. data/blogs/galaaz_ggplot/galaaz_ggplot.md +0 -431
  73. data/blogs/galaaz_ggplot/galaaz_ggplot.tex +0 -481
  74. data/blogs/galaaz_ggplot/midwest.png +0 -0
  75. data/blogs/galaaz_ggplot/scatter_plot.png +0 -0
  76. data/blogs/ruby_plot/ruby_plot.tex +0 -1077
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f328b999e2b5b132053b133a1f1fcc3ddd6f5a295cc040629a6989150a765775
4
- data.tar.gz: f6bce01abc3f189f1e16f8cf2691a7bd14d2962f295dccda5a00e564e4470cc5
3
+ metadata.gz: 5028519688d5197e29ea9198499c8093f96aa27e498a0eb974367187d7d151da
4
+ data.tar.gz: f5bad7debd953898f0335e04e83089137025c759a3910cf6d74061b53f4eb37e
5
5
  SHA512:
6
- metadata.gz: 5b287e9d5883723a8e378d88c5df28d7097ca3ca14b6648641415c2734d4204473f71ad173d45b0946e5e02e67d9c1da168a06aeaf2472c388232c7acf44cdc5
7
- data.tar.gz: 57a0c432b785e89ee1df8c38f2710736e2150b2024e11b1ed1e0437e1903a96934b6be135fe1ffe54d9fb891527d15275f1c119cdf6f1a453d47c991b268d273
6
+ metadata.gz: 5b14427f32a5db4f2c9754c1ee7fea356c939727152a626c616c3dff1372cddb4fd4d982dc761c2a2e2ca1c211b8a0215d26c2b11eb162cd2f7ab5f0c1c9344e
7
+ data.tar.gz: 94c7da10fd04a9136b9a36582574ae04c9f3a4767f1a3dd04137a64f4e104cb8c3c0906752c627cef27ff81b7bbca0bde83aa58e9e5b742005079b30c46616a2
data/README.md CHANGED
@@ -74,15 +74,13 @@ Panda, SciPy, SciKit-Learn and a couple more.
74
74
  # gKnitting a Document
75
75
 
76
76
  This manual has been formatted using gKnit. gKnit uses Knitr and R markdown to knit
77
- a document in Ruby or R and output it in any of the available formats for R markdown.
77
+ a document in Ruby or R and output it in any of the available formats for R markdown.
78
78
  gKnit runs atop of GraalVM, and Galaaz. In gKnit, Ruby variables are persisted between
79
- chunks, making it an ideal solution for literate programming.
80
- Also, since it is based on Galaaz, Ruby chunks can have access to R variables and Polyglot
81
- Programming with Ruby and R is quite natural.
79
+ chunks, making it an ideal solution for literate programming. Also, since it is based
80
+ on Galaaz, Ruby chunks can have access to R variables and Polyglot Programming with
81
+ Ruby and R is quite natural.
82
82
 
83
- gknit was describe in more depth in:
84
-
85
- * xxx.xxxx.xxx
83
+ [gknit is described in more detail here](https://towardsdatascience.com/how-to-do-reproducible-research-in-ruby-with-gknit-c26d2684d64e)
86
84
 
87
85
  # Vector
88
86
 
@@ -110,15 +108,15 @@ To create a vector the 'c' (concatenate) method from the 'R' module should be us
110
108
 
111
109
 
112
110
  ```ruby
113
- @vec = R.c(1, 2, 3)
114
- puts @vec
111
+ vec = R.c(1, 2, 3)
112
+ puts vec
115
113
  ```
116
114
 
117
115
  ```
118
116
  ## [1] 1 2 3
119
117
  ```
120
118
 
121
- Lets take a look at the type, mode and storage.mode of our vector @vec. In order to print
119
+ Let's take a look at the type, mode and storage.mode of our vector vec. In order to print
122
120
  this out, we are creating a data frame 'df' and printing it out. A data frame, for those
123
121
  not familiar with it, is basically a table. Here we create the data frame and add the
124
122
  column name by passing named parameters for each column, such as 'typeof:', 'mode:' and
@@ -130,7 +128,7 @@ data frame is 'data.frame', in Galaaz we use 'data\_\_frame'.
130
128
 
131
129
 
132
130
  ```ruby
133
- df = R.data__frame(typeof: @vec.typeof, mode: @vec.mode, storage__mode: @vec.storage__mode)
131
+ df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
134
132
  puts df
135
133
  ```
136
134
 
@@ -146,8 +144,8 @@ follows normal Ruby rules and the number 1 is an integer and 1.0 is a float.
146
144
 
147
145
 
148
146
  ```ruby
149
- @vec = R.c(1.0, 2, 3)
150
- puts @vec
147
+ vec = R.c(1.0, 2, 3)
148
+ puts vec
151
149
  ```
152
150
 
153
151
  ```
@@ -156,7 +154,7 @@ puts @vec
156
154
 
157
155
 
158
156
  ```ruby
159
- df = R.data__frame(typeof: @vec.typeof, mode: @vec.mode, storage__mode: @vec.storage__mode)
157
+ df = R.data__frame(typeof: vec.typeof, mode: vec.mode, storage__mode: vec.storage__mode)
160
158
  outputs df.kable.kable_styling
161
159
  ```
162
160
 
@@ -189,14 +187,14 @@ vec = R.c(1, hello, 5)
189
187
 
190
188
  ```
191
189
  ## Message:
192
- ## undefined local variable or method `hello' for RubyChunk:Class
190
+ ## undefined local variable or method `hello' for #<RC:0x2e0 @out_list=nil>:RC
193
191
  ```
194
192
 
195
193
  ```
196
194
  ## Message:
197
- ## (eval):1:in `exec_ruby'
198
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:141:in `instance_eval'
199
- ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:141:in `exec_ruby'
195
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:103:in `get_binding'
196
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `eval'
197
+ ## /home/rbotafogo/desenv/galaaz/lib/util/exec_ruby.rb:102:in `exec_ruby'
200
198
  ## /home/rbotafogo/desenv/galaaz/lib/gknit/knitr_engine.rb:650:in `block in initialize'
201
199
  ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `call'
202
200
  ## /home/rbotafogo/desenv/galaaz/lib/R_interface/ruby_callback.rb:77:in `callback'
@@ -221,8 +219,8 @@ Here is a vector with logical values
221
219
 
222
220
 
223
221
  ```ruby
224
- @vec = R.c(true, true, false, false, true)
225
- puts @vec
222
+ vec = R.c(true, true, false, false, true)
223
+ puts vec
226
224
  ```
227
225
 
228
226
  ```
@@ -235,26 +233,26 @@ The 'c' functions used to create vectors can also be used to combine two vectors
235
233
 
236
234
 
237
235
  ```ruby
238
- @vec1 = R.c(10.0, 20.0, 30.0)
239
- @vec2 = R.c(4.0, 5.0, 6.0)
240
- @vec = R.c(@vec1, @vec2)
241
- puts @vec
236
+ vec1 = R.c(10.0, 20.0, 30.0)
237
+ vec2 = R.c(4.0, 5.0, 6.0)
238
+ vec = R.c(vec1, vec2)
239
+ puts vec
242
240
  ```
243
241
 
244
242
  ```
245
243
  ## [1] 10 20 30 4 5 6
246
244
  ```
247
245
  In galaaz, methods can be chained (somewhat like the pipe operator in R %>%, but more generic).
248
- In this next example, method 'c' is chainned after '@vec1'. This also looks like 'c' is a
246
+ In this next example, method 'c' is chained after 'vec1'. This also looks like 'c' is a
249
247
  method of the vector, but in reality, this is actually closer to the pipe operator. When
250
248
  Galaaz identifies that 'c' is not a method of 'vec' it actually tries to call 'R.c' with
251
- '@vec1' as the first argument concatenated with all the other available arguments. The code
249
+ 'vec1' as the first argument concatenated with all the other available arguments. The code
252
250
  below is automatically converted to the code above.
253
251
 
254
252
 
255
253
  ```ruby
256
- @vec = @vec1.c(@vec2)
257
- puts @vec
254
+ vec = vec1.c(vec2)
255
+ puts vec
258
256
  ```
259
257
 
260
258
  ```
@@ -267,7 +265,7 @@ Arithmetic operations on vectors are performed element by element:
267
265
 
268
266
 
269
267
  ```ruby
270
- puts @vec1 + @vec2
268
+ puts vec1 + vec2
271
269
  ```
272
270
 
273
271
  ```
@@ -276,7 +274,7 @@ puts @vec1 + @vec2
276
274
 
277
275
 
278
276
  ```ruby
279
- puts @vec1 * 5
277
+ puts vec1 * 5
280
278
  ```
281
279
 
282
280
  ```
@@ -287,8 +285,8 @@ When vectors have different length, a recycling rule is applied to the shorter v
287
285
 
288
286
 
289
287
  ```ruby
290
- @vec3 = R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
291
- puts @vec4 = @vec1 + @vec3
288
+ vec3 = R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
289
+ puts vec4 = vec1 + vec3
292
290
  ```
293
291
 
294
292
  ```
@@ -301,7 +299,7 @@ Vectors can be indexed by using the '[]' operator:
301
299
 
302
300
 
303
301
  ```ruby
304
- puts @vec4[3]
302
+ puts vec4[3]
305
303
  ```
306
304
 
307
305
  ```
@@ -309,11 +307,11 @@ puts @vec4[3]
309
307
  ```
310
308
 
311
309
  We can also index a vector with another vector. For example, in the code below, we take elements
312
- 1, 3, 5, and 7 from @vec3:
310
+ 1, 3, 5, and 7 from vec4:
313
311
 
314
312
 
315
313
  ```ruby
316
- puts @vec4[R.c(1, 3, 5, 7)]
314
+ puts vec4[R.c(1, 3, 5, 7)]
317
315
  ```
318
316
 
319
317
  ```
@@ -324,7 +322,7 @@ Repeating an index and having indices out of order is valid code:
324
322
 
325
323
 
326
324
  ```ruby
327
- puts @vec4[R.c(1, 3, 3, 1)]
325
+ puts vec4[R.c(1, 3, 3, 1)]
328
326
  ```
329
327
 
330
328
  ```
@@ -336,8 +334,8 @@ the indexed values are not returned:
336
334
 
337
335
 
338
336
  ```ruby
339
- puts @vec4[-3]
340
- puts @vec4[-R.c(1, 3, 5, 7)]
337
+ puts vec4[-3]
338
+ puts vec4[-R.c(1, 3, 5, 7)]
341
339
  ```
342
340
 
343
341
  ```
@@ -349,7 +347,7 @@ If an index is out of range, a missing value (NA) will be reported.
349
347
 
350
348
 
351
349
  ```ruby
352
- puts @vec4[30]
350
+ puts vec4[30]
353
351
  ```
354
352
 
355
353
  ```
@@ -360,7 +358,7 @@ It is also possible to index a vector by range:
360
358
 
361
359
 
362
360
  ```ruby
363
- puts @vec4[(2..5)]
361
+ puts vec4[(2..5)]
364
362
  ```
365
363
 
366
364
  ```
@@ -403,9 +401,9 @@ from the vector. In order to do this extraction the '>>' operator is used.
403
401
 
404
402
 
405
403
  ```ruby
406
- puts @vec4
407
- puts @vec4 >> 0
408
- puts @vec4 >> 4
404
+ puts vec4
405
+ puts vec4 >> 0
406
+ puts vec4 >> 4
409
407
  ```
410
408
 
411
409
  ```
@@ -905,11 +903,11 @@ created by the 'matrix' function:
905
903
 
906
904
 
907
905
  ```ruby
908
- @mat = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
909
- nrow: 3,
910
- ncol: 3)
906
+ mat = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
907
+ nrow: 3,
908
+ ncol: 3)
911
909
 
912
- puts @mat
910
+ puts mat
913
911
  ```
914
912
 
915
913
  ```
@@ -923,12 +921,12 @@ memory by row first passing an extra argument to the 'matrix' function:
923
921
 
924
922
 
925
923
  ```ruby
926
- @mat_row = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
927
- nrow: 3,
928
- ncol: 3,
929
- byrow: true)
924
+ mat_row = R.matrix(R.c(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0),
925
+ nrow: 3,
926
+ ncol: 3,
927
+ byrow: true)
930
928
 
931
- puts @mat_row
929
+ puts mat_row
932
930
  ```
933
931
 
934
932
  ```
@@ -944,8 +942,8 @@ A matrix can be indexed by [row, column]:
944
942
 
945
943
 
946
944
  ```ruby
947
- puts @mat_row[1, 1]
948
- puts @mat_row[2, 3]
945
+ puts mat_row[1, 1]
946
+ puts mat_row[2, 3]
949
947
  ```
950
948
 
951
949
  ```
@@ -956,8 +954,8 @@ It is possible to index an entire row or column with the ':all' keyword
956
954
 
957
955
 
958
956
  ```ruby
959
- puts @mat_row[1, :all]
960
- puts @mat_row[:all, 2]
957
+ puts mat_row[1, :all]
958
+ puts mat_row[:all, 2]
961
959
  ```
962
960
 
963
961
  ```
@@ -970,7 +968,7 @@ rows 1 and 3 and columns 2 and 3 building a 2 x 2 matrix.
970
968
 
971
969
 
972
970
  ```ruby
973
- puts @mat_row[R.c(1, 3), R.c(2, 3)]
971
+ puts mat_row[R.c(1, 3), R.c(2, 3)]
974
972
  ```
975
973
 
976
974
  ```
@@ -979,12 +977,11 @@ puts @mat_row[R.c(1, 3), R.c(2, 3)]
979
977
  ## [2,] 8 9
980
978
  ```
981
979
 
982
- Matrices can be combined with functions 'rbind' and 'cbind'
980
+ Matrices can be combined with functions 'rbind':
983
981
 
984
982
 
985
983
  ```ruby
986
- puts @mat_row.rbind(@mat)
987
- puts @mat_row.cbind(@mat)
984
+ puts mat_row.rbind(mat)
988
985
  ```
989
986
 
990
987
  ```
@@ -995,6 +992,16 @@ puts @mat_row.cbind(@mat)
995
992
  ## [4,] 1 4 7
996
993
  ## [5,] 2 5 8
997
994
  ## [6,] 3 6 9
995
+ ```
996
+
997
+ and 'cbind':
998
+
999
+
1000
+ ```ruby
1001
+ puts mat_row.cbind(mat)
1002
+ ```
1003
+
1004
+ ```
998
1005
  ## [,1] [,2] [,3] [,4] [,5] [,6]
999
1006
  ## [1,] 1 2 3 1 4 7
1000
1007
  ## [2,] 4 5 6 2 5 8
@@ -1011,8 +1018,8 @@ can only hold one type of element.
1011
1018
  nums = R.c(1.0, 2.0, 3.0)
1012
1019
  strs = R.c("a", "b", "c", "d")
1013
1020
  bool = R.c(true, true, false)
1014
- @lst = R.list(nums: nums, strs: strs, bool: bool)
1015
- puts @lst
1021
+ lst = R.list(nums: nums, strs: strs, bool: bool)
1022
+ puts lst
1016
1023
  ```
1017
1024
 
1018
1025
  ```
@@ -1026,7 +1033,7 @@ puts @lst
1026
1033
  ## [1] TRUE TRUE FALSE
1027
1034
  ```
1028
1035
 
1029
- Note that '@lst' elements are named elements.
1036
+ Note that 'lst' elements are named elements.
1030
1037
 
1031
1038
 
1032
1039
  ## List Indexing
@@ -1037,7 +1044,7 @@ return one of the sublists.
1037
1044
 
1038
1045
 
1039
1046
  ```ruby
1040
- puts @lst[1]
1047
+ puts lst[1]
1041
1048
  ```
1042
1049
 
1043
1050
  ```
@@ -1052,18 +1059,18 @@ the original list
1052
1059
 
1053
1060
 
1054
1061
  ```ruby
1055
- puts @lst[[1]]
1062
+ puts lst[[1]]
1056
1063
  ```
1057
1064
 
1058
1065
  ```
1059
1066
  ## [1] 1 2 3
1060
1067
  ```
1061
1068
 
1062
- When elements are named, as dones with @lst, indexing can be done by name:
1069
+ When elements are named, as done with lst, indexing can be done by name:
1063
1070
 
1064
1071
 
1065
1072
  ```ruby
1066
- puts @lst[['bool']][[1]] >> 0
1073
+ puts lst[['bool']][[1]] >> 0
1067
1074
  ```
1068
1075
 
1069
1076
  ```
@@ -1183,23 +1190,31 @@ puts (~:mtcars)[R.c('Datsun 710', 'Camaro Z28'), :all]
1183
1190
  Finally, a data frame can also be indexed with a logical vector. In this next example, the
1184
1191
  'am' column of :mtcars is compared with 0 (with method 'eq'). When 'am' is equal to 0 the
1185
1192
  car is automatic. So, by doing '(~:mtcars).am.eq 0' a logical vector is created with
1186
- 'true' whenever 'am' is 0 and 'false' otherwise. Using this logical vector, the data frame
1187
- is indexed, returning a new data frame in which all cars have automatic transmission.
1193
+ 'true' whenever 'am' is 0 and 'false' otherwise.
1188
1194
 
1189
1195
 
1190
1196
  ```ruby
1191
1197
  # obtain a vector with 'true' for cars with automatic transmission
1192
1198
  automatic = (~:mtcars).am.eq 0
1193
1199
  puts automatic
1194
-
1195
- # slice the data frame by using this vector
1196
- puts (~:mtcars)[automatic, :all]
1197
1200
  ```
1198
1201
 
1199
1202
  ```
1200
1203
  ## [1] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
1201
1204
  ## [12] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE
1202
1205
  ## [23] TRUE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
1206
+ ```
1207
+
1208
+ Using this logical vector, the data frame is indexed, returning a new data frame in
1209
+ which all cars have automatic transmission.
1210
+
1211
+
1212
+ ```ruby
1213
+ # slice the data frame by using this vector
1214
+ puts (~:mtcars)[automatic, :all]
1215
+ ```
1216
+
1217
+ ```
1203
1218
  ## mpg cyl disp hp drat wt qsec vs am gear carb
1204
1219
  ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
1205
1220
  ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
@@ -1342,6 +1357,62 @@ puts exp7
1342
1357
  ## y <- sin(x)
1343
1358
  ```
1344
1359
 
1360
+ Expressions can also be written using '.' notation:
1361
+
1362
+
1363
+ ```ruby
1364
+ exp8 = :y.assign :x.sin
1365
+ puts exp8
1366
+ ```
1367
+
1368
+ ```
1369
+ ## y <- sin(x)
1370
+ ```
1371
+
1372
+ When a function has multiple arguments, the first one can be used before the '.':
1373
+
1374
+
1375
+ ```ruby
1376
+ exp9 = :x.c(:y)
1377
+ puts exp9
1378
+ ```
1379
+
1380
+ ```
1381
+ ## c(x, y)
1382
+ ```
1383
+
1384
+ ## Evaluating an Expression
1385
+
1386
+ Expressions can be evaluated by calling function 'eval' with a binding. A binding can be provided
1387
+ with a list:
1388
+
1389
+
1390
+ ```ruby
1391
+ exp = (:a + :b) * 2.0 + :c ** 2 / :z
1392
+ puts exp.eval(R.list(a: 10, b: 20, c: 30, z: 40))
1393
+ ```
1394
+
1395
+ ```
1396
+ ## [1] 82.5
1397
+ ```
1398
+
1399
+ ... with a data frame:
1400
+
1401
+
1402
+ ```ruby
1403
+ df = R.data__frame(
1404
+ a: R.c(1, 2, 3),
1405
+ b: R.c(10, 20, 30),
1406
+ c: R.c(100, 200, 300),
1407
+ z: R.c(1000, 2000, 3000))
1408
+
1409
+ puts exp.eval(df)
1410
+ ```
1411
+
1412
+ ```
1413
+ ## [1] 32 64 96
1414
+ ```
1415
+
1345
1416
  # Manipulating Data
1346
1417
 
1347
1418
  One of the major benefits of Galaaz is to bring strong data manipulation to Ruby. The following
@@ -1365,8 +1436,8 @@ R.library('dplyr')
1365
1436
 
1366
1437
 
1367
1438
  ```ruby
1368
- @flights = ~:flights
1369
- puts @flights.head.as__data__frame
1439
+ flights = ~:flights
1440
+ puts flights.head.as__data__frame
1370
1441
  ```
1371
1442
 
1372
1443
  ```
@@ -1400,7 +1471,7 @@ the first :month.eq 1
1400
1471
 
1401
1472
 
1402
1473
  ```ruby
1403
- puts @flights.filter((:month.eq 1), (:day.eq 1)).head.as__data__frame
1474
+ puts flights.filter((:month.eq 1), (:day.eq 1)).head.as__data__frame
1404
1475
  ```
1405
1476
 
1406
1477
  ```
@@ -1433,7 +1504,7 @@ All flights that departed in November of December
1433
1504
 
1434
1505
 
1435
1506
  ```ruby
1436
- puts @flights.filter((:month.eq 11) | (:month.eq 12)).head.as__data__frame
1507
+ puts flights.filter((:month.eq 11) | (:month.eq 12)).head.as__data__frame
1437
1508
  ```
1438
1509
 
1439
1510
  ```
@@ -1467,7 +1538,7 @@ symbol, in this case ':in' and the second argument is the vector:
1467
1538
 
1468
1539
 
1469
1540
  ```ruby
1470
- puts @flights.filter(:month._ :in, R.c(11, 12)).head.as__data__frame
1541
+ puts flights.filter(:month._ :in, R.c(11, 12)).head.as__data__frame
1471
1542
  ```
1472
1543
 
1473
1544
  ```
@@ -1503,8 +1574,8 @@ what is obtained from data frame.
1503
1574
 
1504
1575
 
1505
1576
  ```ruby
1506
- @df = R.tibble(x: R.c(1, R::NA, 3))
1507
- puts @df.as__data__frame
1577
+ df = R.tibble(x: R.c(1, R::NA, 3))
1578
+ puts df.as__data__frame
1508
1579
  ```
1509
1580
 
1510
1581
  ```
@@ -1519,7 +1590,7 @@ not.
1519
1590
 
1520
1591
 
1521
1592
  ```ruby
1522
- puts @df.filter(:x > 1).as__data__frame
1593
+ puts df.filter(:x > 1).as__data__frame
1523
1594
  ```
1524
1595
 
1525
1596
  ```
@@ -1531,7 +1602,7 @@ To match an NA use method 'is__na'
1531
1602
 
1532
1603
 
1533
1604
  ```ruby
1534
- puts @df.filter((:x.is__na) | (:x > 1)).as__data__frame
1605
+ puts df.filter((:x.is__na) | (:x > 1)).as__data__frame
1535
1606
  ```
1536
1607
 
1537
1608
  ```
@@ -1546,7 +1617,7 @@ Arrange reorders the rows of a data frame by the given arguments.
1546
1617
 
1547
1618
 
1548
1619
  ```ruby
1549
- puts @flights.arrange(:year, :month, :day).head.as__data__frame
1620
+ puts flights.arrange(:year, :month, :day).head.as__data__frame
1550
1621
  ```
1551
1622
 
1552
1623
  ```
@@ -1577,7 +1648,7 @@ To arrange in descending order, use function 'desc'
1577
1648
 
1578
1649
 
1579
1650
  ```ruby
1580
- puts @flights.arrange(:dep_delay.desc).head.as__data__frame
1651
+ puts flights.arrange(:dep_delay.desc).head.as__data__frame
1581
1652
  ```
1582
1653
 
1583
1654
  ```
@@ -1610,7 +1681,7 @@ To select specific columns from a dataset we use function 'select':
1610
1681
 
1611
1682
 
1612
1683
  ```ruby
1613
- puts @flights.select(:year, :month, :day).head.as__data__frame
1684
+ puts flights.select(:year, :month, :day).head.as__data__frame
1614
1685
  ```
1615
1686
 
1616
1687
  ```
@@ -1627,7 +1698,7 @@ It is also possible to select column in a given range
1627
1698
 
1628
1699
 
1629
1700
  ```ruby
1630
- puts @flights.select(:year.up_to :day).head.as__data__frame
1701
+ puts flights.select(:year.up_to :day).head.as__data__frame
1631
1702
  ```
1632
1703
 
1633
1704
  ```
@@ -1644,7 +1715,7 @@ Select all columns that start with a given name sequence
1644
1715
 
1645
1716
 
1646
1717
  ```ruby
1647
- puts @flights.select(E.starts_with('arr')).head.as__data__frame
1718
+ puts flights.select(E.starts_with('arr')).head.as__data__frame
1648
1719
  ```
1649
1720
 
1650
1721
  ```
@@ -1672,7 +1743,7 @@ A helper function that comes in handy when we just want to rearrange column orde
1672
1743
 
1673
1744
 
1674
1745
  ```ruby
1675
- puts @flights.select(:year, :month, :day, E.everything).head.as__data__frame
1746
+ puts flights.select(:year, :month, :day, E.everything).head.as__data__frame
1676
1747
  ```
1677
1748
 
1678
1749
  ```
@@ -1703,13 +1774,13 @@ puts @flights.select(:year, :month, :day, E.everything).head.as__data__frame
1703
1774
 
1704
1775
 
1705
1776
  ```ruby
1706
- @flights_sm = @flights.
1707
- select((:year.up_to :day),
1708
- E.ends_with('delay'),
1709
- :distance,
1710
- :air_time)
1777
+ flights_sm = flights.
1778
+ select((:year.up_to :day),
1779
+ E.ends_with('delay'),
1780
+ :distance,
1781
+ :air_time)
1711
1782
 
1712
- puts @flights_sm.head.as__data__frame
1783
+ puts flights_sm.head.as__data__frame
1713
1784
  ```
1714
1785
 
1715
1786
  ```
@@ -1724,10 +1795,10 @@ puts @flights_sm.head.as__data__frame
1724
1795
 
1725
1796
 
1726
1797
  ```ruby
1727
- @flights_sm = @flights_sm.
1728
- mutate(gain: :dep_delay - :arr_delay,
1729
- speed: :distance / :air_time * 60)
1730
- puts @flights_sm.head.as__data__frame
1798
+ flights_sm = flights_sm.
1799
+ mutate(gain: :dep_delay - :arr_delay,
1800
+ speed: :distance / :air_time * 60)
1801
+ puts flights_sm.head.as__data__frame
1731
1802
  ```
1732
1803
 
1733
1804
  ```
@@ -1747,7 +1818,7 @@ a single value is obtained from the data frame:
1747
1818
 
1748
1819
 
1749
1820
  ```ruby
1750
- puts @flights.summarise(delay: E.mean(:dep_delay, na__rm: true)).as__data__frame
1821
+ puts flights.summarise(delay: E.mean(:dep_delay, na__rm: true)).as__data__frame
1751
1822
  ```
1752
1823
 
1753
1824
  ```
@@ -1759,7 +1830,7 @@ When a data frame is groupe with 'group_by' summaries apply to the given group:
1759
1830
 
1760
1831
 
1761
1832
  ```ruby
1762
- by_day = @flights.group_by(:year, :month, :day)
1833
+ by_day = flights.group_by(:year, :month, :day)
1763
1834
  puts by_day.summarise(delay: :dep_delay.mean(na__rm: true)).head.as__data__frame
1764
1835
  ```
1765
1836
 
@@ -1777,7 +1848,7 @@ Next we put many operations together by pipping them one after the other:
1777
1848
 
1778
1849
 
1779
1850
  ```ruby
1780
- delays = @flights.
1851
+ delays = flights.
1781
1852
  group_by(:dest).
1782
1853
  summarise(
1783
1854
  count: E.n,
@@ -1785,108 +1856,17 @@ delays = @flights.
1785
1856
  delay: :arr_delay.mean(na__rm: true)).
1786
1857
  filter(:count > 20, :dest != "NHL")
1787
1858
 
1788
- puts delays.as__data__frame
1789
- ```
1790
-
1791
- ```
1792
- ## dest count dist delay
1793
- ## 1 ABQ 254 1826.00000 4.38188976
1794
- ## 2 ACK 265 199.00000 4.85227273
1795
- ## 3 ALB 439 143.00000 14.39712919
1796
- ## 4 ATL 17215 757.10822 11.30011285
1797
- ## 5 AUS 2439 1514.25297 6.01990875
1798
- ## 6 AVL 275 583.58182 8.00383142
1799
- ## 7 BDL 443 116.00000 7.04854369
1800
- ## 8 BGR 375 378.00000 8.02793296
1801
- ## 9 BHM 297 865.99663 16.87732342
1802
- ## 10 BNA 6333 758.21348 11.81245891
1803
- ## 11 BOS 15508 190.63696 2.91439222
1804
- ## 12 BQN 896 1578.98326 8.24549550
1805
- ## 13 BTV 2589 265.09154 8.95099602
1806
- ## 14 BUF 4681 296.80837 8.94595186
1807
- ## 15 BUR 371 2465.00000 8.17567568
1808
- ## 16 BWI 1781 179.41830 10.72673385
1809
- ## 17 BZN 36 1882.00000 7.60000000
1810
- ## 18 CAE 116 603.55172 41.76415094
1811
- ## 19 CAK 864 397.00000 19.69833729
1812
- ## 20 CHO 52 305.00000 9.50000000
1813
- ## 21 CHS 2884 632.91678 10.59296847
1814
- ## 22 CLE 4573 414.17428 9.18161129
1815
- ## 23 CLT 14064 538.02730 7.36031885
1816
- ## 24 CMH 3524 476.55505 10.60132291
1817
- ## 25 CRW 138 444.00000 14.67164179
1818
- ## 26 CVG 3941 575.15986 15.36456376
1819
- ## 27 DAY 1525 537.10230 12.68048606
1820
- ## 28 DCA 9705 211.00618 9.06695204
1821
- ## 29 DEN 7266 1614.67836 8.60650021
1822
- ## 30 DFW 8738 1383.04303 0.32212685
1823
- ## 31 DSM 569 1020.88752 19.00573614
1824
- ## 32 DTW 9384 498.12852 5.42996346
1825
- ## 33 EGE 213 1735.70892 6.30434783
1826
- ## 34 FLL 12055 1070.06877 8.08212154
1827
- ## 35 GRR 765 605.78170 18.18956044
1828
- ## 36 GSO 1606 449.84184 14.11260054
1829
- ## 37 GSP 849 595.95995 15.93544304
1830
- ## 38 HNL 707 4972.67468 -1.36519258
1831
- ## 39 HOU 2115 1420.15508 7.17618819
1832
- ## 40 IAD 5700 224.84684 13.86420212
1833
- ## 41 IAH 7198 1407.20672 4.24079040
1834
- ## 42 ILM 110 500.00000 4.63551402
1835
- ## 43 IND 2077 652.26288 9.94043412
1836
- ## 44 JAC 25 1875.60000 28.09523810
1837
- ## 45 JAX 2720 824.67610 11.84483416
1838
- ## 46 LAS 5997 2240.96148 0.25772849
1839
- ## 47 LAX 16174 2468.62236 0.54711094
1840
- ## 48 LGB 668 2465.00000 -0.06202723
1841
- ## 49 MCI 2008 1097.69522 14.51405836
1842
- ## 50 MCO 14082 943.11057 5.45464309
1843
- ## 51 MDW 4113 718.04595 12.36422360
1844
- ## 52 MEM 1789 954.20123 10.64531435
1845
- ## 53 MHT 1009 207.02973 14.78755365
1846
- ## 54 MIA 11728 1091.55244 0.29905978
1847
- ## 55 MKE 2802 733.38151 14.16722038
1848
- ## 56 MSN 572 803.95455 20.19604317
1849
- ## 57 MSP 7185 1017.40167 7.27016886
1850
- ## 58 MSY 3799 1177.70571 6.49017497
1851
- ## 59 MVY 221 173.00000 -0.28571429
1852
- ## 60 MYR 59 550.66102 4.60344828
1853
- ## 61 OAK 312 2576.00000 3.07766990
1854
- ## 62 OKC 346 1325.00000 30.61904762
1855
- ## 63 OMA 849 1135.56655 14.69889841
1856
- ## 64 ORD 17283 729.00081 5.87661475
1857
- ## 65 ORF 1536 288.52344 10.94909344
1858
- ## 66 PBI 6554 1028.83811 8.56297210
1859
- ## 67 PDX 1354 2445.56573 5.14157973
1860
- ## 68 PHL 1632 94.32353 10.12719014
1861
- ## 69 PHX 4656 2141.30326 2.09704733
1862
- ## 70 PIT 2875 334.06122 7.68099053
1863
- ## 71 PSE 365 1617.00000 7.87150838
1864
- ## 72 PVD 376 160.00000 16.23463687
1865
- ## 73 PWM 2352 276.12840 11.66040210
1866
- ## 74 RDU 8163 426.75769 10.05238095
1867
- ## 75 RIC 2454 281.40465 20.11125320
1868
- ## 76 ROC 2416 259.25083 11.56064461
1869
- ## 77 RSW 3537 1072.85327 3.23814963
1870
- ## 78 SAN 2737 2437.29923 3.13916574
1871
- ## 79 SAT 686 1578.34111 6.94537178
1872
- ## 80 SAV 804 709.18408 15.12950601
1873
- ## 81 SDF 1157 645.98358 12.66938406
1874
- ## 82 SEA 3923 2412.66531 -1.09909910
1875
- ## 83 SFO 13331 2577.92356 2.67289152
1876
- ## 84 SJC 329 2569.00000 3.44817073
1877
- ## 85 SJU 5819 1599.83365 2.52052659
1878
- ## 86 SLC 2467 1986.98662 0.17625459
1879
- ## 87 SMF 284 2521.00000 12.10992908
1880
- ## 88 SNA 825 2434.00000 -7.86822660
1881
- ## 89 SRQ 1211 1044.65153 3.08243131
1882
- ## 90 STL 4339 878.72321 11.07846451
1883
- ## 91 STT 522 1626.98276 -3.83590734
1884
- ## 92 SYR 1761 205.92164 8.90392501
1885
- ## 93 TPA 7466 1003.93557 7.40852503
1886
- ## 94 TUL 315 1215.00000 33.65986395
1887
- ## 95 TVC 101 652.38614 12.96842105
1888
- ## 96 TYS 631 638.80983 24.06920415
1889
- ## 97 XNA 1036 1142.50579 7.46572581
1859
+ puts delays.as__data__frame.head
1860
+ ```
1861
+
1862
+ ```
1863
+ ## dest count dist delay
1864
+ ## 1 ABQ 254 1826.0000 4.381890
1865
+ ## 2 ACK 265 199.0000 4.852273
1866
+ ## 3 ALB 439 143.0000 14.397129
1867
+ ## 4 ATL 17215 757.1082 11.300113
1868
+ ## 5 AUS 2439 1514.2530 6.019909
1869
+ ## 6 AVL 275 583.5818 8.003831
1890
1870
  ```
1891
1871
 
1892
1872
  # Using Data Table
@@ -1897,9 +1877,9 @@ R.library('data.table')
1897
1877
  R.install_and_loads('curl')
1898
1878
 
1899
1879
  input = "https://raw.githubusercontent.com/Rdatatable/data.table/master/vignettes/flights14.csv"
1900
- @flights = R.fread(input)
1901
- puts @flights
1902
- puts @flights.dim
1880
+ flights = R.fread(input)
1881
+ puts flights
1882
+ puts flights.dim
1903
1883
  ```
1904
1884
 
1905
1885
  ```
@@ -1958,17 +1938,17 @@ puts data_table.ID
1958
1938
 
1959
1939
  ```ruby
1960
1940
  # subset rows in i
1961
- ans = @flights[(:origin.eq "JFK") & (:month.eq 6)]
1941
+ ans = flights[(:origin.eq "JFK") & (:month.eq 6)]
1962
1942
  puts ans.head
1963
1943
 
1964
1944
  # Get the first two rows from flights.
1965
1945
 
1966
- ans = @flights[(1..2)]
1946
+ ans = flights[(1..2)]
1967
1947
  puts ans
1968
1948
 
1969
1949
  # Sort flights first by column origin in ascending order, and then by dest in descending order:
1970
1950
 
1971
- # ans = @flights[E.order(:origin, -(:dest))]
1951
+ # ans = flights[E.order(:origin, -(:dest))]
1972
1952
  # puts ans.head
1973
1953
  ```
1974
1954
 
@@ -2000,15 +1980,15 @@ puts ans
2000
1980
  # Select column(s) in j
2001
1981
  # select arr_delay column, but return it as a vector.
2002
1982
 
2003
- ans = @flights[:all, :arr_delay]
1983
+ ans = flights[:all, :arr_delay]
2004
1984
  puts ans.head
2005
1985
 
2006
1986
  # Select arr_delay column, but return as a data.table instead.
2007
1987
 
2008
- ans = @flights[:all, :arr_delay.list]
1988
+ ans = flights[:all, :arr_delay.list]
2009
1989
  puts ans.head
2010
1990
 
2011
- ans = @flights[:all, E.list(:arr_delay, :dep_delay)]
1991
+ ans = flights[:all, E.list(:arr_delay, :dep_delay)]
2012
1992
  ```
2013
1993
 
2014
1994
  ```
@@ -2033,68 +2013,42 @@ the data frame with the necessary data:
2033
2013
 
2034
2014
  ```ruby
2035
2015
  # copy the R variable :mtcars to the Ruby mtcars variable
2036
- @mtcars = ~:mtcars
2016
+ mtcars = ~:mtcars
2037
2017
 
2038
2018
  # create a new column 'car_name' to store the car names so that it can be
2039
2019
  # used for plotting. The 'rownames' of the data frame cannot be used as
2040
2020
  # data for plotting
2041
- @mtcars.car_name = R.rownames(:mtcars)
2021
+ mtcars.car_name = R.rownames(:mtcars)
2042
2022
 
2043
2023
  # compute normalized mpg and add it to a new column called mpg_z
2044
2024
  # Note that the mean value for mpg can be obtained by calling the 'mean'
2045
2025
  # function on the vector 'mtcars.mpg'. The same with the standard
2046
2026
  # deviation 'sd'. The vector is then rounded to two digits with 'round 2'
2047
- @mtcars.mpg_z = ((@mtcars.mpg - @mtcars.mpg.mean)/@mtcars.mpg.sd).round 2
2027
+ mtcars.mpg_z = ((mtcars.mpg - mtcars.mpg.mean)/mtcars.mpg.sd).round 2
2048
2028
 
2049
2029
  # create a new column 'mpg_type'. Function 'ifelse' is a vectorized function
2050
2030
  # that looks at every element of the mpg_z vector and if the value is below
2051
2031
  # 0, returns 'below', otherwise returns 'above'
2052
- @mtcars.mpg_type = (@mtcars.mpg_z < 0).ifelse("below", "above")
2032
+ mtcars.mpg_type = (mtcars.mpg_z < 0).ifelse("below", "above")
2053
2033
 
2054
2034
  # order the mtcars data set by the mpg_z vector from smaller to larger values
2055
- @mtcars = @mtcars[@mtcars.mpg_z.order, :all]
2035
+ mtcars = mtcars[mtcars.mpg_z.order, :all]
2056
2036
 
2057
2037
  # convert the car_name column to a factor to retain sorted order in plot
2058
- @mtcars.car_name = @mtcars.car_name.factor levels: @mtcars.car_name
2038
+ mtcars.car_name = mtcars.car_name.factor levels: mtcars.car_name
2059
2039
 
2060
2040
  # let's look at the final data frame
2061
- puts @mtcars
2041
+ puts mtcars.head
2062
2042
  ```
2063
2043
 
2064
2044
  ```
2065
- ## mpg cyl disp hp drat wt qsec vs am gear carb
2066
- ## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
2067
- ## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
2068
- ## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
2069
- ## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
2070
- ## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
2071
- ## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
2072
- ## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
2073
- ## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
2074
- ## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
2075
- ## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
2076
- ## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
2077
- ## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
2078
- ## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
2079
- ## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
2080
- ## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
2081
- ## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
2082
- ## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
2083
- ## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
2084
- ## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
2085
- ## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
2086
- ## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
2087
- ## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
2088
- ## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
2089
- ## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
2090
- ## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
2091
- ## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
2092
- ## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
2093
- ## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
2094
- ## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
2095
- ## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
2096
- ## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
2097
- ## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
2045
+ ## mpg cyl disp hp drat wt qsec vs am gear carb
2046
+ ## Cadillac Fleetwood 10.4 8 472 205 2.93 5.250 17.98 0 0 3 4
2047
+ ## Lincoln Continental 10.4 8 460 215 3.00 5.424 17.82 0 0 3 4
2048
+ ## Camaro Z28 13.3 8 350 245 3.73 3.840 15.41 0 0 3 4
2049
+ ## Duster 360 14.3 8 360 245 3.21 3.570 15.84 0 0 3 4
2050
+ ## Chrysler Imperial 14.7 8 440 230 3.23 5.345 17.42 0 0 3 4
2051
+ ## Maserati Bora 15.0 8 301 335 3.54 3.570 14.60 0 1 5 8
2098
2052
  ## car_name mpg_z mpg_type
2099
2053
  ## Cadillac Fleetwood Cadillac Fleetwood -1.61 below
2100
2054
  ## Lincoln Continental Lincoln Continental -1.61 below
@@ -2102,32 +2056,6 @@ puts @mtcars
2102
2056
  ## Duster 360 Duster 360 -0.96 below
2103
2057
  ## Chrysler Imperial Chrysler Imperial -0.89 below
2104
2058
  ## Maserati Bora Maserati Bora -0.84 below
2105
- ## Merc 450SLC Merc 450SLC -0.81 below
2106
- ## AMC Javelin AMC Javelin -0.81 below
2107
- ## Dodge Challenger Dodge Challenger -0.76 below
2108
- ## Ford Pantera L Ford Pantera L -0.71 below
2109
- ## Merc 450SE Merc 450SE -0.61 below
2110
- ## Merc 450SL Merc 450SL -0.46 below
2111
- ## Merc 280C Merc 280C -0.38 below
2112
- ## Valiant Valiant -0.33 below
2113
- ## Hornet Sportabout Hornet Sportabout -0.23 below
2114
- ## Merc 280 Merc 280 -0.15 below
2115
- ## Pontiac Firebird Pontiac Firebird -0.15 below
2116
- ## Ferrari Dino Ferrari Dino -0.06 below
2117
- ## Mazda RX4 Mazda RX4 0.15 above
2118
- ## Mazda RX4 Wag Mazda RX4 Wag 0.15 above
2119
- ## Hornet 4 Drive Hornet 4 Drive 0.22 above
2120
- ## Volvo 142E Volvo 142E 0.22 above
2121
- ## Toyota Corona Toyota Corona 0.23 above
2122
- ## Datsun 710 Datsun 710 0.45 above
2123
- ## Merc 230 Merc 230 0.45 above
2124
- ## Merc 240D Merc 240D 0.72 above
2125
- ## Porsche 914-2 Porsche 914-2 0.98 above
2126
- ## Fiat X1-9 Fiat X1-9 1.20 above
2127
- ## Honda Civic Honda Civic 1.71 above
2128
- ## Lotus Europa Lotus Europa 1.71 above
2129
- ## Fiat 128 Fiat 128 2.04 above
2130
- ## Toyota Corolla Toyota Corolla 2.29 above
2131
2059
  ```
2132
2060
  Now, let's plot the diverging bar plot. When using gKnit, there is no need to call
2133
2061
  'R.awt' to create a plotting device, since gKnit does take care of it. Galaaz
@@ -2149,19 +2077,604 @@ but in this graph we want the bars to be horizontally layed so we add 'coord\_fl
2149
2077
  ```ruby
2150
2078
  require 'ggplot'
2151
2079
 
2152
- puts @mtcars.ggplot(E.aes(x: :car_name, y: :mpg_z, label: :mpg_z)) +
2153
- R.geom_bar(E.aes(fill: :mpg_type), stat: 'identity', width: 0.5) +
2154
- R.scale_fill_manual(name: 'Mileage',
2155
- labels: R.c('Above Average', 'Below Average'),
2156
- values: R.c('above': '#00ba38', 'below': '#f8766d')) +
2157
- R.labs(subtitle: "Normalised mileage from 'mtcars'",
2158
- title: "Diverging Bars") +
2159
- R.coord_flip
2080
+ puts mtcars.ggplot(E.aes(x: :car_name, y: :mpg_z, label: :mpg_z)) +
2081
+ R.geom_bar(E.aes(fill: :mpg_type), stat: 'identity', width: 0.5) +
2082
+ R.scale_fill_manual(name: 'Mileage',
2083
+ labels: R.c('Above Average', 'Below Average'),
2084
+ values: R.c('above': '#00ba38', 'below': '#f8766d')) +
2085
+ R.labs(subtitle: "Normalised mileage from 'mtcars'",
2086
+ title: "Diverging Bars") +
2087
+ R.coord_flip
2160
2088
  ```
2161
2089
 
2162
2090
 
2163
2091
  ![](/home/rbotafogo/desenv/galaaz/blogs/manual/manual_files/figure-html/diverging_bar.png)<!-- -->
2164
2092
 
2093
+ # Coding with Tidyverse
2094
+
2095
+ In R, and when coding with 'tidyverse', arguments to a function are usually not
2096
+ *referentially transparent*. That is, you can’t replace a value with a seemingly equivalent
2097
+ object that you’ve defined elsewhere. To see the problem, let's first define a data frame:
2098
+
2099
+
2100
+ ```ruby
2101
+ df = R.data__frame(x: (1..3), y: (3..1))
2102
+ puts df
2103
+ ```
2104
+
2105
+ ```
2106
+ ## x y
2107
+ ## 1 1 3
2108
+ ## 2 2 2
2109
+ ## 3 3 1
2110
+ ```
2111
+
2112
+ and now, let's look at this code:
2113
+
2114
+
2115
+ ```r
2116
+ my_var <- x
2117
+ filter(df, my_var == 1)
2118
+ ```
2119
+ It generates the following error: "object 'x' not found".
2120
+
2121
+ However, in Galaaz, arguments are referentially transparent, as can be seen in the
2122
+ code below. Note initially that 'my_var = :x' will not give the error "object 'x' not found"
2123
+ since ':x' is treated as an expression and assigned to my\_var. Then when doing (my\_var.eq 1),
2124
+ my\_var is a variable that resolves to ':x' and it becomes equivalent to (:x.eq 1) which is
2125
+ what we want.
2126
+
2127
+
2128
+ ```ruby
2129
+ my_var = :x
2130
+ puts df.filter(my_var.eq 1)
2131
+ ```
2132
+
2133
+ ```
2134
+ ## x y
2135
+ ## 1 1 3
2136
+ ```
2137
+ As stated by Hadley:
2138
+
2139
+ > dplyr code is ambiguous. Depending on what variables are defined where,
2140
+ > filter(df, x == y) could be equivalent to any of:
2141
+
2142
+ ```
2143
+ df[df$x == df$y, ]
2144
+ df[df$x == y, ]
2145
+ df[x == df$y, ]
2146
+ df[x == y, ]
2147
+ ```
2148
+ In galaaz this ambiguity does not exist, filter(df, x.eq y) is not a valid expression as
2149
+ expressions are built with symbols. In doing filter(df, :x.eq y) we are looking for elements
2150
+ of the 'x' column that are equal to a previously defined y variable. Finally in
2151
+ filter(df, :x.eq :y) we are looking for elements in which the 'x' column value is equal to
2152
+ the 'y' column value. This can be seen in the following two chunks of code:
2153
+
2154
+
2155
+ ```ruby
2156
+ y = 1
2157
+ x = 2
2158
+
2159
+ # looking for values where the 'x' column is equal to the 'y' column
2160
+ puts df.filter(:x.eq :y)
2161
+ ```
2162
+
2163
+ ```
2164
+ ## x y
2165
+ ## 1 2 2
2166
+ ```
2167
+
2168
+
2169
+ ```ruby
2170
+ # looking for values where the 'x' column is equal to the 'y' variable
2171
+ # in this case, the number 1
2172
+ puts df.filter(:x.eq y)
2173
+ ```
2174
+
2175
+ ```
2176
+ ## x y
2177
+ ## 1 1 3
2178
+ ```
2179
+ ## Writing a function that applies to different data sets
2180
+
2181
+ Let's suppose that we want to write a function that receives as the first argument a data frame
2182
+ and as second argument an expression that adds a column to the data frame that is equal to the
2183
+ sum of elements in column 'a' plus 'x'.
2184
+
2185
+ Here is the intended behaviour using the 'mutate' function of 'dplyr':
2186
+
2187
+ ```
2188
+ mutate(df1, y = a + x)
2189
+ mutate(df2, y = a + x)
2190
+ mutate(df3, y = a + x)
2191
+ mutate(df4, y = a + x)
2192
+ ```
2193
+ The naive approach to writing an R function to solve this problem is:
2194
+
2195
+ ```
2196
+ mutate_y <- function(df) {
2197
+ mutate(df, y = a + x)
2198
+ }
2199
+ ```
2200
+ Unfortunately, in R, this function can fail silently if one of the variables isn’t present
2201
+ in the data frame, but is present in the global environment. We will not go through here how
2202
+ to solve this problem in R.
2203
+
2204
+ In Galaaz the method mutate_y below will work fine and will never fail silently.
2205
+
2206
+
2207
+ ```ruby
2208
+ def mutate_y(df)
2209
+ df.mutate(:y.assign :a + :x)
2210
+ end
2211
+ ```
2212
+ Here we create a data frame that has only one column named 'x':
2213
+
2214
+
2215
+ ```ruby
2216
+ df1 = R.data__frame(x: (1..3))
2217
+ puts df1
2218
+ ```
2219
+
2220
+ ```
2221
+ ## x
2222
+ ## 1 1
2223
+ ## 2 2
2224
+ ## 3 3
2225
+ ```
2226
+
2227
+ Note that method mutate_y will fail independently of the fact that variable 'a' is defined and
2228
+ in the scope of the method. Variable 'a' has no relationship with the symbol ':a' used in the
2229
+ definition of 'mutate\_y' above:
2230
+
2231
+
2232
+ ```ruby
2233
+ a = 10
2234
+ mutate_y(df1)
2235
+ ```
2236
+
2237
+ ```
2238
+ ## Message:
2239
+ ## Error in mutate_impl(.data, dots) :
2240
+ ## Evaluation error: object 'a' not found.
2241
+ ## In addition: Warning message:
2242
+ ## In mutate_impl(.data, dots) :
2243
+ ## mismatched protect/unprotect (unprotect with empty protect stack) (RError)
2244
+ ## Translated to internal error
2245
+ ```
2246
+ ## Different expressions
2247
+
2248
+ Let's move to the next problem as presented by Hadley where trying to write a function in R
2249
+ that will receive two arguments, the first a variable and the second an expression, is not trivial.
2250
+ Below we create a data frame and we want to write a function that groups data by a variable and
2251
+ summarises it by an expression:
2252
+
2253
+
2254
+ ```r
2255
+ set.seed(123)
2256
+
2257
+ df <- data.frame(
2258
+ g1 = c(1, 1, 2, 2, 2),
2259
+ g2 = c(1, 2, 1, 2, 1),
2260
+ a = sample(5),
2261
+ b = sample(5)
2262
+ )
2263
+
2264
+ as.data.frame(df)
2265
+ ```
2266
+
2267
+ ```
2268
+ ## g1 g2 a b
2269
+ ## 1 1 1 2 1
2270
+ ## 2 1 2 4 3
2271
+ ## 3 2 1 5 4
2272
+ ## 4 2 2 3 2
2273
+ ## 5 2 1 1 5
2274
+ ```
2275
+
2276
+ ```r
2277
+ d2 <- df %>%
2278
+ group_by(g1) %>%
2279
+ summarise(a = mean(a))
2280
+
2281
+ as.data.frame(d2)
2282
+ ```
2283
+
2284
+ ```
2285
+ ## g1 a
2286
+ ## 1 1 3
2287
+ ## 2 2 3
2288
+ ```
2289
+
2290
+ ```r
2291
+ d2 <- df %>%
2292
+ group_by(g2) %>%
2293
+ summarise(a = mean(a))
2294
+
2295
+ as.data.frame(d2)
2296
+ ```
2297
+
2298
+ ```
2299
+ ## g2 a
2300
+ ## 1 1 2.666667
2301
+ ## 2 2 3.500000
2302
+ ```
2303
+
2304
+ As shown by Hadley, one might expect this function to do the trick:
2305
+
2306
+
2307
+ ```r
2308
+ my_summarise <- function(df, group_var) {
2309
+ df %>%
2310
+ group_by(group_var) %>%
2311
+ summarise(a = mean(a))
2312
+ }
2313
+
2314
+ # my_summarise(df, g1)
2315
+ #> Error: Column `group_var` is unknown
2316
+ ```
2317
+
2318
+ In order to solve this problem, coding with dplyr requires the introduction of many new concepts
2319
+ and functions such as 'quo', 'quos', 'enquo', 'enquos', '!!' (bang bang), '!!!' (triple bang).
2320
+ Again, we'll leave to Hadley the explanation on how to use all those functions.
2321
+
2322
+ Now, let's try to implement the same function in galaaz. The next code block first prints the
2323
+ 'df' data frame defined previously in R (to access an R variable from Galaaz, we use the tilde
2324
+ operator '~' applied to the R variable name as a symbol, i.e., ':df').
2325
+
2326
+
2327
+ ```ruby
2328
+ puts ~:df
2329
+ ```
2330
+
2331
+ ```
2332
+ ## g1 g2 a b
2333
+ ## 1 1 1 2 1
2334
+ ## 2 1 2 4 3
2335
+ ## 3 2 1 5 4
2336
+ ## 4 2 2 3 2
2337
+ ## 5 2 1 1 5
2338
+ ```
2339
+
2340
+ We then create the 'my_summarize' method and call it passing the R data frame and
2341
+ the group by variable ':g1':
2342
+
2343
+
2344
+ ```ruby
2345
+ def my_summarize(df, group_var)
2346
+ df.group_by(group_var).
2347
+ summarize(a: :a.mean)
2348
+ end
2349
+
2350
+ puts my_summarize(:df, :g1).as__data__frame
2351
+ ```
2352
+
2353
+ ```
2354
+ ## g1 a
2355
+ ## 1 1 3
2356
+ ## 2 2 3
2357
+ ```
2358
+
2359
+ It works!!! Well, let's make sure this was not just some coincidence
2360
+
2361
+
2362
+ ```ruby
2363
+ puts my_summarize(:df, :g2).as__data__frame
2364
+ ```
2365
+
2366
+ ```
2367
+ ## g2 a
2368
+ ## 1 1 2.666667
2369
+ ## 2 2 3.500000
2370
+ ```
2371
+
2372
+ Great, everything is fine! No magic, no new functions, no complexities, just normal, standard Ruby
2373
+ code. If you've ever done NSE in R, this certainly feels much safer and easy to implement.
2374
+
2375
+ ## Different input variables
2376
+
2377
+ In the previous section we've managed to get rid of all NSE formulation for a simple example, but
2378
+ does this remain true for more complex examples, or will the Galaaz way prove impractical for
2379
+ more complex code?
2380
+
2381
+ In the next example Hadley asks us to write a function that, given an expression such as 'a'
2382
+ or 'a * b', calculates three summaries. What we want is a function that does the same as these R
2383
+ statements:
2384
+
2385
+ ```
2386
+ summarise(df, mean = mean(a), sum = sum(a), n = n())
2387
+ #> # A tibble: 1 x 3
2388
+ #> mean sum n
2389
+ #> <dbl> <int> <int>
2390
+ #> 1 3 15 5
2391
+
2392
+ summarise(df, mean = mean(a * b), sum = sum(a * b), n = n())
2393
+ #> # A tibble: 1 x 3
2394
+ #> mean sum n
2395
+ #> <dbl> <int> <int>
2396
+ #> 1 9 45 5
2397
+ ```
2398
+
2399
+ Let's try it in galaaz:
2400
+
2401
+
2402
+ ```ruby
2403
+ def my_summarise2(df, expr)
2404
+ df.summarize(
2405
+ mean: E.mean(expr),
2406
+ sum: E.sum(expr),
2407
+ n: E.n
2408
+ )
2409
+ end
2410
+
2411
+ puts my_summarise2((~:df), :a)
2412
+ puts "\n"
2413
+ puts my_summarise2((~:df), :a * :b)
2414
+ ```
2415
+
2416
+ ```
2417
+ ## mean sum n
2418
+ ## 1 3 15 5
2419
+ ##
2420
+ ## mean sum n
2421
+ ## 1 9 45 5
2422
+ ```
2423
+
2424
+ Once again, there is no need to use any special theory or functions. The only point to be
2425
+ careful about is the use of 'E' to build expressions from functions 'mean', 'sum' and 'n'.
2426
+
2427
+ ## Different input and output variable
2428
+
2429
+ Now the next challenge presented by Hadley is to vary the name of the output variables based on
2430
+ the received expression. So, if the input expression is 'a', we want our data frame columns to
2431
+ be named 'mean\_a' and 'sum\_a'. Now, if the input expression is 'b', columns
2432
+ should be named 'mean\_b' and 'sum\_b'.
2433
+
2434
+ ```
2435
+ mutate(df, mean_a = mean(a), sum_a = sum(a))
2436
+ #> # A tibble: 5 x 6
2437
+ #> g1 g2 a b mean_a sum_a
2438
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
2439
+ #> 1 1 1 1 3 3 15
2440
+ #> 2 1 2 4 2 3 15
2441
+ #> 3 2 1 2 1 3 15
2442
+ #> 4 2 2 5 4 3 15
2443
+ #> # … with 1 more row
2444
+
2445
+ mutate(df, mean_b = mean(b), sum_b = sum(b))
2446
+ #> # A tibble: 5 x 6
2447
+ #> g1 g2 a b mean_b sum_b
2448
+ #> <dbl> <dbl> <int> <int> <dbl> <int>
2449
+ #> 1 1 1 1 3 3 15
2450
+ #> 2 1 2 4 2 3 15
2451
+ #> 3 2 1 2 1 3 15
2452
+ #> 4 2 2 5 4 3 15
2453
+ #> # … with 1 more row
2454
+ ```
2455
+ In order to solve this problem in R, Hadley needs to introduce some more new functions and notations:
2456
+ 'quo_name' and the ':=' operator from package 'rlang'
2457
+
2458
+ Here is our Ruby code:
2459
+
2460
+
2461
+ ```ruby
2462
+ def my_mutate(df, expr)
2463
+ mean_name = "mean_#{expr.to_s}"
2464
+ sum_name = "sum_#{expr.to_s}"
2465
+
2466
+ df.mutate(mean_name => E.mean(expr),
2467
+ sum_name => E.sum(expr))
2468
+ end
2469
+
2470
+ puts my_mutate((~:df), :a)
2471
+ puts "\n"
2472
+ puts my_mutate((~:df), :b)
2473
+ ```
2474
+
2475
+ ```
2476
+ ## g1 g2 a b mean_a sum_a
2477
+ ## 1 1 1 2 1 3 15
2478
+ ## 2 1 2 4 3 3 15
2479
+ ## 3 2 1 5 4 3 15
2480
+ ## 4 2 2 3 2 3 15
2481
+ ## 5 2 1 1 5 3 15
2482
+ ##
2483
+ ## g1 g2 a b mean_b sum_b
2484
+ ## 1 1 1 2 1 3 15
2485
+ ## 2 1 2 4 3 3 15
2486
+ ## 3 2 1 5 4 3 15
2487
+ ## 4 2 2 3 2 3 15
2488
+ ## 5 2 1 1 5 3 15
2489
+ ```
2490
+ It really seems that "Non Standard Evaluation" is actually quite standard in Galaaz! But, you
2491
+ might have noticed a small change in the way the arguments to the mutate method were called.
2492
+ In a previous example we used df.summarise(mean: E.mean(:a), ...) where the column name was
2493
+ followed by a colon ':'. In this example, we have df.mutate(mean_name => E.mean(expr), ...)
2494
+ and variable mean\_name is not followed by ':' but by '=>'. This is standard Ruby notation.
2495
+
2496
+ [explain....]
2497
+
2498
+ ## Capturing multiple variables
2499
+
2500
+ Moving on with new complexities, Hadley asks us to solve the problem in which the
2501
+ summarise function will receive any number of grouping variables.
2502
+
2503
+ This again is quite standard Ruby. In order to receive an undefined number of parameters
2504
+ the parameter is preceded by '*':
2505
+
2506
+
2507
+ ```ruby
2508
+ def my_summarise3(df, *group_vars)
2509
+ df.group_by(*group_vars).
2510
+ summarise(a: E.mean(:a))
2511
+ end
2512
+
2513
+ puts my_summarise3((~:df), :g1, :g2).as__data__frame
2514
+ ```
2515
+
2516
+ ```
2517
+ ## g1 g2 a
2518
+ ## 1 1 1 2
2519
+ ## 2 1 2 4
2520
+ ## 3 2 1 3
2521
+ ## 4 2 2 3
2522
+ ```
2523
+
2524
+ ## Why does R require NSE and Galaaz does not?
2525
+
2526
+ NSE introduces a number of new concepts, such as 'quoting', 'quasiquotation', 'unquoting' and
2527
+ 'unquote-splicing', while in Galaaz none of those concepts are needed. What gives?
2528
+
2529
+ R is an extremely flexible language and it has lazy evaluation of parameters. When in R a
2530
+ function is called as 'summarise(df, a = b)', the summarise function receives the literal
2531
+ 'a = b' parameter and can work with this as if it were a string. In R, it is not clear what
2532
+ a and b are, they can be expressions or they can be variables, it is up to the function to
2533
+ decide what 'a = b' means.
2534
+
2535
+ In Ruby, there is no lazy evaluation of parameters and 'a' is always a variable and so is 'b'.
2536
+ Variables assume their value as soon as they are used, so 'x = a' is immediately evaluated and
2537
+ variable 'x' will receive the value of variable 'a' as soon as the Ruby statement is executed.
2538
+ Ruby also provides the notion of a symbol; ':a' is a symbol and does not evaluate to anything.
2539
+ Galaaz uses Ruby symbols to build expressions that are not bound to anything: ':a.eq :b' is
2540
+ clearly an expression and has no relationship whatsoever with the statement 'a = b'. By using
2541
+ symbols, variables and expressions all the possible ambiguities that are found in R are
2542
+ eliminated in Galaaz.
2543
+
2544
+ The main problem that remains, is that in R, functions are not clearly documented as what type
2545
+ of input they are expecting, they might be expecting regular variables or they might be
2546
+ expecting expressions and the R function will know how to deal with an input of the form
2547
+ 'a = b', now for the Ruby developer it might not be immediately clear if it should call the
2548
+ function passing the value 'true' if variable 'a' is equal to variable 'b' or if it should
2549
+ call the function passing the expression ':a.eq :b'.
2550
+
2551
+
2552
+ ## Advanced dplyr features
2553
+
2554
+ In the blog: Programming with dplyr by using dplyr (https://www.r-bloggers.com/programming-with-dplyr-by-using-dplyr/) Iñaki Úcar shows surprise that some R users are trying to code in dplyr avoiding
2555
+ the use of NSE. For instance he says:
2556
+
2557
+ > Take the example of seplyr. It stands for standard evaluation dplyr, and enables us to
2558
+ > program over dplyr without having “to bring in (or study) any deep-theory or
2559
+ > heavy-weight tools such as rlang/tidyeval”.
2560
+
2561
+ For me, there isn't really any surprise that users are trying to avoid dplyr deep-theory. R
2562
+ users frequently are not programmers and learning to code is already hard business, on top
2563
+ of that, having to learn how to 'quote' or 'enquo' or 'quos' or 'enquos' is not necessarily
2564
+ a 'piece of cake'. So much so, that 'tidyeval' has some more advanced functions that instead
2565
+ of using quoted expressions, uses strings as arguments.
2566
+
2567
+ In the following examples, we show the use of functions 'group\_by\_at', 'summarise\_at' and
2568
+ 'rename\_at' that receive strings as argument. The data frame used is 'starwars', which describes
2569
+ features of characters in the Starwars movies:
2570
+
2571
+
2572
+ ```ruby
2573
+ puts (~:starwars).head.as__data__frame
2574
+ ```
2575
+
2576
+ ```
2577
+ ## name height mass hair_color skin_color eye_color birth_year
2578
+ ## 1 Luke Skywalker 172 77 blond fair blue 19.0
2579
+ ## 2 C-3PO 167 75 <NA> gold yellow 112.0
2580
+ ## 3 R2-D2 96 32 <NA> white, blue red 33.0
2581
+ ## 4 Darth Vader 202 136 none white yellow 41.9
2582
+ ## 5 Leia Organa 150 49 brown light brown 19.0
2583
+ ## 6 Owen Lars 178 120 brown, grey light blue 52.0
2584
+ ## gender homeworld species
2585
+ ## 1 male Tatooine Human
2586
+ ## 2 <NA> Tatooine Droid
2587
+ ## 3 <NA> Naboo Droid
2588
+ ## 4 male Tatooine Human
2589
+ ## 5 female Alderaan Human
2590
+ ## 6 male Tatooine Human
2591
+ ## films
2592
+ ## 1 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2593
+ ## 2 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
2594
+ ## 3 Attack of the Clones, The Phantom Menace, Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2595
+ ## 4 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope
2596
+ ## 5 Revenge of the Sith, Return of the Jedi, The Empire Strikes Back, A New Hope, The Force Awakens
2597
+ ## 6 Attack of the Clones, Revenge of the Sith, A New Hope
2598
+ ## vehicles starships
2599
+ ## 1 Snowspeeder, Imperial Speeder Bike X-wing, Imperial shuttle
2600
+ ## 2
2601
+ ## 3
2602
+ ## 4 TIE Advanced x1
2603
+ ## 5 Imperial Speeder Bike
2604
+ ## 6
2605
+ ```
2606
+ The grouped_mean function below will receive a grouping variable and calculate summaries for
2607
+ the value\_variables given:
2608
+
2609
+
2610
+ ```r
2611
+ grouped_mean <- function(data, grouping_variables, value_variables) {
2612
+ data %>%
2613
+ group_by_at(grouping_variables) %>%
2614
+ mutate(count = n()) %>%
2615
+ summarise_at(c(value_variables, "count"), mean, na.rm = TRUE) %>%
2616
+ rename_at(value_variables, funs(paste0("mean_", .)))
2617
+ }
2618
+
2619
+ gm = starwars %>%
2620
+ grouped_mean("eye_color", c("mass", "birth_year"))
2621
+
2622
+ as.data.frame(gm)
2623
+ ```
2624
+
2625
+ ```
2626
+ ## eye_color mean_mass mean_birth_year count
2627
+ ## 1 black 76.28571 33.00000 10
2628
+ ## 2 blue 86.51667 67.06923 19
2629
+ ## 3 blue-gray 77.00000 57.00000 1
2630
+ ## 4 brown 66.09231 108.96429 21
2631
+ ## 5 dark NaN NaN 1
2632
+ ## 6 gold NaN NaN 1
2633
+ ## 7 green, yellow 159.00000 NaN 1
2634
+ ## 8 hazel 66.00000 34.50000 3
2635
+ ## 9 orange 282.33333 231.00000 8
2636
+ ## 10 pink NaN NaN 1
2637
+ ## 11 red 81.40000 33.66667 5
2638
+ ## 12 red, blue NaN NaN 1
2639
+ ## 13 unknown 31.50000 NaN 3
2640
+ ## 14 white 48.00000 NaN 1
2641
+ ## 15 yellow 81.11111 76.38000 11
2642
+ ```
2643
+
2644
+ The same code with Galaaz, becomes:
2645
+
2646
+
2647
+ ```ruby
2648
+ def grouped_mean(data, grouping_variables, value_variables)
2649
+ data.
2650
+ group_by_at(grouping_variables).
2651
+ mutate(count: E.n).
2652
+ summarise_at(E.c(value_variables, "count"), ~:mean, na__rm: true).
2653
+ rename_at(value_variables, E.funs(E.paste0("mean_", value_variables)))
2654
+ end
2655
+
2656
+ puts grouped_mean((~:starwars), "eye_color", E.c("mass", "birth_year")).as__data__frame
2657
+ ```
2658
+
2659
+ ```
2660
+ ## eye_color mean_mass mean_birth_year count
2661
+ ## 1 black 76.28571 33.00000 10
2662
+ ## 2 blue 86.51667 67.06923 19
2663
+ ## 3 blue-gray 77.00000 57.00000 1
2664
+ ## 4 brown 66.09231 108.96429 21
2665
+ ## 5 dark NaN NaN 1
2666
+ ## 6 gold NaN NaN 1
2667
+ ## 7 green, yellow 159.00000 NaN 1
2668
+ ## 8 hazel 66.00000 34.50000 3
2669
+ ## 9 orange 282.33333 231.00000 8
2670
+ ## 10 pink NaN NaN 1
2671
+ ## 11 red 81.40000 33.66667 5
2672
+ ## 12 red, blue NaN NaN 1
2673
+ ## 13 unknown 31.50000 NaN 3
2674
+ ## 14 white 48.00000 NaN 1
2675
+ ## 15 yellow 81.11111 76.38000 11
2676
+ ```
2677
+
2165
2678
 
2166
2679
  [TO BE CONTINUED...]
2167
2680