galaaz 0.4.2 → 0.4.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +25 -0
  3. data/Rakefile +8 -0
  4. data/bin/gknit +9 -5
  5. data/bin/gstudio +4 -2
  6. data/bin/gstudio.rb +32 -2
  7. data/blogs/dev/dev.html +219 -34
  8. data/blogs/dev/dev.md +26 -26
  9. data/blogs/dev/dev_files/figure-html/bubble-1.png +0 -0
  10. data/blogs/dev/dev_files/figure-html/diverging_bar.png +0 -0
  11. data/blogs/dplyr/dplyr.rb +63 -0
  12. data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +38 -26
  13. data/blogs/galaaz_ggplot/galaaz_ggplot.aux +16 -17
  14. data/blogs/galaaz_ggplot/galaaz_ggplot.pdf +0 -0
  15. data/blogs/galaaz_ggplot/galaaz_ggplot.tex +65 -31
  16. data/blogs/oh_my/not_so.rb +2342 -0
  17. data/blogs/oh_my/oh_my.Rmd +493 -0
  18. data/blogs/oh_my/oh_my.html +680 -0
  19. data/blogs/oh_my/oh_my.md +597 -0
  20. data/blogs/oh_my/old.Rmd +2100 -0
  21. data/blogs/ruby_plot/figures/facets_with_decorations.png +0 -0
  22. data/blogs/ruby_plot/figures/facets_with_jitter.png +0 -0
  23. data/blogs/ruby_plot/figures/final_box_plot.png +0 -0
  24. data/blogs/ruby_plot/figures/final_violin_plot.png +0 -0
  25. data/blogs/ruby_plot/figures/violin_with_jitter.png +0 -0
  26. data/blogs/ruby_plot/ruby_plot.Rmd +147 -122
  27. data/blogs/ruby_plot/ruby_plot.Rmd_external_figs +662 -0
  28. data/blogs/ruby_plot/ruby_plot.html +49 -54
  29. data/blogs/ruby_plot/ruby_plot.md +147 -122
  30. data/blogs/ruby_plot/ruby_plot.pdf +0 -0
  31. data/blogs/ruby_plot/ruby_plot.tex +776 -157
  32. data/blogs/ruby_plot/ruby_plot_files/figure-html/dose_len.svg +57 -0
  33. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_delivery.svg +106 -0
  34. data/blogs/ruby_plot/ruby_plot_files/figure-html/facet_by_dose.svg +110 -0
  35. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color.svg +174 -0
  36. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_by_delivery_color2.svg +236 -0
  37. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_decorations.png +0 -0
  38. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_jitter.svg +296 -0
  39. data/blogs/ruby_plot/ruby_plot_files/figure-html/facets_with_points.svg +236 -0
  40. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_box_plot.svg +218 -0
  41. data/blogs/ruby_plot/ruby_plot_files/figure-html/final_violin_plot.svg +128 -0
  42. data/blogs/ruby_plot/ruby_plot_files/figure-html/violin_with_jitter.svg +150 -0
  43. data/examples/islr/ch2.spec.rb +21 -18
  44. data/examples/islr/ch3_boston.rb +14 -5
  45. data/examples/islr/ch3_multiple_regression.rb +2 -3
  46. data/examples/islr/ch6.spec.rb +1 -1
  47. data/examples/islr/x_y_rnorm.jpg +0 -0
  48. data/lib/R_interface/r.rb +14 -10
  49. data/lib/R_interface/r_libs.R +9 -0
  50. data/lib/R_interface/r_methods.rb +77 -6
  51. data/lib/R_interface/{expression.rb → r_module_s.rb} +13 -14
  52. data/lib/R_interface/rbinary_operators.rb +58 -71
  53. data/lib/R_interface/rdata_frame.rb +2 -1
  54. data/lib/R_interface/rdevices.R +4 -0
  55. data/lib/R_interface/rdevices.rb +1 -1
  56. data/lib/R_interface/renvironment.rb +34 -1
  57. data/lib/R_interface/rexpression.rb +108 -2
  58. data/lib/R_interface/rindexed_object.rb +3 -1
  59. data/lib/R_interface/rlanguage.rb +18 -2
  60. data/lib/R_interface/rmatrix.rb +14 -0
  61. data/lib/R_interface/rmd_indexed_object.rb +5 -1
  62. data/lib/R_interface/robject.rb +61 -23
  63. data/lib/R_interface/rsupport.rb +111 -53
  64. data/lib/R_interface/rsymbol.rb +6 -5
  65. data/lib/R_interface/ruby_extensions.rb +130 -4
  66. data/lib/R_interface/runary_operators.rb +35 -3
  67. data/lib/R_interface/rvector.rb +1 -0
  68. data/lib/galaaz.rb +0 -2
  69. data/lib/gknit/knitr_engine.rb +58 -4
  70. data/lib/gknit/ruby_engine.rb +5 -6
  71. data/lib/util/exec_ruby.rb +55 -9
  72. data/specs/all.rb +13 -3
  73. data/specs/figures/dose_len.png +0 -0
  74. data/specs/r_dataframe.spec.rb +49 -26
  75. data/specs/r_environment.spec.rb +140 -0
  76. data/specs/r_eval.spec.rb +0 -15
  77. data/specs/r_formula.spec.rb +232 -0
  78. data/specs/r_function.spec.rb +7 -8
  79. data/specs/r_list.spec.rb +4 -0
  80. data/specs/r_list_apply.spec.rb +11 -11
  81. data/specs/r_matrix.spec.rb +3 -3
  82. data/specs/{r_plots.spec.rb~ → r_nse.spec.rb} +29 -6
  83. data/specs/r_vector_creation.spec.rb +6 -0
  84. data/specs/r_vector_object.spec.rb +2 -2
  85. data/specs/r_vector_operators.spec.rb +3 -3
  86. data/specs/r_vector_subsetting.spec.rb +4 -4
  87. data/specs/ruby_expression.spec.rb +324 -0
  88. data/specs/tmp.rb +12 -524
  89. data/sty/galaaz.sty +71 -0
  90. data/version.rb +1 -1
  91. metadata +31 -41
  92. data/bin/gknit2~ +0 -6
  93. data/bin/ogk~ +0 -4
  94. data/bin/prepareR.rb~ +0 -1
  95. data/blogs/dev/dev.Rmd~ +0 -104
  96. data/blogs/galaaz_ggplot/galaaz_ggplot.dvi +0 -0
  97. data/blogs/galaaz_ggplot/midwest_external_png~ +0 -1
  98. data/blogs/gknit/gknit.Rmd~ +0 -184
  99. data/blogs/gknit/gknit.Rnd~ +0 -17
  100. data/blogs/gknit/model.rb~ +0 -46
  101. data/blogs/ruby_plot/ruby_plot.Rmd~ +0 -215
  102. data/examples/islr/Figure.jpg +0 -0
  103. data/examples/misc/moneyball.rb~ +0 -16
  104. data/examples/misc/subsetting.rb~ +0 -372
  105. data/lib/R/eng_ruby.R~ +0 -63
  106. data/lib/R_interface/capture_plot.rb~ +0 -23
  107. data/lib/R_interface/r.rb~ +0 -121
  108. data/lib/R_interface/rdevices.rb~ +0 -27
  109. data/lib/gknit.rb~ +0 -26
  110. data/lib/gknit/knitr_engine.rb~ +0 -102
  111. data/lib/gknit/ruby_engine.rb~ +0 -72
  112. data/lib/util/inline_file.rb~ +0 -23
  113. data/r_requires/knitr.rb~ +0 -4
  114. data/specs/r_language.spec.rb +0 -157
@@ -1,17 +0,0 @@
1
- ---
2
- title: "gKnit - Ruby Knitting with Galaaz"
3
- subtitle: "An example of tightly coupling Ruby and R in GraalVM"
4
- author: "Rodrigo Botafogo"
5
- tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM, ggplot2]
6
- date: "16 October 2018"
7
- output:
8
- html_document:
9
- md_document:
10
- variant: markdown_github
11
- ---
12
-
13
- ```{r setup, echo=FALSE}
14
-
15
- ```
16
-
17
- # Introduction
@@ -1,46 +0,0 @@
1
- require 'galaaz'
2
-
3
- # Loads the R 'caret' package. If not present, installs it
4
- R.install_and_loads 'caret'
5
-
6
- class Model
7
-
8
- attr_reader :data
9
- attr_reader :test
10
- attr_reader :train
11
-
12
- #==========================================================
13
- #
14
- #==========================================================
15
-
16
- def initialize(data, percent_train:, seed: 123)
17
-
18
- R.set__seed(seed)
19
- @data = data
20
- @percent_train = percent_train
21
- @seed = seed
22
-
23
- end
24
-
25
- #==========================================================
26
- #
27
- #==========================================================
28
-
29
- def partition
30
-
31
- train_index =
32
- R.createDataPartition(@data.mpg, p: @percet_train,
33
- list: false, times: 1)
34
- @train = @data[train_index, :all]
35
- @test = @data[-train_index, :all]
36
-
37
- end
38
-
39
- end
40
-
41
- mtcars = ~:mtcars
42
- model = Model.new(mtcars, percent_train: 0.8)
43
- model.partition
44
- puts model.train.head
45
- puts model.test.head
46
-
@@ -1,215 +0,0 @@
1
- ---
2
- title: "High Quality Scientific Plotting with Ruby in GraalVM"
3
- subtitle: "Also: Allowing R to use classes, modules, blocks, etc."
4
- author: "Rodrigo Botafogo"
5
- tags: [Galaaz, Ruby, R, TruffleRuby, FastR, GraalVM]
6
- date: "19 October 2018"
7
- output:
8
- html_document:
9
- self_contained: true
10
- keep_md: true
11
- pdf_document:
12
- includes:
13
- in_header: ["../../sty/galaaz.sty"]
14
- number_sections: yes
15
- ---
16
-
17
- ```{r setup, echo=FALSE}
18
-
19
- ```
20
-
21
- # Introduction
22
-
23
- Ruby is a dynamic, interpreted, reflective, object-oriented, general-purpose
24
- programming language. It was designed and developed in the mid-1990s by Yukihiro
25
- "Matz" Matsumoto in Japan. It reached high popularity with the development of Ruby on Rails
26
- (RoR) by David Heinemeier Hansson. RoR is a web application framework which was first released
27
- circa 2005 and makes extensive use of Ruby's metaprogramming features. With the advent of
28
- RoR, Ruby became extremely popular and it peaked in popularity around 2008 according to the Tiobe
29
- index (https://www.tiobe.com/tiobe-index/ruby/). From 2008 to 2015, its popularity
30
- declined consistently and then started picking up again during the next 3 years. At the time of
31
- this writing (November 2018), Ruby is ranked 16th in the Tiobe index.
32
-
33
- Python, considered a similar language to Ruby with similar features ranks 4th in the index. The
34
- first three positions are taken by Java, C and C++. One criticism often heard about Ruby, is
35
- that it is useful only for web applications while Python, with similar features has more diverse
36
- libraries, being useful for web applications with the Django framework, but also for
37
- scientific applications such as statistics, data analysis, big data, biology, etc. This
38
- criticism is by no way wrong. For scientific computing, Ruby lags way behind Python and R, the
39
- two most prestigious languages for this subject matter, with R being preferred by statisticians
40
- while Python is preferred by everyone else, because of its gentle learning curve and more
41
- "natural" programming paradigm.
42
-
43
- Comes GraalVM into the picture:
44
-
45
- GraalVM is a universal virtual machine for running applications written in JavaScript,
46
- Python 3, Ruby, R, JVM-based languages like Java, Scala, Kotlin, and LLVM-based languages
47
- such as C and C++.
48
-
49
- GraalVM removes the isolation between programming languages and enables interoperability in a
50
- shared runtime. It can run either standalone or in the context of OpenJDK, Node.js,
51
- Oracle Database, or MySQL.
52
-
53
- GraalVM allows you to write polyglot applications with a seamless way to pass values from one
54
- language to another. With GraalVM there is no copying or marshaling necessary as it is with
55
- other polyglot systems. This lets you achieve high performance when language boundaries are
56
- crossed. Most of the time there is no additional cost for crossing a language boundary at all.
57
-
58
- Often developers have to make uncomfortable compromises that require them to rewrite
59
- their software in other languages. For example:
60
-
61
- * “That library is not available in my language. I need to rewrite it.”
62
- * “That language would be the perfect fit for my problem, but we cannot run it in our environment.”
63
- * “That problem is already solved in my language, but the language is too slow.”
64
-
65
- With GraalVM we aim to allow developers to freely choose the right language for the task at
66
- hand without making compromises.
67
-
68
- As stated above, GraalVM is a _universal_ virtual machine that allows Ruby and R (and other
69
- languages) to run on the same environment. GraalVM allows polyglot applications to
70
- _seamlessly_ interact with one another and pass values from one language to the other. Based
71
- on GraalVM, the Galaaz project was started. Galaaz intends to integrate Ruby and R and allow
72
- those languages to _seamlessly_ interact in a way that the user will be unaware of such interaction.
73
-
74
- Library wrapping is a usual way of bringing features from one library into another language. For
75
- instance, whenever Python needs to perform operations efficiently, C libraries are wrapped in Python.
76
- For the Python developer, the existence of such a C library is of no importance. The problem with
77
- library wrapping is that for any new library of interest, there is the need to hand craft a new
78
- wrapper. With Galaaz, the same concept of wrapping was done, but instead of wrapping a C or an R
79
- library, Galaaz intends to wrap the whole of the R language. Doing so, all thousands of R libraries
80
- are immediately available to Ruby developers and any new library developed in R will also become
81
- available without requiring a new wrapping effort.
82
-
83
- In this article, the graphing ggplot2 library from R will be accessed by Ruby transparently,
84
- bringing to Ruby the power of high quality scientific plotting. It might seem, from
85
- the exposed above, that Galaaz mainly benefits Ruby developers and might be of no
86
- consequence to the R developer. This article will however show that migrating from R to
87
- Ruby with Galaaz is a matter of small syntactic changes. Furthermore, R lacks some
88
- fundamental constructs for code reuse and large system construction. Using Galaaz, the R
89
- developer can easily migrate to a powerful OO language, at virtually no cost and then, as
90
- needs require, she can add them to her toolbox.
91
-
92
- In this article we will explore the R ToothGrowth dataset. In doing so, we will create some plots.
93
- Furthermore we will create a "Corporate Template" for our plots ensuring that any plot of the
94
- same type will have a consistent visualisation.
95
-
96
- # gKnit
97
-
98
- This document was written using rmarkdown and the corresponding HTML was generated by the gKnit
99
- application. gKnit is a wrapper around the powerful 'knitr' application which converts
100
- rmarkdown text to many different output formats such as HTML, Latex, docx, etc. The gKnit
101
- tool is still under active development and will soon be released.
102
-
103
- In rmarkdown, text and code can be part of the same document, and code blocks are marked
104
- with a special markup. Interested readers can easily google 'knitr' and 'rmarkdown'. In
105
- gKnit, each Ruby block is evaluated independently and 'eval' in Ruby creates a new scope, so,
106
- in order for a variable defined in a block to be accessible in another block, it has to be
107
- a global variable, preceded by the '$' sign.
108
-
109
- # Exploring the Dataset
110
-
111
- Let's start by exploring our selected dataset. In this dataset the response is the length of
112
- odontoblasts (cells responsible for tooth growth) in 60 guinea pigs. Each animal
113
- received one of three dose levels of vitamin C (0.5, 1, and 2 mg/day) by one of two
114
- delivery methods, orange juice or ascorbic acid (a form of vitamin C and coded as VC).
115
-
116
- In Galaaz, in order to have access to an R variable pointed by an R symbol we use the
117
- corresponding Ruby symbol preceded by the tilde ('~') function.
118
-
119
- ```{ruby tooth_growth}
120
- # Read the R ToothGrowth variable and assign it to the
121
- # Ruby tooth_growth variable
122
- $tooth_growth = ~:ToothGrowth
123
- # convert the dose to a factor
124
- $tooth_growth.dose = $tooth_growth.dose.as__factor
125
-
126
- # print the first few elements of the dataset
127
- puts $tooth_growth.head
128
- ```
129
-
130
- Great! We've managed to read the ToothGrowth dataset and take a look at its elements. Observe
131
- that we have three columns in this dataset: 'len', 'supp' and 'dose'. Accessing a column,
132
- for example the 'len' column, is done by doing '$tooth_growth.len'.
133
-
134
- Let's explore some more details of this dataset. In particular, let's look at its dimensions,
135
- structure and summary statistics.
136
-
137
- ```{ruby stats}
138
- puts $tooth_growth.dim
139
- # check why NULL
140
- puts R.str(:ToothGrowth)
141
- puts $tooth_growth.summary
142
- ```
143
-
144
- Let's now create our first plot with the given data by accessing ggplot2 from Ruby. For Rubyists
145
- that have never seen or used ggplot2, here is the description found on ggplot home page:
146
-
147
- ```
148
- "ggplot2 is a system for declaratively creating graphics, based on _The Grammar of Graphics_.
149
- You provide the data, tell ggplot2 how to map variables to aesthetics, what graphical
150
- primitives to use, and it takes care of the details."
151
- ```
152
-
153
- This description might be a bit cryptic and it is best to see it at work to understand it.
154
- Basically, in the _grammar of graphics_ each component of the plot such as the grid, the axis,
155
- the data, title, subtitle, etc. is added to the plot in layers to form the final graphics.
156
-
157
- In the plot below, the 'dose' is plotted on the 'x' axis and the tooth length on the 'y' axis. Note
158
- the specification in the 'aes' method: 'E.aes(x: :dose, y: :len)', where ':dose' is the 'dose'
159
- column of the dataset and ':len' the 'len' column. The 'aes' method is the _aesthetics_ for this
160
- plot. Then, to this layer, the 'geom_boxplot' is added and the whole plot is printed.
161
-
162
- Note also that we have a call to 'R.png' before plotting and 'R.dev__off' after the print
163
- statement. 'R.png' opens a 'png' device for writing the plot. When 'R.dev__off' is called, the
164
- device is closed and a 'png' file is created. If no name is given to the 'png' function, a file
165
- named 'Rplot<nnn>' is generated, where <nnn> is the number of the plot. So, this first plot is
166
- called 'Rplot001.png'. We can then include the generated 'png' file in
167
- this document, by adding an rmarkdown directive.
168
-
169
- ```{ruby first_plot}
170
- require 'ggplot'
171
-
172
- R.png
173
-
174
- e = $tooth_growth.ggplot(E.aes(x: :dose, y: :len))
175
- print e + R.geom_boxplot
176
-
177
- R.dev__off
178
- ```
179
-
180
- ![ToothGrowth](Rplot001.png)
181
-
182
- We've just managed to generate our first plot in Ruby with only two lines of code. This plot,
183
- however, is far from being pleasing to the eye.
184
-
185
-
186
-
187
-
188
- # Conclusion
189
-
190
-
191
- # Installing Galaaz
192
-
193
- ## Prerequisites
194
-
195
- * GraalVM (>= rc8)
196
- * TruffleRuby
197
- * FastR
198
-
199
- The following R packages will be automatically installed when necessary, but could be installed prior
200
- to using gKnit if desired:
201
-
202
- * ggplot2
203
- * gridExtra
204
- * knitr
205
-
206
- Installation of R packages requires a development environment and can be time consuming. In Linux,
207
- the gnu compiler and tools should be enough. I am not sure what is needed on the Mac.
208
-
209
- ## Preparation
210
-
211
- * gem install galaaz
212
-
213
- ## Usage
214
-
215
- * gknit <filename>
Binary file
@@ -1,16 +0,0 @@
1
- # coding: utf-8
2
-
3
- require 'galaaz'
4
-
5
- # This dataset comes from Baseball-Reference.com.
6
- baseball = R.read__csv("baseball.csv")
7
- # Let's look at the data available for Moneyball.
8
- moneyball = baseball.subset(baseball.Year < 2002)
9
- # Let's see if we can predict the number of wins, by looking at
10
- # runs allowed (RA) and runs scored (RS). RD is the runs difference.
11
- # We are making a linear model for predicting wins (W) based on RD
12
-
13
- moneyball.RD = moneyball.RS - moneyball.RA
14
- wins_reg = R.lm(+:W =~ +:RD, data: moneyball)
15
- wins_reg.summary.pp
16
-
@@ -1,372 +0,0 @@
1
- # coding: utf-8
2
-
3
- require 'galaaz'
4
-
5
- # These examples were extracted from "Advanced R", by Hadley Wickham, available on the
6
- # web at: http://adv-r.had.co.nz/Subsetting.html#applications
7
-
8
- #------------------------------------------------------------------------------------------
9
- # Lookup tables (character subsetting)
10
- # Character matching provides a powerful way to make lookup tables.
11
- # Say you want to convert abbreviations:
12
- #------------------------------------------------------------------------------------------
13
-
14
- x = R.c("m", "f", "u", "f", "f", "m", "m")
15
- lookup = R.c(m: "Male", f: "Female", u: R::NA)
16
- lookup[x].pp
17
- print("\n")
18
-
19
- # m f u f f m m
20
- # "Male" "Female" NA "Female" "Female" "Male" "Male"
21
-
22
- R.unname(lookup[x]).pp
23
- print("\n")
24
-
25
- # [1] "Male" "Female" NA "Female" "Female" "Male" "Male"
26
-
27
-
28
- #------------------------------------------------------------------------------------------
29
- # Matching and merging by hand (integer subsetting)
30
- #------------------------------------------------------------------------------------------
31
-
32
- # You may have a more complicated lookup table which has multiple columns of information.
33
- # Suppose we have a vector of grades, and a table that describes their properties:
34
- # In R a vector c(1, 2, 3) is a double vector, when using polyglot R.c(1, 2, 3) is an
35
- # integer vector, the equivalent of doing c(1L, 2L, 3L) in R. Function 'match' does not
36
- # work correctly with integer vector, it has to be a double.
37
- grades = R.c(1.0, 2.0, 2.0, 3.0, 1.0)
38
-
39
- info = R.data__frame(
40
- grade: (3..1),
41
- desc: R.c("Excellent", "Good", "Poor"),
42
- fail: R.c(false, false, true)
43
- )
44
-
45
- # We want to duplicate the info table so that we have a row for each value in grades.
46
- # We can do this in two ways, either using match() and integer subsetting,
47
- # or rownames() and character subsetting:
48
-
49
- # Using match
50
- id = R.match(grades, info.grade)
51
- info[id, :all].pp
52
- print("\n")
53
-
54
- # grade desc fail
55
- # 3 1 Poor TRUE
56
- # 2 2 Good FALSE
57
- # 2.1 2 Good FALSE
58
- # 1 3 Excellent FALSE
59
- # 3.1 1 Poor TRUE
60
-
61
- # Using rownames
62
- info.rownames = info.grade
63
- info[grades.as__character, :all].pp
64
- print("\n")
65
-
66
- # grade desc fail
67
- # 1 3 Excellent FALSE
68
- # 2 2 Good FALSE
69
- # 2.1 2 Good FALSE
70
- # 3 1 Poor TRUE
71
- # 1.1 3 Excellent FALSE
72
-
73
- #------------------------------------------------------------------------------------------
74
- # Random samples/bootstrap (integer subsetting)
75
- #------------------------------------------------------------------------------------------
76
-
77
- # You can use integer indices to perform random sampling or bootstrapping
78
- # of a vector or data frame. sample() generates a vector of indices, then
79
- # subsetting to access the values:
80
- df = R.data__frame(x: R.rep((1..3), each: 2), y: (6..1), z: R.letters[(1..6)])
81
-
82
- # Set seed for reproducibility
83
- R.set__seed(10)
84
-
85
- # Randomly reorder
86
- df[R.sample(df.nrow), :all].pp
87
- print("\n")
88
-
89
- # x y z
90
- # 4 2 3 d
91
- # 2 1 5 b
92
- # 5 3 2 e
93
- # 3 2 4 c
94
- # 1 1 6 a
95
- # 6 3 1 f
96
-
97
- # Select 3 random rows
98
- df[R.sample(df.nrow, 3), :all].pp
99
- print("\n")
100
-
101
- # x y z
102
- # 2 1 5 b
103
- # 6 3 1 f
104
- # 3 2 4 c
105
-
106
- # Select 6 bootstrap replicates
107
- df[R.sample(df.nrow, 6, rep: true), :all].pp
108
- print("\n")
109
-
110
- # x y z
111
- # 3 2 4 c
112
- # 4 2 3 d
113
- # 4.1 2 3 d
114
- # 1 1 6 a
115
- # 4.2 2 3 d
116
- # 3.1 2 4 c
117
-
118
- #------------------------------------------------------------------------------------------
119
- # Ordering (integer subsetting)
120
- #------------------------------------------------------------------------------------------
121
-
122
- x = R.c("b", "c", "a")
123
- x.order.pp
124
- print("\n")
125
-
126
- # [1] 3 1 2
127
-
128
- x[x.order].pp
129
- print("\n")
130
-
131
- # [1] "a" "b" "c"
132
-
133
- # Randomly reorder df
134
- df2 = df[R.sample(df.nrow), (3..1)]
135
- df2.pp
136
- print("\n")
137
-
138
- # z y x
139
- # 3 c 4 2
140
- # 1 a 6 1
141
- # 2 b 5 1
142
- # 4 d 3 2
143
- # 6 f 1 3
144
- # 5 e 2 3
145
-
146
- df2[df2.x.order, :all].pp
147
- print("\n")
148
-
149
- # z y x
150
- # 1 a 6 1
151
- # 2 b 5 1
152
- # 3 c 4 2
153
- # 4 d 3 2
154
- # 6 f 1 3
155
- # 5 e 2 3
156
-
157
- df2[:all, df2.names.order].pp
158
- print("\n")
159
-
160
- # x y z
161
- # 3 2 4 c
162
- # 1 1 6 a
163
- # 2 1 5 b
164
- # 4 2 3 d
165
- # 6 3 1 f
166
- # 5 3 2 e
167
-
168
- #------------------------------------------------------------------------------------------
169
- # Expanding aggregated counts (integer subsetting)
170
- #
171
- # Sometimes you get a data frame where identical rows have been collapsed into one and a
172
- # count column has been added. rep() and integer subsetting make it easy to uncollapse
173
- # the data by subsetting with a repeated row index:
174
- #------------------------------------------------------------------------------------------
175
-
176
- df = R.data__frame(x: R.c(2, 4, 1), y: R.c(9, 11, 6), n: R.c(3, 5, 1))
177
- R.rep((1..df.nrow), df.n).pp
178
- print("\n")
179
-
180
- # [1] 1 1 1 2 2 2 2 2 3
181
-
182
- df[R.rep((1..df.nrow), df.n), :all].pp
183
- print("\n")
184
-
185
- # x y n
186
- # 1 2 9 3
187
- # 1.1 2 9 3
188
- # 1.2 2 9 3
189
- # 2 4 11 5
190
- # 2.1 4 11 5
191
- # 2.2 4 11 5
192
- # 2.3 4 11 5
193
- # 2.4 4 11 5
194
- # 3 1 6 1
195
-
196
- #------------------------------------------------------------------------------------------
197
- # Removing columns from data frames (character subsetting)
198
- #
199
- # There are two ways to remove columns from a data frame. You can set individual columns
200
- # to nil:
201
- #------------------------------------------------------------------------------------------
202
-
203
- df = R.data__frame(x: (1..3), y: (3..1), z: R.letters[(1..3)])
204
- # Not implemented yet
205
- # df.z = nil
206
- df.pp
207
- print("\n")
208
-
209
- df = R.data__frame(x: (1..3), y: (3..1), z: R.letters[(1..3)])
210
- df[R.c("x", "y")].pp
211
- print("\n")
212
-
213
- # x y
214
- # 1 1 3
215
- # 2 2 2
216
- # 3 3 1
217
-
218
- df[df.names.setdiff("z")].pp
219
- print("\n")
220
-
221
- # x y
222
- # 1 1 3
223
- # 2 2 2
224
- # 3 3 1
225
-
226
- #------------------------------------------------------------------------------------------
227
- # Selecting rows based on a condition (logical subsetting)
228
- #
229
- # Because it allows you to easily combine conditions from multiple columns, logical
230
- # subsetting is probably the most commonly used technique for extracting rows out of
231
- # a data frame.
232
- #------------------------------------------------------------------------------------------
233
-
234
- R.mtcars[R.mtcars.gear == 5, :all].pp
235
- print("\n")
236
-
237
- # mpg cyl disp hp drat wt qsec vs am gear carb
238
- # Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2
239
- # Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
240
- # Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.5 0 1 5 4
241
- # Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.5 0 1 5 6
242
- # Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.6 0 1 5 8
243
-
244
- R.mtcars[(R.mtcars.gear == 5) & (R.mtcars.cyl == 4), :all].pp
245
- print("\n")
246
-
247
- # mpg cyl disp hp drat wt qsec vs am gear carb
248
- # Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2
249
- # Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
250
-
251
-
252
- #------------------------------------------------------------------------------------------
253
- # Boolean algebra vs. sets (logical & integer subsetting)
254
- #
255
- # It’s useful to be aware of the natural equivalence between set operations (integer
256
- # subsetting) and boolean algebra (logical subsetting)
257
- #------------------------------------------------------------------------------------------
258
-
259
- x = R.sample(10) < 4
260
- x.which.pp
261
- print("\n")
262
-
263
- # [1] 3 7 10
264
-
265
- #===
266
- x1 = R.c((1..10)) % 2 == 0
267
- x1.pp
268
- print("\n")
269
-
270
- # [1] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE
271
-
272
- #===
273
- x2 = x1.which
274
- x2.pp
275
- print("\n")
276
-
277
- # [1] 2 4 6 8 10
278
-
279
- #===
280
- y1 = R.c((1..10)) % 5 == 0
281
- y1.pp
282
- print("\n")
283
-
284
- # [1] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE
285
-
286
- #===
287
- y2 = y1.which
288
- y2.pp
289
- print("\n")
290
-
291
- # [1] 5 10
292
-
293
- #===
294
- # X & Y <-> intersect(x, y)
295
- (x1 & y1).pp
296
- print("\n")
297
-
298
- # [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
299
-
300
- #===
301
- # This example shows the problem with having R objects returning either
302
- # vector or scalar. We don't know the type of the result of applying
303
- # intersect. If this is a vector, then we need to print it with pp
304
- # but if this is a scalar, we need to print it with regular Ruby 'p' or
305
- # 'print'
306
- p R.intersect(x2, y2)
307
- print("\n")
308
-
309
- # 10
310
-
311
- p x2.intersect y2
312
-
313
- # 10
314
-
315
- #===
316
- # X | Y <-> union(x, y)
317
- (x1 | y1).pp
318
- print("\n")
319
-
320
- # [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE TRUE
321
-
322
- #===
323
- R.union(x2, y2).pp
324
- print("\n")
325
-
326
- # [1] 2 4 6 8 10 5
327
-
328
- (x2.union y2).pp
329
-
330
- # [1] 2 4 6 8 10 5
331
-
332
- #===
333
- # X & !Y <-> setdiff(x, y)
334
- (x1 & !y1).pp
335
- print("\n")
336
-
337
- # [1] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE FALSE
338
-
339
- #===
340
- R.setdiff(x2, y2).pp
341
- print("\n")
342
-
343
- # [1] 2 4 6 8
344
-
345
- (x2.setdiff y2).pp
346
-
347
- # [1] 2 4 6 8
348
-
349
-
350
- #===
351
- # xor(X, Y) <-> setdiff(union(x, y), intersect(x, y))
352
- R.xor(x1, y1).pp
353
- print("\n")
354
-
355
- # [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE
356
-
357
- # Writing the same as the last example in a Ruby style
358
- (x1.xor y1).pp
359
-
360
- # [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE
361
-
362
- #===
363
- R.setdiff(R.union(x2, y2), R.intersect(x2, y2)).pp
364
- print("\n")
365
-
366
- # [1] 2 4 6 8 5
367
-
368
- # Writing the same as the last example in a Ruby style
369
- ((x2.union y2).setdiff (x2.intersect y2)).pp
370
- print("\n")
371
-
372
- # [1] 2 4 6 8 5