galaaz 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +325 -32
- data/Rakefile +14 -0
- data/bin/galaaz +0 -3
- data/bin/gknit +28 -0
- data/bin/gstudio +6 -0
- data/bin/gstudio.rb +6 -0
- data/bin/ogk~ +4 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +335 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot.html +460 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot.md +327 -0
- data/blogs/galaaz_ggplot/midwest.Rmd +39 -0
- data/blogs/galaaz_ggplot/midwest.html +188 -0
- data/blogs/galaaz_ggplot/midwest.png +0 -0
- data/blogs/galaaz_ggplot/scatter_plot.png +0 -0
- data/examples/50Plots_MasterList/Images/midwest-scatterplot.PNG +0 -0
- data/examples/50Plots_MasterList/ScatterPlot.rb +159 -0
- data/examples/R/calc.R +21 -0
- data/examples/R/java_interop.R +29 -0
- data/examples/{baseball.csv → misc/baseball.csv} +0 -0
- data/examples/{ggplot.rb → misc/ggplot.rb} +0 -0
- data/examples/misc/moneyball.rb +33 -0
- data/examples/{baseball.rb → misc/moneyball.rb~} +0 -0
- data/examples/misc/subsetting.rb +374 -0
- data/examples/{subsetting.rb → misc/subsetting.rb~} +0 -0
- data/lib/{expression.rb → R/expression.rb} +0 -0
- data/lib/{r.rb → R/r.rb} +1 -0
- data/lib/R/r.rb~ +121 -0
- data/lib/{r_methods.rb → R/r_methods.rb} +0 -0
- data/lib/{rbinary_operators.rb → R/rbinary_operators.rb} +0 -0
- data/lib/{rclosure.rb → R/rclosure.rb} +0 -0
- data/lib/{rdata_frame.rb → R/rdata_frame.rb} +0 -0
- data/lib/{renvironment.rb → R/renvironment.rb} +0 -0
- data/lib/{rexpression.rb → R/rexpression.rb} +0 -0
- data/lib/{rindexed_object.rb → R/rindexed_object.rb} +0 -0
- data/lib/{rlanguage.rb → R/rlanguage.rb} +0 -0
- data/lib/{rlist.rb → R/rlist.rb} +0 -0
- data/lib/{rmatrix.rb → R/rmatrix.rb} +0 -0
- data/lib/{rmd_indexed_object.rb → R/rmd_indexed_object.rb} +0 -0
- data/lib/{robject.rb → R/robject.rb} +0 -0
- data/lib/{rpkg.rb → R/rpkg.rb} +0 -0
- data/lib/{rsupport.rb → R/rsupport.rb} +0 -0
- data/lib/{rsupport_scope.rb → R/rsupport_scope.rb} +0 -0
- data/lib/{rsymbol.rb → R/rsymbol.rb} +0 -0
- data/lib/{ruby_callback.rb → R/ruby_callback.rb} +0 -0
- data/lib/{ruby_extensions.rb → R/ruby_extensions.rb} +0 -0
- data/lib/{runary_operators.rb → R/runary_operators.rb} +0 -0
- data/lib/{rvector.rb → R/rvector.rb} +0 -0
- data/lib/galaaz.rb +2 -1
- data/lib/util/exec_ruby.rb +44 -0
- data/specs/tmp.rb +167 -1
- data/version.rb +1 -1
- metadata +63 -28
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,159 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# @author Rodrigo Botafogo
|
5
|
+
#
|
6
|
+
# Copyright © 2018 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
7
|
+
# and distribute this software and its documentation, without fee and without a signed
|
8
|
+
# licensing agreement, is hereby granted, provided that the above copyright notice, this
|
9
|
+
# paragraph and the following two paragraphs appear in all copies, modifications, and
|
10
|
+
# distributions.
|
11
|
+
#
|
12
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
13
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
14
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
15
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
16
|
+
#
|
17
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
18
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
19
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
20
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
21
|
+
# OR MODIFICATIONS.
|
22
|
+
##########################################################################################
|
23
|
+
|
24
|
+
require 'galaaz'
|
25
|
+
require 'ggplot'
|
26
|
+
|
27
|
+
module CorpTheme

  #--------------------------------------------------------------------------------------
  # Builds the plot theme (visualization) used by the example. The theme removes major
  # and minor grids, the panel border and the panel background, and sets the font of
  # the axis text and axis titles. As a side effect the R option 'scipen' is set to 999
  # to turn off scientific notation (numbers like 1e+48).
  # @return the sum of all the R.theme(...) layers, ready to be added to a plot
  #--------------------------------------------------------------------------------------

  def self.global_theme
    R.options(scipen: 999)

    # Each entry is one independent theme layer; they are folded together with '+'
    # from left to right, in the order listed.
    layers = [
      # no major grids
      R.theme(panel__grid__major: E.element_blank),
      # no minor grids
      R.theme(panel__grid__minor: E.element_blank),
      # no border
      R.theme(panel__border: E.element_blank),
      # no background
      R.theme(panel__background: E.element_blank),
      # axis font
      R.theme(axis__text: E.element_text(size: 8, color: "#000080")),
      # axis titles: color, weight, size and justification
      R.theme(axis__title: E.element_text(color: "#000080", face: "bold", size: 8, hjust: 1))
    ]

    layers.reduce(:+)
  end

end
|
59
|
+
|
60
|
+
|
61
|
+
class ScatterPlot

  # Texts shown on the plot; set them before calling #plot.
  attr_accessor :title
  attr_accessor :subtitle
  attr_accessor :caption
  attr_accessor :x_label
  attr_accessor :y_label

  #--------------------------------------------------------------------------------------
  # Initialize the plot with the data and the x and y variables
  # @param data the object that receives the 'ggplot' call in #plot
  # @param x the variable mapped to the x aesthetic
  # @param y the variable mapped to the y aesthetic
  #--------------------------------------------------------------------------------------

  def initialize(data, x:, y:)
    @data = data
    @x = x
    @y = y
  end

  #--------------------------------------------------------------------------------------
  # Define groupings by color and size
  # @param color variable used to color the points, or nil for no color grouping
  # @param size variable used to size the points, or nil for no size grouping
  #--------------------------------------------------------------------------------------

  def group_by(color: nil, size: nil)
    @color_by = color
    @size_by = size
  end

  #--------------------------------------------------------------------------------------
  # Add a smoothing line. If confidence is true, then a confidence interval is added; if
  # false, the confidence interval is not added
  # @param method the smoothing method, passed as 'method' to geom_smooth
  # @param confidence [Boolean] passed as 'se' to geom_smooth
  #--------------------------------------------------------------------------------------

  def add_smoothing_line(method:, confidence: true)
    @method = method
    @confidence = confidence
  end

  #--------------------------------------------------------------------------------------
  # Creates the graph labels: title, subtitle, caption and the titles of both axes
  # @param title [String] The title to add to the graph
  # @param subtitle [String] The subtitle to add to the graph
  # @param caption [String] The caption to add to the graph
  # @param x_label [String] The title of the x axis
  # @param y_label [String] The title of the y axis
  # @return the result of R.labs, which can be added to a plot
  #--------------------------------------------------------------------------------------

  def graph_params(title: "", subtitle: "", caption: "", x_label: "", y_label: "")
    # ggplot2's labs() identifies axis titles by the aesthetic they label ('x' and 'y').
    # Names such as 'x_label' or 'y_label' match no aesthetic of this plot, so the axis
    # titles would silently stay at their defaults.
    R.labs(
      title: title,
      subtitle: subtitle,
      caption: caption,
      y: y_label,
      x: x_label
    )
  end

  #--------------------------------------------------------------------------------------
  # Prepare the plot's points. Only the groupings that were set with #group_by are
  # included in the aesthetics mapping.
  #--------------------------------------------------------------------------------------

  def points
    params = {}
    params[:col] = @color_by if @color_by
    params[:size] = @size_by if @size_by
    R.geom_point(E.aes(params))
  end

  #--------------------------------------------------------------------------------------
  # Plots the scatterplot: points, smoothing line, axis limits, labels and the
  # CorpTheme theme.
  # NOTE(review): the axis limits (x in 0..0.1, y in 0..500000) are hard-coded; they
  # presumably fit the 'midwest' data used by the example below -- confirm before
  # reusing this class with other data.
  #--------------------------------------------------------------------------------------

  def plot
    R.awt

    puts @data.ggplot(E.aes(x: @x, y: @y)) +
      points +
      R.geom_smooth(method: @method, se: @confidence) +
      R.xlim(R.c(0, 0.1)) +
      R.ylim(R.c(0, 500000)) +
      graph_params(title: @title,
                   subtitle: @subtitle,
                   y_label: @y_label,
                   x_label: @x_label,
                   caption: @caption) +
      CorpTheme.global_theme
  end

end
|
146
|
+
|
147
|
+
# Example: scatterplot of area vs. total population for the 'midwest' dataset.
sp = ScatterPlot.new(~:midwest, x: :area, y: :poptotal)
sp.title = "Midwest Dataset - Scatterplot"
sp.subtitle = "Area Vs Population"
sp.caption = "Source: midwest"
sp.x_label = "Area"
sp.y_label = "Population"
sp.group_by(color: :state, size: :popdensity)    # try sp.group_by(color: :state)
# available methods: "lm", "glm", "loess", "gam"
sp.add_smoothing_line(method: "glm")
sp.plot

# Wait for the user to press Enter before the script ends (presumably so that the plot
# stays on screen). $stdin.gets is used instead of a bare 'gets.chomp': Kernel#gets
# reads from files named in ARGV when arguments are given, and calling chomp on the
# nil returned at end-of-input raises NoMethodError.
$stdin.gets
|
data/examples/R/calc.R
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
x <- matrix(runif(1000000), 1000, 1000)
|
2
|
+
# Computes the mutual information (in nats, since log() is the natural logarithm) of
# the joint distribution given by the matrix 'joint_dist'. The matrix is first
# normalised so that its entries sum to 1; it does not need to be normalised by the
# caller.
mutual_R <- function(joint_dist) {
  joint_dist <- joint_dist/sum(joint_dist)
  mutual_information <- 0
  num_rows <- nrow(joint_dist)
  num_cols <- ncol(joint_dist)
  # marginal distributions
  colsums <- colSums(joint_dist)
  rowsums <- rowSums(joint_dist)
  # seq_len(n) is empty when n is 0, whereas seq_along(1:n) would iterate over
  # c(1, 2) and index past the end of an empty matrix.
  for(i in seq_len(num_rows)){
    for(j in seq_len(num_cols)){
      temp <- log((joint_dist[i,j]/(colsums[j]*rowsums[i])))
      # cells with zero probability give a non-finite logarithm; they contribute 0
      if(!is.finite(temp)){
        temp <- 0
      }
      mutual_information <-
        mutual_information + joint_dist[i,j] * temp
    }
  }
  mutual_information
}
|
21
|
+
system.time(mutual_R(x))
|
@@ -0,0 +1,29 @@
|
|
1
|
+
library(grid)
|
2
|
+
# Draws ten random points with 'grid' into a 450x450 java.awt.image.BufferedImage and
# shows that image in a Swing JFrame, then blocks until the frame is no longer visible.
# Relies on java.type() and on new() accepting Java class names, i.e. on an R runtime
# with Java interop (presumably FastR on GraalVM -- confirm).
openJavaWindow <- function () {
   # create image and register graphics
   imageClass <- java.type('java.awt.image.BufferedImage')
   image <- new(imageClass, 450, 450, imageClass$TYPE_INT_RGB)
   graphics <- image$getGraphics()
   graphics$setBackground(java.type('java.awt.Color')$white)
   grDevices:::awt(image$getWidth(), image$getHeight(), graphics)

   # draw image
   grid.newpage()
   pushViewport(plotViewport(margins = c(5.1, 4.1, 4.1, 2.1)))
   grid.xaxis(); grid.yaxis()
   grid.points(x = runif(10, 0, 1), y = runif(10, 0, 1),
        size = unit(0.01, "npc"))

   # open frame with image
   imageIcon <- new("javax.swing.ImageIcon", image)
   label <- new("javax.swing.JLabel", imageIcon)
   panel <- new("javax.swing.JPanel")
   panel$add(label)
   frame <- new("javax.swing.JFrame")
   frame$setMinimumSize(new("java.awt.Dimension",
                image$getWidth(), image$getHeight()))
   frame$add(panel)
   # TRUE rather than T: T is an ordinary variable that can be reassigned
   frame$setVisible(TRUE)
   # poll once per second until the frame is closed
   while (frame$isVisible()) Sys.sleep(1)
}
|
29
|
+
openJavaWindow()
|
File without changes
|
File without changes
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'galaaz'
|
4
|
+
|
5
|
+
local_dir = File.expand_path File.dirname(__FILE__)
|
6
|
+
|
7
|
+
# This dataset comes from Baseball-Reference.com.
|
8
|
+
baseball = R.read__csv("#{local_dir}/baseball.csv")
|
9
|
+
|
10
|
+
# Let's look at the data available for Moneyball.
|
11
|
+
moneyball = baseball.subset(baseball.Year < 2002)
|
12
|
+
|
13
|
+
# Let's see if we can predict the number of wins, by looking at
|
14
|
+
# runs allowed (RA) and runs scored (RS). RD is the runs difference.
|
15
|
+
# We are making a linear model for predicting wins (W) based on RD
|
16
|
+
moneyball.RD = moneyball.RS - moneyball.RA
|
17
|
+
wins_reg = R.lm(+:W =~ +:RD, data: moneyball)
|
18
|
+
|
19
|
+
# Prints a titled section to standard output: the title, a rule of '=' characters as
# wide as the title, the data, the same rule again, and finally an empty line.
# @param title [String] heading of the section
# @param data the value to print between the two rules
def show(title, data)
  rule = "=" * title.size
  puts title, rule, data, rule
  puts
end
|
26
|
+
|
27
|
+
puts "Fitting a linear model on the Baseball dataset as done in Moneyball:"
|
28
|
+
puts
|
29
|
+
show "Coefficients of the Linear Regression", wins_reg.coefficients
|
30
|
+
show "Fitted values summary:", wins_reg.fitted__values.summary
|
31
|
+
show "Residuals summary", wins_reg.residuals.summary
|
32
|
+
|
33
|
+
|
File without changes
|
@@ -0,0 +1,374 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'galaaz'
|
4
|
+
|
5
|
+
# These examples were extracted from "Advanced R", by Hadley Wickham, available on the
|
6
|
+
# web at: http://adv-r.had.co.nz/Subsetting.html#applications
|
7
|
+
|
8
|
+
#------------------------------------------------------------------------------------------
|
9
|
+
# Lookup tables (character subsetting)
|
10
|
+
# Character matching provides a powerful way to make lookup tables.
|
11
|
+
# Say you want to convert abbreviations:
|
12
|
+
#------------------------------------------------------------------------------------------
|
13
|
+
|
14
|
+
x = R.c("m", "f", "u", "f", "f", "m", "m")
|
15
|
+
lookup = R.c(m: "Male", f: "Female", u: R::NA)
|
16
|
+
lookup[x].pp
|
17
|
+
print("\n")
|
18
|
+
|
19
|
+
# m f u f f m m
|
20
|
+
# "Male" "Female" NA "Female" "Female" "Male" "Male"
|
21
|
+
|
22
|
+
R.unname(lookup[x]).pp
|
23
|
+
print("\n")
|
24
|
+
|
25
|
+
# [1] "Male" "Female" NA "Female" "Female" "Male" "Male"
|
26
|
+
|
27
|
+
|
28
|
+
#------------------------------------------------------------------------------------------
|
29
|
+
# Matching and merging by hand (integer subsetting)
|
30
|
+
#------------------------------------------------------------------------------------------
|
31
|
+
|
32
|
+
# You may have a more complicated lookup table which has multiple columns of information.
|
33
|
+
# Suppose we have a vector of grades, and a table that describes their properties:
|
34
|
+
# In R a vector c(1, 2, 3) is a double vector, when using polyglot R.c(1, 2, 3) is an
|
35
|
+
# integer vector, the equivalent of doing c(1L, 2L, 3L) in R. Function 'match' does not
|
36
|
+
# work correctly with an integer vector; it has to be a double vector.
|
37
|
+
grades = R.c(1.0, 2.0, 2.0, 3.0, 1.0)
|
38
|
+
|
39
|
+
info = R.data__frame(
|
40
|
+
grade: (3..1),
|
41
|
+
desc: R.c("Excellent", "Good", "Poor"),
|
42
|
+
fail: R.c(false, false, true)
|
43
|
+
)
|
44
|
+
|
45
|
+
# We want to duplicate the info table so that we have a row for each value in grades.
|
46
|
+
# We can do this in two ways, either using match() and integer subsetting,
|
47
|
+
# or rownames() and character subsetting:
|
48
|
+
|
49
|
+
# Using match
|
50
|
+
id = R.match(grades, info.grade)
|
51
|
+
info[id, :all].pp
|
52
|
+
print("\n")
|
53
|
+
|
54
|
+
# grade desc fail
|
55
|
+
# 3 1 Poor TRUE
|
56
|
+
# 2 2 Good FALSE
|
57
|
+
# 2.1 2 Good FALSE
|
58
|
+
# 1 3 Excellent FALSE
|
59
|
+
# 3.1 1 Poor TRUE
|
60
|
+
|
61
|
+
# Using rownames
|
62
|
+
info.rownames = info.grade
|
63
|
+
info[grades.as__character, :all].pp
|
64
|
+
print("\n")
|
65
|
+
|
66
|
+
# grade desc fail
|
67
|
+
# 1 3 Excellent FALSE
|
68
|
+
# 2 2 Good FALSE
|
69
|
+
# 2.1 2 Good FALSE
|
70
|
+
# 3 1 Poor TRUE
|
71
|
+
# 1.1 3 Excellent FALSE
|
72
|
+
|
73
|
+
#------------------------------------------------------------------------------------------
|
74
|
+
# Random samples/bootstrap (integer subsetting)
|
75
|
+
#------------------------------------------------------------------------------------------
|
76
|
+
|
77
|
+
# You can use integer indices to perform random sampling or bootstrapping
|
78
|
+
# of a vector or data frame. sample() generates a vector of indices, then
|
79
|
+
# subsetting to access the values:
|
80
|
+
df = R.data__frame(x: R.rep((1..3), each: 2), y: (6..1), z: (~:letters)[(1..6)])
|
81
|
+
|
82
|
+
# Set seed for reproducibility
|
83
|
+
R.set__seed(10)
|
84
|
+
|
85
|
+
# Randomly reorder
|
86
|
+
df[R.sample(df.nrow), :all].pp
|
87
|
+
print("\n")
|
88
|
+
|
89
|
+
# x y z
|
90
|
+
# 4 2 3 d
|
91
|
+
# 2 1 5 b
|
92
|
+
# 5 3 2 e
|
93
|
+
# 3 2 4 c
|
94
|
+
# 1 1 6 a
|
95
|
+
# 6 3 1 f
|
96
|
+
|
97
|
+
# Select 3 random rows
|
98
|
+
df[R.sample(df.nrow, 3), :all].pp
|
99
|
+
print("\n")
|
100
|
+
|
101
|
+
# x y z
|
102
|
+
# 2 1 5 b
|
103
|
+
# 6 3 1 f
|
104
|
+
# 3 2 4 c
|
105
|
+
|
106
|
+
# Select 6 bootstrap replicates
|
107
|
+
df[R.sample(df.nrow, 6, rep: true), :all].pp
|
108
|
+
print("\n")
|
109
|
+
|
110
|
+
# x y z
|
111
|
+
# 3 2 4 c
|
112
|
+
# 4 2 3 d
|
113
|
+
# 4.1 2 3 d
|
114
|
+
# 1 1 6 a
|
115
|
+
# 4.2 2 3 d
|
116
|
+
# 3.1 2 4 c
|
117
|
+
|
118
|
+
#------------------------------------------------------------------------------------------
|
119
|
+
# Ordering (integer subsetting)
|
120
|
+
#------------------------------------------------------------------------------------------
|
121
|
+
|
122
|
+
x = R.c("b", "c", "a")
|
123
|
+
x.order.pp
|
124
|
+
print("\n")
|
125
|
+
|
126
|
+
# [1] 3 1 2
|
127
|
+
|
128
|
+
x[x.order].pp
|
129
|
+
print("\n")
|
130
|
+
|
131
|
+
# [1] "a" "b" "c"
|
132
|
+
|
133
|
+
# Randomly reorder df
|
134
|
+
df2 = df[R.sample(df.nrow), (3..1)]
|
135
|
+
df2.pp
|
136
|
+
print("\n")
|
137
|
+
|
138
|
+
# z y x
|
139
|
+
# 3 c 4 2
|
140
|
+
# 1 a 6 1
|
141
|
+
# 2 b 5 1
|
142
|
+
# 4 d 3 2
|
143
|
+
# 6 f 1 3
|
144
|
+
# 5 e 2 3
|
145
|
+
|
146
|
+
df2[df2.x.order, :all].pp
|
147
|
+
print("\n")
|
148
|
+
|
149
|
+
# z y x
|
150
|
+
# 1 a 6 1
|
151
|
+
# 2 b 5 1
|
152
|
+
# 3 c 4 2
|
153
|
+
# 4 d 3 2
|
154
|
+
# 6 f 1 3
|
155
|
+
# 5 e 2 3
|
156
|
+
|
157
|
+
df2[:all, df2.names.order].pp
|
158
|
+
print("\n")
|
159
|
+
|
160
|
+
# x y z
|
161
|
+
# 3 2 4 c
|
162
|
+
# 1 1 6 a
|
163
|
+
# 2 1 5 b
|
164
|
+
# 4 2 3 d
|
165
|
+
# 6 3 1 f
|
166
|
+
# 5 3 2 e
|
167
|
+
|
168
|
+
#------------------------------------------------------------------------------------------
|
169
|
+
# Expanding aggregated counts (integer subsetting)
|
170
|
+
#
|
171
|
+
# Sometimes you get a data frame where identical rows have been collapsed into one and a
|
172
|
+
# count column has been added. rep() and integer subsetting make it easy to uncollapse
|
173
|
+
# the data by subsetting with a repeated row index:
|
174
|
+
#------------------------------------------------------------------------------------------
|
175
|
+
|
176
|
+
df = R.data__frame(x: R.c(2, 4, 1), y: R.c(9, 11, 6), n: R.c(3, 5, 1))
|
177
|
+
R.rep((1..(df.nrow << 0)), df.n).pp
|
178
|
+
print("\n")
|
179
|
+
|
180
|
+
# [1] 1 1 1 2 2 2 2 2 3
|
181
|
+
|
182
|
+
df[R.rep((1..df.nrow << 0), df.n), :all].pp
|
183
|
+
print("\n")
|
184
|
+
|
185
|
+
# x y n
|
186
|
+
# 1 2 9 3
|
187
|
+
# 1.1 2 9 3
|
188
|
+
# 1.2 2 9 3
|
189
|
+
# 2 4 11 5
|
190
|
+
# 2.1 4 11 5
|
191
|
+
# 2.2 4 11 5
|
192
|
+
# 2.3 4 11 5
|
193
|
+
# 2.4 4 11 5
|
194
|
+
# 3 1 6 1
|
195
|
+
|
196
|
+
#------------------------------------------------------------------------------------------
|
197
|
+
# Removing columns from data frames (character subsetting)
|
198
|
+
#
|
199
|
+
# There are two ways to remove columns from a data frame. You can set individual columns
|
200
|
+
# to nil:
|
201
|
+
#------------------------------------------------------------------------------------------
|
202
|
+
|
203
|
+
df = R.data__frame(x: (1..3), y: (3..1), z: (~:letters)[(1..3)])
|
204
|
+
# Not implemented yet
|
205
|
+
# df.z = nil
|
206
|
+
df.pp
|
207
|
+
print("\n")
|
208
|
+
|
209
|
+
df = R.data__frame(x: (1..3), y: (3..1), z: (~:letters)[(1..3)])
|
210
|
+
df[R.c("x", "y")].pp
|
211
|
+
print("\n")
|
212
|
+
|
213
|
+
# x y
|
214
|
+
# 1 1 3
|
215
|
+
# 2 2 2
|
216
|
+
# 3 3 1
|
217
|
+
|
218
|
+
df[df.names.setdiff("z")].pp
|
219
|
+
print("\n")
|
220
|
+
|
221
|
+
# x y
|
222
|
+
# 1 1 3
|
223
|
+
# 2 2 2
|
224
|
+
# 3 3 1
|
225
|
+
|
226
|
+
#------------------------------------------------------------------------------------------
|
227
|
+
# Selecting rows based on a condition (logical subsetting)
|
228
|
+
#
|
229
|
+
# Because it allows you to easily combine conditions from multiple columns, logical
|
230
|
+
# subsetting is probably the most commonly used technique for extracting rows out of
|
231
|
+
# a data frame.
|
232
|
+
#------------------------------------------------------------------------------------------
|
233
|
+
|
234
|
+
mtcars = ~:mtcars
|
235
|
+
|
236
|
+
mtcars[mtcars.gear == 5, :all].pp
|
237
|
+
print("\n")
|
238
|
+
|
239
|
+
# mpg cyl disp hp drat wt qsec vs am gear carb
|
240
|
+
# Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2
|
241
|
+
# Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
|
242
|
+
# Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.5 0 1 5 4
|
243
|
+
# Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.5 0 1 5 6
|
244
|
+
# Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.6 0 1 5 8
|
245
|
+
|
246
|
+
mtcars[(mtcars.gear == 5) & (mtcars.cyl == 4), :all].pp
|
247
|
+
print("\n")
|
248
|
+
|
249
|
+
# mpg cyl disp hp drat wt qsec vs am gear carb
|
250
|
+
# Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2
|
251
|
+
# Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
|
252
|
+
|
253
|
+
|
254
|
+
#------------------------------------------------------------------------------------------
|
255
|
+
# Boolean algebra vs. sets (logical & integer subsetting)
|
256
|
+
#
|
257
|
+
# It’s useful to be aware of the natural equivalence between set operations (integer
|
258
|
+
# subsetting) and boolean algebra (logical subsetting)
|
259
|
+
#------------------------------------------------------------------------------------------
|
260
|
+
|
261
|
+
x = R.sample(10) < 4
|
262
|
+
x.which.pp
|
263
|
+
print("\n")
|
264
|
+
|
265
|
+
# [1] 3 7 10
|
266
|
+
|
267
|
+
#===
|
268
|
+
x1 = R.c((1..10)) % 2 == 0
|
269
|
+
x1.pp
|
270
|
+
print("\n")
|
271
|
+
|
272
|
+
# [1] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE
|
273
|
+
|
274
|
+
#===
|
275
|
+
x2 = x1.which
|
276
|
+
x2.pp
|
277
|
+
print("\n")
|
278
|
+
|
279
|
+
# [1] 2 4 6 8 10
|
280
|
+
|
281
|
+
#===
|
282
|
+
y1 = R.c((1..10)) % 5 == 0
|
283
|
+
y1.pp
|
284
|
+
print("\n")
|
285
|
+
|
286
|
+
# [1] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE
|
287
|
+
|
288
|
+
#===
|
289
|
+
y2 = y1.which
|
290
|
+
y2.pp
|
291
|
+
print("\n")
|
292
|
+
|
293
|
+
# [1] 5 10
|
294
|
+
|
295
|
+
#===
|
296
|
+
# X & Y <-> intersect(x, y)
|
297
|
+
(x1 & y1).pp
|
298
|
+
print("\n")
|
299
|
+
|
300
|
+
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
|
301
|
+
|
302
|
+
#===
|
303
|
+
# This example shows the problem with having R objects returning either
|
304
|
+
# vector or scalar. We don't know the type of the result of applying
|
305
|
+
# intersect. If this is a vector, then we need to print it with pp
|
306
|
+
# but if this is a scalar, we need to print it with regular Ruby 'p' or
|
307
|
+
# 'print'
|
308
|
+
puts R.intersect(x2, y2)
|
309
|
+
print("\n")
|
310
|
+
|
311
|
+
# 10
|
312
|
+
|
313
|
+
puts x2.intersect y2
|
314
|
+
|
315
|
+
# 10
|
316
|
+
|
317
|
+
#===
|
318
|
+
# X | Y <-> union(x, y)
|
319
|
+
(x1 | y1).pp
|
320
|
+
print("\n")
|
321
|
+
|
322
|
+
# [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE TRUE
|
323
|
+
|
324
|
+
#===
|
325
|
+
R.union(x2, y2).pp
|
326
|
+
print("\n")
|
327
|
+
|
328
|
+
# [1] 2 4 6 8 10 5
|
329
|
+
|
330
|
+
(x2.union y2).pp
|
331
|
+
|
332
|
+
# [1] 2 4 6 8 10 5
|
333
|
+
|
334
|
+
#===
|
335
|
+
# X & !Y <-> setdiff(x, y)
|
336
|
+
(x1 & !y1).pp
|
337
|
+
print("\n")
|
338
|
+
|
339
|
+
# [1] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE FALSE
|
340
|
+
|
341
|
+
#===
|
342
|
+
R.setdiff(x2, y2).pp
|
343
|
+
print("\n")
|
344
|
+
|
345
|
+
# [1] 2 4 6 8
|
346
|
+
|
347
|
+
(x2.setdiff y2).pp
|
348
|
+
|
349
|
+
# [1] 2 4 6 8
|
350
|
+
|
351
|
+
|
352
|
+
#===
|
353
|
+
# xor(X, Y) <-> setdiff(union(x, y), intersect(x, y))
|
354
|
+
R.xor(x1, y1).pp
|
355
|
+
print("\n")
|
356
|
+
|
357
|
+
# [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE
|
358
|
+
|
359
|
+
# Writing the same as the last example in a Ruby style
|
360
|
+
(x1.xor y1).pp
|
361
|
+
|
362
|
+
# [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE
|
363
|
+
|
364
|
+
#===
|
365
|
+
R.setdiff(R.union(x2, y2), R.intersect(x2, y2)).pp
|
366
|
+
print("\n")
|
367
|
+
|
368
|
+
# [1] 2 4 6 8 5
|
369
|
+
|
370
|
+
# Writing the same as the last example in a Ruby style
|
371
|
+
((x2.union y2).setdiff (x2.intersect y2)).pp
|
372
|
+
print("\n")
|
373
|
+
|
374
|
+
# [1] 2 4 6 8 5
|