galaaz 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +325 -32
- data/Rakefile +14 -0
- data/bin/galaaz +0 -3
- data/bin/gknit +28 -0
- data/bin/gstudio +6 -0
- data/bin/gstudio.rb +6 -0
- data/bin/ogk~ +4 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot.Rmd +335 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot.html +460 -0
- data/blogs/galaaz_ggplot/galaaz_ggplot.md +327 -0
- data/blogs/galaaz_ggplot/midwest.Rmd +39 -0
- data/blogs/galaaz_ggplot/midwest.html +188 -0
- data/blogs/galaaz_ggplot/midwest.png +0 -0
- data/blogs/galaaz_ggplot/scatter_plot.png +0 -0
- data/examples/50Plots_MasterList/Images/midwest-scatterplot.PNG +0 -0
- data/examples/50Plots_MasterList/ScatterPlot.rb +159 -0
- data/examples/R/calc.R +21 -0
- data/examples/R/java_interop.R +29 -0
- data/examples/{baseball.csv → misc/baseball.csv} +0 -0
- data/examples/{ggplot.rb → misc/ggplot.rb} +0 -0
- data/examples/misc/moneyball.rb +33 -0
- data/examples/{baseball.rb → misc/moneyball.rb~} +0 -0
- data/examples/misc/subsetting.rb +374 -0
- data/examples/{subsetting.rb → misc/subsetting.rb~} +0 -0
- data/lib/{expression.rb → R/expression.rb} +0 -0
- data/lib/{r.rb → R/r.rb} +1 -0
- data/lib/R/r.rb~ +121 -0
- data/lib/{r_methods.rb → R/r_methods.rb} +0 -0
- data/lib/{rbinary_operators.rb → R/rbinary_operators.rb} +0 -0
- data/lib/{rclosure.rb → R/rclosure.rb} +0 -0
- data/lib/{rdata_frame.rb → R/rdata_frame.rb} +0 -0
- data/lib/{renvironment.rb → R/renvironment.rb} +0 -0
- data/lib/{rexpression.rb → R/rexpression.rb} +0 -0
- data/lib/{rindexed_object.rb → R/rindexed_object.rb} +0 -0
- data/lib/{rlanguage.rb → R/rlanguage.rb} +0 -0
- data/lib/{rlist.rb → R/rlist.rb} +0 -0
- data/lib/{rmatrix.rb → R/rmatrix.rb} +0 -0
- data/lib/{rmd_indexed_object.rb → R/rmd_indexed_object.rb} +0 -0
- data/lib/{robject.rb → R/robject.rb} +0 -0
- data/lib/{rpkg.rb → R/rpkg.rb} +0 -0
- data/lib/{rsupport.rb → R/rsupport.rb} +0 -0
- data/lib/{rsupport_scope.rb → R/rsupport_scope.rb} +0 -0
- data/lib/{rsymbol.rb → R/rsymbol.rb} +0 -0
- data/lib/{ruby_callback.rb → R/ruby_callback.rb} +0 -0
- data/lib/{ruby_extensions.rb → R/ruby_extensions.rb} +0 -0
- data/lib/{runary_operators.rb → R/runary_operators.rb} +0 -0
- data/lib/{rvector.rb → R/rvector.rb} +0 -0
- data/lib/galaaz.rb +2 -1
- data/lib/util/exec_ruby.rb +44 -0
- data/specs/tmp.rb +167 -1
- data/version.rb +1 -1
- metadata +63 -28
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,159 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
##########################################################################################
|
4
|
+
# @author Rodrigo Botafogo
|
5
|
+
#
|
6
|
+
# Copyright © 2018 Rodrigo Botafogo. All Rights Reserved. Permission to use, copy, modify,
|
7
|
+
# and distribute this software and its documentation, without fee and without a signed
|
8
|
+
# licensing agreement, is hereby granted, provided that the above copyright notice, this
|
9
|
+
# paragraph and the following two paragraphs appear in all copies, modifications, and
|
10
|
+
# distributions.
|
11
|
+
#
|
12
|
+
# IN NO EVENT SHALL RODRIGO BOTAFOGO BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL,
|
13
|
+
# INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF
|
14
|
+
# THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF RODRIGO BOTAFOGO HAS BEEN ADVISED OF THE
|
15
|
+
# POSSIBILITY OF SUCH DAMAGE.
|
16
|
+
#
|
17
|
+
# RODRIGO BOTAFOGO SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
18
|
+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
|
19
|
+
# SOFTWARE AND ACCOMPANYING DOCUMENTATION, IF ANY, PROVIDED HEREUNDER IS PROVIDED "AS IS".
|
20
|
+
# RODRIGO BOTAFOGO HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
|
21
|
+
# OR MODIFICATIONS.
|
22
|
+
##########################################################################################
|
23
|
+
|
24
|
+
require 'galaaz'
|
25
|
+
require 'ggplot'
|
26
|
+
|
27
|
+
module CorpTheme

  #--------------------------------------------------------------------------------------
  # Builds the theme shared by the plots in this example: major and minor grids, the
  # panel border and the panel background are all blanked out, and axis text and axis
  # titles are drawn in navy (#000080) at size 8.
  #
  # Side effect: sets R's 'scipen' option so numbers are not shown as 1e+48.
  #
  # @return the result of adding (with +) the individual R.theme settings, left to right
  #--------------------------------------------------------------------------------------

  def self.global_theme
    # turn-off scientific notation like 1e+48
    R.options(scipen: 999)

    navy = "#000080"

    R.theme(panel__grid__major: E.element_blank) +        # no major grid
      R.theme(panel__grid__minor: E.element_blank) +      # no minor grid
      R.theme(panel__border: E.element_blank) +           # no border
      R.theme(panel__background: E.element_blank) +       # no background
      R.theme(axis__text: E.element_text(size: 8, color: navy)) +
      R.theme(axis__title: E.element_text(color: navy, face: "bold", size: 8, hjust: 1))
  end

end
|
59
|
+
|
60
|
+
|
61
|
+
class ScatterPlot

  attr_accessor :title
  attr_accessor :subtitle
  attr_accessor :caption
  attr_accessor :x_label
  attr_accessor :y_label

  #--------------------------------------------------------------------------------------
  # Initialize the plot with the data and the x and y variables
  # @param data the object on which 'ggplot' will be called
  # @param x the variable mapped to the x axis
  # @param y the variable mapped to the y axis
  #--------------------------------------------------------------------------------------

  def initialize(data, x:, y:)
    @data = data
    @x = x
    @y = y
    # optional settings; filled in by group_by and add_smoothing_line
    @color_by = nil
    @size_by = nil
    @method = nil
    @confidence = nil
  end

  #--------------------------------------------------------------------------------------
  # Define groupings by color and size
  # @param color variable used to color the points (nil for no color grouping)
  # @param size variable used to size the points (nil for no size grouping)
  #--------------------------------------------------------------------------------------

  def group_by(color: nil, size: nil)
    @color_by = color
    @size_by = size
  end

  #--------------------------------------------------------------------------------------
  # Add a smoothing line. If confidence is true, a confidence interval is drawn around
  # the line; if false, only the line is drawn.
  # @param method the smoothing method handed to geom_smooth
  # @param confidence [Boolean] handed to geom_smooth as its 'se' argument
  #--------------------------------------------------------------------------------------

  def add_smoothing_line(method:, confidence: true)
    @method = method
    @confidence = confidence
  end

  #--------------------------------------------------------------------------------------
  # Creates the graph labels: title, subtitle, caption and the two axis labels
  # @param title [String] The title to add to the graph
  # @param subtitle [String] The subtitle to add to the graph
  # @param caption [String] The caption to add to the graph
  # @param x_label [String] The label of the x axis
  # @param y_label [String] The label of the y axis
  # @return the result of R.labs, which can be added to a plot with +
  #--------------------------------------------------------------------------------------

  def graph_params(title: "", subtitle: "", caption: "", x_label: "", y_label: "")
    # labs names the axis labels after their aesthetics, i.e. 'x' and 'y'
    R.labs(
      title: title,
      subtitle: subtitle,
      caption: caption,
      x: x_label,
      y: y_label
    )
  end

  #--------------------------------------------------------------------------------------
  # Prepare the plot's points, grouped by color and/or size when group_by was called
  # with those variables
  #--------------------------------------------------------------------------------------

  def points
    params = {}
    params[:col] = @color_by if @color_by
    params[:size] = @size_by if @size_by
    R.geom_point(E.aes(params))
  end

  #--------------------------------------------------------------------------------------
  # Plots the scatterplot
  # NOTE(review): the axis limits below are hard-coded (x in 0..0.1, y in 0..500000) and
  # look tailored to the midwest example; confirm before reusing with other data.
  #--------------------------------------------------------------------------------------

  def plot
    R.awt

    graph = @data.ggplot(E.aes(x: @x, y: @y)) + points
    # the smoothing layer is only added when add_smoothing_line was called
    graph += R.geom_smooth(method: @method, se: @confidence) if @method
    graph = graph +
            R.xlim(R.c(0, 0.1)) +
            R.ylim(R.c(0, 500000)) +
            graph_params(title: @title,
                         subtitle: @subtitle,
                         y_label: @y_label,
                         x_label: @x_label,
                         caption: @caption) +
            CorpTheme.global_theme

    puts graph
  end

end
|
146
|
+
|
147
|
+
# Build the scatter plot of area against total population for the midwest data
sp = ScatterPlot.new(~:midwest, x: :area, y: :poptotal)
sp.title = "Midwest Dataset - Scatterplot"
sp.subtitle = "Area Vs Population"
sp.caption = "Source: midwest"
sp.x_label = "Area"
sp.y_label = "Population"
sp.group_by(color: :state, size: :popdensity)    # try sp.group_by(color: :state)
# available methods: "lm", "glm", "loess", "gam"
sp.add_smoothing_line(method: "glm")
sp.plot

# Wait for a line on standard input before the script ends (presumably so the plot
# stays on screen). Reading from $stdin directly ignores command line arguments, and
# the result is discarded so an end-of-file (nil) is harmless.
$stdin.gets
|
data/examples/R/calc.R
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
x <- matrix(runif(1000000), 1000, 1000)
|
2
|
+
# Computes the mutual information (natural log, so in nats) of a joint distribution.
#
# joint_dist: numeric matrix of weights; it is normalized here so its entries sum
#             to 1, rows and columns being the two variables.
# Returns a single number: the sum over all cells of p(i,j) * log(p(i,j) / (p(i) * p(j))).
mutual_R <- function(joint_dist) {
  joint_dist <- joint_dist / sum(joint_dist)
  mutual_information <- 0
  num_rows <- nrow(joint_dist)
  num_cols <- ncol(joint_dist)
  colsums <- colSums(joint_dist)
  rowsums <- rowSums(joint_dist)
  # seq_len(n) is empty when n is 0, whereas 1:n would count down 1, 0
  for (i in seq_len(num_rows)) {
    for (j in seq_len(num_cols)) {
      temp <- log(joint_dist[i, j] / (colsums[j] * rowsums[i]))
      # a zero cell gives log(0) = -Inf (or NaN for 0/0); such cells contribute nothing
      if (!is.finite(temp)) {
        temp <- 0
      }
      mutual_information <- mutual_information + joint_dist[i, j] * temp
    }
  }
  mutual_information
}
|
21
|
+
system.time(mutual_R(x))
|
@@ -0,0 +1,29 @@
|
|
1
|
+
library(grid)
|
2
|
+
# Draws ten random points with grid graphics into an off-screen Java BufferedImage
# and shows that image in a Swing JFrame, returning only once the frame is no longer
# visible. Takes no arguments.
# NOTE(review): java.type() and grDevices:::awt() are not part of standard R;
# presumably this only runs on FastR/GraalVM - confirm.
openJavaWindow <- function() {
  # create image and register graphics
  imageClass <- java.type('java.awt.image.BufferedImage')
  image <- new(imageClass, 450, 450, imageClass$TYPE_INT_RGB)
  graphics <- image$getGraphics()
  graphics$setBackground(java.type('java.awt.Color')$white)
  grDevices:::awt(image$getWidth(), image$getHeight(), graphics)

  # draw image
  grid.newpage()
  pushViewport(plotViewport(margins = c(5.1, 4.1, 4.1, 2.1)))
  grid.xaxis()
  grid.yaxis()
  grid.points(x = runif(10, 0, 1), y = runif(10, 0, 1),
              size = unit(0.01, "npc"))

  # open frame with image
  imageIcon <- new("javax.swing.ImageIcon", image)
  label <- new("javax.swing.JLabel", imageIcon)
  panel <- new("javax.swing.JPanel")
  panel$add(label)
  frame <- new("javax.swing.JFrame")
  frame$setMinimumSize(new("java.awt.Dimension",
                           image$getWidth(), image$getHeight()))
  frame$add(panel)
  # TRUE rather than T: T is an ordinary variable that can be reassigned
  frame$setVisible(TRUE)
  # check once a second whether the frame is still visible
  while (frame$isVisible()) Sys.sleep(1)
}
|
29
|
+
openJavaWindow()
|
File without changes
|
File without changes
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'galaaz'
|
4
|
+
|
5
|
+
local_dir = File.expand_path File.dirname(__FILE__)
|
6
|
+
|
7
|
+
# This dataset comes from Baseball-Reference.com.
|
8
|
+
baseball = R.read__csv("#{local_dir}/baseball.csv")
|
9
|
+
|
10
|
+
# Let's look at the data available for Moneyball.
|
11
|
+
moneyball = baseball.subset(baseball.Year < 2002)
|
12
|
+
|
13
|
+
# Let's see if we can predict the number of wins, by looking at
|
14
|
+
# runs allowed (RA) and runs scored (RS). RD is the runs difference.
|
15
|
+
# We are making a linear model for predicting wins (W) based on RD
|
16
|
+
moneyball.RD = moneyball.RS - moneyball.RA
|
17
|
+
wins_reg = R.lm(+:W =~ +:RD, data: moneyball)
|
18
|
+
|
19
|
+
# Prints a titled section: the title, a rule of '=' characters as wide as the title,
# the data, the same rule again, and finally an empty line.
def show(title, data)
  rule = "=" * title.size
  [title, rule, data, rule].each { |line| puts line }
  puts
end
|
26
|
+
|
27
|
+
puts "Fitting a linear model on the Baseball dataset as done in Moneyball:"
|
28
|
+
puts
|
29
|
+
show "Coefficients of the Linear Regression", wins_reg.coefficients
|
30
|
+
show "Fitted values summary:", wins_reg.fitted__values.summary
|
31
|
+
show "Residuals summary", wins_reg.residuals.summary
|
32
|
+
|
33
|
+
|
File without changes
|
@@ -0,0 +1,374 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'galaaz'
|
4
|
+
|
5
|
+
# These examples were extracted from "Advanced R", by Hadley Wickham, available on the
|
6
|
+
# web at: http://adv-r.had.co.nz/Subsetting.html#applications
|
7
|
+
|
8
|
+
#------------------------------------------------------------------------------------------
|
9
|
+
# Lookup tables (character subsetting)
|
10
|
+
# Character matching provides a powerful way to make lookup tables.
|
11
|
+
# Say you want to convert abbreviations:
|
12
|
+
#------------------------------------------------------------------------------------------
|
13
|
+
|
14
|
+
x = R.c("m", "f", "u", "f", "f", "m", "m")
|
15
|
+
lookup = R.c(m: "Male", f: "Female", u: R::NA)
|
16
|
+
lookup[x].pp
|
17
|
+
print("\n")
|
18
|
+
|
19
|
+
# m f u f f m m
|
20
|
+
# "Male" "Female" NA "Female" "Female" "Male" "Male"
|
21
|
+
|
22
|
+
R.unname(lookup[x]).pp
|
23
|
+
print("\n")
|
24
|
+
|
25
|
+
# [1] "Male" "Female" NA "Female" "Female" "Male" "Male"
|
26
|
+
|
27
|
+
|
28
|
+
#------------------------------------------------------------------------------------------
|
29
|
+
# Matching and merging by hand (integer subsetting)
|
30
|
+
#------------------------------------------------------------------------------------------
|
31
|
+
|
32
|
+
# You may have a more complicated lookup table which has multiple columns of information.
|
33
|
+
# Suppose we have a vector of grades, and a table that describes their properties:
|
34
|
+
# In R a vector c(1, 2, 3) is a double vector, when using polyglot R.c(1, 2, 3) is an
|
35
|
+
# integer vector, the equivalent of doing c(1L, 2L, 3L) in R. Function 'match' does not
|
36
|
+
# work correctly with integer vector, it has to be a double.
|
37
|
+
grades = R.c(1.0, 2.0, 2.0, 3.0, 1.0)
|
38
|
+
|
39
|
+
info = R.data__frame(
|
40
|
+
grade: (3..1),
|
41
|
+
desc: R.c("Excellent", "Good", "Poor"),
|
42
|
+
fail: R.c(false, false, true)
|
43
|
+
)
|
44
|
+
|
45
|
+
# We want to duplicate the info table so that we have a row for each value in grades.
|
46
|
+
# We can do this in two ways, either using match() and integer subsetting,
|
47
|
+
# or rownames() and character subsetting:
|
48
|
+
|
49
|
+
# Using match
|
50
|
+
id = R.match(grades, info.grade)
|
51
|
+
info[id, :all].pp
|
52
|
+
print("\n")
|
53
|
+
|
54
|
+
# grade desc fail
|
55
|
+
# 3 1 Poor TRUE
|
56
|
+
# 2 2 Good FALSE
|
57
|
+
# 2.1 2 Good FALSE
|
58
|
+
# 1 3 Excellent FALSE
|
59
|
+
# 3.1 1 Poor TRUE
|
60
|
+
|
61
|
+
# Using rownames
|
62
|
+
info.rownames = info.grade
|
63
|
+
info[grades.as__character, :all].pp
|
64
|
+
print("\n")
|
65
|
+
|
66
|
+
# grade desc fail
|
67
|
+
# 1 3 Excellent FALSE
|
68
|
+
# 2 2 Good FALSE
|
69
|
+
# 2.1 2 Good FALSE
|
70
|
+
# 3 1 Poor TRUE
|
71
|
+
# 1.1 3 Excellent FALSE
|
72
|
+
|
73
|
+
#------------------------------------------------------------------------------------------
|
74
|
+
# Random samples/bootstrap (integer subsetting)
|
75
|
+
#------------------------------------------------------------------------------------------
|
76
|
+
|
77
|
+
# You can use integer indices to perform random sampling or bootstrapping
|
78
|
+
# of a vector or data frame. sample() generates a vector of indices, then
|
79
|
+
# subsetting to access the values:
|
80
|
+
df = R.data__frame(x: R.rep((1..3), each: 2), y: (6..1), z: (~:letters)[(1..6)])
|
81
|
+
|
82
|
+
# Set seed for reproducibility
|
83
|
+
R.set__seed(10)
|
84
|
+
|
85
|
+
# Randomly reorder
|
86
|
+
df[R.sample(df.nrow), :all].pp
|
87
|
+
print("\n")
|
88
|
+
|
89
|
+
# x y z
|
90
|
+
# 4 2 3 d
|
91
|
+
# 2 1 5 b
|
92
|
+
# 5 3 2 e
|
93
|
+
# 3 2 4 c
|
94
|
+
# 1 1 6 a
|
95
|
+
# 6 3 1 f
|
96
|
+
|
97
|
+
# Select 3 random rows
|
98
|
+
df[R.sample(df.nrow, 3), :all].pp
|
99
|
+
print("\n")
|
100
|
+
|
101
|
+
# x y z
|
102
|
+
# 2 1 5 b
|
103
|
+
# 6 3 1 f
|
104
|
+
# 3 2 4 c
|
105
|
+
|
106
|
+
# Select 6 bootstrap replicates
|
107
|
+
df[R.sample(df.nrow, 6, rep: true), :all].pp
|
108
|
+
print("\n")
|
109
|
+
|
110
|
+
# x y z
|
111
|
+
# 3 2 4 c
|
112
|
+
# 4 2 3 d
|
113
|
+
# 4.1 2 3 d
|
114
|
+
# 1 1 6 a
|
115
|
+
# 4.2 2 3 d
|
116
|
+
# 3.1 2 4 c
|
117
|
+
|
118
|
+
#------------------------------------------------------------------------------------------
|
119
|
+
# Ordering (integer subsetting)
|
120
|
+
#------------------------------------------------------------------------------------------
|
121
|
+
|
122
|
+
x = R.c("b", "c", "a")
|
123
|
+
x.order.pp
|
124
|
+
print("\n")
|
125
|
+
|
126
|
+
# [1] 3 1 2
|
127
|
+
|
128
|
+
x[x.order].pp
|
129
|
+
print("\n")
|
130
|
+
|
131
|
+
# [1] "a" "b" "c"
|
132
|
+
|
133
|
+
# Randomly reorder df
|
134
|
+
df2 = df[R.sample(df.nrow), (3..1)]
|
135
|
+
df2.pp
|
136
|
+
print("\n")
|
137
|
+
|
138
|
+
# z y x
|
139
|
+
# 3 c 4 2
|
140
|
+
# 1 a 6 1
|
141
|
+
# 2 b 5 1
|
142
|
+
# 4 d 3 2
|
143
|
+
# 6 f 1 3
|
144
|
+
# 5 e 2 3
|
145
|
+
|
146
|
+
df2[df2.x.order, :all].pp
|
147
|
+
print("\n")
|
148
|
+
|
149
|
+
# z y x
|
150
|
+
# 1 a 6 1
|
151
|
+
# 2 b 5 1
|
152
|
+
# 3 c 4 2
|
153
|
+
# 4 d 3 2
|
154
|
+
# 6 f 1 3
|
155
|
+
# 5 e 2 3
|
156
|
+
|
157
|
+
df2[:all, df2.names.order].pp
|
158
|
+
print("\n")
|
159
|
+
|
160
|
+
# x y z
|
161
|
+
# 3 2 4 c
|
162
|
+
# 1 1 6 a
|
163
|
+
# 2 1 5 b
|
164
|
+
# 4 2 3 d
|
165
|
+
# 6 3 1 f
|
166
|
+
# 5 3 2 e
|
167
|
+
|
168
|
+
#------------------------------------------------------------------------------------------
|
169
|
+
# Expanding aggregated counts (integer subsetting)
|
170
|
+
#
|
171
|
+
# Sometimes you get a data frame where identical rows have been collapsed into one and a
|
172
|
+
# count column has been added. rep() and integer subsetting make it easy to uncollapse
|
173
|
+
# the data by subsetting with a repeated row index:
|
174
|
+
#------------------------------------------------------------------------------------------
|
175
|
+
|
176
|
+
df = R.data__frame(x: R.c(2, 4, 1), y: R.c(9, 11, 6), n: R.c(3, 5, 1))
|
177
|
+
R.rep((1..(df.nrow << 0)), df.n).pp
|
178
|
+
print("\n")
|
179
|
+
|
180
|
+
# [1] 1 1 1 2 2 2 2 2 3
|
181
|
+
|
182
|
+
df[R.rep((1..df.nrow << 0), df.n), :all].pp
|
183
|
+
print("\n")
|
184
|
+
|
185
|
+
# x y n
|
186
|
+
# 1 2 9 3
|
187
|
+
# 1.1 2 9 3
|
188
|
+
# 1.2 2 9 3
|
189
|
+
# 2 4 11 5
|
190
|
+
# 2.1 4 11 5
|
191
|
+
# 2.2 4 11 5
|
192
|
+
# 2.3 4 11 5
|
193
|
+
# 2.4 4 11 5
|
194
|
+
# 3 1 6 1
|
195
|
+
|
196
|
+
#------------------------------------------------------------------------------------------
|
197
|
+
# Removing columns from data frames (character subsetting)
|
198
|
+
#
|
199
|
+
# There are two ways to remove columns from a data frame. You can set individual columns
|
200
|
+
# to nil:
|
201
|
+
#------------------------------------------------------------------------------------------
|
202
|
+
|
203
|
+
df = R.data__frame(x: (1..3), y: (3..1), z: (~:letters)[(1..3)])
|
204
|
+
# Not implemented yet
|
205
|
+
# df.z = nil
|
206
|
+
df.pp
|
207
|
+
print("\n")
|
208
|
+
|
209
|
+
df = R.data__frame(x: (1..3), y: (3..1), z: (~:letters)[(1..3)])
|
210
|
+
df[R.c("x", "y")].pp
|
211
|
+
print("\n")
|
212
|
+
|
213
|
+
# x y
|
214
|
+
# 1 1 3
|
215
|
+
# 2 2 2
|
216
|
+
# 3 3 1
|
217
|
+
|
218
|
+
df[df.names.setdiff("z")].pp
|
219
|
+
print("\n")
|
220
|
+
|
221
|
+
# x y
|
222
|
+
# 1 1 3
|
223
|
+
# 2 2 2
|
224
|
+
# 3 3 1
|
225
|
+
|
226
|
+
#------------------------------------------------------------------------------------------
|
227
|
+
# Selecting rows based on a condition (logical subsetting)
|
228
|
+
#
|
229
|
+
# Because it allows you to easily combine conditions from multiple columns, logical
|
230
|
+
# subsetting is probably the most commonly used technique for extracting rows out of
|
231
|
+
# a data frame.
|
232
|
+
#------------------------------------------------------------------------------------------
|
233
|
+
|
234
|
+
mtcars = ~:mtcars
|
235
|
+
|
236
|
+
mtcars[mtcars.gear == 5, :all].pp
|
237
|
+
print("\n")
|
238
|
+
|
239
|
+
# mpg cyl disp hp drat wt qsec vs am gear carb
|
240
|
+
# Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2
|
241
|
+
# Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
|
242
|
+
# Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.5 0 1 5 4
|
243
|
+
# Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.5 0 1 5 6
|
244
|
+
# Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.6 0 1 5 8
|
245
|
+
|
246
|
+
mtcars[(mtcars.gear == 5) & (mtcars.cyl == 4), :all].pp
|
247
|
+
print("\n")
|
248
|
+
|
249
|
+
# mpg cyl disp hp drat wt qsec vs am gear carb
|
250
|
+
# Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.7 0 1 5 2
|
251
|
+
# Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.9 1 1 5 2
|
252
|
+
|
253
|
+
|
254
|
+
#------------------------------------------------------------------------------------------
|
255
|
+
# Boolean algebra vs. sets (logical & integer subsetting)
|
256
|
+
#
|
257
|
+
# It’s useful to be aware of the natural equivalence between set operations (integer
|
258
|
+
# subsetting) and boolean algebra (logical subsetting)
|
259
|
+
#------------------------------------------------------------------------------------------
|
260
|
+
|
261
|
+
x = R.sample(10) < 4
|
262
|
+
x.which.pp
|
263
|
+
print("\n")
|
264
|
+
|
265
|
+
# [1] 3 7 10
|
266
|
+
|
267
|
+
#===
|
268
|
+
x1 = R.c((1..10)) % 2 == 0
|
269
|
+
x1.pp
|
270
|
+
print("\n")
|
271
|
+
|
272
|
+
# [1] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE
|
273
|
+
|
274
|
+
#===
|
275
|
+
x2 = x1.which
|
276
|
+
x2.pp
|
277
|
+
print("\n")
|
278
|
+
|
279
|
+
# [1] 2 4 6 8 10
|
280
|
+
|
281
|
+
#===
|
282
|
+
y1 = R.c((1..10)) % 5 == 0
|
283
|
+
y1.pp
|
284
|
+
print("\n")
|
285
|
+
|
286
|
+
# [1] FALSE FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE TRUE
|
287
|
+
|
288
|
+
#===
|
289
|
+
y2 = y1.which
|
290
|
+
y2.pp
|
291
|
+
print("\n")
|
292
|
+
|
293
|
+
# [1] 5 10
|
294
|
+
|
295
|
+
#===
|
296
|
+
# X & Y <-> intersect(x, y)
|
297
|
+
(x1 & y1).pp
|
298
|
+
print("\n")
|
299
|
+
|
300
|
+
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE
|
301
|
+
|
302
|
+
#===
|
303
|
+
# This example shows the problem with having R objects returning either
|
304
|
+
# vector or scalar. We don't know the type of the result of applying
|
305
|
+
# intersect. If this is a vector, then we need to print it with pp
|
306
|
+
# but if this is a scalar, we need to print it with regular Ruby 'p' or
|
307
|
+
# 'print'
|
308
|
+
puts R.intersect(x2, y2)
|
309
|
+
print("\n")
|
310
|
+
|
311
|
+
# 10
|
312
|
+
|
313
|
+
puts x2.intersect y2
|
314
|
+
|
315
|
+
# 10
|
316
|
+
|
317
|
+
#===
|
318
|
+
# X | Y <-> union(x, y)
|
319
|
+
(x1 | y1).pp
|
320
|
+
print("\n")
|
321
|
+
|
322
|
+
# [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE TRUE
|
323
|
+
|
324
|
+
#===
|
325
|
+
R.union(x2, y2).pp
|
326
|
+
print("\n")
|
327
|
+
|
328
|
+
# [1] 2 4 6 8 10 5
|
329
|
+
|
330
|
+
(x2.union y2).pp
|
331
|
+
|
332
|
+
# [1] 2 4 6 8 10 5
|
333
|
+
|
334
|
+
#===
|
335
|
+
# X & !Y <-> setdiff(x, y)
|
336
|
+
(x1 & !y1).pp
|
337
|
+
print("\n")
|
338
|
+
|
339
|
+
# [1] FALSE TRUE FALSE TRUE FALSE TRUE FALSE TRUE FALSE FALSE
|
340
|
+
|
341
|
+
#===
|
342
|
+
R.setdiff(x2, y2).pp
|
343
|
+
print("\n")
|
344
|
+
|
345
|
+
# [1] 2 4 6 8
|
346
|
+
|
347
|
+
(x2.setdiff y2).pp
|
348
|
+
|
349
|
+
# [1] 2 4 6 8
|
350
|
+
|
351
|
+
|
352
|
+
#===
|
353
|
+
# xor(X, Y) <-> setdiff(union(x, y), intersect(x, y))
|
354
|
+
R.xor(x1, y1).pp
|
355
|
+
print("\n")
|
356
|
+
|
357
|
+
# [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE
|
358
|
+
|
359
|
+
# Writing the same as the last example in a Ruby style
|
360
|
+
(x1.xor y1).pp
|
361
|
+
|
362
|
+
# [1] FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE FALSE FALSE
|
363
|
+
|
364
|
+
#===
|
365
|
+
R.setdiff(R.union(x2, y2), R.intersect(x2, y2)).pp
|
366
|
+
print("\n")
|
367
|
+
|
368
|
+
# [1] 2 4 6 8 5
|
369
|
+
|
370
|
+
# Writing the same as the last example in a Ruby style
|
371
|
+
((x2.union y2).setdiff (x2.intersect y2)).pp
|
372
|
+
print("\n")
|
373
|
+
|
374
|
+
# [1] 2 4 6 8 5
|