feldtruby 0.3.13 → 0.3.14
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/feldtruby/statistics.rb +40 -45
- data/lib/feldtruby/version.rb +1 -1
- data/test/test_statistics.rb +34 -28
- data/test/tmp.csv +9518 -316
- metadata +2 -2
data/lib/feldtruby/statistics.rb
CHANGED
@@ -232,49 +232,36 @@ module FeldtRuby::Statistics::Plotting
|
|
232
232
|
|
233
233
|
end
|
234
234
|
|
235
|
-
def plot_2dims(csvFilePath,
|
235
|
+
def plot_2dims(csvFilePath, plotCommand, xlabel, ylabel, title = "scatterplot")
|
236
236
|
|
237
|
-
|
238
|
-
|
239
|
-
|
237
|
+
script = <<-EOS
|
238
|
+
data <- read.csv(#{csvFilePath.inspect})
|
239
|
+
#{plotCommand}
|
240
|
+
#{ggplot2_setup_and_theme()}
|
241
|
+
f
|
242
|
+
EOS
|
240
243
|
|
241
|
-
|
242
|
-
|
243
|
-
gfx_device(format, width, height)
|
244
|
-
]
|
244
|
+
subst_eval script, {:title => title,
|
245
|
+
:xlabel => xlabel, :ylabel => ylabel}
|
245
246
|
|
246
|
-
|
247
|
-
#plot = [yield().join(" ") + " + theme_bw(base_size = 12, base_family = \"\")"]
|
248
|
-
plot = yield()
|
249
|
-
plot << " + theme_bw(base_size = 12, base_family = \"\")"
|
247
|
+
end
|
250
248
|
|
251
|
-
|
252
|
-
|
253
|
-
|
249
|
+
def hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 20)
|
250
|
+
plot_2dims(csvFilePath,
|
251
|
+
"f <- ggplot(data, aes(#{ylabel}, #{ylabel})) + geom_hex( bins = #{bins} )",
|
252
|
+
xlabel, ylabel, title)
|
253
|
+
end
|
254
254
|
|
255
|
-
|
256
|
-
eval lines.join("\n")
|
255
|
+
def scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot")
|
257
256
|
|
258
|
-
|
257
|
+
script = <<-EOS
|
258
|
+
# smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}
|
259
|
+
f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_point(shape = 1)
|
260
|
+
f <- f + stat_smooth()
|
261
|
+
EOS
|
259
262
|
|
260
|
-
|
261
|
-
def scatter_plot(csvFilePath, graphFilePath, xName, yName, title = "scatterplot", smoothFit = true, format = nil, width = 7, height = 5)
|
262
|
-
plot_2dims(csvFilePath, graphFilePath, xName, yName, title, format, width, height) {
|
263
|
-
[
|
264
|
-
"smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}",
|
265
|
-
"ggplot(data, aes(#{xName}, #{yName})) + ",
|
266
|
-
" geom_point(shape = 1) + ", # Each point is non-filled circle
|
267
|
-
(smoothFit ? " geom_smooth(method = smoothing_method) + " : nil),
|
268
|
-
" ggtitle(#{title.inspect})"
|
269
|
-
].compact
|
270
|
-
}
|
271
|
-
end
|
263
|
+
plot_2dims(csvFilePath, script, xlabel, ylabel, title)
|
272
264
|
|
273
|
-
# Scatter plot of columns xName vs yName in csvFilePath is saved to graphFilePath.
|
274
|
-
def hexbin_heatmap(csvFilePath, graphFilePath, xName, yName, title = "heatmap", bins = 30, format = "pdf", width = 7, height = 5)
|
275
|
-
plot_2dims(csvFilePath, graphFilePath, xName, yName, title, format, width, height) {
|
276
|
-
[ "ggplot(data, aes(#{xName}, #{yName})) + geom_hex( bins = #{bins} ) + ggtitle(\"#{title}\")"]
|
277
|
-
}
|
278
265
|
end
|
279
266
|
|
280
267
|
GfxFormatToGfxParams = {
|
@@ -305,14 +292,28 @@ module FeldtRuby::Statistics::Plotting
|
|
305
292
|
|
306
293
|
end
|
307
294
|
|
295
|
+
def ggplot2_setup_and_theme
|
296
|
+
|
297
|
+
include_library("ggplot2")
|
298
|
+
include_library("reshape2")
|
299
|
+
|
300
|
+
script = <<-EOS
|
301
|
+
f <- f + ggtitle(_title_) + xlab(_xlabel_) + ylab(_ylabel_)
|
302
|
+
f <- f + theme_bw()
|
303
|
+
f <- f + theme(
|
304
|
+
plot.title = element_text(face="bold", size=12),
|
305
|
+
axis.title.x = element_text(face="bold", size=10),
|
306
|
+
axis.title.y = element_text(face="bold", size=10)
|
307
|
+
)
|
308
|
+
EOS
|
309
|
+
|
310
|
+
end
|
311
|
+
|
308
312
|
# Overlaid density graph of the observations (sampled distributions) in data1
|
309
313
|
# and data2. The _dataMap_ maps the name of each data series to an array with
|
310
314
|
# its observations.
|
311
315
|
def overlaid_densities(dataMap, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density")
|
312
316
|
|
313
|
-
include_library("ggplot2")
|
314
|
-
include_library("reshape2")
|
315
|
-
|
316
317
|
cardinalities = dataMap.values.map {|vs| vs.length}.uniq
|
317
318
|
|
318
319
|
unless cardinalities.length == 1
|
@@ -327,13 +328,7 @@ module FeldtRuby::Statistics::Plotting
|
|
327
328
|
names(df.m)[2] <- _datasetsName_
|
328
329
|
f <- ggplot(df.m, aes(value, fill=#{datasetsName}))
|
329
330
|
f <- f + geom_density(alpha = 0.2, size = 0.5) + scale_color_brewer()
|
330
|
-
|
331
|
-
f <- f + theme_bw()
|
332
|
-
f <- f + theme(
|
333
|
-
plot.title = element_text(face="bold", size=12),
|
334
|
-
axis.title.x = element_text(face="bold", size=10),
|
335
|
-
axis.title.y = element_text(face="bold", size=10)
|
336
|
-
)
|
331
|
+
#{ggplot2_setup_and_theme()}
|
337
332
|
f
|
338
333
|
EOS
|
339
334
|
|
data/lib/feldtruby/version.rb
CHANGED
data/test/test_statistics.rb
CHANGED
@@ -141,34 +141,40 @@ describe "Plotting" do
|
|
141
141
|
|
142
142
|
end
|
143
143
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
#
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
144
|
+
it "can do a scatter plot" do
|
145
|
+
|
146
|
+
d = File.dirname(__FILE__) + "/"
|
147
|
+
filename = d + "tmp.csv"
|
148
|
+
|
149
|
+
out = "scatterplot.pdf"
|
150
|
+
|
151
|
+
RC.save_graph(out) do
|
152
|
+
RC.scatter_plot(filename, "size", "height", "Scatterplot")
|
153
|
+
end
|
154
|
+
|
155
|
+
File.exist?(out).must_equal true
|
156
|
+
|
157
|
+
File.delete out # remove the generated PDF so the test leaves no artifacts behind
|
158
|
+
|
159
|
+
end
|
160
|
+
|
161
|
+
it "can do a hexbin heatmap plot" do
|
162
|
+
|
163
|
+
d = File.dirname(__FILE__) + "/"
|
164
|
+
filename = d + "tmp.csv"
|
165
|
+
|
166
|
+
out = "hexbin.pdf"
|
167
|
+
|
168
|
+
RC.save_graph(out) do
|
169
|
+
RC.hexbin_heatmap(filename, "size", "height",
|
170
|
+
"Hexbin heatmap", 30)
|
171
|
+
end
|
172
|
+
|
173
|
+
File.exist?(out).must_equal true
|
174
|
+
|
175
|
+
File.delete out
|
176
|
+
|
177
|
+
end
|
172
178
|
|
173
179
|
it "can do overlaid density plot of three arrays" do
|
174
180
|
|