feldtruby 0.3.13 → 0.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/feldtruby/statistics.rb +40 -45
- data/lib/feldtruby/version.rb +1 -1
- data/test/test_statistics.rb +34 -28
- data/test/tmp.csv +9518 -316
- metadata +2 -2
data/lib/feldtruby/statistics.rb
CHANGED
@@ -232,49 +232,36 @@ module FeldtRuby::Statistics::Plotting
|
|
232
232
|
|
233
233
|
end
|
234
234
|
|
235
|
-
def plot_2dims(csvFilePath,
|
235
|
+
def plot_2dims(csvFilePath, plotCommand, xlabel, ylabel, title = "scatterplot")
|
236
236
|
|
237
|
-
|
238
|
-
|
239
|
-
|
237
|
+
script = <<-EOS
|
238
|
+
data <- read.csv(#{csvFilePath.inspect})
|
239
|
+
#{plotCommand}
|
240
|
+
#{ggplot2_setup_and_theme()}
|
241
|
+
f
|
242
|
+
EOS
|
240
243
|
|
241
|
-
|
242
|
-
|
243
|
-
gfx_device(format, width, height)
|
244
|
-
]
|
244
|
+
subst_eval script, {:title => title,
|
245
|
+
:xlabel => xlabel, :ylabel => ylabel}
|
245
246
|
|
246
|
-
|
247
|
-
#plot = [yield().join(" ") + " + theme_bw(base_size = 12, base_family = \"\")"]
|
248
|
-
plot = yield()
|
249
|
-
plot << " + theme_bw(base_size = 12, base_family = \"\")"
|
247
|
+
end
|
250
248
|
|
251
|
-
|
252
|
-
|
253
|
-
|
249
|
+
def hexbin_heatmap(csvFilePath, xlabel, ylabel, title = "heatmap", bins = 20)
|
250
|
+
plot_2dims(csvFilePath,
|
251
|
+
"f <- ggplot(data, aes(#{ylabel}, #{ylabel})) + geom_hex( bins = #{bins} )",
|
252
|
+
xlabel, ylabel, title)
|
253
|
+
end
|
254
254
|
|
255
|
-
|
256
|
-
eval lines.join("\n")
|
255
|
+
def scatter_plot(csvFilePath, xlabel, ylabel, title = "scatterplot")
|
257
256
|
|
258
|
-
|
257
|
+
script = <<-EOS
|
258
|
+
# smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}
|
259
|
+
f <- ggplot(data, aes(#{xlabel}, #{ylabel})) + geom_point(shape = 1)
|
260
|
+
f <- f + stat_smooth()
|
261
|
+
EOS
|
259
262
|
|
260
|
-
|
261
|
-
def scatter_plot(csvFilePath, graphFilePath, xName, yName, title = "scatterplot", smoothFit = true, format = nil, width = 7, height = 5)
|
262
|
-
plot_2dims(csvFilePath, graphFilePath, xName, yName, title, format, width, height) {
|
263
|
-
[
|
264
|
-
"smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}",
|
265
|
-
"ggplot(data, aes(#{xName}, #{yName})) + ",
|
266
|
-
" geom_point(shape = 1) + ", # Each point is non-filled circle
|
267
|
-
(smoothFit ? " geom_smooth(method = smoothing_method) + " : nil),
|
268
|
-
" ggtitle(#{title.inspect})"
|
269
|
-
].compact
|
270
|
-
}
|
271
|
-
end
|
263
|
+
plot_2dims(csvFilePath, script, xlabel, ylabel, title)
|
272
264
|
|
273
|
-
# Scatter plot of columns xName vs yName in csvFilePath is saved to graphFilePath.
|
274
|
-
def hexbin_heatmap(csvFilePath, graphFilePath, xName, yName, title = "heatmap", bins = 30, format = "pdf", width = 7, height = 5)
|
275
|
-
plot_2dims(csvFilePath, graphFilePath, xName, yName, title, format, width, height) {
|
276
|
-
[ "ggplot(data, aes(#{xName}, #{yName})) + geom_hex( bins = #{bins} ) + ggtitle(\"#{title}\")"]
|
277
|
-
}
|
278
265
|
end
|
279
266
|
|
280
267
|
GfxFormatToGfxParams = {
|
@@ -305,14 +292,28 @@ module FeldtRuby::Statistics::Plotting
|
|
305
292
|
|
306
293
|
end
|
307
294
|
|
295
|
+
def ggplot2_setup_and_theme
|
296
|
+
|
297
|
+
include_library("ggplot2")
|
298
|
+
include_library("reshape2")
|
299
|
+
|
300
|
+
script = <<-EOS
|
301
|
+
f <- f + ggtitle(_title_) + xlab(_xlabel_) + ylab(_ylabel_)
|
302
|
+
f <- f + theme_bw()
|
303
|
+
f <- f + theme(
|
304
|
+
plot.title = element_text(face="bold", size=12),
|
305
|
+
axis.title.x = element_text(face="bold", size=10),
|
306
|
+
axis.title.y = element_text(face="bold", size=10)
|
307
|
+
)
|
308
|
+
EOS
|
309
|
+
|
310
|
+
end
|
311
|
+
|
308
312
|
# Overlaid density graph of the observations (sampled distributions) in data1
|
309
313
|
# and data2. The _dataMap_ maps the name of each data series to an array with
|
310
314
|
# its observations.
|
311
315
|
def overlaid_densities(dataMap, title = "Densities of distributions", datasetsName = "distribution", xlabel = "values", ylabel = "density")
|
312
316
|
|
313
|
-
include_library("ggplot2")
|
314
|
-
include_library("reshape2")
|
315
|
-
|
316
317
|
cardinalities = dataMap.values.map {|vs| vs.length}.uniq
|
317
318
|
|
318
319
|
unless cardinalities.length == 1
|
@@ -327,13 +328,7 @@ module FeldtRuby::Statistics::Plotting
|
|
327
328
|
names(df.m)[2] <- _datasetsName_
|
328
329
|
f <- ggplot(df.m, aes(value, fill=#{datasetsName}))
|
329
330
|
f <- f + geom_density(alpha = 0.2, size = 0.5) + scale_color_brewer()
|
330
|
-
|
331
|
-
f <- f + theme_bw()
|
332
|
-
f <- f + theme(
|
333
|
-
plot.title = element_text(face="bold", size=12),
|
334
|
-
axis.title.x = element_text(face="bold", size=10),
|
335
|
-
axis.title.y = element_text(face="bold", size=10)
|
336
|
-
)
|
331
|
+
#{ggplot2_setup_and_theme()}
|
337
332
|
f
|
338
333
|
EOS
|
339
334
|
|
data/lib/feldtruby/version.rb
CHANGED
data/test/test_statistics.rb
CHANGED
@@ -141,34 +141,40 @@ describe "Plotting" do
|
|
141
141
|
|
142
142
|
end
|
143
143
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
#
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
144
|
+
it "can do a scatter plot" do
|
145
|
+
|
146
|
+
d = File.dirname(__FILE__) + "/"
|
147
|
+
filename = d + "tmp.csv"
|
148
|
+
|
149
|
+
out = "scatterplot.pdf"
|
150
|
+
|
151
|
+
RC.save_graph(out) do
|
152
|
+
RC.scatter_plot(filename, "size", "height", "Scatterplot")
|
153
|
+
end
|
154
|
+
|
155
|
+
File.exist?(out).must_equal true
|
156
|
+
|
157
|
+
File.delete out # remove the artifact so a stale file cannot satisfy File.exist? on the next run
|
158
|
+
|
159
|
+
end
|
160
|
+
|
161
|
+
it "can do a hexbin heatmap plot" do
|
162
|
+
|
163
|
+
d = File.dirname(__FILE__) + "/"
|
164
|
+
filename = d + "tmp.csv"
|
165
|
+
|
166
|
+
out = "hexbin.pdf"
|
167
|
+
|
168
|
+
RC.save_graph(out) do
|
169
|
+
RC.hexbin_heatmap(filename, "size", "height",
|
170
|
+
"Hexbin heatmap", 30)
|
171
|
+
end
|
172
|
+
|
173
|
+
File.exist?(out).must_equal true
|
174
|
+
|
175
|
+
File.delete out
|
176
|
+
|
177
|
+
end
|
172
178
|
|
173
179
|
it "can do overlaid density plot of three arrays" do
|
174
180
|
|