feldtruby 0.3.11 → 0.3.12
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +4 -2
- data/Rplots.pdf +0 -0
- data/feldtruby.gemspec +3 -0
- data/lib/feldtruby/statistics.rb +126 -7
- data/lib/feldtruby/version.rb +1 -1
- data/test/test_statistics.rb +73 -16
- metadata +18 -1
data/Gemfile.lock
CHANGED
data/Rplots.pdf
ADDED
Binary file
|
data/feldtruby.gemspec
CHANGED
data/lib/feldtruby/statistics.rb
CHANGED
@@ -39,6 +39,21 @@ class RCommunicator
|
|
39
39
|
@r.eval str
|
40
40
|
end
|
41
41
|
|
42
|
+
# Given a script that has variable references in the form "_name_" insert
|
43
|
+
# the ruby objects mapped from these names in scriptNameToRubyValues
|
44
|
+
def subst_eval(script, scriptNameToRubyValues)
|
45
|
+
|
46
|
+
scriptNameToRubyValues.each do |key, value|
|
47
|
+
|
48
|
+
script = script.gsub("_#{key.to_s}_", ruby_object_to_R_string(value))
|
49
|
+
|
50
|
+
end
|
51
|
+
|
52
|
+
#puts "Eval'ing script:\n#{script}"
|
53
|
+
eval script
|
54
|
+
|
55
|
+
end
|
56
|
+
|
42
57
|
# This represents a hash returned as JSON from R but mapped to a
|
43
58
|
# Ruby object so we can more easily use it as if it was an R object.
|
44
59
|
class Rvalue
|
@@ -85,6 +100,32 @@ class RCommunicator
|
|
85
100
|
end
|
86
101
|
end
|
87
102
|
|
103
|
+
# Convert a Ruby object of one of the types String, Array, Integer or Float
|
104
|
+
# to a String that can be used in R code/scripts to represent the object.
|
105
|
+
def ruby_object_to_R_string(o)
|
106
|
+
|
107
|
+
case o
|
108
|
+
|
109
|
+
when String
|
110
|
+
return o.inspect
|
111
|
+
|
112
|
+
when Array
|
113
|
+
elems = o.map {|e| ruby_object_to_R_string(e)}.join(", ")
|
114
|
+
return "c(#{elems})"
|
115
|
+
|
116
|
+
when Integer
|
117
|
+
return o.to_s
|
118
|
+
|
119
|
+
when Float
|
120
|
+
return o.to_s
|
121
|
+
|
122
|
+
else
|
123
|
+
raise "Cannot represent object #{o} in valid R code"
|
124
|
+
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
128
|
+
|
88
129
|
private
|
89
130
|
|
90
131
|
def res_name(index = 1)
|
@@ -156,7 +197,51 @@ end
|
|
156
197
|
# Plotting data sets in R with ggplot2 and save them to files.
|
157
198
|
module FeldtRuby::Statistics::Plotting
|
158
199
|
|
159
|
-
|
200
|
+
GfxFormatToGfxParams = {
|
201
|
+
"pdf" => {:width => 7, :height => 5, :paper => 'special'},
|
202
|
+
"png" => {:units => "cm", :width => 12, :height => 8},
|
203
|
+
"tiff" => {:units => "cm", :width => 12, :height => 8},
|
204
|
+
}
|
205
|
+
|
206
|
+
def gfx_device(format, width = nil, height = nil)
|
207
|
+
|
208
|
+
format = format.to_s # If given as a symbol instead of a string
|
209
|
+
|
210
|
+
unless GfxFormatToGfxParams.has_key?(format)
|
211
|
+
raise ArgumentError.new("Don't now about gfx format #{format}")
|
212
|
+
end
|
213
|
+
|
214
|
+
params = GfxFormatToGfxParams[format]
|
215
|
+
|
216
|
+
"#{format}(#{hash_to_R_params(params)})"
|
217
|
+
|
218
|
+
end
|
219
|
+
|
220
|
+
# Map a ruby hash of objects to parameters in R code/script.
|
221
|
+
def hash_to_R_params(hash)
|
222
|
+
|
223
|
+
hash.keys.sort.map do |key|
|
224
|
+
|
225
|
+
"#{key.to_s} = #{ruby_object_to_R_string(hash[key])}"
|
226
|
+
|
227
|
+
end.join(", ")
|
228
|
+
|
229
|
+
end
|
230
|
+
|
231
|
+
def set_file_ending(filepath, newEnding)
|
232
|
+
|
233
|
+
dirname = File.dirname(filepath)
|
234
|
+
|
235
|
+
current_ending = filepath.split(".").last
|
236
|
+
|
237
|
+
# Works even if there is no current file ending:
|
238
|
+
basename = File.basename(filepath, "." + current_ending)
|
239
|
+
|
240
|
+
File.join dirname, basename
|
241
|
+
|
242
|
+
end
|
243
|
+
|
244
|
+
def plot_2dims(csvFilePath, graphFilePath, xName, yName, title = "scatterplot", format = "pdf", width = nil, height = nil)
|
160
245
|
|
161
246
|
include_library("ggplot2")
|
162
247
|
|
@@ -164,13 +249,13 @@ module FeldtRuby::Statistics::Plotting
|
|
164
249
|
|
165
250
|
pre = [
|
166
251
|
"data <- read.csv(#{csvFilePath.inspect})",
|
167
|
-
|
252
|
+
gfx_device(format, width, height)
|
168
253
|
]
|
169
254
|
|
170
255
|
#plot = ["suppressWarnings( " + yield().join(" ") + " + theme_bw(base_size = 12, base_family = \"\") )"]
|
171
|
-
plot = [yield().join(" ") + " + theme_bw(base_size = 12, base_family = \"\")"]
|
172
|
-
|
173
|
-
|
256
|
+
#plot = [yield().join(" ") + " + theme_bw(base_size = 12, base_family = \"\")"]
|
257
|
+
plot = yield()
|
258
|
+
plot << " + theme_bw(base_size = 12, base_family = \"\")"
|
174
259
|
|
175
260
|
post = [
|
176
261
|
"dev.off()"
|
@@ -182,12 +267,13 @@ module FeldtRuby::Statistics::Plotting
|
|
182
267
|
end
|
183
268
|
|
184
269
|
# Scatter plot of columns xName vs yName in csvFilePath is saved to graphFilePath.
|
185
|
-
def scatter_plot(csvFilePath, graphFilePath, xName, yName, title = "scatterplot", smoothFit = true, format =
|
270
|
+
def scatter_plot(csvFilePath, graphFilePath, xName, yName, title = "scatterplot", smoothFit = true, format = nil, width = 7, height = 5)
|
186
271
|
plot_2dims(csvFilePath, graphFilePath, xName, yName, title, format, width, height) {
|
187
272
|
[
|
273
|
+
"smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}",
|
188
274
|
"ggplot(data, aes(#{xName}, #{yName})) + ",
|
189
275
|
" geom_point(shape = 1) + ", # Each point is non-filled circle
|
190
|
-
(smoothFit ? " geom_smooth() + " : nil),
|
276
|
+
(smoothFit ? " geom_smooth(method = smoothing_method) + " : nil),
|
191
277
|
" ggtitle(#{title.inspect})"
|
192
278
|
].compact
|
193
279
|
}
|
@@ -199,6 +285,39 @@ module FeldtRuby::Statistics::Plotting
|
|
199
285
|
[ "ggplot(data, aes(#{xName}, #{yName})) + geom_hex( bins = #{bins} ) + ggtitle(\"#{title}\")"]
|
200
286
|
}
|
201
287
|
end
|
288
|
+
|
289
|
+
# This is wrapped around a block that draws a diagram and will save that diagram
|
290
|
+
# to the given filename.
|
291
|
+
def save_graph(filename)
|
292
|
+
RC.eval("pdf(#{filename.inspect}, width = 6, height = 4, paper = 'special')")
|
293
|
+
yield() # Just be sure not to nest these save_graph calls within each other...
|
294
|
+
RC.eval("dev.off()")
|
295
|
+
end
|
296
|
+
|
297
|
+
# Overlaid density graph of the observations (sampled distributions) in data1
|
298
|
+
# and data2.
|
299
|
+
def overlaid_densities(data1, data2, xlabel = "x", ylabel = "density")
|
300
|
+
include_library("ggplot2")
|
301
|
+
|
302
|
+
raise ArgumentError.new("Must have same cardinality") unless data1.length == data2.length
|
303
|
+
|
304
|
+
script = <<-EOS
|
305
|
+
df <- data.frame(dc1 = _d1_, dc2 = _d2_)
|
306
|
+
f <- ggplot(df)
|
307
|
+
f <- f + geom_density(aes(dc1, fill = "blue"), alpha = 0.2)
|
308
|
+
f <- f + geom_density(aes(dc2, fill = "red"), alpha = 0.2)
|
309
|
+
f <- f + theme_bw() + ggtitle("Overlaid densities")
|
310
|
+
f <- f + theme(plot.title = element_text(face="bold", size=12),
|
311
|
+
axis.title.x = element_text(face="bold", size=10)
|
312
|
+
) + xlab(_xlabel_)
|
313
|
+
# pdf("tmp.pdf", width = 6, height = 4, paper='special')
|
314
|
+
f
|
315
|
+
#dev.off()
|
316
|
+
EOS
|
317
|
+
subst_eval script, {:d1 => data1, :d2 => data2,
|
318
|
+
:xlabel => xlabel, :ylabel => ylabel}
|
319
|
+
|
320
|
+
end
|
202
321
|
end
|
203
322
|
|
204
323
|
class FeldtRuby::RCommunicator
|
data/lib/feldtruby/version.rb
CHANGED
data/test/test_statistics.rb
CHANGED
@@ -100,32 +100,89 @@ end
|
|
100
100
|
|
101
101
|
describe "Plotting" do
|
102
102
|
|
103
|
-
it "can do a scatter plot" do
|
103
|
+
it "can map Ruby integers to R code/script strings" do
|
104
104
|
|
105
|
-
|
106
|
-
|
107
|
-
out = d + "scatterplot.pdf"
|
108
|
-
RC.scatter_plot(filename, out, "size", "height",
|
109
|
-
"Scatterplot", true)
|
105
|
+
RC.ruby_object_to_R_string(1).must_equal "1"
|
106
|
+
RC.ruby_object_to_R_string(42).must_equal "42"
|
110
107
|
|
111
|
-
|
108
|
+
end
|
112
109
|
|
113
|
-
|
110
|
+
it "can map Ruby floats to R code/script strings" do
|
111
|
+
|
112
|
+
RC.ruby_object_to_R_string(3.675).must_equal "3.675"
|
113
|
+
RC.ruby_object_to_R_string(1e10).must_equal "10000000000.0"
|
114
114
|
|
115
115
|
end
|
116
116
|
|
117
|
-
it "can do a hexbin heatmap plot" do
|
117
|
+
it "can map Ruby arrays to R code/script strings" do
|
118
118
|
|
119
|
-
|
120
|
-
|
121
|
-
out = d + "heatmap.pdf"
|
119
|
+
RC.ruby_object_to_R_string([1,2,3]).must_equal "c(1, 2, 3)"
|
120
|
+
RC.ruby_object_to_R_string([10, 1.65]).must_equal "c(10, 1.65)"
|
122
121
|
|
123
|
-
|
124
|
-
"Hexbin heatmap", 30)
|
122
|
+
end
|
125
123
|
|
126
|
-
|
124
|
+
it "can map Ruby strings to R code/script strings" do
|
127
125
|
|
128
|
-
|
126
|
+
RC.ruby_object_to_R_string("loess").must_equal '"loess"'
|
127
|
+
RC.ruby_object_to_R_string("gam").must_equal '"gam"'
|
128
|
+
|
129
|
+
end
|
130
|
+
|
131
|
+
it "can convert a hash of Ruby objects into a R parameter script" do
|
132
|
+
|
133
|
+
RC.hash_to_R_params({:a => 1, :b => 42.5}).must_equal "a = 1, b = 42.5"
|
129
134
|
|
135
|
+
s = RC.hash_to_R_params({:b => "b", :height => [5, 7.2]})
|
136
|
+
s.must_equal 'b = "b", height = c(5, 7.2)'
|
137
|
+
|
138
|
+
end
|
139
|
+
|
140
|
+
it "can change the file ending if is not what is expected" do
|
141
|
+
|
142
|
+
end
|
143
|
+
|
144
|
+
# it "can do a scatter plot" do
|
145
|
+
#
|
146
|
+
# d = File.dirname(__FILE__) + "/"
|
147
|
+
# filename = d + "tmp.csv"
|
148
|
+
# out = d + "scatterplot.pdf"
|
149
|
+
# RC.scatter_plot(filename, out, "size", "height",
|
150
|
+
# "Scatterplot", true)
|
151
|
+
#
|
152
|
+
# File.exist?(out).must_equal true
|
153
|
+
#
|
154
|
+
# File.delete out
|
155
|
+
#
|
156
|
+
# end
|
157
|
+
#
|
158
|
+
# it "can do a hexbin heatmap plot" do
|
159
|
+
#
|
160
|
+
# d = File.dirname(__FILE__) + "/"
|
161
|
+
# filename = d + "tmp.csv"
|
162
|
+
# out = d + "heatmap.pdf"
|
163
|
+
#
|
164
|
+
# RC.hexbin_heatmap(filename, out, "size", "height",
|
165
|
+
# "Hexbin heatmap", 30)
|
166
|
+
#
|
167
|
+
# File.exist?(out).must_equal true
|
168
|
+
#
|
169
|
+
# File.delete out
|
170
|
+
#
|
171
|
+
# end
|
172
|
+
|
173
|
+
it "can do overlaid density plot" do
|
174
|
+
|
175
|
+
d1 = Array.new(100) {rand(10)}
|
176
|
+
d2 = Array.new(100) {2 + rand(5)}
|
177
|
+
|
178
|
+
out = "tmp.pdf"
|
179
|
+
|
180
|
+
RC.save_graph(out) do
|
181
|
+
RC.overlaid_densities(d1, d2)
|
182
|
+
end
|
183
|
+
|
184
|
+
File.exist?(out).must_equal true
|
185
|
+
File.delete out
|
186
|
+
|
130
187
|
end
|
131
188
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feldtruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.11
|
4
|
+
version: 0.3.12
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -43,6 +43,22 @@ dependencies:
|
|
43
43
|
- - ! '>='
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: nokogiri
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
46
62
|
description: Robert Feldt's Common Ruby Code lib
|
47
63
|
email:
|
48
64
|
- robert.feldt@gmail.com
|
@@ -57,6 +73,7 @@ files:
|
|
57
73
|
- R/diffusion_kde.R
|
58
74
|
- README.md
|
59
75
|
- Rakefile
|
76
|
+
- Rplots.pdf
|
60
77
|
- TODO
|
61
78
|
- feldtruby.gemspec
|
62
79
|
- lib/feldtruby.rb
|