feldtruby 0.3.11 → 0.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +4 -2
- data/Rplots.pdf +0 -0
- data/feldtruby.gemspec +3 -0
- data/lib/feldtruby/statistics.rb +126 -7
- data/lib/feldtruby/version.rb +1 -1
- data/test/test_statistics.rb +73 -16
- metadata +18 -1
data/Gemfile.lock
CHANGED
data/Rplots.pdf
ADDED
Binary file
|
data/feldtruby.gemspec
CHANGED
data/lib/feldtruby/statistics.rb
CHANGED
@@ -39,6 +39,21 @@ class RCommunicator
|
|
39
39
|
@r.eval str
|
40
40
|
end
|
41
41
|
|
42
|
+
# Given a script that has variable references in the form "_name_" insert
|
43
|
+
# the ruby objects mapped from these names in scriptNameToRubyValues
|
44
|
+
def subst_eval(script, scriptNameToRubyValues)
|
45
|
+
|
46
|
+
scriptNameToRubyValues.each do |key, value|
|
47
|
+
|
48
|
+
script = script.gsub("_#{key.to_s}_", ruby_object_to_R_string(value))
|
49
|
+
|
50
|
+
end
|
51
|
+
|
52
|
+
#puts "Eval'ing script:\n#{script}"
|
53
|
+
eval script
|
54
|
+
|
55
|
+
end
|
56
|
+
|
42
57
|
# This represents a hash returned as JSON from R but mapped to a
|
43
58
|
# Ruby object so we can more easily use it as if it was an R object.
|
44
59
|
class Rvalue
|
@@ -85,6 +100,32 @@ class RCommunicator
|
|
85
100
|
end
|
86
101
|
end
|
87
102
|
|
103
|
+
# Convert a Ruby object of one of the types String, Array, Integer or Float
|
104
|
+
# to a String that can be used in R code/scripts to represent the object.
|
105
|
+
def ruby_object_to_R_string(o)
|
106
|
+
|
107
|
+
case o
|
108
|
+
|
109
|
+
when String
|
110
|
+
return o.inspect
|
111
|
+
|
112
|
+
when Array
|
113
|
+
elems = o.map {|e| ruby_object_to_R_string(e)}.join(", ")
|
114
|
+
return "c(#{elems})"
|
115
|
+
|
116
|
+
when Integer
|
117
|
+
return o.to_s
|
118
|
+
|
119
|
+
when Float
|
120
|
+
return o.to_s
|
121
|
+
|
122
|
+
else
|
123
|
+
raise "Cannot represent object #{o} in valid R code"
|
124
|
+
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
128
|
+
|
88
129
|
private
|
89
130
|
|
90
131
|
def res_name(index = 1)
|
@@ -156,7 +197,51 @@ end
|
|
156
197
|
# Plotting data sets in R with ggplot2 and save them to files.
|
157
198
|
module FeldtRuby::Statistics::Plotting
|
158
199
|
|
159
|
-
|
200
|
+
GfxFormatToGfxParams = {
|
201
|
+
"pdf" => {:width => 7, :height => 5, :paper => 'special'},
|
202
|
+
"png" => {:units => "cm", :width => 12, :height => 8},
|
203
|
+
"tiff" => {:units => "cm", :width => 12, :height => 8},
|
204
|
+
}
|
205
|
+
|
206
|
+
def gfx_device(format, width = nil, height = nil)
|
207
|
+
|
208
|
+
format = format.to_s # If given as a symbol instead of a string
|
209
|
+
|
210
|
+
unless GfxFormatToGfxParams.has_key?(format)
|
211
|
+
raise ArgumentError.new("Don't now about gfx format #{format}")
|
212
|
+
end
|
213
|
+
|
214
|
+
params = GfxFormatToGfxParams[format]
|
215
|
+
|
216
|
+
"#{format}(#{hash_to_R_params(params)})"
|
217
|
+
|
218
|
+
end
|
219
|
+
|
220
|
+
# Map a ruby hash of objects to parameters in R code/script.
|
221
|
+
def hash_to_R_params(hash)
|
222
|
+
|
223
|
+
hash.keys.sort.map do |key|
|
224
|
+
|
225
|
+
"#{key.to_s} = #{ruby_object_to_R_string(hash[key])}"
|
226
|
+
|
227
|
+
end.join(", ")
|
228
|
+
|
229
|
+
end
|
230
|
+
|
231
|
+
def set_file_ending(filepath, newEnding)
|
232
|
+
|
233
|
+
dirname = File.dirname(filepath)
|
234
|
+
|
235
|
+
current_ending = filepath.split(".").last
|
236
|
+
|
237
|
+
# Works even if there is no current file ending:
|
238
|
+
basename = File.basename(filepath, "." + current_ending)
|
239
|
+
|
240
|
+
File.join dirname, basename
|
241
|
+
|
242
|
+
end
|
243
|
+
|
244
|
+
def plot_2dims(csvFilePath, graphFilePath, xName, yName, title = "scatterplot", format = "pdf", width = nil, height = nil)
|
160
245
|
|
161
246
|
include_library("ggplot2")
|
162
247
|
|
@@ -164,13 +249,13 @@ module FeldtRuby::Statistics::Plotting
|
|
164
249
|
|
165
250
|
pre = [
|
166
251
|
"data <- read.csv(#{csvFilePath.inspect})",
|
167
|
-
|
252
|
+
gfx_device(format, width, height)
|
168
253
|
]
|
169
254
|
|
170
255
|
#plot = ["suppressWarnings( " + yield().join(" ") + " + theme_bw(base_size = 12, base_family = \"\") )"]
|
171
|
-
plot = [yield().join(" ") + " + theme_bw(base_size = 12, base_family = \"\")"]
|
172
|
-
|
173
|
-
|
256
|
+
#plot = [yield().join(" ") + " + theme_bw(base_size = 12, base_family = \"\")"]
|
257
|
+
plot = yield()
|
258
|
+
plot << " + theme_bw(base_size = 12, base_family = \"\")"
|
174
259
|
|
175
260
|
post = [
|
176
261
|
"dev.off()"
|
@@ -182,12 +267,13 @@ module FeldtRuby::Statistics::Plotting
|
|
182
267
|
end
|
183
268
|
|
184
269
|
# Scatter plot of columns xName vs yName in csvFilePath is saved to graphFilePath.
|
185
|
-
def scatter_plot(csvFilePath, graphFilePath, xName, yName, title = "scatterplot", smoothFit = true, format =
|
270
|
+
def scatter_plot(csvFilePath, graphFilePath, xName, yName, title = "scatterplot", smoothFit = true, format = nil, width = 7, height = 5)
|
186
271
|
plot_2dims(csvFilePath, graphFilePath, xName, yName, title, format, width, height) {
|
187
272
|
[
|
273
|
+
"smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}",
|
188
274
|
"ggplot(data, aes(#{xName}, #{yName})) + ",
|
189
275
|
" geom_point(shape = 1) + ", # Each point is non-filled circle
|
190
|
-
(smoothFit ? " geom_smooth() + " : nil),
|
276
|
+
(smoothFit ? " geom_smooth(method = smoothing_method) + " : nil),
|
191
277
|
" ggtitle(#{title.inspect})"
|
192
278
|
].compact
|
193
279
|
}
|
@@ -199,6 +285,39 @@ module FeldtRuby::Statistics::Plotting
|
|
199
285
|
[ "ggplot(data, aes(#{xName}, #{yName})) + geom_hex( bins = #{bins} ) + ggtitle(\"#{title}\")"]
|
200
286
|
}
|
201
287
|
end
|
288
|
+
|
289
|
+
# This is wrapped around a block that draws a diagram and will save that diagram
|
290
|
+
# to the given filename.
|
291
|
+
def save_graph(filename)
|
292
|
+
RC.eval("pdf(#{filename.inspect}, width = 6, height = 4, paper = 'special')")
|
293
|
+
yield() # Just be sure not to nest these save_graph calls within each other...
|
294
|
+
RC.eval("dev.off()")
|
295
|
+
end
|
296
|
+
|
297
|
+
# Overlaid density graph of the observations (sampled distributions) in data1
|
298
|
+
# and data2.
|
299
|
+
def overlaid_densities(data1, data2, xlabel = "x", ylabel = "density")
|
300
|
+
include_library("ggplot2")
|
301
|
+
|
302
|
+
raise ArgumentError.new("Must have same cardinality") unless data1.length == data2.length
|
303
|
+
|
304
|
+
script = <<-EOS
|
305
|
+
df <- data.frame(dc1 = _d1_, dc2 = _d2_)
|
306
|
+
f <- ggplot(df)
|
307
|
+
f <- f + geom_density(aes(dc1, fill = "blue"), alpha = 0.2)
|
308
|
+
f <- f + geom_density(aes(dc2, fill = "red"), alpha = 0.2)
|
309
|
+
f <- f + theme_bw() + ggtitle("Overlaid densities")
|
310
|
+
f <- f + theme(plot.title = element_text(face="bold", size=12),
|
311
|
+
axis.title.x = element_text(face="bold", size=10)
|
312
|
+
) + xlab(_xlabel_)
|
313
|
+
# pdf("tmp.pdf", width = 6, height = 4, paper='special')
|
314
|
+
f
|
315
|
+
#dev.off()
|
316
|
+
EOS
|
317
|
+
subst_eval script, {:d1 => data1, :d2 => data2,
|
318
|
+
:xlabel => xlabel, :ylabel => ylabel}
|
319
|
+
|
320
|
+
end
|
202
321
|
end
|
203
322
|
|
204
323
|
class FeldtRuby::RCommunicator
|
data/lib/feldtruby/version.rb
CHANGED
data/test/test_statistics.rb
CHANGED
@@ -100,32 +100,89 @@ end
|
|
100
100
|
|
101
101
|
describe "Plotting" do
|
102
102
|
|
103
|
-
it "can do a scatter plot" do
|
103
|
+
it "can map Ruby integers to R code/script strings" do
|
104
104
|
|
105
|
-
|
106
|
-
|
107
|
-
out = d + "scatterplot.pdf"
|
108
|
-
RC.scatter_plot(filename, out, "size", "height",
|
109
|
-
"Scatterplot", true)
|
105
|
+
RC.ruby_object_to_R_string(1).must_equal "1"
|
106
|
+
RC.ruby_object_to_R_string(42).must_equal "42"
|
110
107
|
|
111
|
-
|
108
|
+
end
|
112
109
|
|
113
|
-
|
110
|
+
it "can map Ruby floats to R code/script strings" do
|
111
|
+
|
112
|
+
RC.ruby_object_to_R_string(3.675).must_equal "3.675"
|
113
|
+
RC.ruby_object_to_R_string(1e10).must_equal "10000000000.0"
|
114
114
|
|
115
115
|
end
|
116
116
|
|
117
|
-
it "can do a hexbin heatmap plot" do
|
117
|
+
it "can map Ruby arrays to R code/script strings" do
|
118
118
|
|
119
|
-
|
120
|
-
|
121
|
-
out = d + "heatmap.pdf"
|
119
|
+
RC.ruby_object_to_R_string([1,2,3]).must_equal "c(1, 2, 3)"
|
120
|
+
RC.ruby_object_to_R_string([10, 1.65]).must_equal "c(10, 1.65)"
|
122
121
|
|
123
|
-
|
124
|
-
"Hexbin heatmap", 30)
|
122
|
+
end
|
125
123
|
|
126
|
-
|
124
|
+
it "can map Ruby strings to R code/script strings" do
|
127
125
|
|
128
|
-
|
126
|
+
RC.ruby_object_to_R_string("loess").must_equal '"loess"'
|
127
|
+
RC.ruby_object_to_R_string("gam").must_equal '"gam"'
|
128
|
+
|
129
|
+
end
|
130
|
+
|
131
|
+
it "can convert a hash of Ruby objects into a R parameter script" do
|
132
|
+
|
133
|
+
RC.hash_to_R_params({:a => 1, :b => 42.5}).must_equal "a = 1, b = 42.5"
|
129
134
|
|
135
|
+
s = RC.hash_to_R_params({:b => "b", :height => [5, 7.2]})
|
136
|
+
s.must_equal 'b = "b", height = c(5, 7.2)'
|
137
|
+
|
138
|
+
end
|
139
|
+
|
140
|
+
it "can change the file ending if is not what is expected" do
|
141
|
+
|
142
|
+
end
|
143
|
+
|
144
|
+
# it "can do a scatter plot" do
|
145
|
+
#
|
146
|
+
# d = File.dirname(__FILE__) + "/"
|
147
|
+
# filename = d + "tmp.csv"
|
148
|
+
# out = d + "scatterplot.pdf"
|
149
|
+
# RC.scatter_plot(filename, out, "size", "height",
|
150
|
+
# "Scatterplot", true)
|
151
|
+
#
|
152
|
+
# File.exist?(out).must_equal true
|
153
|
+
#
|
154
|
+
# File.delete out
|
155
|
+
#
|
156
|
+
# end
|
157
|
+
#
|
158
|
+
# it "can do a hexbin heatmap plot" do
|
159
|
+
#
|
160
|
+
# d = File.dirname(__FILE__) + "/"
|
161
|
+
# filename = d + "tmp.csv"
|
162
|
+
# out = d + "heatmap.pdf"
|
163
|
+
#
|
164
|
+
# RC.hexbin_heatmap(filename, out, "size", "height",
|
165
|
+
# "Hexbin heatmap", 30)
|
166
|
+
#
|
167
|
+
# File.exist?(out).must_equal true
|
168
|
+
#
|
169
|
+
# File.delete out
|
170
|
+
#
|
171
|
+
# end
|
172
|
+
|
173
|
+
it "can do overlaid density plot" do
|
174
|
+
|
175
|
+
d1 = Array.new(100) {rand(10)}
|
176
|
+
d2 = Array.new(100) {2 + rand(5)}
|
177
|
+
|
178
|
+
out = "tmp.pdf"
|
179
|
+
|
180
|
+
RC.save_graph(out) do
|
181
|
+
RC.overlaid_densities(d1, d2)
|
182
|
+
end
|
183
|
+
|
184
|
+
File.exist?(out).must_equal true
|
185
|
+
File.delete out
|
186
|
+
|
130
187
|
end
|
131
188
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feldtruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.11
|
4
|
+
version: 0.3.12
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -43,6 +43,22 @@ dependencies:
|
|
43
43
|
- - ! '>='
|
44
44
|
- !ruby/object:Gem::Version
|
45
45
|
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: nokogiri
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
46
62
|
description: Robert Feldt's Common Ruby Code lib
|
47
63
|
email:
|
48
64
|
- robert.feldt@gmail.com
|
@@ -57,6 +73,7 @@ files:
|
|
57
73
|
- R/diffusion_kde.R
|
58
74
|
- README.md
|
59
75
|
- Rakefile
|
76
|
+
- Rplots.pdf
|
60
77
|
- TODO
|
61
78
|
- feldtruby.gemspec
|
62
79
|
- lib/feldtruby.rb
|