feldtruby 0.3.11 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,14 +1,16 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- feldtruby (0.3.3)
4
+ feldtruby (0.3.11)
5
5
  json
6
+ nokogiri
6
7
  rinruby
7
8
 
8
9
  GEM
9
10
  remote: https://rubygems.org/
10
11
  specs:
11
- json (1.7.6)
12
+ json (1.7.7)
13
+ nokogiri (1.5.6)
12
14
  rinruby (2.0.3)
13
15
 
14
16
  PLATFORMS
data/Rplots.pdf ADDED
Binary file
data/feldtruby.gemspec CHANGED
@@ -18,5 +18,8 @@ Gem::Specification.new do |gem|
18
18
  gem.require_paths = ["lib"]
19
19
 
20
20
  gem.add_dependency('rinruby')
21
+
21
22
  gem.add_dependency('json')
23
+
24
+ gem.add_dependency('nokogiri')
22
25
  end
@@ -39,6 +39,21 @@ class RCommunicator
39
39
  @r.eval str
40
40
  end
41
41
 
42
+ # Given a script with variable references of the form "_name_", replace each one
43
+ # with the R string for the Ruby object mapped to that name in scriptNameToRubyValues, then eval the script.
44
+ def subst_eval(script, scriptNameToRubyValues)
45
+
46
+ scriptNameToRubyValues.each do |key, value|
47
+
48
+ script = script.gsub("_#{key.to_s}_", ruby_object_to_R_string(value))
49
+
50
+ end
51
+
52
+ #puts "Eval'ing script:\n#{script}"
53
+ eval script
54
+
55
+ end
56
+
42
57
  # This represents a hash returned as JSON from R but mapped to a
43
58
  # Ruby object so we can more easily use it as if it was an R object.
44
59
  class Rvalue
@@ -85,6 +100,32 @@ class RCommunicator
85
100
  end
86
101
  end
87
102
 
103
+ # Convert a Ruby object of one of the types String, Array, Integer or Float
104
+ # to a String that can be used in R code/scripts to represent the object.
105
+ def ruby_object_to_R_string(o)
106
+
107
+ case o
108
+
109
+ when String
110
+ return o.inspect
111
+
112
+ when Array
113
+ elems = o.map {|e| ruby_object_to_R_string(e)}.join(", ")
114
+ return "c(#{elems})"
115
+
116
+ when Integer
117
+ return o.to_s
118
+
119
+ when Float
120
+ return o.to_s
121
+
122
+ else
123
+ raise "Cannot represent object #{o} in valid R code"
124
+
125
+ end
126
+
127
+ end
128
+
88
129
  private
89
130
 
90
131
  def res_name(index = 1)
@@ -156,7 +197,51 @@ end
156
197
  # Plotting data sets in R with ggplot2 and save them to files.
157
198
  module FeldtRuby::Statistics::Plotting
158
199
 
159
- def plot_2dims(csvFilePath, graphFilePath, xName, yName, title = "scatterplot", format = "pdf", width = 7, height = 5)
200
+ GfxFormatToGfxParams = {
201
+ "pdf" => {:width => 7, :height => 5, :paper => 'special'},
202
+ "png" => {:units => "cm", :width => 12, :height => 8},
203
+ "tiff" => {:units => "cm", :width => 12, :height => 8},
204
+ }
205
+
206
+ def gfx_device(format, width = nil, height = nil)
207
+
208
+ format = format.to_s # If given as a symbol instead of a string
209
+
210
+ unless GfxFormatToGfxParams.has_key?(format)
211
+ raise ArgumentError.new("Don't now about gfx format #{format}")
212
+ end
213
+
214
+ params = GfxFormatToGfxParams[format]
215
+
216
+ "#{format}(#{hash_to_R_params(params)})"
217
+
218
+ end
219
+
220
+ # Map a ruby hash of objects to parameters in R code/script.
221
+ def hash_to_R_params(hash)
222
+
223
+ hash.keys.sort.map do |key|
224
+
225
+ "#{key.to_s} = #{ruby_object_to_R_string(hash[key])}"
226
+
227
+ end.join(", ")
228
+
229
+ end
230
+
231
+ def set_file_ending(filepath, newEnding)
232
+
233
+ dirname = File.dirname(filepath)
234
+
235
+ current_ending = filepath.split(".").last
236
+
237
+ # Works even if there is no current file ending:
238
+ basename = File.basename(filepath, "." + current_ending)
239
+
240
+ File.join dirname, basename
241
+
242
+ end
243
+
244
+ def plot_2dims(csvFilePath, graphFilePath, xName, yName, title = "scatterplot", format = "pdf", width = nil, height = nil)
160
245
 
161
246
  include_library("ggplot2")
162
247
 
@@ -164,13 +249,13 @@ module FeldtRuby::Statistics::Plotting
164
249
 
165
250
  pre = [
166
251
  "data <- read.csv(#{csvFilePath.inspect})",
167
- "#{format}(#{gfxFile.inspect}, width=#{width}, height=#{height})"
252
+ gfx_device(format, width, height)
168
253
  ]
169
254
 
170
255
  #plot = ["suppressWarnings( " + yield().join(" ") + " + theme_bw(base_size = 12, base_family = \"\") )"]
171
- plot = [yield().join(" ") + " + theme_bw(base_size = 12, base_family = \"\")"]
172
- #plot = yield()
173
- #plot << " + theme_bw(base_size = 16, base_family = \"\")"
256
+ #plot = [yield().join(" ") + " + theme_bw(base_size = 12, base_family = \"\")"]
257
+ plot = yield()
258
+ plot << " + theme_bw(base_size = 12, base_family = \"\")"
174
259
 
175
260
  post = [
176
261
  "dev.off()"
@@ -182,12 +267,13 @@ module FeldtRuby::Statistics::Plotting
182
267
  end
183
268
 
184
269
  # Scatter plot of columns xName vs yName in csvFilePath is saved to graphFilePath.
185
- def scatter_plot(csvFilePath, graphFilePath, xName, yName, title = "scatterplot", smoothFit = true, format = "pdf", width = 7, height = 5)
270
+ def scatter_plot(csvFilePath, graphFilePath, xName, yName, title = "scatterplot", smoothFit = true, format = nil, width = 7, height = 5)
186
271
  plot_2dims(csvFilePath, graphFilePath, xName, yName, title, format, width, height) {
187
272
  [
273
+ "smoothing_method <- if(nrow(data) > 1000) {'gam'} else {'loess'}",
188
274
  "ggplot(data, aes(#{xName}, #{yName})) + ",
189
275
  " geom_point(shape = 1) + ", # Each point is non-filled circle
190
- (smoothFit ? " geom_smooth() + " : nil),
276
+ (smoothFit ? " geom_smooth(method = smoothing_method) + " : nil),
191
277
  " ggtitle(#{title.inspect})"
192
278
  ].compact
193
279
  }
@@ -199,6 +285,39 @@ module FeldtRuby::Statistics::Plotting
199
285
  [ "ggplot(data, aes(#{xName}, #{yName})) + geom_hex( bins = #{bins} ) + ggtitle(\"#{title}\")"]
200
286
  }
201
287
  end
288
+
289
+ # Wrap this around a block that draws a diagram; the diagram is saved as a PDF
290
+ # to the given filename.
291
+ def save_graph(filename)
292
+ RC.eval("pdf(#{filename.inspect}, width = 6, height = 4, paper = 'special')")
293
+ yield() # Just be sure not to nest these save_graph calls within each other...
294
+ RC.eval("dev.off()")
295
+ end
296
+
297
+ # Overlaid density graph of the observations (sampled distributions) in data1
298
+ # and data2.
299
+ def overlaid_densities(data1, data2, xlabel = "x", ylabel = "density")
300
+ include_library("ggplot2")
301
+
302
+ raise ArgumentError.new("Must have same cardinality") unless data1.length == data2.length
303
+
304
+ script = <<-EOS
305
+ df <- data.frame(dc1 = _d1_, dc2 = _d2_)
306
+ f <- ggplot(df)
307
+ f <- f + geom_density(aes(dc1, fill = "blue"), alpha = 0.2)
308
+ f <- f + geom_density(aes(dc2, fill = "red"), alpha = 0.2)
309
+ f <- f + theme_bw() + ggtitle("Overlaid densities")
310
+ f <- f + theme(plot.title = element_text(face="bold", size=12),
311
+ axis.title.x = element_text(face="bold", size=10)
312
+ ) + xlab(_xlabel_)
313
+ # pdf("tmp.pdf", width = 6, height = 4, paper='special')
314
+ f
315
+ #dev.off()
316
+ EOS
317
+ subst_eval script, {:d1 => data1, :d2 => data2,
318
+ :xlabel => xlabel, :ylabel => ylabel}
319
+
320
+ end
202
321
  end
203
322
 
204
323
  class FeldtRuby::RCommunicator
@@ -1,3 +1,3 @@
1
1
  module FeldtRuby
2
- VERSION = "0.3.11"
2
+ VERSION = "0.3.12"
3
3
  end
@@ -100,32 +100,89 @@ end
100
100
 
101
101
  describe "Plotting" do
102
102
 
103
- it "can do a scatter plot" do
103
+ it "can map Ruby integers to R code/script strings" do
104
104
 
105
- d = File.dirname(__FILE__) + "/"
106
- filename = d + "tmp.csv"
107
- out = d + "scatterplot.pdf"
108
- RC.scatter_plot(filename, out, "size", "height",
109
- "Scatterplot", true)
105
+ RC.ruby_object_to_R_string(1).must_equal "1"
106
+ RC.ruby_object_to_R_string(42).must_equal "42"
110
107
 
111
- File.exist?(out).must_equal true
108
+ end
112
109
 
113
- File.delete out
110
+ it "can map Ruby floats to R code/script strings" do
111
+
112
+ RC.ruby_object_to_R_string(3.675).must_equal "3.675"
113
+ RC.ruby_object_to_R_string(1e10).must_equal "10000000000.0"
114
114
 
115
115
  end
116
116
 
117
- it "can do a hexbin heatmap plot" do
117
+ it "can map Ruby arrays to R code/script strings" do
118
118
 
119
- d = File.dirname(__FILE__) + "/"
120
- filename = d + "tmp.csv"
121
- out = d + "heatmap.pdf"
119
+ RC.ruby_object_to_R_string([1,2,3]).must_equal "c(1, 2, 3)"
120
+ RC.ruby_object_to_R_string([10, 1.65]).must_equal "c(10, 1.65)"
122
121
 
123
- RC.hexbin_heatmap(filename, out, "size", "height",
124
- "Hexbin heatmap", 30)
122
+ end
125
123
 
126
- File.exist?(out).must_equal true
124
+ it "can map Ruby strings to R code/script strings" do
127
125
 
128
- File.delete out
126
+ RC.ruby_object_to_R_string("loess").must_equal '"loess"'
127
+ RC.ruby_object_to_R_string("gam").must_equal '"gam"'
128
+
129
+ end
130
+
131
+ it "can convert a hash of Ruby objects into a R parameter script" do
132
+
133
+ RC.hash_to_R_params({:a => 1, :b => 42.5}).must_equal "a = 1, b = 42.5"
129
134
 
135
+ s = RC.hash_to_R_params({:b => "b", :height => [5, 7.2]})
136
+ s.must_equal 'b = "b", height = c(5, 7.2)'
137
+
138
+ end
139
+
140
+ it "can change the file ending if is not what is expected" do
141
+
142
+ end
143
+
144
+ # it "can do a scatter plot" do
145
+ #
146
+ # d = File.dirname(__FILE__) + "/"
147
+ # filename = d + "tmp.csv"
148
+ # out = d + "scatterplot.pdf"
149
+ # RC.scatter_plot(filename, out, "size", "height",
150
+ # "Scatterplot", true)
151
+ #
152
+ # File.exist?(out).must_equal true
153
+ #
154
+ # File.delete out
155
+ #
156
+ # end
157
+ #
158
+ # it "can do a hexbin heatmap plot" do
159
+ #
160
+ # d = File.dirname(__FILE__) + "/"
161
+ # filename = d + "tmp.csv"
162
+ # out = d + "heatmap.pdf"
163
+ #
164
+ # RC.hexbin_heatmap(filename, out, "size", "height",
165
+ # "Hexbin heatmap", 30)
166
+ #
167
+ # File.exist?(out).must_equal true
168
+ #
169
+ # File.delete out
170
+ #
171
+ # end
172
+
173
+ it "can do overlaid density plot" do
174
+
175
+ d1 = Array.new(100) {rand(10)}
176
+ d2 = Array.new(100) {2 + rand(5)}
177
+
178
+ out = "tmp.pdf"
179
+
180
+ RC.save_graph(out) do
181
+ RC.overlaid_densities(d1, d2)
182
+ end
183
+
184
+ File.exist?(out).must_equal true
185
+ File.delete out
186
+
130
187
  end
131
188
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feldtruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.11
4
+ version: 0.3.12
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -43,6 +43,22 @@ dependencies:
43
43
  - - ! '>='
44
44
  - !ruby/object:Gem::Version
45
45
  version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: nokogiri
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
46
62
  description: Robert Feldt's Common Ruby Code lib
47
63
  email:
48
64
  - robert.feldt@gmail.com
@@ -57,6 +73,7 @@ files:
57
73
  - R/diffusion_kde.R
58
74
  - README.md
59
75
  - Rakefile
76
+ - Rplots.pdf
60
77
  - TODO
61
78
  - feldtruby.gemspec
62
79
  - lib/feldtruby.rb