rbbt-dm 1.1.39 → 1.1.40

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/stan.rb +205 -0
  3. data/test/rbbt/test_stan.rb +159 -0
  4. metadata +5 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5a6a86141d16b2da2c4024bebcff9447e78add40
4
- data.tar.gz: 062c8f641dfc0672d5d883958df950af7af49889
3
+ metadata.gz: f76827643000582061a88e4ea73e2bd75c77988c
4
+ data.tar.gz: bb6261e9ce5a53948b5429e7fcead2da551070d0
5
5
  SHA512:
6
- metadata.gz: fa3016424aca9f00970abda11b08cbec48faf4ddb61031d9e646527fa69baff50f60de733c29a99189889bc084de2d0c831e857b9507d240be91e16fc9a25f76
7
- data.tar.gz: 7fb134ffd67f043111c1bf6ec44d042fde6e53321b716ed3f2a1d0075b9b1b8e0d0ba0cbb3f1eb9d17f25ac849843e03c8c3aefb57ee377e12ecd766f79af636
6
+ metadata.gz: a1e4ccabc8088e6564cf1e2d2dc8e4cafc539d499fe1bdef6e4ce0ae7e41954457589fa79fc97371960d957b598394d41d4c36e515c6847427a69ca893955288
7
+ data.tar.gz: 6f2625689288ce82bcb7aac8f1ceca2f68300b5edb9ed63cb6a7ea4dbed1bc5c3442d38f0624d4e51224502e6a7e933e92d6eb3ae5851bcc00b7db8669e9bb52
@@ -0,0 +1,205 @@
1
+ require 'rbbt/util/R'
2
+ require 'mkfifo'
3
+
4
# Helpers to run STAN models through R (rstan): input data is written to
# disk, a matching `data{}` block is generated and prepended to the model,
# and the fit is executed with R.run.
module STAN

  # Writes every entry of +data+ to its own file inside +directory+ and
  # builds the R code that loads those files into a list named +input_data+.
  #
  # Supported values are Integer, Float, Array, TSV and Path (a Path is opened
  # with TSV.open before being written). Arrays additionally define
  # `<name>_l` (their length); matrices define `<name>_c` (number of fields)
  # and `<name>_r` (number of rows).
  #
  # Returns a pair `[types, load_str]`: +types+ maps each name to one of
  # 'integer', 'real', 'array', 'iarray' or 'matrix'; +load_str+ is the R
  # snippet. Raises RuntimeError for any other kind of value.
  def self.save_data(directory, data = {})
    directory = directory.find if Path === directory
    types = {}
    load_str = "input_data = list()\n"

    data.each do |name, value|
      file = File.join(directory, name.to_s)
      case value
      when Integer, Float
        Open.write(file, value.to_json)
        types[name] = Integer === value ? 'integer' : 'real'
        load_str << "input_data[['#{name}']] = fromJSON(txt='#{file}')\n"
      when Array
        Open.write(file, value.to_json)
        # A single Float makes the whole array real-valued ('array');
        # otherwise it is declared as an integer array ('iarray').
        types[name] = value.any? { |v| Float === v } ? 'array' : 'iarray'
        load_str << "input_data[['#{name}_l']] = #{value.length}\n"
        load_str << "input_data[['#{name}']] = fromJSON(txt='#{file}')\n"
      when TSV, Path
        # TSV is checked first, so only values that are not already a TSV
        # are opened from their path.
        value = TSV.open(value) unless TSV === value
        Open.write(file, value.to_s)
        types[name] = 'matrix'
        load_str << "input_data[['#{name}_c']] = #{value.fields.length}\n"
        load_str << "input_data[['#{name}_r']] = #{value.size}\n"
        load_str << "#{name}.tmp = rbbt.impute(rbbt.tsv('#{file}'))\n"
        load_str << "input_data[['#{name}']] = #{name}.tmp\n"
      else
        raise "Unknown type of data #{ name }: #{Misc.fingerprint value}"
      end
    end

    [types, load_str]
  end

  # Builds the STAN `data{ ... }` block declaring every variable in +types+,
  # a hash of name => type as returned by save_data.
  #
  # Raises RuntimeError when a type is not one of 'real', 'integer', 'array',
  # 'iarray' or 'matrix'.
  def self.data_header(types)
    types_str = ""
    types.each do |name, type|
      name = name.to_s
      case type
      when 'real'
        types_str << " real #{name};\n"
      when 'integer'
        types_str << " int #{name};\n"
      when 'array'
        types_str << " int<lower=0> #{name}_l;\n"
        types_str << " vector[#{name}_l] #{name};\n"
      when 'iarray'
        types_str << " int<lower=0> #{name}_l;\n"
        types_str << " int #{name}[#{name}_l];\n"
      when 'matrix'
        types_str << " int<lower=0> #{name}_c;\n"
        types_str << " int<lower=0> #{name}_r;\n"
        types_str << " matrix[#{name}_r,#{name}_c] #{name};\n"
      else
        raise "Unknown type for #{ name }: #{type}"
      end
    end

    <<-EOF
data{

#{types_str}
}
    EOF
  end

  # Saves +data+ under +input_directory+, prepends the generated data block to
  # +model+, writes the resulting program to a file named after its digest and
  # runs it through rstan.
  #
  # +parameter_chains+ is the file where the fitted parameters are written as
  # a TSV (skipped when nil); +sample_file+ is handed to rstan as its
  # `sample_file`. +debug+ enables rstan's verbose mode and R.run monitoring.
  # +stan_options+ override the defaults for iter, warmup, chains, seed and
  # refresh.
  #
  # Returns whatever R.run returns.
  def self.exec(data, model, input_directory, parameter_chains, sample_file, debug = false, stan_options = {})
    stan_options = Misc.add_defaults stan_options, :iter => 1000, :warmup => 500, :chains => 1, :seed => 2887, :refresh => 1200

    data = {} if data.nil?

    types, load_str = save_data(input_directory, data)
    data_header = self.data_header(types)
    stan_model = data_header + "\n" + model

    stan_file = Rbbt.var.stan_models[Misc.digest(stan_model).to_s << ".stan"].find
    Open.write(stan_file, stan_model)

    Log.debug "STAN model:\n" + stan_model

    script = <<-EOF
rbbt.require('rstan')
rbbt.require('jsonlite')

rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

#{load_str}

fit <- stan(file='#{stan_file}', data=input_data, sample_file='#{sample_file}', verbose=#{debug ? 'TRUE' : 'FALSE'}, #{R.hash2Rargs(stan_options)})

params <- as.data.frame(fit)

print(fit)
#{parameter_chains.nil? ? "" : "rbbt.tsv.write('#{parameter_chains}', params)" }
    EOF

    R.run script, nil, :monitor => debug
  end

  # Runs the model in a background thread and streams the samples as they are
  # produced, reformatted as TSV lines (an options header, a "#Iteration"
  # field header and one numbered line per sample).
  #
  # +directory+ may be omitted (the options hash can take its place); in that
  # case a temporary directory is used and removed once the stream is done,
  # and neither the chains nor the summary file is written.
  #
  # Returns the stream set up by ConcurrentStream.setup.
  def self.stream_chain(data, model, directory = nil, options = {})
    options, directory = directory, nil if Hash === directory
    debug = Misc.process_options options, :debug

    erase = false
    if directory.nil?
      directory = TmpFile.tmp_file
      erase = true
    end

    FileUtils.mkdir_p directory unless File.exist? directory
    input_directory = File.join(directory, 'inputs')
    parameter_chains = File.join(directory, 'chains') unless erase
    summary = File.join(directory, 'summary') unless erase
    sample_file = File.join(directory, 'samples')

    # The sample file is a named pipe, so samples can be read while they are
    # being written.
    File.mkfifo(sample_file)

    io = Misc.open_pipe do |sin|
      iteration = 1
      sin << "#: :type=:list#:cast=:to_f" << "\n"
      begin
        # Block form guarantees the reading end of the pipe is closed.
        File.open(sample_file, 'r') do |reader|
          while line = reader.gets
            # Comment lines of the sample file are dropped.
            next if line.start_with?('#')

            fields = line.split(",").join("\t")
            if line.start_with?('lp__')
              # Column names line becomes the TSV field header.
              sin << "#Iteration\t#{fields}"
            else
              sin << "#{iteration}\t#{fields}"
              iteration += 1
            end
          end
        end
      rescue
        Log.exception $!
        raise
      end
    end

    exec_thread = Thread.new do
      res = self.exec(data, model, input_directory, parameter_chains, sample_file, debug, options)
      # Read the output only once: a second read would come back empty.
      output = res.read.to_s
      Open.write(summary, output) unless summary.nil?
      Log.debug "Result from STAN:\n#{output}"
    end

    ConcurrentStream.setup io, :threads => [exec_thread] do
      Log.debug "Done chains for STAN"
      FileUtils.rm_rf directory if erase
    end
  end

  # Runs the model keeping all files under +directory+: inputs in 'inputs',
  # fitted parameters in 'chains', the textual output in 'summary' and
  # rstan's sample file in 'sample'.
  #
  # Returns the 'chains' file opened with Open.open.
  def self.run(data, model, directory, options = {})
    debug = Misc.process_options options, :debug

    input_directory = File.join(directory, 'inputs')

    parameter_chains = File.join(directory, 'chains')
    summary = File.join(directory, 'summary')
    sample_file = File.join(directory, 'sample')

    res = self.exec(data, model, input_directory, parameter_chains, sample_file, debug, options)
    # Read the output only once so that the summary file gets the same text
    # that is logged.
    output = res.read.to_s
    Log.debug "Result from STAN:\n#{output}"
    Open.write(summary, output)

    Open.open(parameter_chains)
  end

  # Fits the model in a temporary directory and returns the parameter chains
  # loaded as a TSV of type :list with values cast with :to_f.
  def self.fit(data, model, options = {})
    TmpFile.with_file do |directory|
      FileUtils.mkdir_p directory
      res = self.run(data, model, directory, options)
      TSV.open(res, :type => :list, :cast => :to_f)
    end
  end

end
@@ -0,0 +1,159 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.rb')
2
+ require 'rbbt/stan'
3
+
4
# Tests for the STAN module: input serialization, data header generation and
# model fitting (the fitting tests run R through STAN.fit / STAN.stream_chain).
class TestClass < Test::Unit::TestCase
  def test_save_array
    data = {}
    data[:y] = [1,2,3,4]

    TmpFile.with_file do |directory|
      STAN.save_data(directory, data)
      assert File.exist?(File.join(directory, 'y'))
      assert_equal '[1,2,3,4]', Open.read(File.join(directory, 'y'))
    end
  end

  def test_data_header
    types = {:y => 'array'}

    header = STAN.data_header(types)
    assert header.include?('vector[y_l] y;')
  end

  # Smoke test: only checks that a model without data can be fitted.
  def test_simple_fit
    Log.severity = 0
    res = STAN.fit({}, <<-EOF)
parameters{
  real y;
}

model {
  target += -0.5 * y * y;
}
    EOF
    Log.tsv res
  end

  def test_fit_vector
    Log.severity = 0
    data = {}
    real_mean = 4
    data[:y] = R.eval_a "rnorm(100, #{real_mean}, 1)"

    res = STAN.fit(data, <<-EOF, :iter => 10_000, :chains => 2)
parameters{
  real mu;
}
model{
  mu ~ normal(0,10);
  y ~ normal(mu, 1);
}
    EOF

    m = Misc.mean(res.column("mu").values)

    assert((m - real_mean).abs < 0.5)
  end

  def test_fit_matrix
    Log.severity = 0

    real_mean = 5
    y = simulated_matrix(1000, real_mean)

    res = STAN.fit({:y => y}, <<-EOF, :iter => 100, :warmup => 20, :chains => 1)
parameters{
  vector<lower=0>[y_c] s;
  real<lower=0> w_m;
}

model{
  s ~ cauchy(0,100);
  w_m ~ uniform(0,10);

  for (j in 1:y_c){
    y[,j] ~ normal(w_m*s[j], 1/s[j]);
  }
}
    EOF

    m = Misc.mean(res.column("w_m").values)
    assert((m - real_mean).abs < (real_mean.to_f / 10))
  end

  def test_stream
    Log.severity = 0

    y = simulated_matrix(1000, 5)

    io = STAN.stream_chain({:y => y}, <<-EOF, :iter => 100, :warmup => 20)
parameters{
  vector<lower=0>[y_c] s;
  real<lower=0> w_m;
}

model{
  s ~ cauchy(0,100);
  w_m ~ uniform(0,10);

  for (j in 1:y_c){
    y[,j] ~ normal(w_m*s[j], 1/s[j]);
  }
}
    EOF

    lines = 0
    while io.gets
      lines += 1
    end
    io.close
    io.join if io.respond_to? :join
    # Options header + field header + the 100 iterations requested.
    assert_equal 102, lines
  end

  private

  # Generates, through R, +samples+ draws of three values distributed as
  # rnorm(3, real_mean, 1) * c(1,5,10) and returns them as a TSV transposed
  # with "Sample" as the key field.
  def simulated_matrix(samples, real_mean)
    matrix = TmpFile.with_file do |tsv|
      R.run <<-EOF
s = c(1,5,10)

samples = #{samples}
m = list()
for (i in seq(1,samples)){
  sample.name = paste("S",i, sep="")
  sample.values = rnorm(3,#{real_mean},1) * s
  m[[sample.name]] = sample.values
}

data = as.data.frame(m)
rbbt.tsv.write('#{tsv}', data)
      EOF

      TSV.open(tsv, :type => :list, :cast => :to_f)
    end

    matrix.transpose("Sample")
  end
end
159
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.39
4
+ version: 1.1.40
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-12 00:00:00.000000000 Z
11
+ date: 2018-02-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -98,6 +98,7 @@ files:
98
98
  - lib/rbbt/network/paths.rb
99
99
  - lib/rbbt/plots/bar.rb
100
100
  - lib/rbbt/plots/heatmap.rb
101
+ - lib/rbbt/stan.rb
101
102
  - lib/rbbt/statistics/fdr.rb
102
103
  - lib/rbbt/statistics/fisher.rb
103
104
  - lib/rbbt/statistics/hypergeometric.rb
@@ -114,6 +115,7 @@ files:
114
115
  - test/rbbt/statistics/test_fisher.rb
115
116
  - test/rbbt/statistics/test_hypergeometric.rb
116
117
  - test/rbbt/statistics/test_random_walk.rb
118
+ - test/rbbt/test_stan.rb
117
119
  - test/rbbt/vector/model/test_svm.rb
118
120
  - test/rbbt/vector/test_model.rb
119
121
  - test/test_helper.rb
@@ -149,4 +151,5 @@ test_files:
149
151
  - test/rbbt/statistics/test_hypergeometric.rb
150
152
  - test/rbbt/vector/test_model.rb
151
153
  - test/rbbt/vector/model/test_svm.rb
154
+ - test/rbbt/test_stan.rb
152
155
  - test/test_helper.rb