rbbt-dm 1.1.39 → 1.1.40
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/stan.rb +205 -0
- data/test/rbbt/test_stan.rb +159 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f76827643000582061a88e4ea73e2bd75c77988c
|
4
|
+
data.tar.gz: bb6261e9ce5a53948b5429e7fcead2da551070d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a1e4ccabc8088e6564cf1e2d2dc8e4cafc539d499fe1bdef6e4ce0ae7e41954457589fa79fc97371960d957b598394d41d4c36e515c6847427a69ca893955288
|
7
|
+
data.tar.gz: 6f2625689288ce82bcb7aac8f1ceca2f68300b5edb9ed63cb6a7ea4dbed1bc5c3442d38f0624d4e51224502e6a7e933e92d6eb3ae5851bcc00b7db8669e9bb52
|
data/lib/rbbt/stan.rb
ADDED
@@ -0,0 +1,205 @@
|
|
1
|
+
require 'rbbt/util/R'
|
2
|
+
require 'mkfifo'
|
3
|
+
|
4
|
+
module STAN
|
5
|
+
|
6
|
+
# Writes every entry of +data+ to its own file under +directory+ and builds
# the R snippet that loads those files back into an R list called
# `input_data`.
#
# Supported values:
# * Integer / Float -> JSON file, recorded as 'integer' / 'real'
# * Array           -> JSON file plus a `<name>_l` length entry; recorded as
#                      'array' when any element is a Float, otherwise 'iarray'
# * TSV / Path      -> TSV text file plus `<name>_c` (fields) and `<name>_r`
#                      (rows) entries; a Path is first opened with TSV.open
#
# Returns a two element array: the name => type hash and the R loading code.
# Raises a RuntimeError for any other kind of value.
def self.save_data(directory, data = {})
  directory = directory.find if Path === directory

  types = {}
  r_code = ["input_data = list()\n"]

  data.each do |name, value|
    key = name.to_s
    file = File.join(directory, key)
    # R statement shared by every JSON-backed value.
    json_load = "input_data[['#{key}']] = fromJSON(txt='#{file}')\n"

    case value
    when Integer, Float
      Open.write(file, value.to_json)
      types[name] = Integer === value ? 'integer' : 'real'
      r_code << json_load
    when Array
      Open.write(file, value.to_json)
      types[name] = value.any? { |v| Float === v } ? 'array' : 'iarray'
      r_code << "input_data[['#{key}_l']] = #{value.length}\n"
      r_code << json_load
    when TSV, Path
      # A value that is already a TSV is used as is; anything else that got
      # here is a Path and has to be loaded first.
      value = TSV.open(value) unless TSV === value
      Open.write(file, value.to_s)
      types[name] = 'matrix'
      r_code << "input_data[['#{key}_c']] = #{value.fields.length}\n"
      r_code << "input_data[['#{key}_r']] = #{value.size}\n"
      r_code << "#{key}.tmp = rbbt.impute(rbbt.tsv('#{file}'))\n"
      r_code << "input_data[['#{key}']] = #{key}.tmp\n"
    else
      raise "Unknown type of data #{ name }: #{Misc.fingerprint value}"
    end
  end

  [types, r_code.join]
end
|
53
|
+
|
54
|
+
# Builds the Stan `data{ ... }` block that declares every input listed in
# +types+ (a name => type hash as produced by save_data).
#
# Type to declaration mapping:
# * 'real'    -> `real name;`
# * 'integer' -> `int name;`
# * 'array'   -> `int<lower=0> name_l;` and `vector[name_l] name;`
# * 'iarray'  -> `int<lower=0> name_l;` and `int name[name_l];`
# * 'matrix'  -> `int<lower=0> name_c;`, `int<lower=0> name_r;` and
#                `matrix[name_r,name_c] name;`
#
# Returns the block as a String. Raises a RuntimeError on an unknown type.
def self.data_header(types)
  declarations = types.map do |name, type|
    id = name.to_s
    case type
    when 'real'
      " real #{id};\n"
    when 'integer'
      " int #{id};\n"
    when 'array'
      " int<lower=0> #{id}_l;\n" + " vector[#{id}_l] #{id};\n"
    when 'iarray'
      " int<lower=0> #{id}_l;\n" + " int #{id}[#{id}_l];\n"
    when 'matrix'
      " int<lower=0> #{id}_c;\n" +
        " int<lower=0> #{id}_r;\n" +
        " matrix[#{id}_r,#{id}_c] #{id};\n"
    else
      raise "Unknown type for #{ id }: #{type}"
    end
  end

  "data{\n\n#{declarations.join}\n}\n"
end
|
89
|
+
|
90
|
+
# Compiles and samples a Stan model through R (rstan).
#
# data::             name => value hash of inputs (nil is treated as empty);
#                    saved under +input_directory+ by save_data
# model::            Stan code without the `data{}` block, which is generated
#                    from +data+ and prepended
# input_directory::  where the input files are written
# parameter_chains:: file that receives the sampled parameters as a TSV, or
#                    nil to skip writing it
# sample_file::      path handed to rstan's `sample_file` argument
# debug::            when truthy, rstan runs verbose and R.run is monitored
# stan_options::     extra arguments for `stan(...)`; defaults are iter 1000,
#                    warmup 500, chains 1, seed 2887 and refresh 1200
#
# Returns whatever R.run returns (callers in this module call #read on it).
#
# The default for +debug+ is the literal `false`: the old top-level FALSE
# constant is deprecated and no longer defined by modern Ruby, so relying on
# it raised NameError whenever the argument was omitted.
def self.exec(data, model, input_directory, parameter_chains, sample_file, debug = false, stan_options = {})
  stan_options = Misc.add_defaults stan_options, :iter => 1000, :warmup => 500, :chains => 1, :seed => 2887, :refresh => 1200

  data = {} if data.nil?

  types, load_str = save_data(input_directory, data)
  header = self.data_header(types)
  stan_model = header + "\n" + model

  # The model file is named after the digest of its full text.
  stan_file = Rbbt.var.stan_models[Misc.digest(stan_model).to_s << ".stan"].find
  Open.write(stan_file, stan_model)

  Log.debug "STAN model:\n" + stan_model

  script = <<-EOF
rbbt.require('rstan')
rbbt.require('jsonlite')

rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

#{load_str}

fit <- stan(file='#{stan_file}', data=input_data, sample_file='#{sample_file}', verbose=#{debug ? 'TRUE' : 'FALSE'}, #{R.hash2Rargs(stan_options)})

params <- as.data.frame(fit)

print(fit)
#{parameter_chains.nil? ? "" : "rbbt.tsv.write('#{parameter_chains}', params)" }
  EOF

  R.run script, nil, :monitor => debug
end
|
123
|
+
|
124
|
+
# Runs a Stan model and streams the samples as TSV text while they are
# being produced.
#
# data::      inputs for the model (see save_data)
# model::     Stan code without the `data{}` block
# directory:: working directory; when nil a temporary one is used and removed
#             once the stream is done. A Hash given here is taken as +options+.
# options::   options for exec; :debug is extracted and passed separately
#
# Stan writes its samples into a FIFO inside the working directory. A pipe
# reads that FIFO, drops the `#` comment lines, turns the `lp__,...` header
# into a `#Iteration` header and prefixes every sample with its iteration
# number. When +directory+ is given, the chains and the printed summary are
# also kept in it ('chains' and 'summary').
#
# Returns the stream set up by ConcurrentStream.setup.
def self.stream_chain(data, model, directory = nil, options = {})
  options, directory = directory, nil if Hash === directory
  debug = Misc.process_options options, :debug

  if directory.nil?
    directory = TmpFile.tmp_file
    erase = true
  end

  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  FileUtils.mkdir_p directory unless File.exist? directory
  input_directory = File.join(directory, 'inputs')
  parameter_chains = File.join(directory, 'chains') unless erase
  summary = File.join(directory, 'summary') unless erase
  sample_file = File.join(directory, 'samples')

  File.mkfifo(sample_file)

  io = Misc.open_pipe do |sin|
    iteration = 1
    sin << "#: :type=:list#:cast=:to_f" << "\n"
    begin
      # Block form so the FIFO reader is closed even if an error is raised.
      File.open(sample_file, 'r') do |reader|
        while line = reader.gets
          # Comment lines from Stan are not forwarded.
          next if line =~ /^#/

          parts = line.split(",")
          if line =~ /^lp__/
            sin << ("#Iteration\t" << parts.join("\t"))
          else
            sin << (iteration.to_s << "\t" << parts.join("\t"))
            iteration += 1
          end
        end
      end
    rescue => e
      Log.exception e
      raise e
    end
  end

  exec_thread = Thread.new do
    res = self.exec(data, model, input_directory, parameter_chains, sample_file, debug, options)
    # Read the result once: a second #read on the same stream yields nothing,
    # which used to leave the debug message without the actual output.
    output = res.read.to_s
    Open.write(summary, output) unless summary.nil?
    Log.debug "Result from STAN:\n" + output
  end

  ConcurrentStream.setup io, :threads => [exec_thread] do
    Log.debug "Done chains for STAN"
    FileUtils.rm_rf directory if erase
  end
end
|
179
|
+
|
180
|
+
# Runs a Stan model to completion inside +directory+.
#
# data::      inputs for the model (see save_data)
# model::     Stan code without the `data{}` block
# directory:: working directory; inputs go to 'inputs', the sampled
#             parameters to 'chains', the printed result to 'summary' and
#             Stan's own sample file to 'sample'
# options::   options for exec; :debug is extracted and passed separately
#
# Returns the opened 'chains' file (Open.open).
def self.run(data, model, directory, options = {})
  debug = Misc.process_options options, :debug

  input_directory = File.join(directory, 'inputs')
  parameter_chains = File.join(directory, 'chains')
  summary = File.join(directory, 'summary')
  sample_file = File.join(directory, 'sample')

  res = self.exec(data, model, input_directory, parameter_chains, sample_file, debug, options)

  # Read the result once and reuse it. Reading the stream for the log message
  # and then again for the summary left the summary file empty, because the
  # second #read starts at the end of the stream.
  output = res.read.to_s
  Log.debug "Result from STAN:\n" + output
  Open.write(summary, output)

  Open.open(parameter_chains)
end
|
195
|
+
|
196
|
+
# Fits a Stan model in a temporary directory and returns the sampled
# parameters as a TSV (type :list, values cast with to_f).
#
# data::    inputs for the model (see save_data)
# model::   Stan code without the `data{}` block
# options:: passed through to run
def self.fit(data, model, options = {})
  TmpFile.with_file do |workdir|
    FileUtils.mkdir_p workdir
    chains = self.run(data, model, workdir, options)
    TSV.open(chains, :type => :list, :cast => :to_f)
  end
end
|
203
|
+
|
204
|
+
|
205
|
+
end
|
@@ -0,0 +1,159 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.rb')
|
2
|
+
require 'rbbt/stan'
|
3
|
+
|
4
|
+
# Tests for the STAN module: saving inputs, generating the data header and
# fitting / streaming small models through rstan.
class TestClass < Test::Unit::TestCase
  # Stan program shared by the matrix based tests: every column j of `y` is
  # modelled as normal(w_m * s[j], 1 / s[j]).
  SCALED_COLUMNS_MODEL = <<-EOF
parameters{
vector<lower=0>[y_c] s;
real<lower=0> w_m;
}

model{
s ~ cauchy(0,100);
w_m ~ uniform(0,10);

for (j in 1:y_c){
y[,j] ~ normal(w_m*s[j], 1/s[j]);
}
}
  EOF

  def test_save_array
    data = {}
    data[:y] = [1,2,3,4]

    TmpFile.with_file do |directory|
      STAN.save_data(directory, data)
      # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
      assert File.exist?(File.join(directory, 'y'))
      assert_equal '[1,2,3,4]', Open.read(File.join(directory, 'y'))
    end
  end

  def test_data_header
    types = {:y => 'array'}

    header = STAN.data_header(types)
    assert header.include?('vector[y_l] y;')
  end

  def test_simple_fit
    Log.severity = 0
    res = STAN.fit({}, <<-EOF)
parameters{
real y;
}

model {
target += -0.5 * y * y;
}
    EOF
    ppp res
    Log.tsv res
  end

  def test_fit_vector
    Log.severity = 0
    data = {}
    real_mean = 4
    data[:y] = R.eval_a "rnorm(100, #{real_mean}, 1)"

    res = STAN.fit(data, <<-EOF, :iter => 10_000, :chains => 2)
parameters{
real mu;
}
model{
mu ~ normal(0,10);
y ~ normal(mu, 1);
}
    EOF

    m = Misc.mean(res.column("mu").values)

    assert((m - real_mean).abs < 0.5)
  end

  def test_fit_matrix
    Log.severity = 0

    samples = 1000
    real_mean = 5
    y = simulated_matrix(samples, real_mean)

    res = STAN.fit({:y => y}, SCALED_COLUMNS_MODEL, :iter => 100, :warmup => 20, :chains => 1)

    m = Misc.mean(res.column("w_m").values)
    assert((m - real_mean).abs < (real_mean.to_f / 10))
  end

  def test_stream
    Log.severity = 0

    samples = 1000
    real_mean = 5
    y = simulated_matrix(samples, real_mean)

    io = STAN.stream_chain({:y => y}, SCALED_COLUMNS_MODEL, :iter => 100, :warmup => 20)

    lines = 0
    while io.gets
      lines += 1
    end
    io.close
    io.join if io.respond_to? :join
    # Two header lines plus one line per iteration.
    assert_equal 102, lines
  end

  private

  # Simulates +samples+ draws of three values from normal(real_mean, 1),
  # scaled by 1, 5 and 10, in R and returns them as a TSV transposed with
  # "Sample" as the key field.
  def simulated_matrix(samples, real_mean)
    y = TmpFile.with_file do |tsv|
      R.run <<-EOF
s = c(1,5,10)

samples = #{samples}
m = list()
for (i in seq(1,samples)){
sample.name = paste("S",i, sep="")
sample.values = rnorm(3,#{real_mean},1) * s
m[[sample.name]] = sample.values
}

data = as.data.frame(m)
rbbt.tsv.write('#{tsv}', data)
      EOF

      TSV.open(tsv, :type => :list, :cast => :to_f)
    end

    y.transpose("Sample")
  end
end
|
159
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.39
|
4
|
+
version: 1.1.40
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -98,6 +98,7 @@ files:
|
|
98
98
|
- lib/rbbt/network/paths.rb
|
99
99
|
- lib/rbbt/plots/bar.rb
|
100
100
|
- lib/rbbt/plots/heatmap.rb
|
101
|
+
- lib/rbbt/stan.rb
|
101
102
|
- lib/rbbt/statistics/fdr.rb
|
102
103
|
- lib/rbbt/statistics/fisher.rb
|
103
104
|
- lib/rbbt/statistics/hypergeometric.rb
|
@@ -114,6 +115,7 @@ files:
|
|
114
115
|
- test/rbbt/statistics/test_fisher.rb
|
115
116
|
- test/rbbt/statistics/test_hypergeometric.rb
|
116
117
|
- test/rbbt/statistics/test_random_walk.rb
|
118
|
+
- test/rbbt/test_stan.rb
|
117
119
|
- test/rbbt/vector/model/test_svm.rb
|
118
120
|
- test/rbbt/vector/test_model.rb
|
119
121
|
- test/test_helper.rb
|
@@ -149,4 +151,5 @@ test_files:
|
|
149
151
|
- test/rbbt/statistics/test_hypergeometric.rb
|
150
152
|
- test/rbbt/vector/test_model.rb
|
151
153
|
- test/rbbt/vector/model/test_svm.rb
|
154
|
+
- test/rbbt/test_stan.rb
|
152
155
|
- test/test_helper.rb
|