rbbt-dm 1.1.39 → 1.1.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/stan.rb +205 -0
  3. data/test/rbbt/test_stan.rb +159 -0
  4. metadata +5 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5a6a86141d16b2da2c4024bebcff9447e78add40
4
- data.tar.gz: 062c8f641dfc0672d5d883958df950af7af49889
3
+ metadata.gz: f76827643000582061a88e4ea73e2bd75c77988c
4
+ data.tar.gz: bb6261e9ce5a53948b5429e7fcead2da551070d0
5
5
  SHA512:
6
- metadata.gz: fa3016424aca9f00970abda11b08cbec48faf4ddb61031d9e646527fa69baff50f60de733c29a99189889bc084de2d0c831e857b9507d240be91e16fc9a25f76
7
- data.tar.gz: 7fb134ffd67f043111c1bf6ec44d042fde6e53321b716ed3f2a1d0075b9b1b8e0d0ba0cbb3f1eb9d17f25ac849843e03c8c3aefb57ee377e12ecd766f79af636
6
+ metadata.gz: a1e4ccabc8088e6564cf1e2d2dc8e4cafc539d499fe1bdef6e4ce0ae7e41954457589fa79fc97371960d957b598394d41d4c36e515c6847427a69ca893955288
7
+ data.tar.gz: 6f2625689288ce82bcb7aac8f1ceca2f68300b5edb9ed63cb6a7ea4dbed1bc5c3442d38f0624d4e51224502e6a7e933e92d6eb3ae5851bcc00b7db8669e9bb52
@@ -0,0 +1,205 @@
1
+ require 'rbbt/util/R'
2
+ require 'mkfifo'
3
+
4
# Helpers to fit Stan models through R (rstan) from Ruby data.
#
# The caller's data is written to disk, declared in a generated Stan `data{}`
# section, and loaded on the R side into an `input_data` list that is handed
# to `stan()`. The model text supplied by the caller is appended after the
# generated `data{}` section.
module STAN

  # Writes each entry of +data+ to a file under +directory+ and builds the R
  # code that loads those files into the `input_data` list.
  #
  # Supported values: Integer, Float, Array (JSON files), TSV, and Path (which
  # is opened with TSV.open and then treated like a TSV).
  #
  # @param directory [String, Path] where the input files are written
  # @param data [Hash] variable name => value
  # @return [Array(Hash, String)] the type tag of each variable ('integer',
  #   'real', 'array', 'iarray' or 'matrix') and the R loading code
  # @raise [RuntimeError] when a value is of an unsupported class
  def self.save_data(directory, data = {})
    directory = directory.find if Path === directory
    types = {}
    load_lines = ["input_data = list()"]

    data.each do |name, value|
      file = File.join(directory, name.to_s)

      # A Path is opened as a TSV and from then on handled as a matrix
      value = TSV.open(value) if Path === value

      case value
      when Integer, Float
        Open.write(file, value.to_json)
        types[name] = Integer === value ? 'integer' : 'real'
        load_lines << "input_data[['#{name}']] = fromJSON(txt='#{file}')"
      when Array
        Open.write(file, value.to_json)
        # A single Float is enough to declare the whole array as real-valued
        types[name] = value.any? { |v| Float === v } ? 'array' : 'iarray'
        load_lines << "input_data[['#{name}_l']] = #{value.length}"
        load_lines << "input_data[['#{name}']] = fromJSON(txt='#{file}')"
      when TSV
        Open.write(file, value.to_s)
        types[name] = 'matrix'
        load_lines << "input_data[['#{name}_c']] = #{value.fields.length}"
        load_lines << "input_data[['#{name}_r']] = #{value.size}"
        load_lines << "#{name}.tmp = rbbt.impute(rbbt.tsv('#{file}'))"
        load_lines << "input_data[['#{name}']] = #{name}.tmp"
      else
        raise "Unknown type of data #{ name }: #{Misc.fingerprint value}"
      end
    end

    [types, load_lines.join("\n") + "\n"]
  end

  # Generates the Stan `data{}` section declaring every variable in +types+.
  #
  # Arrays additionally declare `<name>_l` (length); matrices declare
  # `<name>_c` and `<name>_r` (columns and rows). These match the extra
  # entries that save_data adds to `input_data`.
  #
  # @param types [Hash] variable name => type tag, as returned by save_data
  # @return [String] the text of the `data{}` section
  # @raise [RuntimeError] when a type tag is not recognized
  def self.data_header(types)
    declarations = types.map do |name, type|
      name = name.to_s
      case type
      when 'real'
        " real #{name};\n"
      when 'integer'
        " int #{name};\n"
      when 'array'
        " int<lower=0> #{name}_l;\n" +
          " vector[#{name}_l] #{name};\n"
      when 'iarray'
        " int<lower=0> #{name}_l;\n" +
          " int #{name}[#{name}_l];\n"
      when 'matrix'
        " int<lower=0> #{name}_c;\n" +
          " int<lower=0> #{name}_r;\n" +
          " matrix[#{name}_r,#{name}_c] #{name};\n"
      else
        raise "Unknown type for #{ name }: #{type}"
      end
    end
    types_str = declarations.join

    <<-EOF
data{

#{types_str}
}
    EOF
  end

  # Saves the data, writes the complete Stan model to a file and runs it with
  # rstan through R.run.
  #
  # @param data [Hash, nil] input variables; nil is treated as no data
  # @param model [String] Stan code without the `data{}` section
  # @param input_directory [String] where the input files are written
  # @param parameter_chains [String, nil] file where the parameter draws are
  #   written with rbbt.tsv.write; nothing is written when nil
  # @param sample_file [String] passed to `stan()` as `sample_file`
  # @param debug [Boolean] sets `verbose` in `stan()` and `:monitor` in R.run
  # @param stan_options [Hash] extra arguments for `stan()`; defaults are
  #   :iter 1000, :warmup 500, :chains 1, :seed 2887, :refresh 1200
  # @return whatever R.run returns for the script
  def self.exec(data, model, input_directory, parameter_chains, sample_file, debug = false, stan_options = {})
    stan_options = Misc.add_defaults stan_options, :iter => 1000, :warmup => 500, :chains => 1, :seed => 2887, :refresh => 1200

    data = {} if data.nil?

    types, load_str = save_data(input_directory, data)
    stan_model = self.data_header(types) + "\n" + model

    # The file name is derived from a digest of the model text
    stan_file = Rbbt.var.stan_models[Misc.digest(stan_model).to_s + ".stan"].find
    Open.write(stan_file, stan_model)

    Log.debug "STAN model:\n" + stan_model

    script = <<-EOF
rbbt.require('rstan')
rbbt.require('jsonlite')

rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

#{load_str}

fit <- stan(file='#{stan_file}', data=input_data, sample_file='#{sample_file}', verbose=#{debug ? 'TRUE' : 'FALSE'}, #{R.hash2Rargs(stan_options)})

params <- as.data.frame(fit)

print(fit)
#{parameter_chains.nil? ? "" : "rbbt.tsv.write('#{parameter_chains}', params)" }
    EOF

    R.run script, nil, :monitor => debug
  end

  # Runs the model in a background thread and streams the sampler output as
  # tab-separated lines while it is being produced.
  #
  # The sample file is a FIFO: lines read from it are re-emitted with commas
  # turned into tabs and an iteration number prepended. Comment lines are
  # dropped and the `lp__` header line becomes the field header.
  #
  # @param data [Hash, nil] input variables (see save_data)
  # @param model [String] Stan code without the `data{}` section
  # @param directory [String, Hash, nil] working directory. When nil a
  #   temporary one is used and removed at the end, and no chains or summary
  #   files are kept. A Hash here is taken to be +options+.
  # @param options [Hash] options for `stan()`, plus :debug
  # @return the stream set up by ConcurrentStream.setup
  def self.stream_chain(data, model, directory = nil, options = {})
    options, directory = directory, nil if Hash === directory
    debug = Misc.process_options options, :debug

    erase = directory.nil?
    directory = TmpFile.tmp_file if erase

    FileUtils.mkdir_p directory unless File.exist? directory
    input_directory = File.join(directory, 'inputs')
    parameter_chains = File.join(directory, 'chains') unless erase
    summary = File.join(directory, 'summary') unless erase
    sample_file = File.join(directory, 'samples')

    File.mkfifo(sample_file)

    io = Misc.open_pipe do |sin|
      iteration = 1
      sin << "#: :type=:list#:cast=:to_f" << "\n"
      begin
        # Block form so the FIFO handle is closed even if forwarding fails
        File.open(sample_file, 'r') do |reader|
          while line = reader.gets
            next if line =~ /^#/

            # The line keeps its trailing newline, so rows stay newline-terminated
            row = line.split(",") * "\t"
            if line =~ /^lp__/
              sin << "#Iteration\t" + row
            else
              sin << iteration.to_s + "\t" + row
              iteration += 1
            end
          end
        end
      rescue
        Log.exception $!
        raise
      end
    end

    exec_thread = Thread.new do
      res = self.exec(data, model, input_directory, parameter_chains, sample_file, debug, options)
      # Read once: a second read on the same stream would come back empty
      output = res.read.to_s
      Open.write(summary, output) unless summary.nil?
      Log.debug "Result from STAN:\n" + output
    end

    ConcurrentStream.setup io, :threads => [exec_thread] do
      Log.debug "Done chains for STAN"
      FileUtils.rm_rf directory if erase
    end
  end

  # Runs the model to completion inside +directory+, saving the R output to
  # `summary` and the parameter draws to `chains`.
  #
  # @param data [Hash, nil] input variables (see save_data)
  # @param model [String] Stan code without the `data{}` section
  # @param directory [String] working directory
  # @param options [Hash] options for `stan()`, plus :debug
  # @return the result of Open.open on the `chains` file
  def self.run(data, model, directory, options = {})
    debug = Misc.process_options options, :debug

    input_directory = File.join(directory, 'inputs')
    parameter_chains = File.join(directory, 'chains')
    summary = File.join(directory, 'summary')
    sample_file = File.join(directory, 'sample')

    res = self.exec(data, model, input_directory, parameter_chains, sample_file, debug, options)

    # Read once and reuse: reading twice left the summary file empty
    output = res.read.to_s
    Log.debug "Result from STAN:\n" + output
    Open.write(summary, output)

    Open.open(parameter_chains)
  end

  # Fits the model in a temporary directory and loads the parameter draws.
  #
  # @param data [Hash, nil] input variables (see save_data)
  # @param model [String] Stan code without the `data{}` section
  # @param options [Hash] options for `stan()`, plus :debug
  # @return the TSV opened from the chains with :type => :list, :cast => :to_f
  def self.fit(data, model, options = {})
    TmpFile.with_file do |directory|
      FileUtils.mkdir_p directory
      res = self.run(data, model, directory, options)
      TSV.open(res, :type => :list, :cast => :to_f)
    end
  end

end
@@ -0,0 +1,159 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.rb')
2
+ require 'rbbt/stan'
3
+
4
# Tests for the STAN helpers. Apart from the first two, these run real models
# through R and rstan.
class TestClass < Test::Unit::TestCase

  # An integer array is saved as a JSON file named after the variable.
  def test_save_array
    data = {}
    data[:y] = [1,2,3,4]

    TmpFile.with_file do |directory|
      STAN.save_data(directory, data)
      assert File.exist?(File.join(directory, 'y'))
      assert_equal '[1,2,3,4]', Open.read(File.join(directory, 'y'))
    end
  end

  # A real-valued array is declared as a Stan vector sized by `<name>_l`.
  def test_data_header
    types = {:y => 'array'}

    header = STAN.data_header(types)
    assert header.include? 'vector[y_l] y;'
  end

  # Smoke test: no assertions, it only checks that a model without data can
  # be fitted without raising.
  def test_simple_fit
    Log.severity = 0
    res = STAN.fit({}, <<-EOF)
parameters{
real y;
}

model {
target += -0.5 * y * y;
}
    EOF
    ppp res
    Log.tsv res
  end

  # The posterior mean of mu should be close to the mean used to draw y.
  def test_fit_vector
    Log.severity = 0
    data = {}
    real_mean = 4
    data[:y] = R.eval_a "rnorm(100, #{real_mean}, 1)"

    res = STAN.fit(data, <<-EOF, :iter => 10_000, :chains => 2)
parameters{
real mu;
}
model{
mu ~ normal(0,10);
y ~ normal(mu, 1);
}
    EOF

    m = Misc.mean(res.column("mu").values)

    assert (m - real_mean).abs < 0.5
  end

  # Fits a model on a matrix and checks that the recovered mean is within
  # 10% of the one used in the simulation.
  def test_fit_matrix
    Log.severity = 0

    real_mean = 5
    y = simulated_matrix(1000, real_mean)

    res = STAN.fit({:y => y}, <<-EOF, :iter => 100, :warmup => 20, :chains => 1)
parameters{
vector<lower=0>[y_c] s;
real<lower=0> w_m;
}

model{
s ~ cauchy(0,100);
w_m ~ uniform(0,10);

for (j in 1:y_c){
y[,j] ~ normal(w_m*s[j], 1/s[j]);
}
}
    EOF

    m = Misc.mean(res.column("w_m").values)
    assert (m - real_mean).abs < (real_mean.to_f / 10)
  end

  # Streams the chain and counts the lines produced.
  def test_stream
    Log.severity = 0

    y = simulated_matrix(1000, 5)

    io = STAN.stream_chain({:y => y}, <<-EOF, :iter => 100, :warmup => 20)
parameters{
vector<lower=0>[y_c] s;
real<lower=0> w_m;
}

model{
s ~ cauchy(0,100);
w_m ~ uniform(0,10);

for (j in 1:y_c){
y[,j] ~ normal(w_m*s[j], 1/s[j]);
}
}
    EOF

    lines = 0
    while line = io.gets
      lines += 1
    end
    io.close
    io.join if io.respond_to? :join

    # Presumably the options line, the field header and one row per
    # iteration (:iter => 100) -- TODO confirm against rstan's sample_file
    assert_equal 102, lines
  end

  private

  # Simulates the matrix shared by the matrix and stream tests: in R, each of
  # +samples+ columns holds rnorm(3, real_mean, 1) scaled by c(1,5,10). The
  # table is then read back as a TSV and transposed with "Sample" as the new
  # key field.
  def simulated_matrix(samples, real_mean)
    y = TmpFile.with_file do |tsv|
      R.run <<-EOF
s = c(1,5,10)

samples = #{samples}
m = list()
for (i in seq(1,samples)){
sample.name = paste("S",i, sep="")
sample.values = rnorm(3,#{real_mean},1) * s
m[[sample.name]] = sample.values
}

data = as.data.frame(m)
rbbt.tsv.write('#{tsv}', data)
      EOF

      TSV.open(tsv, :type => :list, :cast => :to_f)
    end

    y.transpose("Sample")
  end
end
159
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.39
4
+ version: 1.1.40
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-12-12 00:00:00.000000000 Z
11
+ date: 2018-02-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -98,6 +98,7 @@ files:
98
98
  - lib/rbbt/network/paths.rb
99
99
  - lib/rbbt/plots/bar.rb
100
100
  - lib/rbbt/plots/heatmap.rb
101
+ - lib/rbbt/stan.rb
101
102
  - lib/rbbt/statistics/fdr.rb
102
103
  - lib/rbbt/statistics/fisher.rb
103
104
  - lib/rbbt/statistics/hypergeometric.rb
@@ -114,6 +115,7 @@ files:
114
115
  - test/rbbt/statistics/test_fisher.rb
115
116
  - test/rbbt/statistics/test_hypergeometric.rb
116
117
  - test/rbbt/statistics/test_random_walk.rb
118
+ - test/rbbt/test_stan.rb
117
119
  - test/rbbt/vector/model/test_svm.rb
118
120
  - test/rbbt/vector/test_model.rb
119
121
  - test/test_helper.rb
@@ -149,4 +151,5 @@ test_files:
149
151
  - test/rbbt/statistics/test_hypergeometric.rb
150
152
  - test/rbbt/vector/test_model.rb
151
153
  - test/rbbt/vector/model/test_svm.rb
154
+ - test/rbbt/test_stan.rb
152
155
  - test/test_helper.rb