cmdstan 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f52650c597cfa86c4e1bd0f095a2b321e906285df981f9f345bde5054fb8cc80
4
+ data.tar.gz: 3c98b6ff5c800ad9ecd28e325ed6f97d576306567a4ac8530bb117f948c6c321
5
+ SHA512:
6
+ metadata.gz: ddab8b6b73175afcbee65f5653deee681b746fe2083015ec06b65866bc467ca2bb7ec6cc475a65bc83976ff4f13e8911ac08d999703c52e96d529f8b68408554
7
+ data.tar.gz: c8316343f62159e1963f7835bb231114275b15ab7fd982f3ae58f50c7168aad08e6448c6c2fd6110910f061595f586f3882a7a38a9e377c332b8b5a381bc4acb
@@ -0,0 +1,3 @@
1
+ ## 0.1.0 (2020-04-08)
2
+
3
+ - First release
@@ -0,0 +1,29 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2020, Andrew Kane
4
+ All rights reserved.
5
+
6
+ Redistribution and use in source and binary forms, with or without
7
+ modification, are permitted provided that the following conditions are met:
8
+
9
+ 1. Redistributions of source code must retain the above copyright notice, this
10
+ list of conditions and the following disclaimer.
11
+
12
+ 2. Redistributions in binary form must reproduce the above copyright notice,
13
+ this list of conditions and the following disclaimer in the documentation
14
+ and/or other materials provided with the distribution.
15
+
16
+ 3. Neither the name of the copyright holder nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,84 @@
1
+ # CmdStan.rb
2
+
3
+ Bayesian inference for Ruby, powered by [CmdStan](https://github.com/stan-dev/cmdstan)
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application’s Gemfile:
8
+
9
+ ```ruby
10
+ gem 'cmdstan'
11
+ ```
12
+
13
+ Installation can take a few minutes as CmdStan downloads and builds.
14
+
15
+ ## Getting Started
16
+
17
+ Create a Stan file, like `bernoulli.stan`
18
+
19
+ ```stan
20
+ data {
21
+ int<lower=0> N;
22
+ int<lower=0,upper=1> y[N];
23
+ }
24
+ parameters {
25
+ real<lower=0,upper=1> theta;
26
+ }
27
+ model {
28
+ theta ~ beta(1,1);
29
+ y ~ bernoulli(theta);
30
+ }
31
+ ```
32
+
33
+ Compile the model
34
+
35
+ ```ruby
36
+ model = CmdStan::Model.new(stan_file: "bernoulli.stan")
37
+ ```
38
+
39
+ Fit the model
40
+
41
+ ```ruby
42
+ data = {"N" => 10, "y" => [0, 1, 0, 0, 0, 0, 0, 0, 0, 1]}
43
+ fit = model.sample(data: data, chains: 5)
44
+ ```
45
+
46
+ Summarize the results
47
+
48
+ ```ruby
49
+ fit.summary
50
+ ```
51
+
52
+ ## Maximum Likelihood Estimation
53
+
54
+ ```ruby
55
+ mle = model.optimize(data: data)
56
+ mle.optimized_params
57
+ ```
58
+
59
+ ## Credits
60
+
61
+ This library is modeled after the [CmdStanPy API](https://github.com/stan-dev/cmdstanpy).
62
+
63
+ ## History
64
+
65
+ View the [changelog](https://github.com/ankane/cmdstan/blob/master/CHANGELOG.md)
66
+
67
+ ## Contributing
68
+
69
+ Everyone is encouraged to help improve this project. Here are a few ways you can help:
70
+
71
+ - [Report bugs](https://github.com/ankane/cmdstan/issues)
72
+ - Fix bugs and [submit pull requests](https://github.com/ankane/cmdstan/pulls)
73
+ - Write, clarify, or fix documentation
74
+ - Suggest or add new features
75
+
76
+ To get started with development:
77
+
78
+ ```sh
79
+ git clone https://github.com/ankane/cmdstan.git
80
+ cd cmdstan
81
+ bundle install
82
+ ruby ext/cmdstan/extconf.rb
83
+ bundle exec rake test
84
+ ```
@@ -0,0 +1,5 @@
1
# No-op targets: this Makefile compiles nothing itself.
# NOTE(review): presumably it exists so the `make` steps that follow
# extconf.rb during gem installation succeed - confirm.
install clean:
	@echo "Nothing to do"
@@ -0,0 +1,60 @@
1
+ require "digest"
2
+ require "fileutils"
3
+ require "net/http"
4
+ require "tmpdir"
5
+
6
# CmdStan release that is downloaded by this script.
version = "2.22.1"
# Expected SHA-256 hex digest of the release tarball.
checksum = "d12e46bda4bd3db9e8abe0554712b56e41f8e7843900338446d9a3b1acc2d0ce"
url = "https://github.com/stan-dev/cmdstan/releases/download/v#{version}/cmdstan-#{version}.tar.gz"

# Downloads +url+ to +download_path+, following HTTP redirects, and verifies
# the SHA-256 digest of the downloaded body.
#
# url            - String URL to fetch (http or https).
# download_path  - String path the body is written to (overwritten if present).
# checksum       - String expected SHA-256 hex digest of the body.
# redirect_limit - Integer number of redirects that may still be followed.
#
# Aborts the process (SystemExit) with "Too many redirects", "Bad response"
# or "Bad checksum". A file that fails the checksum is deleted so an
# unverified download is never left behind.
def download_file(url, download_path, checksum, redirect_limit = 10)
  # bound the recursion so a redirect loop cannot run forever
  abort "Too many redirects" if redirect_limit < 0

  uri = URI(url)
  location = nil

  # only enable TLS for https URLs so a plain http redirect target still works
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
    request = Net::HTTP::Get.new(uri)
    http.request(request) do |response|
      case response
      when Net::HTTPRedirection
        # a redirect class without a Location header cannot be followed
        abort "Bad response" unless response["location"]
        # the Location header may be relative; resolve it against the current URL
        location = URI.join(url, response["location"]).to_s
      when Net::HTTPSuccess
        digest = Digest::SHA2.new

        chunks = 0
        File.open(download_path, "wb") do |f|
          response.read_body do |chunk|
            f.write(chunk)
            digest.update(chunk)

            # print progress
            putc "." if chunks % 50 == 0
            chunks += 1
          end
        end
        puts # newline

        if digest.hexdigest != checksum
          File.delete(download_path) if File.exist?(download_path)
          abort "Bad checksum"
        end
      else
        abort "Bad response"
      end
    end
  end

  # outside of Net::HTTP block to close previous connection
  download_file(location, download_path, checksum, redirect_limit - 1) if location
end
46
+
47
# download
puts "Downloading #{url}..."
download_path = "#{Dir.tmpdir}/cmdstan-#{version}.tar.gz"
download_file(url, download_path, checksum)

# extract
# CMDSTAN overrides the install directory; otherwise tmp/cmdstan relative to
# this file's grandparent directory is used
path = ENV["CMDSTAN"] || File.expand_path("../../tmp/cmdstan", __dir__)
FileUtils.mkdir_p(path)
Dir.chdir(path)
# TODO use Gem::Package::TarReader from Rubygems
# system returns false on a non-zero exit and nil when the command cannot be
# run; stop in both cases instead of continuing with a broken tree
abort "Failed to extract #{download_path}" unless system("tar", "zxvf", download_path, "-C", path, "--strip-components=1")

# build
abort "Failed to build CmdStan" unless system("make", "build", "-j")
@@ -0,0 +1,20 @@
1
# stdlib
require "csv"
require "json"
require "open3"
require "tempfile"
5
+
6
+ # modules
7
+ require "cmdstan/utils"
8
+ require "cmdstan/mcmc"
9
+ require "cmdstan/mle"
10
+ require "cmdstan/model"
11
+ require "cmdstan/version"
12
+
13
module CmdStan
  # Error class of this library; a subclass of StandardError.
  class Error < StandardError
  end

  # Returns the configured CmdStan directory.
  def self.path
    @path
  end

  # Sets the CmdStan directory.
  def self.path=(value)
    @path = value
  end

  # The CMDSTAN environment variable wins; otherwise fall back to tmp/cmdstan
  # relative to the parent of this file's directory.
  self.path = ENV.fetch("CMDSTAN") { File.expand_path("../tmp/cmdstan", __dir__) }
end
@@ -0,0 +1,60 @@
1
module CmdStan
  # Result of a sampling run, backed by one CSV output file per chain.
  class MCMC
    include Utils

    # column_names - header names read from the first output file.
    # draws        - the "num_samples = N" value found in the first output
    #                file (nil when no such line exists).
    attr_reader :column_names, :draws

    # private
    # TODO use runset for args
    #
    # output_files - Array of objects responding to #path, one per chain.
    def initialize(output_files)
      @output_files = output_files
      validate_csv_files
    end

    # Reads every chain's CSV file.
    #
    # Returns an Array indexed as [draw][chain], where each entry is the
    # Array of that row's column values converted with to_f.
    # Raises RuntimeError ("Bug detected") when a file's row count differs
    # from #draws.
    def sample
      draws_by_iteration = []
      @output_files.each_with_index do |output_file, chain|
        rows = 0
        CSV.foreach(output_file.path, skip_lines: /^#/, headers: true) do |row|
          (draws_by_iteration[rows] ||= [])[chain] = row.to_h.values.map(&:to_f)
          rows += 1
        end
        raise "Bug detected" if rows != draws
      end
      draws_by_iteration
    end

    # Runs bin/stansummary from CmdStan.path over all output files and parses
    # the CSV it writes.
    #
    # Returns a Hash of name => Hash of the remaining columns (numeric values
    # converted). Only "lp__" and names not ending in "__" are kept.
    def summary
      csv_file = Tempfile.new
      run_command "#{CmdStan.path}/bin/stansummary", "--csv_file=#{csv_file.path}", *@output_files.map(&:path)

      result = {}
      CSV.foreach(csv_file.path, headers: true, converters: :numeric) do |row|
        value = row.to_h
        name = value.delete("name")
        result[name] = value if name == "lp__" || !name.end_with?("__")
      end
      result
    ensure
      # remove the scratch file right away instead of waiting for GC
      csv_file.close! if csv_file
    end

    private

    # Reads the draw count and the column names from the first output file.
    def validate_csv_files
      # TODO ensure consistent files
      output_file = @output_files.first

      # the first line matching "num_samples = N" gives the draw count
      File.foreach(output_file.path) do |line|
        matches = /num_samples = (\d+)/.match(line)
        if matches
          @draws = matches[1].to_i
          break
        end
      end

      # only the first data row is needed to learn the header names
      CSV.foreach(output_file.path, skip_lines: /^#/, headers: true) do |row|
        @column_names = row.to_h.keys
        break
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module CmdStan
  # Result of an optimization run, backed by a single CSV output file.
  class MLE
    include Utils

    # private
    # TODO use runset for args
    #
    # output_file - object responding to #path that points at the CSV output.
    def initialize(output_file)
      @output_file = output_file
    end

    # Parses the output file, skipping "#" comment lines.
    #
    # Returns the first data row as a Hash of header => value (numeric values
    # converted), or nil when the file has no data rows.
    def optimized_params
      csv_options = {skip_lines: /^#/, headers: true, converters: :numeric}
      first_row = CSV.foreach(@output_file.path, **csv_options).first
      first_row && first_row.to_h
    end

    # Returns the header names of the optimized parameters.
    def column_names
      optimized_params.keys
    end
  end
end
@@ -0,0 +1,107 @@
1
module CmdStan
  # A Stan program together with the executable built from it.
  class Model
    include Utils

    attr_reader :exe_file, :name, :stan_file

    # stan_file - String path to a .stan file (expanded to an absolute path).
    # exe_file  - String path to an existing executable; when given, nothing
    #             is compiled.
    # compile   - Boolean, build the executable now (only when exe_file is
    #             not given).
    #
    # Raises ArgumentError when neither stan_file nor exe_file is given.
    def initialize(stan_file: nil, exe_file: nil, compile: true)
      raise ArgumentError, "Must specify stan_file or exe_file" unless stan_file || exe_file

      # convert to absolute path
      stan_file = File.expand_path(stan_file) if stan_file

      @stan_file = stan_file
      # the dot is escaped so only a literal ".stan" suffix is stripped
      @exe_file = exe_file || stan_file.sub(/\.stan\z/, "")
      @name = File.basename(@exe_file)

      self.compile if compile && !exe_file
    end

    # Runs `make <exe_file>` from inside CmdStan.path.
    def compile
      Dir.chdir(CmdStan.path) do
        run_command "make", @exe_file
      end
    end

    # Returns the contents of the Stan file.
    def code
      File.read(stan_file)
    end

    # Runs the executable with method=sample once per chain, one after another.
    #
    # data           - object serialized with to_json and passed as the data file.
    # chains         - Integer number of chains (4 when nil).
    # seed           - optional random seed (converted with to_i).
    # inits          - optional object serialized with to_json and passed as init.
    # warmup_iters   - optional value for num_warmup (converted with to_i).
    # sampling_iters - optional value for num_samples (converted with to_i).
    #
    # Returns an MCMC wrapping the per-chain output files.
    def sample(data:, chains: nil, seed: nil, inits: nil, warmup_iters: nil, sampling_iters: nil)
      data_file = write_json_tempfile(data)
      # the init file has the same content for every chain, so write it once
      init_file = write_json_tempfile(inits) if inits

      chains ||= 4

      output_files = []
      chains.times do |chain|
        output_file = Tempfile.new(["cmdstan", ".csv"])

        args = [@exe_file, "id=#{chain + 1}"]

        # random
        args += ["random", "seed=#{seed.to_i}"] if seed

        # data
        args += ["data", "file=#{data_file.path}"]
        args << "init=#{init_file.path}" if init_file

        # output
        args += ["output", "file=#{output_file.path}"]

        # method
        args << "method=sample"
        args << "num_warmup=#{warmup_iters.to_i}" if warmup_iters
        args << "num_samples=#{sampling_iters.to_i}" if sampling_iters
        args += ["algorithm=hmc", "adapt", "engaged=1"]

        run_command(*args)

        output_files << output_file
      end

      MCMC.new(output_files)
    end

    # Runs the executable with method=optimize.
    #
    # data      - object serialized with to_json and passed as the data file.
    # seed      - optional random seed (converted with to_i).
    # inits     - optional object serialized with to_json and passed as init.
    # algorithm - optional algorithm name (converted with to_s.downcase).
    # iter      - optional iteration count (converted with to_i).
    #
    # Returns an MLE wrapping the output file.
    def optimize(data:, seed: nil, inits: nil, algorithm: nil, iter: nil)
      data_file = write_json_tempfile(data)
      init_file = write_json_tempfile(inits) if inits

      output_file = Tempfile.new(["cmdstan", ".csv"])
      diagnostic_file = Tempfile.new(["cmdstan", ".csv"])

      args = [@exe_file]

      # random
      args += ["random", "seed=#{seed.to_i}"] if seed

      # data
      args += ["data", "file=#{data_file.path}"]
      args << "init=#{init_file.path}" if init_file

      # output
      args += ["output", "file=#{output_file.path}", "diagnostic_file=#{diagnostic_file.path}"]

      # method
      args << "method=optimize"
      args << "algorithm=#{algorithm.to_s.downcase}" if algorithm
      args << "iter=#{iter.to_i}" if iter

      run_command(*args)

      MLE.new(output_file)
    end

    private

    # Writes value.to_json to a new ".json" Tempfile, closes it and returns
    # it. The caller keeps the returned object for as long as the path is used.
    def write_json_tempfile(value)
      file = Tempfile.new(["cmdstan", ".json"])
      file.write(value.to_json)
      file.close
      file
    end
  end
end
@@ -0,0 +1,16 @@
1
module CmdStan
  module Utils
    private

    # Runs an external command and waits for it to finish.
    #
    # args - the command and its arguments, passed through to Open3.
    #
    # Returns nil on success.
    # Raises Error ("Command failed") when the exit status is not zero, after
    # echoing the command's stdout and stderr to $stderr.
    def run_command(*args)
      # capture3 takes the same argument list as popen3 and reads both pipes
      # while the command runs. Waiting for the exit status before reading
      # can hang once the child has filled a pipe buffer.
      stdout, stderr, status = Open3.capture3(*args)

      unless status.success?
        $stderr.puts stdout
        $stderr.puts stderr
        raise Error, "Command failed"
      end

      nil
    end
  end
end
@@ -0,0 +1,3 @@
1
module CmdStan
  # Version string of this gem; frozen so it cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cmdstan
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Kane
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-04-09 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '5'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '5'
55
+ description:
56
+ email: andrew@chartkick.com
57
+ executables: []
58
+ extensions:
59
+ - ext/cmdstan/extconf.rb
60
+ extra_rdoc_files: []
61
+ files:
62
+ - CHANGELOG.md
63
+ - LICENSE.txt
64
+ - README.md
65
+ - ext/cmdstan/Makefile
66
+ - ext/cmdstan/extconf.rb
67
+ - lib/cmdstan.rb
68
+ - lib/cmdstan/mcmc.rb
69
+ - lib/cmdstan/mle.rb
70
+ - lib/cmdstan/model.rb
71
+ - lib/cmdstan/utils.rb
72
+ - lib/cmdstan/version.rb
73
+ homepage: https://github.com/ankane/cmdstan
74
+ licenses:
75
+ - BSD-3-Clause
76
+ metadata: {}
77
+ post_install_message:
78
+ rdoc_options: []
79
+ require_paths:
80
+ - lib
81
+ required_ruby_version: !ruby/object:Gem::Requirement
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ version: '2.4'
86
+ required_rubygems_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ requirements: []
92
+ rubygems_version: 3.1.2
93
+ signing_key:
94
+ specification_version: 4
95
+ summary: Bayesian inference for Ruby, powered by CmdStan
96
+ test_files: []