bio-conduit 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5101f8f97fb43d2832036876835d7ecb7c6a690b
4
+ data.tar.gz: 94d96c406a17d7c5db2fe803c4b160b719b2e00b
5
+ SHA512:
6
+ metadata.gz: 3257b904ef5687b8e15267b4f610b0ce7bc9603c2913df6050a995e4d5a300245c45ba1da80e45c1d36a47a58fb74f2363388a309176af2a3371f137f0a970b4
7
+ data.tar.gz: fdecf9dbe891ba147d5afa2fc25d418e10fdf406dc48fe68e64d69e5a125055dc8f694bb9207a67ac4b68ac6ad1025f51b18442ee9a37f2085d947547eba140a
@@ -0,0 +1,3 @@
1
+ # README #
2
+
3
+ This is a simple pipeline running library inspired by [bioruby-pipengine](https://github.com/fstrozzi/bioruby-pipengine)
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env ruby
2
+ # bio-conduit
3
+ #
4
+ # Copyright (C) 2015
5
+ #
6
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
7
+ #
8
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
9
+ #
10
+
11
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
12
+
13
+ $:<< File.expand_path(File.join(File.dirname(File.dirname __FILE__),"lib"))
14
+
15
+ require 'optparse'
16
+ require 'bio-conduit'
17
+
18
# Command-line driver: builds one job script per selected sample from the
# pipeline definition and either prints it (--dry) or pipes it to the
# submission command of the chosen job scheduler.

# Defaults; every key can be overridden by one of the switches below.
opts = {dry: false, queue_type: 'torque', pipeline: "pipeline.yml", sample_file: "samples.yml", queue_option: ""}
OptionParser.new do |option|
  option.banner = <<-EOS
Create a job script for each sample from a pipeline definition and submit it to a job scheduler.

Usage:
#{$0} [options]
where [options] are:
EOS
  option.on("-p", "--pipeline PIPELINE", "YAML file with pipeline information") {|v| opts[:pipeline] = v}
  option.on("-f", "--sample_file SAMPLEFILE", "YAML file with samples information") {|v| opts[:sample_file] = v}
  option.on("-l", "--samples SAMPLES", "List of sample names to run the pipeline") {|v| opts[:samples] = v.split(/,/)}
  option.on("-s", "--steps STEPS", "List of steps to be executed") {|v| opts[:steps] = v.split(/,/)}
  option.on("-d", "--dry", "Print the submission command and job script instead of submitting") {|v| opts[:dry] = true}
  # Stored under :groups, the key the sample selection below reads.
  option.on("-g", "--group GROUP", "Specify the group of samples to run the pipeline") {|v| opts[:groups] = v.split(/,/)}
  option.on("-o", "--output_dir OUTPUTDIR", "Output directory (override output directory in pipeline file)") {|v| opts[:output_dir] = v}
  option.on("-q", "--queue_type QUEUETYPE", "Specify job scheduler") {|v| opts[:queue_type] = v}
  option.on("-b", "--queue_option OPTIONS", "Extra options passed to the job submission command") {|v| opts[:queue_option] = v}
end.parse!

begin
  sampleyml = Bio::Conduit::Samples.new(opts[:sample_file])
rescue Errno::ENOENT
  abort "ERR: No samples file - #{opts[:sample_file]}"
end

begin
  procyml = Bio::Conduit::Process.new(opts[:pipeline])
rescue Errno::ENOENT
  abort "ERR: No pipeline file - #{opts[:pipeline]}"
end

# Sample selection: explicit names win over groups; with neither, run everything.
samples =
  if opts[:samples]
    opts[:samples].map do |sample_name|
      sampleyml[sample_name] || abort("ERR: Unknown sample - #{sample_name}")
    end
  elsif opts[:groups]
    opts[:groups].flat_map do |group_name|
      sampleyml.groups[group_name] || abort("ERR: Unknown sample group - #{group_name}")
    end
  else
    sampleyml.samples.values
  end

queue = Bio::Conduit::QUEUES[opts[:queue_type]]
abort "ERR: Unknown queue type - #{opts[:queue_type]}" unless queue

template = queue[:template]
# Block form keeps backslashes in the user-supplied options literal.
cmd = queue[:command].sub(/\<options\>/) { opts[:queue_option] }

# The combined step does not depend on the sample, so build it once.
pipeline_step = procyml.run_steps(opts[:steps])
abort "ERR: No steps to run" unless pipeline_step

samples.each do |sam|
  runscript = pipeline_step.create_jobscript(sam, template)

  if opts[:dry]
    puts cmd
    puts runscript
  else
    # The job script is fed to the submission command on its standard input.
    _out, err, status = Open3.capture3(cmd, :stdin_data => runscript)
    unless status.success?
      raise Bio::Conduit::CommandError, "Process.run() of #{opts[:queue_type]} job submission failed with #{err}"
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "bio/conduit/version"
4
+
5
# Gem specification for bio-conduit.
Gem::Specification.new do |s|
  s.name        = 'bio-conduit'
  s.version     = Bio::Conduit::VERSION
  # No explicit s.date: RubyGems fills in the build date itself, which keeps
  # builds reproducible instead of depending on Time.now at load time.
  s.platform    = Gem::Platform::RUBY

  s.summary     = "A pipeline creator for bioruby"
  s.description = "A pipeline creator for bioruby"
  s.authors     = ["Natapol Pornputtapong"]
  s.email       = 'natapol.por@gmail.com'

  s.homepage    = 'http://rubygems.org/gems/bio-conduit'
  s.license     = 'GPL'


  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  # Only lib belongs on the load path; bin holds executables, not libraries.
  s.require_paths = ["lib"]

  # if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("1.9.0")
  #   s.add_dependency "mongoid", "~> 3.1"
  # else
  #   s.add_dependency "mongoid", "~> 4.0"
  #   s.add_dependency "mongoid-versioning", "~> 1.0"
  # end
  #s.add_dependency "trollop", "~> 2.0"
  #s.add_dependency "bio", "~> 1.4"
  #s.add_dependency "statsample", "~> 1.4"
end
@@ -0,0 +1 @@
1
+ Natapol Pornputtapong
@@ -0,0 +1,22 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ $:<< File.expand_path(File.join(File.dirname(File.dirname __FILE__),"lib"))
13
+
14
+ require 'yaml'
15
+ require 'erb'
16
+ require 'fileutils'
17
+ require 'open3'
18
+
19
+ #require 'active_support/concern'
20
+ #require 'active_support/core_ext/object/blank'
21
+
22
+ require 'bio/conduit'
@@ -0,0 +1,14 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+ require 'bio/conduit/version'
10
+ require 'bio/conduit/exception'
11
+ require 'bio/conduit/samples'
12
+ require 'bio/conduit/process'
13
+ require 'bio/conduit/template'
14
+ require 'bio/conduit/queues'
@@ -0,0 +1,19 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
module Bio
  module Conduit
    # Error class for missing files. Derives from StandardError so that a
    # plain `rescue` catches it.
    # NOTE(review): not raised anywhere in the visible sources - confirm usage.
    class FileNotFound < StandardError
    end

    # Raised by the conduitrun executable when the job submission command
    # exits unsuccessfully. Derives from StandardError so that a plain
    # `rescue` catches it.
    class CommandError < StandardError
    end
  end
end
@@ -0,0 +1,147 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
module Bio
  module Conduit
    # A single pipeline step: a name, its settings hash (keys used here are
    # 'run', 'pre', 'mem', 'cpu' and 'nodes') and the resources table that
    # supplies values for <placeholder> references inside the commands.
    class Step

      attr_reader :name, :info, :resources

      # Multipliers for the (case-insensitive) unit suffix of a resource amount.
      UNITCONV = {
        '' => 1,
        'b' => 1,
        'k' => 1024,
        'm' => 1048576,
        'g' => 1073741824,
        't' => 1099511627776
      }.freeze

      # name      - step name
      # hash      - step settings; its 'run' entry is rewritten in place when
      #             addpath is true
      # resources - Hash of resource name => value
      # addpath   - when true, prefix the commands with "mkdir -p" and "cd"
      #             into the step's own directory below <sample_output_path>
      def initialize(name, hash, resources, addpath = true)
        @name = name
        @info = hash
        @resources = resources
        return unless addpath

        prologue = ["mkdir -p <sample_output_path/>#{@name}", "cd <sample_output_path/>#{@name}"]
        if @info['run'].is_a?(String)
          @info['run'] = prologue + [@info['run']]
        elsif @info['run'].is_a?(Array)
          @info['run'].unshift(*prologue)
        end
      end

      # True when the step declares a prerequisite under the 'pre' key.
      def dependence?
        @info.has_key?('pre')
      end

      # The declared prerequisite, or nil when there is none.
      def dependency
        dependence? ? @info['pre'] : nil
      end

      # Combines two steps into one new step named "<self>_<other>".
      # Resources are merged with other's values winning. For 'mem', 'cpu'
      # and 'nodes' the larger amount is kept (self's on a tie); every other
      # key present in both steps is concatenated, self's values first.
      #
      # Raises ArgumentError when a resource amount cannot be parsed.
      def +(other)
        as_list = lambda { |value| value.is_a?(String) ? [value] : value }
        merged_info = info.merge(other.info) do |key, first, second|
          case key
          when 'mem', 'cpu', 'nodes'
            quantity(second) > quantity(first) ? second : first
          else
            as_list.call(first) + as_list.call(second)
          end
        end
        self.class.new("#{name}_#{other.name}", merged_info, resources.merge(other.resources), false)
      end

      # Renders the job script for one sample.
      #
      # sample   - Hash with 'name', 'path', 'resources' and optionally 'group'
      # template - ERB source; it is evaluated in this method's binding, so it
      #            can read @jobname, @commands and @info
      #
      # Every <a/b/...> placeholder in the commands has each slash-separated
      # segment replaced by the matching resource value; slashes are kept.
      #
      # Returns the rendered script as a String.
      # Raises ArgumentError when the 'output' resource is missing or a
      # placeholder names an unknown resource.
      def create_jobscript(sample, template)
        total_resources = @resources.merge(sample['resources'] || {})
        total_resources['sample_path'] = sample['path']
        total_resources['sample'] = sample['name']

        output_root = total_resources['output']
        raise ArgumentError, "No 'output' resource defined for step '#{@name}'" if output_root.nil?

        group_part = sample.has_key?('group') ? "/#{sample['group']}" : ''
        total_resources['sample_output_path'] = "#{output_root.chomp('/')}#{group_part}/#{sample['name']}"

        @jobname = sample['name'] + "_" + @name
        script = Array(@info['run']).join("\n")
        # Each segment is substituted exactly once through a block, so text
        # that was already substituted is never scanned again and backslashes
        # in resource values stay literal.
        @commands = script.gsub(/<([\w\/]+)>/) do |placeholder|
          placeholder[1..-2].gsub(/[^\/]+/) { |res| resource_value(total_resources, res) }
        end
        build_erb(template).result(binding)
      end

      private

      # Converts an amount such as 4, "512m" or "8G" into a comparable Integer.
      def quantity(value)
        matched = /\A(\d+)([kmgtb]?)/i.match(value.to_s)
        raise ArgumentError, "Cannot parse resource amount '#{value}' in step '#{@name}'" if matched.nil?
        matched[1].to_i * UNITCONV[matched[2].downcase]
      end

      # Looks up one placeholder segment and returns its value as a String.
      def resource_value(resources, key)
        value = resources[key]
        raise ArgumentError, "Unknown resource '#{key}' referenced in step '#{@name}'" if value.nil?
        value.to_s
      end

      # Builds the ERB object with '-' trim mode, using the keyword form when
      # the installed ERB supports it and the positional form otherwise.
      def build_erb(template)
        if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
          ERB.new(template, trim_mode: '-')
        else
          ERB.new(template, nil, '-')
        end
      end

    end

    # A pipeline loaded from a YAML file with 'pipeline', 'resources' and
    # 'steps' sections.
    class Process

      attr_reader :steps

      # Loads the pipeline definition from filename. Each step name is also
      # registered as a resource whose value is "../<step name>".
      #
      # Raises Errno::ENOENT when the file does not exist.
      def initialize(filename)
        # Block form closes the file handle once parsing is done; an empty
        # document is treated as an empty pipeline.
        yaml = File.open(filename) { |io| YAML.load(io) } || {}
        @steps = {}
        @name = yaml['pipeline']
        @resources = yaml['resources'] || {}
        @runninglist = []
        (yaml['steps'] || {}).each_pair do |name, detail|
          @resources[name] = "../#{name}"
          @steps[name] = Step.new(name, detail, @resources)
        end
      end

      # Returns the step called name, or nil.
      def [](name)
        @steps[name]
      end

      # Appends stepname to the running list after its prerequisites, skipping
      # names that are already listed. The 'pre' entry may be a single step
      # name or a list of names.
      #
      # pending - step names currently being resolved; used only to detect
      #           circular dependencies
      #
      # Raises ArgumentError for an unknown step or a circular dependency.
      def add_running_step(stepname, pending = [])
        return if @runninglist.include?(stepname)

        step = self[stepname]
        raise ArgumentError, "Unknown step '#{stepname}'" if step.nil?
        if pending.include?(stepname)
          raise ArgumentError, "Circular step dependency: #{(pending + [stepname]).join(' -> ')}"
        end

        Array(step.dependency).each do |required|
          add_running_step(required, pending + [stepname])
        end
        @runninglist.push(stepname)
      end

      # Builds the single combined step for the requested step names (all
      # steps when steps is nil), prerequisites included and in definition
      # order. Requested names that are not defined are ignored.
      #
      # Returns the combined Step, or nil when nothing was selected.
      def run_steps(steps)
        # Start from a clean list so an earlier call cannot leak steps into
        # this one.
        @runninglist = []
        @steps.each_key do |name|
          add_running_step(name) if steps.nil? || steps.include?(name)
        end

        @runninglist.map { |stepname| self[stepname] }.reduce(:+)
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
module Bio
  module Conduit
    # Supported job schedulers, keyed by queue type name. Each entry holds
    # the submission command line, whose <options> marker is replaced by the
    # conduitrun executable with the user's queue options, and the ERB
    # template used to render the job script.
    QUEUES = {
      "torque" => { :command => 'qsub <options> -', :template => Bio::Conduit::Template::TORQUE },
      "sge"    => { :command => 'qsub <options> -', :template => Bio::Conduit::Template::SGE },
      "bash"   => { :command => 'bash <options>', :template => Bio::Conduit::Template::SGE }
    }
  end
end
@@ -0,0 +1,61 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
module Bio
  module Conduit
    # Sample sheet loaded from a YAML file with optional 'resources' and
    # 'samples' sections. An entry under 'samples' is either
    # "name: path" (ungrouped) or "group: {name: path, ...}".
    class Samples

      attr_reader :samples, :resources, :groups

      # Loads the sample sheet from filename. An empty file, or empty
      # 'resources' / 'samples' sections, yield empty collections.
      #
      # Raises Errno::ENOENT when the file does not exist.
      def initialize(filename)
        # Block form closes the file handle once parsing is done.
        yaml = File.open(filename) { |io| YAML.load(io) } || {}

        @samples = {}
        @groups = {}
        @resources = yaml["resources"] || {}

        (yaml["samples"] || {}).each_pair do |key, value|
          if value.is_a?(Hash)
            # key is a group name; value maps sample names to paths.
            value.each_pair do |sample_name, path|
              (@groups[key] ||= []).push(add_sample(sample_name, path, key))
            end
          else
            add_sample(key, value)
          end
        end
      end

      # Returns the sample Hash registered under name, or nil.
      def [](name)
        @samples[name]
      end

      # Calls the block once per sample with a [name, sample] pair and
      # returns the samples Hash; without a block returns an Enumerator.
      def each(&block)
        return enum_for(:each) unless block

        @samples.each do |sam|
          block.call(sam)
        end
      end

      private

      # Registers one sample and returns its Hash. All samples share the same
      # resources Hash; the 'group' key is set only for grouped samples.
      def add_sample(name, path, group = nil)
        entry = { 'name' => name, 'path' => path, "resources" => @resources }
        entry["group"] = group unless group.nil?
        @samples[name] = entry
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ require 'bio/conduit/template/torque'
11
+ require 'bio/conduit/template/sge'
@@ -0,0 +1,23 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
module Bio
  module Conduit
    module Template
      # ERB job-script template with "#$" scheduler directives. It reads
      # @jobname, @info and @commands from the binding it is rendered in and
      # relies on ERB's '-' trim mode. The parallel-environment line is
      # emitted only when @info has a 'cpu' entry. Frozen so the shared
      # constant cannot be modified in place.
      SGE = %q{#!/bin/bash
#
#$ -N <%= @jobname %>
#$ -cwd
#$ -S /bin/bash
#
<%= "#$ -pe multicores #{@info['cpu']}" if @info.has_key?('cpu') -%>

<%= @commands -%>}.freeze
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
module Bio
  module Conduit
    module Template
      # ERB job-script template with "#PBS" scheduler directives. It reads
      # @jobname, @info and @commands from the binding it is rendered in and
      # relies on ERB's '-' trim mode. A "#PBS -l" resource line is emitted
      # only when @info has at least one of 'nodes', 'cpu' or 'mem'; nodes
      # defaults to 1 when only 'cpu' is given. The resource list is a
      # template-local variable so rendering leaves no extra state on the
      # object that supplies the binding. Frozen so the shared constant
      # cannot be modified in place.
      TORQUE = %q{#PBS -N <%= @jobname %>
<% resourceline = [] -%>
<% resourceline.push("nodes=#{@info.has_key?('nodes') ? @info['nodes'] : 1}#{@info.has_key?('cpu') ? ":ppn=#{@info['cpu']}" : ""}") if @info.has_key?('cpu') || @info.has_key?('nodes') -%>
<% resourceline.push("mem=#{@info['mem']}") if @info.has_key?('mem') -%>
<%= "#PBS -l #{resourceline.join(',')}" if !resourceline.empty? %>

<%= @commands -%>}.freeze
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
module Bio
  module Conduit
    # Version of the bio-conduit gem; read by the gemspec. Frozen so the
    # shared constant cannot be modified in place.
    VERSION = '1.0.0'.freeze
  end
end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-conduit
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Natapol Pornputtapong
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-04 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A pipeline creator for bioruby
14
+ email: natapol.por@gmail.com
15
+ executables:
16
+ - conduitrun
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - README.md
21
+ - bin/conduitrun
22
+ - bio-conduit.gemspec
23
+ - contributors.txt
24
+ - lib/bio-conduit.rb
25
+ - lib/bio/conduit.rb
26
+ - lib/bio/conduit/exception.rb
27
+ - lib/bio/conduit/process.rb
28
+ - lib/bio/conduit/queues.rb
29
+ - lib/bio/conduit/samples.rb
30
+ - lib/bio/conduit/template.rb
31
+ - lib/bio/conduit/template/sge.rb
32
+ - lib/bio/conduit/template/torque.rb
33
+ - lib/bio/conduit/version.rb
34
+ homepage: http://rubygems.org/gems/bio-conduit
35
+ licenses:
36
+ - GPL
37
+ metadata: {}
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - bin
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements: []
54
+ rubyforge_project:
55
+ rubygems_version: 2.0.14
56
+ signing_key:
57
+ specification_version: 4
58
+ summary: A pipeline creator for bioruby
59
+ test_files: []
60
+ has_rdoc: