bio-conduit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5101f8f97fb43d2832036876835d7ecb7c6a690b
4
+ data.tar.gz: 94d96c406a17d7c5db2fe803c4b160b719b2e00b
5
+ SHA512:
6
+ metadata.gz: 3257b904ef5687b8e15267b4f610b0ce7bc9603c2913df6050a995e4d5a300245c45ba1da80e45c1d36a47a58fb74f2363388a309176af2a3371f137f0a970b4
7
+ data.tar.gz: fdecf9dbe891ba147d5afa2fc25d418e10fdf406dc48fe68e64d69e5a125055dc8f694bb9207a67ac4b68ac6ad1025f51b18442ee9a37f2085d947547eba140a
@@ -0,0 +1,3 @@
1
+ # README #
2
+
3
+ This is a simple pipeline running library inspired by [bioruby-pipengine](https://github.com/fstrozzi/bioruby-pipengine)
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env ruby
2
+ # bio-conduit
3
+ #
4
+ # Copyright (C) 2015
5
+ #
6
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
7
+ #
8
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
9
+ #
10
+
11
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
12
+
13
+ $:<< File.expand_path(File.join(File.dirname(File.dirname __FILE__),"lib"))
14
+
15
+ require 'optparse'
16
+ require 'bio-conduit'
17
+
18
# Defaults for every command-line switch; the parser callbacks below overwrite them.
opts = {
  dry: false,
  queue_type: 'torque',
  pipeline: 'pipeline.yml',
  sample_file: 'samples.yml',
  queue_option: ''
}

OptionParser.new do |option|
  option.banner = <<-EOS
Build one job script per sample from a pipeline definition and submit it to a job scheduler.

Usage:
#{$0} [options]
where [options] are:
EOS
  option.on("-p", "--pipeline PIPELINE", "YAML file with pipeline information") { |v| opts[:pipeline] = v }
  option.on("-f", "--sample_file SAMPLEFILE", "YAML file with samples information") { |v| opts[:sample_file] = v }
  option.on("-l", "--samples SAMPLES", "List of sample names to run the pipeline") { |v| opts[:samples] = v.split(/,/) }
  option.on("-s", "--steps STEPS", "List of steps to be executed") { |v| opts[:steps] = v.split(/,/) }
  option.on("-d", "--dry", "Print the submission command and job script instead of submitting") { opts[:dry] = true }
  # Stored under :groups, the key the sample selection below reads.
  option.on("-g", "--group GROUP", "Specify the group of samples to run the pipeline") { |v| opts[:groups] = v.split(/,/) }
  option.on("-o", "--output_dir OUTPUTDIR", "Output directory (override output directory in pipeline file)") { |v| opts[:output_dir] = v }
  option.on("-q", "--queue_type QUEUETYPE", "Specify job scheduler") { |v| opts[:queue_type] = v }
  option.on("-b", "--queue_option OPTIONS", "Extra options passed to the job scheduler command") { |v| opts[:queue_option] = v }
end.parse!

begin
  sampleyml = Bio::Conduit::Samples.new(opts[:sample_file])
rescue Errno::ENOENT
  abort "ERR: No samples file - #{opts[:sample_file]}"
end

begin
  procyml = Bio::Conduit::Process.new(opts[:pipeline])
rescue Errno::ENOENT
  abort "ERR: No pipeline file - #{opts[:pipeline]}"
end

# Sample selection: explicit names win over groups; with neither, every sample runs.
samples =
  if opts[:samples]
    opts[:samples].map do |name|
      sampleyml[name] || abort("ERR: Unknown sample - #{name}")
    end
  elsif opts[:groups]
    opts[:groups].flat_map do |group|
      sampleyml.groups[group] || abort("ERR: Unknown group - #{group}")
    end
  else
    sampleyml.samples.values
  end

queue = Bio::Conduit::QUEUES[opts[:queue_type]]
unless queue
  abort "ERR: Unknown queue type - #{opts[:queue_type]} (known: #{Bio::Conduit::QUEUES.keys.join(', ')})"
end
template = queue[:template]
# Block form so backslashes in the user's options are inserted literally.
cmd = queue[:command].sub(/<options>/) { opts[:queue_option] }

# The combined step does not depend on the sample, so build it once.
pipeline_step = procyml.run_steps(opts[:steps])
abort "ERR: No pipeline steps to run" unless pipeline_step

# Sample resources take precedence when the job script is rendered, so the
# override is written there to beat the pipeline file's 'output' value.
if opts[:output_dir]
  samples.each do |sam|
    sam['resources'] = (sam['resources'] || {}).merge('output' => opts[:output_dir])
  end
end

samples.each do |sam|
  runscript = pipeline_step.create_jobscript(sam, template)

  if opts[:dry]
    puts cmd
    puts runscript
    next
  end

  # The job script is fed to the scheduler command on standard input.
  _out, err, status = Open3.capture3(cmd, :stdin_data => runscript)
  unless status.success?
    raise Bio::Conduit::CommandError, "Process.run() of #{opts[:queue_type]} job submission failed with #{err}"
  end
end
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "bio/conduit/version"
4
+
5
# Gem packaging description for bio-conduit.
Gem::Specification.new do |s|
  s.name        = 'bio-conduit'
  s.version     = Bio::Conduit::VERSION
  s.date        = Time.now.strftime("%Y-%m-%d")
  s.platform    = Gem::Platform::RUBY

  s.summary     = "A pipeline creator for bioruby"
  s.description = "A pipeline creator for bioruby"
  s.authors     = ["Natapol Pornputtapong"]
  s.email       = 'natapol.por@gmail.com'

  s.homepage    = 'http://rubygems.org/gems/bio-conduit'
  # NOTE(review): 'GPL' does not say which GPL version applies; confirm and
  # replace with the matching SPDX identifier.
  s.license     = 'GPL'

  # File lists are read from git, so the gem has to be built from a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  # Only lib/ goes on the load path. Listing "bin" here would make the
  # executable script loadable through `require`; executables are installed
  # via s.executables instead.
  s.require_paths = ["lib"]

  # if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("1.9.0")
  #   s.add_dependency "mongoid", "~> 3.1"
  # else
  #   s.add_dependency "mongoid", "~> 4.0"
  #   s.add_dependency "mongoid-versioning", "~> 1.0"
  # end
  #s.add_dependency "trollop", "~> 2.0"
  #s.add_dependency "bio", "~> 1.4"
  #s.add_dependency "statsample", "~> 1.4"
end
@@ -0,0 +1 @@
1
+ Natapol Pornputtapong
@@ -0,0 +1,22 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ # raise "Please, use ruby 1.9.0 or later." if RUBY_VERSION < "1.9.0"
11
+
12
+ $:<< File.expand_path(File.join(File.dirname(File.dirname __FILE__),"lib"))
13
+
14
+ require 'yaml'
15
+ require 'erb'
16
+ require 'fileutils'
17
+ require 'open3'
18
+
19
+ #require 'active_support/concern'
20
+ #require 'active_support/core_ext/object/blank'
21
+
22
+ require 'bio/conduit'
@@ -0,0 +1,14 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+ require 'bio/conduit/version'
10
+ require 'bio/conduit/exception'
11
+ require 'bio/conduit/samples'
12
+ require 'bio/conduit/process'
13
+ require 'bio/conduit/template'
14
+ require 'bio/conduit/queues'
@@ -0,0 +1,19 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
module Bio
  module Conduit
    # Error type for a missing file. Nothing in the library code shown with
    # this class raises it; it is available to callers.
    #
    # Derives from StandardError (not Exception) so that a plain `rescue`
    # catches it.
    class FileNotFound < StandardError
    end

    # Raised by bin/conduitrun when the job submission command exits with a
    # non-zero status.
    #
    # Derives from StandardError (not Exception) so that a plain `rescue`
    # catches it.
    class CommandError < StandardError
    end
  end
end
@@ -0,0 +1,147 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
module Bio
  module Conduit
    # One pipeline step, or several steps merged into one with #+.
    #
    # A step carries a name, its description hash (+info+; the keys read here
    # are 'run', 'pre', 'mem', 'cpu' and 'nodes') and the resource table used
    # to expand <placeholder> tokens in its commands.
    class Step

      attr_reader :name, :info, :resources

      # Multipliers for the size suffix of a 'mem'/'cpu'/'nodes' value
      # ('' and 'b' mean "no scaling").
      UNITCONV = {
        '' => 1,
        'b' => 1,
        'k' => 1024,
        'm' => 1048576,
        'g' => 1073741824,
        't' => 1099511627776
      }.freeze

      # Keys for which #+ keeps the larger value instead of concatenating.
      COMPARED_KEYS = %w[mem cpu nodes].freeze

      # name::      step name
      # hash::      step description; stored as-is and modified in place when
      #             +addpath+ is true
      # resources:: resource table (name => value) for placeholder expansion
      # addpath::   when true, prefix 'run' with commands that create and enter
      #             the step's directory below <sample_output_path>
      def initialize(name, hash, resources, addpath = true)
        @name = name
        @info = hash
        @resources = resources
        return unless addpath

        preamble = ["mkdir -p <sample_output_path/>#{@name}", "cd <sample_output_path/>#{@name}"]
        if @info['run'].is_a?(String)
          @info['run'] = preamble + [@info['run']]
        elsif @info['run'].is_a?(Array)
          @info['run'].unshift(*preamble)
        end
      end

      # True when the step description declares a 'pre' entry.
      def dependence?
        @info.has_key?('pre')
      end

      # The 'pre' entry of the step description, or nil when there is none.
      def dependency
        dependence? ? @info['pre'] : nil
      end

      # Merges two steps into a new Step named "<self>_<other>".
      #
      # Resources are merged with +other+ winning. For keys present in both
      # descriptions, 'mem', 'cpu' and 'nodes' keep the larger value (the
      # receiver's on a tie) and every other key is concatenated into a list.
      # Neither operand is modified.
      def +(other)
        merged_info = info.merge(other.info) do |key, first, second|
          if COMPARED_KEYS.include?(key)
            base_units(second) > base_units(first) ? second : first
          else
            listify(first) + listify(second)
          end
        end
        Step.new("#{name}_#{other.name}", merged_info, resources.merge(other.resources), false)
      end

      # Renders the job script for one sample.
      #
      # sample::   hash with 'name' and 'path', optionally 'group' and
      #            'resources' (sample resources override the step's)
      # template:: ERB source; it is evaluated with this method's binding, so
      #            it can read @jobname, @info and @commands
      #
      # Every <a/b> token in the commands has each slash-separated name
      # replaced by the matching resource value; the slashes are kept.
      # Besides the merged resources, 'sample', 'sample_path' and
      # 'sample_output_path' (<output>[/<group>]/<sample name>) are available.
      #
      # Returns the rendered script. Raises ArgumentError when the 'output'
      # resource is missing or a placeholder names an unknown resource.
      def create_jobscript(sample, template)
        total_resources = @resources.merge(sample['resources'] || {})
        total_resources['sample_path'] = sample['path']
        total_resources['sample'] = sample['name']

        output = total_resources['output']
        raise ArgumentError, "No 'output' resource defined for step #{@name}" if output.nil?
        group_dir = sample.has_key?('group') ? "/#{sample['group']}" : ''
        total_resources['sample_output_path'] = "#{output.to_s.chomp('/')}#{group_dir}/#{sample['name']}"

        @jobname = "#{sample['name']}_#{@name}"
        script = @info['run'].is_a?(Array) ? @info['run'].join("\n") : @info['run'].to_s
        @commands = expand_placeholders(script, total_resources)

        erb_for(template).result(binding)
      end

      private

      # Converts a value such as 4, "512m" or "2G" to a plain number so that
      # two values can be compared. Accepts integers (YAML parses `cpu: 4` as
      # one) and is case-insensitive about the suffix.
      def base_units(value)
        matched = /\A(\d+)([kmgtb]?)/i.match(value.to_s)
        raise ArgumentError, "Cannot parse size #{value.inspect} in step #{@name}" unless matched
        matched[1].to_i * UNITCONV[matched[2].downcase]
      end

      # Wraps a String in a one-element list; anything else is returned as is.
      def listify(value)
        value.is_a?(String) ? [value] : value
      end

      # Replaces every <a/b> token in +text+ in a single pass, so inserted
      # values are never scanned again. Block form keeps backslashes literal.
      def expand_placeholders(text, values)
        text.gsub(/<([\w\/]+)>/) do
          Regexp.last_match(1).gsub(/[^\/]+/) do |key|
            value = values[key]
            raise ArgumentError, "Unknown resource <#{key}> in step #{@name}" if value.nil?
            value.to_s
          end
        end
      end

      # Builds the ERB object with '-' trim mode (needed for the "-%>" tags in
      # the bundled templates). The positional form is deprecated in favour of
      # the trim_mode: keyword, so use whichever the loaded ERB supports.
      def erb_for(template)
        if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
          ERB.new(template, trim_mode: '-')
        else
          ERB.new(template, nil, '-')
        end
      end

    end

    # A pipeline definition loaded from a YAML file with the top-level keys
    # 'pipeline' (name), 'resources' and 'steps'.
    class Process

      attr_reader :steps

      # Loads the pipeline file and builds one Step per entry under 'steps'.
      # All steps share one resource table, which also gets an entry
      # "<step name>" => "../<step name>" for every step.
      #
      # Raises Errno::ENOENT when the file does not exist and ArgumentError
      # when its content is not a YAML mapping.
      def initialize(filename)
        # load_file closes the file handle, unlike YAML::load(File.open(...)).
        yaml = YAML.load_file(filename)
        unless yaml.is_a?(Hash)
          raise ArgumentError, "Pipeline file #{filename} does not contain a YAML mapping"
        end

        @steps = {}
        @name = yaml['pipeline']
        @resources = yaml['resources'] || {}
        @runninglist = []
        (yaml['steps'] || {}).each_pair do |name, detail|
          @resources[name] = "../#{name}"
          @steps[name] = Step.new(name, detail || {}, @resources)
        end
      end

      # Returns the Step called +name+, or nil.
      def [](name)
        @steps[name]
      end

      # Appends +stepname+ to the running list, after the steps named in its
      # 'pre' entry (a single name or a list of names), skipping steps already
      # listed.
      #
      # +pending+ is internal: the chain of steps currently being resolved,
      # used to detect circular dependencies.
      #
      # Raises ArgumentError for an unknown step or a circular dependency.
      def add_running_step(stepname, pending = [])
        step = self[stepname]
        raise ArgumentError, "Unknown step - #{stepname}" unless step
        if pending.include?(stepname)
          raise ArgumentError, "Circular dependency: #{(pending + [stepname]).join(' -> ')}"
        end

        Array(step.dependency).each do |dep|
          add_running_step(dep, pending + [stepname])
        end
        @runninglist.push(stepname) unless @runninglist.include?(stepname)
      end

      # Selects the steps named in +steps+ (all steps when nil) together with
      # their dependencies and merges them, in running order, into one Step.
      # Returns nil when nothing is selected.
      #
      # NOTE(review): the running list is kept between calls, so steps
      # selected by an earlier call stay selected - confirm this is intended.
      def run_steps(steps)
        @steps.keys.each do |name|
          add_running_step(name) if steps.nil? || steps.include?(name)
        end

        @runninglist.map { |stepname| self[stepname] }.inject { |combined, step| combined + step }
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
module Bio
  module Conduit
    # Job schedulers selectable with conduitrun's --queue_type option, keyed
    # by the option value.
    #
    # Each entry has two settings:
    # :command::  the submission command line; conduitrun replaces the text
    #             "<options>" with the --queue_option string and writes the
    #             job script to the command's standard input
    # :template:: the ERB source the job script is rendered from
    #
    # "bash" shares the SGE template.
    QUEUES = {
      "torque" => {
        :command  => 'qsub <options> -',
        :template => Bio::Conduit::Template::TORQUE
      },
      "sge" => {
        :command  => 'qsub <options> -',
        :template => Bio::Conduit::Template::SGE
      },
      "bash" => {
        :command  => 'bash <options>',
        :template => Bio::Conduit::Template::SGE
      }
    }
  end
end
@@ -0,0 +1,61 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
module Bio
  module Conduit
    # The samples file: a YAML mapping with an optional 'resources' table and
    # a 'samples' table.
    #
    # Under 'samples', an entry whose value is a mapping is a group
    # (group name => { sample name => path }); any other entry is a single
    # ungrouped sample (sample name => path).
    #
    # Each sample is a Hash with the keys 'name', 'path', 'resources' (the
    # file-level resource table, shared by all samples) and, for grouped
    # samples, 'group'.
    class Samples
      include Enumerable

      attr_reader :samples, :resources, :groups

      # Loads +filename+. An empty file yields no samples.
      #
      # Raises Errno::ENOENT when the file does not exist and ArgumentError
      # when its content is not a YAML mapping.
      def initialize(filename)
        # load_file closes the file handle; an empty document loads as false.
        yaml = YAML.load_file(filename) || {}
        unless yaml.is_a?(Hash)
          raise ArgumentError, "Samples file #{filename} does not contain a YAML mapping"
        end

        @samples = {}
        @groups = {}
        @resources = yaml['resources'] || {}

        (yaml['samples'] || {}).each_pair do |key, value|
          if value.is_a?(Hash)
            value.each_pair { |name, path| add_sample(name, path, key) }
          else
            add_sample(key, value)
          end
        end
      end

      # Returns the sample Hash called +name+, or nil.
      def [](name)
        @samples[name]
      end

      # Yields a [name, sample] pair for every sample, in file order.
      # Without a block, returns an Enumerator.
      def each
        return to_enum(:each) unless block_given?
        @samples.each { |pair| yield pair }
      end

      private

      # Registers one sample and, when +group+ is given, adds it to that group.
      def add_sample(name, path, group = nil)
        sample = { 'name' => name, 'path' => path, 'resources' => @resources }
        if group
          sample['group'] = group
          (@groups[group] ||= []).push(sample)
        end
        @samples[name] = sample
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
+ require 'bio/conduit/template/torque'
11
+ require 'bio/conduit/template/sge'
@@ -0,0 +1,23 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ # SGE job-script template (ERB source text), also used for the "bash" queue. Rendered by Step#create_jobscript, whose binding supplies @jobname, @info and @commands; the "-pe multicores" line is emitted only when @info has a 'cpu' key, and the "-%>" tags rely on the '-' trim mode passed to ERB there.
9
+ module Bio
10
+ module Conduit
11
+ module Template
12
+ SGE = %q{#!/bin/bash
13
+ #
14
+ #$ -N <%= @jobname %>
15
+ #$ -cwd
16
+ #$ -S /bin/bash
17
+ #
18
+ <%= "#$ -pe multicores #{@info['cpu']}" if @info.has_key?('cpu') -%>
19
+
20
+ <%= @commands -%>}
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,21 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ # TORQUE job-script template (ERB source text). Rendered by Step#create_jobscript, whose binding supplies @jobname, @info and @commands. It emits "#PBS -N" with @jobname, then a "#PBS -l" line built from @info['nodes'] (1 when absent), @info['cpu'] (as ppn) and @info['mem'] when any of them is set, then @commands; @resourceline is a scratch variable the template assigns on the rendering object.
9
+ module Bio
10
+ module Conduit
11
+ module Template
12
+ TORQUE = %q{#PBS -N <%= @jobname %>
13
+ <% @resourceline = [] -%>
14
+ <% @resourceline.push("nodes=#{@info.has_key?('nodes') ? @info['nodes'] : 1}#{@info.has_key?('cpu') ? ":ppn=#{@info['cpu']}" : ""}") if @info.has_key?('cpu') || @info.has_key?('nodes') -%>
15
+ <% @resourceline.push("mem=#{@info['mem']}") if @info.has_key?('mem') -%>
16
+ <%= "#PBS -l #{@resourceline.join(',')}" if !@resourceline.empty? %>
17
+
18
+ <%= @commands -%>}
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,14 @@
1
+ # bio-conduit
2
+ #
3
+ # Copyright (C) 2015
4
+ #
5
+ # author: Natapol Pornputtapong <natapol.por@gmail.com>
6
+ #
7
+ # Documentation: Natapol Pornputtapong (RDoc'd and embellished by William Webber)
8
+ #
9
+
10
module Bio
  module Conduit
    # Gem version; read by bio-conduit.gemspec. Frozen so the shared constant
    # cannot be modified in place.
    VERSION = '1.0.0'.freeze
  end
end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-conduit
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Natapol Pornputtapong
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-09-04 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A pipeline creator for bioruby
14
+ email: natapol.por@gmail.com
15
+ executables:
16
+ - conduitrun
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - README.md
21
+ - bin/conduitrun
22
+ - bio-conduit.gemspec
23
+ - contributors.txt
24
+ - lib/bio-conduit.rb
25
+ - lib/bio/conduit.rb
26
+ - lib/bio/conduit/exception.rb
27
+ - lib/bio/conduit/process.rb
28
+ - lib/bio/conduit/queues.rb
29
+ - lib/bio/conduit/samples.rb
30
+ - lib/bio/conduit/template.rb
31
+ - lib/bio/conduit/template/sge.rb
32
+ - lib/bio/conduit/template/torque.rb
33
+ - lib/bio/conduit/version.rb
34
+ homepage: http://rubygems.org/gems/bio-conduit
35
+ licenses:
36
+ - GPL
37
+ metadata: {}
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - bin
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements: []
54
+ rubyforge_project:
55
+ rubygems_version: 2.0.14
56
+ signing_key:
57
+ specification_version: 4
58
+ summary: A pipeline creator for bioruby
59
+ test_files: []
60
+ has_rdoc: