cagnut_core 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e8f656425110b7d2bc1fbee6c89d45b97301e0b9
4
+ data.tar.gz: 056d7b62798bdc1254827276631111f1e2edbe49
5
+ SHA512:
6
+ metadata.gz: c7fe94d621066a84390cbb05bb9f300333fea02b44693190c446958286cd045d50529f821e648225132402843aa8c4b42aef4a61d607aad86a6f38eb1ba0611e
7
+ data.tar.gz: 3a271af722e943e1a3af6e35efe6bc2d1f057ad968e27397a78496d4666efaff0c5de5a85ba18b09df42638980c0e5d0c80dd4389e8e775c97d0d8b37caf6b8c
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-2.3.1
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.1
5
+ before_install: gem install bundler -v 1.12.5
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cagnut_core.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Goldenio Technology
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,36 @@
1
+ # CagnutCore
2
+
3
+ ## Installation
4
+
5
+ Add this line to your application's Gemfile:
6
+
7
+ ```ruby
8
+ gem 'cagnut_core'
9
+ ```
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install cagnut_core
18
+
19
+ ## Usage
20
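+ Run `cagnut new <folder_name>` to create a new project folder, then run `cagnut pipeline [options]` from inside that folder (it reads `Gemfile` and `system.yml` from the current directory).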
21
+
22
+ ## Development
23
+
24
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
25
+
26
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
27
+
28
+ ## Contributing
29
+
30
+ Bug reports and pull requests are welcome on GitHub at https://github.com/CAGNUT/cagnut_core.
31
+
32
+
33
+ ## License
34
+
35
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
36
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "cagnut_core"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'cagnut/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "cagnut_core"
8
+ spec.version = CagnutCore::VERSION
9
+ spec.authors = ['Shi-Gang Wang', 'Tse-Ching Ho']
10
+ spec.email = ['seanwang@goldenio.com', 'tsechingho@goldenio.com']
11
+
12
+ spec.summary = %q{Computational and Analytical Gear for Nucleic acid Utilitarian Techniques}
13
+ spec.description = %q{Computational and Analytical Gear for Nucleic acid Utilitarian Techniques}
14
+ spec.homepage = "https://github.com/CAGNUT/cagnut_core"
15
+ spec.license = "MIT"
16
+
17
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
18
+ spec.bindir = "exe"
19
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
+ spec.require_paths = ["lib"]
21
+
22
+ spec.add_dependency 'activesupport', '> 5'
23
+ spec.add_dependency 'tilt'
24
+ spec.add_dependency 'thor'
25
+
26
+ spec.add_development_dependency "bundler", "~> 1.12"
27
+ spec.add_development_dependency "rake", "~> 10.0"
28
+ spec.add_development_dependency "rspec", "~> 3.0"
29
+ end
data/exe/cagnut ADDED
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ lib = File.expand_path('../../lib', __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+
6
+ require 'thor'
7
+ require 'optparse'
8
+ require "cagnut"
9
+ require "cagnut/cli/new_project"
10
+ require "cagnut/cli/pipeline"
11
+
12
+ method = ARGV.first
13
+ case method
14
+ when 'new', 'copy_params'
15
+ Cagnut::NewProject.start(ARGV)
16
+ when 'pipeline'
17
+ Cagnut::Pipeline::Base.new.start(ARGV)
18
+ else
19
+ puts <<-USAGE.strip_heredoc
20
+ cagnut new <folder_name> create new project folder
21
+ cagnut pipeline pipeline relative
22
+ USAGE
23
+ end
data/exe/stat ADDED
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env ruby
2
+ require 'json'
3
+ require 'time'
4
+
5
+ dirs = ARGV
6
+
7
+ if dirs.nil?
8
+ puts 'Please give *.std Working Directory'
9
+ exit
10
+ end
11
+
12
+ def file_name dir, file
13
+ %r{#{dir}\/(.+).[s][t][d]}.match(file)[1]
14
+ end
15
+
16
+ def check_resource line
17
+ case line
18
+ when 'Resource usage summary:'
19
+ @is_resource = true
20
+ when 'The output (if any) is above this job summary.'
21
+ @is_resource = false
22
+ end
23
+ end
24
+
25
+ def job_complete? line
26
+ @complete = true if line == 'Successfully completed.'
27
+ end
28
+
29
+ def cal_spend_time line
30
+ @started = fetch_time line, 'Started at ' if @started.nil?
31
+ @ended = fetch_time line, 'Results reported ' if @ended.nil?
32
+ end
33
+
34
+ def fetch_time line, marked
35
+ Time.parse(line.match(/#{marked}(.*+)/)[1]) if line.match(/#{marked}/)
36
+ end
37
+
38
+ def time_diff
39
+ Time.at(@ended - @started).utc.strftime '%H:%M:%S'
40
+ end
41
+
42
+ def check_content line
43
+ job_complete? line
44
+ check_resource line
45
+ end
46
+
47
+ def clasp_time
48
+ @complete ? time_diff : 'failed'
49
+ end
50
+
51
+ def reset_variable
52
+ @started = nil
53
+ @ended = nil
54
+ @complete = false
55
+ @is_resource = false
56
+ end
57
+
58
+ def parse_std file
59
+ resource_hash = {}
60
+ reset_variable
61
+ File.open(file).each_line do |line|
62
+ check_content line.chomp!
63
+ cal_spend_time line if @started.nil? || @ended.nil?
64
+ resource_hash['time'] = clasp_time
65
+ next unless line.length > 0 && @complete && @is_resource
66
+ data = line.gsub(/\s\s+/, '').split(':')
67
+ resource_hash[data[0]] = data[1] unless data[1].nil?
68
+ end
69
+ resource_hash
70
+ end
71
+
72
+ dirs.each do |dir|
73
+ stat = []
74
+ Dir.glob("#{dir}/*.std").each do |file|
75
+ resource_hash = parse_std file
76
+ stat << [file_name(dir, file), resource_hash]
77
+ end
78
+ puts stat.to_json.inspect
79
+ end
data/lib/cagnut.rb ADDED
@@ -0,0 +1,36 @@
1
+ require 'tilt'
2
+ require 'active_support'
3
+ require 'active_support/core_ext'
4
+ require 'fileutils'
5
+ require 'pathname'
6
+ require 'cagnut/version'
7
+ require 'cagnut/configuration'
8
+ require 'cagnut/job_manage'
9
+
10
+ Tilt.register_lazy :StringTemplate, 'tilt/string', 'sh'
11
+
12
+ module Cagnut
13
+ autoload :Base, 'cagnut/base'
14
+
15
+ class << self
16
+ attr_writer :environment
17
+
18
+ def root
19
+ ::Pathname.new File.expand_path '../..', __FILE__
20
+ end
21
+
22
+ # Job names can contain up to 4094 characters.
23
+ def prefix_name
24
+ "CAGNUT_#{Time.now.strftime('%Y%m%d%H%M%S')}"
25
+ end
26
+
27
+ def environment
28
+ @environment ||= 'development'
29
+ end
30
+
31
+ def load_config config_name, config_options
32
+ Cagnut::Configuration.config = Cagnut::Configuration.load_config config_name, config_options
33
+ end
34
+
35
+ end
36
+ end
@@ -0,0 +1,97 @@
1
+ module Cagnut
2
+ class NewProject < Thor
3
+
4
+ include Thor::Actions
5
+ source_root Cagnut.root.join('templates')
6
+
7
+ desc 'new NAME project', 'new NAME project'
8
+ def new *args
9
+ if args.size >= 1
10
+ if !args.first.start_with? '-'
11
+ name = args.shift
12
+ elsif !args.last.start_with? '-'
13
+ name = args.pop
14
+ else
15
+ puts "please use `cagnut new <name>`\n"
16
+ exit(1)
17
+ end
18
+ else
19
+ puts "please use `cagnut new <name>`\n"
20
+ exit(1)
21
+ end
22
+ new_project name, project_opts
23
+ end
24
+
25
+ private
26
+
27
+ def new_project name, options
28
+ empty_directory name
29
+ copy_file 'Gemfile', "#{name}/Gemfile"
30
+ inside name, verbose: true do
31
+ create_file '.ruby-version', '2.3.1'
32
+ append_to_file 'Gemfile', "gem 'cagnut_cluster'\n" if options[:cluster]
33
+ append_pipeline_gems_to_gemfile options[:pipelines]
34
+ bundle 'install'
35
+ end
36
+ copy_file 'system.yml', "#{name}/system.yml"
37
+ load_bundle_env name
38
+ after_new_project name
39
+ generate_pipeline_tools_config name, options[:pipelines], options[:cluster]
40
+ end
41
+
42
+ def add_queue_setting name, pipeline
43
+ end
44
+
45
+ def load_bundle_env name
46
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("#{name}/Gemfile", Dir.pwd)
47
+ require 'bundler/setup'
48
+ Bundler.require(:default)
49
+ end
50
+
51
+ def append_pipeline_gems_to_gemfile pipelines
52
+ return if pipelines.blank?
53
+ pipelines.each do |pipeline_name|
54
+ append_to_file 'Gemfile', "gem 'cagnut_pipeline_#{pipeline_name}'\n"
55
+ end
56
+ end
57
+
58
+ def generate_pipeline_tools_config name, pipelines, cluster=nil
59
+ return if pipelines.blank?
60
+ pipelines.each do |pipeline_name|
61
+ send "copy_#{pipeline_name}_tools_config", name
62
+ add_queue_setting name, pipeline_name if cluster
63
+ end
64
+ end
65
+
66
+ def after_new_project name
67
+ end
68
+
69
+ def project_opts options = {}
70
+ OptionParser.new do |opts|
71
+ opts.banner = 'Usage: example.rb [options]'
72
+ opts.on('-c', '--cluster', 'Cluster') do
73
+ options[:cluster] = true
74
+ end
75
+ opts.on('-p', '--pipelines draw', Array, 'Pipelines') do |p|
76
+ options[:pipelines] = p
77
+ end
78
+ end.parse!
79
+ return options
80
+ end
81
+
82
+ def bundle command
83
+ say_status :run, "bundle #{command}"
84
+ _bundle = Gem.bin_path('bundler', 'bundle')
85
+ require 'bundler'
86
+ Bundler.with_clean_env do
87
+ full_command = %Q["#{Gem.ruby}" "#{_bundle}" #{command}]
88
+ if options[:quiet]
89
+ system(full_command, out: File::NULL)
90
+ else
91
+ system(full_command)
92
+ end
93
+ end
94
+ end
95
+
96
+ end
97
+ end
@@ -0,0 +1,97 @@
1
+ module Cagnut
2
+ module Pipeline
3
+ class Base
4
+
5
+ class << self
6
+ attr_reader :pipelines
7
+
8
+ def register klass
9
+ @pipelines ||= []
10
+ @pipelines << klass
11
+ end
12
+ end
13
+
14
+ def start *args
15
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path('./Gemfile', Dir.pwd)
16
+ require 'bundler/setup'
17
+ Bundler.require(:default)
18
+
19
+ options = pepeline_opts
20
+ run_filter options[:run] if options[:run]
21
+ config_name = get_config_name options[:run]
22
+ config = Cagnut.load_config config_name, options
23
+ config['dodebug'] = options[:debug]
24
+ config['samples'].each do |sample|
25
+ config['sample'] = sample
26
+ run_pipeline options[:run] if options[:run]
27
+ execute_command config, sample unless options[:debug]
28
+ end
29
+ end
30
+
31
+ def get_config_name pipeline_names
32
+ abort 'Did not assign pipeline to run' if pipeline_names.blank?
33
+ selected = self.class.pipelines.find do |p|
34
+ if p.try(:pipeline_names)
35
+ !( pipeline_names & p.pipeline_names ).empty?
36
+ elsif p.try(:pipeline_name)
37
+ pipeline_names.include? p.pipeline_name
38
+ else
39
+ puts "Can not find tools yml"
40
+ exit
41
+ end
42
+ end
43
+ selected.config_name
44
+ end
45
+
46
+ def run_filter names
47
+ puts 'Pipeline Conflict!'
48
+ exit
49
+ end
50
+
51
+ def run_pipeline pipelines, job_name = '', filename = ''
52
+ pipelines.sort.each do |pipeline|
53
+ job_name, filename = send "pipeline_#{pipeline}", { job_name: job_name, filename: filename }
54
+ end
55
+ end
56
+
57
+ def execute_command config, sample
58
+ job = fork do
59
+ exec "#{sample['jobs']}/submit_command_#{sample['name']}.jobs"
60
+ end
61
+ Process.detach(job)
62
+ end
63
+
64
+ def pepeline_opts options = {}
65
+ OptionParser.new do |opts|
66
+ opts.banner = 'Usage: example.rb [options]'
67
+ opts.on('-d', '--debug', 'Dodebug') do
68
+ options[:debug] = true
69
+ end
70
+ opts.on('-c', '--config yaml', 'Cagnut Config YAML') do |c|
71
+ options[:config] = c
72
+ end
73
+ opts.on('-n', '--no_check_tools', 'Not Check Tools') do
74
+ options[:no_check] = true
75
+ end
76
+ opts.on('-r', '--run draw1,draw2,draw3 or xyz', Array, 'run: draw1,draw2,draw3 or xyz') do |r|
77
+ options[:run] = r
78
+ end
79
+ opts.on('-p', '--parameter yaml', 'Cagnut Parameter Config YAML') do |p|
80
+ options[:params] = p
81
+ end
82
+ opts.on('-l', '--list', 'Pipeline List') do
83
+ list = self.class.pipelines.map do |pipeline|
84
+ pipeline.try(:pipeline_names) || pipeline.try(:pipeline_name)
85
+ end
86
+ puts "\s\s#{list.join("\n\s\s")}"
87
+ end
88
+ opts.on('-h', '--help', 'Help') do
89
+ puts "Help"
90
+ end
91
+ end.parse!
92
+ return options
93
+ end
94
+
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,88 @@
1
+ require 'yaml'
2
+ require 'forwardable'
3
+ require 'cagnut/configuration/base'
4
+ require 'cagnut/configuration/checks/tools'
5
+ require 'cagnut/configuration/checks/datasets'
6
+
7
+ module Cagnut
8
+ module Configuration
9
+ mattr_writer :base, :toolbox
10
+ mattr_accessor :config, :params
11
+
12
+ class << self
13
+ def configure
14
+ yield self
15
+ end
16
+
17
+ def params
18
+ @params
19
+ end
20
+
21
+ def base
22
+ @base ||= begin
23
+ Cagnut::Configuration::Base.load(@config)
24
+ Cagnut::Configuration::Base.instance
25
+ end
26
+ end
27
+
28
+ def load_config config_name, options
29
+ @config ||= check_and_load_yml fetch_system_config_path(options[:config])
30
+ @params ||= check_and_load_yml fetch_tools_config_path(config_name, options[:params])
31
+ config_check config_name, options[:not_check]
32
+ end
33
+
34
+ private
35
+
36
+ def check_and_load_yml yml_path
37
+ check_path yml_path
38
+ puts "Using #{yml_path}"
39
+ YAML.load_file yml_path
40
+ end
41
+
42
+ def check_path file
43
+ return file if File.exist?(file)
44
+ puts "No such File in: #{file}"
45
+ exit
46
+ end
47
+
48
+ def fetch_system_config_path cfg_path=nil
49
+ if !cfg_path.blank?
50
+ cfg_path
51
+ elsif Dir.entries(Dir.pwd).include? "system.yml"
52
+ File.join(Dir.pwd, 'system.yml')
53
+ else
54
+ puts "Not Found system.yml in #{Dir.pwd}"
55
+ exit
56
+ end
57
+ end
58
+
59
+ def fetch_tools_config_path config_name, config_path=nil
60
+ if !config_path.blank?
61
+ config_path
62
+ elsif Dir.entries(Dir.pwd).include? "#{config_name}_tools.yml"
63
+ File.join(Dir.pwd, "#{config_name}_tools.yml")
64
+ else
65
+ puts "Not Found #{config_name}_tools.yml in #{Dir.pwd}"
66
+ exit
67
+ end
68
+ end
69
+
70
+ def config_check config_name, not_check=false
71
+ tools_check unless not_check
72
+ @config['pipeline_name'] = config_name
73
+ @config = check_datasets config_name
74
+ end
75
+
76
+ def tools_check
77
+ @tools = Cagnut::Configuration::Checks::Tools.new @config
78
+ @tools.check
79
+ end
80
+
81
+ def check_datasets config_name
82
+ @datasets = Cagnut::Configuration::Checks::Datasets.new @config
83
+ @datasets.check config_name
84
+ end
85
+
86
+ end
87
+ end
88
+ end
@@ -0,0 +1,44 @@
1
+ require 'singleton'
2
+
3
+ module Cagnut
4
+ module Configuration
5
+ class Base
6
+ include Singleton
7
+ attr_accessor :ref_fasta, :data_type, :snpdb, :sample_name, :jobs_dir, :java_path,
8
+ :prefix_name, :target_flanks_file, :dbsnp_ref_indels, :cluster,
9
+ :target, :magic28, :dodebug, :seqs_path, :pipeline_name
10
+
11
+ class << self
12
+ def load config
13
+ instance.load config
14
+ end
15
+ end
16
+
17
+ def load config
18
+ @config = config
19
+ attributes.each do |name, value|
20
+ send "#{name}=", value if respond_to? "#{name}="
21
+ end
22
+ end
23
+
24
+ def attributes
25
+ {
26
+ prefix_name: @config['prefix_name'],
27
+ sample_name: @config['sample']['name'],
28
+ dodebug: @config['dodebug'],
29
+ java_path: @config['tools']['java'],
30
+ ref_fasta: @config['refs']['ref_fasta'],
31
+ snpdb: @config['refs']['dbsnp']['ref'],
32
+ dbsnp_ref_indels: @config['refs']['dbsnp']['indels'],
33
+ target: @config['refs']['targets_file'],
34
+ target_flanks_file: @config['refs']['target_flanks_file'],
35
+ magic28: '1f8b08040000000000ff0600424302001b0003000000000000000000',
36
+ seqs_path: @config['sample']['seqs_path'],
37
+ data_type: @config['info']['data_type'],
38
+ jobs_dir: @config['sample']['jobs'],
39
+ cluster: @config['cluster']
40
+ }
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,149 @@
1
+ module Cagnut
2
+ module Configuration
3
+ module Checks
4
+
5
+ class Datasets
6
+ attr_accessor :config
7
+
8
+ def initialize config
9
+ @config = config
10
+ end
11
+
12
+ def check config_name
13
+ @config['prefix_name'] = "#{Cagnut.prefix_name}_#{config_name}"
14
+ analysis_folder = create_analysis_folder config
15
+ @config['samples'].each_with_index do |sample, index|
16
+ setup_requirements sample, index, analysis_folder
17
+ end
18
+ @config
19
+ end
20
+
21
+ def create_analysis_folder config
22
+ output_data_dir = dir_rm_slash @config['cagnut']['output_data_dir']
23
+ analysis_folder = "#{output_data_dir}/#{config['prefix_name']}"
24
+ FileUtils.mkdir_p "#{output_data_dir}/#{config['prefix_name']}"
25
+ analysis_folder
26
+ end
27
+
28
+ def setup_requirements sample, index, analysis_folder
29
+ puts "Dataset : #{sample['path']}"
30
+ dir = "#{analysis_folder}/#{sample['name']}"
31
+ FileUtils.mkdir_p dir unless Dir.exist?(dir)
32
+ @config['samples'][index]['path'] = dir_rm_slash sample['path']
33
+ FileUtils.mkdir_p "#{analysis_folder}/#{sample['name']}/jobs"
34
+ @config['samples'][index]['jobs'] = "#{analysis_folder}/#{sample['name']}/jobs"
35
+ FileUtils.mkdir_p "#{analysis_folder}/#{sample['name']}/tmp"
36
+ @config['samples'][index]['tmp'] = "#{analysis_folder}/#{sample['name']}/tmp"
37
+ # mysql_insert if options[:mysql]
38
+ make_required_folders @config, sample, index, dir
39
+ check_pu index
40
+ end
41
+
42
+ def make_required_folders config, sample, index, analysis_folder
43
+ end
44
+
45
+ def ln_seq_files_to_folder sample, qseq_path, fastq_path
46
+ ln_seq_files sample, qseq_path, fastq_path
47
+ check_datatype qseq_path, fastq_path
48
+ check_ln_file sample, qseq_path, fastq_path
49
+ end
50
+
51
+ def check_ln_file sample, qseq_path, fastq_path
52
+ fastq = Dir.glob("#{fastq_path}/*.fastq*")
53
+ qseq = Dir.glob("#{qseq_path}/*")
54
+ return unless (fastq + qseq).empty?
55
+ abort "Not found #{sample['name']} files in fastq and qseq"
56
+ end
57
+
58
+ def ln_seq_files sample, seq_txt, fastq_file
59
+ dir_present? sample['path']
60
+ ln_seq_txt_file sample, seq_txt
61
+ ln_fastq_file sample, fetch_flist(sample['path']), fastq_file
62
+ end
63
+
64
+ def dir_present? dataset
65
+ return if Dir.exist?(dataset)
66
+ puts "Error: Missing data directory #{@config['datasets']}"
67
+ exit
68
+ end
69
+
70
+ def dir_rm_slash dir
71
+ dir.gsub %r{/\z}, ''
72
+ end
73
+
74
+ def ln_seq_txt_file sample, qseq_dir
75
+ files =
76
+ Dir.glob("#{sample['path']}/*_sequence.txt*") + Dir.glob("#{sample['path']}/*_qseq.txt*")
77
+ files.each do |f|
78
+ `ln -s #{f} #{qseq_dir} 2>/dev/null` if f.match sample['name']
79
+ end
80
+ end
81
+
82
+ def fetch_flist dir
83
+ flist = Dir.glob("#{dir}/*.fastq*")
84
+ return flist unless flist.empty?
85
+ abort "No fastq found in #{dir}"
86
+ end
87
+
88
+ def ln_fastq_file sample, flist, fastq_dir
89
+ if %w(ONEFASTQ ONEFASTQSE).include? @config['info']['data_type']
90
+ files_to_much? flist
91
+ file_type = link_name flist, sample['name']
92
+ seq_file = "#{fastq_dir}/#{file_type}"
93
+ `ln -s #{flist[0]} #{seq_file} 2>/dev/null` if flist[0].match sample['name']
94
+ else
95
+ flist.each do |f|
96
+ next unless f.match sample['name']
97
+ `ln -s #{f} #{fastq_dir} 2>/dev/null`
98
+ end
99
+ end
100
+ end
101
+
102
+ def link_name flist, sample_name
103
+ if flist[0].match '.gz'
104
+ "#{sample_name}_sequence.txt.gz"
105
+ else
106
+ "#{sample_name}_sequence.txt"
107
+ end
108
+ end
109
+
110
+ def files_to_much? flist
111
+ return unless flist.size > 1
112
+ puts %(
113
+ DATA_TYPE = #{@config['info']['data_type']} but more than one fastq found.
114
+ Only the first would be processed.
115
+ #{flist.inspect}
116
+ )
117
+ end
118
+
119
+ def check_pu index
120
+ @config['samples'][index]['pu'] ||= 'NA'
121
+ end
122
+
123
+ def check_datatype qseq_dir, fastq_dir
124
+ @config['samples'].each_with_index do |sample, index|
125
+ case @config['info']['data_type']
126
+ when 'TILESQSEQ'
127
+ file = "#{qseq_dir}/*.txt*"
128
+ pattern = '.*s_\d+_1_(\d+).*'
129
+ file_end = '.fastq'
130
+ when 'TILESFASTQ'
131
+ file = "#{fastq_dir}/*.fastq*"
132
+ pattern = '(.*_R1_.*).fastq.*+'
133
+ file_end = '.fastq'
134
+ end
135
+ @config['samples'][index]['seqs_path']= fetch_seqs Dir[file], file_end, pattern
136
+ end
137
+ @config
138
+ end
139
+
140
+ def fetch_seqs files_path, file_end, pattern
141
+ files_path.map do |file|
142
+ return file if File.basename(file, file_end).match(/#{pattern}/)
143
+ end.flatten.compact
144
+ end
145
+ end
146
+
147
+ end
148
+ end
149
+ end
@@ -0,0 +1,76 @@
1
+ module Cagnut
2
+ module Configuration
3
+ module Checks
4
+ class Tools
5
+ attr_accessor :config, :check_completed, :java
6
+
7
+ def initialize config
8
+ @config = config
9
+ end
10
+
11
+ def check
12
+ @check_completed = true
13
+ check_each_tool
14
+ result = @check_completed ? 'Completed!' : 'Failed!'
15
+ puts "Check Tools: #{result}"
16
+ exit unless @check_completed
17
+ end
18
+
19
+ def check_each_tool
20
+ tools = @config['tools']
21
+ refs = @config['refs']
22
+ puts 'Start Checking...'
23
+ check_queueing_system @config['queueing_system']
24
+ check_ref_fasta refs['ref_fasta']
25
+ check_java tools['java']
26
+ check_r tools['R']
27
+ check_tool tools, refs
28
+ end
29
+
30
+ def check_tool tools_path, refs=nil
31
+ end
32
+
33
+ def check_tool_ver tool
34
+ ver = yield if block_given?
35
+ @check_completed = false if ver.nil?
36
+ ver = ver.nil? ? 'Not Found' : ver.chomp!
37
+ puts "Using #{tool} (#{ver})"
38
+ end
39
+
40
+ def check_java path
41
+ failed = check_tool_ver 'Java' do
42
+ `#{path} -version 2>&1| grep version | cut -f3 -d ' '` if path
43
+ end
44
+ @java = path unless failed
45
+ end
46
+
47
+ def check_r path
48
+ check_tool_ver 'R' do
49
+ `#{path} --version 2>&1 |grep ' version '| cut -f3 -d ' '` if path
50
+ end
51
+ check_r_libs path if path
52
+ end
53
+
54
+ def check_r_libs r_path
55
+ %w(gplots ggplot2 reshape gsalib).each do |lib|
56
+ check_tool_ver "R library: #{lib}" do
57
+ `#{r_path}script -e 'packageVersion("#{lib}")' | cut -f2 -d ' '`
58
+ end
59
+ end
60
+ end
61
+
62
+ def check_queueing_system queueing_system
63
+ system = queueing_system.nil? ? 'Local' : queueing_system['system']
64
+ puts "Using Queueing System: #{system}"
65
+ end
66
+
67
+ def check_ref_fasta ref_path
68
+ puts 'Checking Reference Files...'
69
+ return if File.exist?(ref_path)
70
+ puts "\tReference not founded in #{ref_path}"
71
+ @check_completed = false
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,63 @@
1
+ require 'singleton'
2
+
3
+ module Cagnut
4
+ class JobManage
5
+ include Singleton
6
+ extend Forwardable
7
+ def_delegators :'Cagnut::Configuration.base', :sample_name, :jobs_dir, :cluster
8
+
9
+ class << self
10
+ def submit job_script, job_name, opts
11
+ instance.submit job_script, job_name, opts
12
+ end
13
+
14
+ def run_local
15
+ instance.run_local
16
+ end
17
+ end
18
+
19
+ def submit job_script, job_name, opts
20
+ command = full_command job_script, job_name, opts
21
+ export_command command
22
+ puts command
23
+ end
24
+
25
+ def full_command job_script, job_name, opts
26
+ super if defined?(super)
27
+ return unless local_run?
28
+ command = local job_script
29
+ end
30
+
31
+ def local_run?
32
+ cluster.blank? || cluster['system'] == 'Local'
33
+ end
34
+
35
+ def export_command command
36
+ file = File.join jobs_dir, "submit_command_#{sample_name}.jobs"
37
+ File.open(file, 'a') do |f|
38
+ f.puts <<-BASH.strip_heredoc
39
+ #{command}
40
+ #{wait_local}
41
+ BASH
42
+ end
43
+ File.chmod(0700, file)
44
+ end
45
+
46
+ def local job_script
47
+ %(nohup #{jobs_dir}/#{job_script}.sh \
48
+ > #{jobs_dir}/#{job_script}.std \
49
+ 2>#{jobs_dir}/#{job_script}.err &)
50
+ end
51
+
52
+ def run_local
53
+ return unless local_run?
54
+ %(& echo $! >> #{jobs_dir}/submit_job_#{sample_name}.ids
55
+ wait $!)
56
+ end
57
+
58
+ def wait_local
59
+ return unless local_run?
60
+ 'wait $!'
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,3 @@
1
+ module CagnutCore
2
+ VERSION = "0.3.0"
3
+ end
@@ -0,0 +1 @@
1
+ require 'cagnut'
data/templates/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'cagnut_core'
@@ -0,0 +1,48 @@
1
+ ###### ACTGUN Configuration
2
+ cagnut:
3
+ # output_data_dir is used as the data output location
4
+ output_data_dir: '/home/u00wsg00/work3/data/'
5
+
6
+ tools:
7
+ bwa: '/home/u00wsg00/prg/bwa/bwa-0.7.12/bwa'
8
+ samtools: '/home/u00wsg00/prg/samtools/samtools/bin/samtools'
9
+ gatk: '/home/u00wsg00/prg/gatk/3.4.0/GenomeAnalysisTK.jar'
10
+ picard: '/home/u00wsg00/prg/picard/picard/dist/picard.jar'
11
+ java: '/pkg/java/jre/bin/java'
12
+ R: '/home/u00wsg00/prg/R/R-3.2.1/bin/R'
13
+ snpeff: '/home/u00wsg00/prg/snpeff/snpEff/snpEff.jar'
14
+
15
+ refs:
16
+ ref_fasta: '/home/u00wsg00/work3/ref/hg19/hg19.fasta'
17
+ # targets_file: '/home/u00wsg00/work3/ref/source/TruSeq_exome_targeted_regions_hg19_chr.bed'
18
+ # target_flanks_file: '/home/u00wsg00/work3/ref/hg19/targetregion100_hg19_nimblegen_exome_v2.interval_list'
19
+ dbsnp:
20
+ ref: '/home/u00wsg00/work3/ref/source/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf'
21
+ indels: '/home/u00wsg00/work3/ref/source/Mills_and_1000G_gold_standard.indels.hg19.sites.vcf'
22
+ version: '135'
23
+ snpeff:
24
+ config: '/home/u00wsg00/prg/snpeff/snpEff/snpEff.config'
25
+ db: 'hg19'
26
+
27
+ ####### Sample Information
28
+
29
+ # rgid is "read group id" used by bwa and picard
30
+ # PU: barcode. PU is required by Picard. If the sequencing run is not multiplexed, use a placeholder such as "NA" or "lane1".
31
+ # PATH for RAW Data Directories
32
+ samples:
33
+ -
34
+ name: F193
35
+ rgid: Project_F193
36
+ pu: 'NA'
37
+ path: '/home/u00wsg00/work3/sample/fastq/'
38
+
39
+ #DATA_TYPE= (TILESQSEQ | TILESFASTQ | TILESELAND | ONEFASTQ | ONEFASTQSE) Single-End
40
+ #TARGET_COVERAGE= (AGILENT | NIMBLEGEN)
41
+ info:
42
+ data_type: "TILESFASTQ"
43
+ target_coverage: "NIMBLEGEN"
44
+ pl: 'ILLumina'
45
+ lb: "nimblegen"
46
+ cn: "CPD"
47
+ ds: "2x100"
48
+ dt: "2015-11-06"
metadata ADDED
@@ -0,0 +1,157 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cagnut_core
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Shi-Gang Wang
8
+ - Tse-Ching Ho
9
+ autorequire:
10
+ bindir: exe
11
+ cert_chain: []
12
+ date: 2016-11-01 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: activesupport
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">"
19
+ - !ruby/object:Gem::Version
20
+ version: '5'
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">"
26
+ - !ruby/object:Gem::Version
27
+ version: '5'
28
+ - !ruby/object:Gem::Dependency
29
+ name: tilt
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ - !ruby/object:Gem::Dependency
43
+ name: thor
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :runtime
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: bundler
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: '1.12'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: '1.12'
70
+ - !ruby/object:Gem::Dependency
71
+ name: rake
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '10.0'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: '10.0'
84
+ - !ruby/object:Gem::Dependency
85
+ name: rspec
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - "~>"
89
+ - !ruby/object:Gem::Version
90
+ version: '3.0'
91
+ type: :development
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '3.0'
98
+ description: Computational and Analytical Gear for Nucleic acid Utilitarian Techniques
99
+ email:
100
+ - seanwang@goldenio.com
101
+ - tsechingho@goldenio.com
102
+ executables:
103
+ - cagnut
104
+ - stat
105
+ extensions: []
106
+ extra_rdoc_files: []
107
+ files:
108
+ - ".gitignore"
109
+ - ".rspec"
110
+ - ".ruby-version"
111
+ - ".travis.yml"
112
+ - Gemfile
113
+ - LICENSE.txt
114
+ - README.md
115
+ - Rakefile
116
+ - bin/console
117
+ - bin/setup
118
+ - cagnut_core.gemspec
119
+ - exe/cagnut
120
+ - exe/stat
121
+ - lib/cagnut.rb
122
+ - lib/cagnut/cli/new_project.rb
123
+ - lib/cagnut/cli/pipeline.rb
124
+ - lib/cagnut/configuration.rb
125
+ - lib/cagnut/configuration/base.rb
126
+ - lib/cagnut/configuration/checks/datasets.rb
127
+ - lib/cagnut/configuration/checks/tools.rb
128
+ - lib/cagnut/job_manage.rb
129
+ - lib/cagnut/version.rb
130
+ - lib/cagnut_core.rb
131
+ - templates/Gemfile
132
+ - templates/system.yml
133
+ homepage: https://github.com/CAGNUT/cagnut_core
134
+ licenses:
135
+ - MIT
136
+ metadata: {}
137
+ post_install_message:
138
+ rdoc_options: []
139
+ require_paths:
140
+ - lib
141
+ required_ruby_version: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ requirements: []
152
+ rubyforge_project:
153
+ rubygems_version: 2.5.1
154
+ signing_key:
155
+ specification_version: 4
156
+ summary: Computational and Analytical Gear for Nucleic acid Utilitarian Techniques
157
+ test_files: []