bio-ngs 0.4.6.alpha.01 → 0.4.6.alpha.02
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +4 -2
- data/Gemfile.lock +21 -21
- data/README.rdoc +51 -4
- data/VERSION +1 -1
- data/bin/biongs +1 -0
- data/bio-ngs.gemspec +36 -8
- data/features/cufflinks_gtf_parser.feature +22 -0
- data/features/cufflinks_gtf_parser_indexing.feature +20 -0
- data/features/step_definitions/cufflinks_gtf.rb +30 -0
- data/features/step_definitions/cufflinks_gtf_parser_indexing.rb +53 -0
- data/features/support/env.rb +2 -0
- data/lib/bio-ngs.rb +19 -5
- data/lib/bio/appl/ngs/cufflinks.rb +447 -281
- data/lib/bio/appl/ngs/cufflinks/gtf/gtf.rb +23 -0
- data/lib/bio/appl/ngs/cufflinks/gtf/gtf_parser.rb +248 -0
- data/lib/bio/appl/ngs/cufflinks/gtf/transcript.rb +154 -0
- data/lib/bio/ngs/fs.rb +46 -0
- data/lib/bio/ngs/illumina/fastq.rb +176 -0
- data/lib/bio/ngs/illumina/illumina.rb +64 -0
- data/lib/bio/ngs/illumina/project.rb +81 -0
- data/lib/bio/ngs/illumina/sample.rb +85 -0
- data/lib/bio/ngs/task.rb +1 -1
- data/lib/bio/ngs/utils.rb +124 -112
- data/lib/meta.rb +162 -0
- data/lib/tasks/convert.thor +14 -14
- data/lib/tasks/filter.thor +158 -23
- data/lib/tasks/quality.thor +24 -4
- data/lib/tasks/rna.thor +26 -0
- data/lib/wrapper.rb +28 -0
- data/spec/bio/ngs/fs_spec.rb +70 -0
- data/spec/bio/ngs/illumina/fastq_spec.rb +52 -0
- data/spec/bio/ngs/illumina/illumina_spec.rb +21 -0
- data/spec/bio/ngs/illumina/project_spec.rb +0 -0
- data/spec/bio/ngs/illumina/sample_spec.rb +0 -0
- data/spec/bio/ngs/illumina/samples_spec.rb +0 -0
- data/spec/filter_spec.rb +25 -0
- data/spec/fixture/table_filter_list.txt +3 -0
- data/spec/fixture/table_filter_list_first_column.txt +2 -0
- data/spec/fixture/table_filter_source.tsv +44 -0
- data/spec/fixture/test-filtered-reference.fastq.gz +0 -0
- data/spec/fixture/test-merged-reference.fastq.gz +0 -0
- data/spec/fixture/test.fastq.gz +0 -0
- data/spec/meta_spec.rb +117 -0
- data/spec/spec_helper.rb +1 -1
- metadata +97 -69
@@ -0,0 +1,176 @@
|
|
1
|
+
module Bio
|
2
|
+
module Ngs
|
3
|
+
module Illumina
|
4
|
+
# module Fastx
|
5
|
+
# include Fs::Utility
|
6
|
+
# end #Fastx
|
7
|
+
|
8
|
+
|
9
|
+
# Copy the reads selected by FastqGz.gets_filtered from a gzip-compressed
# FastQ file into "<directory of file>/filtered/<basename of file>".
#
# file - path to the source .fastq.gz file.
#
# Creates the "filtered" directory beside the source file when it is missing.
# Returns whatever FastqGz.gets_compressed returns.
# Raises RuntimeError when +file+ does not exist.
#
# NOTE(review): this is defined as an instance method of the enclosing module
# although the error message names it Bio::Ngs::Illumina.filter -- confirm how
# callers are expected to reach it.
def filter(file)
  raise "Bio::Ngs::Illumina.filter : unkown file #{file}" unless File.exist?(file)

  # The output lives in a "filtered" directory next to the input, under the
  # same basename, so the source file is never overwritten.
  filtered_dir = File.join(File.dirname(file), "filtered")
  Dir.mkdir(filtered_dir) unless Dir.exist?(filtered_dir)
  filtered_file = File.join(filtered_dir, File.basename(file))

  FastqGz.gets_compressed(filtered_file) do |compress|
    FastqGz.gets_filtered(file) do |read_header, read_seq, read_splitter, read_qual|
      compress.write(read_header + read_seq + read_splitter + read_qual)
    end
  end
end
|
23
|
+
|
24
|
+
module FastqGz
|
25
|
+
require 'zlib'
|
26
|
+
class << self
|
27
|
+
|
28
|
+
# Walk a gzip-compressed FastQ file four lines at a time.
#
# file - path of the .gz file to read.
#
# Yields the header, sequence, splitter and quality lines of every record,
# exactly as read (line terminators included). Lines missing from a truncated
# final record are yielded as nil.
# Returns the number of records yielded.
def gets_uncompressed(file, &block)
  record_count = 0
  Zlib::GzipReader.open(file) do |reader|
    until (header = reader.gets).nil?
      record_count += 1
      sequence = reader.gets
      splitter = reader.gets
      quality  = reader.gets
      yield header, sequence, splitter, quality
    end
    reader.close
  end
  record_count
end
|
42
|
+
|
43
|
+
|
44
|
+
# Open +file+ for gzip-compressed writing and hand the writer to the block.
#
# file - path of the .gz file to create.
#
# Yields the Zlib::GzipWriter; the writer is closed once the block returns.
def gets_compressed(file, &block)
  Zlib::GzipWriter.open(file) do |writer|
    yield writer
    writer.close
  end
end
|
50
|
+
|
51
|
+
# Stream only the records whose header matches /^@.* [^:]*:N:[^:]*:/
# (an ":N:" in the second colon-separated field after the first space).
#
# file - path of the .gz FastQ file, read through gets_uncompressed.
#
# Yields header, sequence, quality splitter and quality for each kept record.
# Returns the number of records that were yielded.
def gets_filtered(file, &block)
  kept = 0
  gets_uncompressed(file) do |header, seq, splitter, qual|
    next unless header=~/^@.* [^:]*:N:[^:]*:/

    kept += 1
    yield header, seq, splitter, qual
  end
  kept
end
|
63
|
+
end #FastqGz
|
64
|
+
|
65
|
+
# desc "illumina_filter [DIR(s)]", "Filter the data using Y/N flag in FastQ headers (Illumina). Search for fastq.gz files within directory(ies) passed."
|
66
|
+
# method_option :compression, :type => :string, :default => "pigz"
|
67
|
+
# method_option :cpu, :type => :numeric, :default => 4
|
68
|
+
# def illumina_filter(dir)
|
69
|
+
# folders = Dir.glob(dir)
|
70
|
+
# cmd_blocks = []
|
71
|
+
# folders.each do |folder|
|
72
|
+
# Parallel.each(Dir.glob(folder+"/*.fastq.gz").sort,:in_processes => options[:cpu].to_i) do |fastq|
|
73
|
+
# Dir.mkdir(folder+"/filtered") unless Dir.exists? folder+"/filtered"
|
74
|
+
# fastq = fastq.split("/")[-1]
|
75
|
+
# system("zcat #{folder+"/"+fastq} | grep -A 3 '^@.* [^:]*:N:[^:]*:' | grep -v '^\-\-'| #{options[:compression]} > #{folder}/filtered/#{fastq}")
|
76
|
+
# end
|
77
|
+
# end
|
78
|
+
# end
|
79
|
+
|
80
|
+
|
81
|
+
# desc "merge [file(s)]","Merge together fastQ files (accepts wildcards)"
|
82
|
+
# method_option :compressed, :type => :boolean, :default => true
|
83
|
+
# def merge(file)
|
84
|
+
# files = Dir.glob(file).sort
|
85
|
+
# cat = (options[:compressed]) ? "zcat" : "cat"
|
86
|
+
# files.each do |file|
|
87
|
+
# system("#{cat} #{file} >> merged_reads.fastq")
|
88
|
+
# end
|
89
|
+
# end
|
90
|
+
|
91
|
+
# desc "paired_merge [file(s)]","Merge together FastQ files while checking for correct pairing (accepts wildcards)"
|
92
|
+
# method_option :compressed, :type => :boolean, :default => false
|
93
|
+
# def paired_merge(file)
|
94
|
+
# files = Dir.glob(file).sort.find_all {|f| f=~/_R1_/}
|
95
|
+
# cat = (options[:compressed] == true) ? "zcat" : "cat"
|
96
|
+
# files.each do |r1|
|
97
|
+
# r2 = r1.gsub(/_R1_/,"_R2_")
|
98
|
+
# if File.exists? r2
|
99
|
+
# r1_count = count_reads(r1,compressed:options[:compressed])
|
100
|
+
# r2_count = count_reads(r2,compressed:options[:compressed])
|
101
|
+
# puts "Read count: #{r1_count} : #{r2_count} , #{file}"
|
102
|
+
# if r1_count == r2_count
|
103
|
+
# Parallel.each(["R1","R2"],:in_processes => options[:cpu].to_i) do |read|
|
104
|
+
# filename = (read == "R1") ? r1 : r2
|
105
|
+
# system "#{cat} #{filename} >> #{read}_reads.fastq"
|
106
|
+
# end
|
107
|
+
# else
|
108
|
+
# raise RuntimeError "Error: files #{r1} and #{r2} do not have the same number of reads!"
|
109
|
+
# end
|
110
|
+
# else
|
111
|
+
# puts "WARN: file #{r2} does not exist! Reads from #{r1} will be considered as singlets"
|
112
|
+
# system("#{cat} #{r1} >> singlets_reads.fastq")
|
113
|
+
# end
|
114
|
+
# end
|
115
|
+
# end
|
116
|
+
|
117
|
+
# desc "uncompress [file(s)]","Uncompress multiple files in parallel (accepts wildcards)"
|
118
|
+
# method_option :cpu, :type => :numeric, :default => 4
|
119
|
+
# def uncompress(file)
|
120
|
+
# files = Dir.glob(file).sort
|
121
|
+
# blocks = []
|
122
|
+
# Parallel.each(files,:in_processes => options[:cpu].to_i) do |file|
|
123
|
+
# system("gunzip #{file}")
|
124
|
+
# end
|
125
|
+
# end
|
126
|
+
|
127
|
+
|
128
|
+
# desc "trim [fastq(s)]","Calulate quality profile and trim the all the reads using FastX (accepts wildcards)"
|
129
|
+
# method_option :cpu, :type => :numeric, :default => 4
|
130
|
+
# method_option :min_qual, :type => :numeric, :default => 20
|
131
|
+
# def trim(file)
|
132
|
+
# list = Dir.glob(file).sort
|
133
|
+
# groups = list / options[:cpu].to_i # get group of files equal to number of CPUs
|
134
|
+
# cmd_blocks = []
|
135
|
+
# groups.each do |files|
|
136
|
+
# Parallel.each(files, :in_processes => options[:cpu].to_i) do |file|
|
137
|
+
# invoke "quality:fastq_stats", [file], {output:file+".stats"}
|
138
|
+
# trim_position = options[:read_length]
|
139
|
+
# lines = File.read(file+".stats").split("\n")
|
140
|
+
# if lines.size == 0
|
141
|
+
# raise RuntimeError, "Error in Quality Stats file! Check fastx_quality_stat output"
|
142
|
+
# end
|
143
|
+
# read_length = (lines.size) -1
|
144
|
+
# lines[1..-1].each_with_index do |line,index|
|
145
|
+
# if line.split("\t")[7].to_i <= options[:min_qual]
|
146
|
+
# trim_position = index +1
|
147
|
+
# break
|
148
|
+
# end
|
149
|
+
# end
|
150
|
+
# if trim_position == options[:read_length]
|
151
|
+
# puts "WARN: no bases under quality cutoff. No trimming needed on #{file}"
|
152
|
+
# FileUtils.cp file, file+".ready"
|
153
|
+
# elsif trim_position < 25
|
154
|
+
# puts "WARN: Trimming on #{file} will produce reads that are too short. The file will be discarded, please check quality scores."
|
155
|
+
# else
|
156
|
+
# puts "Trimming on position #{trim_position} for #{file}"
|
157
|
+
# trim = Bio::Ngs::Fastx::Trim.new
|
158
|
+
# trim.params={trim:read_length-trim_position+1, input:file, output:file+".ready"}
|
159
|
+
# trim.run
|
160
|
+
# end
|
161
|
+
# end
|
162
|
+
# end
|
163
|
+
# end
|
164
|
+
|
165
|
+
# private
|
166
|
+
|
167
|
+
# def count_reads(file,opts = {compressed:false})
|
168
|
+
# cat = (opts[:compressed] == true) ? "zcat" : "cat"
|
169
|
+
# total = `#{cat} #{file} | wc -l`.to_i / 4
|
170
|
+
# end
|
171
|
+
|
172
|
+
|
173
|
+
end #FastqGz
|
174
|
+
end #Illumina
|
175
|
+
end #Ngs
|
176
|
+
end #Bio
|
@@ -0,0 +1,64 @@
|
|
1
|
+
#TODO: refactor this code, I don't like it very much
|
2
|
+
#TODO: export in JSON format
|
3
|
+
require 'bio/ngs/illumina/project'
|
4
|
+
require 'bio/ngs/illumina/sample'
|
5
|
+
require 'bio/ngs/illumina/fastq'
|
6
|
+
|
7
|
+
module Bio
|
8
|
+
module Ngs
|
9
|
+
module Illumina
|
10
|
+
|
11
|
+
# Pool that collects the projects found under one run directory.
class Projects < Meta::Pool
  # name - passed on to the Meta::Pool constructor.
  # path - recorded under the :path key of this pool's metadata.
  def initialize(name, path)
    super(name)
    metadata[:path] = path
  end

  # Domain-specific name for the inherited pool accessor.
  alias_method :projects, :pool
end
|
18
|
+
|
19
|
+
require 'find'
|
20
|
+
class << self
|
21
|
+
# Tell whether +path+ looks like a run directory: it must contain at least one
# "Project_*" or "Undetermined_indices" entry, and every such entry must
# contain at least one "Sample*" entry.
#
# path - directory to inspect (defaults to the current directory).
#
# The working directory is changed for the duration of the checks and is
# restored by the Dir.chdir blocks.
# Returns true or false.
def project_directory?(path=".")
  Dir.chdir(path) do
    candidates = Dir.glob(["Project_*","Undetermined_indices"])
    return false if candidates.empty?

    # Every candidate is visited before the verdict is taken.
    has_samples = candidates.map do |candidate|
      Dir.chdir(candidate) { !Dir.glob("Sample*").empty? }
    end
    return false unless has_samples.all?
  end
  true
end
|
37
|
+
|
38
|
+
# Scan +path+ and build the Projects -> Project -> Sample hierarchy.
#
# path - run directory to scan (defaults to the current directory).
#
# Every "Project_*" / "Undetermined_indices" entry becomes a Project (the
# "Project_" prefix is dropped from its name), every "Sample*" entry inside
# it becomes a Sample (the "Sample_" prefix is dropped), and every
# *.fastq / *.fastq.gz file found recursively below a sample is registered
# with Sample#add_filename using its path relative to the sample directory.
#
# Returns the populated Projects pool.
#
# NOTE(review): Project.new receives the project directory itself as its
# root_dir, while Project#path joins root_dir with "Project_<name>" again --
# confirm that the resulting path is the intended one.
def build(path=".")
  projects = Projects.new("Illumina", path)

  Dir.chdir(path) do
    project_dirs = Dir.glob(["Project_*","Undetermined_indices"])
    project_dirs.each do |project_dir|
      project = Project.new(project_dir.sub(/Project_/,""),project_dir)
      projects.add(project)

      Dir.chdir(project_dir) do
        Dir.glob("Sample*").each do |sample_dir|
          sample = Sample.new(sample_dir.sub(/Sample_/,""), sample_dir, project)
          project.add(sample)

          Dir.chdir(sample_dir) do
            reads_files = Dir.glob(["**/*.fastq", "**/*.fastq.gz"])
            reads_files.each { |reads_filename| sample.add_filename(reads_filename) }
          end
        end
      end
    end
  end

  projects
end
|
61
|
+
end
|
62
|
+
end #Illumina
|
63
|
+
end #Ngs
|
64
|
+
end #Bio
|
@@ -0,0 +1,81 @@
|
|
1
|
+
#TODO: refactor this code, I don't like it very much
|
2
|
+
#TODO: export in JSON format
|
3
|
+
require 'meta'
|
4
|
+
|
5
|
+
module Bio
|
6
|
+
module Ngs
|
7
|
+
require 'json'
|
8
|
+
module Illumina
|
9
|
+
# One project of a run; a pool whose entries are its samples.
class Project < Meta::Pool
  # The samples reader generated here is replaced by the alias to pool below;
  # only the samples= writer generated by attr_accessor remains in effect.
  attr_accessor :samples, :sample_sheet, :root_dir

  # name     - project name, passed on to the Meta::Pool constructor.
  # root_dir - directory used as the base of #path; also stored under the
  #            :path key of the metadata.
  def initialize(name, root_dir=".")
    super(name)
    @sample_sheet = nil
    @root_dir = root_dir
    metadata[:path] = root_dir
  end

  alias :samples :pool

  # Directory of the project: root_dir joined with the name itself when the
  # name matches Undetermined_indices, otherwise with "Project_<name>".
  def path
    File.join(@root_dir, (name=~/Undetermined_indices/ ? name : "Project_#{name}"))
  end

  # Flat list of the filenames_paths of every sample.
  #
  # Reads through the samples accessor, like to_json does: @samples is never
  # assigned by this class, so going through the instance variable failed on nil.
  # NOTE(review): relies on the pool answering each_key and [] -- confirm
  # against Meta::Pool.
  def samples_path
    samples.each_key.map do |sample_name|
      samples[sample_name].filenames_paths
    end.flatten
  end

  # JSON form of the project: class name, name, sample sheet and the JSON of
  # each sample.
  def to_json(*a)
    {
      "json_class" => self.class.name,
      "name" => name,
      "sample_sheet" => sample_sheet,
      "samples" => samples.each_key.map{|k| samples[k].to_json }
    }.to_json(*a)
  end
end
|
38
|
+
|
39
|
+
# require 'find'
|
40
|
+
# class << self
|
41
|
+
# def project_directory?(path=".")
|
42
|
+
# Dir.chdir(path) do
|
43
|
+
# projects = Dir.glob(["Project_*","Undetermined_indices"])
|
44
|
+
# return false if projects.empty?
|
45
|
+
# into_projects = projects.map do |project|
|
46
|
+
# Dir.chdir(project) do |sample|
|
47
|
+
# Dir.glob("Sample*").size>0
|
48
|
+
# end
|
49
|
+
# end.uniq
|
50
|
+
|
51
|
+
# if (into_projects.size>1 || (into_projects.first==false))
|
52
|
+
# return false
|
53
|
+
# end
|
54
|
+
# end
|
55
|
+
# true
|
56
|
+
# end
|
57
|
+
|
58
|
+
# def build(path=".")
|
59
|
+
# Dir.chdir(path) do
|
60
|
+
# Dir.glob(["Project_*","Undetermined_indices"]).inject({}) do |projects, project_dir|
|
61
|
+
# project = Project.new(project_dir.sub(/Project_/,""), path)
|
62
|
+
# projects[project.name] = project
|
63
|
+
# Dir.chdir(project_dir) do
|
64
|
+
# Dir.glob("Sample*").each do |sample_dir|
|
65
|
+
# sample = Sample.new(sample_dir.sub(/Sample_/,""), project)
|
66
|
+
# project.samples[sample.name] = sample
|
67
|
+
# Dir.chdir(sample_dir) do
|
68
|
+
# Dir.glob(["**/*.fastq", "**/*.fastq.gz"]) do |reads_filename|
|
69
|
+
# sample.add_filename(reads_filename)
|
70
|
+
# end
|
71
|
+
# end
|
72
|
+
# end
|
73
|
+
# end
|
74
|
+
# projects
|
75
|
+
# end
|
76
|
+
# end
|
77
|
+
# end
|
78
|
+
# end
|
79
|
+
end #Illumina
|
80
|
+
end #Ngs
|
81
|
+
end #Bio
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#TODO: refactor this code, I don't like it very much
|
2
|
+
#TODO: export in JSON format
|
3
|
+
require 'meta'
|
4
|
+
require 'securerandom'
|
5
|
+
|
6
|
+
module Bio
|
7
|
+
module Ngs
|
8
|
+
module Illumina
|
9
|
+
# Metadata record describing one FastQ reads file.
class MetaReads < Meta::Data
  # name     - identifier passed on to the Meta::Data constructor.
  # metadata - hash passed on to the Meta::Data constructor; it is then tagged
  #            in place with :type => :file and :format => :fastq.
  #
  # NOTE(review): the tags are written to the caller's hash after super has
  # run, so they reach the stored metadata only if Meta::Data keeps a
  # reference to that same hash -- confirm.
  def initialize(name, metadata={})
    super(name, metadata)
    metadata[:type] = :file
    metadata[:format] = :fastq
  end
end # MetaReads
|
16
|
+
|
17
|
+
# One sample of a project; a pool whose entries are MetaReads records, one per
# reads file registered through add_filename.
class Sample < Meta::Pool
  #attr_accessor :name #, :filenames

  # name   - sample name, passed on to the Meta::Pool constructor.
  # path   - stored under the :path key of the metadata.
  # parent - object answering #path (used by Sample#path); may be nil.
  def initialize(name, path, parent=nil)
    super(name)
    metadata[:path]=path
    @parent = parent
  end

  # Path of the sample directory: the parent's path joined with "Sample_<name>".
  # Fails when the sample was created without a parent.
  def path
    File.join @parent.path,"Sample_#{name}"
  end

  # NOTE(review): @filenames is never assigned in this class, so this call
  # fails on nil unless something else sets it -- confirm the intended source
  # of the left/right information.
  def paired?
    @filenames.key?(:left) && @filenames.key?(:right)
  end

  # Register one reads file with the sample.
  #
  # filename - file name or relative path of a .fastq / .fastq.gz file. The
  #            string is not modified; all editing happens on a private copy.
  #
  # Flags and values derived from the name and stored in the record metadata:
  #   :filename            - the name as given
  #   :trimmed, :filtered  - name contains trimmed/TRIMMED or filtered/FILTERED
  #   :*_aggregated        - same, and the name has no "_<digits>." part
  #   :left, :right        - name contains _R1_ / _R2_
  #   :zipped              - name contains ".gz"
  #   :aggregated          - no "_<digits>." part and not already flagged as
  #                          trimmed/filtered aggregated
  #   :index               - first "_<ACGT...>_" group, nil when absent
  #   :lane                - digits of the first "_L<digits>" group, nil when absent
  #   :chunks              - digits following "R?_" before ".fastq", when present
  #
  # The record is added to the pool under a random UUID.
  def add_filename(filename)
    metadata = {:filename=>filename.dup}
    # Private copy: the index and lane groups are stripped from it below.
    working_name = filename.dup

    #TODO maybe it could be useful to leave this to the end user: define what a filtered/trimmed file is, with a regexp for each category.
    if working_name=~/trimmed|TRIMMED/
      metadata[:trimmed] = true
      metadata[:trimmed_aggregated] = true unless working_name=~/_\d+\./
    end

    if working_name=~/filtered|FILTERED/
      metadata[:filtered] = true
      metadata[:filtered_aggregated] =true unless working_name=~/_\d+\./
    end
    metadata[:left] = true if working_name=~/.*_R1_.*/
    metadata[:right] = true if working_name=~/.*_R2_.*/
    metadata[:zipped] = true if working_name=~/\.gz/
    metadata[:aggregated] = true unless metadata[:trimmed_aggregated] || metadata[:filtered_aggregated] || working_name=~/_\d+\./

    # $1 is nil after a sub! that did not match.
    working_name.sub!(/_([ACGT]+)_/,'_')
    metadata[:index] = $1
    working_name.sub!(/_L([0-9]+)_?/,'_')
    metadata[:lane] = $1

    # The chunk number is looked up after the index and lane were removed.
    if working_name=~/R._(\d*).fastq(.gz)?/
      metadata[:chunks]=$1
    end
    self.add MetaReads.new(SecureRandom.uuid, metadata)
  end

  #REMOVE # def get(tag=filtered)
  #   @files.get(tag)
  # end

  # NOTE(review): @filenames is never assigned in this class and filename_path
  # is not defined here -- confirm whether both are expected from Meta::Pool.
  def filenames_paths
    @filenames.keys.map do |filename|
      filename_path(filename)
    end.flatten
  end

  # def to_json(*a)
  #   {
  #     "json_class" => self.class.name,
  #     "name" => name,
  #     "filenames" => filenames_paths
  #   }.to_json(*a)
  # end
end #Sample
|
83
|
+
end #Illumina
|
84
|
+
end #Ngs
|
85
|
+
end #Bio
|
data/lib/bio/ngs/task.rb
CHANGED
@@ -15,7 +15,7 @@ class Thor
|
|
15
15
|
|
16
16
|
def run(instance, args=[])
|
17
17
|
public_method?(instance) ? instance.send(name, *args) : instance.class.handle_no_task_error(name)
|
18
|
-
|
18
|
+
# save_history(instance,args) unless instance.class == Bio::Ngs::Runner or instance.class == Thor::Sandbox::History
|
19
19
|
rescue ArgumentError => e
|
20
20
|
handle_argument_error?(instance, e, caller) ?
|
21
21
|
instance.class.handle_argument_error(self, e) : (raise e)
|
data/lib/bio/ngs/utils.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#
|
2
|
-
#
|
2
|
+
#
|
3
3
|
#
|
4
4
|
# Copyright:: Copyright (C) 2011
|
5
5
|
# Francesco Strozzi <francesco.strozzi@gmail.com>
|
@@ -12,6 +12,8 @@ require 'find'
|
|
12
12
|
module Bio
|
13
13
|
module Ngs
|
14
14
|
class Utils
|
15
|
+
@@skip_check_binaries=false
|
16
|
+
|
15
17
|
class BinaryNotFound < StandardError
|
16
18
|
def initialize(opts={})
|
17
19
|
@skip_task = opts[:skip_task]
|
@@ -22,26 +24,35 @@ module Bio
|
|
22
24
|
end
|
23
25
|
end
|
24
26
|
class << self
|
25
|
-
|
27
|
+
|
28
|
+
# Switch off the lookup performed by binary: sets the @@skip_check_binaries
# class variable to true. Returns true.
def skip_check_binaries
  @@skip_check_binaries = true
end
|
31
|
+
|
32
|
+
# Current value of the @@skip_check_binaries class variable.
def skip_check_binaries?
  @@skip_check_binaries
end
|
35
|
+
|
26
36
|
# Resolve the executable to use for +name+.
#
# Returns nil straight away when skip_check_binaries? is true. Otherwise the
# first entry reported by find_binary_files wins; failing that, the output of
# Bio::Command.query_command for ["which", name] with newlines removed. When
# neither gives a result the BinaryNotFound message is printed with warn and
# nil is returned.
def binary(name)
  return nil if skip_check_binaries?

  begin
    bundled = find_binary_files(name)
    return bundled.first unless bundled.empty?

    from_path = Bio::Command.query_command ["which", name]
    return from_path.tr("\n","") unless from_path == ""

    raise BinaryNotFound.new(:skip_task=>true), "No binary found with this name: #{name}"
  rescue BinaryNotFound => e
    warn e.message
  end
end #binary
|
40
51
|
|
41
52
|
def os_type
|
42
53
|
require 'rbconfig'
|
43
54
|
case RbConfig::CONFIG['host_os']
|
44
|
-
when /darwin/ then return "osx"
|
55
|
+
when /darwin/ then return "osx"
|
45
56
|
when /linux/ then return "linux"
|
46
57
|
when /mswin|mingw/ then raise NotImplementedError, "This plugin does not run on Windows"
|
47
58
|
end
|
@@ -76,114 +87,115 @@ module Bio
|
|
76
87
|
mode = (opts[:mode]) ? opts[:mode] : ""
|
77
88
|
pbar = nil
|
78
89
|
open(opts[:url],"r"+mode,
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
90
|
+
:content_length_proc => lambda {|t|
|
91
|
+
if t && 0 < t
|
92
|
+
pbar = ProgressBar.new('', t)
|
93
|
+
pbar.file_transfer_mode
|
94
|
+
end
|
84
95
|
},
|
85
96
|
:progress_proc => lambda {|s|
|
86
97
|
pbar.set s if pbar
|
87
|
-
|
88
|
-
|
98
|
+
}) do |remote|
|
99
|
+
open(filename,"w"+mode) {|file| file.write remote.read(16384) until remote.eof?}
|
100
|
+
end
|
101
|
+
puts "\nDone"
|
102
|
+
end
|
103
|
+
|
104
|
+
# Decompress a .gz file next to itself, dropping the trailing ".gz".
#
# file_in - path of the gzip file; it must end in ".gz".
#
# Only the final ".gz" suffix is removed, so a "gz" elsewhere in the path is
# left alone. The data is streamed in binary mode instead of being read into
# memory in one piece.
#
# Prints progress messages to standard output. Returns nil.
# Raises ArgumentError when +file_in+ does not end in ".gz": the output path
# would then equal the input path and the input would be truncated.
def uncompress_gz_file(file_in)
  require 'zlib'
  file_out = file_in.sub(/\.gz\z/, "")
  if file_out == file_in
    raise ArgumentError, "uncompress_gz_file expects a path ending in .gz, got #{file_in}"
  end

  puts "Uncompressing file #{file_in}"
  Zlib::GzipReader.open(file_in) do |gz|
    File.open(file_out, "wb") do |file|
      IO.copy_stream(gz, file)
    end
  end
  puts "Done\n"
end
|
115
|
+
|
116
|
+
|
117
|
+
# Download +url+ into +fileout+ in binary mode, then gunzip +fileout+.
#
# url     - address passed to download_with_progress as :url.
# fileout - local file name passed as :filename and then handed to
#           uncompress_gz_file.
#
# Returns whatever uncompress_gz_file returns.
def download_and_uncompress(url,fileout)
  download_with_progress(:url => url, :mode => "b", :filename => fileout)
  uncompress_gz_file(fileout)
end
|
121
|
+
|
122
|
+
# Map an archive suffix to the command line that unpacks it.
#
# suffix - "tar.bz2", "tar.gz" or "zip".
#
# Returns the command as a String.
# Raises RuntimeError for any other suffix.
def uncompress_command(suffix)
  commands = {
    "tar.bz2" => "tar xvfj",
    "tar.gz"  => "tar xvfz",
    "zip"     => "unzip"
  }
  commands.fetch(suffix) { raise "Unkonw suffix." }
end #uncompress_command
|
92
131
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
132
|
+
# Unpack the archive of a tool in the current directory and report the
# directory it produced.
#
# tool_name   - name of the tool; used for the "<tool_name>-<version>" fallback.
# tool_record - hash with the string keys "basename", "suffix" and "version".
#               The archive is "<basename>.<suffix>".
#
# The unpack command comes from uncompress_command and is run with the archive
# name passed as a separate argument, so the name is never interpreted by a
# shell. The command and a completion notice are written to standard error.
#
# Returns "<basename>" when that directory exists after unpacking, otherwise
# "<tool_name>-<version>" when that one exists.
# Raises RuntimeError when neither directory exists (and whatever
# uncompress_command raises for an unsupported suffix).
def uncompress_any(tool_name, tool_record)
  tool_file_name = "#{tool_record["basename"]}.#{tool_record["suffix"]}"
  tool_dir_name = tool_record["basename"]
  versioned_dir_name = "#{tool_name}-#{tool_record['version']}"

  uncompress = uncompress_command(tool_record["suffix"])
  STDERR.puts "#{uncompress} #{tool_file_name}"
  system(*uncompress.split, tool_file_name)
  STDERR.puts "completed."

  # Dir.exist? is used because Dir.exists? is no longer available on Ruby 3.2+.
  if Dir.exist?(tool_dir_name)
    tool_dir_name
  elsif Dir.exist?(versioned_dir_name)
    versioned_dir_name
  else
    raise "BioNGS can not identify the uncompressed destination folder"
  end
end #uncompress
|
147
|
+
|
148
|
+
# Unpack a tool's source archive and build it with configure / make /
# make install.
#
# tool_name     - name of the tool (used in messages and by uncompress_any).
# tool_record   - hash describing the archive, passed on to uncompress_any.
# path_external - base directory whose bin/common/lib/pkgconfig is exported
#                 as PKG_CONFIG_PATH for configure.
# path_binary   - used as both --prefix and --bindir.
#
# The three commands are run one after the other inside the unpacked
# directory; their exit statuses are not checked.
# NOTE(review): cd is called without a receiver and is not defined in the
# visible code -- presumably it comes from FileUtils; confirm it is available
# on this object.
def compile_source(tool_name, tool_record, path_external, path_binary)
  puts "Uncompressing #{tool_name}..."
  source_dir = uncompress_any(tool_name, tool_record)
  puts "Compiling #{tool_name}..."

  configure_command = "PKG_CONFIG_PATH='#{path_external}/bin/common/lib/pkgconfig' ./configure --prefix=#{path_binary} --bindir=#{path_binary}"
  cd(source_dir) do
    #system "#{tool_record["lib"]}='#{path_external}/bin/common/lib'" if tool_record["lib"]
    #system "#{tool_record["flags"]}='-O2'" if tool_record["flags"]
    system configure_command
    system "make"
    system "make install"
  end #cd
end #uncompress_compile
|
104
160
|
|
161
|
+
# Unpack a tool's source archive, run make in it and copy the file named
# +tool_name+ from the build directory to +path_binary+.
#
# tool_name     - name of the tool and of the built file that is copied.
# tool_record   - hash describing the archive, passed on to uncompress_any.
# path_external - not used by this method.
# path_binary   - destination handed to FileUtils.cp.
#
# The exit status of make is not checked.
# NOTE(review): cd is called without a receiver and is not defined in the
# visible code -- presumably it comes from FileUtils; confirm it is available
# on this object.
def just_make(tool_name, tool_record, path_external, path_binary)
  puts "Uncompressing #{tool_name}..."
  source_dir = uncompress_any(tool_name, tool_record)
  puts "Compiling #{tool_name}..."

  cd(source_dir) do
    system "make"
    FileUtils.cp tool_name, path_binary
  end #cd
end
|
105
170
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
171
|
+
# Unpack a pre-built tool and install its files under
# "<path_binary>/<tool_name>".
#
# tool_name     - name of the tool; also the name of the destination directory.
# tool_record   - hash describing the archive, passed on to uncompress_any.
# path_external - not used by this method.
# path_binary   - directory that receives the "<tool_name>" directory.
#
# An existing destination directory is removed first, then the content of the
# unpacked directory is copied recursively with file attributes preserved.
def install_binary(tool_name, tool_record, path_external, path_binary)
  require 'fileutils'
  # NOTE(review): kept from the original; including the module from inside a
  # method affects the receiver for every later call -- confirm it is intended.
  include FileUtils::Verbose
  puts "Uncompressing #{tool_name}"
  uncompressed_tool_dir_name = uncompress_any(tool_name, tool_record)
  puts "Installing #{tool_name}"
  path_binary_tool = File.join(path_binary,tool_name)
  # Dir.exist? is used because Dir.exists? is no longer available on Ruby 3.2+.
  FileUtils.remove_dir(path_binary_tool) if Dir.exist?(path_binary_tool)
  FileUtils.mkdir(path_binary_tool)
  FileUtils.cp_r "#{uncompressed_tool_dir_name}/.", path_binary_tool, :preserve=>true
end #uncompress install binary
|
110
182
|
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
def uncompress_any(tool_name, tool_record)
|
122
|
-
tool_file_name = "#{tool_record["basename"]}.#{tool_record["suffix"]}"
|
123
|
-
tool_dir_name = tool_record["basename"]
|
124
|
-
uncompress = uncompress_command(tool_record["suffix"])
|
125
|
-
STDERR.puts "#{uncompress} #{tool_file_name}"
|
126
|
-
system "#{uncompress} #{tool_file_name}"
|
127
|
-
STDERR.puts "completed."
|
128
|
-
if Dir.exists?(tool_dir_name)
|
129
|
-
tool_dir_name
|
130
|
-
elsif Dir.exists?("#{tool_name}-#{tool_record['version']}")
|
131
|
-
"#{tool_name}-#{tool_record['version']}"
|
132
|
-
else
|
133
|
-
raise "BioNGS can not identify the uncompressed destination folder"
|
134
|
-
end
|
135
|
-
end #uncompress
|
136
|
-
|
137
|
-
def compile_source(tool_name, tool_record, path_external, path_binary)
|
138
|
-
puts "Uncompressing #{tool_name}..."
|
139
|
-
tool_dir_name = uncompress_any(tool_name, tool_record)
|
140
|
-
puts "Compiling #{tool_name}..."
|
141
|
-
cd(tool_dir_name) do
|
142
|
-
#system "#{tool_record["lib"]}='#{path_external}/bin/common/lib'" if tool_record["lib"]
|
143
|
-
#system "#{tool_record["flags"]}='-O2'" if tool_record["flags"]
|
144
|
-
system "PKG_CONFIG_PATH='#{path_external}/bin/common/lib/pkgconfig' ./configure --prefix=#{path_binary} --bindir=#{path_binary}"
|
145
|
-
system "make"
|
146
|
-
system "make install"
|
147
|
-
end #cd
|
148
|
-
end #uncompress_compile
|
149
|
-
|
150
|
-
def just_make(tool_name, tool_record, path_external, path_binary)
|
151
|
-
puts "Uncompressing #{tool_name}..."
|
152
|
-
tool_dir_name = uncompress_any(tool_name, tool_record)
|
153
|
-
puts "Compiling #{tool_name}..."
|
154
|
-
cd(tool_dir_name) do
|
155
|
-
system "make"
|
156
|
-
FileUtils.cp tool_name,path_binary
|
157
|
-
end #cd
|
183
|
+
|
184
|
+
# search in the current gem's directory for installed binaries with the name binary_name
|
185
|
+
# it's a recursive search in common and os specific directories
|
186
|
+
# return an array: empty if the binary can not be found otherwise full path to the binaries
|
187
|
+
# it is up to the user to choose which binary to use; it is suggested to use the first in the array
|
188
|
+
# to have a behaviour similar to the search PATH
|
189
|
+
def find_binary_files(binary_name)
|
190
|
+
path = File.expand_path(File.dirname(__FILE__))
|
191
|
+
Find.find(File.join(path,"ext","bin","common"),File.join(path,"ext","bin",self.os_type)).select do |f|
|
192
|
+
File.file?(f) && File.basename(f) == binary_name
|
158
193
|
end
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
puts "Installing #{tool_name}"
|
166
|
-
path_binary_tool = File.join(path_binary,tool_name)
|
167
|
-
FileUtils.remove_dir(path_binary_tool) if Dir.exists?(path_binary_tool)
|
168
|
-
FileUtils.mkdir(path_binary_tool)
|
169
|
-
FileUtils.cp_r "#{uncompressed_tool_dir_name}/.", path_binary_tool, :preserve=>true
|
170
|
-
end #uncompress install binary
|
171
|
-
|
172
|
-
|
173
|
-
# search in the current gem's directory for installed binaries which the name binary_name
|
174
|
-
# it's a recursive search in common and os specific directories
|
175
|
-
# return an array: empty if the binary can not be found otherwise full path to the binaries
|
176
|
-
# it is up to the user choose which binary to use, it's suggested to use the first in the array
|
177
|
-
# to have a behavirou similar to the search PATH
|
178
|
-
def find_binary_files(binary_name)
|
179
|
-
path = File.expand_path(File.dirname(__FILE__))
|
180
|
-
Find.find(File.join(path,"ext","bin","common"),File.join(path,"ext","bin",self.os_type)).select do |f|
|
181
|
-
File.file?(f) && File.basename(f) == binary_name
|
182
|
-
end
|
183
|
-
end #find_binary_file
|
184
|
-
|
185
|
-
end #eiginclass
|
194
|
+
end #find_binary_file
|
195
|
+
end #eiginclass
|
196
|
+
|
197
|
+
end # end Utils
|
198
|
+
end # end NGS
|
199
|
+
end # end Bio
|
186
200
|
|
187
|
-
|
188
|
-
end # end NGS
|
189
|
-
end # end Bio
|
201
|
+
# Let the environment disable the binary lookup: any of "true", "yes", "ok"
# or "1" in BIONGS_SKIP_CHECK_BINARIES switches it off at load time.
skip_check_flag = ENV['BIONGS_SKIP_CHECK_BINARIES']
Bio::Ngs::Utils.skip_check_binaries if %w(true yes ok 1).include?(skip_check_flag)
|