finishm 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,69 +0,0 @@
#!/usr/bin/env ruby

require 'optparse'
require 'bio-logger'
require 'bio-samtools'
require 'bio'

# Command-line script: for every entry of a reference FASTA file, print the
# average coverage (taken from a sorted, indexed BAM file) over the first and
# the last --end-length positions of that entry. Output is tab-separated on
# STDOUT with the columns Reference, StartCoverage and EndCoverage.
if __FILE__ == $PROGRAM_NAME # needs to be removed if this script is distributed as part of a rubygem
  SCRIPT_NAME = File.basename(__FILE__)
  LOG_NAME = SCRIPT_NAME.gsub('.rb', '')

  # Defaults; the option callbacks below overwrite these entries.
  options = {
    :logger => 'stderr',
    :contig_end_length => 200
  }

  parser = OptionParser.new do |opts|
    opts.banner = "\nUsage: #{SCRIPT_NAME} <arguments>\n\n" \
      "Takes a sorted, indexed BAM file and outputs the coverages of each of the reference sequences\n\n"

    opts.on('-b', '--bam BAM_FILE', 'BAM file that defines overall mapping/coverage [required]') do |path|
      options[:bam_file] = path
    end
    opts.on('-f', '--reference-fasta FASTA_FILE', 'FASTA file of the reference [required]') do |path|
      options[:fasta_file] = path
    end

    opts.on('-l', '--end-length LENGTH', "How far from the end to count [default: #{options[:contig_end_length]}]") do |raw|
      # String#to_i yields 0 for non-numeric input, which the check below rejects.
      length = raw.to_i
      options[:contig_end_length] = length
      raise "Inappropriate end length detected, I need a positive number, found #{length} parsed from #{raw}" if length < 1
    end

    # logger options
    opts.separator "\n\tVerbosity:\n\n"
    opts.on('-q', '--quiet', 'Run quietly, set logging to ERROR level [default INFO]') { Bio::Log::CLI.trace('error') }
    opts.on('--logger filename', String, "Log to file [default #{options[:logger]}]") { |name| options[:logger] = name }
    opts.on('--trace options', String, "Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG") { |level| Bio::Log::CLI.trace(level) }
  end
  parser.parse!

  # Both input files are mandatory and no positional arguments are accepted.
  arguments_ok = ARGV.empty? && !options[:bam_file].nil? && !options[:fasta_file].nil?
  unless arguments_ok
    $stderr.puts parser
    exit 1
  end

  # Setup logging. bio-logger defaults to STDERR not STDOUT, I disagree
  Bio::Log::CLI.logger(options[:logger])
  log = Bio::Log::LoggerPlus.new(LOG_NAME)
  Bio::Log::CLI.configure(LOG_NAME)

  # open the BAM file for reading
  bam = Bio::DB::Sam.new(:bam => options[:bam_file], :fasta => options[:fasta_file])
  bam.open

  puts "Reference\tStartCoverage\tEndCoverage"

  # The FASTA file is iterated instead of bam.each_reference because that
  # method produced extraneous output on STDOUT.
  Bio::FlatFile.auto(options[:fasta_file]).each_entry do |record|
    # NOTE(review): the full definition line is used as the BAM reference
    # name -- confirm this still matches when headers carry a description.
    ref = record.definition
    ref_length = record.length
    next if ref == '*' # Ignore unmapped reads

    # Never look at more positions than the sequence has.
    window = [options[:contig_end_length], ref_length].min

    # Arguments are presumably (reference name, 1-based start, length) -- TODO confirm.
    cov_start = bam.average_coverage(ref, 1, window)
    cov_end = bam.average_coverage(ref, ref_length - window + 1, window)

    puts [ref, cov_start, cov_end].join("\t")
  end
  exit
end # end if running as a script
@@ -1,84 +0,0 @@
#!/usr/bin/env ruby

# Command-line script: reads trail sequences from a FASTA file and a kmer
# multiple abundance file, then writes the kmer depths of every trail to the
# file given by --output-kmer-coverages (via
# Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter#write_depths).

require 'optparse'
require 'bio-logger'
require 'pp'

SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = 'finishm'
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
require 'priner'

# Parse command line options into the options hash
options = {
  :logger => 'stderr',
  :log_level => 'info',
}

o = OptionParser.new do |opts|
  # NOTE(review): the descriptive sentence of this banner does not obviously
  # describe what the script does below -- confirm it was not copied from
  # another script.
  opts.banner = "\nUsage: #{SCRIPT_NAME} --trails <trail(s) fasta> --kmer-abundances <abundances.csv> --output-kmer-coverages <coverages file>\n\n" \
    "Given an input kmer set of sequences then abundances space separated file, and a threshold, print out how many kmers are unique to different subsets of columns\n\n"

  opts.on("--trails FASTA_FILE", "fasta file of trail(s) to be tested [required]") do |arg|
    options[:trails_fasta_file] = arg
  end
  opts.on("--kmer-abundances FILE", "kmer multiple abundance file [required]") do |arg|
    options[:kmer_multiple_abundance_file] = arg
  end

  #opts.separator "\nOptional arguments:\n\n"
  # The validation below rejects a missing output file, so the help text
  # states that the option is required.
  opts.on("--output-kmer-coverages FILE", "output kmer coverages across each library across the contigs [required]") do |arg|
    options[:output_trail_kmer_coverage_file] = arg
  end

  # logger options
  opts.separator "\nVerbosity:\n\n"
  opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
  opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
  opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
end
o.parse!

# All three file options are mandatory and no positional arguments are accepted.
required_options = [
  :trails_fasta_file,
  :kmer_multiple_abundance_file,
  :output_trail_kmer_coverage_file,
]
if !ARGV.empty? || required_options.any? { |key| options[key].nil? }
  # Keep every part of the usage error on STDERR so STDOUT stays clean.
  $stderr.puts "Options not correctly specified. Found:"
  $stderr.puts options.pretty_inspect
  $stderr.puts o
  exit 1
end

# Setup logging
Bio::Log::CLI.logger(options[:logger])
Bio::Log::CLI.trace(options[:log_level])
log = Bio::Log::LoggerPlus.new(LOG_NAME)
Bio::Log::CLI.configure(LOG_NAME)

# read in fasta file of trails, keyed by definition line
# (entries sharing a definition overwrite each other)
log.info "Reading trail sequences from #{options[:trails_fasta_file]}"
fasta_seqs = {}
Bio::FlatFile.foreach(options[:trails_fasta_file]) do |e|
  fasta_seqs[e.definition] = e.seq.seq
end

log.info "Reading kmer abundances from #{options[:kmer_multiple_abundance_file]}.."
kmer_hash = Bio::KmerMultipleAbundanceHash.parse_from_file options[:kmer_multiple_abundance_file]
log.info "Finished reading the kmer abundances"

log.info "Writing out kmer coverages to #{options[:output_trail_kmer_coverage_file]}.."
writer = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new
# The block form closes (and therefore flushes) the file before the
# completion message is logged, even if write_depths raises.
File.open(options[:output_trail_kmer_coverage_file], 'w') do |io|
  fasta_seqs.each do |name, seq|
    log.debug "Writing coverages for #{name}"
    writer.write_depths(io, name, seq, kmer_hash)
  end
end
log.info "Finished writing"

#log.info "Filtering trail(s) based on kmer coverage, requiring each kmer in the path to have a minimum of #{options[:kmer_path_filter_min_coverage]} coverage in patterned reads, except for the #{options[:kmer_path_end_exclusion_length]}bp at the ends"
#kmer_path_filter = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new
#thresholds = desired_pattern.collect{|c| c == true ? 1 : 0}
#log.info "Using thresholds for filtering: #{thresholds}"
#trails = kmer_path_filter.filter(trails, kmer_hash, thresholds, :exclude_ending_length => options[:kmer_path_end_exclusion_length])
#log.info "After filtering remained #{trails.length} trails"

#trails.each_with_index do |trail, i|
#  puts ">trail#{i+1}"
#  puts trail.sequence
#end