finishm 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,69 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'optparse'
4
- require 'bio-logger'
5
- require 'bio-samtools'
6
- require 'bio'
7
-
8
- if __FILE__ == $0 #needs to be removed if this script is distributed as part of a rubygem
9
- SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
10
-
11
- # Parse command line options into the options hash
12
- options = {
13
- :logger => 'stderr',
14
- :contig_end_length => 200,
15
- }
16
- o = OptionParser.new do |opts|
17
- opts.banner = "
18
- Usage: #{SCRIPT_NAME} <arguments>
19
-
20
- Takes a sorted, indexed BAM file and outputs the coverages of each of the reference sequences\n\n"
21
-
22
- opts.on("-b", "--bam BAM_FILE", "BAM file that defines overall mapping/coverage [required]") do |arg|
23
- options[:bam_file] = arg
24
- end
25
- opts.on("-f", "--reference-fasta FASTA_FILE", "FASTA file of the reference [required]") do |arg|
26
- options[:fasta_file] = arg
27
- end
28
-
29
- opts.on("-l", "--end-length LENGTH", "How far from the end to count [default: #{options[:contig_end_length]}]") do |arg|
30
- options[:contig_end_length] = arg.to_i
31
- raise "Inappropriate end length detected, I need a positive number, found #{options[:contig_end_length]} parsed from #{arg}" if options[:contig_end_length] < 1
32
- end
33
-
34
- # logger options
35
- opts.separator "\n\tVerbosity:\n\n"
36
- opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {Bio::Log::CLI.trace('error')}
37
- opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
38
- opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| Bio::Log::CLI.trace(s)}
39
- end; o.parse!
40
- if ARGV.length != 0 or options[:bam_file].nil? or options[:fasta_file].nil?
41
- $stderr.puts o
42
- exit 1
43
- end
44
- # Setup logging. bio-logger defaults to STDERR not STDOUT, I disagree
45
- Bio::Log::CLI.logger(options[:logger]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
46
-
47
-
48
- # open the BAM file for reading
49
- bam = Bio::DB::Sam.new(:bam => options[:bam_file], :fasta => options[:fasta_file])
50
- bam.open
51
- puts %w(Reference StartCoverage EndCoverage).join("\t")
52
- contigs_file = Bio::FlatFile.auto(options[:fasta_file])
53
- contigs_file.each_entry do |record|
54
- ref = record.definition
55
- ref_length = record.length
56
- #bam.each_reference do |ref, ref_length| #currently commented out because there is extraneous output on STDOUT if you use this
57
- next if ref == '*' #Ignore umapped reads
58
- # Coverage of the start
59
- end_length = options[:contig_end_length]
60
- end_length = ref_length if ref_length < options[:contig_end_length]
61
-
62
- cov_start = bam.average_coverage(ref, 1, end_length)
63
- cov_end = bam.average_coverage(ref, ref_length-end_length+1, end_length)
64
- puts [
65
- ref, cov_start, cov_end
66
- ].join("\t")
67
- end
68
- exit
69
- end #end if running as a script
@@ -1,84 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'optparse'
4
- require 'bio-logger'
5
- require 'pp'
6
-
7
- SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = 'finishm'
8
- $:.unshift File.join(File.dirname(__FILE__),'..','lib')
9
- require 'priner'
10
-
11
- # Parse command line options into the options hash
12
- options = {
13
- :logger => 'stderr',
14
- :log_level => 'info',
15
- }
16
-
17
- o = OptionParser.new do |opts|
18
- opts.banner = "
19
- Usage: #{SCRIPT_NAME} --trails <trail(s) fasta> --kmer-abundances <abundances.csv>
20
-
21
- Given an input kmer set of sequences then abundances space separated file, and a threshold, print out how many kmers are unique to different subsets of columns\n\n"
22
-
23
- opts.on("--trails FASTA_FILE", "fasta file of trail(s) to be tested [required]") do |arg|
24
- options[:trails_fasta_file] = arg
25
- end
26
- opts.on("--kmer-abundances FILE", "kmer multiple abundance file [required]") do |arg|
27
- options[:kmer_multiple_abundance_file] = arg
28
- end
29
-
30
- #opts.separator "\nOptional arguments:\n\n"
31
- opts.on("--output-kmer-coverages FILE", "output kmer coverages across each library across the contigs default: don't output]") do |arg|
32
- options[:output_trail_kmer_coverage_file] = arg
33
- end
34
-
35
- # logger options
36
- opts.separator "\nVerbosity:\n\n"
37
- opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
38
- opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
39
- opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
40
- end; o.parse!
41
- if ARGV.length != 0 or options[:trails_fasta_file].nil? or options[:kmer_multiple_abundance_file].nil? or options[:output_trail_kmer_coverage_file].nil?
42
- $stderr.puts "Options not correctly specified. Found:"
43
- pp options
44
- $stderr.puts o
45
- exit 1
46
- end
47
- # Setup logging
48
- Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
49
-
50
- # read in fasta file of trails
51
- log.info "Reading trail sequences from #{options[:trails_fasta_file]}"
52
- fasta_seqs = {}
53
- Bio::FlatFile.foreach(options[:trails_fasta_file]) do |e|
54
- name = e.definition
55
- seq = e.seq.seq
56
- fasta_seqs[name]=seq
57
- end
58
-
59
- log.info "Reading kmer abundances from #{options[:kmer_multiple_abundance_file]}.."
60
- kmer_hash = Bio::KmerMultipleAbundanceHash.parse_from_file options[:kmer_multiple_abundance_file]
61
- log.info "Finished reading the kmer abundances"
62
-
63
- if options[:output_trail_kmer_coverage_file]
64
- log.info "Writing out kmer coverages to #{options[:output_trail_kmer_coverage_file]}.."
65
- writer = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new
66
- io = File.open(options[:output_trail_kmer_coverage_file],'w')
67
- fasta_seqs.each do |name, seq|
68
- log.debug "Writing coverages for #{name}"
69
- writer.write_depths(io, name, seq, kmer_hash)
70
- end
71
- log.info "Finished writing"
72
- end
73
-
74
- #log.info "Filtering trail(s) based on kmer coverage, requiring each kmer in the path to have a minimum of #{options[:kmer_path_filter_min_coverage]} coverage in patterned reads, except for the #{options[:kmer_path_end_exclusion_length]}bp at the ends"
75
- #kmer_path_filter = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new
76
- #thresholds = desired_pattern.collect{|c| c == true ? 1 : 0}
77
- #log.info "Using thresholds for filtering: #{thresholds}"
78
- #trails = kmer_path_filter.filter(trails, kmer_hash, thresholds, :exclude_ending_length => options[:kmer_path_end_exclusion_length])
79
- #log.info "After filtering remained #{trails.length} trails"
80
-
81
- #trails.each_with_index do |trail, i|
82
- # puts ">trail#{i+1}"
83
- # puts trail.sequence
84
- #end