finishm 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +19 -19
- data/VERSION +1 -1
- data/finishm.gemspec +631 -0
- data/lib/assembly/graph_generator.rb +0 -1
- data/lib/assembly/probed_graph.rb +0 -1
- metadata +99 -96
- data/bin/assembly_visualiser +0 -106
- data/bin/check_primer_combinations.rb +0 -73
- data/bin/contig_joiner.rb +0 -244
- data/bin/contigs_against_assembly.rb +0 -153
- data/bin/finishm_assembler +0 -55
- data/bin/finishm_gap_closer.rb +0 -241
- data/bin/kmer_abundance_file_tool.rb +0 -49
- data/bin/kmer_pattern_to_assembly.rb +0 -377
- data/bin/kmer_profile_finder.rb +0 -92
- data/bin/kmers_count_parse.d +0 -52
- data/bin/kmers_count_tabulate.d +0 -123
- data/bin/kmers_count_tabulate.rb +0 -84
- data/bin/pcr_result_parser.rb +0 -108
- data/bin/primer_finder.rb +0 -119
- data/bin/read_selection_by_kmer.d +0 -174
- data/bin/scaffold_by_pattern.rb +0 -119
- data/bin/scaffold_connection_possibilities_to_knowns.rb +0 -193
- data/bin/scaffold_end_coverages.rb +0 -69
- data/bin/trail_validator.rb +0 -84
@@ -1,69 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby

require 'optparse'
require 'bio-logger'
require 'bio-samtools'
require 'bio'

# For every entry in a reference FASTA file, print the average coverage
# reported by the BAM file over a window at the start of the sequence and a
# window of the same size at its end. Output is a tab-separated table on
# STDOUT with the columns Reference, StartCoverage and EndCoverage.

if __FILE__ == $0 # this guard needs to be removed if the script is distributed as part of a rubygem
  SCRIPT_NAME = File.basename(__FILE__)
  LOG_NAME = SCRIPT_NAME.gsub('.rb','')

  # Defaults; the option handlers below overwrite these entries in place.
  options = {
    :logger => 'stderr',
    :contig_end_length => 200,
  }

  o = OptionParser.new do |opts|
    opts.banner = "\nUsage: #{SCRIPT_NAME} <arguments>\n\n" \
      "Takes a sorted, indexed BAM file and outputs the coverages of each of the reference sequences\n\n"

    opts.on("-b", "--bam BAM_FILE", "BAM file that defines overall mapping/coverage [required]") { |path| options[:bam_file] = path }
    opts.on("-f", "--reference-fasta FASTA_FILE", "FASTA file of the reference [required]") { |path| options[:fasta_file] = path }

    opts.on("-l", "--end-length LENGTH", "How far from the end to count [default: #{options[:contig_end_length]}]") do |raw|
      # String#to_i yields 0 for non-numeric input, which the check below rejects.
      parsed = raw.to_i
      options[:contig_end_length] = parsed
      if parsed < 1
        raise "Inappropriate end length detected, I need a positive number, found #{parsed} parsed from #{raw}"
      end
    end

    # logger options
    opts.separator "\n\tVerbosity:\n\n"
    opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") { Bio::Log::CLI.trace('error') }
    opts.on("--logger filename", String, "Log to file [default #{options[:logger]}]") { |target| options[:logger] = target }
    opts.on("--trace options", String, "Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG") { |level| Bio::Log::CLI.trace(level) }
  end
  o.parse!

  # Both input files are mandatory and no positional arguments are accepted.
  inputs_given = !options[:bam_file].nil? && !options[:fasta_file].nil?
  unless ARGV.empty? && inputs_given
    $stderr.puts o
    exit 1
  end

  # Set up logging on the destination chosen via --logger ('stderr' unless overridden).
  Bio::Log::CLI.logger(options[:logger])
  log = Bio::Log::LoggerPlus.new(LOG_NAME)
  Bio::Log::CLI.configure(LOG_NAME)

  # Open the BAM file (together with its reference FASTA) for reading.
  bam = Bio::DB::Sam.new(:bam => options[:bam_file], :fasta => options[:fasta_file])
  bam.open

  puts "Reference\tStartCoverage\tEndCoverage"

  # The references are taken from the FASTA file rather than from
  # bam.each_reference, because the latter writes extraneous output to STDOUT.
  Bio::FlatFile.auto(options[:fasta_file]).each_entry do |entry|
    # NOTE(review): the full FASTA definition line is used as the reference
    # name; presumably it has to match the name recorded in the BAM - confirm
    # for headers that carry a description after the identifier.
    name = entry.definition
    total_length = entry.length
    next if name == '*' # ignore unmapped reads

    # Sequences shorter than the requested window are measured over their whole length.
    window = [total_length, options[:contig_end_length]].min

    start_coverage = bam.average_coverage(name, 1, window)
    end_coverage = bam.average_coverage(name, total_length - window + 1, window)
    puts [name, start_coverage, end_coverage].join("\t")
  end

  exit
end # end if running as a script
|
data/bin/trail_validator.rb
DELETED
@@ -1,84 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby

require 'optparse'
require 'bio-logger'
require 'pp'

SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = 'finishm'
# Make the project's lib directory loadable before requiring project code.
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
require 'priner'

# Reads trail sequences from a FASTA file and a kmer multiple abundance file,
# then writes the kmer coverages of every trail to an output file.

# Parse command line options into the options hash
options = {
  :logger => 'stderr',
  :log_level => 'info',
}

o = OptionParser.new do |opts|
  opts.banner = "
Usage: #{SCRIPT_NAME} --trails <trail(s) fasta> --kmer-abundances <abundances.csv> --output-kmer-coverages <output file>

Given a fasta file of trail(s) and a kmer multiple abundance file, write out the kmer coverages of each trail\n\n"

  opts.on("--trails FASTA_FILE", "fasta file of trail(s) to be tested [required]") do |arg|
    options[:trails_fasta_file] = arg
  end
  opts.on("--kmer-abundances FILE", "kmer multiple abundance file [required]") do |arg|
    options[:kmer_multiple_abundance_file] = arg
  end
  # Writing the coverages is currently the only thing this script does, so the
  # output file is enforced as mandatory by the validation below.
  opts.on("--output-kmer-coverages FILE", "output kmer coverages across each library across the contigs [required]") do |arg|
    options[:output_trail_kmer_coverage_file] = arg
  end

  # logger options
  opts.separator "\nVerbosity:\n\n"
  opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
  opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
  opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
end
o.parse!

# No positional arguments are accepted and all three file options must be given.
required_options = [:trails_fasta_file, :kmer_multiple_abundance_file, :output_trail_kmer_coverage_file]
if !ARGV.empty? || required_options.any? { |key| options[key].nil? }
  $stderr.puts "Options not correctly specified. Found:"
  # Send the dump to STDERR together with the rest of the error report.
  PP.pp(options, $stderr)
  $stderr.puts o
  exit 1
end

# Setup logging
Bio::Log::CLI.logger(options[:logger])
Bio::Log::CLI.trace(options[:log_level])
log = Bio::Log::LoggerPlus.new(LOG_NAME)
Bio::Log::CLI.configure(LOG_NAME)

# read in fasta file of trails
log.info "Reading trail sequences from #{options[:trails_fasta_file]}"
fasta_seqs = {}
Bio::FlatFile.foreach(options[:trails_fasta_file]) do |e|
  # Entries sharing the same definition line overwrite each other.
  fasta_seqs[e.definition] = e.seq.seq
end

log.info "Reading kmer abundances from #{options[:kmer_multiple_abundance_file]}.."
kmer_hash = Bio::KmerMultipleAbundanceHash.parse_from_file options[:kmer_multiple_abundance_file]
log.info "Finished reading the kmer abundances"

log.info "Writing out kmer coverages to #{options[:output_trail_kmer_coverage_file]}.."
writer = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new
# The block form closes (and therefore flushes) the file before completion is logged.
File.open(options[:output_trail_kmer_coverage_file],'w') do |io|
  fasta_seqs.each do |name, seq|
    log.debug "Writing coverages for #{name}"
    writer.write_depths(io, name, seq, kmer_hash)
  end
end
log.info "Finished writing"

#log.info "Filtering trail(s) based on kmer coverage, requiring each kmer in the path to have a minimum of #{options[:kmer_path_filter_min_coverage]} coverage in patterned reads, except for the #{options[:kmer_path_end_exclusion_length]}bp at the ends"
#kmer_path_filter = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new
#thresholds = desired_pattern.collect{|c| c == true ? 1 : 0}
#log.info "Using thresholds for filtering: #{thresholds}"
#trails = kmer_path_filter.filter(trails, kmer_hash, thresholds, :exclude_ending_length => options[:kmer_path_end_exclusion_length])
#log.info "After filtering remained #{trails.length} trails"

#trails.each_with_index do |trail, i|
#  puts ">trail#{i+1}"
#  puts trail.sequence
#end
|