finishm 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +19 -19
- data/VERSION +1 -1
- data/finishm.gemspec +631 -0
- data/lib/assembly/graph_generator.rb +0 -1
- data/lib/assembly/probed_graph.rb +0 -1
- metadata +99 -96
- data/bin/assembly_visualiser +0 -106
- data/bin/check_primer_combinations.rb +0 -73
- data/bin/contig_joiner.rb +0 -244
- data/bin/contigs_against_assembly.rb +0 -153
- data/bin/finishm_assembler +0 -55
- data/bin/finishm_gap_closer.rb +0 -241
- data/bin/kmer_abundance_file_tool.rb +0 -49
- data/bin/kmer_pattern_to_assembly.rb +0 -377
- data/bin/kmer_profile_finder.rb +0 -92
- data/bin/kmers_count_parse.d +0 -52
- data/bin/kmers_count_tabulate.d +0 -123
- data/bin/kmers_count_tabulate.rb +0 -84
- data/bin/pcr_result_parser.rb +0 -108
- data/bin/primer_finder.rb +0 -119
- data/bin/read_selection_by_kmer.d +0 -174
- data/bin/scaffold_by_pattern.rb +0 -119
- data/bin/scaffold_connection_possibilities_to_knowns.rb +0 -193
- data/bin/scaffold_end_coverages.rb +0 -69
- data/bin/trail_validator.rb +0 -84
@@ -1,69 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'optparse'
|
4
|
-
require 'bio-logger'
|
5
|
-
require 'bio-samtools'
|
6
|
-
require 'bio'
|
7
|
-
|
8
|
-
if __FILE__ == $0 #needs to be removed if this script is distributed as part of a rubygem
  # Prints, for every entry of the reference FASTA, the average coverage that
  # the BAM file reports over a window at the start and at the end of that
  # entry. Output is tab separated on STDOUT with a single header row.
  SCRIPT_NAME = File.basename(__FILE__)
  LOG_NAME = SCRIPT_NAME.gsub('.rb','')

  # Defaults; the option parser below overwrites these in place.
  options = {
    :logger => 'stderr',
    :contig_end_length => 200,
  }

  o = OptionParser.new do |opts|
    opts.banner = "\nUsage: #{SCRIPT_NAME} <arguments>\n\n" \
      "Takes a sorted, indexed BAM file and outputs the coverages of each of the reference sequences\n\n"

    opts.on("-b", "--bam BAM_FILE", "BAM file that defines overall mapping/coverage [required]") do |arg|
      options[:bam_file] = arg
    end
    opts.on("-f", "--reference-fasta FASTA_FILE", "FASTA file of the reference [required]") do |arg|
      options[:fasta_file] = arg
    end

    opts.on("-l", "--end-length LENGTH", "How far from the end to count [default: #{options[:contig_end_length]}]") do |arg|
      # String#to_i yields 0 for non-numeric input, which the check below rejects.
      parsed = arg.to_i
      options[:contig_end_length] = parsed
      if parsed < 1
        raise "Inappropriate end length detected, I need a positive number, found #{parsed} parsed from #{arg}"
      end
    end

    # logger options
    opts.separator "\n\tVerbosity:\n\n"
    opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") do
      Bio::Log::CLI.trace('error')
    end
    opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") do |name|
      options[:logger] = name
    end
    opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG") do |s|
      Bio::Log::CLI.trace(s)
    end
  end
  o.parse!

  # Both input files are mandatory and no positional arguments are accepted;
  # anything else prints the usage to STDERR and exits with status 1.
  unless ARGV.length == 0 && !options[:bam_file].nil? && !options[:fasta_file].nil?
    $stderr.puts o
    exit 1
  end

  # Setup logging. bio-logger defaults to STDERR not STDOUT, I disagree
  Bio::Log::CLI.logger(options[:logger])
  log = Bio::Log::LoggerPlus.new(LOG_NAME)
  Bio::Log::CLI.configure(LOG_NAME)

  # open the BAM file for reading
  bam = Bio::DB::Sam.new(:bam => options[:bam_file], :fasta => options[:fasta_file])
  bam.open

  puts "Reference\tStartCoverage\tEndCoverage"

  # The reference names and lengths are taken from the FASTA file rather than
  # from bam.each_reference, because the latter writes extraneous output to
  # STDOUT.
  requested_window = options[:contig_end_length]
  Bio::FlatFile.auto(options[:fasta_file]).each_entry do |record|
    # NOTE(review): the full FASTA definition line is used as the reference
    # name - confirm it matches the names stored in the BAM header.
    ref = record.definition
    next if ref == '*' #Ignore unmapped reads

    ref_length = record.length
    # Never ask for a window longer than the reference itself.
    end_length = [requested_window, ref_length].min

    # presumably average_coverage takes (reference, 1-based start, window
    # length) - verify against the bio-samtools documentation
    cov_start = bam.average_coverage(ref, 1, end_length)
    cov_end = bam.average_coverage(ref, ref_length-end_length+1, end_length)

    puts "#{ref}\t#{cov_start}\t#{cov_end}"
  end
  exit
end #end if running as a script
|
data/bin/trail_validator.rb
DELETED
@@ -1,84 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'optparse'
|
4
|
-
require 'bio-logger'
|
5
|
-
require 'pp'
|
6
|
-
|
7
|
-
SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = 'finishm'
|
8
|
-
$:.unshift File.join(File.dirname(__FILE__),'..','lib')
|
9
|
-
require 'priner'
|
10
|
-
|
11
|
-
# Parse command line options into the options hash
|
12
|
-
# Script body: parse the command line, read the trail sequences and the kmer
# abundance table, then write per-trail kmer coverages to the requested file.

# Defaults; the option parser below overwrites these in place.
options = {
  :logger => 'stderr',
  :log_level => 'info',
}

o = OptionParser.new do |opts|
  opts.banner = "\nUsage: #{SCRIPT_NAME} --trails <trail(s) fasta> --kmer-abundances <abundances.csv>\n\n" \
    "Given an input kmer set of sequences then abundances space separated file, and a threshold, print out how many kmers are unique to different subsets of columns\n\n"

  opts.on("--trails FASTA_FILE", "fasta file of trail(s) to be tested [required]") do |arg|
    options[:trails_fasta_file] = arg
  end
  opts.on("--kmer-abundances FILE", "kmer multiple abundance file [required]") do |arg|
    options[:kmer_multiple_abundance_file] = arg
  end

  #opts.separator "\nOptional arguments:\n\n"
  opts.on("--output-kmer-coverages FILE", "output kmer coverages across each library across the contigs [default: don't output]") do |arg|
    options[:output_trail_kmer_coverage_file] = arg
  end

  # logger options
  opts.separator "\nVerbosity:\n\n"
  opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
  opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
  opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
end
o.parse!

# NOTE(review): --output-kmer-coverages is described as optional in the help
# text but is enforced as mandatory here; that behaviour is kept unchanged -
# confirm which of the two is intended.
if ARGV.length != 0 ||
    options[:trails_fasta_file].nil? ||
    options[:kmer_multiple_abundance_file].nil? ||
    options[:output_trail_kmer_coverage_file].nil?
  $stderr.puts "Options not correctly specified. Found:"
  # Keep the whole diagnostic on STDERR so STDOUT stays clean.
  PP.pp(options, $stderr)
  $stderr.puts o
  exit 1
end

# Setup logging
Bio::Log::CLI.logger(options[:logger])
Bio::Log::CLI.trace(options[:log_level])
log = Bio::Log::LoggerPlus.new(LOG_NAME)
Bio::Log::CLI.configure(LOG_NAME)

# read in fasta file of trails
log.info "Reading trail sequences from #{options[:trails_fasta_file]}"
# Maps each definition line to its sequence; a repeated definition
# overwrites the earlier entry.
fasta_seqs = {}
Bio::FlatFile.foreach(options[:trails_fasta_file]) do |e|
  fasta_seqs[e.definition] = e.seq.seq
end

log.info "Reading kmer abundances from #{options[:kmer_multiple_abundance_file]}.."
kmer_hash = Bio::KmerMultipleAbundanceHash.parse_from_file options[:kmer_multiple_abundance_file]
log.info "Finished reading the kmer abundances"

if options[:output_trail_kmer_coverage_file]
  log.info "Writing out kmer coverages to #{options[:output_trail_kmer_coverage_file]}.."
  writer = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new
  # Block form closes (and therefore flushes) the file before "Finished
  # writing" is logged, even if write_depths raises part-way through.
  File.open(options[:output_trail_kmer_coverage_file],'w') do |io|
    fasta_seqs.each do |name, seq|
      log.debug "Writing coverages for #{name}"
      writer.write_depths(io, name, seq, kmer_hash)
    end
  end
  log.info "Finished writing"
end
|
73
|
-
|
74
|
-
#log.info "Filtering trail(s) based on kmer coverage, requiring each kmer in the path to have a minimum of #{options[:kmer_path_filter_min_coverage]} coverage in patterned reads, except for the #{options[:kmer_path_end_exclusion_length]}bp at the ends"
|
75
|
-
#kmer_path_filter = Bio::AssemblyGraphAlgorithms::KmerCoverageBasedPathFilter.new
|
76
|
-
#thresholds = desired_pattern.collect{|c| c == true ? 1 : 0}
|
77
|
-
#log.info "Using thresholds for filtering: #{thresholds}"
|
78
|
-
#trails = kmer_path_filter.filter(trails, kmer_hash, thresholds, :exclude_ending_length => options[:kmer_path_end_exclusion_length])
|
79
|
-
#log.info "After filtering remained #{trails.length} trails"
|
80
|
-
|
81
|
-
#trails.each_with_index do |trail, i|
|
82
|
-
# puts ">trail#{i+1}"
|
83
|
-
# puts trail.sequence
|
84
|
-
#end
|