finishm 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,92 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'optparse'
4
- require 'bio-logger'
5
- require 'csv'
6
-
7
- SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
8
-
9
- # Parse command line options into the options hash
10
- options = {
11
- :logger => 'stderr',
12
- :log_level => 'info',
13
- }
14
- o = OptionParser.new do |opts|
15
- opts.banner = "
16
- Usage: #{SCRIPT_NAME} <kmer_multiple_abundance_file>
17
-
18
- Given an input kmer then abundances space separated file, and a threshold, print out how many kmers are unique to different subsets of columns\n\n"
19
-
20
- opts.on("--upper-threshold ARG", "kmer frequency cutoff to saying 'present' [required]") do |arg|
21
- options[:upper_threshold] = arg.to_i
22
- end
23
- opts.on("--lower-threshold ARG", "kmer frequency cutoff to saying 'not present' [required]") do |arg|
24
- options[:lower_threshold] = arg.to_i
25
- end
26
-
27
- # logger options
28
- opts.separator "\nVerbosity:\n\n"
29
- opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
30
- opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
31
- opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
32
- end; o.parse!
33
- if ARGV.length != 1 or options[:upper_threshold].nil? or options[:lower_threshold].nil?
34
- $stderr.puts o
35
- exit 1
36
- end
37
- # Setup logging
38
- Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
39
-
40
- encoded_counts = {}
41
- max_i = 0
42
-
43
- input_file = nil
44
- if ARGV[0] == '-'
45
- input_file = $stdin
46
- else
47
- input_file = File.open ARGV[0]
48
- end
49
- csv = CSV.new(input_file, :col_sep => ' ')
50
-
51
- csv.each do |row|
52
- kmer = row[0]
53
- counts = row[1...row.length].collect{|s| s.to_i}
54
- index = 0
55
- counts.each_with_index do |count, i|
56
- max_i = i if i > max_i
57
-
58
- if count > options[:upper_threshold]
59
- increment = (1<<i)
60
- index += increment
61
- log.debug "Found a passable for #{options[:threshold]} in index #{i} for #{counts}, count is now #{index}" if log.debug?
62
- elsif count < options[:lower_threshold]
63
- # do nothing
64
- else
65
- # coverage was in no man's land between thresholds.
66
- # Ignore this kmer as noise.
67
- break
68
- end
69
- end
70
-
71
- if index != 0
72
- encoded_counts[index] ||= 0
73
- encoded_counts[index] += 1
74
- end
75
- end
76
-
77
- (0..encoded_counts.keys.max).each do |i|
78
- total = encoded_counts[i]
79
- unless total.nil?
80
- unencoded = i.to_s(2)
81
-
82
- while unencoded.length <= max_i
83
- unencoded = '0'+unencoded
84
- end
85
-
86
- puts [
87
- i,
88
- total,
89
- unencoded,
90
- ].join "\t"
91
- end
92
- end
@@ -1,52 +0,0 @@
1
- #!/usr/bin/env rdmd
2
-
3
- import std.stdio;
4
- import std.csv;
5
- import std.typecons;
6
- import std.getopt;
7
- import std.algorithm;
8
-
9
-
10
-
11
- void main(string[] args)
12
- {
13
- bool usePercentages = false;
14
- int minCount = 1;
15
- string trace = "info";
16
- getopt(args,
17
- "percentage", &usePercentages,
18
- "min-count", &minCount,
19
- "trace", &trace
20
- );
21
-
22
- auto kmersFile = File(args[1]);
23
-
24
- int lineCount = 0;
25
- foreach (line; kmersFile.byLine()) {
26
- lineCount += 1;
27
- if (lineCount % (1024*1024) == 0){
28
- stderr.writeln("Parsed ",lineCount, " lines");
29
- }
30
- auto reader = csvReader!(Tuple!(string,
31
- int, int, int, int, int,
32
- int, int, int, int, int,
33
- int, int, int, int, int,
34
- int, int, int, int, int,
35
- int, int, int, int, int,
36
- int, int, int, int, int,
37
- int, int))(line, ' ');
38
- foreach (record; reader) {
39
- /*writeln(record[0]);
40
- writeln(record[1]);
41
- writeln(record[1..32]);*/
42
- int[] range = [1,2,3,4];
43
- int sum = 0;
44
- foreach(T; record[1..32]){
45
- sum += T;
46
- }
47
- if (sum >= minCount){
48
- writeln(line);
49
- }
50
- }
51
- }
52
- }
@@ -1,123 +0,0 @@
1
- #!/usr/bin/env rdmd
2
-
3
- import std.stdio;
4
- import std.conv;
5
- import std.string;
6
- import std.regex;
7
- import std.getopt;
8
-
9
- void main(string[] args){
10
- bool usePercentages = false;
11
- int minCount = 1;
12
- string trace = "info";
13
- getopt(args,
14
- "percentage", &usePercentages,
15
- "min-count", &minCount,
16
- "trace", &trace
17
- );
18
-
19
- //Create an array of open file handles, one for each argument given
20
- auto filenames = args[1 .. $];
21
- int[] totalCounts = new int[filenames.length];
22
- int kmerLength = 0;
23
-
24
- foreach(i, file; filenames){
25
- int count = 0;
26
- auto f = File(file);
27
- char[] line;
28
- while (f.readln(line)){
29
- //foreach(line; f.byLine()){
30
- if (kmerLength==0){
31
- kmerLength = 101;//to!int(indexOf(line, " "));
32
- stderr.writeln("Detected kmer length of ",kmerLength);
33
- }
34
- int thisCount = to!int(line[kmerLength+1 .. $-1]);
35
- //int thisCount = to!int(line[kmerLength+1 .. $]);
36
- count += thisCount;
37
- //if (count > 4000002){break;}
38
- }
39
- totalCounts[i] = count;
40
- }
41
- stderr.writeln("Finished ccounting total kmers, totals were: ",totalCounts);
42
-
43
- if(false){
44
- bool allFinished = false;
45
- bool[] finished = new bool[filenames.length];
46
- foreach (f; finished){f=false;}
47
-
48
- File[] files = new File[filenames.length];
49
- foreach(i; 0 .. files.length){
50
- files[i] = File(filenames[i]);
51
- }
52
-
53
- struct KmerCount {
54
- char[101] kmer;
55
- int count;
56
- }
57
- KmerCount[] currentRows = new KmerCount[files.length];
58
- char[] lineBuffer;
59
- foreach (i; 0..currentRows.length){
60
- //read in the line to teh buffer, so memory is not reallocated
61
- files[i].readln(lineBuffer);
62
- currentRows[i].kmer = chomp(files[i].readln(currentRows[i].kmer))[0..kmerLength];
63
- currentRows[i].count = to!int(line[kmerLength+1..$]);
64
- }
65
-
66
- //write headers
67
- enum ctr = ctRegex!(".*/(.+)");
68
- foreach(f; filenames){
69
- write("\t",match(f, ctr).captures[1]);
70
- }
71
- writeln();
72
-
73
- string[] toPrint = new string[filenames.length+1];
74
- int kmersCounted = 0;
75
- while (!allFinished){
76
- kmersCounted += 1;
77
- if (kmersCounted % (1024*1024) == 0){stderr.writeln("Processed ",kmersCounted," kmers.");}
78
- //Find the lowest kmer
79
- string lowestKmer = null;
80
- foreach (kc; currentRows){
81
- if (lowestKmer == null || kc.kmer < lowestKmer){
82
- lowestKmer = kc.kmer;
83
- }
84
- }
85
-
86
- //Go through each file, printing the number of this kmer found
87
- int totalObservations = 0;
88
- toPrint[0] = lowestKmer;
89
- foreach (i, kc; currentRows){
90
- if (kc.kmer == lowestKmer){
91
- totalObservations += kc.count;
92
- if (usePercentages){
93
- toPrint[i+1] = to!string(to!float(kc.count)/totalCounts[i]);
94
- } else {
95
- toPrint[i+1] = to!string(kc.count);
96
- }
97
-
98
- // Read a new line in, check if this file is finished
99
- auto line = files[i].readln;
100
- if (line == null){
101
- finished[i] = true;
102
- allFinished = true; //guilty until proven innocent
103
- foreach(f; finished){
104
- if (!f){
105
- allFinished = false;
106
- }
107
- }
108
- currentRows[i].kmer = null;
109
- currentRows[i].count = -1;
110
- } else {
111
- //Regular line to be read in
112
- currentRows[i].kmer = line[0..kmerLength];
113
- currentRows[i].count = to!int(line[kmerLength+1..$-1]);
114
- }
115
- } else {
116
- toPrint[i+1] = "0";
117
- }
118
- }
119
- if (totalObservations >= minCount){
120
- writeln(join(toPrint, "\t"));
121
- }
122
- }
123
- }}
@@ -1,84 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'optparse'
4
- require 'bio-logger'
5
- require 'csv'
6
- require 'progressbar'
7
- require 'tempfile'
8
- require 'systemu'
9
-
10
- SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
11
-
12
- # Parse command line options into the options hash
13
- options = {
14
- :logger => 'stderr',
15
- :log_level => 'info',
16
- :min_count => 1,
17
- }
18
- o = OptionParser.new do |opts|
19
- opts.banner = "
20
- Usage: #{SCRIPT_NAME} <kmers_count_output1> [<kmers_count_output2> ..]
21
-
22
- Take a list of files output from libngs' kmers_count tool, after being run through gnu sort.
23
-
24
- Create a table, where the columns are each file, the rows are each kmer, and
25
- the cells are the percent of that file's kmer actually is that kmer.\n\n"
26
-
27
-
28
- opts.on("--output-file FILENAME", "Output file path [required]") do |arg|
29
- options[:output_file] = arg
30
- end
31
-
32
- opts.on("--percentage", "description [default: #{options[:eg]}]") do
33
- raise "not yet implemented"
34
- options[:percentage_outputs] = true
35
- end
36
- opts.on("--min-count COUNT", "require at least this many kmers to be output into the output file [default: #{options[:min_count]}]") do |arg|
37
- raise "not yet implemented"
38
- options[:min_count] = arg.to_i
39
- end
40
-
41
- # logger options
42
- opts.separator "\nVerbosity:\n\n"
43
- opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
44
- opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
45
- opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
46
- end; o.parse!
47
- if ARGV.length == 0 or options[:output_file].nil?
48
- $stderr.puts o
49
- exit 1
50
- end
51
- # Setup logging
52
- Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
53
-
54
- filenames = ARGV
55
- raise "I need more than 1 file" unless filenames.length > 1
56
- log.info "Joining these files: #{filenames.inspect}"
57
-
58
- # run gnu join on each file
59
- current_build_file = filenames[0] #Build off the current build file first, then a tempfile subsequently
60
-
61
- Tempfile.open('kmers_join1') do |tempfile1|
62
- Tempfile.open('kmers_join2') do |tempfile2|
63
- filenames.each_with_index do |file, i|
64
- next if i==0
65
-
66
- first_file_output_fields = (2..(i+1)).to_a.collect{|n| "1.#{n.to_s}"}.join(',')
67
- cmd = "join -a1 -a2 -e 0 -o0,#{first_file_output_fields},2.2 #{current_build_file.inspect} #{file} >#{tempfile2.path}"
68
- log.info "At #{Time.now}, running #{cmd}.."
69
- status, stdout, stderr = systemu cmd
70
- raise stderr unless stderr == ''
71
- raise 'exitstatus bad1!' unless status.exitstatus == 0
72
- status, stdout, stderr = systemu "mv #{tempfile2.path} #{tempfile1.path}"
73
- raise stderr unless stderr == ''
74
- raise 'exitstatus bad2!' unless status.exitstatus == 0
75
- current_build_file = tempfile1.path
76
- end
77
- status, stdout, stderr = systemu "mv #{current_build_file} #{options[:output_file]}"
78
- raise stderr unless stderr == ''
79
- raise 'exitstatus bad3!' unless status.exitstatus == 0
80
- end
81
- end
82
-
83
-
84
-
@@ -1,108 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'optparse'
4
- require 'bio-logger'
5
- require 'csv'
6
-
7
- SCRIPT_NAME = File.basename(__FILE__); LOG_NAME = SCRIPT_NAME.gsub('.rb','')
8
-
9
- # Parse command line options into the options hash
10
- options = {
11
- :logger => 'stderr',
12
- :log_level => 'info',
13
- }
14
- o = OptionParser.new do |opts|
15
- opts.banner = "
16
- Usage: #{SCRIPT_NAME} <arguments>
17
-
18
- Takes a list of PCR primers that were put in several lanes (not all primers in all lanes), and a list of bands that were found, and decipher which bands are the result of which primer pairs, as best as possible\n\n"
19
-
20
- opts.on("--bands-file FILE", "tsv file, with the band names as the first column, and the lane numbers that they appear in as the second column (comma separated) [required]") do |arg|
21
- options[:bands_file] = arg
22
- end
23
- opts.on("--primers-file FILE", "tsv file, with the lane names as the first column, and the set of primers numbers that are in each lane as the second column (comma separated) [required]") do |arg|
24
- options[:primers_file] = arg
25
- end
26
-
27
- # logger options
28
- opts.separator "\nVerbosity:\n\n"
29
- opts.on("-q", "--quiet", "Run quietly, set logging to ERROR level [default INFO]") {options[:log_level] = 'error'}
30
- opts.on("--logger filename",String,"Log to file [default #{options[:logger]}]") { |name| options[:logger] = name}
31
- opts.on("--trace options",String,"Set log level [default INFO]. e.g. '--trace debug' to set logging level to DEBUG"){|s| options[:log_level] = s}
32
- end; o.parse!
33
- if ARGV.length != 0 or options[:bands_file].nil? or options[:primers_file].nil?
34
- $stderr.puts o
35
- exit 1
36
- end
37
- # Setup logging
38
- Bio::Log::CLI.logger(options[:logger]); Bio::Log::CLI.trace(options[:log_level]); log = Bio::Log::LoggerPlus.new(LOG_NAME); Bio::Log::CLI.configure(LOG_NAME)
39
-
40
-
41
- # Read in the bands
42
- bands_to_lanes = {}
43
- CSV.foreach(options[:bands_file], :col_sep => "\t") do |row|
44
- raise "Malformed bands file in this line: #{row.inspect}" unless row.length == 2
45
-
46
- band_name = row[0]
47
- raise "Two bands were labeled the same way, as #{band_name.inspect}" if bands_to_lanes.key?(band_name)
48
-
49
- lanes_of_this_band = row[1].split(/[,\s]/).collect{|c| c.strip}
50
- bands_to_lanes[band_name] = lanes_of_this_band
51
- end
52
- log.info "Parsed in #{bands_to_lanes.length} bands, found #{bands_to_lanes.collect{|k,v| v.length}.join(',')} lanes each, respectively"
53
-
54
- # Read in the primer sets
55
- lanes_to_primers = {}
56
- CSV.foreach(options[:primers_file], :col_sep => "\t") do |row|
57
- raise "Malformed primers file in this line: #{row.inspect}" unless row.length == 2
58
-
59
- lane_name = row[0]
60
- raise "Two lanes were labeled the same way, as #{lane_name.inspect}" if lanes_to_primers.key?(lane_name)
61
-
62
- primers_of_this_band = row[1].split(/[,\s]/).collect{|c| c.strip}
63
- lanes_to_primers[lane_name] = primers_of_this_band
64
- end
65
- log.info "Parsed in #{lanes_to_primers.length} lanes, with #{lanes_to_primers.collect{|k,v| v.length}.join(',')} primers each, respectively"
66
-
67
-
68
- # Go through each pairing of primers. Which primer sets explain each band?
69
- all_primers = lanes_to_primers.values.flatten.sort.uniq
70
- lanes = lanes_to_primers.keys
71
- bands = bands_to_lanes.keys
72
-
73
- bands_to_explaining_primer_pairs = {}
74
-
75
- bands.each do |band|
76
- log.debug "Going after band #{band}"
77
- all_primers.combination(2) do |array|
78
-
79
- primer1 = array.sort[0]
80
- primer2 = array.sort[1]
81
- log.debug "Testing primers #{primer1}/#{primer2} agreement with band #{band}"
82
-
83
- band_agrees_with_this_primer_pair = true
84
- lanes.each do |lane|
85
- band_is_in_this_lane = bands_to_lanes[band].include?(lane)
86
- primers_here = lanes_to_primers[lane]
87
- if band_is_in_this_lane and (!primers_here.include?(primer1) or !primers_here.include?(primer2))
88
- log.debug "primer pair #{primer1}/#{primer2} fails for band #{band} because band was present but at least one primer wasn't, in lane #{lane}"
89
- band_agrees_with_this_primer_pair = false
90
- end
91
- if !band_is_in_this_lane and (primers_here.include?(primer1) and primers_here.include?(primer2))
92
- log.debug "primer pair #{primer1}/#{primer2} fails for band #{band} because band was not present but both primers were, in lane #{lane}"
93
- band_agrees_with_this_primer_pair = false
94
- end
95
- end
96
-
97
- if band_agrees_with_this_primer_pair
98
- log.debug "Found a suitable pair of primers for band #{band}: #{primer1}/#{primer2}"
99
- bands_to_explaining_primer_pairs[band] ||= []
100
- bands_to_explaining_primer_pairs[band].push array
101
- end
102
- end
103
-
104
- puts [
105
- band,
106
- bands_to_explaining_primer_pairs[band].nil? ? 'none' : bands_to_explaining_primer_pairs[band].collect{|a| "(#{a.join(',')})"}.join(', ')
107
- ].join("\t")
108
- end