lederhosen 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/lib/lederhosen/cli.rb +7 -204
  2. data/lib/version.rb +1 -1
  3. metadata +3 -3
@@ -1,212 +1,15 @@
1
1
  module Lederhosen
2
2
  class CLI < Thor
3
-
4
- ##
5
- # QUALITY TRIMMING
6
- #
7
- desc "trim Illumina QSEQ files", "--reads_dir=reads/* --out_dir=trimmed.fasta"
8
- method_options :reads_dir => :string, :out_dir => :string
9
- def trim
10
3
 
11
- raw_reads = options[:reads_dir]
12
- out_dir = options[:out_dir] || 'trimmed/'
13
-
14
- `mkdir -p #{out_dir}`
15
-
16
- raw_reads = Helpers.get_grouped_qseq_files raw_reads
17
- puts "found #{raw_reads.length} pairs of reads"
18
- puts "trimming!"
19
-
20
- pbar = ProgressBar.new "trimming", raw_reads.length
21
-
22
- raw_reads.each do |a|
23
- pbar.inc
24
- out = File.join(out_dir, "#{File.basename(a[0])}.fasta")
25
- # TODO get total and trimmed
26
- total, trimmed = Helpers.trim_pairs a[1][0], a[1][1], out, :min_length => 70
27
- end
28
-
29
- pbar.finish
30
- end
31
-
32
- ##
33
- # PAIRED-END READ WORK-AROUND (JOIN THEM)
34
- #
35
- desc "join reads end-to-end", "--trimmed=trimmed/*.fasta --output=joined.fasta"
36
- method_options :trimmed => :string, :output => :string
37
- def join
38
- puts "joining!"
39
-
40
- trimmed = Dir[options[:trimmed] || 'trimmed/*.fasta']
41
- output = options[:output] || 'joined.fasta'
42
-
43
- fail "no reads in #{trimmed}" if trimmed.length == 0
44
-
45
- output = File.open(output, 'w')
46
-
47
- pbar = ProgressBar.new "joining", trimmed.length
48
-
49
- trimmed.each do |fasta_file|
50
- pbar.inc
51
- records = Dna.new File.open(fasta_file)
52
- records.each_slice(2) do |r, l|
53
- output.puts ">#{r.name}:#{File.basename(fasta_file, '.fasta')}\n#{r.sequence.reverse+l.sequence}"
54
- end
4
+ no_tasks do
5
+ # just print string to STDERR
6
+ def ohai(s)
7
+ $stderr.puts s
55
8
  end
56
- pbar.finish
57
9
  end
58
10
 
59
- ##
60
- # SORT JOINED READS BY LENGTH
61
- #
62
- desc "sort fasta file by length", "--input=joined.fasta --output=sorted.fasta"
63
- method_options :input => :string, :output => :string
64
- def sort
65
- input = options[:input] || 'joined.fasta'
66
- output = options[:output] || 'sorted.fasta'
67
- `uclust --mergesort #{input} --output #{output}`
68
- end
69
-
70
- ##
71
- # FINALLY, CLUSTER!
72
- #
73
- desc "cluster fasta file", "--input=sorted.fasta --identity=0.80 --output=clusters.uc"
74
- method_options :input => :string, :output => :string, :identity => :float
75
- def cluster
76
- identity = options[:identity] || 0.8
77
- output = options[:output] || 'clusters.uc'
78
- input = options[:input] || 'sorted.fasta'
79
-
80
- cmd = [
81
- 'uclust',
82
- "--input #{input}",
83
- "--uc #{output}",
84
- "--id #{identity}",
85
- ].join(' ')
86
- exec cmd
87
- end
88
-
89
- ##
90
- # MAKE TABLES
91
- #
92
- desc "otu_tables generates otu tables & representative reads", "--clusters=clusters.uc --output=otu_prefix --joined=joined.fasta"
93
- method_options :clusters => :string, :output => :string, :joined => :string
94
- def otu_table
95
- input = options[:clusters] || 'clusters.uc'
96
- output = options[:output] || 'otus'
97
- joined_reads = options[:joined] || 'joined.fasta'
98
-
99
- clusters = Hash.new
100
-
101
- # Load cluster table!
102
- clusters = Helpers.load_uc_file(input)
103
-
104
- clusters_total = clusters[:count_data].values.collect{ |x| x[:total] }.inject(:+)
105
-
106
- # Get representative sequences!
107
- reads_total = 0
108
- representatives = {}
109
- clusters[:count_data].each{ |k, x| representatives[x[:seed]] = k }
110
-
111
- out_handle = File.open("#{output}.fasta", 'w')
112
-
113
- File.open(joined_reads) do |handle|
114
- records = Dna.new handle
115
- records.each do |dna|
116
- reads_total += 1
117
- if !representatives[dna.name].nil?
118
- dna.name = "#{dna.name}:cluster_#{representatives[dna.name]}"
119
- out_handle.puts dna
120
- end
121
- end
122
- end
123
-
124
- out_handle.close
125
-
126
- # Print some statistics
127
- puts "reads in clusters: #{clusters_total}"
128
- puts "number of reads: #{reads_total}"
129
- puts "unique clusters: #{clusters.keys.length}"
130
-
131
- # print OTU abundancy matrix
132
- csv = Helpers.cluster_data_as_csv(clusters)
133
- File.open("#{output}.csv", 'w') do |h|
134
- h.puts csv
135
- end
136
-
137
- end
138
-
139
- ##
140
- # Create a fasta file with nucleotide sequences for each cluster larger than a cutoff
141
- #
142
- desc "output separate fasta file containing sequences belonging to each cluster", "--clusters=clusters.uc --reads=joined.fasta --min-clst-size=100"
143
- method_options :clusters => :string, :reads=> :string, :buffer_size => :int, :min_clst_size => :int, :out_dir => :string
144
- def split
145
- clusters = options[:clusters] || 'clusters.uc'
146
- reads = options[:reads] || 'joined.fasta'
147
- out_dir = options[:out_dir] || 'clusters_split'
148
- buffer_size = (options[:buffer_size] || 1000).to_i
149
- min_clst_size = (options[:min_clst_size] || 100).to_i
150
- finalize_every = 100_000
151
-
152
- `mkdir -p #{out_dir}/`
153
-
154
- puts "loading #{clusters}"
155
-
156
- # Load read id -> cluster
157
- read_to_clusterid = Hash.new
158
-
159
- # keep track of cluster sizes
160
- cluster_counts = Hash.new { |h, k| h[k] = 0}
161
-
162
- File.open(clusters)do |handle|
163
- handle.each do |line|
164
- line = line.strip.split
165
- cluster_nr = line[1]
166
- if line[0] == 'S' || line[0] == 'H'
167
- read = line[8]
168
- else
169
- next
170
- end
171
- read_to_clusterid[read] = cluster_nr
172
- cluster_counts[cluster_nr] += 1
173
- end
174
- end
175
-
176
- read_to_clusterid.delete_if do |read, cluster_nr|
177
- cluster_counts[cluster_nr] < min_clst_size
178
- end
179
-
180
- total_reads = read_to_clusterid.length
181
- total_clusters = read_to_clusterid.values.uniq.length
182
- puts "#{total_reads} reads in #{total_clusters} clusters"
183
-
184
- puts "writing out fasta files"
185
-
186
- pbar = ProgressBar.new "writing", total_reads
187
-
188
- # Write reads to individual fasta files using Buffer
189
- buffer = Buffer.new :buffer_max => buffer_size
190
- File.open(reads) do |handle|
191
- records = Dna.new handle
192
- $stderr.puts "reads = #{reads}"
193
- records.each_with_index do |record, i|
194
- cluster_id = read_to_clusterid[record.name]
195
- if cluster_id
196
- pbar.inc
197
- filename = File.join(out_dir, cluster_id + '.fasta')
198
- buffer[filename] << record
199
- buffer.finalize if (i%finalize_every == 0)
200
- end
201
- end
202
- end
203
-
204
- pbar.finish
205
- puts "finalizing output"
206
- buffer.finalize # finish writing out
11
+ end # class CLI
207
12
 
208
- puts "done"
209
- end
13
+ end # module
210
14
 
211
- end # class CLI
212
- end # module
15
+ Dir.glob(File.join(File.dirname(__FILE__), 'tasks', '*.rb')).each { |f| require f }
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Lederhosen
2
- VERSION = '0.0.2'
2
+ VERSION = '0.0.3'
3
3
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson