lederhosen 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/lib/lederhosen/cli.rb +7 -204
  2. data/lib/version.rb +1 -1
  3. metadata +3 -3
@@ -1,212 +1,15 @@
1
1
  module Lederhosen
2
2
  class CLI < Thor
3
-
4
- ##
5
- # QUALITY TRIMMING
6
- #
7
- desc "trim Illumina QSEQ files", "--reads_dir=reads/* --out_dir=trimmed.fasta"
8
- method_options :reads_dir => :string, :out_dir => :string
9
- def trim
10
3
 
11
- raw_reads = options[:reads_dir]
12
- out_dir = options[:out_dir] || 'trimmed/'
13
-
14
- `mkdir -p #{out_dir}`
15
-
16
- raw_reads = Helpers.get_grouped_qseq_files raw_reads
17
- puts "found #{raw_reads.length} pairs of reads"
18
- puts "trimming!"
19
-
20
- pbar = ProgressBar.new "trimming", raw_reads.length
21
-
22
- raw_reads.each do |a|
23
- pbar.inc
24
- out = File.join(out_dir, "#{File.basename(a[0])}.fasta")
25
- # TODO get total and trimmed
26
- total, trimmed = Helpers.trim_pairs a[1][0], a[1][1], out, :min_length => 70
27
- end
28
-
29
- pbar.finish
30
- end
31
-
32
- ##
33
- # PAIRED-END READ WORK-AROUND (JOIN THEM)
34
- #
35
- desc "join reads end-to-end", "--trimmed=trimmed/*.fasta --output=joined.fasta"
36
- method_options :trimmed => :string, :output => :string
37
- def join
38
- puts "joining!"
39
-
40
- trimmed = Dir[options[:trimmed] || 'trimmed/*.fasta']
41
- output = options[:output] || 'joined.fasta'
42
-
43
- fail "no reads in #{trimmed}" if trimmed.length == 0
44
-
45
- output = File.open(output, 'w')
46
-
47
- pbar = ProgressBar.new "joining", trimmed.length
48
-
49
- trimmed.each do |fasta_file|
50
- pbar.inc
51
- records = Dna.new File.open(fasta_file)
52
- records.each_slice(2) do |r, l|
53
- output.puts ">#{r.name}:#{File.basename(fasta_file, '.fasta')}\n#{r.sequence.reverse+l.sequence}"
54
- end
4
+ no_tasks do
5
+ # just print string to STDERR
6
+ def ohai(s)
7
+ $stderr.puts s
55
8
  end
56
- pbar.finish
57
9
  end
58
10
 
59
- ##
60
- # SORT JOINED READS BY LENGTH
61
- #
62
- desc "sort fasta file by length", "--input=joined.fasta --output=sorted.fasta"
63
- method_options :input => :string, :output => :string
64
- def sort
65
- input = options[:input] || 'joined.fasta'
66
- output = options[:output] || 'sorted.fasta'
67
- `uclust --mergesort #{input} --output #{output}`
68
- end
69
-
70
- ##
71
- # FINALLY, CLUSTER!
72
- #
73
- desc "cluster fasta file", "--input=sorted.fasta --identity=0.80 --output=clusters.uc"
74
- method_options :input => :string, :output => :string, :identity => :float
75
- def cluster
76
- identity = options[:identity] || 0.8
77
- output = options[:output] || 'clusters.uc'
78
- input = options[:input] || 'sorted.fasta'
79
-
80
- cmd = [
81
- 'uclust',
82
- "--input #{input}",
83
- "--uc #{output}",
84
- "--id #{identity}",
85
- ].join(' ')
86
- exec cmd
87
- end
88
-
89
- ##
90
- # MAKE TABLES
91
- #
92
- desc "otu_tables generates otu tables & representative reads", "--clusters=clusters.uc --output=otu_prefix --joined=joined.fasta"
93
- method_options :clusters => :string, :output => :string, :joined => :string
94
- def otu_table
95
- input = options[:clusters] || 'clusters.uc'
96
- output = options[:output] || 'otus'
97
- joined_reads = options[:joined] || 'joined.fasta'
98
-
99
- clusters = Hash.new
100
-
101
- # Load cluster table!
102
- clusters = Helpers.load_uc_file(input)
103
-
104
- clusters_total = clusters[:count_data].values.collect{ |x| x[:total] }.inject(:+)
105
-
106
- # Get representative sequences!
107
- reads_total = 0
108
- representatives = {}
109
- clusters[:count_data].each{ |k, x| representatives[x[:seed]] = k }
110
-
111
- out_handle = File.open("#{output}.fasta", 'w')
112
-
113
- File.open(joined_reads) do |handle|
114
- records = Dna.new handle
115
- records.each do |dna|
116
- reads_total += 1
117
- if !representatives[dna.name].nil?
118
- dna.name = "#{dna.name}:cluster_#{representatives[dna.name]}"
119
- out_handle.puts dna
120
- end
121
- end
122
- end
123
-
124
- out_handle.close
125
-
126
- # Print some statistics
127
- puts "reads in clusters: #{clusters_total}"
128
- puts "number of reads: #{reads_total}"
129
- puts "unique clusters: #{clusters.keys.length}"
130
-
131
- # print OTU abundancy matrix
132
- csv = Helpers.cluster_data_as_csv(clusters)
133
- File.open("#{output}.csv", 'w') do |h|
134
- h.puts csv
135
- end
136
-
137
- end
138
-
139
- ##
140
- # Create a fasta file with nucleotide sequences for each cluster larger than a cutoff
141
- #
142
- desc "output separate fasta file containing sequences belonging to each cluster", "--clusters=clusters.uc --reads=joined.fasta --min-clst-size=100"
143
- method_options :clusters => :string, :reads=> :string, :buffer_size => :int, :min_clst_size => :int, :out_dir => :string
144
- def split
145
- clusters = options[:clusters] || 'clusters.uc'
146
- reads = options[:reads] || 'joined.fasta'
147
- out_dir = options[:out_dir] || 'clusters_split'
148
- buffer_size = (options[:buffer_size] || 1000).to_i
149
- min_clst_size = (options[:min_clst_size] || 100).to_i
150
- finalize_every = 100_000
151
-
152
- `mkdir -p #{out_dir}/`
153
-
154
- puts "loading #{clusters}"
155
-
156
- # Load read id -> cluster
157
- read_to_clusterid = Hash.new
158
-
159
- # keep track of cluster sizes
160
- cluster_counts = Hash.new { |h, k| h[k] = 0}
161
-
162
- File.open(clusters)do |handle|
163
- handle.each do |line|
164
- line = line.strip.split
165
- cluster_nr = line[1]
166
- if line[0] == 'S' || line[0] == 'H'
167
- read = line[8]
168
- else
169
- next
170
- end
171
- read_to_clusterid[read] = cluster_nr
172
- cluster_counts[cluster_nr] += 1
173
- end
174
- end
175
-
176
- read_to_clusterid.delete_if do |read, cluster_nr|
177
- cluster_counts[cluster_nr] < min_clst_size
178
- end
179
-
180
- total_reads = read_to_clusterid.length
181
- total_clusters = read_to_clusterid.values.uniq.length
182
- puts "#{total_reads} reads in #{total_clusters} clusters"
183
-
184
- puts "writing out fasta files"
185
-
186
- pbar = ProgressBar.new "writing", total_reads
187
-
188
- # Write reads to individual fasta files using Buffer
189
- buffer = Buffer.new :buffer_max => buffer_size
190
- File.open(reads) do |handle|
191
- records = Dna.new handle
192
- $stderr.puts "reads = #{reads}"
193
- records.each_with_index do |record, i|
194
- cluster_id = read_to_clusterid[record.name]
195
- if cluster_id
196
- pbar.inc
197
- filename = File.join(out_dir, cluster_id + '.fasta')
198
- buffer[filename] << record
199
- buffer.finalize if (i%finalize_every == 0)
200
- end
201
- end
202
- end
203
-
204
- pbar.finish
205
- puts "finalizing output"
206
- buffer.finalize # finish writing out
11
+ end # class CLI
207
12
 
208
- puts "done"
209
- end
13
+ end # module
210
14
 
211
- end # class CLI
212
- end # module
15
+ Dir.glob(File.join(File.dirname(__FILE__), 'tasks', '*.rb')).each { |f| require f }
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Lederhosen
2
- VERSION = '0.0.2'
2
+ VERSION = '0.0.3'
3
3
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lederhosen
3
3
  version: !ruby/object:Gem::Version
4
- hash: 27
4
+ hash: 25
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 2
10
- version: 0.0.2
9
+ - 3
10
+ version: 0.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Austin G. Davis-Richardson