miga-base 0.3.1.6 → 0.3.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/actions/ncbi_get.rb +57 -42
  3. data/lib/miga/result/base.rb +7 -0
  4. data/lib/miga/result/dates.rb +42 -0
  5. data/lib/miga/result.rb +4 -0
  6. data/lib/miga/version.rb +1 -1
  7. data/scripts/essential_genes.bash +5 -4
  8. data/utils/enveomics/Makefile +1 -1
  9. data/utils/enveomics/Manifest/Tasks/aasubs.json +75 -75
  10. data/utils/enveomics/Manifest/Tasks/blasttab.json +194 -185
  11. data/utils/enveomics/Manifest/Tasks/distances.json +130 -130
  12. data/utils/enveomics/Manifest/Tasks/fasta.json +51 -3
  13. data/utils/enveomics/Manifest/Tasks/fastq.json +161 -126
  14. data/utils/enveomics/Manifest/Tasks/graphics.json +111 -111
  15. data/utils/enveomics/Manifest/Tasks/mapping.json +30 -0
  16. data/utils/enveomics/Manifest/Tasks/ogs.json +308 -265
  17. data/utils/enveomics/Manifest/Tasks/other.json +451 -449
  18. data/utils/enveomics/Manifest/Tasks/remote.json +1 -1
  19. data/utils/enveomics/Manifest/Tasks/sequence-identity.json +18 -10
  20. data/utils/enveomics/Manifest/Tasks/tables.json +250 -250
  21. data/utils/enveomics/Manifest/Tasks/trees.json +52 -52
  22. data/utils/enveomics/Manifest/Tasks/variants.json +4 -4
  23. data/utils/enveomics/Manifest/categories.json +12 -4
  24. data/utils/enveomics/Manifest/examples.json +1 -1
  25. data/utils/enveomics/Scripts/BedGraph.tad.rb +71 -0
  26. data/utils/enveomics/Scripts/BlastTab.recplot2.R +23 -22
  27. data/utils/enveomics/Scripts/FastA.split.rb +79 -0
  28. data/utils/enveomics/Scripts/FastQ.test-error.rb +81 -0
  29. data/utils/enveomics/Scripts/JPlace.to_iToL.rb +272 -258
  30. data/utils/enveomics/Scripts/aai.rb +13 -6
  31. data/utils/enveomics/Scripts/ani.rb +2 -2
  32. data/utils/enveomics/Scripts/clust.rand.rb +102 -0
  33. data/utils/enveomics/Scripts/lib/enveomics_rb/enveomics.rb +12 -14
  34. data/utils/enveomics/Scripts/lib/enveomics_rb/jplace.rb +2 -2
  35. data/utils/enveomics/Scripts/rbm.rb +23 -14
  36. data/utils/enveomics/enveomics.R/DESCRIPTION +1 -1
  37. data/utils/enveomics/enveomics.R/R/barplot.R +2 -2
  38. metadata +9 -2
@@ -1,306 +1,320 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- #
4
3
  # @author: Luis M. Rodriguez-R
5
- # @update: Feb-28-2016
6
4
  # @license: artistic license 2.0
7
- #
8
5
 
9
6
  $:.push File.expand_path("../lib", __FILE__)
7
+ require "enveomics_rb/enveomics"
10
8
  require "enveomics_rb/jplace"
11
9
  require "optparse"
12
10
  require "json"
13
11
 
14
- o = {:q=>false, :regex=>'^(?<dataset>.+?):.*', :area=>false, :norm=>:counts,
15
- :olditol=>false}
16
- ARGV << '-h' if ARGV.size==0
12
+ o = {q: false, regex: "^(?<dataset>.+?):.*", area: false, norm: :counts,
13
+ olditol: false}
14
+ ARGV << "-h" if ARGV.size==0
17
15
  OptionParser.new do |opts|
18
- opts.banner = "
16
+ opts.banner = "
19
17
  Generates iToL-compatible files from a .jplace file (produced by RAxML's EPA
20
18
  or pplacer), that can be used to draw pie-charts in the nodes of the reference
21
19
  tree.
22
20
 
23
21
  Usage: #{$0} [options]"
24
- opts.separator ""
25
- opts.separator "Mandatory"
26
- opts.on("-i", "--in FILE", ".jplace input file containing the read placement."){ |v| o[:in]=v }
27
- opts.on("-o", "--out FILE", "Base of the output files."){ |v| o[:out]=v }
28
- opts.separator ""
29
- opts.separator "Other Options"
30
- opts.on("-u", "--unique STR", "Name of the dataset (if only one is used). Conflicts with -r and -s."){ |v| o[:unique]=v }
31
- opts.on("-r", "--regex STR", "Regular expression capturing the sample ID (named dataset) in read names.",
32
- "By default: '#{o[:regex]}'. Conflicts with -s."){ |v| o[:regex]=v }
33
- opts.on("-s", "--separator STR", "String separating the dataset name and the rest of the read name.",
34
- "It assumes that the read name starts by the dataset name. Conflicts with -r."){ |v| o[:regex]="^(?<dataset>.+?)#{v}" }
35
- opts.on("-m", "--metadata FILE", "Datasets metadata in tab-delimited format with a header row.",
36
- "Valid headers: name (required), color (in Hex), size (# reads), norm (any float)."){ |v| o[:metadata]=v }
37
- opts.on("-n", "--norm STR", %w[none counts size norm], "Normalization strategy. Must be one of:",
38
- "none: Direct read counts are reported without normalization.",
39
- "count (default): The counts are normalized (divided) by the total counts per dataset.",
40
- "size: The counts are normalized (divided) by the size column in metadata (must be integer).",
41
- "norm: The counts are normalized (divided) by the norm column in metadata (can be any float)."){ |v| o[:norm]=v.to_sym }
42
- opts.on("--old-itol",
43
- "Generate output file using the old iToL format (pre v3.0)."
44
- ){ |v| o[:olditol] = v }
45
- opts.on("-c", "--collapse FILE", "Internal nodes to collapse (requires rootted tree)."){ |v| o[:collapse]=v }
46
- opts.on("-a", "--area", "If set, the area of the pies is proportional to the placements. Otherwise, the radius is."){ o[:area]=true }
47
- opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
48
- opts.on("-h", "--help", "Display this screen.") do
49
- puts opts
50
- exit
51
- end
52
- opts.separator ""
53
- opts.separator "Quick how-to in 5 steps"
54
- opts.separator " 1. Create the placement file using RAxML's EPA [1] or pplacer [2]. You can use any other software"
55
- opts.separator " producing a compliant .jplace file [3]. If you're using multiple datasets, include the name of"
56
- opts.separator " the dataset somewhere in the read names."
57
- opts.separator " 2. If you have multiple datasets, it's convenient to create a metadata table. It's not necessary,"
58
- opts.separator " but it allows you to control the colors and the normalization method (see -m)."
59
- opts.separator " 3. Execute this script passing the .jplace file created in step 1 (see -i). If you have a single"
60
- opts.separator " dataset, use the option -u to give it a short name. If you have multiple datasets, use the -s"
61
- opts.separator " or -r options to tell the script how to find the dataset name within the read name. Note that"
62
- opts.separator " some programs (like CheckM) may produce nonstandard characters that won't be correctly parsed."
63
- opts.separator " To avoid this problem, install iconv support (gem install iconv) before running this script"
64
- opts.separator " (currently "+(JPlace::Tree.has_iconv? ? "" : "NOT ")+"installed)."
65
- opts.separator " 4. Upload the tree (.nwk file) to iToL [4]. Make sure you check 'Keep internal node IDs' in the"
66
- opts.separator " advanced options. In that same page, upload the dataset (.itol file), pick a name, and select"
67
- opts.separator " the data type 'Multi-value Bar Chart or Pie Chart'. If you used the -c option, upload the list"
68
- opts.separator " of nodes to collapse (.collapse file) in the 'Pre-collapsed clades' field (advanced options)."
69
- opts.separator " 5. Open the tree. You can now see the names of the internal nodes. If you want to collapse nodes,"
70
- opts.separator " simply list the nodes to collapse and go back to step 3, this time using the -c option."
71
- opts.separator ""
72
- opts.separator "References"
73
- opts.separator " [1] SA Berger, D Krompass and A Stamatakis, 2011, Syst Biol 60(3):291-302."
74
- opts.separator " http://sysbio.oxfordjournals.org/content/60/3/291"
75
- opts.separator " [2] FA Matsen, RB Kodner and EV Armbrust, 2010, BMC Bioinf 11:538."
76
- opts.separator " http://www.biomedcentral.com/1471-2105/11/538/"
77
- opts.separator " [3] FA Matsen, NG Hoffman, A Gallagher and A Stamatakis, 2012, PLoS ONE 7(2):e31009."
78
- opts.separator " http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0031009"
79
- opts.separator " [4] I Letunic and P Bork, 2011, NAR 39(suppl 2):W475-W478."
80
- opts.separator " http://nar.oxfordjournals.org/content/39/suppl_2/W475.full"
81
- opts.separator ""
22
+ opts.separator ""
23
+ opts.separator "Mandatory"
24
+ opts.on("-i", "--in FILE",
25
+ ".jplace input file containing the read placement."){ |v| o[:in]=v }
26
+ opts.on("-o", "--out FILE", "Base of the output files."){ |v| o[:out]=v }
27
+ opts.separator ""
28
+ opts.separator "Other Options"
29
+ opts.on("-u", "--unique STR",
30
+ "Name of the dataset (if only one is used). Conflicts with -r and -s."
31
+ ){ |v| o[:unique]=v }
32
+ opts.on("-r", "--regex STR",
33
+ "Regular expression capturing the sample ID (named dataset) in read names.",
34
+ "By default: '#{o[:regex]}'. Conflicts with -s."){ |v| o[:regex]=v }
35
+ opts.on("-s", "--separator STR",
36
+ "String separating the dataset name and the rest of the read name.",
37
+ "It assumes that the read name starts by the dataset name. Conflicts with -r."
38
+ ){ |v| o[:regex]="^(?<dataset>.+?)#{v}" }
39
+ opts.on("-m", "--metadata FILE",
40
+ "Datasets metadata in tab-delimited format with a header row.",
41
+ "Valid headers: name (required), color (in Hex), size (# reads), norm (any float)."
42
+ ){ |v| o[:metadata]=v }
43
+ opts.on("-n", "--norm STR", %w[none counts size norm],
44
+ "Normalization strategy. Must be one of:",
45
+ "none: Direct read counts are reported without normalization.",
46
+ "count (default): The counts are normalized (divided) by the total counts per dataset.",
47
+ "size: The counts are normalized (divided) by the size column in metadata (must be integer).",
48
+ "norm: The counts are normalized (divided) by the norm column in metadata (can be any float)."
49
+ ){ |v| o[:norm]=v.to_sym }
50
+ opts.on("--old-itol",
51
+ "Generate output file using the old iToL format (pre v3.0)."
52
+ ){ |v| o[:olditol] = v }
53
+ opts.on("-c", "--collapse FILE",
54
+ "Internal nodes to collapse (requires rootted tree)."){ |v| o[:collapse]=v }
55
+ opts.on("-a", "--area",
56
+ "If set, the area of the pies is proportional to the placements. Otherwise, the radius is."
57
+ ){ o[:area]=true }
58
+ opts.on("-q", "--quiet", "Run quietly (no STDERR output)."){ o[:q] = true }
59
+ opts.on("-h", "--help", "Display this screen.") do
60
+ puts opts
61
+ exit
62
+ end
63
+ opts.separator ""
64
+ opts.separator "Quick how-to in 5 steps"
65
+ opts.separator " 1. Create the placement file using RAxML's EPA [1] or pplacer [2]. You can use any other software"
66
+ opts.separator " producing a compliant .jplace file [3]. If you're using multiple datasets, include the name of"
67
+ opts.separator " the dataset somewhere in the read names."
68
+ opts.separator " 2. If you have multiple datasets, it's convenient to create a metadata table. It's not necessary,"
69
+ opts.separator " but it allows you to control the colors and the normalization method (see -m)."
70
+ opts.separator " 3. Execute this script passing the .jplace file created in step 1 (see -i). If you have a single"
71
+ opts.separator " dataset, use the option -u to give it a short name. If you have multiple datasets, use the -s"
72
+ opts.separator " or -r options to tell the script how to find the dataset name within the read name. Note that"
73
+ opts.separator " some programs (like CheckM) may produce nonstandard characters that won't be correctly parsed."
74
+ opts.separator " To avoid this problem, install iconv support (gem install iconv) before running this script"
75
+ opts.separator " (currently "+(JPlace::Tree.has_iconv? ? "" : "NOT ")+"installed)."
76
+ opts.separator " 4. Upload the tree (.nwk file) to iToL [4]. Make sure you check 'Keep internal node IDs' in the"
77
+ opts.separator " advanced options. In that same page, upload the dataset (.itol file), pick a name, and select"
78
+ opts.separator " the data type 'Multi-value Bar Chart or Pie Chart'. If you used the -c option, upload the list"
79
+ opts.separator " of nodes to collapse (.collapse file) in the 'Pre-collapsed clades' field (advanced options)."
80
+ opts.separator " 5. Open the tree. You can now see the names of the internal nodes. If you want to collapse nodes,"
81
+ opts.separator " simply list the nodes to collapse and go back to step 3, this time using the -c option."
82
+ opts.separator ""
83
+ opts.separator "References"
84
+ opts.separator " [1] SA Berger, D Krompass and A Stamatakis, 2011, Syst Biol 60(3):291-302."
85
+ opts.separator " http://sysbio.oxfordjournals.org/content/60/3/291"
86
+ opts.separator " [2] FA Matsen, RB Kodner and EV Armbrust, 2010, BMC Bioinf 11:538."
87
+ opts.separator " http://www.biomedcentral.com/1471-2105/11/538/"
88
+ opts.separator " [3] FA Matsen, NG Hoffman, A Gallagher and A Stamatakis, 2012, PLoS ONE 7(2):e31009."
89
+ opts.separator " http://www.plosone.org/article/info%3Adoi%2F10.1371%2Fjournal.pone.0031009"
90
+ opts.separator " [4] I Letunic and P Bork, 2011, NAR 39(suppl 2):W475-W478."
91
+ opts.separator " http://nar.oxfordjournals.org/content/39/suppl_2/W475.full"
92
+ opts.separator ""
82
93
  end.parse!
83
94
  abort "-o is mandatory" if o[:out].nil?
84
95
 
85
96
  ##### CLASSES:
86
97
 
87
98
  class Dataset
88
- attr_reader :name, :data
89
- def initialize(name)
90
- @name = name
91
- @data = {:count=>0}
92
- end
93
- def count
94
- self.datum :count
95
- end
96
- def add_count(n)
97
- @data[:count] += n
98
- end
99
- def datum(k)
100
- @data[k]
101
- end
102
- def add_datum(k, v)
103
- @data[k] = v
104
- end
105
- def color
106
- if @data[:color].nil?
107
- @data[:color] = '#' + (1 .. 3).map{ |i| sprintf("%02X", rand(255)) }.join('')
108
- end
109
- @data[:color].sub(/^#?/, '#')
110
- self.datum :color
111
- end
112
- def size
113
- self.datum :size
114
- end
115
- def norm
116
- self.datum :norm
117
- end
99
+ attr_reader :name, :data
100
+ def initialize(name)
101
+ @name = name
102
+ @data = {:count=>0}
103
+ end
104
+ def count
105
+ self.datum :count
106
+ end
107
+ def add_count(n)
108
+ @data[:count] += n
109
+ end
110
+ def datum(k)
111
+ @data[k]
112
+ end
113
+ def add_datum(k, v)
114
+ @data[k] = v
115
+ end
116
+ def color
117
+ if @data[:color].nil?
118
+ @data[:color] = "#" + (1 .. 3).map{ |i|
119
+ sprintf("%02X", rand(255)) }.join("")
120
+ end
121
+ @data[:color].sub(/^#?/, "#")
122
+ self.datum :color
123
+ end
124
+ def size
125
+ self.datum :size
126
+ end
127
+ def norm
128
+ self.datum :norm
129
+ end
118
130
  end
119
131
 
120
132
  class Metadata
121
- attr_reader :datasets
122
- def initialize
123
- @datasets = {}
124
- end
125
- def load_table(file)
126
- f = File.open(file, 'r')
127
- h = f.gets.chomp.split(/\t/)
128
- name_idx = h.find_index 'name'
129
- color_idx = h.find_index 'color'
130
- size_idx = h.find_index 'size'
131
- norm_idx = h.find_index 'norm'
132
- abort "The metadata table must contain a 'name' column." if name_idx.nil?
133
- while ln = f.gets
134
- vals = ln.chomp.split(/\t/)
135
- name = vals[name_idx]
136
- self[name] # Create sample, in case "name" is the only column
137
- self[name].add_datum(:color, vals[color_idx]) unless color_idx.nil?
138
- self[name].add_datum(:size, vals[size_idx].to_i) unless size_idx.nil?
139
- self[name].add_datum(:norm, vals[norm_idx].to_f) unless norm_idx.nil?
140
- end
141
- f.close
142
- end
143
- def [](name)
144
- self << Dataset.new(name) unless @datasets.has_key?(name)
145
- @datasets[name]
146
- end
147
- def <<(dataset)
148
- @datasets[dataset.name] = dataset
149
- end
150
- def names
151
- @datasets.keys
152
- end
153
- def colors
154
- @datasets.values.map{ |d| d.color }
155
- end
156
- def data(k)
157
- self.names.map{ |name| self[name].datum[k] }
158
- end
159
- def set_unique!(n)
160
- u = self[n]
161
- @datasets = {}
162
- @datasets[n] = u
163
- end
164
- def size
165
- self.datasets.length
166
- end
133
+ attr_reader :datasets
134
+ def initialize
135
+ @datasets = {}
136
+ end
137
+ def load_table(file)
138
+ f = File.open(file, "r")
139
+ h = f.gets.chomp.split(/\t/)
140
+ name_idx = h.find_index "name"
141
+ color_idx = h.find_index "color"
142
+ size_idx = h.find_index "size"
143
+ norm_idx = h.find_index "norm"
144
+ abort "The metadata table must contain a 'name' column." if name_idx.nil?
145
+ while ln = f.gets
146
+ vals = ln.chomp.split(/\t/)
147
+ name = vals[name_idx]
148
+ self[name] # Create sample, in case "name" is the only column
149
+ self[name].add_datum(:color, vals[color_idx]) unless color_idx.nil?
150
+ self[name].add_datum(:size, vals[size_idx].to_i) unless size_idx.nil?
151
+ self[name].add_datum(:norm, vals[norm_idx].to_f) unless norm_idx.nil?
152
+ end
153
+ f.close
154
+ end
155
+ def [](name)
156
+ self << Dataset.new(name) unless @datasets.has_key?(name)
157
+ @datasets[name]
158
+ end
159
+ def <<(dataset)
160
+ @datasets[dataset.name] = dataset
161
+ end
162
+ def names
163
+ @datasets.keys
164
+ end
165
+ def colors
166
+ @datasets.values.map{ |d| d.color }
167
+ end
168
+ def data(k)
169
+ self.names.map{ |name| self[name].datum[k] }
170
+ end
171
+ def set_unique!(n)
172
+ u = self[n]
173
+ @datasets = {}
174
+ @datasets[n] = u
175
+ end
176
+ def size
177
+ self.datasets.length
178
+ end
167
179
  end
168
180
 
169
181
  ##### MAIN:
170
182
  begin
171
- $stderr.puts "Parsing metadata." unless o[:q]
172
- metadata = Metadata.new
173
- metadata.load_table(o[:metadata]) unless o[:metadata].nil?
174
- metadata.set_unique! o[:unique] unless o[:unique].nil?
183
+ $stderr.puts "Parsing metadata." unless o[:q]
184
+ metadata = Metadata.new
185
+ metadata.load_table(o[:metadata]) unless o[:metadata].nil?
186
+ metadata.set_unique! o[:unique] unless o[:unique].nil?
175
187
 
176
188
 
177
- $stderr.puts "Loading jplace file." unless o[:q]
178
- ifh = File.open(o[:in], 'r')
179
- jplace = JSON.load(ifh)
180
- ifh.close
181
-
189
+ $stderr.puts "Loading jplace file." unless o[:q]
190
+ ifh = File.open(o[:in], 'r')
191
+ jplace = JSON.load(ifh)
192
+ ifh.close
182
193
 
183
- $stderr.puts "Parsing tree." unless o[:q]
184
- tree = JPlace::Tree.from_nwk(jplace["tree"])
185
-
186
194
 
187
- $stderr.puts "Parsing placements." unless o[:q]
188
- JPlace::Placement.fields = jplace["fields"]
189
- placements_n = 0
190
- jplace["placements"].each do |placement|
191
- JPlace::Node.link_placement(JPlace::Placement.new(placement))
192
- placements_n += 1
193
- end
194
- $stderr.puts " #{placements_n} placements." unless o[:q]
195
- tree.pre_order do |n|
196
- n.placements.each do |p|
197
- p.nm.each do |r|
198
- m = (o[:unique].nil? ? (/#{o[:regex]}/.match(r[:n]) or
199
- abort "Cannot parse read name: #{r[:n]}, placed at edge #{n.index}") : {:dataset=>o[:unique]})
200
- metadata[ m[:dataset] ].add_count(r[:m])
201
- end
202
- end
203
- end
195
+ $stderr.puts "Parsing tree." unless o[:q]
196
+ tree = JPlace::Tree.from_nwk(jplace["tree"])
204
197
 
205
198
 
206
- unless o[:collapse].nil?
207
- $stderr.puts "Collapsing nodes." unless o[:q]
208
- collapse = File.readlines(o[:collapse]).map do |ln|
209
- l = ln.chomp.split(/\t/)
210
- l[1] = l[0] if l[1].nil?
211
- l
212
- end.inject({}) do |hash,ar|
213
- hash[ar[0]] = ar[1]
214
- hash
199
+ $stderr.puts "Parsing placements." unless o[:q]
200
+ JPlace::Placement.fields = jplace["fields"]
201
+ placements_n = 0
202
+ jplace["placements"].each do |placement|
203
+ JPlace::Node.link_placement(JPlace::Placement.new(placement))
204
+ placements_n += 1
205
+ end
206
+ $stderr.puts " #{placements_n} placements." unless o[:q]
207
+ tree.pre_order do |n|
208
+ n.placements.each do |p|
209
+ p.nm.each do |r|
210
+ m = (o[:unique].nil? ? (/#{o[:regex]}/.match(r[:n]) or
211
+ abort "Cannot parse read name: #{r[:n]}, placed at edge #{n.index}") :
212
+ {:dataset=>o[:unique]})
213
+ metadata[ m[:dataset] ].add_count(r[:m])
215
214
  end
216
- f = File.open(o[:out] + ".collapse", 'w')
217
- coll_n = 0
218
- tree.pre_order do |n|
219
- if collapse.keys.include? n.cannonical_name
220
- n.collapse!
221
- n.name = collapse[n.cannonical_name]
222
- f.puts n.name
223
- coll_n += 1
224
- end
215
+ end
216
+ end
217
+
218
+
219
+ unless o[:collapse].nil?
220
+ $stderr.puts "Collapsing nodes." unless o[:q]
221
+ collapse = File.readlines(o[:collapse]).map do |ln|
222
+ l = ln.chomp.split(/\t/)
223
+ l[1] = l[0] if l[1].nil?
224
+ l
225
+ end.inject({}) do |hash,ar|
226
+ hash[ar[0]] = ar[1]
227
+ hash
228
+ end
229
+ f = File.open(o[:out] + ".collapse", "w")
230
+ coll_n = 0
231
+ tree.pre_order do |n|
232
+ if collapse.keys.include? n.cannonical_name
233
+ n.collapse!
234
+ n.name = collapse[n.cannonical_name]
235
+ f.puts n.name
236
+ coll_n += 1
225
237
  end
226
- f.close
227
- $stderr.puts " #{coll_n} nodes collapsed (#{collapse.length} requested)." unless o[:q]
228
- end
238
+ end
239
+ f.close
240
+ $stderr.puts " #{coll_n} nodes collapsed (#{collapse.length} requested)." unless o[:q]
241
+ end
229
242
 
230
243
 
231
- $stderr.puts "Estimating normalizing factors by #{o[:norm].to_s}." unless o[:q] or o[:norm]==:none
232
- case o[:norm]
233
- when :none
234
- metadata.datasets.values.each{ |d| d.add_datum :norm, 1.0 }
235
- when :counts
236
- metadata.datasets.values.each{ |d| d.add_datum :norm, d.count.to_f }
237
- when :size
238
- abort "Column 'size' required in metadata." if metadata.datasets.values[0].size.nil?
239
- metadata.datasets.values.each{ |d| d.add_datum :norm, d.size.to_f }
240
- when :norm
241
- abort "Column 'norm' required in metadata." if metadata.datasets.values[0].norm.nil?
242
- end
243
- max_norm = metadata.datasets.values.map{ |d| d.norm }.max
244
+ $stderr.puts "Estimating normalizing factors by #{o[:norm].to_s}." unless o[:q] or o[:norm]==:none
245
+ case o[:norm]
246
+ when :none
247
+ metadata.datasets.values.each{ |d| d.add_datum :norm, 1.0 }
248
+ when :counts
249
+ metadata.datasets.values.each{ |d| d.add_datum :norm, d.count.to_f }
250
+ when :size
251
+ abort "Column 'size' required in metadata." if metadata.datasets.values[0].size.nil?
252
+ metadata.datasets.values.each{ |d| d.add_datum :norm, d.size.to_f }
253
+ when :norm
254
+ abort "Column 'norm' required in metadata." if metadata.datasets.values[0].norm.nil?
255
+ end
256
+ max_norm = metadata.datasets.values.map{ |d| d.norm }.max
244
257
 
245
258
 
246
- $stderr.puts "Generating iToL dataset." unless o[:q]
247
- f = File.open(o[:out] + ".itol.txt", "w")
248
- if o[:olditol]
249
- f.puts "LABELS\t" + metadata.names.join("\t")
250
- f.puts "COLORS\t" + metadata.colors.join("\t")
251
- else
252
- f.puts "DATASET_PIECHART"
253
- f.puts "SEPARATOR TAB"
254
- f.puts "DATASET_LABEL\tReadPlacement"
255
- f.puts "COLOR\t#1f2122"
256
- f.puts "FIELD_LABELS\t" + metadata.names.join("\t")
257
- f.puts "FIELD_COLORS\t" + metadata.colors.join("\t")
258
- f.puts "DATA"
259
- end
260
- max_norm_sum,min_norm_sum,max_norm_n,min_norm_n = 0.0,Float::INFINITY,"",""
261
- tree.pre_order do |n|
262
- ds_counts = Hash.new(0.0)
263
- n.placements.each do |p|
264
- p.nm.each do |r|
265
- m = (o[:unique].nil? ? (/#{o[:regex]}/.match(r[:n]) or
266
- abort "Cannot parse read name: #{r[:n]}, placed at edge #{n.index}") : {:dataset=>o[:unique]})
267
- ds_counts[ m[:dataset] ] += r[:m] / metadata[ m[:dataset] ].norm
268
- end
259
+ $stderr.puts "Generating iToL dataset." unless o[:q]
260
+ f = File.open(o[:out] + ".itol.txt", "w")
261
+ if o[:olditol]
262
+ f.puts "LABELS\t" + metadata.names.join("\t")
263
+ f.puts "COLORS\t" + metadata.colors.join("\t")
264
+ else
265
+ f.puts "DATASET_PIECHART"
266
+ f.puts "SEPARATOR TAB"
267
+ f.puts "DATASET_LABEL\tReadPlacement"
268
+ f.puts "COLOR\t#1f2122"
269
+ f.puts "FIELD_LABELS\t" + metadata.names.join("\t")
270
+ f.puts "FIELD_COLORS\t" + metadata.colors.join("\t")
271
+ f.puts "DATA"
272
+ end
273
+ max_norm_sum,min_norm_sum,max_norm_n,min_norm_n = 0.0,Float::INFINITY,"",""
274
+ tree.pre_order do |n|
275
+ ds_counts = Hash.new(0.0)
276
+ n.placements.each do |p|
277
+ p.nm.each do |r|
278
+ m = (o[:unique].nil? ? (/#{o[:regex]}/.match(r[:n]) or
279
+ abort "Cannot parse read name: #{r[:n]}, placed at edge #{n.index}") :
280
+ {:dataset=>o[:unique]})
281
+ ds_counts[ m[:dataset] ] += r[:m] / metadata[ m[:dataset] ].norm
269
282
  end
270
- counts_sum = ds_counts.values.reduce(:+)
271
- unless counts_sum.nil?
272
- # In the area option, the radius is "twice" to make the smallest > 1 (since counts_sum is >= 1)
273
- radius = (o[:area] ? 2*Math.sqrt(counts_sum/Math::PI) : counts_sum)*max_norm
274
- f.puts n.cannonical_name +
275
- "#{"\t0.5" unless o[:olditol]}\t#{"R" if o[:olditol]}" +
276
- radius.to_i.to_s + "\t" +
277
- metadata.names.map{ |n| ds_counts[n] }.join("\t")
278
- if counts_sum > max_norm_sum
279
- max_norm_n = n.cannonical_name
280
- max_norm_sum = counts_sum
281
- end
282
- if counts_sum < min_norm_sum
283
- min_norm_n = n.cannonical_name
284
- min_norm_sum = counts_sum
285
- end
283
+ end
284
+ counts_sum = ds_counts.values.reduce(:+)
285
+ unless counts_sum.nil?
286
+ # In the area option, the radius is "twice" to make the smallest > 1 (since counts_sum is >= 1)
287
+ radius = (o[:area] ? 2*Math.sqrt(counts_sum/Math::PI) : counts_sum)*max_norm
288
+ f.puts n.cannonical_name +
289
+ "#{"\t0.5" unless o[:olditol]}\t#{"R" if o[:olditol]}" +
290
+ radius.to_i.to_s + "\t" +
291
+ metadata.names.map{ |n| ds_counts[n] }.join("\t")
292
+ if counts_sum > max_norm_sum
293
+ max_norm_n = n.cannonical_name
294
+ max_norm_sum = counts_sum
286
295
  end
287
- end
288
- f.close
289
- units = {:none=>'', :counts=>' per million placements', :size=>' per million reads', :norm=>' per normalizing unit'}
290
- $stderr.puts " The pie #{o[:area] ? 'areas' : 'radii'} are proportional to the placements#{units[o[:norm]]}." unless o[:q]
291
- $stderr.puts " The minimum radius (#{min_norm_n}) represents #{min_norm_sum*(([:none, :norm].include? o[:norm]) ? 1 : 1e6)} placements#{units[o[:norm]]}." unless o[:q]
292
- $stderr.puts " The maximum radius (#{max_norm_n}) represents #{max_norm_sum*(([:none, :norm].include? o[:norm]) ? 1 : 1e6)} placements#{units[o[:norm]]}." unless o[:q]
293
-
296
+ if counts_sum < min_norm_sum
297
+ min_norm_n = n.cannonical_name
298
+ min_norm_sum = counts_sum
299
+ end
300
+ end
301
+ end
302
+ f.close
303
+ units = {none: "", counts: " per million placements",
304
+ size: " per million reads", norm: " per normalizing unit"}
305
+ $stderr.puts " The pie #{o[:area] ? "areas" : "radii"} are proportional to the placements#{units[o[:norm]]}." unless o[:q]
306
+ $stderr.puts " The minimum radius (#{min_norm_n}) represents #{min_norm_sum*(([:none, :norm].include? o[:norm]) ? 1 : 1e6)} placements#{units[o[:norm]]}." unless o[:q]
307
+ $stderr.puts " The maximum radius (#{max_norm_n}) represents #{max_norm_sum*(([:none, :norm].include? o[:norm]) ? 1 : 1e6)} placements#{units[o[:norm]]}." unless o[:q]
294
308
 
295
- $stderr.puts "Re-formatting tree for iToL." unless o[:q]
296
- f = File.open(o[:out]+'.nwk', "w")
297
- f.puts tree.to_s+';'
298
- f.close
309
+
310
+ $stderr.puts "Re-formatting tree for iToL." unless o[:q]
311
+ f = File.open(o[:out] + ".nwk", "w")
312
+ f.puts tree.to_s + ";"
313
+ f.close
299
314
 
300
315
  rescue => err
301
- $stderr.puts "Exception: #{err}\n\n"
302
- err.backtrace.each { |l| $stderr.puts l + "\n" }
303
- err
316
+ $stderr.puts "Exception: #{err}\n\n"
317
+ err.backtrace.each { |l| $stderr.puts l + "\n" }
318
+ err
304
319
  end
305
320
 
306
-