tree_clusters 0.8.1 → 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/key_cols +24 -9
- data/lib/tree_clusters/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7f7ba7426f7048a684ba53a52611efe18cda9603
|
4
|
+
data.tar.gz: c511525c40d1b766c08c2a58e6076633d179439c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f5ad29eb52cbcdf997a7d1f2431bf9f26adf76c534bd6bbc74fc79c05b331f224d7e8b056173e4b49454a324ee5eb14c2f8dc401379552a8b9fed36627e0eff
|
7
|
+
data.tar.gz: 8d7885844452c979abdfbc71ee85c4ecedefa8818047062ede26eb894bc1913ae612093e07cf68c760aa12654e448b97ce323d6982f5589e2f13821debbf3bd7
|
data/exe/key_cols
CHANGED
@@ -169,13 +169,18 @@ key_cols_f =
|
|
169
169
|
annotated_tree_f =
|
170
170
|
File.open(annotated_tree_fname, "w")
|
171
171
|
|
172
|
-
key_col_sets
|
173
|
-
clade_sizes
|
174
|
-
clade_count
|
172
|
+
key_col_sets = {}
|
173
|
+
clade_sizes = {}
|
174
|
+
clade_count = TreeClusters.all_clades(tree).count
|
175
|
+
change_these_names = Set.new
|
176
|
+
|
177
|
+
def all_clades_helper tree
|
178
|
+
TreeClusters.all_clades(tree).sort_by { |cl| cl.all_leaves.count }.reverse
|
179
|
+
end
|
175
180
|
|
176
181
|
AbortIf.logger.info { "Processing clades" }
|
177
182
|
begin
|
178
|
-
|
183
|
+
all_clades_helper(tree).each_with_index do |clade, idx|
|
179
184
|
if ((idx + 1) % 100).zero?
|
180
185
|
perc = ((idx + 1) / clade_count.to_f * 100).round 2
|
181
186
|
|
@@ -199,25 +204,35 @@ begin
|
|
199
204
|
end
|
200
205
|
key_col_sets[key_cols_all_leaves] << clade_id
|
201
206
|
|
202
|
-
# This will change the node in the original NewickTree
|
203
|
-
clade.node.name = "'#{clade_id}'"
|
204
207
|
end
|
205
208
|
|
206
209
|
AbortIf.logger.info { "Writing results" }
|
207
210
|
|
208
211
|
# We only want key column sets that are unique to a single clade.
|
209
|
-
key_col_sets.select {|_, clades| clades.count == 1}.each do |kc_set, clades|
|
212
|
+
key_col_sets.select { |_, clades| clades.count == 1 }.each do |kc_set, clades|
|
210
213
|
clade_id = clades.first
|
211
214
|
|
212
215
|
# TODO should we just skip processing clades that are too small rather than just not printing them out?
|
213
|
-
if clade_sizes[clade_id]
|
216
|
+
if clade_sizes[clade_id] >= opts[:clade_size_cutoff]
|
217
|
+
change_these_names << clade_id
|
214
218
|
key_cols_f.puts [clade_id,
|
215
219
|
kc_set.count,
|
216
|
-
kc_set.map {|pos, bases| "#{pos}-#{bases.join}"}
|
220
|
+
kc_set.map { |pos, bases| "#{pos}-#{bases.join}" }
|
217
221
|
].join "\t"
|
218
222
|
end
|
219
223
|
end
|
220
224
|
|
225
|
+
AbortIf.logger.info { "Annotating tree" }
|
226
|
+
|
227
|
+
all_clades_helper(tree).each_with_index do |clade, idx|
|
228
|
+
clade_id = "clade_#{idx + 1}___#{clade.name.tr("'", "_")}"
|
229
|
+
|
230
|
+
if change_these_names.include? clade_id
|
231
|
+
# This will change the node in the original NewickTree
|
232
|
+
clade.node.name = "'#{clade_id}'"
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
221
236
|
annotated_tree_f.puts tree.to_s.sub(/;+$/, ";")
|
222
237
|
ensure
|
223
238
|
clade_members_f.close
|