tree_clusters 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/key_cols +24 -9
- data/lib/tree_clusters/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7f7ba7426f7048a684ba53a52611efe18cda9603
|
4
|
+
data.tar.gz: c511525c40d1b766c08c2a58e6076633d179439c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f5ad29eb52cbcdf997a7d1f2431bf9f26adf76c534bd6bbc74fc79c05b331f224d7e8b056173e4b49454a324ee5eb14c2f8dc401379552a8b9fed36627e0eff
|
7
|
+
data.tar.gz: 8d7885844452c979abdfbc71ee85c4ecedefa8818047062ede26eb894bc1913ae612093e07cf68c760aa12654e448b97ce323d6982f5589e2f13821debbf3bd7
|
data/exe/key_cols
CHANGED
@@ -169,13 +169,18 @@ key_cols_f =
|
|
169
169
|
annotated_tree_f =
|
170
170
|
File.open(annotated_tree_fname, "w")
|
171
171
|
|
172
|
-
key_col_sets
|
173
|
-
clade_sizes
|
174
|
-
clade_count
|
172
|
+
key_col_sets = {}
|
173
|
+
clade_sizes = {}
|
174
|
+
clade_count = TreeClusters.all_clades(tree).count
|
175
|
+
change_these_names = Set.new
|
176
|
+
|
177
|
+
def all_clades_helper tree
|
178
|
+
TreeClusters.all_clades(tree).sort_by { |cl| cl.all_leaves.count }.reverse
|
179
|
+
end
|
175
180
|
|
176
181
|
AbortIf.logger.info { "Processing clades" }
|
177
182
|
begin
|
178
|
-
|
183
|
+
all_clades_helper(tree).each_with_index do |clade, idx|
|
179
184
|
if ((idx + 1) % 100).zero?
|
180
185
|
perc = ((idx + 1) / clade_count.to_f * 100).round 2
|
181
186
|
|
@@ -199,25 +204,35 @@ begin
|
|
199
204
|
end
|
200
205
|
key_col_sets[key_cols_all_leaves] << clade_id
|
201
206
|
|
202
|
-
# This will change the node in the original NewickTree
|
203
|
-
clade.node.name = "'#{clade_id}'"
|
204
207
|
end
|
205
208
|
|
206
209
|
AbortIf.logger.info { "Writing results" }
|
207
210
|
|
208
211
|
# We only want key column sets that are unique to a single clade.
|
209
|
-
key_col_sets.select {|_, clades| clades.count == 1}.each do |kc_set, clades|
|
212
|
+
key_col_sets.select { |_, clades| clades.count == 1 }.each do |kc_set, clades|
|
210
213
|
clade_id = clades.first
|
211
214
|
|
212
215
|
# TODO should we just skip processing clades that are too small rather than just not printing them out?
|
213
|
-
if clade_sizes[clade_id]
|
216
|
+
if clade_sizes[clade_id] >= opts[:clade_size_cutoff]
|
217
|
+
change_these_names << clade_id
|
214
218
|
key_cols_f.puts [clade_id,
|
215
219
|
kc_set.count,
|
216
|
-
kc_set.map {|pos, bases| "#{pos}-#{bases.join}"}
|
220
|
+
kc_set.map { |pos, bases| "#{pos}-#{bases.join}" }
|
217
221
|
].join "\t"
|
218
222
|
end
|
219
223
|
end
|
220
224
|
|
225
|
+
AbortIf.logger.info { "Annotating tree" }
|
226
|
+
|
227
|
+
all_clades_helper(tree).each_with_index do |clade, idx|
|
228
|
+
clade_id = "clade_#{idx + 1}___#{clade.name.tr("'", "_")}"
|
229
|
+
|
230
|
+
if change_these_names.include? clade_id
|
231
|
+
# This will change the node in the original NewickTree
|
232
|
+
clade.node.name = "'#{clade_id}'"
|
233
|
+
end
|
234
|
+
end
|
235
|
+
|
221
236
|
annotated_tree_f.puts tree.to_s.sub(/;+$/, ";")
|
222
237
|
ensure
|
223
238
|
clade_members_f.close
|