tree_clusters 0.8.1 → 0.8.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5f37ec0e062c33d19f8e525279d587d5512ba106
4
- data.tar.gz: a59464b000338b3fc1aa1fa2c0ee267b5231df73
3
+ metadata.gz: 7f7ba7426f7048a684ba53a52611efe18cda9603
4
+ data.tar.gz: c511525c40d1b766c08c2a58e6076633d179439c
5
5
  SHA512:
6
- metadata.gz: 53b9025669f26719174eb57376d7f1826095a074f896af1352bef944833f0ebfca95c94d444f6d990e7f97bd06002098b13e43c305e4416492114f4bd44408be
7
- data.tar.gz: 1aadc375ce71361769e7f79768b3cbcd45606ac832100363fb68b79831d20019d2532b1d229d8b7e1d7b6becd49a5bb6ca72aea228d241a202686cfcf2d6e36c
6
+ metadata.gz: 4f5ad29eb52cbcdf997a7d1f2431bf9f26adf76c534bd6bbc74fc79c05b331f224d7e8b056173e4b49454a324ee5eb14c2f8dc401379552a8b9fed36627e0eff
7
+ data.tar.gz: 8d7885844452c979abdfbc71ee85c4ecedefa8818047062ede26eb894bc1913ae612093e07cf68c760aa12654e448b97ce323d6982f5589e2f13821debbf3bd7
data/exe/key_cols CHANGED
@@ -169,13 +169,18 @@ key_cols_f =
169
169
  annotated_tree_f =
170
170
  File.open(annotated_tree_fname, "w")
171
171
 
172
- key_col_sets = {}
173
- clade_sizes = {}
174
- clade_count = TreeClusters.all_clades(tree).count
172
+ key_col_sets = {}
173
+ clade_sizes = {}
174
+ clade_count = TreeClusters.all_clades(tree).count
175
+ change_these_names = Set.new
176
+
177
+ def all_clades_helper tree
178
+ TreeClusters.all_clades(tree).sort_by { |cl| cl.all_leaves.count }.reverse
179
+ end
175
180
 
176
181
  AbortIf.logger.info { "Processing clades" }
177
182
  begin
178
- TreeClusters.all_clades(tree).sort_by {|cl| cl.all_leaves.count}.reverse.each_with_index do |clade, idx|
183
+ all_clades_helper(tree).each_with_index do |clade, idx|
179
184
  if ((idx + 1) % 100).zero?
180
185
  perc = ((idx + 1) / clade_count.to_f * 100).round 2
181
186
 
@@ -199,25 +204,35 @@ begin
199
204
  end
200
205
  key_col_sets[key_cols_all_leaves] << clade_id
201
206
 
202
- # This will change the node in the original NewickTree
203
- clade.node.name = "'#{clade_id}'"
204
207
  end
205
208
 
206
209
  AbortIf.logger.info { "Writing results" }
207
210
 
208
211
  # We only want key column sets that are unique to a single clade.
209
- key_col_sets.select {|_, clades| clades.count == 1}.each do |kc_set, clades|
212
+ key_col_sets.select { |_, clades| clades.count == 1 }.each do |kc_set, clades|
210
213
  clade_id = clades.first
211
214
 
212
215
  # TODO should we just skip processing clades that are too small rather than just not printing them out?
213
- if clade_sizes[clade_id] > opts[:clade_size_cutoff]
216
+ if clade_sizes[clade_id] >= opts[:clade_size_cutoff]
217
+ change_these_names << clade_id
214
218
  key_cols_f.puts [clade_id,
215
219
  kc_set.count,
216
- kc_set.map {|pos, bases| "#{pos}-#{bases.join}"}
220
+ kc_set.map { |pos, bases| "#{pos}-#{bases.join}" }
217
221
  ].join "\t"
218
222
  end
219
223
  end
220
224
 
225
+ AbortIf.logger.info { "Annotating tree" }
226
+
227
+ all_clades_helper(tree).each_with_index do |clade, idx|
228
+ clade_id = "clade_#{idx + 1}___#{clade.name.tr("'", "_")}"
229
+
230
+ if change_these_names.include? clade_id
231
+ # This will change the node in the original NewickTree
232
+ clade.node.name = "'#{clade_id}'"
233
+ end
234
+ end
235
+
221
236
  annotated_tree_f.puts tree.to_s.sub(/;+$/, ";")
222
237
  ensure
223
238
  clade_members_f.close
@@ -1,3 +1,3 @@
1
1
  module TreeClusters
2
- VERSION = "0.8.1"
2
+ VERSION = "0.8.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_clusters
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.8.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore