tree_clusters 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 612ebc10d0f23a15ae9d11d35be3f1de197d29fd
4
- data.tar.gz: '084ff094f024a062e1c0f1476b9105950f796edf'
3
+ metadata.gz: e6f0523128e1d9efede01f9af494d99e7e51a48c
4
+ data.tar.gz: 58bac03495f585ad2ea755abaac5ceefdb90e2d3
5
5
  SHA512:
6
- metadata.gz: 247778fdebedd3213d96ccafc52ce1d5667bdb9e8eefee60e30b3dbf1fa618ea73000d6d2a5802e47e84d9ba6fa4c119fd3acb1d21907c4c268f63e38949ffcd
7
- data.tar.gz: e8d5cc7d6aa7ce86f277f42c5a5b8bd7b7d2bebe5ac3fbf5c4201315bc7aa8e411940056c30dce22226864d982083605180bfe7304c824566e107a43a308c910
6
+ metadata.gz: 73a82096be4f5be8199be28bf3e12b0ac63c08b9da1cfea1fded6cd4da7c448ffc7cdc8a285c681b88493b39fa32fcace02be0fff23190a314050d107db4b2f5
7
+ data.tar.gz: a5b00bec9a4f567efc098e664e6479e691f96e761a5c57ac5e4f198159c9d1cfd320dbb2033a1a458597460f621815e03ee6ecef351f31db4597f4a0a7aac838
data/exe/clade_attrs ADDED
@@ -0,0 +1,229 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ Signal.trap("PIPE", "EXIT")
4
+
5
+ require "tree_clusters"
6
+ require "trollop"
7
+ require "parse_fasta"
8
+ require "shannon"
9
+ require "fileutils"
10
+
11
+ TreeClusters.extend TreeClusters
12
+
13
+ def puts_info outf, clade_id, attr_cat, attr_set
14
+ outf.puts [clade_id, attr_cat, attr_set.to_a].join "\t"
15
+ end
16
+
17
+ opts = Trollop.options do
18
+ version TreeClusters::VERSION
19
+
20
+ banner <<-EOS
21
+
22
+
23
+ Checking IDs
24
+ ------------
25
+
26
+ IDs for the sequences must match between the three input files.
27
+
28
+ The tree file is allowed to have quoted taxa names, but the mapping
29
+ file and alignment file are not.
30
+
31
+ If your alignment file has spaces in the name, the ID part of the
32
+ header (i.e., the part up until the space) must match with the
33
+ sequence IDs in the tree and the mapping file.
34
+
35
+ Example: This would be okay.
36
+
37
+ tree file:
38
+ ('genome_A', 'genome_B');
39
+
40
+ aln file:
41
+ >genome_A apple pie
42
+ AAAAA
43
+ >genome_B brown sugar
44
+ AATTA
45
+
46
+ mapping file:
47
+ name coolness
48
+ genome_A cool
49
+ genome_B notcool
50
+
51
+
52
+ Subtracting parent nodes
53
+ ------------------------
54
+
55
+ If a clade's parent would be the root of the tree, no columns will
56
+ be subtracted when removing the parent columns as it would be the
57
+ entire alignment.
58
+
59
+ Options:
60
+ EOS
61
+
62
+ opt(:tree,
63
+ "Newick tree file",
64
+ type: :string)
65
+ opt(:mapping,
66
+ "Mapping file",
67
+ type: :string)
68
+ opt(:attrs,
69
+ "Attributes file",
70
+ type: :string)
71
+
72
+ opt(:clade_size_cutoff,
73
+ "Consider only clades with at least this many leaves",
74
+ default: 1)
75
+
76
+ opt(:outdir,
77
+ "Output directory",
78
+ default: ".")
79
+ opt(:base,
80
+ "Basename for output",
81
+ default: "clade_attrs")
82
+ end
83
+
84
+ abort_if opts[:tree].nil?,
85
+ "--tree is a required arg"
86
+ abort_if opts[:mapping].nil?,
87
+ "--mapping is a required arg"
88
+ abort_if opts[:attrs].nil?,
89
+ "--attrs is a required arg"
90
+
91
+ abort_unless_file_exists opts[:tree]
92
+ abort_unless_file_exists opts[:mapping]
93
+ abort_unless_file_exists opts[:attrs]
94
+
95
+ # TODO check IDs when attrs is not a fasta file
96
+ # TreeClusters.check_ids opts[:tree], opts[:mapping], opts[:attrs]
97
+
98
+ abort_unless opts[:clade_size_cutoff] >= 1,
99
+ "--clade-size-cutoff must be >= 1"
100
+
101
+ FileUtils.mkdir_p opts[:outdir]
102
+
103
+ tree = NewickTree.fromFile opts[:tree]
104
+ metadata = TreeClusters.read_mapping_file opts[:mapping]
105
+ snazzy_info = TreeClusters.snazzy_info tree, metadata
106
+ attr_names, leaf2attrs = TreeClusters.read_attrs_file opts[:attrs]
107
+
108
+ ext_base = "clade_attrs"
109
+
110
+ clades_fname =
111
+ File.join opts[:outdir],
112
+ "#{opts[:base]}.#{ext_base}.txt"
113
+ members_fname =
114
+ File.join opts[:outdir],
115
+ "#{opts[:base]}.#{ext_base}_clade_members.txt"
116
+ attrs_fname =
117
+ File.join opts[:outdir],
118
+ "#{opts[:base]}.#{ext_base}_attrs_union.txt"
119
+ attrs_intersection_fname =
120
+ File.join opts[:outdir],
121
+ "#{opts[:base]}.#{ext_base}_attrs_intersection.txt"
122
+ attrs_minus_parent_attrs_fname =
123
+ File.join opts[:outdir],
124
+ "#{opts[:base]}.#{ext_base}_attrs_minus_parent_attrs.txt"
125
+ attrs_minus_sibling_attrs_fname =
126
+ File.join opts[:outdir],
127
+ "#{opts[:base]}.#{ext_base}_attrs_minus_sibling_attrs.txt"
128
+ attrs_minus_other_attrs_fname =
129
+ File.join opts[:outdir],
130
+ "#{opts[:base]}.#{ext_base}_attrs_minus_other_attrs.txt"
131
+
132
+
133
+ info_f =
134
+ File.open(clades_fname, "w")
135
+ clade_members_f =
136
+ File.open(members_fname, "w")
137
+ attrs_f =
138
+ File.open(attrs_fname, "w")
139
+ attrs_intersection_f =
140
+ File.open(attrs_intersection_fname, "w")
141
+ attrs_minus_parent_attrs_f =
142
+ File.open(attrs_minus_parent_attrs_fname, "w")
143
+ attrs_minus_sibling_attrs_f =
144
+ File.open(attrs_minus_sibling_attrs_fname, "w")
145
+ attrs_minus_other_attrs_f =
146
+ File.open(attrs_minus_other_attrs_fname, "w")
147
+
148
+
149
+ begin
150
+ # info is { metadata_category => metadata_tag , ... }
151
+ snazzy_info.each_with_index do |(clade, info), idx|
152
+ assert clade.all_leaves.all? { |leaf| leaf2attrs.has_key? leaf },
153
+ "Not all leaves are present in the leaf2attrs hash table"
154
+
155
+ clade_id = "clade_#{idx+1}___#{clade.name}"
156
+
157
+ is_snazzy = info.nil? ? false : true
158
+ snazzy = is_snazzy ? "snazzy" : "not_snazzy"
159
+
160
+ if is_snazzy
161
+ info_f.puts [clade_id,
162
+ info.count,
163
+ info.map { |pair| pair.join("|")}].join "\t"
164
+ else
165
+ info_f.puts [clade_id,
166
+ 0,
167
+ "not_snazzy"].join "\t"
168
+ end
169
+
170
+ clade_members_f.puts [clade_id,
171
+ clade.all_leaves.count,
172
+ clade.all_leaves].join "\t"
173
+
174
+ attr_names.each do |attr_category|
175
+ attrs_all_leaves =
176
+ leaf2attrs.attrs clade.all_leaves, attr_category
177
+
178
+ attrs_all_sibling_leaves =
179
+ leaf2attrs.attrs clade.all_sibling_leaves,
180
+ attr_category
181
+ attrs_parent_leaves =
182
+ leaf2attrs.attrs clade.parent_leaves,
183
+ attr_category
184
+ attrs_other_leaves =
185
+ leaf2attrs.attrs clade.other_leaves,
186
+ attr_category
187
+
188
+ attrs_all_minus_parent =
189
+ attrs_all_leaves.union - attrs_parent_leaves.union
190
+ attrs_all_minus_sibling =
191
+ attrs_all_leaves.union - attrs_all_sibling_leaves.union
192
+ attrs_all_minus_other =
193
+ attrs_all_leaves.union - attrs_other_leaves.union
194
+
195
+
196
+ puts_info attrs_f,
197
+ clade_id,
198
+ attr_category,
199
+ attrs_all_leaves.union
200
+
201
+ puts_info attrs_intersection_f,
202
+ clade_id,
203
+ attr_category,
204
+ attrs_all_leaves.intersection
205
+
206
+ puts_info attrs_minus_parent_attrs_f,
207
+ clade_id,
208
+ attr_category,
209
+ attrs_all_minus_parent
210
+
211
+ puts_info attrs_minus_sibling_attrs_f,
212
+ clade_id,
213
+ attr_category,
214
+ attrs_all_minus_sibling
215
+
216
+ puts_info attrs_minus_other_attrs_f,
217
+ clade_id,
218
+ attr_category,
219
+ attrs_all_minus_other
220
+ end
221
+ end
222
+ ensure
223
+ info_f.close
224
+ clade_members_f.close
225
+ attrs_f.close
226
+ attrs_minus_parent_attrs_f.close
227
+ attrs_minus_sibling_attrs_f.close
228
+ attrs_minus_other_attrs_f.close
229
+ end
@@ -0,0 +1,18 @@
1
+ module TreeClusters
2
+ # Provides convenience methods for working with Arrays of Sets
3
+ class AttrArray < Object::Array
4
+ # Takes the union of all sets in the AttrArray
5
+ #
6
+ # @return [Set]
7
+ def union
8
+ self.reduce(&:union)
9
+ end
10
+
11
+ # Takes the intersection of all sets in the AttrArray
12
+ #
13
+ # @return [Set]
14
+ def intersection
15
+ self.reduce(&:intersection)
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,47 @@
1
+ module TreeClusters
2
+ # A Hash table for genome/leaf/taxa attributes
3
+ class Attrs < Hash
4
+
5
+ # Returns the an AttrArray of Sets for the given genomes and
6
+ # attribute.
7
+ #
8
+ # @note If a genome is in the leaves array, but is not in the hash
9
+ # table, NO error will be raised. Rather that genome will be
10
+ # skipped. This is for cases in which not all genomes have
11
+ # attributes.
12
+ #
13
+ # @param leaves [Array<String>] names of the leaves for which you
14
+ # need attributes
15
+ # @param attr [Symbol] the attribute you are interested in eg,
16
+ # :genes
17
+ #
18
+ # @return [AttrArray<Set>] an AttrArray of Sets of
19
+ # attributes
20
+ #
21
+ # @raise [AbortIf::Exit] if they leaf is present but doesn't have
22
+ # the requested attr
23
+ def attrs leaves, attr
24
+ ary = leaves.map do |leaf|
25
+
26
+ if self.has_key? leaf
27
+ abort_unless self[leaf].has_key?(attr),
28
+ "Missing attr #{attr.inspect} for leaf '#{leaf}'"
29
+
30
+ self[leaf][attr]
31
+ else
32
+ nil
33
+ end
34
+ end.compact
35
+
36
+ TreeClusters::AttrArray.new ary
37
+ end
38
+
39
+ def add leaf, attr, val
40
+ if self.has_key? leaf
41
+ self[leaf][attr] = val
42
+ else
43
+ self[leaf] = { attr => val }
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,128 @@
1
+ module TreeClusters
2
+ # Represents a clade in a NewickTree
3
+ class Clade
4
+ attr_accessor :name,
5
+ :all_leaves,
6
+ :left_leaves,
7
+ :right_leaves,
8
+ :all_sibling_leaves,
9
+ :each_sibling_leaf_set,
10
+ :parent_leaves,
11
+ :non_parent_leaves,
12
+ :other_leaves,
13
+ :single_tag_info,
14
+ :all_tags
15
+
16
+ # @note If a node name is quoted, then those quotes are removed
17
+ # first.
18
+ #
19
+ # @param node [NewickNode] a NewickNode from a NewickTree
20
+ # @param tree [NewickTree] a NewickTree
21
+ def initialize node, tree, metadata=nil
22
+ tree_taxa = tree.unquoted_taxa
23
+
24
+ @name = unquote node.name
25
+ @all_leaves = descendant_leaves node
26
+
27
+ if (children = node.children).count == 2
28
+ lchild, rchild = node.children
29
+
30
+ @left_leaves = descendant_leaves lchild
31
+
32
+ @right_leaves = descendant_leaves rchild
33
+ end
34
+
35
+ siblings = node.siblings
36
+ # assert siblings.count == 1,
37
+ # "Node #{node.name} has more than one sibling."
38
+
39
+ @each_sibling_leaf_set = siblings.
40
+ map { |node| descendant_leaves node }
41
+
42
+ @all_sibling_leaves = @each_sibling_leaf_set.flatten.uniq
43
+
44
+ parent = node.parent
45
+ assert parent,
46
+ "Noge #{node.name} has no parent. Is it the root?"
47
+ @parent_leaves = descendant_leaves parent
48
+
49
+ @other_leaves =
50
+ Object::Set.new(tree_taxa) - Object::Set.new(all_leaves)
51
+
52
+ @non_parent_leaves =
53
+ Object::Set.new(tree_taxa) - Object::Set.new(parent_leaves)
54
+
55
+ if metadata
56
+ @metadata = metadata
57
+ @all_tags ||= get_all_tags
58
+ @single_tag_info ||= get_single_tag_info
59
+ else
60
+ @single_tag_info = nil
61
+ end
62
+ end
63
+
64
+ # Compares two Clades field by field.
65
+ #
66
+ # If all instance variables are == than the two clades are == as
67
+ # well.
68
+ def == clade
69
+ (
70
+ self.name == clade.name &&
71
+ self.all_leaves == clade.all_leaves &&
72
+ self.left_leaves == clade.left_leaves &&
73
+ self.right_leaves == clade.right_leaves &&
74
+ self.all_sibling_leaves == clade.all_sibling_leaves &&
75
+ self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
76
+ self.parent_leaves == clade.parent_leaves &&
77
+ self.other_leaves == clade.other_leaves &&
78
+ self.single_tag_info == clade.single_tag_info &&
79
+ self.all_tags == clade.all_tags
80
+ )
81
+ end
82
+
83
+ # Alias for ==
84
+ def eql? clade
85
+ self == clade
86
+ end
87
+
88
+ private
89
+
90
+ def get_single_tag_info
91
+ @all_tags.map do |md_cat, set|
92
+ [md_cat, set.count == 1 ? set.to_a.first : nil]
93
+ end.to_h
94
+ end
95
+
96
+ def get_all_tags
97
+ # name2tag has leaf names => metadata tag and is an Attrs
98
+ @metadata.map do |md_cat, name2tag|
99
+ tag_info = self.all_leaves.map do |leaf|
100
+ assert name2tag.has_key?(leaf),
101
+ "leaf #{leaf} is not present in name2tag ht for " +
102
+ "md_cat #{md_cat}"
103
+
104
+ name2tag[leaf]
105
+ end
106
+
107
+ [md_cat, Set.new(tag_info)]
108
+ end.to_h
109
+ end
110
+
111
+ def descendant_leaves node
112
+ if node.leaf?
113
+ [unquote(node.name)]
114
+ else
115
+ node.
116
+ descendants.
117
+ flatten.
118
+ uniq.
119
+ select { |node| node.leaf? }.
120
+ map { |node| unquote(node.name) }
121
+ end
122
+ end
123
+
124
+ def unquote str
125
+ str.tr %q{"'}, ""
126
+ end
127
+ end
128
+ end
@@ -1,3 +1,3 @@
1
1
  module TreeClusters
2
- VERSION = "0.6.0"
2
+ VERSION = "0.7.0"
3
3
  end
data/lib/tree_clusters.rb CHANGED
@@ -3,6 +3,9 @@ require "Newick"
3
3
  require "set"
4
4
  require "parse_fasta"
5
5
  require "shannon"
6
+ require "tree_clusters/attrs"
7
+ require "tree_clusters/attr_array"
8
+ require "tree_clusters/clade"
6
9
  require "tree_clusters/version"
7
10
 
8
11
  include AbortIf
@@ -198,6 +201,73 @@ module TreeClusters
198
201
  snazzy_clades
199
202
  end
200
203
 
204
+ def snazzy_info tree, metadata
205
+ snazzy_info = {}
206
+
207
+ clades = self.
208
+ all_clades(tree, metadata).
209
+ sort_by { |clade| clade.all_leaves.count }.
210
+ reverse
211
+
212
+ # Non snazzy clades have a value of nil, so set all to nil and the
213
+ # snazzy ones will be overwritten.
214
+ clades.each do |clade|
215
+ snazzy_info[clade] = nil
216
+ end
217
+
218
+ metadata.each do |md_cat, leaf2mdtag|
219
+ already_checked = Set.new
220
+ single_tag_clades = {}
221
+
222
+ clades.each do |clade|
223
+ assert clade.all_leaves.count > 1,
224
+ "A clade cannot also be a leaf"
225
+
226
+ unless clade.all_leaves.all? do |leaf|
227
+ already_checked.include? leaf
228
+ end
229
+ md_tags = clade.all_leaves.map do |leaf|
230
+ assert leaf2mdtag.has_key?(leaf),
231
+ "leaf #{leaf} is missing from leaf2mdtag ht"
232
+
233
+ leaf2mdtag[leaf]
234
+ end
235
+
236
+ # this clade is mono-phyletic w.r.t. this metadata category.
237
+ if md_tags.uniq.count == 1
238
+ clade.all_leaves.each do |leaf|
239
+ already_checked << leaf
240
+ end
241
+
242
+ assert !single_tag_clades.has_key?(clade),
243
+ "clade #{clade.name} is repeated in single_tag_clades for #{md_cat}"
244
+
245
+ single_tag_clades[clade] = md_tags.first
246
+ end
247
+ end
248
+ end
249
+
250
+ single_tag_clades.each do |clade, md_tag|
251
+ non_clade_leaves = tree.unquoted_taxa - clade.all_leaves
252
+
253
+ non_clade_leaves_with_this_md_tag = non_clade_leaves.map do |leaf|
254
+ [leaf, leaf2mdtag[leaf]]
255
+ end.select { |ary| ary.last == md_tag }
256
+
257
+ is_snazzy_clade = non_clade_leaves_with_this_md_tag.count.zero?
258
+ if is_snazzy_clade
259
+ if !snazzy_info[clade].nil?
260
+ snazzy_info[clade][md_cat] = md_tag
261
+ else
262
+ snazzy_info[clade] = { md_cat => md_tag }
263
+ end
264
+ end
265
+ end
266
+ end
267
+
268
+ snazzy_info
269
+ end
270
+
201
271
  def read_mapping_file fname
202
272
  md_cat_names = nil
203
273
  metadata = TreeClusters::Attrs.new
@@ -255,194 +325,4 @@ module TreeClusters
255
325
 
256
326
  [attr_names, attrs]
257
327
  end
258
-
259
- # A Hash table for genome/leaf/taxa attributes
260
- class Attrs < Hash
261
-
262
- # Returns the an AttrArray of Sets for the given genomes and
263
- # attribute.
264
- #
265
- # @note If a genome is in the leaves array, but is not in the hash
266
- # table, NO error will be raised. Rather that genome will be
267
- # skipped. This is for cases in which not all genomes have
268
- # attributes.
269
- #
270
- # @param leaves [Array<String>] names of the leaves for which you
271
- # need attributes
272
- # @param attr [Symbol] the attribute you are interested in eg,
273
- # :genes
274
- #
275
- # @return [AttrArray<Set>] an AttrArray of Sets of
276
- # attributes
277
- #
278
- # @raise [AbortIf::Exit] if they leaf is present but doesn't have
279
- # the requested attr
280
- def attrs leaves, attr
281
- ary = leaves.map do |leaf|
282
-
283
- if self.has_key? leaf
284
- abort_unless self[leaf].has_key?(attr),
285
- "Missing attr #{attr.inspect} for leaf '#{leaf}'"
286
-
287
- self[leaf][attr]
288
- else
289
- nil
290
- end
291
- end.compact
292
-
293
- TreeClusters::AttrArray.new ary
294
- end
295
-
296
- def add leaf, attr, val
297
- if self.has_key? leaf
298
- self[leaf][attr] = val
299
- else
300
- self[leaf] = { attr => val }
301
- end
302
- end
303
- end
304
-
305
- # Provides convenience methods for working with Arrays of Sets
306
- class AttrArray < Object::Array
307
- # Takes the union of all sets in the AttrArray
308
- #
309
- # @return [Set]
310
- def union
311
- self.reduce(&:union)
312
- end
313
-
314
- # Takes the intersection of all sets in the AttrArray
315
- #
316
- # @return [Set]
317
- def intersection
318
- self.reduce(&:intersection)
319
- end
320
- end
321
-
322
- # Represents a clade in a NewickTree
323
- class Clade
324
- attr_accessor :name,
325
- :all_leaves,
326
- :left_leaves,
327
- :right_leaves,
328
- :all_sibling_leaves,
329
- :each_sibling_leaf_set,
330
- :parent_leaves,
331
- :non_parent_leaves,
332
- :other_leaves,
333
- :single_tag_info,
334
- :all_tags
335
-
336
- # @note If a node name is quoted, then those quotes are removed
337
- # first.
338
- #
339
- # @param node [NewickNode] a NewickNode from a NewickTree
340
- # @param tree [NewickTree] a NewickTree
341
- def initialize node, tree, metadata=nil
342
- tree_taxa = tree.unquoted_taxa
343
-
344
- @name = unquote node.name
345
- @all_leaves = descendant_leaves node
346
-
347
- if (children = node.children).count == 2
348
- lchild, rchild = node.children
349
-
350
- @left_leaves = descendant_leaves lchild
351
-
352
- @right_leaves = descendant_leaves rchild
353
- end
354
-
355
- siblings = node.siblings
356
- # assert siblings.count == 1,
357
- # "Node #{node.name} has more than one sibling."
358
-
359
- @each_sibling_leaf_set = siblings.
360
- map { |node| descendant_leaves node }
361
-
362
- @all_sibling_leaves = @each_sibling_leaf_set.flatten.uniq
363
-
364
- parent = node.parent
365
- assert parent,
366
- "Noge #{node.name} has no parent. Is it the root?"
367
- @parent_leaves = descendant_leaves parent
368
-
369
- @other_leaves =
370
- Object::Set.new(tree_taxa) - Object::Set.new(all_leaves)
371
-
372
- @non_parent_leaves =
373
- Object::Set.new(tree_taxa) - Object::Set.new(parent_leaves)
374
-
375
- if metadata
376
- @metadata = metadata
377
- @all_tags ||= get_all_tags
378
- @single_tag_info ||= get_single_tag_info
379
- else
380
- @single_tag_info = nil
381
- end
382
- end
383
-
384
- # Compares two Clades field by field.
385
- #
386
- # If all instance variables are == than the two clades are == as
387
- # well.
388
- def == clade
389
- (
390
- self.name == clade.name &&
391
- self.all_leaves == clade.all_leaves &&
392
- self.left_leaves == clade.left_leaves &&
393
- self.right_leaves == clade.right_leaves &&
394
- self.all_sibling_leaves == clade.all_sibling_leaves &&
395
- self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
396
- self.parent_leaves == clade.parent_leaves &&
397
- self.other_leaves == clade.other_leaves &&
398
- self.single_tag_info == clade.single_tag_info &&
399
- self.all_tags == clade.all_tags
400
- )
401
- end
402
-
403
- # Alias for ==
404
- def eql? clade
405
- self == clade
406
- end
407
-
408
- private
409
-
410
- def get_single_tag_info
411
- @all_tags.map do |md_cat, set|
412
- [md_cat, set.count == 1 ? set.to_a.first : nil]
413
- end.to_h
414
- end
415
-
416
- def get_all_tags
417
- # name2tag has leaf names => metadata tag and is an Attrs
418
- @metadata.map do |md_cat, name2tag|
419
- tag_info = self.all_leaves.map do |leaf|
420
- assert name2tag.has_key?(leaf),
421
- "leaf #{leaf} is not present in name2tag ht for " +
422
- "md_cat #{md_cat}"
423
-
424
- name2tag[leaf]
425
- end
426
-
427
- [md_cat, Set.new(tag_info)]
428
- end.to_h
429
- end
430
-
431
- def descendant_leaves node
432
- if node.leaf?
433
- [unquote(node.name)]
434
- else
435
- node.
436
- descendants.
437
- flatten.
438
- uniq.
439
- select { |node| node.leaf? }.
440
- map { |node| unquote(node.name) }
441
- end
442
- end
443
-
444
- def unquote str
445
- str.tr %q{"'}, ""
446
- end
447
- end
448
328
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_clusters
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-03 00:00:00.000000000 Z
11
+ date: 2017-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -172,6 +172,7 @@ description: Snazzy code for working with each cluster in a tree.
172
172
  email:
173
173
  - moorer@udel.edu
174
174
  executables:
175
+ - clade_attrs
175
176
  - snazzy_clades
176
177
  - snazzy_clades_attrs
177
178
  - snazzy_clades_key_cols
@@ -188,10 +189,14 @@ files:
188
189
  - Rakefile
189
190
  - bin/console
190
191
  - bin/setup
192
+ - exe/clade_attrs
191
193
  - exe/snazzy_clades
192
194
  - exe/snazzy_clades_attrs
193
195
  - exe/snazzy_clades_key_cols
194
196
  - lib/tree_clusters.rb
197
+ - lib/tree_clusters/attr_array.rb
198
+ - lib/tree_clusters/attrs.rb
199
+ - lib/tree_clusters/clade.rb
195
200
  - lib/tree_clusters/version.rb
196
201
  - test_files/bad.aln
197
202
  - test_files/non_bifurcating.aln