tree_clusters 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 612ebc10d0f23a15ae9d11d35be3f1de197d29fd
4
- data.tar.gz: '084ff094f024a062e1c0f1476b9105950f796edf'
3
+ metadata.gz: e6f0523128e1d9efede01f9af494d99e7e51a48c
4
+ data.tar.gz: 58bac03495f585ad2ea755abaac5ceefdb90e2d3
5
5
  SHA512:
6
- metadata.gz: 247778fdebedd3213d96ccafc52ce1d5667bdb9e8eefee60e30b3dbf1fa618ea73000d6d2a5802e47e84d9ba6fa4c119fd3acb1d21907c4c268f63e38949ffcd
7
- data.tar.gz: e8d5cc7d6aa7ce86f277f42c5a5b8bd7b7d2bebe5ac3fbf5c4201315bc7aa8e411940056c30dce22226864d982083605180bfe7304c824566e107a43a308c910
6
+ metadata.gz: 73a82096be4f5be8199be28bf3e12b0ac63c08b9da1cfea1fded6cd4da7c448ffc7cdc8a285c681b88493b39fa32fcace02be0fff23190a314050d107db4b2f5
7
+ data.tar.gz: a5b00bec9a4f567efc098e664e6479e691f96e761a5c57ac5e4f198159c9d1cfd320dbb2033a1a458597460f621815e03ee6ecef351f31db4597f4a0a7aac838
data/exe/clade_attrs ADDED
@@ -0,0 +1,229 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ Signal.trap("PIPE", "EXIT")
4
+
5
+ require "tree_clusters"
6
+ require "trollop"
7
+ require "parse_fasta"
8
+ require "shannon"
9
+ require "fileutils"
10
+
11
+ TreeClusters.extend TreeClusters
12
+
13
+ def puts_info outf, clade_id, attr_cat, attr_set
14
+ outf.puts [clade_id, attr_cat, attr_set.to_a].join "\t"
15
+ end
16
+
17
+ opts = Trollop.options do
18
+ version TreeClusters::VERSION
19
+
20
+ banner <<-EOS
21
+
22
+
23
+ Checking IDs
24
+ ------------
25
+
26
+ IDs for the sequences must match between the three input files.
27
+
28
+ The tree file is allowed to have quoted taxa names, but the mapping
29
+ file and alignment file are not.
30
+
31
+ If your alignment file has spaces in the name, the ID part of the
32
+ header (i.e., the part up until the space) must match with the
33
+ sequence IDs in the tree and the mapping file.
34
+
35
+ Example: This would be okay.
36
+
37
+ tree file:
38
+ ('genome_A', 'genome_B');
39
+
40
+ aln file:
41
+ >genome_A apple pie
42
+ AAAAA
43
+ >genome_B brown sugar
44
+ AATTA
45
+
46
+ mapping file:
47
+ name coolness
48
+ genome_A cool
49
+ genome_B notcool
50
+
51
+
52
+ Subtracting parent nodes
53
+ ------------------------
54
+
55
+ If a clade's parent would be the root of the tree, no columns will
56
+ be subtracted when removing the parent columns as it would be the
57
+ entire alignment.
58
+
59
+ Options:
60
+ EOS
61
+
62
+ opt(:tree,
63
+ "Newick tree file",
64
+ type: :string)
65
+ opt(:mapping,
66
+ "Mapping file",
67
+ type: :string)
68
+ opt(:attrs,
69
+ "Attributes file",
70
+ type: :string)
71
+
72
+ opt(:clade_size_cutoff,
73
+ "Consider only clades with at least this many leaves",
74
+ default: 1)
75
+
76
+ opt(:outdir,
77
+ "Output directory",
78
+ default: ".")
79
+ opt(:base,
80
+ "Basename for output",
81
+ default: "clade_attrs")
82
+ end
83
+
84
+ abort_if opts[:tree].nil?,
85
+ "--tree is a required arg"
86
+ abort_if opts[:mapping].nil?,
87
+ "--mapping is a required arg"
88
+ abort_if opts[:attrs].nil?,
89
+ "--attrs is a required arg"
90
+
91
+ abort_unless_file_exists opts[:tree]
92
+ abort_unless_file_exists opts[:mapping]
93
+ abort_unless_file_exists opts[:attrs]
94
+
95
+ # TODO check IDs when attrs is not a fasta file
96
+ # TreeClusters.check_ids opts[:tree], opts[:mapping], opts[:attrs]
97
+
98
+ abort_unless opts[:clade_size_cutoff] >= 1,
99
+ "--clade-size-cutoff must be >= 1"
100
+
101
+ FileUtils.mkdir_p opts[:outdir]
102
+
103
+ tree = NewickTree.fromFile opts[:tree]
104
+ metadata = TreeClusters.read_mapping_file opts[:mapping]
105
+ snazzy_info = TreeClusters.snazzy_info tree, metadata
106
+ attr_names, leaf2attrs = TreeClusters.read_attrs_file opts[:attrs]
107
+
108
+ ext_base = "clade_attrs"
109
+
110
+ clades_fname =
111
+ File.join opts[:outdir],
112
+ "#{opts[:base]}.#{ext_base}.txt"
113
+ members_fname =
114
+ File.join opts[:outdir],
115
+ "#{opts[:base]}.#{ext_base}_clade_members.txt"
116
+ attrs_fname =
117
+ File.join opts[:outdir],
118
+ "#{opts[:base]}.#{ext_base}_attrs_union.txt"
119
+ attrs_intersection_fname =
120
+ File.join opts[:outdir],
121
+ "#{opts[:base]}.#{ext_base}_attrs_intersection.txt"
122
+ attrs_minus_parent_attrs_fname =
123
+ File.join opts[:outdir],
124
+ "#{opts[:base]}.#{ext_base}_attrs_minus_parent_attrs.txt"
125
+ attrs_minus_sibling_attrs_fname =
126
+ File.join opts[:outdir],
127
+ "#{opts[:base]}.#{ext_base}_attrs_minus_sibling_attrs.txt"
128
+ attrs_minus_other_attrs_fname =
129
+ File.join opts[:outdir],
130
+ "#{opts[:base]}.#{ext_base}_attrs_minus_other_attrs.txt"
131
+
132
+
133
+ info_f =
134
+ File.open(clades_fname, "w")
135
+ clade_members_f =
136
+ File.open(members_fname, "w")
137
+ attrs_f =
138
+ File.open(attrs_fname, "w")
139
+ attrs_intersection_f =
140
+ File.open(attrs_intersection_fname, "w")
141
+ attrs_minus_parent_attrs_f =
142
+ File.open(attrs_minus_parent_attrs_fname, "w")
143
+ attrs_minus_sibling_attrs_f =
144
+ File.open(attrs_minus_sibling_attrs_fname, "w")
145
+ attrs_minus_other_attrs_f =
146
+ File.open(attrs_minus_other_attrs_fname, "w")
147
+
148
+
149
+ begin
150
+ # info is { metadata_category => metadata_tag , ... }
151
+ snazzy_info.each_with_index do |(clade, info), idx|
152
+ assert clade.all_leaves.all? { |leaf| leaf2attrs.has_key? leaf },
153
+ "Not all leaves are present in the leaf2attrs hash table"
154
+
155
+ clade_id = "clade_#{idx+1}___#{clade.name}"
156
+
157
+ is_snazzy = info.nil? ? false : true
158
+ snazzy = is_snazzy ? "snazzy" : "not_snazzy"
159
+
160
+ if is_snazzy
161
+ info_f.puts [clade_id,
162
+ info.count,
163
+ info.map { |pair| pair.join("|")}].join "\t"
164
+ else
165
+ info_f.puts [clade_id,
166
+ 0,
167
+ "not_snazzy"].join "\t"
168
+ end
169
+
170
+ clade_members_f.puts [clade_id,
171
+ clade.all_leaves.count,
172
+ clade.all_leaves].join "\t"
173
+
174
+ attr_names.each do |attr_category|
175
+ attrs_all_leaves =
176
+ leaf2attrs.attrs clade.all_leaves, attr_category
177
+
178
+ attrs_all_sibling_leaves =
179
+ leaf2attrs.attrs clade.all_sibling_leaves,
180
+ attr_category
181
+ attrs_parent_leaves =
182
+ leaf2attrs.attrs clade.parent_leaves,
183
+ attr_category
184
+ attrs_other_leaves =
185
+ leaf2attrs.attrs clade.other_leaves,
186
+ attr_category
187
+
188
+ attrs_all_minus_parent =
189
+ attrs_all_leaves.union - attrs_parent_leaves.union
190
+ attrs_all_minus_sibling =
191
+ attrs_all_leaves.union - attrs_all_sibling_leaves.union
192
+ attrs_all_minus_other =
193
+ attrs_all_leaves.union - attrs_other_leaves.union
194
+
195
+
196
+ puts_info attrs_f,
197
+ clade_id,
198
+ attr_category,
199
+ attrs_all_leaves.union
200
+
201
+ puts_info attrs_intersection_f,
202
+ clade_id,
203
+ attr_category,
204
+ attrs_all_leaves.intersection
205
+
206
+ puts_info attrs_minus_parent_attrs_f,
207
+ clade_id,
208
+ attr_category,
209
+ attrs_all_minus_parent
210
+
211
+ puts_info attrs_minus_sibling_attrs_f,
212
+ clade_id,
213
+ attr_category,
214
+ attrs_all_minus_sibling
215
+
216
+ puts_info attrs_minus_other_attrs_f,
217
+ clade_id,
218
+ attr_category,
219
+ attrs_all_minus_other
220
+ end
221
+ end
222
+ ensure
223
+ info_f.close
224
+ clade_members_f.close
225
+ attrs_f.close
226
+ attrs_minus_parent_attrs_f.close
227
+ attrs_minus_sibling_attrs_f.close
228
+ attrs_minus_other_attrs_f.close
229
+ end
@@ -0,0 +1,18 @@
1
+ module TreeClusters
2
+ # Provides convenience methods for working with Arrays of Sets
3
+ class AttrArray < Object::Array
4
+ # Takes the union of all sets in the AttrArray
5
+ #
6
+ # @return [Set, nil] nil when the AttrArray is empty
7
+ def union
8
+ self.reduce(&:union)
9
+ end
10
+
11
+ # Takes the intersection of all sets in the AttrArray
12
+ #
13
+ # @return [Set, nil] nil when the AttrArray is empty
14
+ def intersection
15
+ self.reduce(&:intersection)
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,47 @@
1
+ module TreeClusters
2
+ # A Hash table for genome/leaf/taxa attributes
3
+ class Attrs < Hash
4
+
5
+ # Returns an AttrArray of Sets for the given genomes and
6
+ # attribute.
7
+ #
8
+ # @note If a genome is in the leaves array, but is not in the hash
9
+ # table, NO error will be raised. Rather that genome will be
10
+ # skipped. This is for cases in which not all genomes have
11
+ # attributes.
12
+ #
13
+ # @param leaves [Array<String>] names of the leaves for which you
14
+ # need attributes
15
+ # @param attr [Symbol] the attribute you are interested in eg,
16
+ # :genes
17
+ #
18
+ # @return [AttrArray<Set>] an AttrArray of Sets of
19
+ # attributes
20
+ #
21
+ # @raise [AbortIf::Exit] if the leaf is present but doesn't have
22
+ # the requested attr
23
+ def attrs leaves, attr
24
+ ary = leaves.map do |leaf|
25
+
26
+ if self.has_key? leaf
27
+ abort_unless self[leaf].has_key?(attr),
28
+ "Missing attr #{attr.inspect} for leaf '#{leaf}'"
29
+
30
+ self[leaf][attr]
31
+ else
32
+ nil
33
+ end
34
+ end.compact
35
+
36
+ TreeClusters::AttrArray.new ary
37
+ end
38
+
39
+ def add leaf, attr, val
40
+ if self.has_key? leaf
41
+ self[leaf][attr] = val
42
+ else
43
+ self[leaf] = { attr => val }
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,128 @@
1
+ module TreeClusters
2
+ # Represents a clade in a NewickTree
3
+ class Clade
4
+ attr_accessor :name,
5
+ :all_leaves,
6
+ :left_leaves,
7
+ :right_leaves,
8
+ :all_sibling_leaves,
9
+ :each_sibling_leaf_set,
10
+ :parent_leaves,
11
+ :non_parent_leaves,
12
+ :other_leaves,
13
+ :single_tag_info,
14
+ :all_tags
15
+
16
+ # @note If a node name is quoted, then those quotes are removed
17
+ # first.
18
+ #
19
+ # @param node [NewickNode] a NewickNode from a NewickTree
20
+ # @param tree [NewickTree] a NewickTree
21
+ def initialize node, tree, metadata=nil
22
+ tree_taxa = tree.unquoted_taxa
23
+
24
+ @name = unquote node.name
25
+ @all_leaves = descendant_leaves node
26
+
27
+ if (children = node.children).count == 2
28
+ lchild, rchild = node.children
29
+
30
+ @left_leaves = descendant_leaves lchild
31
+
32
+ @right_leaves = descendant_leaves rchild
33
+ end
34
+
35
+ siblings = node.siblings
36
+ # assert siblings.count == 1,
37
+ # "Node #{node.name} has more than one sibling."
38
+
39
+ @each_sibling_leaf_set = siblings.
40
+ map { |node| descendant_leaves node }
41
+
42
+ @all_sibling_leaves = @each_sibling_leaf_set.flatten.uniq
43
+
44
+ parent = node.parent
45
+ assert parent,
46
+ "Noge #{node.name} has no parent. Is it the root?"
47
+ @parent_leaves = descendant_leaves parent
48
+
49
+ @other_leaves =
50
+ Object::Set.new(tree_taxa) - Object::Set.new(all_leaves)
51
+
52
+ @non_parent_leaves =
53
+ Object::Set.new(tree_taxa) - Object::Set.new(parent_leaves)
54
+
55
+ if metadata
56
+ @metadata = metadata
57
+ @all_tags ||= get_all_tags
58
+ @single_tag_info ||= get_single_tag_info
59
+ else
60
+ @single_tag_info = nil
61
+ end
62
+ end
63
+
64
+ # Compares two Clades field by field.
65
+ #
66
+ # If all of the compared fields are == then the two clades are == as
67
+ # well.
68
+ def == clade
69
+ (
70
+ self.name == clade.name &&
71
+ self.all_leaves == clade.all_leaves &&
72
+ self.left_leaves == clade.left_leaves &&
73
+ self.right_leaves == clade.right_leaves &&
74
+ self.all_sibling_leaves == clade.all_sibling_leaves &&
75
+ self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
76
+ self.parent_leaves == clade.parent_leaves &&
77
+ self.other_leaves == clade.other_leaves &&
78
+ self.single_tag_info == clade.single_tag_info &&
79
+ self.all_tags == clade.all_tags
80
+ )
81
+ end
82
+
83
+ # Alias for ==
84
+ def eql? clade
85
+ self == clade
86
+ end
87
+
88
+ private
89
+
90
+ def get_single_tag_info
91
+ @all_tags.map do |md_cat, set|
92
+ [md_cat, set.count == 1 ? set.to_a.first : nil]
93
+ end.to_h
94
+ end
95
+
96
+ def get_all_tags
97
+ # name2tag has leaf names => metadata tag and is an Attrs
98
+ @metadata.map do |md_cat, name2tag|
99
+ tag_info = self.all_leaves.map do |leaf|
100
+ assert name2tag.has_key?(leaf),
101
+ "leaf #{leaf} is not present in name2tag ht for " +
102
+ "md_cat #{md_cat}"
103
+
104
+ name2tag[leaf]
105
+ end
106
+
107
+ [md_cat, Set.new(tag_info)]
108
+ end.to_h
109
+ end
110
+
111
+ def descendant_leaves node
112
+ if node.leaf?
113
+ [unquote(node.name)]
114
+ else
115
+ node.
116
+ descendants.
117
+ flatten.
118
+ uniq.
119
+ select { |node| node.leaf? }.
120
+ map { |node| unquote(node.name) }
121
+ end
122
+ end
123
+
124
+ def unquote str
125
+ str.tr %q{"'}, ""
126
+ end
127
+ end
128
+ end
@@ -1,3 +1,3 @@
1
1
  module TreeClusters
2
- VERSION = "0.6.0"
2
+ VERSION = "0.7.0"
3
3
  end
data/lib/tree_clusters.rb CHANGED
@@ -3,6 +3,9 @@ require "Newick"
3
3
  require "set"
4
4
  require "parse_fasta"
5
5
  require "shannon"
6
+ require "tree_clusters/attrs"
7
+ require "tree_clusters/attr_array"
8
+ require "tree_clusters/clade"
6
9
  require "tree_clusters/version"
7
10
 
8
11
  include AbortIf
@@ -198,6 +201,73 @@ module TreeClusters
198
201
  snazzy_clades
199
202
  end
200
203
 
204
+ def snazzy_info tree, metadata
205
+ snazzy_info = {}
206
+
207
+ clades = self.
208
+ all_clades(tree, metadata).
209
+ sort_by { |clade| clade.all_leaves.count }.
210
+ reverse
211
+
212
+ # Non snazzy clades have a value of nil, so set all to nil and the
213
+ # snazzy ones will be overwritten.
214
+ clades.each do |clade|
215
+ snazzy_info[clade] = nil
216
+ end
217
+
218
+ metadata.each do |md_cat, leaf2mdtag|
219
+ already_checked = Set.new
220
+ single_tag_clades = {}
221
+
222
+ clades.each do |clade|
223
+ assert clade.all_leaves.count > 1,
224
+ "A clade cannot also be a leaf"
225
+
226
+ unless clade.all_leaves.all? do |leaf|
227
+ already_checked.include? leaf
228
+ end
229
+ md_tags = clade.all_leaves.map do |leaf|
230
+ assert leaf2mdtag.has_key?(leaf),
231
+ "leaf #{leaf} is missing from leaf2mdtag ht"
232
+
233
+ leaf2mdtag[leaf]
234
+ end
235
+
236
+ # this clade is mono-phyletic w.r.t. this metadata category.
237
+ if md_tags.uniq.count == 1
238
+ clade.all_leaves.each do |leaf|
239
+ already_checked << leaf
240
+ end
241
+
242
+ assert !single_tag_clades.has_key?(clade),
243
+ "clade #{clade.name} is repeated in single_tag_clades for #{md_cat}"
244
+
245
+ single_tag_clades[clade] = md_tags.first
246
+ end
247
+ end
248
+ end
249
+
250
+ single_tag_clades.each do |clade, md_tag|
251
+ non_clade_leaves = tree.unquoted_taxa - clade.all_leaves
252
+
253
+ non_clade_leaves_with_this_md_tag = non_clade_leaves.map do |leaf|
254
+ [leaf, leaf2mdtag[leaf]]
255
+ end.select { |ary| ary.last == md_tag }
256
+
257
+ is_snazzy_clade = non_clade_leaves_with_this_md_tag.count.zero?
258
+ if is_snazzy_clade
259
+ if !snazzy_info[clade].nil?
260
+ snazzy_info[clade][md_cat] = md_tag
261
+ else
262
+ snazzy_info[clade] = { md_cat => md_tag }
263
+ end
264
+ end
265
+ end
266
+ end
267
+
268
+ snazzy_info
269
+ end
270
+
201
271
  def read_mapping_file fname
202
272
  md_cat_names = nil
203
273
  metadata = TreeClusters::Attrs.new
@@ -255,194 +325,4 @@ module TreeClusters
255
325
 
256
326
  [attr_names, attrs]
257
327
  end
258
-
259
- # A Hash table for genome/leaf/taxa attributes
260
- class Attrs < Hash
261
-
262
- # Returns the an AttrArray of Sets for the given genomes and
263
- # attribute.
264
- #
265
- # @note If a genome is in the leaves array, but is not in the hash
266
- # table, NO error will be raised. Rather that genome will be
267
- # skipped. This is for cases in which not all genomes have
268
- # attributes.
269
- #
270
- # @param leaves [Array<String>] names of the leaves for which you
271
- # need attributes
272
- # @param attr [Symbol] the attribute you are interested in eg,
273
- # :genes
274
- #
275
- # @return [AttrArray<Set>] an AttrArray of Sets of
276
- # attributes
277
- #
278
- # @raise [AbortIf::Exit] if they leaf is present but doesn't have
279
- # the requested attr
280
- def attrs leaves, attr
281
- ary = leaves.map do |leaf|
282
-
283
- if self.has_key? leaf
284
- abort_unless self[leaf].has_key?(attr),
285
- "Missing attr #{attr.inspect} for leaf '#{leaf}'"
286
-
287
- self[leaf][attr]
288
- else
289
- nil
290
- end
291
- end.compact
292
-
293
- TreeClusters::AttrArray.new ary
294
- end
295
-
296
- def add leaf, attr, val
297
- if self.has_key? leaf
298
- self[leaf][attr] = val
299
- else
300
- self[leaf] = { attr => val }
301
- end
302
- end
303
- end
304
-
305
- # Provides convenience methods for working with Arrays of Sets
306
- class AttrArray < Object::Array
307
- # Takes the union of all sets in the AttrArray
308
- #
309
- # @return [Set]
310
- def union
311
- self.reduce(&:union)
312
- end
313
-
314
- # Takes the intersection of all sets in the AttrArray
315
- #
316
- # @return [Set]
317
- def intersection
318
- self.reduce(&:intersection)
319
- end
320
- end
321
-
322
- # Represents a clade in a NewickTree
323
- class Clade
324
- attr_accessor :name,
325
- :all_leaves,
326
- :left_leaves,
327
- :right_leaves,
328
- :all_sibling_leaves,
329
- :each_sibling_leaf_set,
330
- :parent_leaves,
331
- :non_parent_leaves,
332
- :other_leaves,
333
- :single_tag_info,
334
- :all_tags
335
-
336
- # @note If a node name is quoted, then those quotes are removed
337
- # first.
338
- #
339
- # @param node [NewickNode] a NewickNode from a NewickTree
340
- # @param tree [NewickTree] a NewickTree
341
- def initialize node, tree, metadata=nil
342
- tree_taxa = tree.unquoted_taxa
343
-
344
- @name = unquote node.name
345
- @all_leaves = descendant_leaves node
346
-
347
- if (children = node.children).count == 2
348
- lchild, rchild = node.children
349
-
350
- @left_leaves = descendant_leaves lchild
351
-
352
- @right_leaves = descendant_leaves rchild
353
- end
354
-
355
- siblings = node.siblings
356
- # assert siblings.count == 1,
357
- # "Node #{node.name} has more than one sibling."
358
-
359
- @each_sibling_leaf_set = siblings.
360
- map { |node| descendant_leaves node }
361
-
362
- @all_sibling_leaves = @each_sibling_leaf_set.flatten.uniq
363
-
364
- parent = node.parent
365
- assert parent,
366
- "Noge #{node.name} has no parent. Is it the root?"
367
- @parent_leaves = descendant_leaves parent
368
-
369
- @other_leaves =
370
- Object::Set.new(tree_taxa) - Object::Set.new(all_leaves)
371
-
372
- @non_parent_leaves =
373
- Object::Set.new(tree_taxa) - Object::Set.new(parent_leaves)
374
-
375
- if metadata
376
- @metadata = metadata
377
- @all_tags ||= get_all_tags
378
- @single_tag_info ||= get_single_tag_info
379
- else
380
- @single_tag_info = nil
381
- end
382
- end
383
-
384
- # Compares two Clades field by field.
385
- #
386
- # If all instance variables are == than the two clades are == as
387
- # well.
388
- def == clade
389
- (
390
- self.name == clade.name &&
391
- self.all_leaves == clade.all_leaves &&
392
- self.left_leaves == clade.left_leaves &&
393
- self.right_leaves == clade.right_leaves &&
394
- self.all_sibling_leaves == clade.all_sibling_leaves &&
395
- self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
396
- self.parent_leaves == clade.parent_leaves &&
397
- self.other_leaves == clade.other_leaves &&
398
- self.single_tag_info == clade.single_tag_info &&
399
- self.all_tags == clade.all_tags
400
- )
401
- end
402
-
403
- # Alias for ==
404
- def eql? clade
405
- self == clade
406
- end
407
-
408
- private
409
-
410
- def get_single_tag_info
411
- @all_tags.map do |md_cat, set|
412
- [md_cat, set.count == 1 ? set.to_a.first : nil]
413
- end.to_h
414
- end
415
-
416
- def get_all_tags
417
- # name2tag has leaf names => metadata tag and is an Attrs
418
- @metadata.map do |md_cat, name2tag|
419
- tag_info = self.all_leaves.map do |leaf|
420
- assert name2tag.has_key?(leaf),
421
- "leaf #{leaf} is not present in name2tag ht for " +
422
- "md_cat #{md_cat}"
423
-
424
- name2tag[leaf]
425
- end
426
-
427
- [md_cat, Set.new(tag_info)]
428
- end.to_h
429
- end
430
-
431
- def descendant_leaves node
432
- if node.leaf?
433
- [unquote(node.name)]
434
- else
435
- node.
436
- descendants.
437
- flatten.
438
- uniq.
439
- select { |node| node.leaf? }.
440
- map { |node| unquote(node.name) }
441
- end
442
- end
443
-
444
- def unquote str
445
- str.tr %q{"'}, ""
446
- end
447
- end
448
328
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_clusters
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-03 00:00:00.000000000 Z
11
+ date: 2017-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -172,6 +172,7 @@ description: Snazzy code for working with each cluster in a tree.
172
172
  email:
173
173
  - moorer@udel.edu
174
174
  executables:
175
+ - clade_attrs
175
176
  - snazzy_clades
176
177
  - snazzy_clades_attrs
177
178
  - snazzy_clades_key_cols
@@ -188,10 +189,14 @@ files:
188
189
  - Rakefile
189
190
  - bin/console
190
191
  - bin/setup
192
+ - exe/clade_attrs
191
193
  - exe/snazzy_clades
192
194
  - exe/snazzy_clades_attrs
193
195
  - exe/snazzy_clades_key_cols
194
196
  - lib/tree_clusters.rb
197
+ - lib/tree_clusters/attr_array.rb
198
+ - lib/tree_clusters/attrs.rb
199
+ - lib/tree_clusters/clade.rb
195
200
  - lib/tree_clusters/version.rb
196
201
  - test_files/bad.aln
197
202
  - test_files/non_bifurcating.aln