tree_clusters 0.5.1 → 0.5.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ad186617714194c3d20bb4bc0a6e79d9779c4e29
4
- data.tar.gz: 30cbedb0728fffae00c185735e0ee3d583741e17
3
+ metadata.gz: 7d7f7982ea210cef7357e004ceb85d8b17389a32
4
+ data.tar.gz: a924d2bfd587ce03678c391f2a5f761848169726
5
5
  SHA512:
6
- metadata.gz: a7406ddcd526a485e2721a82bd74ccd5295b7a4c07482d40d5127f2f9e6e50b97b59deb3215576f9420e73315d1f4e8394c8ebef0c278e482b9ce693dbcaf5fe
7
- data.tar.gz: ceceec6c5f11037da5261e89754d02eeb463ea634f38df0ecfe150c78cf21ceb128c96a12e2ba662df4b808f625af5a97883d0638c9527ce9a977d148fa54c8b
6
+ metadata.gz: 7b4540a485a4635194a170c74c1bae7cd4eeb5b58df3d3a934933a161ef918903461ba75924f3baf65724050b2d3d122ac953a0822f5218c7ac0f8b37db72405
7
+ data.tar.gz: fced0e673bcacf6dc113d6112bd5d02b9d5fcf5756ae50d1b77cb41b1c8f97b76a3fe2ef86f8f6c7ea8ddeee96a5b5aa06600cdef310842276a486b829763eae
data/.gitignore CHANGED
@@ -21,6 +21,8 @@ snazzy_clades.*
21
21
 
22
22
  TEST
23
23
 
24
+ time.html
25
+
24
26
  # rspec failure tracking
25
27
  .rspec_status
26
28
  *.lock
@@ -19,9 +19,42 @@ opts = Trollop.options do
19
19
 
20
20
  banner <<-EOS
21
21
 
22
- Note that if a clade's parent would be the root of the tree, no
23
- columns will be subtracted when removing the parent columns as it
24
- would be the entire alignment.
22
+
23
+ Checking IDs
24
+ ------------
25
+
26
+ IDs for the sequences must match between the three input files.
27
+
28
+ The tree file is allowed to have quoted taxa names, but the mapping
29
+ file and alignment file are not.
30
+
31
+ If your alignment file has spaces in the name, the ID part of the
32
+ header (i.e., the part up until the space) must match with the
33
+ sequence IDs in the tree and the mapping file.
34
+
35
+ Example: This would be okay.
36
+
37
+ tree file:
38
+ ('genome_A', 'genome_B');
39
+
40
+ aln file:
41
+ >genome_A apple pie
42
+ AAAAA
43
+ >genome_B brown sugar
44
+ AATTA
45
+
46
+ mapping file:
47
+ name coolness
48
+ genome_A cool
49
+ genome_B notcool
50
+
51
+
52
+ Subtracting parent nodes
53
+ ------------------------
54
+
55
+ If a clade's parent would be the root of the tree, no columns will
56
+ be subtracted when removing the parent columns as it would be the
57
+ entire alignment.
25
58
 
26
59
  Options:
27
60
  EOS
data/lib/tree_clusters.rb CHANGED
@@ -22,6 +22,10 @@ class NewickTree
22
22
  end
23
23
  return clades
24
24
  end
25
+
26
+ def unquoted_taxa
27
+ self.taxa.map { |str| str.tr %q{"'}, "" }
28
+ end
25
29
  end
26
30
 
27
31
  # Top level namespace of the Gem.
@@ -82,8 +86,10 @@ module TreeClusters
82
86
  Set.new low_ent_cols
83
87
  end
84
88
 
89
+ # @note If there are quoted names in the tree file, they are
90
+ # unquoted first.
85
91
  def check_ids tree, mapping, aln
86
- tree_ids = Set.new(NewickTree.fromFile(tree).taxa)
92
+ tree_ids = Set.new(NewickTree.fromFile(tree).unquoted_taxa)
87
93
 
88
94
  mapping_ids = Set.new
89
95
  File.open(mapping, "rt").each_line.with_index do |line, idx|
@@ -143,8 +149,11 @@ module TreeClusters
143
149
  metadata.each do |md_cat, leaf2mdtag|
144
150
  already_checked = Set.new
145
151
  single_tag_clades = {}
152
+ p [md_cat, leaf2mdtag]
146
153
 
147
154
  clades.each do |clade|
155
+ p [clade.name, clade.all_leaves]
156
+
148
157
  assert clade.all_leaves.count > 1,
149
158
  "A clade cannot also be a leaf"
150
159
 
@@ -173,7 +182,7 @@ module TreeClusters
173
182
  end
174
183
 
175
184
  single_tag_clades.each do |clade, md_tag|
176
- non_clade_leaves = tree.taxa - clade.all_leaves
185
+ non_clade_leaves = tree.unquoted_taxa - clade.all_leaves
177
186
 
178
187
  non_clade_leaves_with_this_md_tag = non_clade_leaves.map do |leaf|
179
188
  [leaf, leaf2mdtag[leaf]]
@@ -288,10 +297,15 @@ module TreeClusters
288
297
  :single_tag_info,
289
298
  :all_tags
290
299
 
300
+ # @note If a node name is quoted, then those quotes are removed
301
+ # first.
302
+ #
291
303
  # @param node [NewickNode] a NewickNode from a NewickTree
292
304
  # @param tree [NewickTree] a NewickTree
293
305
  def initialize node, tree, metadata=nil
294
- @name = node.name
306
+ tree_taxa = tree.unquoted_taxa
307
+
308
+ @name = unquote node.name
295
309
  @all_leaves = descendant_leaves node
296
310
 
297
311
  if (children = node.children).count == 2
@@ -317,10 +331,10 @@ module TreeClusters
317
331
  @parent_leaves = descendant_leaves parent
318
332
 
319
333
  @other_leaves =
320
- Object::Set.new(tree.taxa) - Object::Set.new(all_leaves)
334
+ Object::Set.new(tree_taxa) - Object::Set.new(all_leaves)
321
335
 
322
336
  @non_parent_leaves =
323
- Object::Set.new(tree.taxa) - Object::Set.new(parent_leaves)
337
+ Object::Set.new(tree_taxa) - Object::Set.new(parent_leaves)
324
338
 
325
339
  if metadata
326
340
  @metadata = metadata
@@ -345,7 +359,8 @@ module TreeClusters
345
359
  self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
346
360
  self.parent_leaves == clade.parent_leaves &&
347
361
  self.other_leaves == clade.other_leaves &&
348
- self.single_tag_info == clade.single_tag_info
362
+ self.single_tag_info == clade.single_tag_info &&
363
+ self.all_tags == clade.all_tags
349
364
  )
350
365
  end
351
366
 
@@ -379,14 +394,19 @@ module TreeClusters
379
394
 
380
395
  def descendant_leaves node
381
396
  if node.leaf?
382
- [node.name]
397
+ [unquote(node.name)]
383
398
  else
384
399
  node.
385
400
  descendants.
386
401
  flatten.
387
402
  uniq.
388
- select { |node| node.leaf? }.map(&:name)
403
+ select { |node| node.leaf? }.
404
+ map { |node| unquote(node.name) }
389
405
  end
390
406
  end
407
+
408
+ def unquote str
409
+ str.tr %q{"'}, ""
410
+ end
391
411
  end
392
412
  end
@@ -1,3 +1,3 @@
1
1
  module TreeClusters
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.2"
3
3
  end
data/test_files/small.tre CHANGED
@@ -1 +1 @@
1
- ((a-1, a-2)cluster_A, ((b-1, b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
1
+ (('a-1', 'a-2')cluster_A, (("b-1", b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
data/test_files/test.tre CHANGED
@@ -1 +1 @@
1
- ((((((g1:1, g2:1)cluster1:1, g3:2)cluster4:1, (g4a:1, g4b:1)cluster7:2)cluster6:1, ((g5:2, g6:2)cluster11:1, g7:3)cluster14:1)cluster10:1, g8:5)cluster16:1, ((g9:1, g10:1)cluster19:1, g11:2)cluster22:4)cluster18:1;
1
+ ((((((g1:1, 'g2':1)cluster1:1, g3:2)cluster4:1, (g4a:1, g4b:1)cluster7:2)cluster6:1, ((g5:2, "g6":2)cluster11:1, g7:3)cluster14:1)cluster10:1, g8:5)cluster16:1, ((g9:1, g10:1)cluster19:1, g11:2)cluster22:4)cluster18:1;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_clusters
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore