tree_clusters 0.5.1 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ad186617714194c3d20bb4bc0a6e79d9779c4e29
4
- data.tar.gz: 30cbedb0728fffae00c185735e0ee3d583741e17
3
+ metadata.gz: 7d7f7982ea210cef7357e004ceb85d8b17389a32
4
+ data.tar.gz: a924d2bfd587ce03678c391f2a5f761848169726
5
5
  SHA512:
6
- metadata.gz: a7406ddcd526a485e2721a82bd74ccd5295b7a4c07482d40d5127f2f9e6e50b97b59deb3215576f9420e73315d1f4e8394c8ebef0c278e482b9ce693dbcaf5fe
7
- data.tar.gz: ceceec6c5f11037da5261e89754d02eeb463ea634f38df0ecfe150c78cf21ceb128c96a12e2ba662df4b808f625af5a97883d0638c9527ce9a977d148fa54c8b
6
+ metadata.gz: 7b4540a485a4635194a170c74c1bae7cd4eeb5b58df3d3a934933a161ef918903461ba75924f3baf65724050b2d3d122ac953a0822f5218c7ac0f8b37db72405
7
+ data.tar.gz: fced0e673bcacf6dc113d6112bd5d02b9d5fcf5756ae50d1b77cb41b1c8f97b76a3fe2ef86f8f6c7ea8ddeee96a5b5aa06600cdef310842276a486b829763eae
data/.gitignore CHANGED
@@ -21,6 +21,8 @@ snazzy_clades.*
21
21
 
22
22
  TEST
23
23
 
24
+ time.html
25
+
24
26
  # rspec failure tracking
25
27
  .rspec_status
26
28
  *.lock
@@ -19,9 +19,42 @@ opts = Trollop.options do
19
19
 
20
20
  banner <<-EOS
21
21
 
22
- Note that if a clade's parent would be the root of the tree, no
23
- columns will be subtracted when removing the parent columns as it
24
- would be the entire alignment.
22
+
23
+ Checking IDs
24
+ ------------
25
+
26
+ IDs for the sequences must match between the three input files.
27
+
28
+ The tree file is allowed to have quoted taxa names, but the mapping
29
+ file and alignment file are not.
30
+
31
+ If a header in your alignment file contains spaces, the ID part of the
32
+ header (i.e., the part up to the first space) must match the
33
+ sequence IDs in the tree and the mapping file.
34
+
35
+ Example: This would be okay.
36
+
37
+ tree file:
38
+ ('genome_A', 'genome_B');
39
+
40
+ aln file:
41
+ >genome_A apple pie
42
+ AAAAA
43
+ >genome_B brown sugar
44
+ AATTA
45
+
46
+ mapping file:
47
+ name coolness
48
+ genome_A cool
49
+ genome_B notcool
50
+
51
+
52
+ Subtracting parent nodes
53
+ ------------------------
54
+
55
+ If a clade's parent would be the root of the tree, no columns will
56
+ be subtracted when removing the parent columns as it would be the
57
+ entire alignment.
25
58
 
26
59
  Options:
27
60
  EOS
data/lib/tree_clusters.rb CHANGED
@@ -22,6 +22,10 @@ class NewickTree
22
22
  end
23
23
  return clades
24
24
  end
25
+
26
+ def unquoted_taxa
27
+ self.taxa.map { |str| str.tr %q{"'}, "" }
28
+ end
25
29
  end
26
30
 
27
31
  # Top level namespace of the Gem.
@@ -82,8 +86,10 @@ module TreeClusters
82
86
  Set.new low_ent_cols
83
87
  end
84
88
 
89
+ # @note If there are quoted names in the tree file, they are
90
+ # unquoted first.
85
91
  def check_ids tree, mapping, aln
86
- tree_ids = Set.new(NewickTree.fromFile(tree).taxa)
92
+ tree_ids = Set.new(NewickTree.fromFile(tree).unquoted_taxa)
87
93
 
88
94
  mapping_ids = Set.new
89
95
  File.open(mapping, "rt").each_line.with_index do |line, idx|
@@ -143,8 +149,11 @@ module TreeClusters
143
149
  metadata.each do |md_cat, leaf2mdtag|
144
150
  already_checked = Set.new
145
151
  single_tag_clades = {}
152
+ p [md_cat, leaf2mdtag]
146
153
 
147
154
  clades.each do |clade|
155
+ p [clade.name, clade.all_leaves]
156
+
148
157
  assert clade.all_leaves.count > 1,
149
158
  "A clade cannot also be a leaf"
150
159
 
@@ -173,7 +182,7 @@ module TreeClusters
173
182
  end
174
183
 
175
184
  single_tag_clades.each do |clade, md_tag|
176
- non_clade_leaves = tree.taxa - clade.all_leaves
185
+ non_clade_leaves = tree.unquoted_taxa - clade.all_leaves
177
186
 
178
187
  non_clade_leaves_with_this_md_tag = non_clade_leaves.map do |leaf|
179
188
  [leaf, leaf2mdtag[leaf]]
@@ -288,10 +297,15 @@ module TreeClusters
288
297
  :single_tag_info,
289
298
  :all_tags
290
299
 
300
+ # @note If a node name is quoted, then those quotes are removed
301
+ # first.
302
+ #
291
303
  # @param node [NewickNode] a NewickNode from a NewickTree
292
304
  # @param tree [NewickTree] a NewickTree
293
305
  def initialize node, tree, metadata=nil
294
- @name = node.name
306
+ tree_taxa = tree.unquoted_taxa
307
+
308
+ @name = unquote node.name
295
309
  @all_leaves = descendant_leaves node
296
310
 
297
311
  if (children = node.children).count == 2
@@ -317,10 +331,10 @@ module TreeClusters
317
331
  @parent_leaves = descendant_leaves parent
318
332
 
319
333
  @other_leaves =
320
- Object::Set.new(tree.taxa) - Object::Set.new(all_leaves)
334
+ Object::Set.new(tree_taxa) - Object::Set.new(all_leaves)
321
335
 
322
336
  @non_parent_leaves =
323
- Object::Set.new(tree.taxa) - Object::Set.new(parent_leaves)
337
+ Object::Set.new(tree_taxa) - Object::Set.new(parent_leaves)
324
338
 
325
339
  if metadata
326
340
  @metadata = metadata
@@ -345,7 +359,8 @@ module TreeClusters
345
359
  self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
346
360
  self.parent_leaves == clade.parent_leaves &&
347
361
  self.other_leaves == clade.other_leaves &&
348
- self.single_tag_info == clade.single_tag_info
362
+ self.single_tag_info == clade.single_tag_info &&
363
+ self.all_tags == clade.all_tags
349
364
  )
350
365
  end
351
366
 
@@ -379,14 +394,19 @@ module TreeClusters
379
394
 
380
395
  def descendant_leaves node
381
396
  if node.leaf?
382
- [node.name]
397
+ [unquote(node.name)]
383
398
  else
384
399
  node.
385
400
  descendants.
386
401
  flatten.
387
402
  uniq.
388
- select { |node| node.leaf? }.map(&:name)
403
+ select { |node| node.leaf? }.
404
+ map { |node| unquote(node.name) }
389
405
  end
390
406
  end
407
+
408
+ def unquote str
409
+ str.tr %q{"'}, ""
410
+ end
391
411
  end
392
412
  end
@@ -1,3 +1,3 @@
1
1
  module TreeClusters
2
- VERSION = "0.5.1"
2
+ VERSION = "0.5.2"
3
3
  end
data/test_files/small.tre CHANGED
@@ -1 +1 @@
1
- ((a-1, a-2)cluster_A, ((b-1, b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
1
+ (('a-1', 'a-2')cluster_A, (("b-1", b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
data/test_files/test.tre CHANGED
@@ -1 +1 @@
1
- ((((((g1:1, g2:1)cluster1:1, g3:2)cluster4:1, (g4a:1, g4b:1)cluster7:2)cluster6:1, ((g5:2, g6:2)cluster11:1, g7:3)cluster14:1)cluster10:1, g8:5)cluster16:1, ((g9:1, g10:1)cluster19:1, g11:2)cluster22:4)cluster18:1;
1
+ ((((((g1:1, 'g2':1)cluster1:1, g3:2)cluster4:1, (g4a:1, g4b:1)cluster7:2)cluster6:1, ((g5:2, "g6":2)cluster11:1, g7:3)cluster14:1)cluster10:1, g8:5)cluster16:1, ((g9:1, g10:1)cluster19:1, g11:2)cluster22:4)cluster18:1;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_clusters
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore