tree_clusters 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f069c5801fa47b4861acb1ef2468526082c311b6
4
- data.tar.gz: f865bbcc0da44a648847e9093a4d40511d8eaef1
3
+ metadata.gz: 884a3b714acdf6b5b933c89ce0d6a82aaf9bcbca
4
+ data.tar.gz: 1e3573ead3b5ad82149cec6ccbdf8a8b94eb9939
5
5
  SHA512:
6
- metadata.gz: d82b8821ab2205517bb111602bbcc83bbf28091d8503c56332fe78db24ca1681a72d1be21f6272360629fa4e1e48872e4447ba65cd4c5c2f5971ebc90652b1da
7
- data.tar.gz: 694d9b68a945546b912ba998b817964cc98e1086d2ca2bd3933311834b26649c4ec0b66b26ac0889c76347c7167de8d0d50a6dba642c637e6353b58055b329cb
6
+ metadata.gz: 207ebce001ff2d7aaf1bd4095c8dc3718057637b86edb7b0988e14d0da1b6ad28c267f907b0cfe4e8df908d08a463fd5dd8d200c1a2864a4e27e337fd001b553
7
+ data.tar.gz: aea3b7dab128fbf1d08e6cd0ac813cc415652d2586f8be80c696f8946ac447b0251b1bbbc61acbd2f85760ab7452ae65db781f98d1d30d1feb2e02c93727d1fe
data/.gitignore CHANGED
@@ -17,5 +17,7 @@ test_files/ignore
17
17
 
18
18
  .ruby-*
19
19
 
20
+ snazzy_clades*
21
+
20
22
  # rspec failure tracking
21
23
  .rspec_status
@@ -1,3 +1,3 @@
1
1
  module TreeClusters
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/tree_clusters.rb CHANGED
@@ -32,14 +32,93 @@ module TreeClusters
32
32
  # @yieldparam clade [Clade] a clade of the tree
33
33
  #
34
34
  # @return [Enumerator<Clade>] enumerator of Clade objects
35
- def all_clades tree
36
- return enum_for(:all_clades, tree) unless block_given?
35
+ def all_clades tree, metadata=nil
36
+ return enum_for(:all_clades, tree, metadata) unless block_given?
37
37
 
38
38
  tree.clade_nodes.reverse.each do |node|
39
- yield Clade.new node, tree
39
+ yield Clade.new node, tree, metadata
40
40
  end
41
41
  end
42
42
 
43
+ def snazzy_clades tree, metadata
44
+ snazzy_clades = {}
45
+
46
+ clades = self.
47
+ all_clades(tree, metadata).
48
+ sort_by { |clade| clade.all_leaves.count }.
49
+ reverse
50
+
51
+ metadata.each do |md_cat, leaf2mdtag|
52
+ already_checked = Set.new
53
+ single_tag_clades = {}
54
+
55
+ clades.each do |clade|
56
+ assert clade.all_leaves.count > 1,
57
+ "A clade cannot also be a leaf"
58
+
59
+ unless clade.all_leaves.all? do |leaf|
60
+ already_checked.include? leaf
61
+ end
62
+ md_tags = clade.all_leaves.map do |leaf|
63
+ assert leaf2mdtag.has_key?(leaf),
64
+ "leaf #{leaf} is missing from leaf2mdtag ht"
65
+
66
+ leaf2mdtag[leaf]
67
+ end
68
+
69
+ # this clade is mono-phyletic w.r.t. this metadata category.
70
+ if md_tags.uniq.count == 1
71
+ clade.all_leaves.each do |leaf|
72
+ already_checked << leaf
73
+ end
74
+
75
+ assert !single_tag_clades.has_key?(clade),
76
+ "clade #{clade.name} is repeated in single_tag_clades for #{md_cat}"
77
+
78
+ single_tag_clades[clade] = md_tags.first
79
+ end
80
+ end
81
+ end
82
+
83
+ single_tag_clades.each do |clade, md_tag|
84
+ non_clade_leaves = tree.taxa - clade.all_leaves
85
+
86
+ non_clade_leaves_with_this_md_tag = non_clade_leaves.map do |leaf|
87
+ [leaf, leaf2mdtag[leaf]]
88
+ end.select { |ary| ary.last == md_tag }
89
+
90
+ if non_clade_leaves_with_this_md_tag.count.zero?
91
+ if snazzy_clades.has_key? clade
92
+ snazzy_clades[clade][md_cat] = md_tag
93
+ else
94
+ snazzy_clades[clade] = { md_cat => md_tag }
95
+ end
96
+ end
97
+ end
98
+ end
99
+
100
+ snazzy_clades
101
+ end
102
+
103
+ def read_mapping_file fname
104
+ md_cat_names = nil
105
+ metadata = TreeClusters::Attrs.new
106
+
107
+ File.open(fname, "rt").each_line.with_index do |line, idx|
108
+ leaf_name, *metadata_vals = line.chomp.split "\t"
109
+
110
+ if idx.zero?
111
+ md_cat_names = metadata_vals
112
+ else
113
+ metadata_vals.each_with_index do |val, val_idx|
114
+ metadata.add md_cat_names[val_idx], leaf_name, val
115
+ end
116
+ end
117
+ end
118
+
119
+ metadata
120
+ end
121
+
43
122
  # A Hash table for genome/leaf/taxa attributes
44
123
  class Attrs < Hash
45
124
 
@@ -113,11 +192,13 @@ module TreeClusters
113
192
  :each_sibling_leaf_set,
114
193
  :parent_leaves,
115
194
  :non_parent_leaves,
116
- :other_leaves
195
+ :other_leaves,
196
+ :single_tag_info,
197
+ :all_tags
117
198
 
118
199
  # @param node [NewickNode] a NewickNode from a NewickTree
119
200
  # @param tree [NewickTree] a NewickTree
120
- def initialize node, tree
201
+ def initialize node, tree, metadata=nil
121
202
  @name = node.name
122
203
  @all_leaves = descendant_leaves node
123
204
 
@@ -148,6 +229,14 @@ module TreeClusters
148
229
 
149
230
  @non_parent_leaves =
150
231
  Object::Set.new(tree.taxa) - Object::Set.new(parent_leaves)
232
+
233
+ if metadata
234
+ @metadata = metadata
235
+ @all_tags ||= get_all_tags
236
+ @single_tag_info ||= get_single_tag_info
237
+ else
238
+ @single_tag_info = nil
239
+ end
151
240
  end
152
241
 
153
242
  # Compares two Clades field by field.
@@ -163,7 +252,8 @@ module TreeClusters
163
252
  self.all_sibling_leaves == clade.all_sibling_leaves &&
164
253
  self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
165
254
  self.parent_leaves == clade.parent_leaves &&
166
- self.other_leaves == clade.other_leaves
255
+ self.other_leaves == clade.other_leaves &&
256
+ self.single_tag_info == clade.single_tag_info
167
257
  )
168
258
  end
169
259
 
@@ -174,6 +264,27 @@ module TreeClusters
174
264
 
175
265
  private
176
266
 
267
+ def get_single_tag_info
268
+ @all_tags.map do |md_cat, set|
269
+ [md_cat, set.count == 1 ? set.to_a.first : nil]
270
+ end.to_h
271
+ end
272
+
273
+ def get_all_tags
274
+ # name2tag has leaf names => metadata tag and is an Attrs
275
+ @metadata.map do |md_cat, name2tag|
276
+ tag_info = self.all_leaves.map do |leaf|
277
+ assert name2tag.has_key?(leaf),
278
+ "leaf #{leaf} is not present in name2tag ht for " +
279
+ "md_cat #{md_cat}"
280
+
281
+ name2tag[leaf]
282
+ end
283
+
284
+ [md_cat, Set.new(tag_info)]
285
+ end.to_h
286
+ end
287
+
177
288
  def descendant_leaves node
178
289
  if node.leaf?
179
290
  [node.name]
@@ -0,0 +1,8 @@
1
+ name coolness snazzyness sillyness jauntiness oddness
2
+ a-1 cool snazzy 1 jaunty quite odd
3
+ a-2 cool snazzy 7 notjaunty quite odd
4
+ b-1 notcool snazzy 3 jaunty not odd
5
+ b-2 notcool snazzy 4 notjaunty not odd
6
+ bb-1 notcool notsnazzy 5 notjaunty not odd
7
+ bbb-1 notcool notsnazzy 1 jaunty rather odd
8
+ bbb-2 notcool notsnazzy 7 jaunty rather odd
@@ -0,0 +1 @@
1
+ ((a-1, a-2)cluster_A, ((b-1, b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
@@ -30,4 +30,5 @@ Gem::Specification.new do |spec|
30
30
 
31
31
  spec.add_runtime_dependency "abort_if", "~> 0.2.0"
32
32
  spec.add_runtime_dependency "newick-ruby", "~> 1.0", ">= 1.0.4"
33
+ spec.add_runtime_dependency "trollop", "~> 2.1", ">= 2.1.2"
33
34
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_clusters
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
@@ -114,6 +114,26 @@ dependencies:
114
114
  - - ">="
115
115
  - !ruby/object:Gem::Version
116
116
  version: 1.0.4
117
+ - !ruby/object:Gem::Dependency
118
+ name: trollop
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '2.1'
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: 2.1.2
127
+ type: :runtime
128
+ prerelease: false
129
+ version_requirements: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - "~>"
132
+ - !ruby/object:Gem::Version
133
+ version: '2.1'
134
+ - - ">="
135
+ - !ruby/object:Gem::Version
136
+ version: 2.1.2
117
137
  description: Snazzy code for working with each cluster in a tree.
118
138
  email:
119
139
  - moorer@udel.edu
@@ -135,6 +155,8 @@ files:
135
155
  - lib/tree_clusters/version.rb
136
156
  - test_files/non_bifurcating.aln
137
157
  - test_files/non_bifurcating.tre
158
+ - test_files/small.mapping
159
+ - test_files/small.tre
138
160
  - test_files/test.tre
139
161
  - tree_clusters.gemspec
140
162
  homepage: https://github.com/mooreryan/tree_clusters