tree_clusters 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f069c5801fa47b4861acb1ef2468526082c311b6
4
- data.tar.gz: f865bbcc0da44a648847e9093a4d40511d8eaef1
3
+ metadata.gz: 884a3b714acdf6b5b933c89ce0d6a82aaf9bcbca
4
+ data.tar.gz: 1e3573ead3b5ad82149cec6ccbdf8a8b94eb9939
5
5
  SHA512:
6
- metadata.gz: d82b8821ab2205517bb111602bbcc83bbf28091d8503c56332fe78db24ca1681a72d1be21f6272360629fa4e1e48872e4447ba65cd4c5c2f5971ebc90652b1da
7
- data.tar.gz: 694d9b68a945546b912ba998b817964cc98e1086d2ca2bd3933311834b26649c4ec0b66b26ac0889c76347c7167de8d0d50a6dba642c637e6353b58055b329cb
6
+ metadata.gz: 207ebce001ff2d7aaf1bd4095c8dc3718057637b86edb7b0988e14d0da1b6ad28c267f907b0cfe4e8df908d08a463fd5dd8d200c1a2864a4e27e337fd001b553
7
+ data.tar.gz: aea3b7dab128fbf1d08e6cd0ac813cc415652d2586f8be80c696f8946ac447b0251b1bbbc61acbd2f85760ab7452ae65db781f98d1d30d1feb2e02c93727d1fe
data/.gitignore CHANGED
@@ -17,5 +17,7 @@ test_files/ignore
17
17
 
18
18
  .ruby-*
19
19
 
20
+ snazzy_clades*
21
+
20
22
  # rspec failure tracking
21
23
  .rspec_status
@@ -1,3 +1,3 @@
1
1
  module TreeClusters
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/tree_clusters.rb CHANGED
@@ -32,14 +32,93 @@ module TreeClusters
32
32
  # @yieldparam clade [Clade] a clade of the tree
33
33
  #
34
34
  # @return [Enumerator<Clade>] enumerator of Clade objects
35
def all_clades tree, metadata=nil
  # Without a block, return an Enumerator that replays this same call
  # (including the optional metadata) when it is iterated.
  unless block_given?
    return enum_for(:all_clades, tree, metadata)
  end

  # Walk the tree's clade nodes in reverse of the order that
  # tree.clade_nodes reports them, wrapping each node in a Clade.
  reversed_nodes = tree.clade_nodes.reverse
  reversed_nodes.each do |clade_node|
    yield Clade.new(clade_node, tree, metadata)
  end
end
42
42
 
43
# Finds the clades that are "snazzy" for at least one metadata
# category: every leaf of the clade carries the same tag for that
# category, and no leaf outside the clade carries that tag.
#
# Within each category the clades are visited from most leaves to
# fewest, and a clade is skipped when all of its leaves already belong
# to a single-tag clade recorded earlier for that category.
#
# @param tree [NewickTree] the tree; must respond to taxa (and to
#   whatever all_clades needs)
# @param metadata [Hash] metadata category => (leaf name => tag);
#   presumably a TreeClusters::Attrs, e.g. from read_mapping_file
#
# @return [Hash] Clade => { metadata category => tag } for each
#   snazzy clade
def snazzy_clades tree, metadata
  result = {}

  # Largest clades first.
  clades_by_size_desc =
    all_clades(tree, metadata).
      sort_by { |clade| clade.all_leaves.count }.
      reverse

  metadata.each do |md_cat, leaf2mdtag|
    covered_leaves = Set.new
    mono_tag_clades = {}

    clades_by_size_desc.each do |clade|
      leaves = clade.all_leaves

      assert leaves.count > 1,
             "A clade cannot also be a leaf"

      # Nothing new to learn if every leaf is already covered by a
      # previously recorded single-tag clade.
      next if leaves.all? { |leaf| covered_leaves.include? leaf }

      tags = leaves.map do |leaf|
        assert leaf2mdtag.has_key?(leaf),
               "leaf #{leaf} is missing from leaf2mdtag ht"

        leaf2mdtag[leaf]
      end

      # Only clades whose leaves all share one tag for this metadata
      # category are kept.
      next unless tags.uniq.count == 1

      leaves.each { |leaf| covered_leaves << leaf }

      assert !mono_tag_clades.has_key?(clade),
             "clade #{clade.name} is repeated in single_tag_clades for #{md_cat}"

      mono_tag_clades[clade] = tags.first
    end

    mono_tag_clades.each do |clade, md_tag|
      outside_leaves = tree.taxa - clade.all_leaves

      # The tag must not appear on any leaf outside the clade.
      outside_hits = outside_leaves.count do |leaf|
        leaf2mdtag[leaf] == md_tag
      end

      next unless outside_hits.zero?

      (result[clade] ||= {})[md_cat] = md_tag
    end
  end

  result
end
102
+
103
# Reads a tab-separated mapping file of leaf names and metadata tags.
#
# The first line is a header: its first column is ignored and each
# remaining column names a metadata category. Every later line holds
# a leaf name followed by that leaf's value for each category, in
# header order.
#
# @param fname [String] path to the mapping file
#
# @return [TreeClusters::Attrs] the container filled by calling
#   add(category, leaf_name, value) once per value read
def read_mapping_file fname
  md_cat_names = nil
  metadata = TreeClusters::Attrs.new

  # The block form of File.open closes the handle when the block
  # exits, even if parsing raises; the bare File.open(...).each_line
  # chain left the file open.
  File.open(fname, "rt") do |file|
    file.each_line.with_index do |line, idx|
      leaf_name, *metadata_vals = line.chomp.split "\t"

      if idx.zero?
        md_cat_names = metadata_vals
      else
        metadata_vals.each_with_index do |val, val_idx|
          metadata.add md_cat_names[val_idx], leaf_name, val
        end
      end
    end
  end

  metadata
end
121
+
43
122
  # A Hash table for genome/leaf/taxa attributes
44
123
  class Attrs < Hash
45
124
 
@@ -113,11 +192,13 @@ module TreeClusters
113
192
  :each_sibling_leaf_set,
114
193
  :parent_leaves,
115
194
  :non_parent_leaves,
116
- :other_leaves
195
+ :other_leaves,
196
+ :single_tag_info,
197
+ :all_tags
117
198
 
118
199
  # @param node [NewickNode] a NewickNode from a NewickTree
119
200
  # @param tree [NewickTree] a NewickTree
120
- def initialize node, tree
201
+ def initialize node, tree, metadata=nil
121
202
  @name = node.name
122
203
  @all_leaves = descendant_leaves node
123
204
 
@@ -148,6 +229,14 @@ module TreeClusters
148
229
 
149
230
  @non_parent_leaves =
150
231
  Object::Set.new(tree.taxa) - Object::Set.new(parent_leaves)
232
+
233
+ if metadata
234
+ @metadata = metadata
235
+ @all_tags ||= get_all_tags
236
+ @single_tag_info ||= get_single_tag_info
237
+ else
238
+ @single_tag_info = nil
239
+ end
151
240
  end
152
241
 
153
242
  # Compares two Clades field by field.
@@ -163,7 +252,8 @@ module TreeClusters
163
252
  self.all_sibling_leaves == clade.all_sibling_leaves &&
164
253
  self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
165
254
  self.parent_leaves == clade.parent_leaves &&
166
- self.other_leaves == clade.other_leaves
255
+ self.other_leaves == clade.other_leaves &&
256
+ self.single_tag_info == clade.single_tag_info
167
257
  )
168
258
  end
169
259
 
@@ -174,6 +264,27 @@ module TreeClusters
174
264
 
175
265
  private
176
266
 
267
# Collapses @all_tags down to one tag per metadata category.
#
# @return [Hash] metadata category => the only tag in that category's
#   tag set when the set has exactly one member, otherwise nil
def get_single_tag_info
  @all_tags.map do |md_cat, tags|
    # Read the sole member directly instead of building a throwaway
    # Array with to_a just to take its first element.
    [md_cat, tags.size == 1 ? tags.first : nil]
  end.to_h
end
272
+
273
# Collects, for every metadata category in @metadata, the set of tags
# carried by this clade's leaves.
#
# @return [Hash] metadata category => Set of the tags found on
#   all_leaves for that category
def get_all_tags
  tags_by_category = {}

  # Each value of @metadata maps leaf name => tag for one category.
  @metadata.each do |md_cat, name2tag|
    leaf_tags = Set.new

    all_leaves.each do |leaf|
      assert name2tag.has_key?(leaf),
             "leaf #{leaf} is not present in name2tag ht for md_cat #{md_cat}"

      leaf_tags << name2tag[leaf]
    end

    tags_by_category[md_cat] = leaf_tags
  end

  tags_by_category
end
287
+
177
288
  def descendant_leaves node
178
289
  if node.leaf?
179
290
  [node.name]
@@ -0,0 +1,8 @@
1
+ name coolness snazzyness sillyness jauntiness oddness
2
+ a-1 cool snazzy 1 jaunty quite odd
3
+ a-2 cool snazzy 7 notjaunty quite odd
4
+ b-1 notcool snazzy 3 jaunty not odd
5
+ b-2 notcool snazzy 4 notjaunty not odd
6
+ bb-1 notcool notsnazzy 5 notjaunty not odd
7
+ bbb-1 notcool notsnazzy 1 jaunty rather odd
8
+ bbb-2 notcool notsnazzy 7 jaunty rather odd
@@ -0,0 +1 @@
1
+ ((a-1, a-2)cluster_A, ((b-1, b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
@@ -30,4 +30,5 @@ Gem::Specification.new do |spec|
30
30
 
31
31
  spec.add_runtime_dependency "abort_if", "~> 0.2.0"
32
32
  spec.add_runtime_dependency "newick-ruby", "~> 1.0", ">= 1.0.4"
33
+ spec.add_runtime_dependency "trollop", "~> 2.1", ">= 2.1.2"
33
34
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_clusters
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
@@ -114,6 +114,26 @@ dependencies:
114
114
  - - ">="
115
115
  - !ruby/object:Gem::Version
116
116
  version: 1.0.4
117
+ - !ruby/object:Gem::Dependency
118
+ name: trollop
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '2.1'
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: 2.1.2
127
+ type: :runtime
128
+ prerelease: false
129
+ version_requirements: !ruby/object:Gem::Requirement
130
+ requirements:
131
+ - - "~>"
132
+ - !ruby/object:Gem::Version
133
+ version: '2.1'
134
+ - - ">="
135
+ - !ruby/object:Gem::Version
136
+ version: 2.1.2
117
137
  description: Snazzy code for working with each cluster in a tree.
118
138
  email:
119
139
  - moorer@udel.edu
@@ -135,6 +155,8 @@ files:
135
155
  - lib/tree_clusters/version.rb
136
156
  - test_files/non_bifurcating.aln
137
157
  - test_files/non_bifurcating.tre
158
+ - test_files/small.mapping
159
+ - test_files/small.tre
138
160
  - test_files/test.tre
139
161
  - tree_clusters.gemspec
140
162
  homepage: https://github.com/mooreryan/tree_clusters