tree_clusters 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: aff432c29c55d5fd28f066c9ed7bf55a739c5998
4
- data.tar.gz: fc9599b6de682c51759b7d783ee4cc02babc50f2
3
+ metadata.gz: ea8e2199383e573da42d0a6d265b446de07bea4c
4
+ data.tar.gz: 775d0ace4a7398438ed4c4a34504ae9d7128d7c1
5
5
  SHA512:
6
- metadata.gz: e82e143d16cac1de4446ba5422e26961f6a90b964ef12244661cdb11757dd9758019e0133f0ca56caee4a29f7ede08f0e92091a7ce4f727c22b6f446f9611408
7
- data.tar.gz: e467648f859f638b3fd620d4a8dd843de702fc164a58359408de9c7dddf554fdbeec1ff9786e42436d5f4ae30dfe9a94320fcd7e14631f820176dd0f38cbd56b
6
+ metadata.gz: db7302cad104d6ae05e10a4f02b1880016ba9a960cb44209d283cf9c5d730dc39ebd26a6cf64d40bd0e516d6253692112615bde41dc0e1e935ee0bc582cb36c4
7
+ data.tar.gz: 8dbb10b889de0f3c7db92d2138c89bdcf5357279daadaf12c97ac2e7d9b74980e3e9219308e4bb3252c93bb92b9230d77e5dc166ebd5633f36e949a54c72ca94
data/.gitignore CHANGED
@@ -19,6 +19,8 @@ test_files/ignore
19
19
 
20
20
  snazzy_clades.*
21
21
 
22
+ TEST
23
+
22
24
  # rspec failure tracking
23
25
  .rspec_status
24
26
  *.lock
data/.travis.yml CHANGED
@@ -1,7 +1,6 @@
1
1
  sudo: false
2
2
  language: ruby
3
3
  rvm:
4
- - 2.0
5
4
  - 2.1
6
5
  - 2.2
7
6
  - 2.3
@@ -0,0 +1,146 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ Signal.trap("PIPE", "EXIT")
4
+
5
require "fileutils"
require "set"

require "tree_clusters"
require "trollop"
require "parse_fasta"
require "shannon"
10
+
11
# Finds the gap-free, low-entropy alignment columns for a group of leaves.
#
# The per-leaf alignments are fetched with +leaf2attrs.attrs(leaves, :aln)+
# and transposed so that each element is one alignment column. A column is
# kept when it contains no "-" entry and its Shannon entropy is at most
# +entropy_cutoff+.
#
# @param leaves the leaves whose alignments are compared
# @param leaf2attrs object responding to +attrs(leaves, :aln)+; the result
#   must support +transpose+ (presumably one array of residues per leaf --
#   confirm against TreeClusters::Attrs)
# @param entropy_cutoff [Numeric] maximum entropy for a column to be kept
# @return [Set<Integer>] 1-based positions of the kept columns
def get_low_ent_cols(leaves, leaf2attrs, entropy_cutoff)
  columns = leaf2attrs.attrs(leaves, :aln).transpose

  kept = columns.each_index.select do |idx|
    column = columns[idx]

    # Gapped columns never qualify, so test for gaps first and skip the
    # entropy calculation for them entirely.
    !column.include?("-") &&
      Shannon.entropy(column.join) <= entropy_cutoff
  end

  # Report positions 1-based.
  Set.new(kept.map { |idx| idx + 1 })
end
27
+
28
# Command-line options.
#
# The three input files (--tree, --mapping, --aln) are declared required so
# that leaving one out produces a usage error from Trollop rather than a nil
# being handed to the file readers further down. The remaining options keep
# their defaults.
opts = Trollop.options do
  version TreeClusters::VERSION

  banner <<-EOS

  Note that if a clade's parent would be the root of the tree, no
  columns will be subtracted when removing the parent columns as it
  would be the entire alignment.

  Options:
  EOS

  # Input files.
  opt(:tree,
      "Newick tree file",
      type: :string,
      required: true)
  opt(:mapping,
      "Mapping file",
      type: :string,
      required: true)
  opt(:aln,
      "Alignment file",
      type: :string,
      required: true)

  # Filtering thresholds.
  opt(:entropy_cutoff,
      "Cutoff to consider a column low entropy",
      default: 0.0)
  opt(:clade_size_cutoff,
      "Consider only clades with at least this many leaves",
      default: 1)

  # Output location and naming.
  opt(:outdir,
      "Output directory",
      default: ".")
  opt(:base,
      "Basename for output",
      default: "snazzy_clades")
end
64
+
65
# Create the output directory (and any missing parents) up front.
FileUtils.mkdir_p(opts[:outdir])

# Extend the module with itself so its methods can be called directly on
# TreeClusters below.
TreeClusters.extend(TreeClusters)

# Read the tree and the mapping file, then compute the snazzy clades.
tree          = NewickTree.fromFile(opts[:tree])
metadata      = TreeClusters.read_mapping_file(opts[:mapping])
snazzy_clades = TreeClusters.snazzy_clades(tree, metadata)

# Store every alignment record as an array of characters keyed by record id,
# aborting if any sequence length differs from the first one seen.
aln_len    = nil
leaf2attrs = TreeClusters::Attrs.new
ParseFasta::SeqFile.open(opts[:aln]).each_record do |record|
  leaf2attrs[record.id] = { aln: record.seq.chars }

  # The first record fixes the expected alignment length.
  aln_len ||= record.seq.length

  abort_unless(aln_len == record.seq.length,
               "Aln len mismatch for #{record.id}")
end
83
+
84
+
85
# Output file paths, all placed in the output directory and prefixed with
# the chosen basename.
clades_fname = File.join opts[:outdir],
                         "#{opts[:base]}.snazzy_clades.txt"
members_fname = File.join opts[:outdir],
                          "#{opts[:base]}.snazzy_clades_clade_members.txt"
key_cols_fname = File.join opts[:outdir],
                           "#{opts[:base]}.snazzy_clades_key_cols.txt"
key_cols_minus_parent_cols_fname = File.join opts[:outdir],
                                             "#{opts[:base]}.snazzy_clades_key_cols_minus_parent_cols.txt"
key_cols_minus_sibling_cols_fname = File.join opts[:outdir],
                                              "#{opts[:base]}.snazzy_clades_key_cols_minus_sibling_cols.txt"

# Every handle that was successfully opened is recorded here so the ensure
# clause can close exactly those, even if a later open raises.
open_files = []

begin
  info_f,
    clade_members_f,
    key_cols_f,
    key_cols_minus_parent_cols_f,
    key_cols_minus_sibling_cols_f = [
      clades_fname,
      members_fname,
      key_cols_fname,
      key_cols_minus_parent_cols_fname,
      key_cols_minus_sibling_cols_fname
    ].map do |fname|
      File.open(fname, "w").tap { |f| open_files << f }
    end

  # info is { metadata_category => metadata_tag , ... }
  snazzy_clades.each_with_index do |(clade, info), idx|
    clade_id   = "clade_#{idx + 1}___#{clade.name}"
    all_leaves = clade.all_leaves

    # Each output line is tab separated: clade id, a count, then the items.
    # Array#join flattens the nested array of items into the same line.
    info_f.puts [clade_id,
                 info.count,
                 info.map { |pair| pair.join("|") }].join "\t"

    clade_members_f.puts [clade_id,
                          all_leaves.count,
                          all_leaves].join "\t"

    # Low-entropy columns for the clade itself, for its sibling leaves, and
    # for its parent's leaves.
    key_cols_all_leaves =
      get_low_ent_cols all_leaves, leaf2attrs, opts[:entropy_cutoff]
    key_cols_all_sibling_leaves =
      get_low_ent_cols clade.all_sibling_leaves, leaf2attrs, opts[:entropy_cutoff]
    key_cols_parent_leaves =
      get_low_ent_cols clade.parent_leaves, leaf2attrs, opts[:entropy_cutoff]

    # Set differences: the clade's columns that are not also low entropy in
    # the sibling / parent leaf sets.
    key_cols_all_minus_sibling =
      key_cols_all_leaves - key_cols_all_sibling_leaves
    key_cols_all_minus_parent =
      key_cols_all_leaves - key_cols_parent_leaves

    key_cols_f.puts [clade_id,
                     key_cols_all_leaves.count,
                     key_cols_all_leaves.to_a].join "\t"
    key_cols_minus_parent_cols_f.puts [clade_id,
                                       key_cols_all_minus_parent.count,
                                       key_cols_all_minus_parent.to_a].join "\t"
    key_cols_minus_sibling_cols_f.puts [clade_id,
                                        key_cols_all_minus_sibling.count,
                                        key_cols_all_minus_sibling.to_a].join "\t"
  end
ensure
  open_files.each(&:close)
end
@@ -1,3 +1,3 @@
1
1
  module TreeClusters
2
- VERSION = "0.3.1"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -0,0 +1,14 @@
1
+ >a-1
2
+ AAAA
3
+ >a-2
4
+ AAAT
5
+ >b-1
6
+ CCCC
7
+ >b-2
8
+ CCCT
9
+ >bb-1
10
+ CCTG
11
+ >bbb-1
12
+ CCGG
13
+ >bbb-2
14
+ CGGG
@@ -30,5 +30,7 @@ Gem::Specification.new do |spec|
30
30
 
31
31
  spec.add_runtime_dependency "abort_if", "~> 0.2.0"
32
32
  spec.add_runtime_dependency "newick-ruby", "~> 1.0", ">= 1.0.4"
33
+ spec.add_runtime_dependency "parse_fasta", "~> 2.3"
34
+ spec.add_runtime_dependency "shannon", "~> 0.1", ">= 0.1.1"
33
35
  spec.add_runtime_dependency "trollop", "~> 2.1", ">= 2.1.2"
34
36
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_clusters
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-01 00:00:00.000000000 Z
11
+ date: 2017-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -114,6 +114,40 @@ dependencies:
114
114
  - - ">="
115
115
  - !ruby/object:Gem::Version
116
116
  version: 1.0.4
117
+ - !ruby/object:Gem::Dependency
118
+ name: parse_fasta
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '2.3'
124
+ type: :runtime
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '2.3'
131
+ - !ruby/object:Gem::Dependency
132
+ name: shannon
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '0.1'
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ version: 0.1.1
141
+ type: :runtime
142
+ prerelease: false
143
+ version_requirements: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - "~>"
146
+ - !ruby/object:Gem::Version
147
+ version: '0.1'
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: 0.1.1
117
151
  - !ruby/object:Gem::Dependency
118
152
  name: trollop
119
153
  requirement: !ruby/object:Gem::Requirement
@@ -139,6 +173,7 @@ email:
139
173
  - moorer@udel.edu
140
174
  executables:
141
175
  - snazzy_clades
176
+ - snazzy_clades_key_cols
142
177
  extensions: []
143
178
  extra_rdoc_files: []
144
179
  files:
@@ -153,10 +188,12 @@ files:
153
188
  - bin/console
154
189
  - bin/setup
155
190
  - exe/snazzy_clades
191
+ - exe/snazzy_clades_key_cols
156
192
  - lib/tree_clusters.rb
157
193
  - lib/tree_clusters/version.rb
158
194
  - test_files/non_bifurcating.aln
159
195
  - test_files/non_bifurcating.tre
196
+ - test_files/small.aln
160
197
  - test_files/small.mapping
161
198
  - test_files/small.tre
162
199
  - test_files/test.tre