tree_clusters 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +0 -1
- data/exe/snazzy_clades_key_cols +146 -0
- data/lib/tree_clusters/version.rb +1 -1
- data/test_files/small.aln +14 -0
- data/tree_clusters.gemspec +2 -0
- metadata +39 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea8e2199383e573da42d0a6d265b446de07bea4c
|
4
|
+
data.tar.gz: 775d0ace4a7398438ed4c4a34504ae9d7128d7c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db7302cad104d6ae05e10a4f02b1880016ba9a960cb44209d283cf9c5d730dc39ebd26a6cf64d40bd0e516d6253692112615bde41dc0e1e935ee0bc582cb36c4
|
7
|
+
data.tar.gz: 8dbb10b889de0f3c7db92d2138c89bdcf5357279daadaf12c97ac2e7d9b74980e3e9219308e4bb3252c93bb92b9230d77e5dc166ebd5633f36e949a54c72ca94
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/exe/snazzy_clades_key_cols
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
Signal.trap("PIPE", "EXIT")
|
4
|
+
|
5
|
+
require "tree_clusters"
|
6
|
+
require "trollop"
|
7
|
+
require "parse_fasta"
|
8
|
+
require "shannon"
|
9
|
+
require "fileutils"
|
10
|
+
|
11
|
+
# Find the gap-free, low-entropy alignment columns shared by a set of leaves.
#
# The alignment rows for +leaves+ are fetched with
# <tt>leaf2attrs.attrs(leaves, :aln)</tt> and transposed into columns, so
# every row must have the same length (the calling script aborts on a
# length mismatch while loading the alignment).
#
# A column is kept when none of its entries is the gap character "-" and
# the Shannon entropy of its joined characters is <= +entropy_cutoff+.
#
# @param leaves leaf names whose aligned sequences are compared
# @param leaf2attrs object responding to +attrs(leaves, :aln)+; each
#   returned row is expected to be an array of single characters
# @param entropy_cutoff [Numeric] maximum entropy for a column to be kept
# @return [Set<Integer>] 1-based positions of the qualifying columns
def get_low_ent_cols leaves, leaf2attrs, entropy_cutoff
  aln_cols = leaf2attrs.attrs(leaves, :aln).transpose

  low_ent_idxs = aln_cols.each_index.select do |col_idx|
    aln_col = aln_cols[col_idx]

    # Test for gaps first: a gapped column is rejected regardless of its
    # entropy, so the entropy calculation is skipped for it entirely.
    !aln_col.include?("-") &&
      Shannon::entropy(aln_col.join) <= entropy_cutoff
  end

  # Report alignment positions 1-based rather than as 0-based indices.
  Set.new(low_ent_idxs.map { |col_idx| col_idx + 1 })
end
|
27
|
+
|
28
|
+
# Parse the command line.  --tree, --mapping and --aln name the input
# files; the remaining options have defaults.
opts = Trollop.options do
  version TreeClusters::VERSION

  banner <<-EOS

Note that if a clade's parent would be the root of the tree, no
columns will be subtracted when removing the parent columns as it
would be the entire alignment.

Options:
  EOS

  opt(:tree,
      "Newick tree file",
      type: :string)
  opt(:mapping,
      "Mapping file",
      type: :string)
  opt(:aln,
      "Alignment file",
      type: :string)

  opt(:entropy_cutoff,
      "Cutoff to consider a column low entropy",
      default: 0.0)
  opt(:clade_size_cutoff,
      "Consider only clades with at least this many leaves",
      default: 1)

  opt(:outdir,
      "Output directory",
      default: ".")
  opt(:base,
      "Basename for output",
      default: "snazzy_clades")
end

# The three input options have no default, so they are nil when omitted.
# Fail here with a clear usage error instead of letting the tree/mapping/
# alignment readers raise on a nil or nonexistent path later on.
[:tree, :mapping, :aln].each do |arg|
  Trollop.die arg, "is required" unless opts[arg]
  Trollop.die arg, "file '#{opts[arg]}' does not exist" unless File.exist? opts[arg]
end
|
64
|
+
|
65
|
+
# Make sure the output directory exists before anything is written to it.
FileUtils.mkdir_p(opts[:outdir])

# Extending the module with itself exposes its instance methods as
# module-level methods (TreeClusters.read_mapping_file, etc.).
TreeClusters.extend(TreeClusters)

tree          = NewickTree.fromFile(opts[:tree])
metadata      = TreeClusters.read_mapping_file(opts[:mapping])
snazzy_clades = TreeClusters.snazzy_clades(tree, metadata)

# Load the alignment: each record's sequence is stored under its id as an
# array of single characters, and every sequence must have the same
# length as the first one read.
leaf2attrs = TreeClusters::Attrs.new
aln_len    = nil

ParseFasta::SeqFile.open(opts[:aln]).each_record do |record|
  seq_len = record.seq.length

  leaf2attrs[record.id] = { aln: record.seq.chars }

  # The first record fixes the expected alignment length.
  aln_len = seq_len if aln_len.nil?

  # NOTE(review): abort_unless is not defined in this script; presumably
  # it comes from the abort_if gem via tree_clusters -- confirm.
  abort_unless(aln_len == seq_len,
               "Aln len mismatch for #{record.id}")
end
|
83
|
+
|
84
|
+
|
85
|
+
# Output file paths, all of the form <outdir>/<base>.<kind>.txt.
clades_fname = File.join opts[:outdir],
                         "#{opts[:base]}.snazzy_clades.txt"
members_fname = File.join opts[:outdir],
                          "#{opts[:base]}.snazzy_clades_clade_members.txt"
key_cols_fname = File.join opts[:outdir],
                           "#{opts[:base]}.snazzy_clades_key_cols.txt"
key_cols_minus_parent_cols_fname = File.join opts[:outdir],
                                             "#{opts[:base]}.snazzy_clades_key_cols_minus_parent_cols.txt"
key_cols_minus_sibling_cols_fname = File.join opts[:outdir],
                                              "#{opts[:base]}.snazzy_clades_key_cols_minus_sibling_cols.txt"

# Every successfully opened output file is tracked here so that the ensure
# clause closes exactly the ones that were opened, even if a later open or
# any of the writes raises.
out_files = []
open_out = lambda do |fname|
  file = File.open(fname, "w")
  out_files << file
  file
end

begin
  info_f                        = open_out.call clades_fname
  clade_members_f               = open_out.call members_fname
  key_cols_f                    = open_out.call key_cols_fname
  key_cols_minus_parent_cols_f  = open_out.call key_cols_minus_parent_cols_fname
  key_cols_minus_sibling_cols_f = open_out.call key_cols_minus_sibling_cols_fname

  # info is { metadata_category => metadata_tag , ... }
  snazzy_clades.each_with_index do |(clade, info), idx|
    leaves = clade.all_leaves

    # Honor --clade-size-cutoff: skip clades with too few leaves.  The
    # clade number still comes from the unfiltered position, so ids do not
    # shift when the cutoff changes.
    next if leaves.count < opts[:clade_size_cutoff]

    clade_id = "clade_#{idx + 1}___#{clade.name}"
    cutoff   = opts[:entropy_cutoff]

    info_f.puts [clade_id,
                 info.count,
                 info.map { |pair| pair.join("|") }].join "\t"

    clade_members_f.puts [clade_id,
                          leaves.count,
                          leaves].join "\t"

    # Low-entropy columns within the clade itself, among its sibling
    # leaves, and among its parent's leaves.
    key_cols_all_leaves =
      get_low_ent_cols leaves, leaf2attrs, cutoff
    key_cols_all_sibling_leaves =
      get_low_ent_cols clade.all_sibling_leaves, leaf2attrs, cutoff
    key_cols_parent_leaves =
      get_low_ent_cols clade.parent_leaves, leaf2attrs, cutoff

    # Columns that are low entropy in the clade but not in the siblings /
    # not in the parent.
    key_cols_all_minus_sibling =
      key_cols_all_leaves - key_cols_all_sibling_leaves
    key_cols_all_minus_parent =
      key_cols_all_leaves - key_cols_parent_leaves

    # One tab-separated line per clade: id, column count, then the columns.
    write_cols = lambda do |file, cols|
      file.puts [clade_id, cols.count, cols.to_a].join "\t"
    end

    write_cols.call key_cols_f, key_cols_all_leaves
    write_cols.call key_cols_minus_parent_cols_f, key_cols_all_minus_parent
    write_cols.call key_cols_minus_sibling_cols_f, key_cols_all_minus_sibling
  end
ensure
  out_files.each(&:close)
end
|
data/tree_clusters.gemspec
CHANGED
@@ -30,5 +30,7 @@ Gem::Specification.new do |spec|
|
|
30
30
|
|
31
31
|
spec.add_runtime_dependency "abort_if", "~> 0.2.0"
|
32
32
|
spec.add_runtime_dependency "newick-ruby", "~> 1.0", ">= 1.0.4"
|
33
|
+
spec.add_runtime_dependency "parse_fasta", "~> 2.3"
|
34
|
+
spec.add_runtime_dependency "shannon", "~> 0.1", ">= 0.1.1"
|
33
35
|
spec.add_runtime_dependency "trollop", "~> 2.1", ">= 2.1.2"
|
34
36
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tree_clusters
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-11-
|
11
|
+
date: 2017-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -114,6 +114,40 @@ dependencies:
|
|
114
114
|
- - ">="
|
115
115
|
- !ruby/object:Gem::Version
|
116
116
|
version: 1.0.4
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
name: parse_fasta
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - "~>"
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '2.3'
|
124
|
+
type: :runtime
|
125
|
+
prerelease: false
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - "~>"
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '2.3'
|
131
|
+
- !ruby/object:Gem::Dependency
|
132
|
+
name: shannon
|
133
|
+
requirement: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - "~>"
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '0.1'
|
138
|
+
- - ">="
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: 0.1.1
|
141
|
+
type: :runtime
|
142
|
+
prerelease: false
|
143
|
+
version_requirements: !ruby/object:Gem::Requirement
|
144
|
+
requirements:
|
145
|
+
- - "~>"
|
146
|
+
- !ruby/object:Gem::Version
|
147
|
+
version: '0.1'
|
148
|
+
- - ">="
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: 0.1.1
|
117
151
|
- !ruby/object:Gem::Dependency
|
118
152
|
name: trollop
|
119
153
|
requirement: !ruby/object:Gem::Requirement
|
@@ -139,6 +173,7 @@ email:
|
|
139
173
|
- moorer@udel.edu
|
140
174
|
executables:
|
141
175
|
- snazzy_clades
|
176
|
+
- snazzy_clades_key_cols
|
142
177
|
extensions: []
|
143
178
|
extra_rdoc_files: []
|
144
179
|
files:
|
@@ -153,10 +188,12 @@ files:
|
|
153
188
|
- bin/console
|
154
189
|
- bin/setup
|
155
190
|
- exe/snazzy_clades
|
191
|
+
- exe/snazzy_clades_key_cols
|
156
192
|
- lib/tree_clusters.rb
|
157
193
|
- lib/tree_clusters/version.rb
|
158
194
|
- test_files/non_bifurcating.aln
|
159
195
|
- test_files/non_bifurcating.tre
|
196
|
+
- test_files/small.aln
|
160
197
|
- test_files/small.mapping
|
161
198
|
- test_files/small.tre
|
162
199
|
- test_files/test.tre
|