tree_clusters 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +0 -1
- data/exe/snazzy_clades_key_cols +146 -0
- data/lib/tree_clusters/version.rb +1 -1
- data/test_files/small.aln +14 -0
- data/tree_clusters.gemspec +2 -0
- metadata +39 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea8e2199383e573da42d0a6d265b446de07bea4c
|
4
|
+
data.tar.gz: 775d0ace4a7398438ed4c4a34504ae9d7128d7c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: db7302cad104d6ae05e10a4f02b1880016ba9a960cb44209d283cf9c5d730dc39ebd26a6cf64d40bd0e516d6253692112615bde41dc0e1e935ee0bc582cb36c4
|
7
|
+
data.tar.gz: 8dbb10b889de0f3c7db92d2138c89bdcf5357279daadaf12c97ac2e7d9b74980e3e9219308e4bb3252c93bb92b9230d77e5dc166ebd5633f36e949a54c72ca94
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -0,0 +1,146 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
Signal.trap("PIPE", "EXIT")
|
4
|
+
|
5
|
+
require "tree_clusters"
|
6
|
+
require "trollop"
|
7
|
+
require "parse_fasta"
|
8
|
+
require "shannon"
|
9
|
+
require "fileutils"
|
10
|
+
|
11
|
+
def get_low_ent_cols leaves, leaf2attrs, entropy_cutoff
|
12
|
+
low_ent_cols = []
|
13
|
+
alns = leaf2attrs.attrs leaves, :aln
|
14
|
+
aln_cols = alns.transpose
|
15
|
+
|
16
|
+
aln_cols.each_with_index do |aln_col, aln_col_idx|
|
17
|
+
has_gaps = aln_col.any? { |aa| aa == "-" }
|
18
|
+
low_entropy = Shannon::entropy(aln_col.join) <= entropy_cutoff
|
19
|
+
|
20
|
+
if !has_gaps && low_entropy
|
21
|
+
low_ent_cols << (aln_col_idx + 1)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
Set.new low_ent_cols
|
26
|
+
end
|
27
|
+
|
28
|
+
opts = Trollop.options do
|
29
|
+
version TreeClusters::VERSION
|
30
|
+
|
31
|
+
banner <<-EOS
|
32
|
+
|
33
|
+
Note that if a clade's parent would be the root of the tree, no
|
34
|
+
columns will be subtracted when removing the parent columns as it
|
35
|
+
would be the entire alignment.
|
36
|
+
|
37
|
+
Options:
|
38
|
+
EOS
|
39
|
+
|
40
|
+
opt(:tree,
|
41
|
+
"Newick tree file",
|
42
|
+
type: :string)
|
43
|
+
opt(:mapping,
|
44
|
+
"Mapping file",
|
45
|
+
type: :string)
|
46
|
+
opt(:aln,
|
47
|
+
"Alignment file",
|
48
|
+
type: :string)
|
49
|
+
|
50
|
+
opt(:entropy_cutoff,
|
51
|
+
"Cutoff to consider a column low entropy",
|
52
|
+
default: 0.0)
|
53
|
+
opt(:clade_size_cutoff,
|
54
|
+
"Consider only clades with at least this many leaves",
|
55
|
+
default: 1)
|
56
|
+
|
57
|
+
opt(:outdir,
|
58
|
+
"Output directory",
|
59
|
+
default: ".")
|
60
|
+
opt(:base,
|
61
|
+
"Basename for output",
|
62
|
+
default: "snazzy_clades")
|
63
|
+
end
|
64
|
+
|
65
|
+
FileUtils.mkdir_p opts[:outdir]
|
66
|
+
|
67
|
+
TreeClusters.extend TreeClusters
|
68
|
+
|
69
|
+
tree = NewickTree.fromFile opts[:tree]
|
70
|
+
metadata = TreeClusters.read_mapping_file opts[:mapping]
|
71
|
+
snazzy_clades = TreeClusters.snazzy_clades tree, metadata
|
72
|
+
|
73
|
+
aln_len = nil
|
74
|
+
leaf2attrs = TreeClusters::Attrs.new
|
75
|
+
ParseFasta::SeqFile.open(opts[:aln]).each_record do |rec|
|
76
|
+
leaf2attrs[rec.id] = { aln: rec.seq.chars }
|
77
|
+
|
78
|
+
aln_len ||= rec.seq.length
|
79
|
+
|
80
|
+
abort_unless aln_len == rec.seq.length,
|
81
|
+
"Aln len mismatch for #{rec.id}"
|
82
|
+
end
|
83
|
+
|
84
|
+
|
85
|
+
clades_fname = File.join opts[:outdir],
|
86
|
+
"#{opts[:base]}.snazzy_clades.txt"
|
87
|
+
members_fname = File.join opts[:outdir],
|
88
|
+
"#{opts[:base]}.snazzy_clades_clade_members.txt"
|
89
|
+
all_key_cols_fname = File.join opts[:outdir],
|
90
|
+
"#{opts[:base]}.snazzy_clades_key_cols.txt"
|
91
|
+
key_cols_fname = File.join opts[:outdir],
|
92
|
+
"#{opts[:base]}.snazzy_clades_key_cols.txt"
|
93
|
+
key_cols_minus_parent_cols_fname = File.join opts[:outdir],
|
94
|
+
"#{opts[:base]}.snazzy_clades_key_cols_minus_parent_cols.txt"
|
95
|
+
key_cols_minus_sibling_cols_fname = File.join opts[:outdir],
|
96
|
+
"#{opts[:base]}.snazzy_clades_key_cols_minus_sibling_cols.txt"
|
97
|
+
|
98
|
+
info_f = File.open(clades_fname, "w")
|
99
|
+
clade_members_f = File.open(members_fname, "w")
|
100
|
+
key_cols_f = File.open(key_cols_fname, "w")
|
101
|
+
key_cols_minus_parent_cols_f = File.open(key_cols_minus_parent_cols_fname, "w")
|
102
|
+
key_cols_minus_sibling_cols_f = File.open(key_cols_minus_sibling_cols_fname, "w")
|
103
|
+
|
104
|
+
begin
|
105
|
+
# info is { metadata_category => metadata_tag , ... }
|
106
|
+
snazzy_clades.each_with_index do |(clade, info), idx|
|
107
|
+
clade_id = "clade_#{idx+1}___#{clade.name}"
|
108
|
+
|
109
|
+
info_f.puts [clade_id,
|
110
|
+
info.count,
|
111
|
+
info.map { |pair| pair.join("|")}].join "\t"
|
112
|
+
|
113
|
+
clade_members_f.puts [clade_id,
|
114
|
+
clade.all_leaves.count,
|
115
|
+
clade.all_leaves].join "\t"
|
116
|
+
|
117
|
+
key_cols_all_leaves =
|
118
|
+
get_low_ent_cols clade.all_leaves, leaf2attrs, opts[:entropy_cutoff]
|
119
|
+
key_cols_all_sibling_leaves =
|
120
|
+
get_low_ent_cols clade.all_sibling_leaves, leaf2attrs, opts[:entropy_cutoff]
|
121
|
+
key_cols_parent_leaves =
|
122
|
+
get_low_ent_cols clade.parent_leaves, leaf2attrs, opts[:entropy_cutoff]
|
123
|
+
|
124
|
+
key_cols_all_minus_sibling =
|
125
|
+
key_cols_all_leaves - key_cols_all_sibling_leaves
|
126
|
+
key_cols_all_minus_parent =
|
127
|
+
key_cols_all_leaves - key_cols_parent_leaves
|
128
|
+
|
129
|
+
key_cols_f.puts [clade_id,
|
130
|
+
key_cols_all_leaves.count,
|
131
|
+
key_cols_all_leaves.to_a].join "\t"
|
132
|
+
key_cols_minus_parent_cols_f.puts [clade_id,
|
133
|
+
key_cols_all_minus_parent.count,
|
134
|
+
key_cols_all_minus_parent.to_a].join "\t"
|
135
|
+
key_cols_minus_sibling_cols_f.puts [clade_id,
|
136
|
+
key_cols_all_minus_sibling.count,
|
137
|
+
key_cols_all_minus_sibling.to_a].join "\t"
|
138
|
+
|
139
|
+
end
|
140
|
+
ensure
|
141
|
+
info_f.close
|
142
|
+
clade_members_f.close
|
143
|
+
key_cols_f.close
|
144
|
+
key_cols_minus_parent_cols_f.close
|
145
|
+
key_cols_minus_sibling_cols_f.close
|
146
|
+
end
|
data/tree_clusters.gemspec
CHANGED
@@ -30,5 +30,7 @@ Gem::Specification.new do |spec|
|
|
30
30
|
|
31
31
|
spec.add_runtime_dependency "abort_if", "~> 0.2.0"
|
32
32
|
spec.add_runtime_dependency "newick-ruby", "~> 1.0", ">= 1.0.4"
|
33
|
+
spec.add_runtime_dependency "parse_fasta", "~> 2.3"
|
34
|
+
spec.add_runtime_dependency "shannon", "~> 0.1", ">= 0.1.1"
|
33
35
|
spec.add_runtime_dependency "trollop", "~> 2.1", ">= 2.1.2"
|
34
36
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tree_clusters
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-11-
|
11
|
+
date: 2017-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -114,6 +114,40 @@ dependencies:
|
|
114
114
|
- - ">="
|
115
115
|
- !ruby/object:Gem::Version
|
116
116
|
version: 1.0.4
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
name: parse_fasta
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - "~>"
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '2.3'
|
124
|
+
type: :runtime
|
125
|
+
prerelease: false
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - "~>"
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '2.3'
|
131
|
+
- !ruby/object:Gem::Dependency
|
132
|
+
name: shannon
|
133
|
+
requirement: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - "~>"
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '0.1'
|
138
|
+
- - ">="
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: 0.1.1
|
141
|
+
type: :runtime
|
142
|
+
prerelease: false
|
143
|
+
version_requirements: !ruby/object:Gem::Requirement
|
144
|
+
requirements:
|
145
|
+
- - "~>"
|
146
|
+
- !ruby/object:Gem::Version
|
147
|
+
version: '0.1'
|
148
|
+
- - ">="
|
149
|
+
- !ruby/object:Gem::Version
|
150
|
+
version: 0.1.1
|
117
151
|
- !ruby/object:Gem::Dependency
|
118
152
|
name: trollop
|
119
153
|
requirement: !ruby/object:Gem::Requirement
|
@@ -139,6 +173,7 @@ email:
|
|
139
173
|
- moorer@udel.edu
|
140
174
|
executables:
|
141
175
|
- snazzy_clades
|
176
|
+
- snazzy_clades_key_cols
|
142
177
|
extensions: []
|
143
178
|
extra_rdoc_files: []
|
144
179
|
files:
|
@@ -153,10 +188,12 @@ files:
|
|
153
188
|
- bin/console
|
154
189
|
- bin/setup
|
155
190
|
- exe/snazzy_clades
|
191
|
+
- exe/snazzy_clades_key_cols
|
156
192
|
- lib/tree_clusters.rb
|
157
193
|
- lib/tree_clusters/version.rb
|
158
194
|
- test_files/non_bifurcating.aln
|
159
195
|
- test_files/non_bifurcating.tre
|
196
|
+
- test_files/small.aln
|
160
197
|
- test_files/small.mapping
|
161
198
|
- test_files/small.tre
|
162
199
|
- test_files/test.tre
|