tree_clusters 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: aff432c29c55d5fd28f066c9ed7bf55a739c5998
4
- data.tar.gz: fc9599b6de682c51759b7d783ee4cc02babc50f2
3
+ metadata.gz: ea8e2199383e573da42d0a6d265b446de07bea4c
4
+ data.tar.gz: 775d0ace4a7398438ed4c4a34504ae9d7128d7c1
5
5
  SHA512:
6
- metadata.gz: e82e143d16cac1de4446ba5422e26961f6a90b964ef12244661cdb11757dd9758019e0133f0ca56caee4a29f7ede08f0e92091a7ce4f727c22b6f446f9611408
7
- data.tar.gz: e467648f859f638b3fd620d4a8dd843de702fc164a58359408de9c7dddf554fdbeec1ff9786e42436d5f4ae30dfe9a94320fcd7e14631f820176dd0f38cbd56b
6
+ metadata.gz: db7302cad104d6ae05e10a4f02b1880016ba9a960cb44209d283cf9c5d730dc39ebd26a6cf64d40bd0e516d6253692112615bde41dc0e1e935ee0bc582cb36c4
7
+ data.tar.gz: 8dbb10b889de0f3c7db92d2138c89bdcf5357279daadaf12c97ac2e7d9b74980e3e9219308e4bb3252c93bb92b9230d77e5dc166ebd5633f36e949a54c72ca94
data/.gitignore CHANGED
@@ -19,6 +19,8 @@ test_files/ignore
19
19
 
20
20
  snazzy_clades.*
21
21
 
22
+ TEST
23
+
22
24
  # rspec failure tracking
23
25
  .rspec_status
24
26
  *.lock
data/.travis.yml CHANGED
@@ -1,7 +1,6 @@
1
1
  sudo: false
2
2
  language: ruby
3
3
  rvm:
4
- - 2.0
5
4
  - 2.1
6
5
  - 2.2
7
6
  - 2.3
@@ -0,0 +1,146 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ Signal.trap("PIPE", "EXIT")
4
+
5
+ require "tree_clusters"
6
+ require "trollop"
7
+ require "parse_fasta"
8
+ require "shannon"
9
+ require "fileutils"
10
+
11
# Finds the alignment columns that are conserved across a set of leaves.
#
# A column qualifies when no leaf has a gap ("-") in it and the Shannon
# entropy of its residues is at or below +entropy_cutoff+.
#
# @param leaves leaf names whose aligned sequences are compared
# @param leaf2attrs object answering +attrs(leaves, :aln)+; presumably
#   it returns one array of residues per leaf (the result is transposed,
#   so all of them must have the same length) -- confirm against
#   TreeClusters::Attrs
# @param entropy_cutoff [Numeric] largest entropy still counted as low
# @return [Set<Integer>] 1-based positions of the qualifying columns
def get_low_ent_cols(leaves, leaf2attrs, entropy_cutoff)
  aln_cols = leaf2attrs.attrs(leaves, :aln).transpose

  low_ent_idxs = aln_cols.each_index.select do |col_idx|
    aln_col = aln_cols[col_idx]

    # Gap test first: entropy is only computed for gap-free columns.
    !aln_col.include?("-") &&
      Shannon.entropy(aln_col.join) <= entropy_cutoff
  end

  # Positions are reported 1-based.
  Set.new(low_ent_idxs.map { |col_idx| col_idx + 1 })
end
27
+
28
# Command line options.
#
# --tree, --mapping and --aln have no usable default, so they are
# declared required: Trollop then rejects a call that leaves one out
# instead of handing nil to the file readers further down.
opts = Trollop.options do
  version TreeClusters::VERSION

  banner <<-EOS

  Note that if a clade's parent would be the root of the tree, no
  columns will be subtracted when removing the parent columns as it
  would be the entire alignment.

  Options:
  EOS

  # Input files.
  opt(:tree,
      "Newick tree file",
      type: :string,
      required: true)
  opt(:mapping,
      "Mapping file",
      type: :string,
      required: true)
  opt(:aln,
      "Alignment file",
      type: :string,
      required: true)

  # Filtering thresholds.
  opt(:entropy_cutoff,
      "Cutoff to consider a column low entropy",
      default: 0.0)
  opt(:clade_size_cutoff,
      "Consider only clades with at least this many leaves",
      default: 1)

  # Where the result files go and how they are named.
  opt(:outdir,
      "Output directory",
      default: ".")
  opt(:base,
      "Basename for output",
      default: "snazzy_clades")
end
64
+
65
# Make sure the output directory exists before anything is written.
FileUtils.mkdir_p(opts[:outdir])

# Make the module's own methods callable as TreeClusters.<method>.
TreeClusters.extend(TreeClusters)

tree          = NewickTree.fromFile(opts[:tree])
metadata      = TreeClusters.read_mapping_file(opts[:mapping])
snazzy_clades = TreeClusters.snazzy_clades(tree, metadata)

# Load the alignment as one array of residues per record id.  The
# length of the first record is remembered and every record is checked
# against it.
leaf2attrs = TreeClusters::Attrs.new
aln_len = nil

ParseFasta::SeqFile.open(opts[:aln]).each_record do |rec|
  residues = rec.seq.chars
  leaf2attrs[rec.id] = { aln: residues }

  aln_len ||= rec.seq.length

  abort_unless aln_len == rec.seq.length,
               "Aln len mismatch for #{rec.id}"
end
83
+
84
+
85
# Output files, all written to opts[:outdir] and prefixed with
# opts[:base].
clades_fname =
  File.join(opts[:outdir], "#{opts[:base]}.snazzy_clades.txt")
members_fname =
  File.join(opts[:outdir], "#{opts[:base]}.snazzy_clades_clade_members.txt")
key_cols_fname =
  File.join(opts[:outdir], "#{opts[:base]}.snazzy_clades_key_cols.txt")
key_cols_minus_parent_cols_fname =
  File.join(opts[:outdir],
            "#{opts[:base]}.snazzy_clades_key_cols_minus_parent_cols.txt")
key_cols_minus_sibling_cols_fname =
  File.join(opts[:outdir],
            "#{opts[:base]}.snazzy_clades_key_cols_minus_sibling_cols.txt")

# Every handle is recorded here as soon as it is opened, so the ensure
# clause closes exactly the files that were opened, even when one of
# the later opens fails.
out_files = []

begin
  [clades_fname,
   members_fname,
   key_cols_fname,
   key_cols_minus_parent_cols_fname,
   key_cols_minus_sibling_cols_fname].each do |fname|
    out_files << File.open(fname, "w")
  end

  info_f, members_f, key_f, minus_parent_f, minus_sibling_f = out_files

  entropy_cutoff = opts[:entropy_cutoff]

  # One tab separated row per clade: id, number of columns, columns.
  put_cols = lambda do |file, clade_id, cols|
    file.puts [clade_id, cols.count, cols.to_a].join("\t")
  end

  # info is { metadata_category => metadata_tag , ... }
  snazzy_clades.each_with_index do |(clade, info), idx|
    clade_id = "clade_#{idx + 1}___#{clade.name}"

    info_f.puts [clade_id,
                 info.count,
                 info.map { |pair| pair.join("|") }].join("\t")

    members_f.puts [clade_id,
                    clade.all_leaves.count,
                    clade.all_leaves].join("\t")

    # Low entropy columns of the clade itself, of its siblings and of
    # its parent.
    clade_cols =
      get_low_ent_cols(clade.all_leaves, leaf2attrs, entropy_cutoff)
    sibling_cols =
      get_low_ent_cols(clade.all_sibling_leaves, leaf2attrs, entropy_cutoff)
    parent_cols =
      get_low_ent_cols(clade.parent_leaves, leaf2attrs, entropy_cutoff)

    put_cols.call(key_f, clade_id, clade_cols)
    put_cols.call(minus_parent_f, clade_id, clade_cols - parent_cols)
    put_cols.call(minus_sibling_f, clade_id, clade_cols - sibling_cols)
  end
ensure
  out_files.each(&:close)
end
@@ -1,3 +1,3 @@
1
1
  module TreeClusters
2
- VERSION = "0.3.1"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -0,0 +1,14 @@
1
+ >a-1
2
+ AAAA
3
+ >a-2
4
+ AAAT
5
+ >b-1
6
+ CCCC
7
+ >b-2
8
+ CCCT
9
+ >bb-1
10
+ CCTG
11
+ >bbb-1
12
+ CCGG
13
+ >bbb-2
14
+ CGGG
@@ -30,5 +30,7 @@ Gem::Specification.new do |spec|
30
30
 
31
31
  spec.add_runtime_dependency "abort_if", "~> 0.2.0"
32
32
  spec.add_runtime_dependency "newick-ruby", "~> 1.0", ">= 1.0.4"
33
+ spec.add_runtime_dependency "parse_fasta", "~> 2.3"
34
+ spec.add_runtime_dependency "shannon", "~> 0.1", ">= 0.1.1"
33
35
  spec.add_runtime_dependency "trollop", "~> 2.1", ">= 2.1.2"
34
36
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tree_clusters
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-01 00:00:00.000000000 Z
11
+ date: 2017-11-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -114,6 +114,40 @@ dependencies:
114
114
  - - ">="
115
115
  - !ruby/object:Gem::Version
116
116
  version: 1.0.4
117
+ - !ruby/object:Gem::Dependency
118
+ name: parse_fasta
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '2.3'
124
+ type: :runtime
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '2.3'
131
+ - !ruby/object:Gem::Dependency
132
+ name: shannon
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '0.1'
138
+ - - ">="
139
+ - !ruby/object:Gem::Version
140
+ version: 0.1.1
141
+ type: :runtime
142
+ prerelease: false
143
+ version_requirements: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - "~>"
146
+ - !ruby/object:Gem::Version
147
+ version: '0.1'
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: 0.1.1
117
151
  - !ruby/object:Gem::Dependency
118
152
  name: trollop
119
153
  requirement: !ruby/object:Gem::Requirement
@@ -139,6 +173,7 @@ email:
139
173
  - moorer@udel.edu
140
174
  executables:
141
175
  - snazzy_clades
176
+ - snazzy_clades_key_cols
142
177
  extensions: []
143
178
  extra_rdoc_files: []
144
179
  files:
@@ -153,10 +188,12 @@ files:
153
188
  - bin/console
154
189
  - bin/setup
155
190
  - exe/snazzy_clades
191
+ - exe/snazzy_clades_key_cols
156
192
  - lib/tree_clusters.rb
157
193
  - lib/tree_clusters/version.rb
158
194
  - test_files/non_bifurcating.aln
159
195
  - test_files/non_bifurcating.tre
196
+ - test_files/small.aln
160
197
  - test_files/small.mapping
161
198
  - test_files/small.tre
162
199
  - test_files/test.tre