transrate 1.0.0.beta1 → 1.0.0.beta2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +8 -0
- data/CITATION +3 -0
- data/README.md +1 -1
- data/Rakefile +71 -0
- data/bin/transrate +92 -41
- data/deps/blast.yaml +27 -0
- data/deps/deps.yaml +36 -62
- data/ext/transrate/transrate.c +9 -0
- data/lib/transrate/assembly.rb +21 -11
- data/lib/transrate/comparative_metrics.rb +2 -2
- data/lib/transrate/contig.rb +8 -3
- data/lib/transrate/read_metrics.rb +22 -62
- data/lib/transrate/salmon.rb +67 -0
- data/lib/transrate/snap.rb +4 -32
- data/lib/transrate/transrater.rb +1 -1
- data/lib/transrate/version.rb +1 -1
- data/lib/transrate.rb +18 -15
- data/test/data/sorghum_100.fa +200 -0
- data/test/data/test.sf +30 -0
- data/test/helper.rb +13 -0
- data/test/test_assembly.rb +54 -0
- data/test/test_bin.rb +30 -27
- data/test/test_cmd.rb +5 -0
- data/test/test_contig.rb +9 -14
- data/test/test_read_metrics.rb +66 -42
- data/test/test_salmon.rb +33 -0
- data/test/test_snap.rb +27 -0
- data/test/test_transrater.rb +10 -10
- data/transrate.gemspec +1 -1
- metadata +14 -12
- data/lib/transrate/express.rb +0 -102
- data/lib/transrate/sam_checker.rb +0 -74
- data/lib/transrate/samtools.rb +0 -146
- data/test/data/express_results.xprs +0 -5
- data/test/test_express.rb +0 -22
- data/test/test_samtools.rb +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b90d296192b895be2cb4f5411ccc716264f5f360
|
4
|
+
data.tar.gz: 54fa1d66571912a1526aa7bd0d9ed549a04c8b19
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc43e7f297ce2d53a03f9a7fcdde87124c605750cb6947d35e32185f5de92f53785fab37302e3abde3e4a2f7736aee3680fb415ab413d914720c025ebbff5f87
|
7
|
+
data.tar.gz: a122304dae2f6009c22e9d396c2b2f874cfa47937e582b1b5e5342cdf2a6d7eaeddcca465b3571022f7979d385f1c4014541a1ac1c5ca241eacaa8e8a4fd84e6
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -2,8 +2,16 @@ language: ruby
|
|
2
2
|
rvm:
|
3
3
|
- "2.0.0"
|
4
4
|
- "2.1.0"
|
5
|
+
- "2.2.0"
|
5
6
|
before_script:
|
6
7
|
- gem install bindeps
|
7
8
|
- bundle install
|
8
9
|
- bundle exec rake compile
|
9
10
|
- bundle exec bin/transrate --install-deps
|
11
|
+
os:
|
12
|
+
- linux
|
13
|
+
- osx
|
14
|
+
matrix:
|
15
|
+
allow_failures:
|
16
|
+
- os: osx
|
17
|
+
- rvm: "2.2.0"
|
data/CITATION
ADDED
data/README.md
CHANGED
@@ -24,7 +24,7 @@ This software is being actively developed. Please be aware that there may be bug
|
|
24
24
|
|
25
25
|
## Citation
|
26
26
|
|
27
|
-
Transrate is pre-publication academic software. If you use it, please cite the github repository and the DOI: [![DOI](https://zenodo.org/badge/
|
27
|
+
Transrate is pre-publication academic software. If you use it, please cite the github repository and the DOI: [![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.13161.svg)](http://dx.doi.org/10.5281/zenodo.13161).
|
28
28
|
|
29
29
|
## Documentation
|
30
30
|
|
data/Rakefile
CHANGED
@@ -57,5 +57,76 @@ Rake::TestTask.new do |t|
|
|
57
57
|
t.test_files = ['test/test_contig.rb']
|
58
58
|
end
|
59
59
|
|
60
|
+
Rake::TestTask.new do |t|
|
61
|
+
t.name = :assembly
|
62
|
+
t.libs << 'test'
|
63
|
+
t.test_files = ['test/test_assembly.rb']
|
64
|
+
end
|
65
|
+
|
66
|
+
Rake::TestTask.new do |t|
|
67
|
+
t.name = :snap
|
68
|
+
t.libs << 'test'
|
69
|
+
t.test_files = ['test/test_snap.rb']
|
70
|
+
end
|
71
|
+
|
72
|
+
Rake::TestTask.new do |t|
|
73
|
+
t.name = :salmon
|
74
|
+
t.libs << 'test'
|
75
|
+
t.test_files = ['test/test_salmon.rb']
|
76
|
+
end
|
77
|
+
|
78
|
+
|
60
79
|
desc "Run tests"
|
61
80
|
task :default => :test
|
81
|
+
|
82
|
+
# PACKAGING
|
83
|
+
|
84
|
+
PACKAGE_NAME = "transrate"
|
85
|
+
VERSION = "1.0.0.beta2"
|
86
|
+
TRAVELING_RUBY_VERSION = "20141215-2.1.5"
|
87
|
+
|
88
|
+
desc "Package your app"
|
89
|
+
task :package => ['package:linux:x86_64', 'package:osx']
|
90
|
+
|
91
|
+
namespace :package do
|
92
|
+
namespace :linux do
|
93
|
+
desc "Package your app for Linux x86_64"
|
94
|
+
task :x86_64 => "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-linux-x86_64.tar.gz" do
|
95
|
+
create_package("linux-x86_64")
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
desc "Package your app for OS X"
|
100
|
+
task :osx => "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-osx.tar.gz" do
|
101
|
+
create_package("osx")
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
file "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-linux-x86_64.tar.gz" do
|
106
|
+
download_runtime("linux-x86_64")
|
107
|
+
end
|
108
|
+
|
109
|
+
file "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-osx.tar.gz" do
|
110
|
+
download_runtime("osx")
|
111
|
+
end
|
112
|
+
|
113
|
+
def create_package(target)
|
114
|
+
package_dir = "packaging/#{PACKAGE_NAME}-#{VERSION}-#{target}"
|
115
|
+
sh "rm -rf #{package_dir}"
|
116
|
+
sh "mkdir -p #{package_dir}/lib/app"
|
117
|
+
sh "cp -r lib #{package_dir}/lib/app/"
|
118
|
+
sh "cp -r bin #{package_dir}/lib/app/"
|
119
|
+
sh "mkdir #{package_dir}/lib/ruby"
|
120
|
+
sh "tar -xzf packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-#{target}.tar.gz -C #{package_dir}/lib/ruby"
|
121
|
+
sh "cp packaging/transrate #{package_dir}/transrate"
|
122
|
+
if !ENV['DIR_ONLY']
|
123
|
+
sh "tar -czf #{package_dir}.tar.gz #{package_dir}"
|
124
|
+
sh "rm -rf #{package_dir}"
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def download_runtime(target)
|
129
|
+
sh "mkdir -p packaging/packaging &&" +
|
130
|
+
"cd packaging/packaging && curl -L -O --fail " +
|
131
|
+
"http://d6r77u77i8pq3.cloudfront.net/releases/traveling-ruby-#{TRAVELING_RUBY_VERSION}-#{target}.tar.gz"
|
132
|
+
end
|
data/bin/transrate
CHANGED
@@ -1,14 +1,32 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
PROFILE = false
|
3
2
|
|
4
3
|
require 'trollop'
|
5
4
|
require 'transrate'
|
6
5
|
require 'csv'
|
7
6
|
require 'bindeps'
|
8
|
-
require 'ruby-prof'
|
9
7
|
|
8
|
+
include Transrate
|
9
|
+
|
10
|
+
# Show the help message if no arguments provided
|
10
11
|
ARGV[0] = "--help" if ARGV.length() == 0
|
11
12
|
|
13
|
+
# We want clean error messages through the logger, no ugly backtraces
|
14
|
+
# because the user doesn't care about them, unless they specifically ask for
|
15
|
+
# them with --loglevel debug
|
16
|
+
module Kernel
|
17
|
+
alias _raise raise
|
18
|
+
|
19
|
+
def raise(*a)
|
20
|
+
begin
|
21
|
+
_raise(*a)
|
22
|
+
rescue TransrateError => e
|
23
|
+
logger.error e.message
|
24
|
+
logger.debug e.backtrace unless e.backtrace.nil?
|
25
|
+
exit 1
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
12
30
|
opts = Trollop::options do
|
13
31
|
version Transrate::VERSION::STRING.dup
|
14
32
|
banner <<-EOS
|
@@ -19,8 +37,8 @@ opts = Trollop::options do
|
|
19
37
|
DESCRIPTION:
|
20
38
|
Analyse a de-novo transcriptome assembly using three kinds of metrics:
|
21
39
|
|
22
|
-
1.
|
23
|
-
2. read-mapping (if --left and --right are provided)
|
40
|
+
1. sequence-based (basic)
|
41
|
+
2. read-mapping-based (if --left and --right are provided)
|
24
42
|
3. reference-based (if --reference is provided)
|
25
43
|
|
26
44
|
Bug reports and feature requests at:
|
@@ -34,10 +52,10 @@ opts = Trollop::options do
|
|
34
52
|
transrate --install-deps
|
35
53
|
# get the transrate score for the assembly and each contig
|
36
54
|
transrate --assembly contigs.fa --left left.fq --right right.fq
|
37
|
-
#
|
55
|
+
# basic assembly metrics only
|
38
56
|
transrate --assembly contigs.fa
|
39
|
-
#
|
40
|
-
transrate --assembly contigs.fa --reference
|
57
|
+
# basic and reference-based metrics with 8 threads
|
58
|
+
transrate --assembly contigs.fa --reference Athaliana_transcripts.fa --threads 8
|
41
59
|
# contig and read-based metrics for two assemblies with 32 threads
|
42
60
|
transrate --assembly one.fa,two.fa --left l.fq --right r.fq --threads 32
|
43
61
|
|
@@ -61,61 +79,109 @@ opts = Trollop::options do
|
|
61
79
|
"one of [error, info, warn, debug]",
|
62
80
|
:default => 'info'
|
63
81
|
opt :install_deps, "install any missing dependencies"
|
82
|
+
opt :install_read_deps, "install missing dependencies for read metrics only"
|
83
|
+
opt :install_ref_deps, "install missing dependencies for reference metrics only"
|
64
84
|
end
|
85
|
+
|
86
|
+
# Check dependencies if they are relevant to the command issued,
|
87
|
+
# and handle any commands to install missing ones
|
65
88
|
gem_dir = Gem.loaded_specs['transrate'].full_gem_path
|
66
89
|
gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
|
67
|
-
|
90
|
+
blast_dep = File.join(gem_dir, 'deps', 'blast.yaml')
|
91
|
+
|
92
|
+
if opts.install_deps || opts.install_read_deps || opts.install_ref_deps
|
93
|
+
# user has requested dependency installation
|
68
94
|
puts "Checking dependencies"
|
69
|
-
|
95
|
+
|
96
|
+
missing = []
|
97
|
+
if opts.install_deps || opts.install_read_deps
|
98
|
+
Bindeps.require gem_deps
|
99
|
+
missing += Bindeps.missing gem_deps
|
100
|
+
end
|
101
|
+
|
102
|
+
if opts.install_deps || opts.install_ref_deps
|
103
|
+
Bindeps.require blast_dep
|
104
|
+
missing += Bindeps.missing blast_dep
|
105
|
+
end
|
106
|
+
|
107
|
+
unless missing.empty?
|
108
|
+
raise TransrateError.new "Failed to install: \n - #{missing.join('\n - ')}"
|
109
|
+
end
|
110
|
+
|
70
111
|
puts "All dependencies installed"
|
71
112
|
exit
|
113
|
+
|
72
114
|
else
|
73
|
-
|
74
|
-
|
115
|
+
# no dependency installation requested, but check dependencies
|
116
|
+
# for the commands provided are installed
|
117
|
+
missing = []
|
118
|
+
missing = Bindeps.missing gem_deps if opts.left
|
119
|
+
blast_missing = []
|
120
|
+
blast_missing = Bindeps.missing blast_dep if opts.reference
|
121
|
+
|
122
|
+
if missing.length + blast_missing.length > 0
|
75
123
|
puts "Dependencies are missing:"
|
124
|
+
|
76
125
|
missing.each do |dep|
|
77
126
|
puts " - #{dep.name} (#{dep.version})"
|
78
127
|
end
|
79
|
-
|
80
|
-
|
128
|
+
|
129
|
+
blast_missing.each do |dep|
|
130
|
+
puts " - #{dep.name} (#{dep.version})"
|
131
|
+
end
|
132
|
+
|
133
|
+
puts "To install all missing dependencies, run:"
|
134
|
+
puts " transrate --install-deps"
|
135
|
+
puts "If you only want the read-metrics dependencies:"
|
136
|
+
puts " transrate --install-read-deps"
|
137
|
+
puts "Or if you only want the reference-metrics dependencies: "
|
138
|
+
puts " transrate --install-ref-deps"
|
139
|
+
|
140
|
+
exit 1
|
81
141
|
end
|
142
|
+
|
143
|
+
end
|
144
|
+
|
145
|
+
# Handle commands
|
146
|
+
unless %w[error info warn debug].include? opts.loglevel
|
147
|
+
raise "Loglevel #{opts.loglevel} is not valid. " +
|
148
|
+
"It must be one of: error, info, warn, debug."
|
82
149
|
end
|
83
150
|
|
151
|
+
logger.level = Yell::Level.new opts.loglevel.to_sym
|
152
|
+
|
84
153
|
if opts.assembly
|
85
154
|
opts.assembly.split(',').each do |assembly_file|
|
86
155
|
unless File.exist?(assembly_file)
|
87
|
-
raise
|
156
|
+
raise TransrateIOError.new "Assembly fasta file does not exist: " +
|
157
|
+
" #{assembly_file}"
|
88
158
|
end
|
89
159
|
end
|
90
160
|
else
|
91
|
-
raise
|
92
|
-
|
161
|
+
raise TransrateArgError.new "Option --assembly must be specified. " +
|
162
|
+
"Try --help for help."
|
93
163
|
end
|
94
164
|
|
95
165
|
if opts.reference && !File.exist?(opts.reference)
|
96
|
-
raise
|
166
|
+
raise TransrateIOError.new "Reference fasta file does not exist: " +
|
167
|
+
" #{opts.reference}"
|
97
168
|
end
|
98
169
|
|
99
170
|
if opts.left and opts.right
|
100
171
|
if opts.left.split(",").length != opts.right.split(",").length
|
101
172
|
msg = "Please provide the same number of left reads as right reads"
|
102
|
-
raise
|
173
|
+
raise TransrateArgError.new msg
|
103
174
|
end
|
104
175
|
opts.left.split(",").zip(opts.right.split(",")).each do |left,right|
|
105
176
|
if !File.exist?(left)
|
106
|
-
raise
|
177
|
+
raise TransrateIOError.new "Left read fastq file does not exist: #{left}"
|
107
178
|
end
|
108
179
|
if !File.exist?(right)
|
109
|
-
raise
|
180
|
+
raise TransrateIOError.new "Right read fastq file does not exist: #{right}"
|
110
181
|
end
|
111
182
|
end
|
112
183
|
end
|
113
184
|
|
114
|
-
if PROFILE
|
115
|
-
logger.info "Starting profiler"
|
116
|
-
RubyProf.start
|
117
|
-
end
|
118
|
-
|
119
185
|
def pretty_print_hash hash, width
|
120
186
|
hash.map do |k, v|
|
121
187
|
# show as float if there are any decimal places
|
@@ -133,15 +199,6 @@ def pretty_print_hash hash, width
|
|
133
199
|
end
|
134
200
|
end
|
135
201
|
|
136
|
-
include Transrate
|
137
|
-
|
138
|
-
unless %w[error info warn debug].include? opts.loglevel
|
139
|
-
raise "Loglevel #{opts.loglevel} is not valid. " +
|
140
|
-
"It must be one of: error, info, warn, debug."
|
141
|
-
end
|
142
|
-
|
143
|
-
logger.level = Yell::Level.new opts.loglevel.to_sym
|
144
|
-
|
145
202
|
r = opts.reference ? Assembly.new(opts.reference) : nil
|
146
203
|
report_width = 35
|
147
204
|
|
@@ -251,6 +308,7 @@ opts.assembly.split(',').each do |assembly|
|
|
251
308
|
all << contig_results.merge(read_results)
|
252
309
|
.merge(comparative_results)
|
253
310
|
.merge({ :assembly => assembly })
|
311
|
+
.merge({ :score => score })
|
254
312
|
|
255
313
|
end
|
256
314
|
|
@@ -266,10 +324,3 @@ CSV.open(outfile, 'wb') do |file|
|
|
266
324
|
file << head.map { |x| row[x] }
|
267
325
|
end
|
268
326
|
end
|
269
|
-
|
270
|
-
if PROFILE
|
271
|
-
logger.info "Writing profiling results to transrate_profile.txt"
|
272
|
-
result = RubyProf.stop
|
273
|
-
printer = RubyProf::FlatPrinter.new(result)
|
274
|
-
printer.print(File.open('transrate_profile.txt', 'w'))
|
275
|
-
end
|
data/deps/blast.yaml
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
blastplus:
|
2
|
+
binaries:
|
3
|
+
- makeblastdb
|
4
|
+
- blastn
|
5
|
+
- tblastn
|
6
|
+
- blastp
|
7
|
+
- blastx
|
8
|
+
- tblastx
|
9
|
+
- makembindex
|
10
|
+
- psiblast
|
11
|
+
- rpsblast
|
12
|
+
- blastdbcmd
|
13
|
+
- segmasker
|
14
|
+
- dustmasker
|
15
|
+
- blast_formatter
|
16
|
+
- windowmasker
|
17
|
+
- blastdb_aliastool
|
18
|
+
- deltablast
|
19
|
+
- rpstblastn
|
20
|
+
- blastdbcheck
|
21
|
+
version:
|
22
|
+
number: '2.2.[0-9]'
|
23
|
+
command: 'blastx -version'
|
24
|
+
url:
|
25
|
+
64bit:
|
26
|
+
macosx: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-universal-macosx.tar.gz
|
27
|
+
linux: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-x64-linux.tar.gz
|
data/deps/deps.yaml
CHANGED
@@ -1,80 +1,54 @@
|
|
1
|
-
blastplus:
|
2
|
-
binaries:
|
3
|
-
- makeblastdb
|
4
|
-
- blastn
|
5
|
-
- tblastn
|
6
|
-
- blastp
|
7
|
-
- blastx
|
8
|
-
- tblastx
|
9
|
-
- makembindex
|
10
|
-
- psiblast
|
11
|
-
- rpsblast
|
12
|
-
- blastdbcmd
|
13
|
-
- segmasker
|
14
|
-
- dustmasker
|
15
|
-
- blast_formatter
|
16
|
-
- windowmasker
|
17
|
-
- blastdb_aliastool
|
18
|
-
- deltablast
|
19
|
-
- rpstblastn
|
20
|
-
- blastdbcheck
|
21
|
-
version:
|
22
|
-
number: '2.2.29'
|
23
|
-
command: 'blastx -version'
|
24
|
-
url:
|
25
|
-
64bit:
|
26
|
-
macosx: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-universal-macosx.tar.gz
|
27
|
-
linux: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-x64-linux.tar.gz
|
28
1
|
snap:
|
29
2
|
binaries:
|
30
3
|
- snap
|
31
4
|
version:
|
32
|
-
number: '1.0dev.
|
5
|
+
number: '1.0dev.67.trfix1'
|
33
6
|
command: 'snap'
|
34
7
|
url:
|
35
8
|
64bit:
|
36
|
-
linux: https://github.com/
|
37
|
-
macosx: https://github.com/
|
38
|
-
samtools:
|
39
|
-
binaries:
|
40
|
-
- samtools
|
41
|
-
- bcftools
|
42
|
-
version:
|
43
|
-
number: '0.1.19'
|
44
|
-
command: 'samtools'
|
45
|
-
url:
|
46
|
-
64bit:
|
47
|
-
linux: https://github.com/cboursnell/samtools/raw/master/build/linux64.tar.gz
|
48
|
-
macosx: https://github.com/cboursnell/samtools/raw/master/build/osx64.tar.gz
|
9
|
+
linux: https://github.com/Blahah/snap/releases/download/v1.0dev.67.trfix1/snap_v1.0dev.67.trfix1_linux.tar.gz
|
10
|
+
macosx: https://github.com/Blahah/snap/releases/download/v1.0dev.67.trfix1/snap_v1.0dev.67.trfix1_macosx.tar.gz
|
49
11
|
bam-read:
|
50
12
|
binaries:
|
51
13
|
- bam-read
|
52
14
|
version:
|
53
|
-
number: '1.0.0.
|
15
|
+
number: '1.0.0.beta3'
|
54
16
|
command: 'bam-read'
|
55
17
|
url:
|
56
18
|
64bit:
|
57
|
-
linux: https://github.com/
|
58
|
-
macosx: https://github.com/Blahah/transrate-tools/
|
59
|
-
unpack:
|
60
|
-
|
61
|
-
binaries:
|
62
|
-
- bam-split
|
63
|
-
version:
|
64
|
-
number: '1.0.0.beta1'
|
65
|
-
command: 'bam-split'
|
66
|
-
url:
|
67
|
-
64bit:
|
68
|
-
linux: https://github.com/Blahah/transrate-tools/raw/master/bin/linux/bam-split
|
69
|
-
macosx: https://github.com/Blahah/transrate-tools/raw/master/bin/macosx/bam-split
|
70
|
-
unpack: false
|
71
|
-
express:
|
19
|
+
linux: https://github.com/Blahah/transrate-tools/releases/download/v1.0.0.beta3/bam-read_1.0.0.beta3_linux.tar.gz
|
20
|
+
macosx: https://github.com/Blahah/transrate-tools/releases/download/v1.0.0.beta3/bam-read_1.0.0.beta3_macosx.tar.gz
|
21
|
+
unpack: true
|
22
|
+
salmon:
|
72
23
|
binaries:
|
73
|
-
-
|
24
|
+
- salmon
|
25
|
+
libraries:
|
26
|
+
- libbz2.so.1
|
27
|
+
- libgcc_s.so.1
|
28
|
+
- libgomp.so.1
|
29
|
+
- liblzma.so.0
|
30
|
+
- libm.so.6
|
31
|
+
- libpthread.so.0
|
32
|
+
- librt.so.1
|
33
|
+
- libstdc++.so.6
|
34
|
+
- libtbbmalloc_proxy.so,
|
35
|
+
- libtbbmalloc_proxy.so.2
|
36
|
+
- libtbbmalloc.so
|
37
|
+
- libtbbmalloc.so.2
|
38
|
+
- libtbb.so
|
39
|
+
- libtbb.so.2
|
40
|
+
- libz.so.1
|
41
|
+
- libcmph.0.dylib
|
42
|
+
- libcmph.dylib
|
43
|
+
- libcmph.la
|
44
|
+
- libstaden-read.la
|
45
|
+
- libtbb.dylib
|
46
|
+
- libtbbmalloc.dylib
|
47
|
+
- libtbbmalloc_proxy.dylib
|
74
48
|
version:
|
75
|
-
number: '
|
76
|
-
command: '
|
49
|
+
number: 'Salmon v0.2.7'
|
50
|
+
command: 'salmon --help'
|
77
51
|
url:
|
78
52
|
64bit:
|
79
|
-
linux:
|
80
|
-
macosx:
|
53
|
+
linux: https://github.com/kingsfordgroup/sailfish/releases/download/v0.2.7/Salmon-v0.2.7_Ubuntu-12.04.tar.gz
|
54
|
+
macosx: https://github.com/kingsfordgroup/sailfish/releases/download/v0.2.7/Salmon-v0.2.7_MacOSX-10.10.1.tar.gz
|
data/ext/transrate/transrate.c
CHANGED
@@ -55,6 +55,9 @@ VALUE method_composition(VALUE self, VALUE _seq) {
|
|
55
55
|
}
|
56
56
|
for (i=0; i < len; i++) {
|
57
57
|
base = seq[i];
|
58
|
+
if (base > 90) {
|
59
|
+
base -= 32;
|
60
|
+
}
|
58
61
|
switch (base) {
|
59
62
|
case 'A': {
|
60
63
|
idx=0;
|
@@ -81,6 +84,9 @@ VALUE method_composition(VALUE self, VALUE _seq) {
|
|
81
84
|
|
82
85
|
if (i > 0) {
|
83
86
|
prevbase = seq[i-1];
|
87
|
+
if (prevbase > 90) {
|
88
|
+
prevbase -= 32;
|
89
|
+
}
|
84
90
|
switch (prevbase) {
|
85
91
|
case 'A': {
|
86
92
|
idx=idx;
|
@@ -138,6 +144,9 @@ VALUE method_kmer_count(VALUE self, VALUE _k, VALUE _s) {
|
|
138
144
|
n = 0;
|
139
145
|
for(i = start; i < start+k; i++) {
|
140
146
|
base = c_str[i];
|
147
|
+
if (base > 90) {
|
148
|
+
base -= 32;
|
149
|
+
}
|
141
150
|
switch (base) {
|
142
151
|
case 'A': {
|
143
152
|
h = h << 2;
|
data/lib/transrate/assembly.rb
CHANGED
@@ -4,6 +4,8 @@ require 'forwardable'
|
|
4
4
|
|
5
5
|
module Transrate
|
6
6
|
|
7
|
+
class AssemblyError < TransrateError; end
|
8
|
+
|
7
9
|
# Container for a transcriptome assembly and its associated
|
8
10
|
# metadata.
|
9
11
|
#
|
@@ -41,13 +43,20 @@ module Transrate
|
|
41
43
|
def initialize file
|
42
44
|
@file = File.expand_path file
|
43
45
|
unless File.exist? @file
|
44
|
-
raise
|
46
|
+
raise TransrateIOError.new "Assembly file doesn't exist: #{@file}"
|
45
47
|
end
|
46
48
|
@assembly = {}
|
47
49
|
@n_bases = 0
|
48
50
|
Bio::FastaFormat.open(file).each do |entry|
|
49
51
|
@n_bases += entry.length
|
50
52
|
contig = Contig.new(entry)
|
53
|
+
if @assembly.key?(contig.name)
|
54
|
+
logger.error "Non unique fasta identifier found"
|
55
|
+
logger.error ">#{contig.name}"
|
56
|
+
logger.error "Please make sure there are no duplicate entries in the assembly"
|
57
|
+
logger.error "Contig name is taken from before the first | or space"
|
58
|
+
raise AssemblyError
|
59
|
+
end
|
51
60
|
@assembly[contig.name] = contig
|
52
61
|
end
|
53
62
|
@contig_metrics = ContigMetrics.new self
|
@@ -125,7 +134,6 @@ module Transrate
|
|
125
134
|
# and iterate over them
|
126
135
|
bin.sort_by! { |c| c.seq.length }
|
127
136
|
bin.each do |contig|
|
128
|
-
|
129
137
|
# increment our long contig counters if this
|
130
138
|
# contig is above the thresholds
|
131
139
|
if contig.length < 200
|
@@ -157,7 +165,6 @@ module Transrate
|
|
157
165
|
cutoff = x2.pop / 100.0
|
158
166
|
end
|
159
167
|
end
|
160
|
-
|
161
168
|
end
|
162
169
|
|
163
170
|
# if there aren't enough sequences we might have no value for some
|
@@ -168,6 +175,11 @@ module Transrate
|
|
168
175
|
|
169
176
|
# calculate and return the statistics as a hash
|
170
177
|
mean = cumulative_length / @assembly.size
|
178
|
+
if @assembly.size * mean == 0
|
179
|
+
mean_orf_percent = 0
|
180
|
+
else
|
181
|
+
mean_orf_percent = 300 * orf_length_sum / (@assembly.size * mean)
|
182
|
+
end
|
171
183
|
ns = Hash[x.map { |n| "n#{n}" }.zip(res)]
|
172
184
|
{
|
173
185
|
'n_seqs' => bin.size,
|
@@ -179,7 +191,7 @@ module Transrate
|
|
179
191
|
'n_over_1k' => n_over_1k,
|
180
192
|
'n_over_10k' => n_over_10k,
|
181
193
|
'n_with_orf' => n_with_orf,
|
182
|
-
'mean_orf_percent' =>
|
194
|
+
'mean_orf_percent' => mean_orf_percent
|
183
195
|
}.merge ns
|
184
196
|
|
185
197
|
end # basic_bin_stats
|
@@ -187,20 +199,18 @@ module Transrate
|
|
187
199
|
def classify_contigs
|
188
200
|
# create hash of file handles for each output
|
189
201
|
base = File.basename @file
|
190
|
-
files =
|
191
|
-
|
192
|
-
|
193
|
-
[type.to_sym, handle]
|
194
|
-
end
|
202
|
+
files = {}
|
203
|
+
%w(good fragmented chimeric bad).each do |type|
|
204
|
+
files[type.to_sym] = File.open("#{type}.#{base}", "wb")
|
195
205
|
end
|
196
206
|
# loop through contigs writing them out to the appropriate file
|
197
207
|
@assembly.each_pair do |name, contig|
|
198
208
|
category = contig.classify
|
199
209
|
handle = files[category]
|
200
|
-
handle.
|
210
|
+
handle.write contig.to_fasta
|
201
211
|
end
|
202
212
|
# close all the file handles
|
203
|
-
files.each do |handle|
|
213
|
+
files.each do |type, handle|
|
204
214
|
handle.close
|
205
215
|
end
|
206
216
|
end
|
@@ -55,12 +55,12 @@ module Transrate
|
|
55
55
|
crbblast.reciprocals.each do |key, list|
|
56
56
|
list.each_with_index do |hit, i|
|
57
57
|
unless @reference.assembly.key? hit.target
|
58
|
-
raise "#{hit.target} not in reference"
|
58
|
+
raise TransrateError.new "#{hit.target} not in reference"
|
59
59
|
end
|
60
60
|
@reference[hit.target].hits << hit
|
61
61
|
|
62
62
|
unless @assembly.assembly.key? hit.query
|
63
|
-
raise "#{hit.query} not in assembly"
|
63
|
+
raise TransrateError.new "#{hit.query} not in assembly"
|
64
64
|
end
|
65
65
|
contig = @assembly[hit.query]
|
66
66
|
contig.has_crb = true
|
data/lib/transrate/contig.rb
CHANGED
@@ -20,7 +20,12 @@ module Transrate
|
|
20
20
|
attr_accessor :hits
|
21
21
|
|
22
22
|
def initialize(seq, name: nil)
|
23
|
-
|
23
|
+
# fix null bytes in the nucleotide sequence
|
24
|
+
seq.seq.gsub!("\0", "")
|
25
|
+
# trim trailing semicolons (because BLAST strips them)
|
26
|
+
if seq.respond_to?(:entry_id)
|
27
|
+
seq.entry_id.gsub!(/;$/, '')
|
28
|
+
end
|
24
29
|
@seq = seq
|
25
30
|
@seq.data = nil # no need to store raw fasta string
|
26
31
|
@name = seq.respond_to?(:entry_id) ? seq.entry_id : name
|
@@ -248,13 +253,13 @@ module Transrate
|
|
248
253
|
return :good if score >= 0.5
|
249
254
|
# fragmented?
|
250
255
|
if in_bridges > 5
|
251
|
-
if p_not_segmented * p_bases_covered * p_seq_true
|
256
|
+
if p_not_segmented * p_bases_covered * p_seq_true >= 0.5
|
252
257
|
return :fragmented
|
253
258
|
end
|
254
259
|
end
|
255
260
|
# chimeric?
|
256
261
|
if p_not_segmented < 0.25
|
257
|
-
if p_good * p_bases_covered * p_seq_true
|
262
|
+
if p_good * p_bases_covered * p_seq_true >= 0.5
|
258
263
|
return :chimeric
|
259
264
|
end
|
260
265
|
end
|