transrate 1.0.0.beta1 → 1.0.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +8 -0
- data/CITATION +3 -0
- data/README.md +1 -1
- data/Rakefile +71 -0
- data/bin/transrate +92 -41
- data/deps/blast.yaml +27 -0
- data/deps/deps.yaml +36 -62
- data/ext/transrate/transrate.c +9 -0
- data/lib/transrate/assembly.rb +21 -11
- data/lib/transrate/comparative_metrics.rb +2 -2
- data/lib/transrate/contig.rb +8 -3
- data/lib/transrate/read_metrics.rb +22 -62
- data/lib/transrate/salmon.rb +67 -0
- data/lib/transrate/snap.rb +4 -32
- data/lib/transrate/transrater.rb +1 -1
- data/lib/transrate/version.rb +1 -1
- data/lib/transrate.rb +18 -15
- data/test/data/sorghum_100.fa +200 -0
- data/test/data/test.sf +30 -0
- data/test/helper.rb +13 -0
- data/test/test_assembly.rb +54 -0
- data/test/test_bin.rb +30 -27
- data/test/test_cmd.rb +5 -0
- data/test/test_contig.rb +9 -14
- data/test/test_read_metrics.rb +66 -42
- data/test/test_salmon.rb +33 -0
- data/test/test_snap.rb +27 -0
- data/test/test_transrater.rb +10 -10
- data/transrate.gemspec +1 -1
- metadata +14 -12
- data/lib/transrate/express.rb +0 -102
- data/lib/transrate/sam_checker.rb +0 -74
- data/lib/transrate/samtools.rb +0 -146
- data/test/data/express_results.xprs +0 -5
- data/test/test_express.rb +0 -22
- data/test/test_samtools.rb +0 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b90d296192b895be2cb4f5411ccc716264f5f360
|
4
|
+
data.tar.gz: 54fa1d66571912a1526aa7bd0d9ed549a04c8b19
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dc43e7f297ce2d53a03f9a7fcdde87124c605750cb6947d35e32185f5de92f53785fab37302e3abde3e4a2f7736aee3680fb415ab413d914720c025ebbff5f87
|
7
|
+
data.tar.gz: a122304dae2f6009c22e9d396c2b2f874cfa47937e582b1b5e5342cdf2a6d7eaeddcca465b3571022f7979d385f1c4014541a1ac1c5ca241eacaa8e8a4fd84e6
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -2,8 +2,16 @@ language: ruby
|
|
2
2
|
rvm:
|
3
3
|
- "2.0.0"
|
4
4
|
- "2.1.0"
|
5
|
+
- "2.2.0"
|
5
6
|
before_script:
|
6
7
|
- gem install bindeps
|
7
8
|
- bundle install
|
8
9
|
- bundle exec rake compile
|
9
10
|
- bundle exec bin/transrate --install-deps
|
11
|
+
os:
|
12
|
+
- linux
|
13
|
+
- osx
|
14
|
+
matrix:
|
15
|
+
allow_failures:
|
16
|
+
- os: osx
|
17
|
+
- rvm: "2.2.0"
|
data/CITATION
ADDED
data/README.md
CHANGED
@@ -24,7 +24,7 @@ This software is being actively developed. Please be aware that there may be bug
|
|
24
24
|
|
25
25
|
## Citation
|
26
26
|
|
27
|
-
Transrate is pre-publication academic software. If you use it, please cite the GitHub repository and the DOI: [10.5281/zenodo.13161](http://dx.doi.org/10.5281/zenodo.13161).
|
28
28
|
|
29
29
|
## Documentation
|
30
30
|
|
data/Rakefile
CHANGED
@@ -57,5 +57,76 @@ Rake::TestTask.new do |t|
|
|
57
57
|
t.test_files = ['test/test_contig.rb']
|
58
58
|
end
|
59
59
|
|
60
|
+
Rake::TestTask.new do |t|
|
61
|
+
t.name = :assembly
|
62
|
+
t.libs << 'test'
|
63
|
+
t.test_files = ['test/test_assembly.rb']
|
64
|
+
end
|
65
|
+
|
66
|
+
Rake::TestTask.new do |t|
|
67
|
+
t.name = :snap
|
68
|
+
t.libs << 'test'
|
69
|
+
t.test_files = ['test/test_snap.rb']
|
70
|
+
end
|
71
|
+
|
72
|
+
Rake::TestTask.new do |t|
|
73
|
+
t.name = :salmon
|
74
|
+
t.libs << 'test'
|
75
|
+
t.test_files = ['test/test_salmon.rb']
|
76
|
+
end
|
77
|
+
|
78
|
+
|
60
79
|
desc "Run tests"
|
61
80
|
task :default => :test
|
81
|
+
|
82
|
+
# PACKAGING
|
83
|
+
|
84
|
+
PACKAGE_NAME = "transrate"
|
85
|
+
VERSION = "1.0.0.beta2"
|
86
|
+
TRAVELING_RUBY_VERSION = "20141215-2.1.5"
|
87
|
+
|
88
|
+
desc "Package your app"
|
89
|
+
task :package => ['package:linux:x86_64', 'package:osx']
|
90
|
+
|
91
|
+
namespace :package do
|
92
|
+
namespace :linux do
|
93
|
+
desc "Package your app for Linux x86_64"
|
94
|
+
task :x86_64 => "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-linux-x86_64.tar.gz" do
|
95
|
+
create_package("linux-x86_64")
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
desc "Package your app for OS X"
|
100
|
+
task :osx => "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-osx.tar.gz" do
|
101
|
+
create_package("osx")
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
file "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-linux-x86_64.tar.gz" do
|
106
|
+
download_runtime("linux-x86_64")
|
107
|
+
end
|
108
|
+
|
109
|
+
file "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-osx.tar.gz" do
|
110
|
+
download_runtime("osx")
|
111
|
+
end
|
112
|
+
|
113
|
+
def create_package(target)
|
114
|
+
package_dir = "packaging/#{PACKAGE_NAME}-#{VERSION}-#{target}"
|
115
|
+
sh "rm -rf #{package_dir}"
|
116
|
+
sh "mkdir -p #{package_dir}/lib/app"
|
117
|
+
sh "cp -r lib #{package_dir}/lib/app/"
|
118
|
+
sh "cp -r bin #{package_dir}/lib/app/"
|
119
|
+
sh "mkdir #{package_dir}/lib/ruby"
|
120
|
+
sh "tar -xzf packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-#{target}.tar.gz -C #{package_dir}/lib/ruby"
|
121
|
+
sh "cp packaging/transrate #{package_dir}/transrate"
|
122
|
+
if !ENV['DIR_ONLY']
|
123
|
+
sh "tar -czf #{package_dir}.tar.gz #{package_dir}"
|
124
|
+
sh "rm -rf #{package_dir}"
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def download_runtime(target)
|
129
|
+
sh "mkdir -p packaging/packaging &&" +
|
130
|
+
"cd packaging/packaging && curl -L -O --fail " +
|
131
|
+
"http://d6r77u77i8pq3.cloudfront.net/releases/traveling-ruby-#{TRAVELING_RUBY_VERSION}-#{target}.tar.gz"
|
132
|
+
end
|
data/bin/transrate
CHANGED
@@ -1,14 +1,32 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
-
PROFILE = false
|
3
2
|
|
4
3
|
require 'trollop'
|
5
4
|
require 'transrate'
|
6
5
|
require 'csv'
|
7
6
|
require 'bindeps'
|
8
|
-
require 'ruby-prof'
|
9
7
|
|
8
|
+
include Transrate
|
9
|
+
|
10
|
+
# Show the help message if no arguments provided
|
10
11
|
ARGV[0] = "--help" if ARGV.length() == 0
|
11
12
|
|
13
|
+
# We want clean error messages through the logger, no ugly backtraces
|
14
|
+
# because the user doesn't care about them, unless they specifically ask for
|
15
|
+
# them with --loglevel debug
|
16
|
+
module Kernel
|
17
|
+
alias _raise raise
|
18
|
+
|
19
|
+
def raise(*a)
|
20
|
+
begin
|
21
|
+
_raise(*a)
|
22
|
+
rescue TransrateError => e
|
23
|
+
logger.error e.message
|
24
|
+
logger.debug e.backtrace unless e.backtrace.nil?
|
25
|
+
exit 1
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
12
30
|
opts = Trollop::options do
|
13
31
|
version Transrate::VERSION::STRING.dup
|
14
32
|
banner <<-EOS
|
@@ -19,8 +37,8 @@ opts = Trollop::options do
|
|
19
37
|
DESCRIPTION:
|
20
38
|
Analyse a de-novo transcriptome assembly using three kinds of metrics:
|
21
39
|
|
22
|
-
1.
|
23
|
-
2. read-mapping (if --left and --right are provided)
|
40
|
+
1. sequence-based (basic)
|
41
|
+
2. read-mapping-based (if --left and --right are provided)
|
24
42
|
3. reference-based (if --reference is provided)
|
25
43
|
|
26
44
|
Bug reports and feature requests at:
|
@@ -34,10 +52,10 @@ opts = Trollop::options do
|
|
34
52
|
transrate --install-deps
|
35
53
|
# get the transrate score for the assembly and each contig
|
36
54
|
transrate --assembly contigs.fa --left left.fq --right right.fq
|
37
|
-
#
|
55
|
+
# basic assembly metrics only
|
38
56
|
transrate --assembly contigs.fa
|
39
|
-
#
|
40
|
-
transrate --assembly contigs.fa --reference
|
57
|
+
# basic and reference-based metrics with 8 threads
|
58
|
+
transrate --assembly contigs.fa --reference Athaliana_transcripts.fa --threads 8
|
41
59
|
# contig and read-based metrics for two assemblies with 32 threads
|
42
60
|
transrate --assembly one.fa,two.fa --left l.fq --right r.fq --threads 32
|
43
61
|
|
@@ -61,61 +79,109 @@ opts = Trollop::options do
|
|
61
79
|
"one of [error, info, warn, debug]",
|
62
80
|
:default => 'info'
|
63
81
|
opt :install_deps, "install any missing dependencies"
|
82
|
+
opt :install_read_deps, "install missing dependencies for read metrics only"
|
83
|
+
opt :install_ref_deps, "install missing dependencies for reference metrics only"
|
64
84
|
end
|
85
|
+
|
86
|
+
# Check dependencies if they are relevant to the command issued,
|
87
|
+
# and handle any commands to install missing ones
|
65
88
|
gem_dir = Gem.loaded_specs['transrate'].full_gem_path
|
66
89
|
gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
|
67
|
-
|
90
|
+
blast_dep = File.join(gem_dir, 'deps', 'blast.yaml')
|
91
|
+
|
92
|
+
if opts.install_deps || opts.install_read_deps || opts.install_ref_deps
|
93
|
+
# user has requested dependency installation
|
68
94
|
puts "Checking dependencies"
|
69
|
-
|
95
|
+
|
96
|
+
missing = []
|
97
|
+
if opts.install_deps || opts.install_read_deps
|
98
|
+
Bindeps.require gem_deps
|
99
|
+
missing += Bindeps.missing gem_deps
|
100
|
+
end
|
101
|
+
|
102
|
+
if opts.install_deps || opts.install_ref_deps
|
103
|
+
Bindeps.require blast_dep
|
104
|
+
missing += Bindeps.missing blast_dep
|
105
|
+
end
|
106
|
+
|
107
|
+
unless missing.empty?
|
108
|
+
raise TransrateError.new "Failed to install: \n - #{missing.join('\n - ')}"
|
109
|
+
end
|
110
|
+
|
70
111
|
puts "All dependencies installed"
|
71
112
|
exit
|
113
|
+
|
72
114
|
else
|
73
|
-
|
74
|
-
|
115
|
+
# no dependency installation requested, but check that the dependencies
|
116
|
+
# for the commands provided are installed
|
117
|
+
missing = []
|
118
|
+
missing = Bindeps.missing gem_deps if opts.left
|
119
|
+
blast_missing = []
|
120
|
+
blast_missing = Bindeps.missing blast_dep if opts.reference
|
121
|
+
|
122
|
+
if missing.length + blast_missing.length > 0
|
75
123
|
puts "Dependencies are missing:"
|
124
|
+
|
76
125
|
missing.each do |dep|
|
77
126
|
puts " - #{dep.name} (#{dep.version})"
|
78
127
|
end
|
79
|
-
|
80
|
-
|
128
|
+
|
129
|
+
blast_missing.each do |dep|
|
130
|
+
puts " - #{dep.name} (#{dep.version})"
|
131
|
+
end
|
132
|
+
|
133
|
+
puts "To install all missing dependencies, run:"
|
134
|
+
puts " transrate --install-deps"
|
135
|
+
puts "If you only want the read-metrics dependencies:"
|
136
|
+
puts " transrate --install-read-deps"
|
137
|
+
puts "Or if you only want the reference-metrics dependencies: "
|
138
|
+
puts " transrate --install-ref-deps"
|
139
|
+
|
140
|
+
exit 1
|
81
141
|
end
|
142
|
+
|
143
|
+
end
|
144
|
+
|
145
|
+
# Handle commands
|
146
|
+
unless %w[error info warn debug].include? opts.loglevel
|
147
|
+
raise "Loglevel #{opts.loglevel} is not valid. " +
|
148
|
+
"It must be one of: error, info, warn, debug."
|
82
149
|
end
|
83
150
|
|
151
|
+
logger.level = Yell::Level.new opts.loglevel.to_sym
|
152
|
+
|
84
153
|
if opts.assembly
|
85
154
|
opts.assembly.split(',').each do |assembly_file|
|
86
155
|
unless File.exist?(assembly_file)
|
87
|
-
raise
|
156
|
+
raise TransrateIOError.new "Assembly fasta file does not exist: " +
|
157
|
+
" #{assembly_file}"
|
88
158
|
end
|
89
159
|
end
|
90
160
|
else
|
91
|
-
raise
|
92
|
-
|
161
|
+
raise TransrateArgError.new "Option --assembly must be specified. " +
|
162
|
+
"Try --help for help."
|
93
163
|
end
|
94
164
|
|
95
165
|
if opts.reference && !File.exist?(opts.reference)
|
96
|
-
raise
|
166
|
+
raise TransrateIOError.new "Reference fasta file does not exist: " +
|
167
|
+
" #{opts.reference}"
|
97
168
|
end
|
98
169
|
|
99
170
|
if opts.left and opts.right
|
100
171
|
if opts.left.split(",").length != opts.right.split(",").length
|
101
172
|
msg = "Please provide the same number of left reads as right reads"
|
102
|
-
raise
|
173
|
+
raise TransrateArgError.new msg
|
103
174
|
end
|
104
175
|
opts.left.split(",").zip(opts.right.split(",")).each do |left,right|
|
105
176
|
if !File.exist?(left)
|
106
|
-
raise
|
177
|
+
raise TransrateIOError.new "Left read fastq file does not exist: #{left}"
|
107
178
|
end
|
108
179
|
if !File.exist?(right)
|
109
|
-
raise
|
180
|
+
raise TransrateIOError.new "Right read fastq file does not exist: #{right}"
|
110
181
|
end
|
111
182
|
end
|
112
183
|
end
|
113
184
|
|
114
|
-
if PROFILE
|
115
|
-
logger.info "Starting profiler"
|
116
|
-
RubyProf.start
|
117
|
-
end
|
118
|
-
|
119
185
|
def pretty_print_hash hash, width
|
120
186
|
hash.map do |k, v|
|
121
187
|
# show as float if there are any decimal places
|
@@ -133,15 +199,6 @@ def pretty_print_hash hash, width
|
|
133
199
|
end
|
134
200
|
end
|
135
201
|
|
136
|
-
include Transrate
|
137
|
-
|
138
|
-
unless %w[error info warn debug].include? opts.loglevel
|
139
|
-
raise "Loglevel #{opts.loglevel} is not valid. " +
|
140
|
-
"It must be one of: error, info, warn, debug."
|
141
|
-
end
|
142
|
-
|
143
|
-
logger.level = Yell::Level.new opts.loglevel.to_sym
|
144
|
-
|
145
202
|
r = opts.reference ? Assembly.new(opts.reference) : nil
|
146
203
|
report_width = 35
|
147
204
|
|
@@ -251,6 +308,7 @@ opts.assembly.split(',').each do |assembly|
|
|
251
308
|
all << contig_results.merge(read_results)
|
252
309
|
.merge(comparative_results)
|
253
310
|
.merge({ :assembly => assembly })
|
311
|
+
.merge({ :score => score })
|
254
312
|
|
255
313
|
end
|
256
314
|
|
@@ -266,10 +324,3 @@ CSV.open(outfile, 'wb') do |file|
|
|
266
324
|
file << head.map { |x| row[x] }
|
267
325
|
end
|
268
326
|
end
|
269
|
-
|
270
|
-
if PROFILE
|
271
|
-
logger.info "Writing profiling results to transrate_profile.txt"
|
272
|
-
result = RubyProf.stop
|
273
|
-
printer = RubyProf::FlatPrinter.new(result)
|
274
|
-
printer.print(File.open('transrate_profile.txt', 'w'))
|
275
|
-
end
|
data/deps/blast.yaml
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
blastplus:
|
2
|
+
binaries:
|
3
|
+
- makeblastdb
|
4
|
+
- blastn
|
5
|
+
- tblastn
|
6
|
+
- blastp
|
7
|
+
- blastx
|
8
|
+
- tblastx
|
9
|
+
- makembindex
|
10
|
+
- psiblast
|
11
|
+
- rpsblast
|
12
|
+
- blastdbcmd
|
13
|
+
- segmasker
|
14
|
+
- dustmasker
|
15
|
+
- blast_formatter
|
16
|
+
- windowmasker
|
17
|
+
- blastdb_aliastool
|
18
|
+
- deltablast
|
19
|
+
- rpstblastn
|
20
|
+
- blastdbcheck
|
21
|
+
version:
|
22
|
+
number: '2.2.[0-9]'
|
23
|
+
command: 'blastx -version'
|
24
|
+
url:
|
25
|
+
64bit:
|
26
|
+
macosx: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-universal-macosx.tar.gz
|
27
|
+
linux: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-x64-linux.tar.gz
|
data/deps/deps.yaml
CHANGED
@@ -1,80 +1,54 @@
|
|
1
|
-
blastplus:
|
2
|
-
binaries:
|
3
|
-
- makeblastdb
|
4
|
-
- blastn
|
5
|
-
- tblastn
|
6
|
-
- blastp
|
7
|
-
- blastx
|
8
|
-
- tblastx
|
9
|
-
- makembindex
|
10
|
-
- psiblast
|
11
|
-
- rpsblast
|
12
|
-
- blastdbcmd
|
13
|
-
- segmasker
|
14
|
-
- dustmasker
|
15
|
-
- blast_formatter
|
16
|
-
- windowmasker
|
17
|
-
- blastdb_aliastool
|
18
|
-
- deltablast
|
19
|
-
- rpstblastn
|
20
|
-
- blastdbcheck
|
21
|
-
version:
|
22
|
-
number: '2.2.29'
|
23
|
-
command: 'blastx -version'
|
24
|
-
url:
|
25
|
-
64bit:
|
26
|
-
macosx: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-universal-macosx.tar.gz
|
27
|
-
linux: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-x64-linux.tar.gz
|
28
1
|
snap:
|
29
2
|
binaries:
|
30
3
|
- snap
|
31
4
|
version:
|
32
|
-
number: '1.0dev.
|
5
|
+
number: '1.0dev.67.trfix1'
|
33
6
|
command: 'snap'
|
34
7
|
url:
|
35
8
|
64bit:
|
36
|
-
linux: https://github.com/
|
37
|
-
macosx: https://github.com/
|
38
|
-
samtools:
|
39
|
-
binaries:
|
40
|
-
- samtools
|
41
|
-
- bcftools
|
42
|
-
version:
|
43
|
-
number: '0.1.19'
|
44
|
-
command: 'samtools'
|
45
|
-
url:
|
46
|
-
64bit:
|
47
|
-
linux: https://github.com/cboursnell/samtools/raw/master/build/linux64.tar.gz
|
48
|
-
macosx: https://github.com/cboursnell/samtools/raw/master/build/osx64.tar.gz
|
9
|
+
linux: https://github.com/Blahah/snap/releases/download/v1.0dev.67.trfix1/snap_v1.0dev.67.trfix1_linux.tar.gz
|
10
|
+
macosx: https://github.com/Blahah/snap/releases/download/v1.0dev.67.trfix1/snap_v1.0dev.67.trfix1_macosx.tar.gz
|
49
11
|
bam-read:
|
50
12
|
binaries:
|
51
13
|
- bam-read
|
52
14
|
version:
|
53
|
-
number: '1.0.0.
|
15
|
+
number: '1.0.0.beta3'
|
54
16
|
command: 'bam-read'
|
55
17
|
url:
|
56
18
|
64bit:
|
57
|
-
linux: https://github.com/
|
58
|
-
macosx: https://github.com/Blahah/transrate-tools/
|
59
|
-
unpack:
|
60
|
-
|
61
|
-
binaries:
|
62
|
-
- bam-split
|
63
|
-
version:
|
64
|
-
number: '1.0.0.beta1'
|
65
|
-
command: 'bam-split'
|
66
|
-
url:
|
67
|
-
64bit:
|
68
|
-
linux: https://github.com/Blahah/transrate-tools/raw/master/bin/linux/bam-split
|
69
|
-
macosx: https://github.com/Blahah/transrate-tools/raw/master/bin/macosx/bam-split
|
70
|
-
unpack: false
|
71
|
-
express:
|
19
|
+
linux: https://github.com/Blahah/transrate-tools/releases/download/v1.0.0.beta3/bam-read_1.0.0.beta3_linux.tar.gz
|
20
|
+
macosx: https://github.com/Blahah/transrate-tools/releases/download/v1.0.0.beta3/bam-read_1.0.0.beta3_macosx.tar.gz
|
21
|
+
unpack: true
|
22
|
+
salmon:
|
72
23
|
binaries:
|
73
|
-
-
|
24
|
+
- salmon
|
25
|
+
libraries:
|
26
|
+
- libbz2.so.1
|
27
|
+
- libgcc_s.so.1
|
28
|
+
- libgomp.so.1
|
29
|
+
- liblzma.so.0
|
30
|
+
- libm.so.6
|
31
|
+
- libpthread.so.0
|
32
|
+
- librt.so.1
|
33
|
+
- libstdc++.so.6
|
34
|
+
- libtbbmalloc_proxy.so
|
35
|
+
- libtbbmalloc_proxy.so.2
|
36
|
+
- libtbbmalloc.so
|
37
|
+
- libtbbmalloc.so.2
|
38
|
+
- libtbb.so
|
39
|
+
- libtbb.so.2
|
40
|
+
- libz.so.1
|
41
|
+
- libcmph.0.dylib
|
42
|
+
- libcmph.dylib
|
43
|
+
- libcmph.la
|
44
|
+
- libstaden-read.la
|
45
|
+
- libtbb.dylib
|
46
|
+
- libtbbmalloc.dylib
|
47
|
+
- libtbbmalloc_proxy.dylib
|
74
48
|
version:
|
75
|
-
number: '
|
76
|
-
command: '
|
49
|
+
number: 'Salmon v0.2.7'
|
50
|
+
command: 'salmon --help'
|
77
51
|
url:
|
78
52
|
64bit:
|
79
|
-
linux:
|
80
|
-
macosx:
|
53
|
+
linux: https://github.com/kingsfordgroup/sailfish/releases/download/v0.2.7/Salmon-v0.2.7_Ubuntu-12.04.tar.gz
|
54
|
+
macosx: https://github.com/kingsfordgroup/sailfish/releases/download/v0.2.7/Salmon-v0.2.7_MacOSX-10.10.1.tar.gz
|
data/ext/transrate/transrate.c
CHANGED
@@ -55,6 +55,9 @@ VALUE method_composition(VALUE self, VALUE _seq) {
|
|
55
55
|
}
|
56
56
|
for (i=0; i < len; i++) {
|
57
57
|
base = seq[i];
|
58
|
+
if (base > 90) {
|
59
|
+
base -= 32;
|
60
|
+
}
|
58
61
|
switch (base) {
|
59
62
|
case 'A': {
|
60
63
|
idx=0;
|
@@ -81,6 +84,9 @@ VALUE method_composition(VALUE self, VALUE _seq) {
|
|
81
84
|
|
82
85
|
if (i > 0) {
|
83
86
|
prevbase = seq[i-1];
|
87
|
+
if (prevbase > 90) {
|
88
|
+
prevbase -= 32;
|
89
|
+
}
|
84
90
|
switch (prevbase) {
|
85
91
|
case 'A': {
|
86
92
|
idx=idx;
|
@@ -138,6 +144,9 @@ VALUE method_kmer_count(VALUE self, VALUE _k, VALUE _s) {
|
|
138
144
|
n = 0;
|
139
145
|
for(i = start; i < start+k; i++) {
|
140
146
|
base = c_str[i];
|
147
|
+
if (base > 90) {
|
148
|
+
base -= 32;
|
149
|
+
}
|
141
150
|
switch (base) {
|
142
151
|
case 'A': {
|
143
152
|
h = h << 2;
|
data/lib/transrate/assembly.rb
CHANGED
@@ -4,6 +4,8 @@ require 'forwardable'
|
|
4
4
|
|
5
5
|
module Transrate
|
6
6
|
|
7
|
+
class AssemblyError < TransrateError; end
|
8
|
+
|
7
9
|
# Container for a transcriptome assembly and its associated
|
8
10
|
# metadata.
|
9
11
|
#
|
@@ -41,13 +43,20 @@ module Transrate
|
|
41
43
|
def initialize file
|
42
44
|
@file = File.expand_path file
|
43
45
|
unless File.exist? @file
|
44
|
-
raise
|
46
|
+
raise TransrateIOError.new "Assembly file doesn't exist: #{@file}"
|
45
47
|
end
|
46
48
|
@assembly = {}
|
47
49
|
@n_bases = 0
|
48
50
|
Bio::FastaFormat.open(file).each do |entry|
|
49
51
|
@n_bases += entry.length
|
50
52
|
contig = Contig.new(entry)
|
53
|
+
if @assembly.key?(contig.name)
|
54
|
+
logger.error "Non unique fasta identifier found"
|
55
|
+
logger.error ">#{contig.name}"
|
56
|
+
logger.error "Please make sure there are no duplicate entries in the assembly"
|
57
|
+
logger.error "Contig name is taken from before the first | or space"
|
58
|
+
raise AssemblyError
|
59
|
+
end
|
51
60
|
@assembly[contig.name] = contig
|
52
61
|
end
|
53
62
|
@contig_metrics = ContigMetrics.new self
|
@@ -125,7 +134,6 @@ module Transrate
|
|
125
134
|
# and iterate over them
|
126
135
|
bin.sort_by! { |c| c.seq.length }
|
127
136
|
bin.each do |contig|
|
128
|
-
|
129
137
|
# increment our long contig counters if this
|
130
138
|
# contig is above the thresholds
|
131
139
|
if contig.length < 200
|
@@ -157,7 +165,6 @@ module Transrate
|
|
157
165
|
cutoff = x2.pop / 100.0
|
158
166
|
end
|
159
167
|
end
|
160
|
-
|
161
168
|
end
|
162
169
|
|
163
170
|
# if there aren't enough sequences we might have no value for some
|
@@ -168,6 +175,11 @@ module Transrate
|
|
168
175
|
|
169
176
|
# calculate and return the statistics as a hash
|
170
177
|
mean = cumulative_length / @assembly.size
|
178
|
+
if @assembly.size * mean == 0
|
179
|
+
mean_orf_percent = 0
|
180
|
+
else
|
181
|
+
mean_orf_percent = 300 * orf_length_sum / (@assembly.size * mean)
|
182
|
+
end
|
171
183
|
ns = Hash[x.map { |n| "n#{n}" }.zip(res)]
|
172
184
|
{
|
173
185
|
'n_seqs' => bin.size,
|
@@ -179,7 +191,7 @@ module Transrate
|
|
179
191
|
'n_over_1k' => n_over_1k,
|
180
192
|
'n_over_10k' => n_over_10k,
|
181
193
|
'n_with_orf' => n_with_orf,
|
182
|
-
'mean_orf_percent' =>
|
194
|
+
'mean_orf_percent' => mean_orf_percent
|
183
195
|
}.merge ns
|
184
196
|
|
185
197
|
end # basic_bin_stats
|
@@ -187,20 +199,18 @@ module Transrate
|
|
187
199
|
def classify_contigs
|
188
200
|
# create hash of file handles for each output
|
189
201
|
base = File.basename @file
|
190
|
-
files =
|
191
|
-
|
192
|
-
|
193
|
-
[type.to_sym, handle]
|
194
|
-
end
|
202
|
+
files = {}
|
203
|
+
%w(good fragmented chimeric bad).each do |type|
|
204
|
+
files[type.to_sym] = File.open("#{type}.#{base}", "wb")
|
195
205
|
end
|
196
206
|
# loop through contigs writing them out to the appropriate file
|
197
207
|
@assembly.each_pair do |name, contig|
|
198
208
|
category = contig.classify
|
199
209
|
handle = files[category]
|
200
|
-
handle.
|
210
|
+
handle.write contig.to_fasta
|
201
211
|
end
|
202
212
|
# close all the file handles
|
203
|
-
files.each do |handle|
|
213
|
+
files.each do |type, handle|
|
204
214
|
handle.close
|
205
215
|
end
|
206
216
|
end
|
@@ -55,12 +55,12 @@ module Transrate
|
|
55
55
|
crbblast.reciprocals.each do |key, list|
|
56
56
|
list.each_with_index do |hit, i|
|
57
57
|
unless @reference.assembly.key? hit.target
|
58
|
-
raise "#{hit.target} not in reference"
|
58
|
+
raise TransrateError.new "#{hit.target} not in reference"
|
59
59
|
end
|
60
60
|
@reference[hit.target].hits << hit
|
61
61
|
|
62
62
|
unless @assembly.assembly.key? hit.query
|
63
|
-
raise "#{hit.query} not in assembly"
|
63
|
+
raise TransrateError.new "#{hit.query} not in assembly"
|
64
64
|
end
|
65
65
|
contig = @assembly[hit.query]
|
66
66
|
contig.has_crb = true
|
data/lib/transrate/contig.rb
CHANGED
@@ -20,7 +20,12 @@ module Transrate
|
|
20
20
|
attr_accessor :hits
|
21
21
|
|
22
22
|
def initialize(seq, name: nil)
|
23
|
-
|
23
|
+
# strip null bytes from the nucleotide sequence
|
24
|
+
seq.seq.gsub!("\0", "")
|
25
|
+
# trim trailing semicolons (because BLAST strips them)
|
26
|
+
if seq.respond_to?(:entry_id)
|
27
|
+
seq.entry_id.gsub!(/;$/, '')
|
28
|
+
end
|
24
29
|
@seq = seq
|
25
30
|
@seq.data = nil # no need to store raw fasta string
|
26
31
|
@name = seq.respond_to?(:entry_id) ? seq.entry_id : name
|
@@ -248,13 +253,13 @@ module Transrate
|
|
248
253
|
return :good if score >= 0.5
|
249
254
|
# fragmented?
|
250
255
|
if in_bridges > 5
|
251
|
-
if p_not_segmented * p_bases_covered * p_seq_true
|
256
|
+
if p_not_segmented * p_bases_covered * p_seq_true >= 0.5
|
252
257
|
return :fragmented
|
253
258
|
end
|
254
259
|
end
|
255
260
|
# chimeric?
|
256
261
|
if p_not_segmented < 0.25
|
257
|
-
if p_good * p_bases_covered * p_seq_true
|
262
|
+
if p_good * p_bases_covered * p_seq_true >= 0.5
|
258
263
|
return :chimeric
|
259
264
|
end
|
260
265
|
end
|