transrate 0.0.12 → 0.0.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/bin/transrate +3 -1
- data/lib/transrate.rb +2 -0
- data/lib/transrate/assembly.rb +95 -31
- data/lib/transrate/comparative_metrics.rb +8 -7
- data/lib/transrate/log.rb +16 -0
- data/lib/transrate/rb_hit.rb +2 -2
- data/lib/transrate/reciprocal_annotation.rb +13 -1
- data/lib/transrate/transrater.rb +29 -2
- data/lib/transrate/version.rb +8 -1
- data/test/assembly.fasta +8 -0
- data/test/test_inline.rb +30 -0
- data/test/test_test.rb +41 -0
- data/transrate.gemspec +1 -0
- metadata +21 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8e95b284158f18e48f07d5d0dbfffadab014f4f9
|
4
|
+
data.tar.gz: 7bbaa794b374538e2fc3d422d9256e0e501e14ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 71163763618dc38fed73de88e677f3db6d375e28f31d33f4c03bcbb4aece341ee9e849bf5e6654967a9635cd2b9225ca9025645f8cb9bfff1f3e313e2e44e23c
|
7
|
+
data.tar.gz: b6c297fa15496cc78195e11780cecac0a75dbfce971358c5814b3dc0a6c2e9ee6766556b8a32ac303d487c12967afbb63551d2aa78b543bdfbb19a77d4459ad4
|
data/.gitignore
CHANGED
data/bin/transrate
CHANGED
@@ -88,7 +88,9 @@ end
|
|
88
88
|
if opts.reference
|
89
89
|
puts "\nCalculating comparative metrics..."
|
90
90
|
t0 = Time.now
|
91
|
-
|
91
|
+
comparative_metrics = transrater.comparative_metrics
|
92
|
+
p comparative_metrics.comp_stats
|
93
|
+
comparative_results = comparative_metrics.comp_stats
|
92
94
|
|
93
95
|
if comparative_results
|
94
96
|
puts "\n"
|
data/lib/transrate.rb
CHANGED
data/lib/transrate/assembly.rb
CHANGED
@@ -2,9 +2,28 @@ require 'bio'
|
|
2
2
|
require 'bettersam'
|
3
3
|
require 'csv'
|
4
4
|
require 'forwardable'
|
5
|
+
require 'inline'
|
5
6
|
|
6
7
|
module Transrate
|
7
8
|
|
9
|
+
# Container for a transcriptome assembly and its associated
|
10
|
+
# metadata.
|
11
|
+
#
|
12
|
+
# @!attribute [rw] ublast_db
|
13
|
+
# @return [String] path to a ublast database generated from this assembly
|
14
|
+
# @!attribute [rw] orfs_ublast_db
|
15
|
+
# @return [String] path to a ublast database generated from the orfs
|
16
|
+
# extracted from this assembly
|
17
|
+
# @!attribute [r] assembly
|
18
|
+
# @return [Array<Bio::FastaFormat>] the assembly
|
19
|
+
# @!attribute [r] has_run
|
20
|
+
# @return [BOOL] whether the basic metrics have been generated
|
21
|
+
# @!attribute [w] n_bases
|
22
|
+
# @return [Integer] the number of bases in the assembly
|
23
|
+
# @!attribute [rw] file
|
24
|
+
# @return [String] path to the assembly FASTA file
|
25
|
+
# @!attribute [r] n50
|
26
|
+
# @return [Integer] assembly n50
|
8
27
|
class Assembly
|
9
28
|
|
10
29
|
include Enumerable
|
@@ -13,22 +32,15 @@ module Transrate
|
|
13
32
|
|
14
33
|
attr_accessor :ublast_db
|
15
34
|
attr_accessor :orfs_ublast_db
|
16
|
-
attr_accessor :protein
|
17
35
|
attr_reader :assembly
|
18
36
|
attr_reader :has_run
|
19
|
-
|
20
|
-
# number of bases in the assembly
|
21
37
|
attr_writer :n_bases
|
22
|
-
|
23
|
-
# assembly filename
|
24
38
|
attr_accessor :file
|
25
|
-
|
26
|
-
# assembly n50
|
27
39
|
attr_reader :n50
|
28
40
|
|
29
|
-
#
|
41
|
+
# Create a new Assembly.
|
30
42
|
#
|
31
|
-
#
|
43
|
+
# @param file [String] path to the assembly FASTA file
|
32
44
|
def initialize file
|
33
45
|
@file = file
|
34
46
|
@assembly = []
|
@@ -39,13 +51,20 @@ module Transrate
|
|
39
51
|
end
|
40
52
|
end
|
41
53
|
|
42
|
-
# Return
|
43
|
-
#
|
44
|
-
|
54
|
+
# Return basic statistics about the assembly in
|
55
|
+
# the specified FASTA file
|
56
|
+
#
|
57
|
+
# @param file [String] path to assembly FASTA file
|
58
|
+
#
|
59
|
+
# @return [Hash] basic statistics about the assembly
|
60
|
+
def self.stats_from_fasta file
|
45
61
|
a = Assembly.new file
|
46
62
|
a.basic_stats
|
47
63
|
end
|
48
64
|
|
65
|
+
# Generate and store the basic statistics for this assembly
|
66
|
+
#
|
67
|
+
# @param threads [Integer] number of threads to use
|
49
68
|
def run threads=8
|
50
69
|
stats = self.basic_stats threads
|
51
70
|
stats.each_pair do |key, value|
|
@@ -62,7 +81,10 @@ module Transrate
|
|
62
81
|
# calculated in parallel by splitting the assembly into
|
63
82
|
# equal-sized bins and calling Assembly#basic_bin_stat on each
|
64
83
|
# bin in a separate thread.
|
65
|
-
|
84
|
+
#
|
85
|
+
# @param threads [Integer] number of threads to use
|
86
|
+
#
|
87
|
+
# @return [Hash] basic statistics about the assembly
|
66
88
|
def basic_stats threads=8
|
67
89
|
|
68
90
|
# create a work queue to process contigs in parallel
|
@@ -185,14 +207,14 @@ module Transrate
|
|
185
207
|
mean = cumulative_length / @assembly.size
|
186
208
|
# ns = Hash[x.map { |n| "N#{n}" }.zip(res)]
|
187
209
|
{
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
210
|
+
'n_seqs' => bin.size,
|
211
|
+
'smallest' => bin.first.length,
|
212
|
+
'largest' => bin.last.length,
|
213
|
+
'n_bases' => n_bases,
|
214
|
+
'mean_len' => mean,
|
215
|
+
'n_1k' => n1k,
|
216
|
+
'n_10k' => n10k,
|
217
|
+
'orf_percent' => 300 * orf_length_sum / (@assembly.size * mean)
|
196
218
|
}
|
197
219
|
# }.merge ns
|
198
220
|
|
@@ -223,17 +245,59 @@ module Transrate
|
|
223
245
|
|
224
246
|
end # merge_basic_stats
|
225
247
|
|
248
|
+
inline do |builder|
|
249
|
+
|
250
|
+
builder.c <<SRC
|
251
|
+
static
|
252
|
+
void
|
253
|
+
longest_orf(VALUE _s) {
|
254
|
+
int i,sl,longest=0;
|
255
|
+
int len[6];
|
256
|
+
char * c_str;
|
257
|
+
|
258
|
+
sl = RSTRING_LEN(_s);
|
259
|
+
c_str = StringValueCStr(_s);
|
260
|
+
for (i=0;i<6;i++) {
|
261
|
+
len[i]=0;
|
262
|
+
}
|
263
|
+
for (i=0;i<sl-2;i++) {
|
264
|
+
if (c_str[i]=='T' &&
|
265
|
+
((c_str[i+1]=='A' && c_str[i+2]=='G') ||
|
266
|
+
(c_str[i+1]=='A' && c_str[i+2]=='A') ||
|
267
|
+
(c_str[i+1]=='G' && c_str[i+2]=='A'))) {
|
268
|
+
if (len[i%3] > longest) {
|
269
|
+
longest = len[i%3];
|
270
|
+
}
|
271
|
+
len[i%3]=0;
|
272
|
+
} else {
|
273
|
+
len[i%3]++;
|
274
|
+
}
|
275
|
+
if (c_str[i+2]=='A' &&
|
276
|
+
((c_str[i]=='C' && c_str[i+1]=='T') ||
|
277
|
+
(c_str[i]=='T' && c_str[i+1]=='T') ||
|
278
|
+
(c_str[i]=='T' && c_str[i+1]=='C'))) {
|
279
|
+
if (len[3+i%3] > longest) {
|
280
|
+
longest = len[3+i%3];
|
281
|
+
}
|
282
|
+
len[3+i%3]=0;
|
283
|
+
} else {
|
284
|
+
len[3+i%3]++;
|
285
|
+
}
|
286
|
+
}
|
287
|
+
if (len[i%3] > longest) {
|
288
|
+
longest = len[i%3];
|
289
|
+
}
|
290
|
+
if (len[3+i%3] > longest) {
|
291
|
+
longest = len[3+i%3];
|
292
|
+
}
|
293
|
+
return INT2NUM(longest);
|
294
|
+
}
|
295
|
+
SRC
|
296
|
+
end
|
297
|
+
|
226
298
|
# finds longest orf in a sequence
|
227
299
|
def orf_length sequence
|
228
|
-
longest=
|
229
|
-
(1..6).each do |frame|
|
230
|
-
translated = Bio::Sequence::NA.new(sequence).translate(frame)
|
231
|
-
translated.split('*').each do |orf|
|
232
|
-
if orf.length > longest
|
233
|
-
longest=orf.length
|
234
|
-
end
|
235
|
-
end
|
236
|
-
end
|
300
|
+
longest = longest_orf(sequence)
|
237
301
|
return longest
|
238
302
|
end
|
239
303
|
|
@@ -250,7 +314,7 @@ module Transrate
|
|
250
314
|
def print_stats
|
251
315
|
self.basic_stats.map do |k, v|
|
252
316
|
"#{k}#{" " * (20 - (k.length + v.to_i.to_s.length))}#{v.to_i}"
|
253
|
-
end.join(
|
317
|
+
end.join('\n')
|
254
318
|
end
|
255
319
|
|
256
320
|
end # Assembly
|
@@ -7,7 +7,6 @@ module Transrate
|
|
7
7
|
attr_reader :rbh_per_contig
|
8
8
|
attr_reader :rbh_per_reference
|
9
9
|
attr_reader :reciprocal_hits
|
10
|
-
attr_reader :reference_coverage
|
11
10
|
attr_reader :has_run
|
12
11
|
|
13
12
|
def initialize assembly, reference
|
@@ -17,12 +16,12 @@ module Transrate
|
|
17
16
|
end
|
18
17
|
|
19
18
|
def run
|
20
|
-
rbu =
|
21
|
-
@ortholog_hit_ratio =
|
22
|
-
@collapse_factor =
|
19
|
+
rbu = reciprocal_best_ublast
|
20
|
+
@ortholog_hit_ratio = ortholog_hit_ratio rbu
|
21
|
+
@collapse_factor = collapse_factor @ra.r2l_hits
|
23
22
|
@reciprocal_hits = rbu.size
|
24
23
|
@rbh_per_reference = @reciprocal_hits.to_f / @reference.size.to_f
|
25
|
-
@reference_coverage = @
|
24
|
+
@reference_coverage = @ortholog_hit_ratio * @rbh_per_reference
|
26
25
|
@rbh_per_contig = @reciprocal_hits.to_f / @assembly.assembly.size.to_f
|
27
26
|
@has_run = true
|
28
27
|
end
|
@@ -32,6 +31,7 @@ module Transrate
|
|
32
31
|
:reciprocal_hits => @reciprocal_hits,
|
33
32
|
:rbh_per_contig => @rbh_per_contig,
|
34
33
|
:rbh_per_reference => @rbh_per_reference,
|
34
|
+
:reference_coverage => @reference_coverage,
|
35
35
|
:ortholog_hit_ratio => @ortholog_hit_ratio,
|
36
36
|
:collapse_factor => @collapse_factor
|
37
37
|
}
|
@@ -43,8 +43,9 @@ module Transrate
|
|
43
43
|
end
|
44
44
|
|
45
45
|
def ortholog_hit_ratio rbu=nil
|
46
|
-
return @ortholog_hit_ratio unless @ortholog_hit_ratio.nil?
|
47
|
-
rbu.
|
46
|
+
return @ortholog_hit_ratio unless @ortholog_hit_ratio.nil?
|
47
|
+
divisor = (rbu.size * 100).to_f
|
48
|
+
rbu.reduce(0){ |sum, hit| sum += hit.last.target_coverage } / divisor
|
48
49
|
end
|
49
50
|
|
50
51
|
def collapse_factor hits=nil
|
data/lib/transrate/rb_hit.rb
CHANGED
@@ -6,7 +6,7 @@ module Transrate
|
|
6
6
|
# gap opens, q. start, q. end, s. start, s. end, evalue, bit score
|
7
7
|
attr_accessor :query, :target, :id, :alnlen, :mismatches
|
8
8
|
attr_accessor :gaps, :qstart, :qend, :tstart, :tend, :evalue
|
9
|
-
attr_accessor :bitscore, :
|
9
|
+
attr_accessor :bitscore, :target_coverage
|
10
10
|
|
11
11
|
def initialize(list)
|
12
12
|
@query = list[0].scan(/[^|]+/).first.split.first # extract only identifier
|
@@ -21,7 +21,7 @@ module Transrate
|
|
21
21
|
@tend = list[9]
|
22
22
|
@evalue = list[10]
|
23
23
|
@bitscore = list[11]
|
24
|
-
@
|
24
|
+
@target_coverage = list[12].to_i
|
25
25
|
end
|
26
26
|
|
27
27
|
def to_s
|
@@ -59,16 +59,28 @@ module Transrate
|
|
59
59
|
next if best.nil?
|
60
60
|
tbest = @r2l_hits[best.target]
|
61
61
|
next if tbest.nil?
|
62
|
-
|
62
|
+
if query == tbest.target
|
63
|
+
@results[query] = best
|
64
|
+
end
|
63
65
|
end
|
64
66
|
end
|
65
67
|
|
68
|
+
# what is this method trying to do? :/
|
66
69
|
def results_to_hits results
|
67
70
|
hits = {}
|
71
|
+
puts "<results_to_hits> results.size = #{results.size}"
|
68
72
|
results.each do |hit|
|
69
73
|
if hits.has_key? hit.query
|
70
74
|
old_hit = hits[hit.query]
|
71
75
|
old_eval, old_bits = old_hit.evalue, old_hit.bitscore
|
76
|
+
if hit.bitscore == nil
|
77
|
+
p hit
|
78
|
+
abort "oh noes"
|
79
|
+
end
|
80
|
+
if old_bits == nil
|
81
|
+
p old_hit
|
82
|
+
raise 'hell'
|
83
|
+
end
|
72
84
|
if hit.bitscore > old_bits
|
73
85
|
hits[hit.query] = hit
|
74
86
|
elsif hit.bitscore == old_bits && hit.evalue < old_eval
|
data/lib/transrate/transrater.rb
CHANGED
@@ -1,11 +1,27 @@
|
|
1
1
|
module Transrate
|
2
2
|
|
3
|
+
# A transrater runs all types of metrics on an assembly.
|
4
|
+
#
|
5
|
+
# @!attribute [r] assembly
|
6
|
+
# @return [Assembly, String] an Assembly or the path to an assembly
|
7
|
+
# @!attribute [r] read_metrics
|
8
|
+
# @return [Hash] the read metrics if they have been calculated
|
9
|
+
# @!attribute [r] comparative_metrics
|
10
|
+
# @return [ComparativeMetrics] the comparative metrics if they have been calculated
|
3
11
|
class Transrater
|
4
12
|
|
5
13
|
attr_reader :assembly
|
6
14
|
attr_reader :read_metrics
|
7
|
-
attr_reader :comparative_metrics
|
8
15
|
|
16
|
+
# A new Transrater
|
17
|
+
#
|
18
|
+
# @param assembly [Assembly, String] the Assembly or path to the FASTA
|
19
|
+
# @param reference [Assembly, String] the reference Assembly or
|
20
|
+
# path to the FASTA
|
21
|
+
# @param left [String] path to the left reads
|
22
|
+
# @param right [String] path to the right reads
|
23
|
+
# @param insertsize [Integer] mean insert size of the read pairs
|
24
|
+
# @param insertsd [Integer] standard deviation of the read pair insert size
|
9
25
|
def initialize assembly, reference, left=nil, right=nil, insertsize=nil, insertsd=nil
|
10
26
|
@assembly = assembly.is_a?(Assembly) ? assembly : Assembly.new(assembly)
|
11
27
|
@reference = reference.is_a?(Assembly) ? reference : Assembly.new(reference)
|
@@ -13,6 +29,12 @@ module Transrate
|
|
13
29
|
@comparative_metrics = ComparativeMetrics.new(@assembly, @reference)
|
14
30
|
end
|
15
31
|
|
32
|
+
# Run all analyses
|
33
|
+
#
|
34
|
+
# @param left [String] path to the left reads
|
35
|
+
# @param right [String] path to the right reads
|
36
|
+
# @param insertsize [Integer] mean insert size of the read pairs
|
37
|
+
# @param insertsd [Integer] standard deviation of the read pair insert size
|
16
38
|
def run left=nil, right=nil, insertsize=nil, insertsd=nil
|
17
39
|
assembly_metrics
|
18
40
|
if left && right
|
@@ -21,6 +43,11 @@ module Transrate
|
|
21
43
|
comparative_metrics
|
22
44
|
end
|
23
45
|
|
46
|
+
# Reduce all metrics for the assembly to a single quality score.
|
47
|
+
#
|
48
|
+
#
|
49
|
+
#
|
50
|
+
# @return [Integer] the assembly score
|
24
51
|
def assembly_score
|
25
52
|
@score, pg, rc = nil
|
26
53
|
if @read_metrics.has_run
|
@@ -47,7 +74,7 @@ module Transrate
|
|
47
74
|
end
|
48
75
|
|
49
76
|
def comparative_metrics
|
50
|
-
@comparative_metrics.run unless @comparative_metrics.has_run
|
77
|
+
@comparative_metrics.run #unless @comparative_metrics.has_run
|
51
78
|
@comparative_metrics
|
52
79
|
end
|
53
80
|
|
data/lib/transrate/version.rb
CHANGED
@@ -1,12 +1,19 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
module Transrate
|
4
|
+
|
5
|
+
# Defines the version of this codebase.
|
6
|
+
#
|
7
|
+
# This module is used in help messages and in generating
|
8
|
+
# the Gem. Versions must be incremented in accordance with
|
9
|
+
# Semantic Versioning 2.0 (http://semver.org/).
|
4
10
|
module VERSION
|
5
11
|
MAJOR = 0
|
6
12
|
MINOR = 0
|
7
|
-
PATCH =
|
13
|
+
PATCH = 13
|
8
14
|
BUILD = nil
|
9
15
|
|
10
16
|
STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
|
11
17
|
end
|
18
|
+
|
12
19
|
end # Transrate
|
data/test/assembly.fasta
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
>scaffold1
|
2
|
+
TTTCAGTTGATGAAGAAAAAACTAGAGTGTGTGGATTGCATGAAATGAGCTGGGGCTTGTTGAAATTTTGCTATTGAAACTAATGTTTACTTCAAATTGATAGGAATTCAGTTGGTTTTCTGGTGGTNTCTCTTTCTTCTTCACAATTGAATTTTGATTCCAAACACCATCTCTTAGGAATTCAGTTGGTTTTCTGGTGATAAACACTTCAATTTGACTATGGACTCTGTGTATTGTAAAGAACATGATGACAGAATTGCTTCGAATCCGGTGGGTTTACAGTCATCCGGGTCGGGTCCTTGGTGGGGTGGGCTCGTGACTCAATCGGGTTTTGAAGAATCCATGGGGAGATTGAAGAATTCATCAATGGAACCACTCGTGCGAATAGCCCCTAAACTGTCGATTTCTGCTGCAGATCAATCAATGGCAAAAGGGGATACGACTTCTTTTGATATCTTTCATGGTGACTCCAAGAGTTCGTCAAATGGTCAAAAACCAGTGCTATTGCAAGCTGTTTCCACCTTGCAATCTGCTCCACAAGTGTTTGGAGGTCATCTTGAGCTTGGGTTTGGTCAGCCGGCTGTGGCATGTGGCGTGTATCCTTATGGCGACCAATCTTATCGCGTGCTCTCTGCTTATGGACCTCAAACCGTGGGTCGTGTTATGCTGCCTTTGAACTTGAGCACTGATGATGGACCTATATTTGTGAATGCAAAGCAGTATCATGGAATAATCAGGCGCAGACGATCAAGAGCTAAAGCAGAAATGGCAAAAAAGTTTACAAAAAGTCGAAAGCCATATCTGCACTTGTCGCGTCATCTTCATGCAAAACGTCGACCAAGAGGCTGTGGGGGCCGCTTCCTAAACACTAAAGAAATGGACAAGGGCAAAGTTGGAAACTTGGATTCAAAACCAACCAATAATCCAACCACTTCTCAGAGATCCGAGGTCTTACAATGTGATCATGAATCGAATGGTAGCAAGTCCTACACACTAGGGTCAGAGGTAACAAGCGTTTTCTCCATGGGAGACCTCAATCAATTCCCCATGGGAGATATTTCTGTTATTTCTCATTATGGCATGATGGCGTCTGGCAATGCCATTCTTAGTCTTGGCATGCAGAACGATTGGGTTGCAGTGGGTGCAGGTGGGGGTATCTGCTGCAACCTGACAGTCTGATGGTACATGGGGTCTGATGTGCCTGCACGACAGACCCCATTCTTAGATAGTCAAACGGGACGGGGTTGTTTTCCGTTGTTTGGCTATGGCAAATCATCCTTGGCTATGACGTGTGAAGGTTGTGTCTCTAACTTCAATTCCCCTGACAAATGCAGACTAGGAATTGTTTTAAAGATGCTGATAACCTCAATACATATGATGGGCAATCATTTATTCTAGATGTGTTTGTTTCATCCTATGTTAATACTTTGTTGTTGTTTGAAATAAACTAAACCTTAGAACTCTATTTTCAAGGTATGTATCATGTGAATCTTGACTTCCAAACATTTAGGAGATGCCATATATTGTGTTTGATATTTTCTTGATAGTTATGCGCATGTTGGTAATTAAGAAATTCCTTTGGGATAATCTTGTAAATTAACATGAATGGTGGATGTTTATGCTTACAATA
|
3
|
+
>scaffold2
|
4
|
+
AAAAGGCATATATGACAAATCCTCCTTTCAAGTCGAACAACATTAGAGTTTTGTACAGATTTAAGACCAAAGCATTACTCGTTTCTATTCTCTCTTTAAATGTGAAATTTACCATCATAGTATTACATAATCTTATACGCGATATCATTTTAAAAAGATTATTGTTTTCATCGTCTTAACAGATTGTATGCTGGTAACTCTTTCACACATGTGACATCTTCGATCGCAAGCTCCCTCTCTAGCTGCATTCTTGTGGATTTTATAACCTCATTGAATTCCATATATGAAGCACGCCTCGCAAGCCAATGAGCATCCATGAGTTTGAAAGCAACACAATATAGCTCATCAAAAGCCGATTCATCATTACTAAGAAATTGCAAGAACCGCACCCCTGACGAAGACGTTGGTAATCCTGACTGAATATCCAACATCTGCACCAACATGAAAGAAATATTGATGCCAGCTACAGCAAAAGGATATTCCCATTCAGCTCTAATTCCTTCTTTTTTGTTTAAAAGGTTTTGAAAAGCTTCCGGATATGTGCGGGCAAAGAAGATAAGATTCTCTAGAGATATGAATCCTCCACCTCTAAAGTCTGTTGAAGGATCATTTCCTTGCCACCCCATATCTTTCCAAACCTCTGATTTAAGTCCTGGGAGTTCTCTATTTGGATAAGCTGACCTCCATAATTTTCTAAGTGCATCTTGATGCTCCAAACAAGTACCATCGAAAGGGACCTCCAATCGTTTCTGCAGATTTCTGAGTCTTGATTCCTGTAATGGAGTAAGTATAGGTGGAAGAAACTTCTTCGTGGGTCCACTTCCTATTGTTGAAGGCAACGTCAAGAACCGTATAATAAACGATCCAGATCCAAGTAGAACATTTGCTAGCCACTGTGCCCATTCTGAAATTAAATGAGAGAATAGAAACGTCCAGTGGAGGTGTTCTTTTCTTTTTTCATCATACAAAATATCTTCGAGCCTTGGTACCTCCGACTGTTTAAGTTTATACTCGTCATTATTCCCTAGTAAAGGTTCACTTAGGTCATTTAAATCTGCACCAACTAAATGCTCGTGCTTTTTACCATCAACATCCCCATGGTGAAGTCTTCTCCTCATTGTTTGTGATGTCATTAATCCTTCAATGCCCGAACTTCTTTCTGAAGCCTTGAGGGATAGGGTGAGAAGAAGATTGATTGATGGATGAAACCCTAATGAAGGGAGTAGAGAAAGAGTAAGAAAGAAAGCCCTTTCTTTTTGTTCTTCTTCTAATCTCTTTCTCTACCCTAATGAAGCACCACAAAGTAGGAATGAAGGAAGAAAGGAACCCTCTCTTTCTCACATCTACATTTACATCTGCATCTGCTTTACGTTTTCAGGAAGAAACATCACACCGTCAATTAAGCGGATCTGGTAACGCCTGCGCGGGCAAACGGGTCGGATCTTGATCTATTAAAACCACTTCATTTTTTTTTTTTTTTTTTT
|
5
|
+
>scaffold3
|
6
|
+
AAAAGGCATATATGACAAATCCTCCTTTCAAGTCGAACAACATTAGAGTTTTGTACAGATTTAAGACCAAAGCATTACTCGTTTCTATTCTCTCTTTAAATGTGAAATTTACCATCATAGTATTACATAATCTTATACGCGATATCATTTTAAAAAGATTATTGTTTTCATCGTCTTAACAGATTGTATGCTGGTAACTCTTTCACACATGTGACATCTTCGATCGCAAGCTCCCTCTCTAGCTGCATTCTTGTGGATTTTATAACCTCATTGAATTCCATATATGAAGCACGCCTCGCAAGCCAATGAGCATCCATGAGTTTGAAAGCAACACAATATAGCTCATCAAAAGCCGATTCATCATTACTAAGAAATTGCAAGAACCGCACCCCTGACGAAGACGTTGGTAATCCTGACTGAATATCCAACATCTGCACCAACATGAAAGAAATATTGATGCCAGCTACAGCAAAAGGATATTCCCATTCAGCTCTAATTCCTTCTTTTTTGTTTAAAAGGTTTTGAAAAGCTTCCGGATATGTGCGGGCAAAGAAGATNTGTTTTCAACAAAATAGCAATTAAAATACACTTTCCAACCTTTTACATATATGCAAATTATGGAACTTGTACAAAAGTTTCAATTTGAACAATAGGAGTTCAAAAATGTTTCAGCAGTACAACCGAAGTACAAAAGGTGTCATTTGCATGGTAAATGATGGCCAAATGGCCAAAATTGCCACCTTCAGATTCAACCTTTATTTTCTTCATTAACTTTAGTTTCATCAAGAACTCATTGAACACCAAAATGACACCTAATTTGAAATCAAATAAAAACCCAAACTTGATACACAAATTGCATGATTTGCCCAAATGTTACAGTTTGAAAAAACGGTATAAAAGTATGTTTCATTTGTTAAAACAAGTGTGAAGATATCATGTGTGAGGAATGTGATGACATAACGACCAAAACTACTCTCTCCGCCTTCAACCTCTCTTTACTTCTCAAGTATTTATTCTTAATATATCAGAATTCACGACATAAAGAACACCAAAATCAAACCCAATGTTCAAACTATCTCAAAATATCTGCAAAGGCACGTGATGGGCAAATAACAATCTTCCACTTCAACCTCTATTTTCTCTTTAAAACTTACTTTCTTACAGAAACCAAAATACCAAAATCCAAAATAACTCAAAAACCAAGATATTTGCAAAGGCAATAGAAGAATCTATCAATCTCGAAAAAACAAACCGGATATGTGCGGGCAAAGAAGATAAGATTCTCTAGAGATATGAATCCTCCACCTCTGCAAATAATACTCAATGCTTGAAATAACCAATTTGAATAAACTTCATTTGGTTAACTATAACAAGTAACTAAGTTACCTAAAGTCTGTTGAAGGATCATT
|
7
|
+
>scaffold5
|
8
|
+
CATCGATTTCATTTAGTGGCTAACCAGACCAGTCCTTCCGGTTCGATACGTGTGGAGATGAGAAGAACCCAACAGGAGTTCTGGAGGAAATTTGCCTGATTAAACAGACTCTACAGTTGATTTGGAGAGGTTGAAGATGGATGGAATTGAAGAACATGGAGATTGCACTTCTAGGTGCTGCTAGACAAGCTCAGAGAAAATCAAATGCTGATATATATTGAGTCTCTACTGGAAGTCTGGAAGAGTATCATCTAAGGGTATCGGCCCTCGAAAGAAAGACTTATCCTAATCCATTGCGTTCCAATCCTGCATATTCGGTTGTGAAGTAAGTTACATTTGAGTGACTGCATCTACTTTTCTTCAAGAACTACTTCTAATTTTTTTTATTAAGTGAACTAATCAGATCAGAAACTGTATCCAAACTTAAAAGGTTAAAACTTGATTATTACATTTTAAAACAACTTCAAGATTGAAGAGAACAATTTGGTGAGTTTAAGAATAGACAAAGCAGCCTTGGAGAAGCTGCTGGAAGAAAAACAAATGCTGATATCAAGTCTCTACAGGAGAAGTATCAGCAAAGAGTGTCGGCCCTCGAAAGAAAGTTAAATGGTTTGCATATTTGTAGAAGCTTAGTTTAACACCTCTGTTTTAAAAACACTGCAGGGAAGTATTTGAAGAAAGTTTTATGGTCCGTACTTTGAAAATTTTGTCACCAAAGACTTTTGCTGATTTATCTCAACTTATAAAACTGAACAAAAAGGTCCTTATAAATTTTATGGATTTGATTATGCAAAAAATAAAGAACTTTTAGTTGAACTTTTATAAGTTTAAGCACTTTTTCTAAATCTGACATTTTGTTTAAATCTGACTTTTTGTTAAAAATGAAGTCCATGGGGCATTTTTGGTGAGTGTTTATTAGTTAAAGGATATAGTTATGCAAAAATGAAGTCCAGGTTTGTGAACAAGCACAAAGTCTATAGGCTTTTTTTTCCCTTTTATATTATTTTTGGCAACTACACATAACTAAAATGAATTTAAAGGTTGTATGTGCTAAAAATGCACTTTGGGTGACTTTATCAACCAATTTCACCTATTTTTGTTTAGCTAGATATGTTGGTTTGATCATTTGACATGCTTGGGTTAAAAACATATTCTAAACTGACCCAATTGTAAGTGAACGGGTCGGCTTTAACATCTGTTCTCAGGTAAGTGAATGTAGATGTGACGAACTGGTTATGCAAGATCCTAAATGTATCAGGCCTCTTTTGAGGCAGATTGAGGCTATGCAGGCATTCTTTGGTTTGACATGGCACCATGAGCTTTATGGTTTGAACACTTGTACTAGGAACGATCTGCTAGAAAGGCAGAAGCTTTGCTGTGAGGAATCACTAAACATGCGGCTTCAGGTTTGATAAATGGTATATGTGTGTAGTTTCTTTGAAGTATAAATTATCTCAAGCCCAGTGATATTTTTTGTTTTGTATGAATCTTCGAGGCATTTGAAGGAGGG
|
data/test/test_inline.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'helper'
|
4
|
+
|
5
|
+
class TestInline < Test::Unit::TestCase
|
6
|
+
|
7
|
+
context 'transrate' do
|
8
|
+
|
9
|
+
setup do
|
10
|
+
@a = Transrate::Assembly.new('test/assembly.fasta')
|
11
|
+
@seq1 = 'ATGCCCCTAGGGTAG'
|
12
|
+
end
|
13
|
+
|
14
|
+
should 'find longest orf in file' do
|
15
|
+
orfs = []
|
16
|
+
@a.assembly.each do |entry|
|
17
|
+
l = @a.orf_length entry.seq
|
18
|
+
orfs << l
|
19
|
+
end
|
20
|
+
assert_equal orfs.length, 4
|
21
|
+
assert_equal orfs, [333, 370, 131, 84]
|
22
|
+
end
|
23
|
+
|
24
|
+
should 'find longest orf in sequence' do
|
25
|
+
l = @a.orf_length(@seq1)
|
26
|
+
assert_equal l, 4
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
data/test/test_test.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'helper'
|
4
|
+
|
5
|
+
class TestTransrate < Test::Unit::TestCase
|
6
|
+
|
7
|
+
context "transrate" do
|
8
|
+
|
9
|
+
setup do
|
10
|
+
@a = Transrate::Assembly.new("test/assembly.fasta")
|
11
|
+
@seq1 = "ATGCCCGGGTAG"
|
12
|
+
end
|
13
|
+
|
14
|
+
should "run metrics on assembly" do
|
15
|
+
ans = @a.run(2) # using 2 threads
|
16
|
+
assert_equal ans, true, "should run but returned #{ans}"
|
17
|
+
end
|
18
|
+
|
19
|
+
should "find longest orf" do
|
20
|
+
len = @a.orf_length("ATGCCCGGGTAG")
|
21
|
+
assert_equal len, 3, "expected 4 but got #{len}"
|
22
|
+
end
|
23
|
+
|
24
|
+
should "find longest orf in file" do
|
25
|
+
orfs = []
|
26
|
+
@a.assembly.each do |entry|
|
27
|
+
l = @a.orf_length entry.seq
|
28
|
+
orfs << l
|
29
|
+
end
|
30
|
+
assert_equal orfs.length, 4
|
31
|
+
assert_equal orfs, [333, 370, 131, 84]
|
32
|
+
end
|
33
|
+
|
34
|
+
should "find the mean length" do
|
35
|
+
ans = @a.run(2)
|
36
|
+
mean = @a.mean_len
|
37
|
+
assert_equal mean, 1508.25
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
end
|
data/transrate.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: transrate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Richard Smith
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-04-
|
11
|
+
date: 2014-04-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: RubyInline
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '3.12'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '3.12'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: turn
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -156,6 +170,7 @@ files:
|
|
156
170
|
- lib/transrate/comparative_metrics.rb
|
157
171
|
- lib/transrate/dimension_reduce.rb
|
158
172
|
- lib/transrate/express.rb
|
173
|
+
- lib/transrate/log.rb
|
159
174
|
- lib/transrate/metric.rb
|
160
175
|
- lib/transrate/rb_hit.rb
|
161
176
|
- lib/transrate/read_metrics.rb
|
@@ -164,7 +179,11 @@ files:
|
|
164
179
|
- lib/transrate/usearch.rb
|
165
180
|
- lib/transrate/version.rb
|
166
181
|
- lib/transrate/writer.rb
|
182
|
+
- test/assembly.fasta
|
167
183
|
- test/helper.rb
|
184
|
+
- test/test_inline.rb
|
185
|
+
- test/test_test.rb
|
186
|
+
- test/test_test.rb~
|
168
187
|
- transrate.gemspec
|
169
188
|
homepage: https://github.com/Blahah/transrate
|
170
189
|
licenses:
|