transrate 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,103 +3,107 @@ require 'bettersam'
3
3
  require 'csv'
4
4
  require 'forwardable'
5
5
 
6
- class Assembly
6
+ module Transrate
7
7
 
8
- include Enumerable
9
- extend Forwardable
10
- def_delegators :@assembly, :each, :<<
8
+ class Assembly
11
9
 
12
- attr_accessor :ublast_db
13
- attr_accessor :orfs_ublast_db
14
- attr_accessor :protein
10
+ include Enumerable
11
+ extend Forwardable
12
+ def_delegators :@assembly, :each, :<<
15
13
 
16
- # number of bases in the assembly
17
- attr_writer :n_bases
14
+ attr_accessor :ublast_db
15
+ attr_accessor :orfs_ublast_db
16
+ attr_accessor :protein
18
17
 
19
- # assembly filename
20
- attr_accessor :file
18
+ # number of bases in the assembly
19
+ attr_writer :n_bases
21
20
 
22
- # assembly n50
23
- attr_reader :n50
21
+ # assembly filename
22
+ attr_accessor :file
24
23
 
25
- # Reuturn a new Assembly.
26
- #
27
- # - +:file+ - path to the assembly FASTA file
28
- def initialize file
29
- @file = file
30
- @assembly = []
31
- @n_bases = 0
32
- Bio::FastaFormat.open(file).each do |entry|
33
- @n_bases += entry.length
34
- @assembly << entry
24
+ # assembly n50
25
+ attr_reader :n50
26
+
27
+ # Return a new Assembly.
28
+ #
29
+ # - +:file+ - path to the assembly FASTA file
30
+ def initialize file
31
+ @file = file
32
+ @assembly = []
33
+ @n_bases = 0
34
+ Bio::FastaFormat.open(file).each do |entry|
35
+ @n_bases += entry.length
36
+ @assembly << entry
37
+ end
38
+ @assembly.sort_by! { |x| x.length }
35
39
  end
36
- @assembly.sort_by! { |x| x.length }
37
- end
38
40
 
39
- # Return a new Assembly object by loading sequences
40
- # from the FASTA-format +:file+
41
- def self.stats_from_fasta file
42
- a = Assembly.new file
43
- a.basic_stats
44
- end
41
+ # Return the basic statistics of a new Assembly built by loading
42
+ # sequences from the FASTA-format +:file+
43
+ def self.stats_from_fasta file
44
+ a = Assembly.new file
45
+ a.basic_stats
46
+ end
45
47
 
46
- def run
47
- stats = self.basic_stats
48
- stats.each_pair do |key, value|
49
- ivar = "@#{key.gsub(/ /, '_')}".to_sym
50
- self.instance_variable_set(key, value)
48
+ def run
49
+ stats = self.basic_stats
50
+ stats.each_pair do |key, value|
51
+ ivar = "@#{key.gsub(/ /, '_')}".to_sym
52
+ self.instance_variable_set(key, value)
53
+ end
51
54
  end
52
- end
53
55
 
54
- # Return a hash of statistics about this assembly
55
- def basic_stats
56
- cumulative_length = 0.0
57
- # we'll calculate Nx for all these x
58
- x = [90, 70, 50, 30, 10]
59
- x2 = x.clone
60
- cutoff = x2.pop / 100.0
61
- res = []
62
- n1k = 0
63
- n10k = 0
64
- @assembly.each do |s|
65
- new_cum_len = cumulative_length + s.length
66
- prop = new_cum_len / self.n_bases
67
- n1k += 1 if s.length > 1_000
68
- n10k += 1 if s.length > 10_000
69
- if prop >= cutoff
70
- res << s.length
71
- break if x2.empty?
72
- cutoff = x2.pop / 100.0
56
+ # Return a hash of statistics about this assembly
57
+ def basic_stats
58
+ cumulative_length = 0.0
59
+ # we'll calculate Nx for all these x
60
+ x = [90, 70, 50, 30, 10]
61
+ x2 = x.clone
62
+ cutoff = x2.pop / 100.0
63
+ res = []
64
+ n1k = 0
65
+ n10k = 0
66
+ @assembly.each do |s|
67
+ new_cum_len = cumulative_length + s.length
68
+ prop = new_cum_len / self.n_bases
69
+ n1k += 1 if s.length > 1_000
70
+ n10k += 1 if s.length > 10_000
71
+ if prop >= cutoff
72
+ res << s.length
73
+ break if x2.empty?
74
+ cutoff = x2.pop / 100.0
75
+ end
76
+ cumulative_length = new_cum_len
73
77
  end
74
- cumulative_length = new_cum_len
78
+ mean = cumulative_length / @assembly.size
79
+ ns = Hash[x.map { |n| "N#{n}" }.zip(res)]
80
+ {
81
+ "n_seqs" => @assembly.size,
82
+ "smallest" => @assembly.first.length,
83
+ "largest" => @assembly.last.length,
84
+ "n_bases" => @n_bases,
85
+ "mean_len" => mean,
86
+ "n > 1k" => n1k,
87
+ "n > 10k" => n10k
88
+ }.merge ns
75
89
  end
76
- mean = cumulative_length / @assembly.size
77
- ns = Hash[x.map { |n| "N#{n}" }.zip(res)]
78
- {
79
- "n_seqs" => @assembly.size,
80
- "smallest" => @assembly.first.length,
81
- "largest" => @assembly.last.length,
82
- "n_bases" => @n_bases,
83
- "mean_len" => mean,
84
- "n > 1k" => n1k,
85
- "n > 10k" => n10k
86
- }.merge ns
87
- end
88
90
 
89
- # return the number of bases in the assembly, calculating
90
- # from the assembly if it hasn't already been done.
91
- def n_bases
92
- unless @n_bases
93
- @n_bases = 0
94
- @assembly.each { |s| @n_bases += s.length }
91
+ # return the number of bases in the assembly, calculating
92
+ # from the assembly if it hasn't already been done.
93
+ def n_bases
94
+ unless @n_bases
95
+ @n_bases = 0
96
+ @assembly.each { |s| @n_bases += s.length }
97
+ end
98
+ @n_bases
99
+ end
100
+
101
+ def print_stats
102
+ self.basic_stats.map do |k, v|
103
+ "#{k}#{" " * (20 - (k.length + v.to_i.to_s.length))}#{v.to_i}"
104
+ end.join("\n")
95
105
  end
96
- @n_bases
97
- end
98
106
 
99
- def print_stats
100
- self.basic_stats.map do |k, v|
101
- "#{k}#{" " * (20 - (k.length + v.to_i.to_s.length))}#{v.to_i}"
102
- end.join("\n")
103
- end
107
+ end # Assembly
104
108
 
105
- end # Assembly
109
+ end # Transrate
@@ -4,7 +4,7 @@ module Transrate
4
4
  module VERSION
5
5
  MAJOR = 0
6
6
  MINOR = 0
7
- PATCH = 5
7
+ PATCH = 6
8
8
  BUILD = nil
9
9
 
10
10
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: transrate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: