transrate 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,103 +3,107 @@ require 'bettersam'
3
3
  require 'csv'
4
4
  require 'forwardable'
5
5
 
6
- class Assembly
6
+ module Transrate
7
7
 
8
- include Enumerable
9
- extend Forwardable
10
- def_delegators :@assembly, :each, :<<
8
+ class Assembly
11
9
 
12
- attr_accessor :ublast_db
13
- attr_accessor :orfs_ublast_db
14
- attr_accessor :protein
10
+ include Enumerable
11
+ extend Forwardable
12
+ def_delegators :@assembly, :each, :<<
15
13
 
16
- # number of bases in the assembly
17
- attr_writer :n_bases
14
+ attr_accessor :ublast_db
15
+ attr_accessor :orfs_ublast_db
16
+ attr_accessor :protein
18
17
 
19
- # assembly filename
20
- attr_accessor :file
18
+ # number of bases in the assembly
19
+ attr_writer :n_bases
21
20
 
22
- # assembly n50
23
- attr_reader :n50
21
+ # assembly filename
22
+ attr_accessor :file
24
23
 
25
- # Return a new Assembly.
26
- #
27
- # - +:file+ - path to the assembly FASTA file
28
- def initialize file
29
- @file = file
30
- @assembly = []
31
- @n_bases = 0
32
- Bio::FastaFormat.open(file).each do |entry|
33
- @n_bases += entry.length
34
- @assembly << entry
24
+ # assembly n50
25
+ attr_reader :n50
26
+
27
+ # Return a new Assembly.
28
+ #
29
+ # - +:file+ - path to the assembly FASTA file
30
+ def initialize file
31
+ @file = file
32
+ @assembly = []
33
+ @n_bases = 0
34
+ Bio::FastaFormat.open(file).each do |entry|
35
+ @n_bases += entry.length
36
+ @assembly << entry
37
+ end
38
+ @assembly.sort_by! { |x| x.length }
35
39
  end
36
- @assembly.sort_by! { |x| x.length }
37
- end
38
40
 
39
- # Return the basic_stats hash of an Assembly built by loading sequences
40
- # from the FASTA-format +:file+
41
- def self.stats_from_fasta file
42
- a = Assembly.new file
43
- a.basic_stats
44
- end
41
+ # Return the basic_stats hash of an Assembly built by loading sequences
42
+ # from the FASTA-format +:file+
43
+ def self.stats_from_fasta file
44
+ a = Assembly.new file
45
+ a.basic_stats
46
+ end
45
47
 
46
- def run
47
- stats = self.basic_stats
48
- stats.each_pair do |key, value|
49
- ivar = "@#{key.gsub(/ /, '_')}".to_sym
50
- self.instance_variable_set(key, value)
48
+ def run
49
+ stats = self.basic_stats
50
+ stats.each_pair do |key, value|
51
+ ivar = "@#{key.gsub(/ /, '_')}".to_sym
52
+ self.instance_variable_set(key, value)
53
+ end
51
54
  end
52
- end
53
55
 
54
- # Return a hash of statistics about this assembly
55
- def basic_stats
56
- cumulative_length = 0.0
57
- # we'll calculate Nx for all these x
58
- x = [90, 70, 50, 30, 10]
59
- x2 = x.clone
60
- cutoff = x2.pop / 100.0
61
- res = []
62
- n1k = 0
63
- n10k = 0
64
- @assembly.each do |s|
65
- new_cum_len = cumulative_length + s.length
66
- prop = new_cum_len / self.n_bases
67
- n1k += 1 if s.length > 1_000
68
- n10k += 1 if s.length > 10_000
69
- if prop >= cutoff
70
- res << s.length
71
- break if x2.empty?
72
- cutoff = x2.pop / 100.0
56
+ # Return a hash of statistics about this assembly
57
+ def basic_stats
58
+ cumulative_length = 0.0
59
+ # we'll calculate Nx for all these x
60
+ x = [90, 70, 50, 30, 10]
61
+ x2 = x.clone
62
+ cutoff = x2.pop / 100.0
63
+ res = []
64
+ n1k = 0
65
+ n10k = 0
66
+ @assembly.each do |s|
67
+ new_cum_len = cumulative_length + s.length
68
+ prop = new_cum_len / self.n_bases
69
+ n1k += 1 if s.length > 1_000
70
+ n10k += 1 if s.length > 10_000
71
+ if prop >= cutoff
72
+ res << s.length
73
+ break if x2.empty?
74
+ cutoff = x2.pop / 100.0
75
+ end
76
+ cumulative_length = new_cum_len
73
77
  end
74
- cumulative_length = new_cum_len
78
+ mean = cumulative_length / @assembly.size
79
+ ns = Hash[x.map { |n| "N#{n}" }.zip(res)]
80
+ {
81
+ "n_seqs" => @assembly.size,
82
+ "smallest" => @assembly.first.length,
83
+ "largest" => @assembly.last.length,
84
+ "n_bases" => @n_bases,
85
+ "mean_len" => mean,
86
+ "n > 1k" => n1k,
87
+ "n > 10k" => n10k
88
+ }.merge ns
75
89
  end
76
- mean = cumulative_length / @assembly.size
77
- ns = Hash[x.map { |n| "N#{n}" }.zip(res)]
78
- {
79
- "n_seqs" => @assembly.size,
80
- "smallest" => @assembly.first.length,
81
- "largest" => @assembly.last.length,
82
- "n_bases" => @n_bases,
83
- "mean_len" => mean,
84
- "n > 1k" => n1k,
85
- "n > 10k" => n10k
86
- }.merge ns
87
- end
88
90
 
89
- # return the number of bases in the assembly, calculating
90
- # from the assembly if it hasn't already been done.
91
- def n_bases
92
- unless @n_bases
93
- @n_bases = 0
94
- @assembly.each { |s| @n_bases += s.length }
91
+ # return the number of bases in the assembly, calculating
92
+ # from the assembly if it hasn't already been done.
93
+ def n_bases
94
+ unless @n_bases
95
+ @n_bases = 0
96
+ @assembly.each { |s| @n_bases += s.length }
97
+ end
98
+ @n_bases
99
+ end
100
+
101
+ def print_stats
102
+ self.basic_stats.map do |k, v|
103
+ "#{k}#{" " * (20 - (k.length + v.to_i.to_s.length))}#{v.to_i}"
104
+ end.join("\n")
95
105
  end
96
- @n_bases
97
- end
98
106
 
99
- def print_stats
100
- self.basic_stats.map do |k, v|
101
- "#{k}#{" " * (20 - (k.length + v.to_i.to_s.length))}#{v.to_i}"
102
- end.join("\n")
103
- end
107
+ end # Assembly
104
108
 
105
- end # Assembly
109
+ end # Transrate
@@ -4,7 +4,7 @@ module Transrate
4
4
  module VERSION
5
5
  MAJOR = 0
6
6
  MINOR = 0
7
- PATCH = 5
7
+ PATCH = 6
8
8
  BUILD = nil
9
9
 
10
10
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: transrate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: