miga-base 0.7.10.2 → 0.7.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f3b350bb3e5dce2f0f8d0006d7f0693ff746c157006890a1250f30d71d89a523
4
- data.tar.gz: 1d94e39c61bfe191388d309eeb992b995350f6191dc74d5c7b15ae9741bcca09
3
+ metadata.gz: 7b4a168130d732c670246cd4a874272e77e5f7d88fdef00e10d81ab8e5f9979a
4
+ data.tar.gz: '069e2dd280b4afecb67478612f1dee35bf2cada3ae57cbc61c6e70d0ef3bd233'
5
5
  SHA512:
6
- metadata.gz: f2a47ca016873723f030c9435db7daa81c819a3f19c65ed7614c21d8d68d556af0dd7313b79d2678508a12762ecd36f7bd922f5cf45640b383c6aaadde91cdfe
7
- data.tar.gz: e8218114c0e89d80eab2b6cf7eba5a1de6f26b7d27e3560af1da12fdfc9037f3557a5994e1b982cff35635cea3d23e18ef234758ef8e12b86fb56f782a41ef89
6
+ metadata.gz: a37fd7d69339c7a63d5ac38e0c232fed96d479c3f2f2bc67b2ee956bb908d8690a55f21a6fa0185c05f209139e16b6b2ddcd6b0f36fac471f9e0b4fd2c4a5f04
7
+ data.tar.gz: cb156656c79f1a765281f163691650e32f90056dc767827d4b3fe958b4042e6359b810f5a3249c6902bce042f6a457507a027836797c39328889d9cbbbc5c5d0
@@ -38,7 +38,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
38
38
  end
39
39
  if cli[:key].nil?
40
40
  r[:stats].each do |k, v|
41
- k_n = k == :g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize
41
+ k_n = k.to_s.unmiga_name.sub(/^./, &:upcase)
42
42
  cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
43
43
  end
44
44
  else
@@ -68,15 +68,20 @@ module MiGA::Common::Format
68
68
  # a FastA or FastQ file (supports gzipped files). The +format+ must be a
69
69
  # Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
70
70
  # controlled via the +opts+ Hash. Supported options include:
71
- # - +:n50+: If true, it also returns the N50 and the median (in bp)
72
- # - +:gc+: If true, it also returns the G+C content (in %)
73
- # - +:x+: If true, it also returns the undetermined bases content (in %)
71
+ # - +:n50+: Include the N50 and the median (in bp)
72
+ # - +:gc+: Include the G+C content (in %)
73
+ # - +:x+: Include the undetermined bases content (in %)
74
+ # - +:skew+: Include G-C and A-T sequence skew (in %; forces gc: true).
75
+ # The skew definition used here follows DOI:10.1177/117693430700300006
74
76
  def seqs_length(file, format, opts = {})
77
+ opts[:gc] = true if opts[:skew]
75
78
  fh = file =~ /\.gz/ ? Zlib::GzipReader.open(file) : File.open(file, 'r')
76
79
  l = []
77
80
  gc = 0
78
81
  xn = 0
79
- i = 0 # <- Zlib::GzipReader doesn't set `$.`
82
+ t = 0
83
+ c = 0
84
+ i = 0 # <- Zlib::GzipReader doesn't set `$.`
80
85
  fh.each_line do |ln|
81
86
  i += 1
82
87
  if (format == :fasta and ln =~ /^>/) or
@@ -86,6 +91,10 @@ module MiGA::Common::Format
86
91
  l[l.size - 1] += ln.chomp.size
87
92
  gc += ln.scan(/[GCgc]/).count if opts[:gc]
88
93
  xn += ln.scan(/[XNxn]/).count if opts[:x]
94
+ if opts[:skew]
95
+ t += ln.scan(/[Tt]/).count
96
+ c += ln.scan(/[Cc]/).count
97
+ end
89
98
  end
90
99
  end
91
100
  fh.close
@@ -97,6 +106,12 @@ module MiGA::Common::Format
97
106
  o[:sd] = Math.sqrt o[:var]
98
107
  o[:gc] = 100.0 * gc / o[:tot] if opts[:gc]
99
108
  o[:x] = 100.0 * xn / o[:tot] if opts[:x]
109
+ if opts[:skew]
110
+ at = o[:tot] - gc
111
+ o[:at_skew] = 100.0 * (2 * t - at) / at
112
+ o[:gc_skew] = 100.0 * (2 * c - gc) / gc
113
+ end
114
+
100
115
  if opts[:n50]
101
116
  l.sort!
102
117
  thr = o[:tot] / 2
@@ -132,9 +147,14 @@ class String
132
147
  end
133
148
 
134
149
  ##
135
- # Replace underscores by spaces or dots (depending on context).
150
+ # Replace underscores with spaces or other symbols, depending on context
136
151
  def unmiga_name
137
- gsub(/_(str|sp|subsp|pv)__/, '_\\1._').tr('_', ' ')
152
+ gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
153
+ .gsub(/g_c_(content)/, 'G+C \\1')
154
+ .gsub(/g_c_(skew)/, 'G-C \\1')
155
+ .gsub(/a_t_(skew)/, 'A-T \\1')
156
+ .gsub(/x_content/, &:capitalize)
157
+ .tr('_', ' ')
138
158
  end
139
159
 
140
160
  ##
@@ -21,28 +21,35 @@ module MiGA::Result::Stats
21
21
 
22
22
  def compute_stats_raw_reads
23
23
  stats = {}
24
+ seq_opts = { gc: true, x: true, skew: true }
24
25
  if self[:files][:pair1].nil?
25
- s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, gc: true, x: true)
26
+ s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
26
27
  stats = {
27
28
  reads: s[:n],
28
29
  length_average: [s[:avg], 'bp'],
29
30
  length_standard_deviation: [s[:sd], 'bp'],
30
31
  g_c_content: [s[:gc], '%'],
31
- x_content: [s[:x], '%']
32
+ x_content: [s[:x], '%'],
33
+ g_c_skew: [s[:gc_skew], '%'],
34
+ a_t_skew: [s[:at_skew], '%']
32
35
  }
33
36
  else
34
- s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, gc: true, x: true)
35
- s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, gc: true, x: true)
37
+ s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, seq_opts)
38
+ s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, seq_opts)
36
39
  stats = {
37
40
  read_pairs: s1[:n],
38
41
  forward_length_average: [s1[:avg], 'bp'],
39
42
  forward_length_standard_deviation: [s1[:sd], 'bp'],
40
43
  forward_g_c_content: [s1[:gc], '%'],
41
44
  forward_x_content: [s1[:x], '%'],
45
+ forward_g_c_skew: [s1[:gc_skew], '%'],
46
+ forward_a_t_skew: [s1[:at_skew], '%'],
42
47
  reverse_length_average: [s2[:avg], 'bp'],
43
48
  reverse_length_standard_deviation: [s2[:sd], 'bp'],
44
49
  reverse_g_c_content: [s2[:gc], '%'],
45
- reverse_x_content: [s2[:x], '%']
50
+ reverse_x_content: [s2[:x], '%'],
51
+ reverse_g_c_skew: [s2[:gc_skew], '%'],
52
+ reverse_a_t_skew: [s2[:at_skew], '%']
46
53
  }
47
54
  end
48
55
  stats
@@ -50,19 +57,22 @@ module MiGA::Result::Stats
50
57
 
51
58
  def compute_stats_trimmed_fasta
52
59
  f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
53
- s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true)
60
+ s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
54
61
  {
55
62
  reads: s[:n],
56
63
  length_average: [s[:avg], 'bp'],
57
64
  length_standard_deviation: [s[:sd], 'bp'],
58
65
  g_c_content: [s[:gc], '%'],
59
- x_content: [s[:x], '%']
66
+ x_content: [s[:x], '%'],
67
+ g_c_skew: [s[:gc_skew], '%'],
68
+ a_t_skew: [s[:at_skew], '%']
60
69
  }
61
70
  end
62
71
 
63
72
  def compute_stats_assembly
64
73
  s = MiGA::MiGA.seqs_length(
65
- file_path(:largecontigs), :fasta, n50: true, gc: true, x: true
74
+ file_path(:largecontigs), :fasta,
75
+ n50: true, gc: true, x: true, skew: true
66
76
  )
67
77
  {
68
78
  contigs: s[:n],
@@ -70,7 +80,9 @@ module MiGA::Result::Stats
70
80
  total_length: [s[:tot], 'bp'],
71
81
  longest_sequence: [s[:max], 'bp'],
72
82
  g_c_content: [s[:gc], '%'],
73
- x_content: [s[:x], '%']
83
+ x_content: [s[:x], '%'],
84
+ g_c_skew: [s[:gc_skew], '%'],
85
+ a_t_skew: [s[:at_skew], '%']
74
86
  }
75
87
  end
76
88
 
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 10, 2]
11
+ VERSION = [0.7, 11, 0]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 6, 30)
19
+ VERSION_DATE = Date.new(2020, 7, 1)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.10.2
4
+ version: 0.7.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-30 00:00:00.000000000 Z
11
+ date: 2020-07-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons