miga-base 0.7.10.2 → 0.7.11.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f3b350bb3e5dce2f0f8d0006d7f0693ff746c157006890a1250f30d71d89a523
4
- data.tar.gz: 1d94e39c61bfe191388d309eeb992b995350f6191dc74d5c7b15ae9741bcca09
3
+ metadata.gz: 7b4a168130d732c670246cd4a874272e77e5f7d88fdef00e10d81ab8e5f9979a
4
+ data.tar.gz: '069e2dd280b4afecb67478612f1dee35bf2cada3ae57cbc61c6e70d0ef3bd233'
5
5
  SHA512:
6
- metadata.gz: f2a47ca016873723f030c9435db7daa81c819a3f19c65ed7614c21d8d68d556af0dd7313b79d2678508a12762ecd36f7bd922f5cf45640b383c6aaadde91cdfe
7
- data.tar.gz: e8218114c0e89d80eab2b6cf7eba5a1de6f26b7d27e3560af1da12fdfc9037f3557a5994e1b982cff35635cea3d23e18ef234758ef8e12b86fb56f782a41ef89
6
+ metadata.gz: a37fd7d69339c7a63d5ac38e0c232fed96d479c3f2f2bc67b2ee956bb908d8690a55f21a6fa0185c05f209139e16b6b2ddcd6b0f36fac471f9e0b4fd2c4a5f04
7
+ data.tar.gz: cb156656c79f1a765281f163691650e32f90056dc767827d4b3fe958b4042e6359b810f5a3249c6902bce042f6a457507a027836797c39328889d9cbbbc5c5d0
@@ -38,7 +38,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
38
38
  end
39
39
  if cli[:key].nil?
40
40
  r[:stats].each do |k, v|
41
- k_n = k == :g_c_content ? 'G+C content' : k.to_s.unmiga_name.capitalize
41
+ k_n = k.to_s.unmiga_name.sub(/^./, &:upcase)
42
42
  cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
43
43
  end
44
44
  else
@@ -68,15 +68,20 @@ module MiGA::Common::Format
68
68
  # a FastA or FastQ file (supports gzipped files). The +format+ must be a
69
69
  # Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
70
70
  # controlled via the +opts+ Hash. Supported options include:
71
- # - +:n50+: If true, it also returns the N50 and the median (in bp)
72
- # - +:gc+: If true, it also returns the G+C content (in %)
73
- # - +:x+: If true, it also returns the undetermined bases content (in %)
71
+ # - +:n50+: Include the N50 and the median (in bp)
72
+ # - +:gc+: Include the G+C content (in %)
73
+ # - +:x+: Include the undetermined bases content (in %)
74
+ # - +:skew+: Include G-C and A-T sequence skew (in %; forces gc: true).
75
+ # See definition used here in DOI:10.1177/117693430700300006
74
76
  def seqs_length(file, format, opts = {})
77
+ opts[:gc] = true if opts[:skew]
75
78
  fh = file =~ /\.gz/ ? Zlib::GzipReader.open(file) : File.open(file, 'r')
76
79
  l = []
77
80
  gc = 0
78
81
  xn = 0
79
- i = 0 # <- Zlib::GzipReader doesn't set `$.`
82
+ t = 0
83
+ c = 0
84
+ i = 0 # <- Zlib::GzipReader doesn't set `$.`
80
85
  fh.each_line do |ln|
81
86
  i += 1
82
87
  if (format == :fasta and ln =~ /^>/) or
@@ -86,6 +91,10 @@ module MiGA::Common::Format
86
91
  l[l.size - 1] += ln.chomp.size
87
92
  gc += ln.scan(/[GCgc]/).count if opts[:gc]
88
93
  xn += ln.scan(/[XNxn]/).count if opts[:x]
94
+ if opts[:skew]
95
+ t += ln.scan(/[Tt]/).count
96
+ c += ln.scan(/[Cc]/).count
97
+ end
89
98
  end
90
99
  end
91
100
  fh.close
@@ -97,6 +106,12 @@ module MiGA::Common::Format
97
106
  o[:sd] = Math.sqrt o[:var]
98
107
  o[:gc] = 100.0 * gc / o[:tot] if opts[:gc]
99
108
  o[:x] = 100.0 * xn / o[:tot] if opts[:x]
109
+ if opts[:skew]
110
+ at = o[:tot] - gc
111
+ o[:at_skew] = 100.0 * (2 * t - at) / at
112
+ o[:gc_skew] = 100.0 * (2 * c - gc) / gc
113
+ end
114
+
100
115
  if opts[:n50]
101
116
  l.sort!
102
117
  thr = o[:tot] / 2
@@ -132,9 +147,14 @@ class String
132
147
  end
133
148
 
134
149
  ##
135
- # Replace underscores by spaces or dots (depending on context).
150
+ # Replace underscores by spaces or other symbols depending on context
136
151
  def unmiga_name
137
- gsub(/_(str|sp|subsp|pv)__/, '_\\1._').tr('_', ' ')
152
+ gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
153
+ .gsub(/g_c_(content)/, 'G+C \\1')
154
+ .gsub(/g_c_(skew)/, 'G-C \\1')
155
+ .gsub(/a_t_(skew)/, 'A-T \\1')
156
+ .gsub(/x_content/, &:capitalize)
157
+ .tr('_', ' ')
138
158
  end
139
159
 
140
160
  ##
@@ -21,28 +21,35 @@ module MiGA::Result::Stats
21
21
 
22
22
  def compute_stats_raw_reads
23
23
  stats = {}
24
+ seq_opts = { gc: true, x: true, skew: true }
24
25
  if self[:files][:pair1].nil?
25
- s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, gc: true, x: true)
26
+ s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
26
27
  stats = {
27
28
  reads: s[:n],
28
29
  length_average: [s[:avg], 'bp'],
29
30
  length_standard_deviation: [s[:sd], 'bp'],
30
31
  g_c_content: [s[:gc], '%'],
31
- x_content: [s[:x], '%']
32
+ x_content: [s[:x], '%'],
33
+ g_c_skew: [s[:gc_skew], '%'],
34
+ a_t_skew: [s[:at_skew], '%']
32
35
  }
33
36
  else
34
- s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, gc: true, x: true)
35
- s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, gc: true, x: true)
37
+ s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, seq_opts)
38
+ s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, seq_opts)
36
39
  stats = {
37
40
  read_pairs: s1[:n],
38
41
  forward_length_average: [s1[:avg], 'bp'],
39
42
  forward_length_standard_deviation: [s1[:sd], 'bp'],
40
43
  forward_g_c_content: [s1[:gc], '%'],
41
44
  forward_x_content: [s1[:x], '%'],
45
+ forward_g_c_skew: [s1[:gc_skew], '%'],
46
+ forward_a_t_skew: [s1[:at_skew], '%'],
42
47
  reverse_length_average: [s2[:avg], 'bp'],
43
48
  reverse_length_standard_deviation: [s2[:sd], 'bp'],
44
49
  reverse_g_c_content: [s2[:gc], '%'],
45
- reverse_x_content: [s2[:x], '%']
50
+ reverse_x_content: [s2[:x], '%'],
51
+ reverse_g_c_skew: [s2[:gc_skew], '%'],
52
+ reverse_a_t_skew: [s2[:at_skew], '%']
46
53
  }
47
54
  end
48
55
  stats
@@ -50,19 +57,22 @@ module MiGA::Result::Stats
50
57
 
51
58
  def compute_stats_trimmed_fasta
52
59
  f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
53
- s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true)
60
+ s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
54
61
  {
55
62
  reads: s[:n],
56
63
  length_average: [s[:avg], 'bp'],
57
64
  length_standard_deviation: [s[:sd], 'bp'],
58
65
  g_c_content: [s[:gc], '%'],
59
- x_content: [s[:x], '%']
66
+ x_content: [s[:x], '%'],
67
+ g_c_skew: [s[:gc_skew], '%'],
68
+ a_t_skew: [s[:at_skew], '%']
60
69
  }
61
70
  end
62
71
 
63
72
  def compute_stats_assembly
64
73
  s = MiGA::MiGA.seqs_length(
65
- file_path(:largecontigs), :fasta, n50: true, gc: true, x: true
74
+ file_path(:largecontigs), :fasta,
75
+ n50: true, gc: true, x: true, skew: true
66
76
  )
67
77
  {
68
78
  contigs: s[:n],
@@ -70,7 +80,9 @@ module MiGA::Result::Stats
70
80
  total_length: [s[:tot], 'bp'],
71
81
  longest_sequence: [s[:max], 'bp'],
72
82
  g_c_content: [s[:gc], '%'],
73
- x_content: [s[:x], '%']
83
+ x_content: [s[:x], '%'],
84
+ g_c_skew: [s[:gc_skew], '%'],
85
+ a_t_skew: [s[:at_skew], '%']
74
86
  }
75
87
  end
76
88
 
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 10, 2]
11
+ VERSION = [0.7, 11, 0]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 6, 30)
19
+ VERSION_DATE = Date.new(2020, 7, 1)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.10.2
4
+ version: 0.7.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-30 00:00:00.000000000 Z
11
+ date: 2020-07-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons