miga-base 0.7.10.2 → 0.7.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/stats.rb +1 -1
- data/lib/miga/common/format.rb +26 -6
- data/lib/miga/result/stats.rb +21 -9
- data/lib/miga/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7b4a168130d732c670246cd4a874272e77e5f7d88fdef00e10d81ab8e5f9979a
|
4
|
+
data.tar.gz: '069e2dd280b4afecb67478612f1dee35bf2cada3ae57cbc61c6e70d0ef3bd233'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a37fd7d69339c7a63d5ac38e0c232fed96d479c3f2f2bc67b2ee956bb908d8690a55f21a6fa0185c05f209139e16b6b2ddcd6b0f36fac471f9e0b4fd2c4a5f04
|
7
|
+
data.tar.gz: cb156656c79f1a765281f163691650e32f90056dc767827d4b3fe958b4042e6359b810f5a3249c6902bce042f6a457507a027836797c39328889d9cbbbc5c5d0
|
@@ -38,7 +38,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
|
|
38
38
|
end
|
39
39
|
if cli[:key].nil?
|
40
40
|
r[:stats].each do |k, v|
|
41
|
-
k_n = k
|
41
|
+
k_n = k.to_s.unmiga_name.sub(/^./, &:upcase)
|
42
42
|
cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
|
43
43
|
end
|
44
44
|
else
|
data/lib/miga/common/format.rb
CHANGED
@@ -68,15 +68,20 @@ module MiGA::Common::Format
|
|
68
68
|
# a FastA or FastQ file (supports gzipped files). The +format+ must be a
|
69
69
|
# Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
|
70
70
|
# controlled via the +opts+ Hash. Supported options include:
|
71
|
-
# - +:n50+:
|
72
|
-
# - +:gc+:
|
73
|
-
# - +:x+:
|
71
|
+
# - +:n50+: Include the N50 and the median (in bp)
|
72
|
+
# - +:gc+: Include the G+C content (in %)
|
73
|
+
# - +:x+: Include the undetermined bases content (in %)
|
74
|
+
# - +:skew+: Include G-C and A-T sequence skew (in %; forces gc: true).
|
75
|
+
# See definition used here in DOI:10.1177/117693430700300006
|
74
76
|
def seqs_length(file, format, opts = {})
|
77
|
+
opts[:gc] = true if opts[:skew]
|
75
78
|
fh = file =~ /\.gz/ ? Zlib::GzipReader.open(file) : File.open(file, 'r')
|
76
79
|
l = []
|
77
80
|
gc = 0
|
78
81
|
xn = 0
|
79
|
-
|
82
|
+
t = 0
|
83
|
+
c = 0
|
84
|
+
i = 0 # <- Zlib::GzipReader doesn't set `$.`
|
80
85
|
fh.each_line do |ln|
|
81
86
|
i += 1
|
82
87
|
if (format == :fasta and ln =~ /^>/) or
|
@@ -86,6 +91,10 @@ module MiGA::Common::Format
|
|
86
91
|
l[l.size - 1] += ln.chomp.size
|
87
92
|
gc += ln.scan(/[GCgc]/).count if opts[:gc]
|
88
93
|
xn += ln.scan(/[XNxn]/).count if opts[:x]
|
94
|
+
if opts[:skew]
|
95
|
+
t += ln.scan(/[Tt]/).count
|
96
|
+
c += ln.scan(/[Cc]/).count
|
97
|
+
end
|
89
98
|
end
|
90
99
|
end
|
91
100
|
fh.close
|
@@ -97,6 +106,12 @@ module MiGA::Common::Format
|
|
97
106
|
o[:sd] = Math.sqrt o[:var]
|
98
107
|
o[:gc] = 100.0 * gc / o[:tot] if opts[:gc]
|
99
108
|
o[:x] = 100.0 * xn / o[:tot] if opts[:x]
|
109
|
+
if opts[:skew]
|
110
|
+
at = o[:tot] - gc
|
111
|
+
o[:at_skew] = 100.0 * (2 * t - at) / at
|
112
|
+
o[:gc_skew] = 100.0 * (2 * c - gc) / gc
|
113
|
+
end
|
114
|
+
|
100
115
|
if opts[:n50]
|
101
116
|
l.sort!
|
102
117
|
thr = o[:tot] / 2
|
@@ -132,9 +147,14 @@ class String
|
|
132
147
|
end
|
133
148
|
|
134
149
|
##
|
135
|
-
# Replace underscores by spaces or
|
150
|
+
# Replace underscores by spaces or other symbols depending on context
|
136
151
|
def unmiga_name
|
137
|
-
gsub(/_(str|sp|subsp|pv)__/, '_\\1._').tr('_', ' ')
|
152
|
+
gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
|
153
|
+
.gsub(/g_c_(content)/, 'G+C \\1')
|
154
|
+
.gsub(/g_c_(skew)/, 'G-C \\1')
|
155
|
+
.gsub(/a_t_(skew)/, 'A-T \\1')
|
156
|
+
.gsub(/x_content/, &:capitalize)
|
157
|
+
.tr('_', ' ')
|
138
158
|
end
|
139
159
|
|
140
160
|
##
|
data/lib/miga/result/stats.rb
CHANGED
@@ -21,28 +21,35 @@ module MiGA::Result::Stats
|
|
21
21
|
|
22
22
|
def compute_stats_raw_reads
|
23
23
|
stats = {}
|
24
|
+
seq_opts = { gc: true, x: true, skew: true }
|
24
25
|
if self[:files][:pair1].nil?
|
25
|
-
s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, gc: true, x: true)
|
26
|
+
s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
|
26
27
|
stats = {
|
27
28
|
reads: s[:n],
|
28
29
|
length_average: [s[:avg], 'bp'],
|
29
30
|
length_standard_deviation: [s[:sd], 'bp'],
|
30
31
|
g_c_content: [s[:gc], '%'],
|
31
|
-
x_content: [s[:x], '%']
|
32
|
+
x_content: [s[:x], '%'],
|
33
|
+
g_c_skew: [s[:gc_skew], '%'],
|
34
|
+
a_t_skew: [s[:at_skew], '%']
|
32
35
|
}
|
33
36
|
else
|
34
|
-
s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, gc: true, x: true)
|
35
|
-
s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, gc: true, x: true)
|
37
|
+
s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, seq_opts)
|
38
|
+
s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, seq_opts)
|
36
39
|
stats = {
|
37
40
|
read_pairs: s1[:n],
|
38
41
|
forward_length_average: [s1[:avg], 'bp'],
|
39
42
|
forward_length_standard_deviation: [s1[:sd], 'bp'],
|
40
43
|
forward_g_c_content: [s1[:gc], '%'],
|
41
44
|
forward_x_content: [s1[:x], '%'],
|
45
|
+
forward_g_c_skew: [s1[:gc_skew], '%'],
|
46
|
+
forward_a_t_skew: [s1[:at_skew], '%'],
|
42
47
|
reverse_length_average: [s2[:avg], 'bp'],
|
43
48
|
reverse_length_standard_deviation: [s2[:sd], 'bp'],
|
44
49
|
reverse_g_c_content: [s2[:gc], '%'],
|
45
|
-
reverse_x_content: [s2[:x], '%']
|
50
|
+
reverse_x_content: [s2[:x], '%'],
|
51
|
+
reverse_g_c_skew: [s2[:gc_skew], '%'],
|
52
|
+
reverse_a_t_skew: [s2[:at_skew], '%']
|
46
53
|
}
|
47
54
|
end
|
48
55
|
stats
|
@@ -50,19 +57,22 @@ module MiGA::Result::Stats
|
|
50
57
|
|
51
58
|
def compute_stats_trimmed_fasta
|
52
59
|
f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
|
53
|
-
s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true)
|
60
|
+
s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
|
54
61
|
{
|
55
62
|
reads: s[:n],
|
56
63
|
length_average: [s[:avg], 'bp'],
|
57
64
|
length_standard_deviation: [s[:sd], 'bp'],
|
58
65
|
g_c_content: [s[:gc], '%'],
|
59
|
-
x_content: [s[:x], '%']
|
66
|
+
x_content: [s[:x], '%'],
|
67
|
+
g_c_skew: [s[:gc_skew], '%'],
|
68
|
+
a_t_skew: [s[:at_skew], '%']
|
60
69
|
}
|
61
70
|
end
|
62
71
|
|
63
72
|
def compute_stats_assembly
|
64
73
|
s = MiGA::MiGA.seqs_length(
|
65
|
-
file_path(:largecontigs), :fasta, n50: true, gc: true, x: true
|
74
|
+
file_path(:largecontigs), :fasta,
|
75
|
+
n50: true, gc: true, x: true, skew: true
|
66
76
|
)
|
67
77
|
{
|
68
78
|
contigs: s[:n],
|
@@ -70,7 +80,9 @@ module MiGA::Result::Stats
|
|
70
80
|
total_length: [s[:tot], 'bp'],
|
71
81
|
longest_sequence: [s[:max], 'bp'],
|
72
82
|
g_c_content: [s[:gc], '%'],
|
73
|
-
x_content: [s[:x], '%']
|
83
|
+
x_content: [s[:x], '%'],
|
84
|
+
g_c_skew: [s[:gc_skew], '%'],
|
85
|
+
a_t_skew: [s[:at_skew], '%']
|
74
86
|
}
|
75
87
|
end
|
76
88
|
|
data/lib/miga/version.rb
CHANGED
@@ -8,7 +8,7 @@ module MiGA
|
|
8
8
|
# - Float representing the major.minor version.
|
9
9
|
# - Integer representing gem releases of the current version.
|
10
10
|
# - Integer representing minor changes that require new version number.
|
11
|
-
VERSION = [0.7, 10, 2]
|
11
|
+
VERSION = [0.7, 11, 0]
|
12
12
|
|
13
13
|
##
|
14
14
|
# Nickname for the current major.minor version.
|
@@ -16,7 +16,7 @@ module MiGA
|
|
16
16
|
|
17
17
|
##
|
18
18
|
# Date of the current gem release.
|
19
|
-
VERSION_DATE = Date.new(2020,
|
19
|
+
VERSION_DATE = Date.new(2020, 7, 1)
|
20
20
|
|
21
21
|
##
|
22
22
|
# Reference of MiGA.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.10.2
|
4
|
+
version: 0.7.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|