miga-base 0.7.10.2 → 0.7.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action/stats.rb +1 -1
- data/lib/miga/common/format.rb +26 -6
- data/lib/miga/result/stats.rb +21 -9
- data/lib/miga/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7b4a168130d732c670246cd4a874272e77e5f7d88fdef00e10d81ab8e5f9979a
|
4
|
+
data.tar.gz: '069e2dd280b4afecb67478612f1dee35bf2cada3ae57cbc61c6e70d0ef3bd233'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a37fd7d69339c7a63d5ac38e0c232fed96d479c3f2f2bc67b2ee956bb908d8690a55f21a6fa0185c05f209139e16b6b2ddcd6b0f36fac471f9e0b4fd2c4a5f04
|
7
|
+
data.tar.gz: cb156656c79f1a765281f163691650e32f90056dc767827d4b3fe958b4042e6359b810f5a3249c6902bce042f6a457507a027836797c39328889d9cbbbc5c5d0
|
@@ -38,7 +38,7 @@ class MiGA::Cli::Action::Stats < MiGA::Cli::Action
|
|
38
38
|
end
|
39
39
|
if cli[:key].nil?
|
40
40
|
r[:stats].each do |k, v|
|
41
|
-
k_n = k
|
41
|
+
k_n = k.to_s.unmiga_name.sub(/^./, &:upcase)
|
42
42
|
cli.puts "#{k_n}: #{v.is_a?(Array) ? v.join(' ') : v}"
|
43
43
|
end
|
44
44
|
else
|
data/lib/miga/common/format.rb
CHANGED
@@ -68,15 +68,20 @@ module MiGA::Common::Format
|
|
68
68
|
# a FastA or FastQ file (supports gzipped files). The +format+ must be a
|
69
69
|
# Symbol, one of +:fasta+ or +:fastq+. Additional estimations can be
|
70
70
|
# controlled via the +opts+ Hash. Supported options include:
|
71
|
-
# - +:n50+:
|
72
|
-
# - +:gc+:
|
73
|
-
# - +:x+:
|
71
|
+
# - +:n50+: Include the N50 and the median (in bp)
|
72
|
+
# - +:gc+: Include the G+C content (in %)
|
73
|
+
# - +:x+: Include the undetermined bases content (in %)
|
74
|
+
# - +:skew+: Include G-C and A-T sequence skew (in %; forces gc: true).
|
75
|
+
# See definition used here in DOI:10.1177/117693430700300006
|
74
76
|
def seqs_length(file, format, opts = {})
|
77
|
+
opts[:gc] = true if opts[:skew]
|
75
78
|
fh = file =~ /\.gz/ ? Zlib::GzipReader.open(file) : File.open(file, 'r')
|
76
79
|
l = []
|
77
80
|
gc = 0
|
78
81
|
xn = 0
|
79
|
-
|
82
|
+
t = 0
|
83
|
+
c = 0
|
84
|
+
i = 0 # <- Zlib::GzipReader doesn't set `$.`
|
80
85
|
fh.each_line do |ln|
|
81
86
|
i += 1
|
82
87
|
if (format == :fasta and ln =~ /^>/) or
|
@@ -86,6 +91,10 @@ module MiGA::Common::Format
|
|
86
91
|
l[l.size - 1] += ln.chomp.size
|
87
92
|
gc += ln.scan(/[GCgc]/).count if opts[:gc]
|
88
93
|
xn += ln.scan(/[XNxn]/).count if opts[:x]
|
94
|
+
if opts[:skew]
|
95
|
+
t += ln.scan(/[Tt]/).count
|
96
|
+
c += ln.scan(/[Cc]/).count
|
97
|
+
end
|
89
98
|
end
|
90
99
|
end
|
91
100
|
fh.close
|
@@ -97,6 +106,12 @@ module MiGA::Common::Format
|
|
97
106
|
o[:sd] = Math.sqrt o[:var]
|
98
107
|
o[:gc] = 100.0 * gc / o[:tot] if opts[:gc]
|
99
108
|
o[:x] = 100.0 * xn / o[:tot] if opts[:x]
|
109
|
+
if opts[:skew]
|
110
|
+
at = o[:tot] - gc
|
111
|
+
o[:at_skew] = 100.0 * (2 * t - at) / at
|
112
|
+
o[:gc_skew] = 100.0 * (2 * c - gc) / gc
|
113
|
+
end
|
114
|
+
|
100
115
|
if opts[:n50]
|
101
116
|
l.sort!
|
102
117
|
thr = o[:tot] / 2
|
@@ -132,9 +147,14 @@ class String
|
|
132
147
|
end
|
133
148
|
|
134
149
|
##
|
135
|
-
# Replace underscores by spaces or
|
150
|
+
# Replace underscores by spaces or other symbols depending on context
|
136
151
|
def unmiga_name
|
137
|
-
gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
|
152
|
+
gsub(/_(str|sp|subsp|pv)__/, '_\\1._')
|
153
|
+
.gsub(/g_c_(content)/, 'G+C \\1')
|
154
|
+
.gsub(/g_c_(skew)/, 'G-C \\1')
|
155
|
+
.gsub(/a_t_(skew)/, 'A-T \\1')
|
156
|
+
.gsub(/x_content/, &:capitalize)
|
157
|
+
.tr('_', ' ')
|
138
158
|
end
|
139
159
|
|
140
160
|
##
|
data/lib/miga/result/stats.rb
CHANGED
@@ -21,28 +21,35 @@ module MiGA::Result::Stats
|
|
21
21
|
|
22
22
|
def compute_stats_raw_reads
|
23
23
|
stats = {}
|
24
|
+
seq_opts = { gc: true, x: true, skew: true }
|
24
25
|
if self[:files][:pair1].nil?
|
25
|
-
s = MiGA::MiGA.seqs_length(file_path(:single), :fastq,
|
26
|
+
s = MiGA::MiGA.seqs_length(file_path(:single), :fastq, seq_opts)
|
26
27
|
stats = {
|
27
28
|
reads: s[:n],
|
28
29
|
length_average: [s[:avg], 'bp'],
|
29
30
|
length_standard_deviation: [s[:sd], 'bp'],
|
30
31
|
g_c_content: [s[:gc], '%'],
|
31
|
-
x_content: [s[:x], '%']
|
32
|
+
x_content: [s[:x], '%'],
|
33
|
+
g_c_skew: [s[:gc_skew], '%'],
|
34
|
+
a_t_skew: [s[:at_skew], '%']
|
32
35
|
}
|
33
36
|
else
|
34
|
-
s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq,
|
35
|
-
s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq,
|
37
|
+
s1 = MiGA::MiGA.seqs_length(file_path(:pair1), :fastq, seq_opts)
|
38
|
+
s2 = MiGA::MiGA.seqs_length(file_path(:pair2), :fastq, seq_opts)
|
36
39
|
stats = {
|
37
40
|
read_pairs: s1[:n],
|
38
41
|
forward_length_average: [s1[:avg], 'bp'],
|
39
42
|
forward_length_standard_deviation: [s1[:sd], 'bp'],
|
40
43
|
forward_g_c_content: [s1[:gc], '%'],
|
41
44
|
forward_x_content: [s1[:x], '%'],
|
45
|
+
forward_g_c_skew: [s1[:gc_skew], '%'],
|
46
|
+
forward_a_t_skew: [s1[:at_skew], '%'],
|
42
47
|
reverse_length_average: [s2[:avg], 'bp'],
|
43
48
|
reverse_length_standard_deviation: [s2[:sd], 'bp'],
|
44
49
|
reverse_g_c_content: [s2[:gc], '%'],
|
45
|
-
reverse_x_content: [s2[:x], '%']
|
50
|
+
reverse_x_content: [s2[:x], '%'],
|
51
|
+
reverse_g_c_skew: [s2[:gc_skew], '%'],
|
52
|
+
reverse_a_t_skew: [s2[:at_skew], '%']
|
46
53
|
}
|
47
54
|
end
|
48
55
|
stats
|
@@ -50,19 +57,22 @@ module MiGA::Result::Stats
|
|
50
57
|
|
51
58
|
def compute_stats_trimmed_fasta
|
52
59
|
f = self[:files][:coupled].nil? ? file_path(:single) : file_path(:coupled)
|
53
|
-
s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true)
|
60
|
+
s = MiGA::MiGA.seqs_length(f, :fasta, gc: true, x: true, skew: true)
|
54
61
|
{
|
55
62
|
reads: s[:n],
|
56
63
|
length_average: [s[:avg], 'bp'],
|
57
64
|
length_standard_deviation: [s[:sd], 'bp'],
|
58
65
|
g_c_content: [s[:gc], '%'],
|
59
|
-
x_content: [s[:x], '%']
|
66
|
+
x_content: [s[:x], '%'],
|
67
|
+
g_c_skew: [s[:gc_skew], '%'],
|
68
|
+
a_t_skew: [s[:at_skew], '%']
|
60
69
|
}
|
61
70
|
end
|
62
71
|
|
63
72
|
def compute_stats_assembly
|
64
73
|
s = MiGA::MiGA.seqs_length(
|
65
|
-
file_path(:largecontigs), :fasta,
|
74
|
+
file_path(:largecontigs), :fasta,
|
75
|
+
n50: true, gc: true, x: true, skew: true
|
66
76
|
)
|
67
77
|
{
|
68
78
|
contigs: s[:n],
|
@@ -70,7 +80,9 @@ module MiGA::Result::Stats
|
|
70
80
|
total_length: [s[:tot], 'bp'],
|
71
81
|
longest_sequence: [s[:max], 'bp'],
|
72
82
|
g_c_content: [s[:gc], '%'],
|
73
|
-
x_content: [s[:x], '%']
|
83
|
+
x_content: [s[:x], '%'],
|
84
|
+
g_c_skew: [s[:gc_skew], '%'],
|
85
|
+
a_t_skew: [s[:at_skew], '%']
|
74
86
|
}
|
75
87
|
end
|
76
88
|
|
data/lib/miga/version.rb
CHANGED
@@ -8,7 +8,7 @@ module MiGA
|
|
8
8
|
# - Float representing the major.minor version.
|
9
9
|
# - Integer representing gem releases of the current version.
|
10
10
|
# - Integer representing minor changes that require new version number.
|
11
|
-
VERSION = [0.7, 10, 2]
|
11
|
+
VERSION = [0.7, 11, 0]
|
12
12
|
|
13
13
|
##
|
14
14
|
# Nickname for the current major.minor version.
|
@@ -16,7 +16,7 @@ module MiGA
|
|
16
16
|
|
17
17
|
##
|
18
18
|
# Date of the current gem release.
|
19
|
-
VERSION_DATE = Date.new(2020,
|
19
|
+
VERSION_DATE = Date.new(2020, 7, 1)
|
20
20
|
|
21
21
|
##
|
22
22
|
# Reference of MiGA.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.10.2
|
4
|
+
version: 0.7.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|