miga-base 0.3.2.0 → 0.3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/actions/doctor.rb +16 -1
- data/bin/miga +7 -3
- data/lib/miga/common/format.rb +18 -20
- data/lib/miga/dataset.rb +23 -4
- data/lib/miga/dataset/result.rb +7 -1
- data/lib/miga/version.rb +1 -1
- data/scripts/miga.bash +3 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 82b444c1fd986a4bc95f5cca2e2485b2fb0dd29b82d7424ef7615e41d9f901d3
|
4
|
+
data.tar.gz: 8bc59e799f8af6e13dc1a3e0653743490c44189e056fbe8a9f86dc53b029a6bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3aabd7fa5fd037b4e427b131c311a66a103f68f5ff0aaf4907abf4663ed285e7010d89a10d2443e72f4d9c1f0a032ba112faf961972da34bc44b449a3531332
|
7
|
+
data.tar.gz: 4b1f302f219addc604f66526f72171f711c1e89252a7f152d68e839d4235ab723a43811f16664b81dee77b91c149fd38e06e54cca7d8412bf84594f26de36dcb
|
data/actions/doctor.rb
CHANGED
@@ -6,7 +6,8 @@
|
|
6
6
|
require "sqlite3"
|
7
7
|
|
8
8
|
o = {q:true, ld:false,
|
9
|
-
db: true, dist: true, files: true,
|
9
|
+
db: true, dist: true, files: true,
|
10
|
+
ess: true, mts: true, start: true, tax: true}
|
10
11
|
OptionParser.new do |opt|
|
11
12
|
opt_banner(opt)
|
12
13
|
opt_object(opt, o, [:project])
|
@@ -22,6 +23,8 @@ OptionParser.new do |opt|
|
|
22
23
|
"Do not check unarchived essential genes."){ |v| o[:ess]=!v }
|
23
24
|
opt.on("--ignore-mytaxa-scan",
|
24
25
|
"Do not check unarchived MyTaxa scan."){ |v| o[:mts]=!v }
|
26
|
+
opt.on("--ignore-start",
|
27
|
+
"Do not check lingering legacy .start files."){ |v| o[:start]=!v }
|
25
28
|
opt.on("--ignore-taxonomy",
|
26
29
|
"Do not check taxonomy consistency."){ |v| o[:tax]=!v }
|
27
30
|
opt_common(opt, o)
|
@@ -174,6 +177,18 @@ if o[:mts]
|
|
174
177
|
end
|
175
178
|
end
|
176
179
|
|
180
|
+
if o[:start]
|
181
|
+
$stderr.puts "o Looking for legacy .start files lingering." unless o[:q]
|
182
|
+
p.each_dataset do |d|
|
183
|
+
d.each_result do |r_k, r|
|
184
|
+
if File.exist? r.path(:start)
|
185
|
+
$stderr.puts " > Registering again #{d.name}:#{r_k}" if o[:ld]
|
186
|
+
r.save
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
177
192
|
if o[:tax]
|
178
193
|
#$stderr.puts "o Checking for taxonomy/distances consistency" unless o[:q]
|
179
194
|
# TODO: Find 95%ANI clusters with entries from different species
|
data/bin/miga
CHANGED
@@ -171,9 +171,13 @@ def add_metadata(o, obj)
|
|
171
171
|
o[:metadata].split(",").each do |pair|
|
172
172
|
(k,v) = pair.split("=")
|
173
173
|
case v
|
174
|
-
when
|
175
|
-
when
|
176
|
-
when
|
174
|
+
when 'true'; v = true
|
175
|
+
when 'false'; v = false
|
176
|
+
when 'nil'; v = nil
|
177
|
+
end
|
178
|
+
if k=='_step'
|
179
|
+
obj.metadata["_try_#{v}"] ||= 0
|
180
|
+
obj.metadata["_try_#{v}"] += 1
|
177
181
|
end
|
178
182
|
obj.metadata[k] = v
|
179
183
|
end unless o[:metadata].nil?
|
data/lib/miga/common/format.rb
CHANGED
@@ -3,18 +3,17 @@ require 'tempfile'
|
|
3
3
|
require 'zlib'
|
4
4
|
|
5
5
|
module MiGA::Common::Format
|
6
|
-
|
7
6
|
##
|
8
7
|
# Tabulates +values+, an Array of Arrays, all with the same number of
|
9
8
|
# entries as +header+. Returns an Array of String, one per line.
|
10
9
|
def tabulate(header, values)
|
11
10
|
fields = [header.map(&:to_s)]
|
12
|
-
fields << fields.first.map{ |h| h.gsub(/\S/, '-') }
|
13
|
-
fields += values.map{ |
|
14
|
-
clen = fields.map{ |
|
15
|
-
fields.map do |
|
16
|
-
(0 .. clen.size-1).map do |col_n|
|
17
|
-
col_n==0 ?
|
11
|
+
fields << fields.first.map { |h| h.gsub(/\S/, '-') }
|
12
|
+
fields += values.map { |r| r.map { |cell| cell.nil? ? '?' : cell.to_s } }
|
13
|
+
clen = fields.map { |r| r.map(&:length) }.transpose.map(&:max)
|
14
|
+
fields.map do |r|
|
15
|
+
(0 .. clen.size - 1).map do |col_n|
|
16
|
+
col_n == 0 ? r[col_n].rjust(clen[col_n]) : r[col_n].ljust(clen[col_n])
|
18
17
|
end.join(' ')
|
19
18
|
end
|
20
19
|
end
|
@@ -40,7 +39,7 @@ module MiGA::Common::Format
|
|
40
39
|
(id, df) = [$1, $2]
|
41
40
|
tmp_fh.print buffer.wrap_width(80)
|
42
41
|
buffer = ''
|
43
|
-
tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/,
|
42
|
+
tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, '_')}#{df}"
|
44
43
|
else
|
45
44
|
buffer << ln.gsub(/[^A-Za-z\.\-]/, '')
|
46
45
|
end
|
@@ -65,30 +64,30 @@ module MiGA::Common::Format
|
|
65
64
|
# controlled via the +opts+ Hash. Supported options include:
|
66
65
|
# - +:n50+: If true, it also returns the N50 and the median (in bp).
|
67
66
|
# - +:gc+: If true, it also returns the G+C content (in %).
|
68
|
-
def seqs_length(file, format, opts={})
|
67
|
+
def seqs_length(file, format, opts = {})
|
69
68
|
fh = (file =~ /\.gz/) ? Zlib::GzipReader.open(file) : File.open(file, 'r')
|
70
69
|
l = []
|
71
70
|
gc = 0
|
72
71
|
i = 0 # <- Zlib::GzipReader doesn't set $.
|
73
72
|
fh.each_line do |ln|
|
74
73
|
i += 1
|
75
|
-
if (format
|
74
|
+
if (format == :fasta and ln =~ /^>/) or (format == :fastq and (i % 4)==1)
|
76
75
|
l << 0
|
77
|
-
elsif format
|
76
|
+
elsif format == :fasta or (i % 4) == 2
|
78
77
|
l[l.size-1] += ln.chomp.size
|
79
78
|
gc += ln.scan(/[GCgc]/).count if opts[:gc]
|
80
79
|
end
|
81
80
|
end
|
82
81
|
fh.close
|
83
|
-
|
82
|
+
|
84
83
|
o = { n: l.size, tot: l.inject(:+) }
|
85
|
-
o[:avg] = o[:tot].to_f/l.size
|
86
|
-
o[:var] = l.map{ |a| a
|
84
|
+
o[:avg] = o[:tot].to_f / l.size
|
85
|
+
o[:var] = l.map { |a| a**2 }.inject(:+).to_f / l.size - o[:avg]**2
|
87
86
|
o[:sd] = Math.sqrt o[:var]
|
88
|
-
o[:gc] = 100.0*gc/o[:tot] if opts[:gc]
|
87
|
+
o[:gc] = 100.0 * gc / o[:tot] if opts[:gc]
|
89
88
|
if opts[:n50]
|
90
89
|
l.sort!
|
91
|
-
thr = o[:tot]/2
|
90
|
+
thr = o[:tot] / 2
|
92
91
|
pos = 0
|
93
92
|
l.each do |a|
|
94
93
|
pos += a
|
@@ -96,7 +95,7 @@ module MiGA::Common::Format
|
|
96
95
|
break if pos >= thr
|
97
96
|
end
|
98
97
|
o[:med] = o[:n].even? ?
|
99
|
-
|
98
|
+
0.5 * l[o[:n] / 2 - 1, 2].inject(:+) : l[(o[:n] - 1) / 2]
|
100
99
|
end
|
101
100
|
o
|
102
101
|
end
|
@@ -105,7 +104,6 @@ end
|
|
105
104
|
##
|
106
105
|
# MiGA extensions to the String class.
|
107
106
|
class String
|
108
|
-
|
109
107
|
##
|
110
108
|
# Replace any character not allowed in a MiGA name for underscore (_). This
|
111
109
|
# results in a MiGA-compliant name EXCEPT for empty strings, that results in
|
@@ -123,13 +121,13 @@ class String
|
|
123
121
|
##
|
124
122
|
# Replace underscores by spaces or dots (depending on context).
|
125
123
|
def unmiga_name
|
126
|
-
gsub(/_(str|sp|subsp|pv)__/,
|
124
|
+
gsub(/_(str|sp|subsp|pv)__/, '_\\1._').tr('_', ' ')
|
127
125
|
end
|
128
126
|
|
129
127
|
##
|
130
128
|
# Wraps the string with fixed Integer +width+.
|
131
129
|
def wrap_width(width)
|
132
|
-
gsub(/([^\n\r]{1,#{width}})/,"\\1\n")
|
130
|
+
gsub(/([^\n\r]{1,#{width}})/, "\\1\n")
|
133
131
|
end
|
134
132
|
end
|
135
133
|
|
data/lib/miga/dataset.rb
CHANGED
@@ -76,6 +76,20 @@ class MiGA::Dataset < MiGA::MiGA
|
|
76
76
|
self.results.each{ |r| r.remove! }
|
77
77
|
self.metadata.remove!
|
78
78
|
end
|
79
|
+
|
80
|
+
##
|
81
|
+
# Inactivate a dataset. This halts automated processing by the daemon.
|
82
|
+
def inactivate!
|
83
|
+
self.metadata[:inactive] = true
|
84
|
+
self.metadata.save
|
85
|
+
end
|
86
|
+
|
87
|
+
##
|
88
|
+
# Activate a dataset. This removes the +:inactive+ flag.
|
89
|
+
def activate!
|
90
|
+
self.metadata[:inactive] = nil
|
91
|
+
self.metadata.save
|
92
|
+
end
|
79
93
|
|
80
94
|
##
|
81
95
|
# Get standard metadata values for the dataset as Array.
|
@@ -96,22 +110,27 @@ class MiGA::Dataset < MiGA::MiGA
|
|
96
110
|
##
|
97
111
|
# Is this dataset known to be multi-organism?
|
98
112
|
def is_multi?
|
99
|
-
return false if metadata[:type].nil? or
|
100
|
-
@@KNOWN_TYPES[type].nil?
|
113
|
+
return false if metadata[:type].nil? or @@KNOWN_TYPES[type].nil?
|
101
114
|
@@KNOWN_TYPES[type][:multi]
|
102
115
|
end
|
103
116
|
|
104
117
|
##
|
105
118
|
# Is this dataset known to be single-organism?
|
106
119
|
def is_nonmulti?
|
107
|
-
return false if metadata[:type].nil? or
|
108
|
-
@@KNOWN_TYPES[type].nil?
|
120
|
+
return false if metadata[:type].nil? or @@KNOWN_TYPES[type].nil?
|
109
121
|
!@@KNOWN_TYPES[type][:multi]
|
110
122
|
end
|
123
|
+
|
124
|
+
##
|
125
|
+
# Is this dataset active?
|
126
|
+
def is_active?
|
127
|
+
metadata[:inactive].nil? or !metadata[:inactive]
|
128
|
+
end
|
111
129
|
|
112
130
|
##
|
113
131
|
# Should I ignore +task+ for this dataset?
|
114
132
|
def ignore_task?(task)
|
133
|
+
return true unless is_active?
|
115
134
|
return !metadata["run_#{task}"] unless metadata["run_#{task}"].nil?
|
116
135
|
return true if task==:taxonomy and project.metadata[:ref_project].nil?
|
117
136
|
pattern = [true, false]
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -78,7 +78,13 @@ module MiGA::Dataset::Result
|
|
78
78
|
return nil if first.nil?
|
79
79
|
@@PREPROCESSING_TASKS.each do |t|
|
80
80
|
next if ignore_task? t
|
81
|
-
|
81
|
+
if after_first and add_result(t, save).nil?
|
82
|
+
if (metadata["_try_#{t}"] || 0) > (project.metadata[:max_try] || 10)
|
83
|
+
inactivate!
|
84
|
+
return nil
|
85
|
+
end
|
86
|
+
return t
|
87
|
+
end
|
82
88
|
after_first = (after_first or (t==first))
|
83
89
|
end
|
84
90
|
nil
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.3, 2,
|
13
|
+
VERSION = [0.3, 2, 1]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
data/scripts/miga.bash
CHANGED
@@ -14,6 +14,9 @@ for i in $(miga plugins -P "$PROJECT") ; do
|
|
14
14
|
source "$i/scripts-plugin.bash"
|
15
15
|
done
|
16
16
|
|
17
|
+
[[ -n $DATASET ]] \
|
18
|
+
&& miga add -P "$PROJECT" -D "$DATASET" -m "_step=$SCRIPT" --update
|
19
|
+
|
17
20
|
#if [[ "$RUNTYPE" == "qsub" ]] ; then
|
18
21
|
#elif [[ "$RUNTYPE" == "msub" ]] ; then
|
19
22
|
#fi
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2.
|
4
|
+
version: 0.3.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -503,7 +503,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
503
503
|
version: '0'
|
504
504
|
requirements: []
|
505
505
|
rubyforge_project:
|
506
|
-
rubygems_version: 2.6
|
506
|
+
rubygems_version: 2.7.6
|
507
507
|
signing_key:
|
508
508
|
specification_version: 4
|
509
509
|
summary: MiGA
|