miga-base 0.3.0.7 → 0.3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/actions/stats.rb +33 -6
- data/actions/tax_test.rb +14 -3
- data/lib/miga/dataset.rb +11 -9
- data/lib/miga/dataset_result.rb +7 -0
- data/lib/miga/project.rb +1 -1
- data/lib/miga/result.rb +1 -0
- data/lib/miga/taxonomy.rb +1 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/_distances_noref_nomulti.bash +33 -11
- data/scripts/distances.bash +3 -2
- data/scripts/taxonomy.bash +40 -0
- data/utils/arch-ess-genes.rb +57 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 220a35c60112758e6f1f2226c9978774db5b6de0
|
4
|
+
data.tar.gz: e08ef0fb54d966b76376f2c87284f532e3c3fa9b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3ab68a7fefcd05049dfb9939adade6efe490171afff1039650d49ba53ba8b55ad6be54c334b6d2ab0188c2ecc469d161455e1ea3eca0e1dab60063a686704ef5
|
7
|
+
data.tar.gz: 03c15a94148521c562dab2b26c377666d59e6c2720702b12c646ad900247089ffb28aef9706e5b299e80eca1a38addfdb6afb78c0a2bce91a28cd401a15aa461
|
data/actions/stats.rb
CHANGED
@@ -69,15 +69,42 @@ if o[:compute]
|
|
69
69
|
s = `FastA.length.pl '#{f}' | #{scr}`.chomp.split(" ")
|
70
70
|
stats = {predicted_proteins: s[0].to_i, average_length: [s[1].to_f, "aa"]}
|
71
71
|
when :essential_genes
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
72
|
+
if d.is_multi?
|
73
|
+
stats = {median_copies:0, mean_copies:0}
|
74
|
+
File.open(r.file_path(:report), "r") do |fh|
|
75
|
+
fh.each_line do |ln|
|
76
|
+
if /^! (Mean|Median) number of copies per model: (.*)\./.match(ln)
|
77
|
+
stats["#{$1.downcase}_copies".to_sym] = $2.to_f
|
78
|
+
end
|
77
79
|
end
|
78
80
|
end
|
81
|
+
else
|
82
|
+
# Fix estimate for Archaea
|
83
|
+
if not d.metadata[:tax].nil? and
|
84
|
+
d.metadata[:tax].is_in? MiGA::Taxonomy.new("d:Archaea") and
|
85
|
+
r.file_path(:bac_report).nil?
|
86
|
+
scr = "#{MiGA::MiGA.root_path}/utils/arch-ess-genes.rb"
|
87
|
+
rep = r.file_path(:report)
|
88
|
+
$stderr.print `ruby '#{scr}' '#{rep}' '#{rep}.archaea'`
|
89
|
+
r.add_file(:bac_report, "#{d.name}.ess/log")
|
90
|
+
r.add_file(:report, "#{d.name}.ess/log.archaea")
|
91
|
+
end
|
92
|
+
# Extract/compute quality values
|
93
|
+
stats = {completeness:[0.0,"%"], contamination:[0.0,"%"]}
|
94
|
+
File.open(r.file_path(:report), "r") do |fh|
|
95
|
+
fh.each_line do |ln|
|
96
|
+
if /^! (Completeness|Contamination): (.*)%/.match(ln)
|
97
|
+
stats[$1.downcase.to_sym][0] = $2.to_f
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
stats[:quality] = stats[:completeness][0] - stats[:contamination][0]*5
|
102
|
+
q_range = stats[:quality] > 80.0 ? :excellent :
|
103
|
+
stats[:quality] > 50.0 ? :high :
|
104
|
+
stats[:quality] > 20.0 ? :intermediate : :low
|
105
|
+
d.metadata[:quality_interval] = q_range
|
106
|
+
d.save
|
79
107
|
end
|
80
|
-
stats[:quality] = stats[:completeness][0] - stats[:contamination][0]*5
|
81
108
|
when :distances
|
82
109
|
d.cleanup_distances! unless d.nil?
|
83
110
|
else
|
data/actions/tax_test.rb
CHANGED
@@ -5,10 +5,13 @@
|
|
5
5
|
|
6
6
|
require "miga/tax_dist"
|
7
7
|
|
8
|
-
o = {q:true, test:"both"}
|
8
|
+
o = {q:true, test:"both", ref_project:false}
|
9
9
|
OptionParser.new do |opt|
|
10
10
|
opt_banner(opt)
|
11
11
|
opt_object(opt, o, [:project, :dataset])
|
12
|
+
opt.on("--ref-project",
|
13
|
+
"Use the taxonomy from the reference project, not the current project."
|
14
|
+
){ |v| o[:ref_project]=v }
|
12
15
|
opt.on("-t", "--test STRING",
|
13
16
|
"Test to perform. Supported values: intax, novel, both."
|
14
17
|
){ |v| o[:test]=v.downcase }
|
@@ -26,7 +29,7 @@ $stderr.puts "Loading dataset." unless o[:q]
|
|
26
29
|
ds = p.dataset(o[:dataset])
|
27
30
|
|
28
31
|
$stderr.puts "Finding closest relative." unless o[:q]
|
29
|
-
cr = ds.closest_relatives(1)
|
32
|
+
cr = ds.closest_relatives(1, o[:ref_project])
|
30
33
|
|
31
34
|
if cr.nil? or cr.empty?
|
32
35
|
raise "This action is not supported for the project or dataset." if cr.nil?
|
@@ -35,7 +38,15 @@ else
|
|
35
38
|
$stderr.puts "Querying probability distributions." unless o[:q]
|
36
39
|
cr = cr[0]
|
37
40
|
puts "Closest relative: #{cr[0]} with AAI: #{cr[1]}."
|
38
|
-
|
41
|
+
if o[:ref_project]
|
42
|
+
ref = p.metadata[:ref_project]
|
43
|
+
raise "--ref-project requested, but no reference project has been set." if ref.nil?
|
44
|
+
q = MiGA::Project.load(ref)
|
45
|
+
raise "--ref-project requested, but reference project doesn't exist." if q.nil?
|
46
|
+
tax = q.dataset(cr[0]).metadata[:tax]
|
47
|
+
else
|
48
|
+
tax = p.dataset(cr[0]).metadata[:tax]
|
49
|
+
end
|
39
50
|
tax ||= {}
|
40
51
|
|
41
52
|
if %w[intax both].include? o[:test]
|
data/lib/miga/dataset.rb
CHANGED
@@ -28,7 +28,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
28
28
|
mytaxa: "07.annotation/02.taxonomy/01.mytaxa",
|
29
29
|
mytaxa_scan: "07.annotation/03.qa/02.mytaxa_scan",
|
30
30
|
# Distances (for single-species datasets)
|
31
|
-
distances: "09.distances",
|
31
|
+
distances: "09.distances", taxonomy: "09.distances/05.taxonomy",
|
32
32
|
# General statistics
|
33
33
|
stats: "90.stats"
|
34
34
|
}
|
@@ -52,17 +52,17 @@ class MiGA::Dataset < MiGA::MiGA
|
|
52
52
|
def self.PREPROCESSING_TASKS ; @@PREPROCESSING_TASKS ; end
|
53
53
|
@@PREPROCESSING_TASKS = [:raw_reads, :trimmed_reads, :read_quality,
|
54
54
|
:trimmed_fasta, :assembly, :cds, :essential_genes, :ssu, :mytaxa,
|
55
|
-
:mytaxa_scan, :distances, :stats]
|
55
|
+
:mytaxa_scan, :distances, :taxonomy, :stats]
|
56
56
|
|
57
57
|
##
|
58
58
|
# Tasks to be excluded from query datasets.
|
59
|
-
@@EXCLUDE_NOREF_TASKS = [:mytaxa_scan]
|
59
|
+
@@EXCLUDE_NOREF_TASKS = [:mytaxa_scan, :taxonomy]
|
60
60
|
@@_EXCLUDE_NOREF_TASKS_H = Hash[@@EXCLUDE_NOREF_TASKS.map{ |i| [i,true] }]
|
61
61
|
|
62
62
|
##
|
63
63
|
# Tasks to be executed only in datasets that are not multi-organism. These
|
64
64
|
# tasks are ignored for multi-organism datasets or for unknown types.
|
65
|
-
@@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances]
|
65
|
+
@@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances, :taxonomy]
|
66
66
|
@@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map{ |i| [i,true] }]
|
67
67
|
|
68
68
|
##
|
@@ -237,6 +237,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
237
237
|
# Should I ignore +task+ for this dataset?
|
238
238
|
def ignore_task?(task)
|
239
239
|
return !metadata["run_#{task}"] unless metadata["run_#{task}"].nil?
|
240
|
+
return true if task==:taxonomy and project.metadata[:ref_project].nil?
|
240
241
|
pattern = [true, false]
|
241
242
|
( [@@_EXCLUDE_NOREF_TASKS_H[task], is_ref? ]==pattern or
|
242
243
|
[@@_ONLY_MULTI_TASKS_H[task], is_multi? ]==pattern or
|
@@ -271,14 +272,15 @@ class MiGA::Dataset < MiGA::MiGA
|
|
271
272
|
end
|
272
273
|
|
273
274
|
##
|
274
|
-
# Returns an Array of duples (Arrays) sorted by AAI:
|
275
|
+
# Returns an Array of +how_many+ duples (Arrays) sorted by AAI:
|
275
276
|
# - +0+: A String with the name(s) of the reference dataset.
|
276
277
|
# - +1+: A Float with the AAI.
|
277
|
-
# This function is currently only supported for query datasets
|
278
|
+
# This function is currently only supported for query datasets when +ref_project+ is false
|
279
|
+
# (default), and only for reference datasets when +ref_project+ is true. It returns
|
278
280
|
# +nil+ if this analysis is not supported.
|
279
|
-
def closest_relatives(how_many=1)
|
280
|
-
return nil if is_ref? or
|
281
|
-
r = result :distances
|
281
|
+
def closest_relatives(how_many=1, ref_project=false)
|
282
|
+
return nil if (is_ref? != ref_project) or is_multi?
|
283
|
+
r = result(ref_project ? :taxonomy : :distances)
|
282
284
|
return nil if r.nil?
|
283
285
|
db = SQLite3::Database.new(r.file_path :aai_db)
|
284
286
|
db.execute("SELECT seq2, aai FROM aai WHERE seq2 != ? " +
|
data/lib/miga/dataset_result.rb
CHANGED
@@ -188,6 +188,13 @@ module MiGA::DatasetResult
|
|
188
188
|
end
|
189
189
|
end
|
190
190
|
|
191
|
+
##
|
192
|
+
# Add result type +:taxonomy+ at +base+ (no +_opts+ supported).
|
193
|
+
def add_result_taxonomy(base, _opts)
|
194
|
+
r = add_result_distances_nonref(base)
|
195
|
+
add_files_to_ds_result(r, name, intax_test:".intax.txt")
|
196
|
+
end
|
197
|
+
|
191
198
|
##
|
192
199
|
# Add result type +:stats+ at +base+ (no +_opts+ supported).
|
193
200
|
def add_result_stats(base, _opts)
|
data/lib/miga/project.rb
CHANGED
@@ -29,7 +29,7 @@ class MiGA::Project < MiGA::MiGA
|
|
29
29
|
07.annotation/03.qa/02.mytaxa_scan
|
30
30
|
08.mapping 08.mapping/01.read-ctg 08.mapping/02.read-gene
|
31
31
|
09.distances 09.distances/01.haai 09.distances/02.aai
|
32
|
-
09.distances/03.ani 09.distances/04.ssu
|
32
|
+
09.distances/03.ani 09.distances/04.ssu 09.distances/05.taxonomy
|
33
33
|
10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
|
34
34
|
10.clades/04.phylogeny 10.clades/04.phylogeny/01.essential
|
35
35
|
10.clades/04.phylogeny/02.core 10.clades/05.metadata
|
data/lib/miga/result.rb
CHANGED
@@ -112,6 +112,7 @@ class MiGA::Result < MiGA::MiGA
|
|
112
112
|
# Load (or reload) result data in the JSON file #path.
|
113
113
|
def load
|
114
114
|
json = File.read(path)
|
115
|
+
raise "Impossible to load result, empty descriptor: #{path}." if json.empty?
|
115
116
|
@data = JSON.parse(json, {:symbolize_names=>true})
|
116
117
|
@data[:files] ||= {}
|
117
118
|
@results = (self[:results] || []).map{ |rs| MiGA::Result.new rs }
|
data/lib/miga/taxonomy.rb
CHANGED
@@ -133,7 +133,7 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
133
133
|
##
|
134
134
|
# Generate canonical String for the taxonomy.
|
135
135
|
def to_s
|
136
|
-
sorted_ranks.map{ |r| "#{r[0]}:#{r[1].gsub(
|
136
|
+
sorted_ranks.map{ |r| "#{r[0]}:#{r[1].gsub(/[\s:]/,"_")}" }.join(" ")
|
137
137
|
end
|
138
138
|
|
139
139
|
##
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.3,
|
13
|
+
VERSION = [0.3, 1, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2017,
|
21
|
+
VERSION_DATE = Date.new(2017, 9, 14)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
@@ -28,32 +28,35 @@ fx_exists miga-noref_haai_or_aai || function miga-noref_haai_or_aai {
|
|
28
28
|
local Q=$1
|
29
29
|
local S=$2
|
30
30
|
[[ -s $TMPDIR/$Q.faa ]] \
|
31
|
-
|| cp "
|
32
|
-
miga-haai_or_aai "$ESS/$Q.ess.faa" "$ESS/$S.ess.faa"
|
33
|
-
"$TMPDIR/$Q.
|
31
|
+
|| cp "$PROJECT/data/06.cds/$Q.faa" "$TMPDIR/$Q.faa"
|
32
|
+
miga-haai_or_aai "$PROJECT/$ESS/$Q.ess.faa" "$S_PROJ/$ESS/$S.ess.faa" \
|
33
|
+
"$TMPDIR/$Q.haai.db" "$TMPDIR/$Q.faa" "$S_PROJ/data/06.cds/$S.faa" \
|
34
|
+
"$TMPDIR/$Q.aai.db" "$CORES"
|
34
35
|
}
|
35
36
|
|
36
37
|
fx_exists miga-noref_ani || function miga-noref_ani {
|
37
38
|
local Q=$1
|
38
39
|
local S=$2
|
39
40
|
[[ -s "$TMPDIR/$Q.LargeContigs.fna" ]] \
|
40
|
-
|| cp "
|
41
|
-
|
41
|
+
|| cp "$PROJECT/data/05.assembly/$Q.LargeContigs.fna" \
|
42
|
+
"$TMPDIR/$Q.LargeContigs.fna"
|
43
|
+
miga-ani "$TMPDIR/$Q.LargeContigs.fna" \
|
44
|
+
"$S_PROJ/data/05.assembly/$S.LargeContigs.fna" \
|
42
45
|
"$CORES" "$TMPDIR/$Q.ani.db"
|
43
46
|
}
|
44
47
|
|
45
48
|
# Calculate the classification-informed AAI/ANI traverse (if not classified)
|
46
|
-
ESS="
|
47
|
-
if [[ $(miga
|
49
|
+
ESS="data/07.annotation/01.function/01.essential"
|
50
|
+
if [[ $(miga about -P "$S_PROJ" -m type) != "clade" ]] ; then
|
48
51
|
# Classify aai-clade (if project type is not clade)
|
49
|
-
CLADES="
|
52
|
+
CLADES="$S_PROJ/data/10.clades/01.find"
|
50
53
|
METRIC="aai"
|
51
|
-
REF_TABLE="02.aai/miga-project.txt.gz"
|
54
|
+
REF_TABLE="$S_PROJ/data/09.distances/02.aai/miga-project.txt.gz"
|
52
55
|
else
|
53
56
|
# Classify ani-clade (if project type is clade)
|
54
|
-
CLADES="
|
57
|
+
CLADES="$S_PROJ/data/10.clades/02.ani"
|
55
58
|
METRIC="ani"
|
56
|
-
REF_TABLE="03.ani/miga-project.txt.gz"
|
59
|
+
REF_TABLE="$S_PROJ/data/09.distances/03.ani/miga-project.txt.gz"
|
57
60
|
fi
|
58
61
|
|
59
62
|
CLASSIF="."
|
@@ -125,3 +128,22 @@ if [[ -s "${DATASET}.${METRIC}.db" ]] ; then
|
|
125
128
|
"$MIGA/utils/ref-tree.R" "${DATASET}.txt" "$DATASET" "$DATASET"
|
126
129
|
rm "$DATASET".tmp[012] "${DATASET}.txt"
|
127
130
|
fi
|
131
|
+
|
132
|
+
# Test taxonomy
|
133
|
+
(
|
134
|
+
trap 'rm "$DATASET.json" "$DATASET.done"' EXIT
|
135
|
+
FLAGS=""
|
136
|
+
[[ "$PROJECT" == "$S_PROJ" ]] || FLAGS="--ref-project"
|
137
|
+
miga date > "$DATASET.done"
|
138
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
139
|
+
miga tax_test -P "$PROJECT" -D "$DATASET" -t intax \
|
140
|
+
$FLAGS > "$DATASET.intax.txt"
|
141
|
+
)
|
142
|
+
|
143
|
+
# Transfer taxonomy
|
144
|
+
TAX_PVALUE=$(miga about -P "$PROJECT" -m tax_pvalue)
|
145
|
+
[[ "$TAX_PVALUE" == "?" ]] && TAX_PVALUE="0.05"
|
146
|
+
NEW_TAX=$(tail -n +6 "$DATASET.intax.txt" | head -n -3 \
|
147
|
+
| awk '$3<'$TAX_PVALUE'{print $1":"$2}' | grep -v "?" \
|
148
|
+
| tr "\\n" ' ' | perl -pe 's/ *$//')
|
149
|
+
miga tax_set -P "$PROJECT" -D "$DATASET" -s "$NEW_TAX"
|
data/scripts/distances.bash
CHANGED
@@ -14,9 +14,9 @@ TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
|
|
14
14
|
trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
|
15
15
|
|
16
16
|
# Check type of dataset
|
17
|
-
NOMULTI=$(miga
|
17
|
+
NOMULTI=$(miga ls -P "$PROJECT" -D "$DATASET" --no-multi \
|
18
18
|
| wc -l | awk '{print $1}')
|
19
|
-
REF=$(miga
|
19
|
+
REF=$(miga ls -P "$PROJECT" -D "$DATASET" --ref \
|
20
20
|
| wc -l | awk '{print $1}')
|
21
21
|
|
22
22
|
# Call submodules
|
@@ -26,6 +26,7 @@ if [[ "$NOMULTI" -eq "1" && "$REF" -eq "1" ]] ; then
|
|
26
26
|
# shellcheck source=scripts/_distances_ref_nomulti.bash
|
27
27
|
source "$MIGA/scripts/_distances_ref_nomulti.bash"
|
28
28
|
elif [[ "$NOMULTI" -eq "1" ]] ; then
|
29
|
+
S_PROJ=$PROJECT
|
29
30
|
# shellcheck source=scripts/_distances_noref_nomulti.bash
|
30
31
|
source "$MIGA/scripts/_distances_noref_nomulti.bash"
|
31
32
|
fi
|
@@ -0,0 +1,40 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES, $DATASET
|
3
|
+
set -e
|
4
|
+
SCRIPT="taxonomy"
|
5
|
+
echo "MiGA: $MIGA"
|
6
|
+
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
8
|
+
source "$MIGA/scripts/miga.bash" || exit 1
|
9
|
+
DIR="$PROJECT/data/09.distances/05.taxonomy"
|
10
|
+
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
11
|
+
cd "$DIR"
|
12
|
+
|
13
|
+
# Initialize
|
14
|
+
miga date > "$DATASET.start"
|
15
|
+
|
16
|
+
# Check if there is a reference project
|
17
|
+
S_PROJ=$(miga about -P "$PROJECT" -m ref_project)
|
18
|
+
|
19
|
+
if [[ "$S_PROJ" != "?" ]] ; then
|
20
|
+
|
21
|
+
# Check type of dataset
|
22
|
+
NOMULTI=$(miga ls -P "$PROJECT" -D "$DATASET" --no-multi \
|
23
|
+
| wc -l | awk '{print $1}')
|
24
|
+
|
25
|
+
if [[ "$NOMULTI" -eq "1" ]] ; then
|
26
|
+
# Call submodules
|
27
|
+
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
|
28
|
+
trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
|
29
|
+
# shellcheck source=scripts/_distances_functions.bash
|
30
|
+
source "$MIGA/scripts/_distances_functions.bash"
|
31
|
+
# shellcheck source=scripts/_distances_noref_nomulti.bash
|
32
|
+
source "$MIGA/scripts/_distances_noref_nomulti.bash"
|
33
|
+
rm -R "$TMPDIR"
|
34
|
+
fi
|
35
|
+
|
36
|
+
fi
|
37
|
+
|
38
|
+
# Finalize
|
39
|
+
miga date > "$DATASET.done"
|
40
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
@@ -0,0 +1,57 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
esslog = ARGV.shift
|
4
|
+
outlog = ARGV.shift
|
5
|
+
l_all = `HMM.essential.rb -l -q`.chomp.split("\n").map{ |i| i.gsub(/\t.*/,"") }
|
6
|
+
n_arc = Hash[
|
7
|
+
`HMM.essential.rb -l -q -A`.chomp.split("\n").map{ |i| i.split("\t") }
|
8
|
+
]
|
9
|
+
l_arc = n_arc.keys
|
10
|
+
|
11
|
+
def quality(hsh)
|
12
|
+
q = {}
|
13
|
+
q[:found] = hsh.values.map{ |i| i==0 ? 0 : 1 }.inject(:+)
|
14
|
+
q[:multi] = hsh.values.map{ |i| i==0 ? 0 : i-1 }.inject(:+)
|
15
|
+
q[:cmp] = 100.0*q[:found].to_f/hsh.size
|
16
|
+
q[:cnt] = 100.0*q[:multi].to_f/hsh.size
|
17
|
+
q
|
18
|
+
end
|
19
|
+
|
20
|
+
cnt_ref = {}
|
21
|
+
l_all.each{ |i| cnt_ref[i] = 1 }
|
22
|
+
|
23
|
+
at = :header
|
24
|
+
File.open(esslog, "r") do |fh|
|
25
|
+
fh.each_line do |ln|
|
26
|
+
v = ln.chomp.gsub(/^! +/, "")
|
27
|
+
if v=="Multiple copies: "
|
28
|
+
at = :multi
|
29
|
+
elsif v=="Missing genes: "
|
30
|
+
at = :missing
|
31
|
+
elsif at==:multi
|
32
|
+
v =~ /^(\d+) (\S+): .*/ or raise "Unexpected multi-copies format: #{v}"
|
33
|
+
cnt_ref[$2] = $1.to_i
|
34
|
+
elsif at==:missing
|
35
|
+
v =~ /^(\S+): .*/ or raise "Unexpected missing format: #{v}"
|
36
|
+
cnt_ref[$1] = 0
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
cnt_arc = {}
|
42
|
+
l_arc.each{ |i| cnt_arc[i] = cnt_ref[i] }
|
43
|
+
|
44
|
+
q = quality(cnt_arc)
|
45
|
+
File.open(outlog, "w") do |ofh|
|
46
|
+
ofh.puts "! Essential genes found: #{q[:found]}/#{cnt_arc.size}."
|
47
|
+
ofh.puts "! Completeness: #{q[:cmp].round(1)}%."
|
48
|
+
ofh.puts "! Contamination: #{q[:cnt].round(1)}%."
|
49
|
+
if q[:multi] > 0
|
50
|
+
ofh.puts "! Multiple copies: "
|
51
|
+
cnt_arc.each{ |k,v| ofh.puts "! #{v} #{k}: #{n_arc[k]}." if v>1 }
|
52
|
+
end
|
53
|
+
if q[:found] < cnt_arc.size
|
54
|
+
ofh.puts "! Missing genes: "
|
55
|
+
cnt_arc.each{ |k,v| ofh.puts "! #{k}: #{n_arc[k]}." if v==0 }
|
56
|
+
end
|
57
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -160,9 +160,11 @@ files:
|
|
160
160
|
- scripts/ssu.bash
|
161
161
|
- scripts/stats.bash
|
162
162
|
- scripts/subclades.bash
|
163
|
+
- scripts/taxonomy.bash
|
163
164
|
- scripts/trimmed_fasta.bash
|
164
165
|
- scripts/trimmed_reads.bash
|
165
166
|
- utils/adapters.fa
|
167
|
+
- utils/arch-ess-genes.rb
|
166
168
|
- utils/core-pan-plot.R
|
167
169
|
- utils/enveomics/build_enveomics_r.bash
|
168
170
|
- utils/enveomics/enveomics.R/data/growth.curves.rda
|