miga-base 0.3.2.1 → 0.3.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/project/result.rb +1 -1
- data/lib/miga/version.rb +1 -1
- data/scripts/aai_distances.bash +1 -1
- data/scripts/ani_distances.bash +1 -1
- data/scripts/assembly.bash +1 -1
- data/scripts/cds.bash +1 -1
- data/scripts/clade_finding.bash +1 -1
- data/scripts/distances.bash +1 -1
- data/scripts/essential_genes.bash +1 -1
- data/scripts/haai_distances.bash +1 -1
- data/scripts/miga.bash +2 -0
- data/scripts/mytaxa.bash +1 -1
- data/scripts/mytaxa_scan.bash +1 -1
- data/scripts/ogs.bash +20 -17
- data/scripts/project_stats.bash +1 -1
- data/scripts/read_quality.bash +1 -1
- data/scripts/ssu.bash +1 -1
- data/scripts/stats.bash +1 -1
- data/scripts/subclades.bash +1 -1
- data/scripts/taxonomy.bash +3 -2
- data/scripts/trimmed_fasta.bash +1 -1
- data/scripts/trimmed_reads.bash +1 -1
- data/utils/distance/pipeline.rb +1 -1
- data/utils/distance/runner.rb +5 -4
- data/utils/subclades.R +1 -0
- metadata +2 -5
- data/scripts/_distances_functions.bash +0 -104
- data/scripts/_distances_noref_nomulti.bash +0 -149
- data/scripts/_distances_ref_nomulti.bash +0 -79
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 83a243d552f8c0f850cd46e827069e2a0b6abb5b8ed3f2cda41cec21a5b93c85
|
4
|
+
data.tar.gz: 85d9838dc0f9708d50e7d4375b8e641bc85ef096d95c88fcc43e8b5d1421e449
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7705f7987ae8a13a6664d8d3a9ca586bbbebc5447c5858fb5c9980c9d31d97a57e779e6987d4ca49dc06b7ccbb8602f22a2747cb1f55acfda3a5bf4edaa1798f
|
7
|
+
data.tar.gz: 3920ee4cdbd9b6666e0d0233c3557218ad30abe4a4e9276de8db9532da628626ba764080766bf27ca5cee335c892cf7c72d610d2fc356dd2da464b24c881c228
|
data/lib/miga/project/result.rb
CHANGED
@@ -118,8 +118,8 @@ module MiGA::Project::Result
|
|
118
118
|
return nil unless result_files_exist?(base, %w[.ogs .stats])
|
119
119
|
r = MiGA::Result.new("#{base}.json")
|
120
120
|
r.add_file(:ogs, "miga-project.ogs")
|
121
|
+
r.add_file(:abc, "miga-project.abc")
|
121
122
|
r.add_file(:stats, "miga-project.stats")
|
122
|
-
r.add_file(:rbm, "miga-project.rbm")
|
123
123
|
r.add_file(:core_pan, "miga-project.core-pan.tsv")
|
124
124
|
r.add_file(:core_pan_plot, "miga-project.core-pan.pdf")
|
125
125
|
r
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.3, 2,
|
13
|
+
VERSION = [0.3, 2, 3]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
data/scripts/aai_distances.bash
CHANGED
data/scripts/ani_distances.bash
CHANGED
data/scripts/assembly.bash
CHANGED
data/scripts/cds.bash
CHANGED
data/scripts/clade_finding.bash
CHANGED
data/scripts/distances.bash
CHANGED
data/scripts/haai_distances.bash
CHANGED
data/scripts/miga.bash
CHANGED
data/scripts/mytaxa.bash
CHANGED
data/scripts/mytaxa_scan.bash
CHANGED
data/scripts/ogs.bash
CHANGED
@@ -14,25 +14,28 @@ miga date > "miga-project.start"
|
|
14
14
|
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
15
15
|
if [[ ! -s miga-project.ogs ]] ; then
|
16
16
|
# Extract RBMs
|
17
|
-
[[ -
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
file
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
[[ -s "$file" ]] || rm "$file"
|
17
|
+
if [[ ! -s miga-project.abc ]] ; then
|
18
|
+
[[ -d miga-project.tmp ]] || mkdir miga-project.tmp
|
19
|
+
for i in $DS ; do
|
20
|
+
file="miga-project.tmp/$i.abc"
|
21
|
+
[[ -s "$file" ]] && continue
|
22
|
+
echo "SELECT seq1,id1,seq2,id2,bitscore from rbm;" \
|
23
|
+
| sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
|
24
|
+
| awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
|
25
|
+
> "$file.tmp"
|
26
|
+
mv "$file.tmp" "$file"
|
28
27
|
done
|
29
|
-
|
30
|
-
|
28
|
+
cat miga-project.tmp/*.abc > miga-project.abc
|
29
|
+
fi
|
30
|
+
rm -rf miga-project.tmp
|
31
31
|
|
32
32
|
# Estimate OGs and Clean RBMs
|
33
|
-
ogs.mcl.rb -o miga-project.ogs
|
34
|
-
[[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]]
|
35
|
-
|
33
|
+
ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
|
34
|
+
if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
|
35
|
+
rm miga-project.abc
|
36
|
+
else
|
37
|
+
gzip -9 miga-project.abc
|
38
|
+
fi
|
36
39
|
fi
|
37
40
|
|
38
41
|
# Calculate Statistics
|
@@ -43,4 +46,4 @@ Rscript "$MIGA/utils/core-pan-plot.R" \
|
|
43
46
|
|
44
47
|
# Finalize
|
45
48
|
miga date > "miga-project.done"
|
46
|
-
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
49
|
+
miga add_result -P "$PROJECT" -r "$SCRIPT" -f
|
data/scripts/project_stats.bash
CHANGED
data/scripts/read_quality.bash
CHANGED
data/scripts/ssu.bash
CHANGED
data/scripts/stats.bash
CHANGED
data/scripts/subclades.bash
CHANGED
data/scripts/taxonomy.bash
CHANGED
@@ -14,8 +14,9 @@ cd "$DIR"
|
|
14
14
|
miga date > "$DATASET.start"
|
15
15
|
|
16
16
|
# Run
|
17
|
-
ruby "$MIGA/
|
17
|
+
ruby -I "$MIGA/lib" \
|
18
|
+
"$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1
|
18
19
|
|
19
20
|
# Finalize
|
20
21
|
miga date > "$DATASET.done"
|
21
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
22
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
|
data/scripts/trimmed_fasta.bash
CHANGED
data/scripts/trimmed_reads.bash
CHANGED
data/utils/distance/pipeline.rb
CHANGED
@@ -25,7 +25,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end
|
28
|
-
classif =
|
28
|
+
classif = "#{classif}/miga-project.sc-#{val_cls}"
|
29
29
|
result_fh.puts [val_cls, val_med, max_val, classif].join("\t")
|
30
30
|
classify(clades, classif, metric, result_fh, val_cls)
|
31
31
|
end
|
data/utils/distance/runner.rb
CHANGED
@@ -21,13 +21,14 @@ class MiGA::DistanceRunner
|
|
21
21
|
@project = MiGA::Project.load(project_path) or
|
22
22
|
raise "No project at #{project_path}"
|
23
23
|
@dataset = project.dataset(dataset_name)
|
24
|
-
@home = File.expand_path(
|
24
|
+
@home = File.expand_path('data/09.distances', project.path)
|
25
25
|
# Default opts
|
26
|
-
@opts[:aai_save_rbm] ||= ENV.fetch(
|
27
|
-
project.is_clade? ?
|
26
|
+
@opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
|
27
|
+
project.is_clade? ? 'save-rbm' : 'no-save-rbm'
|
28
28
|
end
|
29
29
|
@opts[:thr] ||= ENV.fetch("CORES"){ 2 }.to_i
|
30
30
|
if opts[:run_taxonomy] && project.metadata[:ref_project]
|
31
|
+
@home = File.expand_path('05.taxonomy', @home)
|
31
32
|
@ref_project = MiGA::Project.load(project.metadata[:ref_project])
|
32
33
|
end
|
33
34
|
@ref_project ||= project
|
@@ -85,7 +86,7 @@ class MiGA::DistanceRunner
|
|
85
86
|
r = ln.chomp.split("\t")
|
86
87
|
next unless r[1].to_i==val_cls
|
87
88
|
target = ref_project.dataset(r[0])
|
88
|
-
aai = (
|
89
|
+
aai = (v[1]==:aai) ? aai(target) : 100.0
|
89
90
|
ani(target) if aai >= 90.0
|
90
91
|
end
|
91
92
|
end
|
data/utils/subclades.R
CHANGED
@@ -52,6 +52,7 @@ subclades <- function(ani_file, out_base, thr=1, ani=c()) {
|
|
52
52
|
s.avg.z <- (s[1,]-mean(s[1,]))/(sd(s[1,])+0.0001)
|
53
53
|
s.neg.z <- (s[2,]-mean(s[2,]))/(sd(s[2,])+0.01)
|
54
54
|
ds <- s.avg.z - s.neg.z - 2/(1:length(k)) - (1:length(k))/50
|
55
|
+
if(mean(s[1,]<0)<0.75) ds[s[1,]<0] <- mean(ds) # <- k's with negative average
|
55
56
|
top.n <- k[which.max(ds)]
|
56
57
|
|
57
58
|
# Classify genomes
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2.
|
4
|
+
version: 0.3.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-07-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -167,9 +167,6 @@ files:
|
|
167
167
|
- lib/miga/tax_index.rb
|
168
168
|
- lib/miga/taxonomy.rb
|
169
169
|
- lib/miga/version.rb
|
170
|
-
- scripts/_distances_functions.bash
|
171
|
-
- scripts/_distances_noref_nomulti.bash
|
172
|
-
- scripts/_distances_ref_nomulti.bash
|
173
170
|
- scripts/aai_distances.bash
|
174
171
|
- scripts/ani_distances.bash
|
175
172
|
- scripts/assembly.bash
|
@@ -1,104 +0,0 @@
|
|
1
|
-
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
|
3
|
-
# $NOMULTI, $REF
|
4
|
-
|
5
|
-
set -e
|
6
|
-
|
7
|
-
if [[ ! -n $MIGA_AAI_SAVE_RBM ]] ; then
|
8
|
-
MIGA_AAI_SAVE_RBM="save-rbm"
|
9
|
-
if [[ -n $PROJECT ]] ; then
|
10
|
-
if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
|
11
|
-
MIGA_AAI_SAVE_RBM="no-save-rbm"
|
12
|
-
fi
|
13
|
-
fi
|
14
|
-
fi
|
15
|
-
|
16
|
-
fx_exists miga-make_empty_aai_db || function miga-make_empty_aai_db {
|
17
|
-
local DB=$1
|
18
|
-
echo "create table if not exists aai(seq1 varchar(256), seq2 varchar(256)," \
|
19
|
-
" aai float, sd float, n int, omega int);" | sqlite3 "$DB"
|
20
|
-
}
|
21
|
-
|
22
|
-
fx_exists miga-ds_name || function miga-ds_name {
|
23
|
-
basename "$1" | perl -pe "s/[^A-Za-z0-9_].*//"
|
24
|
-
}
|
25
|
-
|
26
|
-
fx_exists miga-aai || function miga-aai {
|
27
|
-
local F1=$1
|
28
|
-
local F2=$2
|
29
|
-
local TH=$3
|
30
|
-
local DB=$4
|
31
|
-
local N1
|
32
|
-
N1=$(miga-ds_name "$F1")
|
33
|
-
local N2
|
34
|
-
N2=$(miga-ds_name "$F2")
|
35
|
-
aai.rb -1 "$F1" -2 "$F2" -t "$TH" -a --lookup-first -S "$DB" --name1 "$N1" \
|
36
|
-
--name2 "$N2" --$MIGA_AAI_SAVE_RBM || echo "0"
|
37
|
-
}
|
38
|
-
|
39
|
-
fx_exists miga-ani || function miga-ani {
|
40
|
-
local F1=$1
|
41
|
-
local F2=$2
|
42
|
-
local TH=$3
|
43
|
-
local DB=$4
|
44
|
-
local N1
|
45
|
-
N1=$(miga-ds_name "$F1")
|
46
|
-
local N2
|
47
|
-
N2=$(miga-ds_name "$F2")
|
48
|
-
ani.rb -1 "$F1" -2 "$F2" -t "$TH" -a --no-save-regions --no-save-rbm \
|
49
|
-
--lookup-first -S "$DB" --name1 "$N1" --name2 "$N2" || echo "0"
|
50
|
-
}
|
51
|
-
|
52
|
-
fx_exists miga-haai || function miga-haai {
|
53
|
-
local F1=$1
|
54
|
-
local F2=$2
|
55
|
-
local TH=$3
|
56
|
-
local DB=$4
|
57
|
-
local AAI_DB=$5
|
58
|
-
local N1
|
59
|
-
N1=$(miga-ds_name "$F1")
|
60
|
-
local N2
|
61
|
-
N2=$(miga-ds_name "$F2")
|
62
|
-
local HAAI
|
63
|
-
HAAI=$(MIGA_AAI_SAVE_RBM="no-save-rbm" miga-aai "$F1" "$F2" "$TH" "$DB")
|
64
|
-
if [[ "$HAAI" != "" && $(perl -e "print 1 if '$HAAI' <= 90") == "1" ]] ; then
|
65
|
-
local AAI
|
66
|
-
AAI=$(perl -e "print (100-exp(2.435076 + 0.4275193*log(100-$HAAI)))")
|
67
|
-
[[ ! -s $AAI_DB ]] && miga-make_empty_aai_db "$AAI_DB"
|
68
|
-
echo "insert into aai values('$N1','$N2','$AAI',0,0,0);" | sqlite3 "$AAI_DB"
|
69
|
-
echo "$AAI"
|
70
|
-
fi
|
71
|
-
}
|
72
|
-
|
73
|
-
fx_exists miga-haai_or_aai || function miga-haai_or_aai {
|
74
|
-
local FH1=$1
|
75
|
-
local FH2=$2
|
76
|
-
local DBH=$3
|
77
|
-
local F1=$4
|
78
|
-
local F2=$5
|
79
|
-
local DB=$6
|
80
|
-
local TH=$7
|
81
|
-
local AAI
|
82
|
-
AAI=$(miga-haai "$FH1" "$FH2" "$TH" "$DBH" "$DB")
|
83
|
-
[[ "${AAI%.*}" -le 0 ]] && AAI=$(miga-aai "$F1" "$F2" "$TH" "$DB")
|
84
|
-
echo "$AAI"
|
85
|
-
}
|
86
|
-
|
87
|
-
fx_exists miga-val_from_db || function miga-val_from_db {
|
88
|
-
local N1=$1
|
89
|
-
local N2=$2
|
90
|
-
local DB=$3
|
91
|
-
local MT=$4
|
92
|
-
if [[ -s $DB ]] ; then
|
93
|
-
echo "select $MT from $MT where seq1='$N1' and seq2='$N2';" \
|
94
|
-
| sqlite3 "$DB" || echo 0
|
95
|
-
fi
|
96
|
-
}
|
97
|
-
|
98
|
-
fx_exists miga-aai_from_db || function miga-aai_from_db {
|
99
|
-
miga-val_from_db "$1" "$2" "$3" aai
|
100
|
-
}
|
101
|
-
|
102
|
-
fx_exists miga-ani_from_db || function miga-ani_from_db {
|
103
|
-
miga-val_from_db "$1" "$2" "$3" ani
|
104
|
-
}
|
@@ -1,149 +0,0 @@
|
|
1
|
-
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
|
3
|
-
# $NOMULTI, $REF
|
4
|
-
|
5
|
-
set -e
|
6
|
-
|
7
|
-
# Deal with previous runs (if any)
|
8
|
-
exists "$DATASET".haai.db && cp "$DATASET".haai.db "$TMPDIR"
|
9
|
-
exists "$DATASET".a[an]i.db && cp "$DATASET".a[an]i.db "$TMPDIR"
|
10
|
-
exists "$DATASET".a[an]i.9[05] && rm "$DATASET".a[an]i.9[05]
|
11
|
-
N=0
|
12
|
-
fx_exists miga-checkpoint_n || function miga-checkpoint_n {
|
13
|
-
let N=$N+1
|
14
|
-
if [[ $N -ge 10 ]] ; then
|
15
|
-
for metric in haai aai ani ; do
|
16
|
-
if [[ -s $TMPDIR/$DATASET.$metric.db ]] ; then
|
17
|
-
echo "select count(*) from ${metric#h};" \
|
18
|
-
| sqlite3 "$TMPDIR/$DATASET.$metric.db" \
|
19
|
-
>/dev/null || exit 1
|
20
|
-
cp "$TMPDIR/$DATASET.$metric.db" .
|
21
|
-
fi
|
22
|
-
done
|
23
|
-
N=0
|
24
|
-
fi
|
25
|
-
}
|
26
|
-
|
27
|
-
fx_exists miga-noref_haai_or_aai || function miga-noref_haai_or_aai {
|
28
|
-
local Q=$1
|
29
|
-
local S=$2
|
30
|
-
[[ -s $TMPDIR/$Q.faa ]] \
|
31
|
-
|| cp "$PROJECT/data/06.cds/$Q.faa" "$TMPDIR/$Q.faa"
|
32
|
-
miga-haai_or_aai "$PROJECT/$ESS/$Q.ess.faa" "$S_PROJ/$ESS/$S.ess.faa" \
|
33
|
-
"$TMPDIR/$Q.haai.db" "$TMPDIR/$Q.faa" "$S_PROJ/data/06.cds/$S.faa" \
|
34
|
-
"$TMPDIR/$Q.aai.db" "$CORES"
|
35
|
-
}
|
36
|
-
|
37
|
-
fx_exists miga-noref_ani || function miga-noref_ani {
|
38
|
-
local Q=$1
|
39
|
-
local S=$2
|
40
|
-
[[ -s "$TMPDIR/$Q.LargeContigs.fna" ]] \
|
41
|
-
|| cp "$PROJECT/data/05.assembly/$Q.LargeContigs.fna" \
|
42
|
-
"$TMPDIR/$Q.LargeContigs.fna"
|
43
|
-
miga-ani "$TMPDIR/$Q.LargeContigs.fna" \
|
44
|
-
"$S_PROJ/data/05.assembly/$S.LargeContigs.fna" \
|
45
|
-
"$CORES" "$TMPDIR/$Q.ani.db"
|
46
|
-
}
|
47
|
-
|
48
|
-
# Calculate the classification-informed AAI/ANI traverse (if not classified)
|
49
|
-
ESS="data/07.annotation/01.function/01.essential"
|
50
|
-
if [[ $(miga about -P "$S_PROJ" -m type) != "clade" ]] ; then
|
51
|
-
# Classify aai-clade (if project type is not clade)
|
52
|
-
CLADES="$S_PROJ/data/10.clades/01.find"
|
53
|
-
METRIC="aai"
|
54
|
-
REF_TABLE="$S_PROJ/data/09.distances/02.aai/miga-project.txt.gz"
|
55
|
-
else
|
56
|
-
# Classify ani-clade (if project type is clade)
|
57
|
-
CLADES="$S_PROJ/data/10.clades/02.ani"
|
58
|
-
METRIC="ani"
|
59
|
-
REF_TABLE="$S_PROJ/data/09.distances/03.ani/miga-project.txt.gz"
|
60
|
-
fi
|
61
|
-
|
62
|
-
CLASSIF="."
|
63
|
-
[[ -e "$DATASET.$METRIC-medoids.tsv" ]] && rm "$DATASET.$METRIC-medoids.tsv"
|
64
|
-
[[ -s "$CLADES/$CLASSIF/miga-project.medoids" ]] || \
|
65
|
-
touch "$DATASET.$METRIC-medoids.tsv" "${DATASET}.${METRIC}.db"
|
66
|
-
while [[ -s "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
|
67
|
-
MAX_VAL=0
|
68
|
-
VAL_MED=""
|
69
|
-
VAL_CLS=""
|
70
|
-
i_n=0
|
71
|
-
while read -r i ; do
|
72
|
-
let i_n=$i_n+1
|
73
|
-
if [[ $METRIC == "aai" ]] ; then
|
74
|
-
VAL=$(miga-noref_haai_or_aai "$DATASET" "$i")
|
75
|
-
else
|
76
|
-
VAL=$(miga-noref_ani "$DATASET" "$i")
|
77
|
-
fi
|
78
|
-
miga-checkpoint_n
|
79
|
-
if [[ $(perl -e "print 1 if '$VAL' >= '$MAX_VAL'") == "1" ]] ; then
|
80
|
-
MAX_VAL=$VAL
|
81
|
-
VAL_MED=$i
|
82
|
-
VAL_CLS=$i_n
|
83
|
-
echo "[$CLASSIF] New max: $VAL_MED ($VAL_CLS): $MAX_VAL"
|
84
|
-
fi
|
85
|
-
done < "$CLADES/$CLASSIF/miga-project.medoids"
|
86
|
-
CLASSIF="$CLASSIF/miga-project.sc-$VAL_CLS"
|
87
|
-
echo "$VAL_CLS $VAL_MED $MAX_VAL $CLASSIF" \
|
88
|
-
>> "$DATASET.$METRIC-medoids.tsv"
|
89
|
-
done
|
90
|
-
|
91
|
-
# Calculate all the AAIs/ANIs against the lowest subclade (if classified)
|
92
|
-
if [[ "$CLASSIF" != "." ]] ; then
|
93
|
-
PAR=$(dirname "$CLADES/$CLASSIF")/miga-project.classif
|
94
|
-
if [[ -s "$PAR" ]] ; then
|
95
|
-
while read -r i ; do
|
96
|
-
if [[ $METRIC == "aai" ]] ; then
|
97
|
-
AAI=$(miga-noref_haai_or_aai "$DATASET" "$i")
|
98
|
-
else
|
99
|
-
AAI=100
|
100
|
-
fi
|
101
|
-
if [[ $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
|
102
|
-
miga-noref_ani "$DATASET" "$i"
|
103
|
-
fi
|
104
|
-
miga-checkpoint_n
|
105
|
-
done < <(awk "\$2==$VAL_CLS{print \$1}" < "$PAR")
|
106
|
-
fi
|
107
|
-
fi
|
108
|
-
|
109
|
-
# Finalize
|
110
|
-
N=11
|
111
|
-
miga-checkpoint_n
|
112
|
-
|
113
|
-
# Build tree with medoids
|
114
|
-
if [[ -s "${DATASET}.${METRIC}.db" ]] ; then
|
115
|
-
echo "select seq2 from $METRIC;" | sqlite3 "${DATASET}.${METRIC}.db" \
|
116
|
-
| sort | uniq > "${DATASET}.tmp0"
|
117
|
-
perl -pe "s/^/^/" "${DATASET}.tmp0" | perl -pe "s/$/\\t/" \
|
118
|
-
> "${DATASET}.tmp1"
|
119
|
-
perl -pe "s/^/\\t/" "${DATASET}.tmp0" | perl -pe "s/$/\\t/" \
|
120
|
-
> "${DATASET}.tmp2"
|
121
|
-
echo "a b value" | tr " " "\\t" > "${DATASET}.txt"
|
122
|
-
gzip -c -d "$REF_TABLE" | cut -f 2-4 \
|
123
|
-
| grep -f "${DATASET}.tmp1" | grep -f "${DATASET}.tmp2" \
|
124
|
-
>> "${DATASET}.txt"
|
125
|
-
echo "select seq1, seq2, $METRIC from $METRIC;" \
|
126
|
-
| sqlite3 "${DATASET}.${METRIC}.db" | tr "\\|" "\\t" \
|
127
|
-
>> "${DATASET}.txt"
|
128
|
-
"$MIGA/utils/ref-tree.R" "${DATASET}.txt" "$DATASET" "$DATASET"
|
129
|
-
rm "$DATASET".tmp[012] "${DATASET}.txt"
|
130
|
-
fi
|
131
|
-
|
132
|
-
# Test taxonomy
|
133
|
-
(
|
134
|
-
trap 'rm "$DATASET.json" "$DATASET.done"' EXIT
|
135
|
-
FLAGS=""
|
136
|
-
[[ "$PROJECT" == "$S_PROJ" ]] || FLAGS="--ref-project"
|
137
|
-
miga date > "$DATASET.done"
|
138
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
139
|
-
miga tax_test -P "$PROJECT" -D "$DATASET" -t intax \
|
140
|
-
$FLAGS > "$DATASET.intax.txt"
|
141
|
-
)
|
142
|
-
|
143
|
-
# Transfer taxonomy
|
144
|
-
TAX_PVALUE=$(miga about -P "$PROJECT" -m tax_pvalue)
|
145
|
-
[[ "$TAX_PVALUE" == "?" ]] && TAX_PVALUE="0.05"
|
146
|
-
NEW_TAX=$(tail -n +6 "$DATASET.intax.txt" | head -n -3 \
|
147
|
-
| awk '$3<'$TAX_PVALUE'{print $1":"$2}' | grep -v "?" \
|
148
|
-
| tr "\\n" ' ' | perl -pe 's/ *$//')
|
149
|
-
miga tax_set -P "$PROJECT" -D "$DATASET" -s "$NEW_TAX"
|
@@ -1,79 +0,0 @@
|
|
1
|
-
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
|
3
|
-
# $NOMULTI, $REF
|
4
|
-
|
5
|
-
set -e
|
6
|
-
|
7
|
-
fx_exists miga-checkpoint_n || function miga-checkpoint_n {
|
8
|
-
if [[ $N -eq 10 ]] ; then
|
9
|
-
for t in 01.haai 02.aai 03.ani ; do
|
10
|
-
if [[ -s $TMPDIR/$t.db ]] ; then
|
11
|
-
tab="aai"
|
12
|
-
[[ "$t" == "03.ani" ]] && tab="ani"
|
13
|
-
echo "select count(*) from $tab;" \
|
14
|
-
| sqlite3 "$TMPDIR/$t.db" \
|
15
|
-
>/dev/null || exit 1
|
16
|
-
cp "$TMPDIR/$t.db" "$t/$DATASET.db"
|
17
|
-
fi
|
18
|
-
done
|
19
|
-
N=0
|
20
|
-
fi
|
21
|
-
let N=$N+1
|
22
|
-
}
|
23
|
-
|
24
|
-
ESS="../07.annotation/01.function/01.essential"
|
25
|
-
|
26
|
-
# Initialize temporals
|
27
|
-
for t in 01.haai 02.aai 03.ani ; do
|
28
|
-
[[ -s $t/$DATASET.db ]] && cp "$t/$DATASET.db" "$TMPDIR/$t.db"
|
29
|
-
done
|
30
|
-
N=1
|
31
|
-
|
32
|
-
# Traverse "nearly-half" of the ref-datasets using first-come-first-served
|
33
|
-
for i in $(miga list_datasets -P "$PROJECT" --ref --no-multi) ; do
|
34
|
-
echo "[ $(date "+%Y-%m-%d %H:%M:%S %z") ] $i"
|
35
|
-
AAI=""; ANI="";
|
36
|
-
# Check if the i-th dataset is ready
|
37
|
-
[[ -s $ESS/$i.done && -s $ESS/$i.json ]] || continue
|
38
|
-
# Check if this is done (e.g., in a previous failed iteration)
|
39
|
-
AAI=$(miga-aai_from_db "$DATASET" "$i" "$TMPDIR/02.aai.db")
|
40
|
-
# Try the other direction
|
41
|
-
[[ "${AAI%.*}" -le 0 ]] \
|
42
|
-
&& AAI=$(miga-aai_from_db "$i" "$DATASET" "02.aai/$i.db")
|
43
|
-
# Try with hAAI
|
44
|
-
if [[ "${AAI%.*}" -le 0 ]] ; then
|
45
|
-
[[ -e "$TMPDIR/$DATASET.ess.faa" ]] \
|
46
|
-
|| cp "$ESS/$DATASET.ess.faa" "$TMPDIR/$DATASET.ess.faa"
|
47
|
-
AAI=$(miga-haai "$TMPDIR/$DATASET.ess.faa" "$ESS/$i.ess.faa" \
|
48
|
-
"$CORES" "$TMPDIR/01.haai.db" "$TMPDIR/02.aai.db")
|
49
|
-
fi
|
50
|
-
# Try with complete AAI
|
51
|
-
if [[ "${AAI%.*}" -le 0 ]] ; then
|
52
|
-
[[ -e "$TMPDIR/$DATASET.faa" ]] \
|
53
|
-
|| cp "../06.cds/$DATASET.faa" "$TMPDIR/$DATASET.faa"
|
54
|
-
AAI=$(miga-aai "$TMPDIR/$DATASET.faa" "../06.cds/$i.faa" \
|
55
|
-
"$CORES" "$TMPDIR/02.aai.db")
|
56
|
-
fi
|
57
|
-
# Check if ANI is meaningful
|
58
|
-
if [[ -e "../05.assembly/$DATASET.LargeContigs.fna" \
|
59
|
-
&& -e "../05.assembly/$i.LargeContigs.fna" \
|
60
|
-
&& $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
|
61
|
-
# Check if this is done (e.g., in a previous failed iteration)
|
62
|
-
ANI=$(miga-ani_from_db "$DATASET" "$i" "$TMPDIR/03.ani.db")
|
63
|
-
# Try the other direction
|
64
|
-
[[ "${ANI%.*}" -le 0 ]] \
|
65
|
-
&& ANI=$(miga-ani_from_db "$i" "$DATASET" "03.ani/$i.db")
|
66
|
-
# Calculate it
|
67
|
-
if [[ "${ANI%.*}" -le 0 ]] ; then
|
68
|
-
[[ -e "$TMPDIR/$DATASET.LargeContigs.fna" ]] \
|
69
|
-
|| cp "../05.assembly/$DATASET.LargeContigs.fna" \
|
70
|
-
"$TMPDIR/$DATASET.LargeContigs.fna"
|
71
|
-
ANI=$(miga-ani "$TMPDIR/$DATASET.LargeContigs.fna" \
|
72
|
-
"../05.assembly/$i.LargeContigs.fna" "$CORES" "$TMPDIR/03.ani.db")
|
73
|
-
fi
|
74
|
-
fi
|
75
|
-
miga-checkpoint_n
|
76
|
-
done
|
77
|
-
N=10
|
78
|
-
miga-checkpoint_n
|
79
|
-
|