miga-base 0.3.2.1 → 0.3.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/project/result.rb +1 -1
- data/lib/miga/version.rb +1 -1
- data/scripts/aai_distances.bash +1 -1
- data/scripts/ani_distances.bash +1 -1
- data/scripts/assembly.bash +1 -1
- data/scripts/cds.bash +1 -1
- data/scripts/clade_finding.bash +1 -1
- data/scripts/distances.bash +1 -1
- data/scripts/essential_genes.bash +1 -1
- data/scripts/haai_distances.bash +1 -1
- data/scripts/miga.bash +2 -0
- data/scripts/mytaxa.bash +1 -1
- data/scripts/mytaxa_scan.bash +1 -1
- data/scripts/ogs.bash +20 -17
- data/scripts/project_stats.bash +1 -1
- data/scripts/read_quality.bash +1 -1
- data/scripts/ssu.bash +1 -1
- data/scripts/stats.bash +1 -1
- data/scripts/subclades.bash +1 -1
- data/scripts/taxonomy.bash +3 -2
- data/scripts/trimmed_fasta.bash +1 -1
- data/scripts/trimmed_reads.bash +1 -1
- data/utils/distance/pipeline.rb +1 -1
- data/utils/distance/runner.rb +5 -4
- data/utils/subclades.R +1 -0
- metadata +2 -5
- data/scripts/_distances_functions.bash +0 -104
- data/scripts/_distances_noref_nomulti.bash +0 -149
- data/scripts/_distances_ref_nomulti.bash +0 -79
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 83a243d552f8c0f850cd46e827069e2a0b6abb5b8ed3f2cda41cec21a5b93c85
|
4
|
+
data.tar.gz: 85d9838dc0f9708d50e7d4375b8e641bc85ef096d95c88fcc43e8b5d1421e449
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7705f7987ae8a13a6664d8d3a9ca586bbbebc5447c5858fb5c9980c9d31d97a57e779e6987d4ca49dc06b7ccbb8602f22a2747cb1f55acfda3a5bf4edaa1798f
|
7
|
+
data.tar.gz: 3920ee4cdbd9b6666e0d0233c3557218ad30abe4a4e9276de8db9532da628626ba764080766bf27ca5cee335c892cf7c72d610d2fc356dd2da464b24c881c228
|
data/lib/miga/project/result.rb
CHANGED
@@ -118,8 +118,8 @@ module MiGA::Project::Result
|
|
118
118
|
return nil unless result_files_exist?(base, %w[.ogs .stats])
|
119
119
|
r = MiGA::Result.new("#{base}.json")
|
120
120
|
r.add_file(:ogs, "miga-project.ogs")
|
121
|
+
r.add_file(:abc, "miga-project.abc")
|
121
122
|
r.add_file(:stats, "miga-project.stats")
|
122
|
-
r.add_file(:rbm, "miga-project.rbm")
|
123
123
|
r.add_file(:core_pan, "miga-project.core-pan.tsv")
|
124
124
|
r.add_file(:core_pan_plot, "miga-project.core-pan.pdf")
|
125
125
|
r
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.3, 2,
|
13
|
+
VERSION = [0.3, 2, 3]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
data/scripts/aai_distances.bash
CHANGED
data/scripts/ani_distances.bash
CHANGED
data/scripts/assembly.bash
CHANGED
data/scripts/cds.bash
CHANGED
data/scripts/clade_finding.bash
CHANGED
data/scripts/distances.bash
CHANGED
data/scripts/haai_distances.bash
CHANGED
data/scripts/miga.bash
CHANGED
data/scripts/mytaxa.bash
CHANGED
data/scripts/mytaxa_scan.bash
CHANGED
data/scripts/ogs.bash
CHANGED
@@ -14,25 +14,28 @@ miga date > "miga-project.start"
|
|
14
14
|
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
15
15
|
if [[ ! -s miga-project.ogs ]] ; then
|
16
16
|
# Extract RBMs
|
17
|
-
[[ -
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
file
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
[[ -s "$file" ]] || rm "$file"
|
17
|
+
if [[ ! -s miga-project.abc ]] ; then
|
18
|
+
[[ -d miga-project.tmp ]] || mkdir miga-project.tmp
|
19
|
+
for i in $DS ; do
|
20
|
+
file="miga-project.tmp/$i.abc"
|
21
|
+
[[ -s "$file" ]] && continue
|
22
|
+
echo "SELECT seq1,id1,seq2,id2,bitscore from rbm;" \
|
23
|
+
| sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
|
24
|
+
| awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
|
25
|
+
> "$file.tmp"
|
26
|
+
mv "$file.tmp" "$file"
|
28
27
|
done
|
29
|
-
|
30
|
-
|
28
|
+
cat miga-project.tmp/*.abc > miga-project.abc
|
29
|
+
fi
|
30
|
+
rm -rf miga-project.tmp
|
31
31
|
|
32
32
|
# Estimate OGs and Clean RBMs
|
33
|
-
ogs.mcl.rb -o miga-project.ogs
|
34
|
-
[[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]]
|
35
|
-
|
33
|
+
ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
|
34
|
+
if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
|
35
|
+
rm miga-project.abc
|
36
|
+
else
|
37
|
+
gzip -9 miga-project.abc
|
38
|
+
fi
|
36
39
|
fi
|
37
40
|
|
38
41
|
# Calculate Statistics
|
@@ -43,4 +46,4 @@ Rscript "$MIGA/utils/core-pan-plot.R" \
|
|
43
46
|
|
44
47
|
# Finalize
|
45
48
|
miga date > "miga-project.done"
|
46
|
-
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
49
|
+
miga add_result -P "$PROJECT" -r "$SCRIPT" -f
|
data/scripts/project_stats.bash
CHANGED
data/scripts/read_quality.bash
CHANGED
data/scripts/ssu.bash
CHANGED
data/scripts/stats.bash
CHANGED
data/scripts/subclades.bash
CHANGED
data/scripts/taxonomy.bash
CHANGED
@@ -14,8 +14,9 @@ cd "$DIR"
|
|
14
14
|
miga date > "$DATASET.start"
|
15
15
|
|
16
16
|
# Run
|
17
|
-
ruby "$MIGA/
|
17
|
+
ruby -I "$MIGA/lib" \
|
18
|
+
"$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1
|
18
19
|
|
19
20
|
# Finalize
|
20
21
|
miga date > "$DATASET.done"
|
21
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
22
|
+
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
|
data/scripts/trimmed_fasta.bash
CHANGED
data/scripts/trimmed_reads.bash
CHANGED
data/utils/distance/pipeline.rb
CHANGED
@@ -25,7 +25,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
25
25
|
end
|
26
26
|
end
|
27
27
|
end
|
28
|
-
classif =
|
28
|
+
classif = "#{classif}/miga-project.sc-#{val_cls}"
|
29
29
|
result_fh.puts [val_cls, val_med, max_val, classif].join("\t")
|
30
30
|
classify(clades, classif, metric, result_fh, val_cls)
|
31
31
|
end
|
data/utils/distance/runner.rb
CHANGED
@@ -21,13 +21,14 @@ class MiGA::DistanceRunner
|
|
21
21
|
@project = MiGA::Project.load(project_path) or
|
22
22
|
raise "No project at #{project_path}"
|
23
23
|
@dataset = project.dataset(dataset_name)
|
24
|
-
@home = File.expand_path(
|
24
|
+
@home = File.expand_path('data/09.distances', project.path)
|
25
25
|
# Default opts
|
26
|
-
@opts[:aai_save_rbm] ||= ENV.fetch(
|
27
|
-
project.is_clade? ?
|
26
|
+
@opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
|
27
|
+
project.is_clade? ? 'save-rbm' : 'no-save-rbm'
|
28
28
|
end
|
29
29
|
@opts[:thr] ||= ENV.fetch("CORES"){ 2 }.to_i
|
30
30
|
if opts[:run_taxonomy] && project.metadata[:ref_project]
|
31
|
+
@home = File.expand_path('05.taxonomy', @home)
|
31
32
|
@ref_project = MiGA::Project.load(project.metadata[:ref_project])
|
32
33
|
end
|
33
34
|
@ref_project ||= project
|
@@ -85,7 +86,7 @@ class MiGA::DistanceRunner
|
|
85
86
|
r = ln.chomp.split("\t")
|
86
87
|
next unless r[1].to_i==val_cls
|
87
88
|
target = ref_project.dataset(r[0])
|
88
|
-
aai = (
|
89
|
+
aai = (v[1]==:aai) ? aai(target) : 100.0
|
89
90
|
ani(target) if aai >= 90.0
|
90
91
|
end
|
91
92
|
end
|
data/utils/subclades.R
CHANGED
@@ -52,6 +52,7 @@ subclades <- function(ani_file, out_base, thr=1, ani=c()) {
|
|
52
52
|
s.avg.z <- (s[1,]-mean(s[1,]))/(sd(s[1,])+0.0001)
|
53
53
|
s.neg.z <- (s[2,]-mean(s[2,]))/(sd(s[2,])+0.01)
|
54
54
|
ds <- s.avg.z - s.neg.z - 2/(1:length(k)) - (1:length(k))/50
|
55
|
+
if(mean(s[1,]<0)<0.75) ds[s[1,]<0] <- mean(ds) # <- k's with negative average
|
55
56
|
top.n <- k[which.max(ds)]
|
56
57
|
|
57
58
|
# Classify genomes
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2.
|
4
|
+
version: 0.3.2.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-07-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -167,9 +167,6 @@ files:
|
|
167
167
|
- lib/miga/tax_index.rb
|
168
168
|
- lib/miga/taxonomy.rb
|
169
169
|
- lib/miga/version.rb
|
170
|
-
- scripts/_distances_functions.bash
|
171
|
-
- scripts/_distances_noref_nomulti.bash
|
172
|
-
- scripts/_distances_ref_nomulti.bash
|
173
170
|
- scripts/aai_distances.bash
|
174
171
|
- scripts/ani_distances.bash
|
175
172
|
- scripts/assembly.bash
|
@@ -1,104 +0,0 @@
|
|
1
|
-
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
|
3
|
-
# $NOMULTI, $REF
|
4
|
-
|
5
|
-
set -e
|
6
|
-
|
7
|
-
if [[ ! -n $MIGA_AAI_SAVE_RBM ]] ; then
|
8
|
-
MIGA_AAI_SAVE_RBM="save-rbm"
|
9
|
-
if [[ -n $PROJECT ]] ; then
|
10
|
-
if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
|
11
|
-
MIGA_AAI_SAVE_RBM="no-save-rbm"
|
12
|
-
fi
|
13
|
-
fi
|
14
|
-
fi
|
15
|
-
|
16
|
-
fx_exists miga-make_empty_aai_db || function miga-make_empty_aai_db {
|
17
|
-
local DB=$1
|
18
|
-
echo "create table if not exists aai(seq1 varchar(256), seq2 varchar(256)," \
|
19
|
-
" aai float, sd float, n int, omega int);" | sqlite3 "$DB"
|
20
|
-
}
|
21
|
-
|
22
|
-
fx_exists miga-ds_name || function miga-ds_name {
|
23
|
-
basename "$1" | perl -pe "s/[^A-Za-z0-9_].*//"
|
24
|
-
}
|
25
|
-
|
26
|
-
fx_exists miga-aai || function miga-aai {
|
27
|
-
local F1=$1
|
28
|
-
local F2=$2
|
29
|
-
local TH=$3
|
30
|
-
local DB=$4
|
31
|
-
local N1
|
32
|
-
N1=$(miga-ds_name "$F1")
|
33
|
-
local N2
|
34
|
-
N2=$(miga-ds_name "$F2")
|
35
|
-
aai.rb -1 "$F1" -2 "$F2" -t "$TH" -a --lookup-first -S "$DB" --name1 "$N1" \
|
36
|
-
--name2 "$N2" --$MIGA_AAI_SAVE_RBM || echo "0"
|
37
|
-
}
|
38
|
-
|
39
|
-
fx_exists miga-ani || function miga-ani {
|
40
|
-
local F1=$1
|
41
|
-
local F2=$2
|
42
|
-
local TH=$3
|
43
|
-
local DB=$4
|
44
|
-
local N1
|
45
|
-
N1=$(miga-ds_name "$F1")
|
46
|
-
local N2
|
47
|
-
N2=$(miga-ds_name "$F2")
|
48
|
-
ani.rb -1 "$F1" -2 "$F2" -t "$TH" -a --no-save-regions --no-save-rbm \
|
49
|
-
--lookup-first -S "$DB" --name1 "$N1" --name2 "$N2" || echo "0"
|
50
|
-
}
|
51
|
-
|
52
|
-
fx_exists miga-haai || function miga-haai {
|
53
|
-
local F1=$1
|
54
|
-
local F2=$2
|
55
|
-
local TH=$3
|
56
|
-
local DB=$4
|
57
|
-
local AAI_DB=$5
|
58
|
-
local N1
|
59
|
-
N1=$(miga-ds_name "$F1")
|
60
|
-
local N2
|
61
|
-
N2=$(miga-ds_name "$F2")
|
62
|
-
local HAAI
|
63
|
-
HAAI=$(MIGA_AAI_SAVE_RBM="no-save-rbm" miga-aai "$F1" "$F2" "$TH" "$DB")
|
64
|
-
if [[ "$HAAI" != "" && $(perl -e "print 1 if '$HAAI' <= 90") == "1" ]] ; then
|
65
|
-
local AAI
|
66
|
-
AAI=$(perl -e "print (100-exp(2.435076 + 0.4275193*log(100-$HAAI)))")
|
67
|
-
[[ ! -s $AAI_DB ]] && miga-make_empty_aai_db "$AAI_DB"
|
68
|
-
echo "insert into aai values('$N1','$N2','$AAI',0,0,0);" | sqlite3 "$AAI_DB"
|
69
|
-
echo "$AAI"
|
70
|
-
fi
|
71
|
-
}
|
72
|
-
|
73
|
-
fx_exists miga-haai_or_aai || function miga-haai_or_aai {
|
74
|
-
local FH1=$1
|
75
|
-
local FH2=$2
|
76
|
-
local DBH=$3
|
77
|
-
local F1=$4
|
78
|
-
local F2=$5
|
79
|
-
local DB=$6
|
80
|
-
local TH=$7
|
81
|
-
local AAI
|
82
|
-
AAI=$(miga-haai "$FH1" "$FH2" "$TH" "$DBH" "$DB")
|
83
|
-
[[ "${AAI%.*}" -le 0 ]] && AAI=$(miga-aai "$F1" "$F2" "$TH" "$DB")
|
84
|
-
echo "$AAI"
|
85
|
-
}
|
86
|
-
|
87
|
-
fx_exists miga-val_from_db || function miga-val_from_db {
|
88
|
-
local N1=$1
|
89
|
-
local N2=$2
|
90
|
-
local DB=$3
|
91
|
-
local MT=$4
|
92
|
-
if [[ -s $DB ]] ; then
|
93
|
-
echo "select $MT from $MT where seq1='$N1' and seq2='$N2';" \
|
94
|
-
| sqlite3 "$DB" || echo 0
|
95
|
-
fi
|
96
|
-
}
|
97
|
-
|
98
|
-
fx_exists miga-aai_from_db || function miga-aai_from_db {
|
99
|
-
miga-val_from_db "$1" "$2" "$3" aai
|
100
|
-
}
|
101
|
-
|
102
|
-
fx_exists miga-ani_from_db || function miga-ani_from_db {
|
103
|
-
miga-val_from_db "$1" "$2" "$3" ani
|
104
|
-
}
|
@@ -1,149 +0,0 @@
|
|
1
|
-
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
|
3
|
-
# $NOMULTI, $REF
|
4
|
-
|
5
|
-
set -e
|
6
|
-
|
7
|
-
# Deal with previous runs (if any)
|
8
|
-
exists "$DATASET".haai.db && cp "$DATASET".haai.db "$TMPDIR"
|
9
|
-
exists "$DATASET".a[an]i.db && cp "$DATASET".a[an]i.db "$TMPDIR"
|
10
|
-
exists "$DATASET".a[an]i.9[05] && rm "$DATASET".a[an]i.9[05]
|
11
|
-
N=0
|
12
|
-
fx_exists miga-checkpoint_n || function miga-checkpoint_n {
|
13
|
-
let N=$N+1
|
14
|
-
if [[ $N -ge 10 ]] ; then
|
15
|
-
for metric in haai aai ani ; do
|
16
|
-
if [[ -s $TMPDIR/$DATASET.$metric.db ]] ; then
|
17
|
-
echo "select count(*) from ${metric#h};" \
|
18
|
-
| sqlite3 "$TMPDIR/$DATASET.$metric.db" \
|
19
|
-
>/dev/null || exit 1
|
20
|
-
cp "$TMPDIR/$DATASET.$metric.db" .
|
21
|
-
fi
|
22
|
-
done
|
23
|
-
N=0
|
24
|
-
fi
|
25
|
-
}
|
26
|
-
|
27
|
-
fx_exists miga-noref_haai_or_aai || function miga-noref_haai_or_aai {
|
28
|
-
local Q=$1
|
29
|
-
local S=$2
|
30
|
-
[[ -s $TMPDIR/$Q.faa ]] \
|
31
|
-
|| cp "$PROJECT/data/06.cds/$Q.faa" "$TMPDIR/$Q.faa"
|
32
|
-
miga-haai_or_aai "$PROJECT/$ESS/$Q.ess.faa" "$S_PROJ/$ESS/$S.ess.faa" \
|
33
|
-
"$TMPDIR/$Q.haai.db" "$TMPDIR/$Q.faa" "$S_PROJ/data/06.cds/$S.faa" \
|
34
|
-
"$TMPDIR/$Q.aai.db" "$CORES"
|
35
|
-
}
|
36
|
-
|
37
|
-
fx_exists miga-noref_ani || function miga-noref_ani {
|
38
|
-
local Q=$1
|
39
|
-
local S=$2
|
40
|
-
[[ -s "$TMPDIR/$Q.LargeContigs.fna" ]] \
|
41
|
-
|| cp "$PROJECT/data/05.assembly/$Q.LargeContigs.fna" \
|
42
|
-
"$TMPDIR/$Q.LargeContigs.fna"
|
43
|
-
miga-ani "$TMPDIR/$Q.LargeContigs.fna" \
|
44
|
-
"$S_PROJ/data/05.assembly/$S.LargeContigs.fna" \
|
45
|
-
"$CORES" "$TMPDIR/$Q.ani.db"
|
46
|
-
}
|
47
|
-
|
48
|
-
# Calculate the classification-informed AAI/ANI traverse (if not classified)
|
49
|
-
ESS="data/07.annotation/01.function/01.essential"
|
50
|
-
if [[ $(miga about -P "$S_PROJ" -m type) != "clade" ]] ; then
|
51
|
-
# Classify aai-clade (if project type is not clade)
|
52
|
-
CLADES="$S_PROJ/data/10.clades/01.find"
|
53
|
-
METRIC="aai"
|
54
|
-
REF_TABLE="$S_PROJ/data/09.distances/02.aai/miga-project.txt.gz"
|
55
|
-
else
|
56
|
-
# Classify ani-clade (if project type is clade)
|
57
|
-
CLADES="$S_PROJ/data/10.clades/02.ani"
|
58
|
-
METRIC="ani"
|
59
|
-
REF_TABLE="$S_PROJ/data/09.distances/03.ani/miga-project.txt.gz"
|
60
|
-
fi
|
61
|
-
|
62
|
-
CLASSIF="."
|
63
|
-
[[ -e "$DATASET.$METRIC-medoids.tsv" ]] && rm "$DATASET.$METRIC-medoids.tsv"
|
64
|
-
[[ -s "$CLADES/$CLASSIF/miga-project.medoids" ]] || \
|
65
|
-
touch "$DATASET.$METRIC-medoids.tsv" "${DATASET}.${METRIC}.db"
|
66
|
-
while [[ -s "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
|
67
|
-
MAX_VAL=0
|
68
|
-
VAL_MED=""
|
69
|
-
VAL_CLS=""
|
70
|
-
i_n=0
|
71
|
-
while read -r i ; do
|
72
|
-
let i_n=$i_n+1
|
73
|
-
if [[ $METRIC == "aai" ]] ; then
|
74
|
-
VAL=$(miga-noref_haai_or_aai "$DATASET" "$i")
|
75
|
-
else
|
76
|
-
VAL=$(miga-noref_ani "$DATASET" "$i")
|
77
|
-
fi
|
78
|
-
miga-checkpoint_n
|
79
|
-
if [[ $(perl -e "print 1 if '$VAL' >= '$MAX_VAL'") == "1" ]] ; then
|
80
|
-
MAX_VAL=$VAL
|
81
|
-
VAL_MED=$i
|
82
|
-
VAL_CLS=$i_n
|
83
|
-
echo "[$CLASSIF] New max: $VAL_MED ($VAL_CLS): $MAX_VAL"
|
84
|
-
fi
|
85
|
-
done < "$CLADES/$CLASSIF/miga-project.medoids"
|
86
|
-
CLASSIF="$CLASSIF/miga-project.sc-$VAL_CLS"
|
87
|
-
echo "$VAL_CLS $VAL_MED $MAX_VAL $CLASSIF" \
|
88
|
-
>> "$DATASET.$METRIC-medoids.tsv"
|
89
|
-
done
|
90
|
-
|
91
|
-
# Calculate all the AAIs/ANIs against the lowest subclade (if classified)
|
92
|
-
if [[ "$CLASSIF" != "." ]] ; then
|
93
|
-
PAR=$(dirname "$CLADES/$CLASSIF")/miga-project.classif
|
94
|
-
if [[ -s "$PAR" ]] ; then
|
95
|
-
while read -r i ; do
|
96
|
-
if [[ $METRIC == "aai" ]] ; then
|
97
|
-
AAI=$(miga-noref_haai_or_aai "$DATASET" "$i")
|
98
|
-
else
|
99
|
-
AAI=100
|
100
|
-
fi
|
101
|
-
if [[ $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
|
102
|
-
miga-noref_ani "$DATASET" "$i"
|
103
|
-
fi
|
104
|
-
miga-checkpoint_n
|
105
|
-
done < <(awk "\$2==$VAL_CLS{print \$1}" < "$PAR")
|
106
|
-
fi
|
107
|
-
fi
|
108
|
-
|
109
|
-
# Finalize
|
110
|
-
N=11
|
111
|
-
miga-checkpoint_n
|
112
|
-
|
113
|
-
# Build tree with medoids
|
114
|
-
if [[ -s "${DATASET}.${METRIC}.db" ]] ; then
|
115
|
-
echo "select seq2 from $METRIC;" | sqlite3 "${DATASET}.${METRIC}.db" \
|
116
|
-
| sort | uniq > "${DATASET}.tmp0"
|
117
|
-
perl -pe "s/^/^/" "${DATASET}.tmp0" | perl -pe "s/$/\\t/" \
|
118
|
-
> "${DATASET}.tmp1"
|
119
|
-
perl -pe "s/^/\\t/" "${DATASET}.tmp0" | perl -pe "s/$/\\t/" \
|
120
|
-
> "${DATASET}.tmp2"
|
121
|
-
echo "a b value" | tr " " "\\t" > "${DATASET}.txt"
|
122
|
-
gzip -c -d "$REF_TABLE" | cut -f 2-4 \
|
123
|
-
| grep -f "${DATASET}.tmp1" | grep -f "${DATASET}.tmp2" \
|
124
|
-
>> "${DATASET}.txt"
|
125
|
-
echo "select seq1, seq2, $METRIC from $METRIC;" \
|
126
|
-
| sqlite3 "${DATASET}.${METRIC}.db" | tr "\\|" "\\t" \
|
127
|
-
>> "${DATASET}.txt"
|
128
|
-
"$MIGA/utils/ref-tree.R" "${DATASET}.txt" "$DATASET" "$DATASET"
|
129
|
-
rm "$DATASET".tmp[012] "${DATASET}.txt"
|
130
|
-
fi
|
131
|
-
|
132
|
-
# Test taxonomy
|
133
|
-
(
|
134
|
-
trap 'rm "$DATASET.json" "$DATASET.done"' EXIT
|
135
|
-
FLAGS=""
|
136
|
-
[[ "$PROJECT" == "$S_PROJ" ]] || FLAGS="--ref-project"
|
137
|
-
miga date > "$DATASET.done"
|
138
|
-
miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
|
139
|
-
miga tax_test -P "$PROJECT" -D "$DATASET" -t intax \
|
140
|
-
$FLAGS > "$DATASET.intax.txt"
|
141
|
-
)
|
142
|
-
|
143
|
-
# Transfer taxonomy
|
144
|
-
TAX_PVALUE=$(miga about -P "$PROJECT" -m tax_pvalue)
|
145
|
-
[[ "$TAX_PVALUE" == "?" ]] && TAX_PVALUE="0.05"
|
146
|
-
NEW_TAX=$(tail -n +6 "$DATASET.intax.txt" | head -n -3 \
|
147
|
-
| awk '$3<'$TAX_PVALUE'{print $1":"$2}' | grep -v "?" \
|
148
|
-
| tr "\\n" ' ' | perl -pe 's/ *$//')
|
149
|
-
miga tax_set -P "$PROJECT" -D "$DATASET" -s "$NEW_TAX"
|
@@ -1,79 +0,0 @@
|
|
1
|
-
#!/bin/bash
|
2
|
-
# Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
|
3
|
-
# $NOMULTI, $REF
|
4
|
-
|
5
|
-
set -e
|
6
|
-
|
7
|
-
fx_exists miga-checkpoint_n || function miga-checkpoint_n {
|
8
|
-
if [[ $N -eq 10 ]] ; then
|
9
|
-
for t in 01.haai 02.aai 03.ani ; do
|
10
|
-
if [[ -s $TMPDIR/$t.db ]] ; then
|
11
|
-
tab="aai"
|
12
|
-
[[ "$t" == "03.ani" ]] && tab="ani"
|
13
|
-
echo "select count(*) from $tab;" \
|
14
|
-
| sqlite3 "$TMPDIR/$t.db" \
|
15
|
-
>/dev/null || exit 1
|
16
|
-
cp "$TMPDIR/$t.db" "$t/$DATASET.db"
|
17
|
-
fi
|
18
|
-
done
|
19
|
-
N=0
|
20
|
-
fi
|
21
|
-
let N=$N+1
|
22
|
-
}
|
23
|
-
|
24
|
-
ESS="../07.annotation/01.function/01.essential"
|
25
|
-
|
26
|
-
# Initialize temporals
|
27
|
-
for t in 01.haai 02.aai 03.ani ; do
|
28
|
-
[[ -s $t/$DATASET.db ]] && cp "$t/$DATASET.db" "$TMPDIR/$t.db"
|
29
|
-
done
|
30
|
-
N=1
|
31
|
-
|
32
|
-
# Traverse "nearly-half" of the ref-datasets using first-come-first-served
|
33
|
-
for i in $(miga list_datasets -P "$PROJECT" --ref --no-multi) ; do
|
34
|
-
echo "[ $(date "+%Y-%m-%d %H:%M:%S %z") ] $i"
|
35
|
-
AAI=""; ANI="";
|
36
|
-
# Check if the i-th dataset is ready
|
37
|
-
[[ -s $ESS/$i.done && -s $ESS/$i.json ]] || continue
|
38
|
-
# Check if this is done (e.g., in a previous failed iteration)
|
39
|
-
AAI=$(miga-aai_from_db "$DATASET" "$i" "$TMPDIR/02.aai.db")
|
40
|
-
# Try the other direction
|
41
|
-
[[ "${AAI%.*}" -le 0 ]] \
|
42
|
-
&& AAI=$(miga-aai_from_db "$i" "$DATASET" "02.aai/$i.db")
|
43
|
-
# Try with hAAI
|
44
|
-
if [[ "${AAI%.*}" -le 0 ]] ; then
|
45
|
-
[[ -e "$TMPDIR/$DATASET.ess.faa" ]] \
|
46
|
-
|| cp "$ESS/$DATASET.ess.faa" "$TMPDIR/$DATASET.ess.faa"
|
47
|
-
AAI=$(miga-haai "$TMPDIR/$DATASET.ess.faa" "$ESS/$i.ess.faa" \
|
48
|
-
"$CORES" "$TMPDIR/01.haai.db" "$TMPDIR/02.aai.db")
|
49
|
-
fi
|
50
|
-
# Try with complete AAI
|
51
|
-
if [[ "${AAI%.*}" -le 0 ]] ; then
|
52
|
-
[[ -e "$TMPDIR/$DATASET.faa" ]] \
|
53
|
-
|| cp "../06.cds/$DATASET.faa" "$TMPDIR/$DATASET.faa"
|
54
|
-
AAI=$(miga-aai "$TMPDIR/$DATASET.faa" "../06.cds/$i.faa" \
|
55
|
-
"$CORES" "$TMPDIR/02.aai.db")
|
56
|
-
fi
|
57
|
-
# Check if ANI is meaningful
|
58
|
-
if [[ -e "../05.assembly/$DATASET.LargeContigs.fna" \
|
59
|
-
&& -e "../05.assembly/$i.LargeContigs.fna" \
|
60
|
-
&& $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
|
61
|
-
# Check if this is done (e.g., in a previous failed iteration)
|
62
|
-
ANI=$(miga-ani_from_db "$DATASET" "$i" "$TMPDIR/03.ani.db")
|
63
|
-
# Try the other direction
|
64
|
-
[[ "${ANI%.*}" -le 0 ]] \
|
65
|
-
&& ANI=$(miga-ani_from_db "$i" "$DATASET" "03.ani/$i.db")
|
66
|
-
# Calculate it
|
67
|
-
if [[ "${ANI%.*}" -le 0 ]] ; then
|
68
|
-
[[ -e "$TMPDIR/$DATASET.LargeContigs.fna" ]] \
|
69
|
-
|| cp "../05.assembly/$DATASET.LargeContigs.fna" \
|
70
|
-
"$TMPDIR/$DATASET.LargeContigs.fna"
|
71
|
-
ANI=$(miga-ani "$TMPDIR/$DATASET.LargeContigs.fna" \
|
72
|
-
"../05.assembly/$i.LargeContigs.fna" "$CORES" "$TMPDIR/03.ani.db")
|
73
|
-
fi
|
74
|
-
fi
|
75
|
-
miga-checkpoint_n
|
76
|
-
done
|
77
|
-
N=10
|
78
|
-
miga-checkpoint_n
|
79
|
-
|