miga-base 0.3.2.1 → 0.3.2.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82b444c1fd986a4bc95f5cca2e2485b2fb0dd29b82d7424ef7615e41d9f901d3
4
- data.tar.gz: 8bc59e799f8af6e13dc1a3e0653743490c44189e056fbe8a9f86dc53b029a6bf
3
+ metadata.gz: 83a243d552f8c0f850cd46e827069e2a0b6abb5b8ed3f2cda41cec21a5b93c85
4
+ data.tar.gz: 85d9838dc0f9708d50e7d4375b8e641bc85ef096d95c88fcc43e8b5d1421e449
5
5
  SHA512:
6
- metadata.gz: a3aabd7fa5fd037b4e427b131c311a66a103f68f5ff0aaf4907abf4663ed285e7010d89a10d2443e72f4d9c1f0a032ba112faf961972da34bc44b449a3531332
7
- data.tar.gz: 4b1f302f219addc604f66526f72171f711c1e89252a7f152d68e839d4235ab723a43811f16664b81dee77b91c149fd38e06e54cca7d8412bf84594f26de36dcb
6
+ metadata.gz: 7705f7987ae8a13a6664d8d3a9ca586bbbebc5447c5858fb5c9980c9d31d97a57e779e6987d4ca49dc06b7ccbb8602f22a2747cb1f55acfda3a5bf4edaa1798f
7
+ data.tar.gz: 3920ee4cdbd9b6666e0d0233c3557218ad30abe4a4e9276de8db9532da628626ba764080766bf27ca5cee335c892cf7c72d610d2fc356dd2da464b24c881c228
@@ -118,8 +118,8 @@ module MiGA::Project::Result
118
118
  return nil unless result_files_exist?(base, %w[.ogs .stats])
119
119
  r = MiGA::Result.new("#{base}.json")
120
120
  r.add_file(:ogs, "miga-project.ogs")
121
+ r.add_file(:abc, "miga-project.abc")
121
122
  r.add_file(:stats, "miga-project.stats")
122
- r.add_file(:rbm, "miga-project.rbm")
123
123
  r.add_file(:core_pan, "miga-project.core-pan.tsv")
124
124
  r.add_file(:core_pan_plot, "miga-project.core-pan.pdf")
125
125
  r
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 2, 1]
13
+ VERSION = [0.3, 2, 3]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -42,4 +42,4 @@ gzip -9 -f miga-project.txt
42
42
 
43
43
  # Finalize
44
44
  miga date > "miga-project.done"
45
- miga add_result -P "$PROJECT" -r "$SCRIPT"
45
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -41,4 +41,4 @@ gzip -9 -f miga-project.txt
41
41
 
42
42
  # Finalize
43
43
  miga date > "miga-project.done"
44
- miga add_result -P "$PROJECT" -r "$SCRIPT"
44
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -50,4 +50,4 @@ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
50
50
 
51
51
  # Finalize
52
52
  miga date > "$DATASET.done"
53
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
53
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
data/scripts/cds.bash CHANGED
@@ -42,4 +42,4 @@ gzip -9 -f "$DATASET.gff3"
42
42
 
43
43
  # Finalize
44
44
  miga date > "$DATASET.done"
45
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
45
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -44,4 +44,4 @@ fi
44
44
 
45
45
  # Finalize
46
46
  miga date > "miga-project.done"
47
- miga add_result -P "$PROJECT" -r "$SCRIPT"
47
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -17,4 +17,4 @@ ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
17
17
  # Finalize
18
18
  rm -R "$TMPDIR"
19
19
  miga date > "$DATASET.done"
20
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
20
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -45,4 +45,4 @@ fi
45
45
 
46
46
  # Finalize
47
47
  miga date > "$DATASET.done"
48
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
48
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -41,4 +41,4 @@ gzip -9 -f miga-project.txt
41
41
 
42
42
  # Finalize
43
43
  miga date > "miga-project.done"
44
- miga add_result -P "$PROJECT" -r "$SCRIPT"
44
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
data/scripts/miga.bash CHANGED
@@ -17,6 +17,8 @@ done
17
17
  [[ -n $DATASET ]] \
18
18
  && miga add -P "$PROJECT" -D "$DATASET" -m "_step=$SCRIPT" --update
19
19
 
20
+ true
21
+
20
22
  #if [[ "$RUNTYPE" == "qsub" ]] ; then
21
23
  #elif [[ "$RUNTYPE" == "msub" ]] ; then
22
24
  #fi
data/scripts/mytaxa.bash CHANGED
@@ -98,4 +98,4 @@ fi
98
98
 
99
99
  # Finalize
100
100
  miga date > "$DATASET.done"
101
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
101
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -92,4 +92,4 @@ fi
92
92
 
93
93
  # Finalize
94
94
  miga date > "$DATASET.done"
95
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
95
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
data/scripts/ogs.bash CHANGED
@@ -14,25 +14,28 @@ miga date > "miga-project.start"
14
14
  DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
15
15
  if [[ ! -s miga-project.ogs ]] ; then
16
16
  # Extract RBMs
17
- [[ -d miga-project.rbm ]] || mkdir miga-project.rbm
18
- echo -n "" > miga-project.log
19
- for i in $DS ; do
20
- for j in $DS ; do
21
- file="miga-project.rbm/$i-$j.rbm"
22
- [[ -s $file ]] && continue
23
- echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
24
- "where seq1='$i' and seq2='$j' ;" \
25
- | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" "\\t" \
26
- > "$file"
27
- [[ -s "$file" ]] || rm "$file"
17
+ if [[ ! -s miga-project.abc ]] ; then
18
+ [[ -d miga-project.tmp ]] || mkdir miga-project.tmp
19
+ for i in $DS ; do
20
+ file="miga-project.tmp/$i.abc"
21
+ [[ -s "$file" ]] && continue
22
+ echo "SELECT seq1,id1,seq2,id2,bitscore from rbm;" \
23
+ | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
24
+ | awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
25
+ > "$file.tmp"
26
+ mv "$file.tmp" "$file"
28
27
  done
29
- echo "$i" >> miga-project.log
30
- done
28
+ cat miga-project.tmp/*.abc > miga-project.abc
29
+ fi
30
+ rm -rf miga-project.tmp
31
31
 
32
32
  # Estimate OGs and Clean RBMs
33
- ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
34
- [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] \
35
- || rm -rf miga-project.rbm
33
+ ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
34
+ if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
35
+ rm miga-project.abc
36
+ else
37
+ gzip -9 miga-project.abc
38
+ fi
36
39
  fi
37
40
 
38
41
  # Calculate Statistics
@@ -43,4 +46,4 @@ Rscript "$MIGA/utils/core-pan-plot.R" \
43
46
 
44
47
  # Finalize
45
48
  miga date > "miga-project.done"
46
- miga add_result -P "$PROJECT" -r "$SCRIPT"
49
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -22,4 +22,4 @@ ruby -I "$MIGA/lib" \
22
22
 
23
23
  # Finalize
24
24
  miga date > "miga-project.done"
25
- miga add_result -P "$PROJECT" -r "$SCRIPT"
25
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -31,4 +31,4 @@ rm -f "../02.trimmed_reads/$b".[12].fastq
31
31
 
32
32
  # Finalize
33
33
  miga date > "$DATASET.done"
34
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
34
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
data/scripts/ssu.bash CHANGED
@@ -33,4 +33,4 @@ fi
33
33
 
34
34
  # Finalize
35
35
  miga date > "$DATASET.done"
36
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
36
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
data/scripts/stats.bash CHANGED
@@ -21,4 +21,4 @@ done
21
21
 
22
22
  # Finalize
23
23
  miga date > "$DATASET.done"
24
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
24
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -24,4 +24,4 @@ ruby "$MIGA/utils/subclades-compile.rb" . \
24
24
 
25
25
  # Finalize
26
26
  miga date > "miga-project.done"
27
- miga add_result -P "$PROJECT" -r "$SCRIPT"
27
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -14,8 +14,9 @@ cd "$DIR"
14
14
  miga date > "$DATASET.start"
15
15
 
16
16
  # Run
17
- ruby "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1
17
+ ruby -I "$MIGA/lib" \
18
+ "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1
18
19
 
19
20
  # Finalize
20
21
  miga date > "$DATASET.done"
21
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
22
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -47,4 +47,4 @@ done
47
47
 
48
48
  # Finalize
49
49
  miga date > "$DATASET.done"
50
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
50
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -57,4 +57,4 @@ rm -f "$b".[12].*.discard
57
57
 
58
58
  # Finalize
59
59
  miga date > "$DATASET.done"
60
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
60
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -25,7 +25,7 @@ module MiGA::DistanceRunner::Pipeline
25
25
  end
26
26
  end
27
27
  end
28
- classif = File.expand_path("miga-project.sc-#{val_cls}", classif)
28
+ classif = "#{classif}/miga-project.sc-#{val_cls}"
29
29
  result_fh.puts [val_cls, val_med, max_val, classif].join("\t")
30
30
  classify(clades, classif, metric, result_fh, val_cls)
31
31
  end
@@ -21,13 +21,14 @@ class MiGA::DistanceRunner
21
21
  @project = MiGA::Project.load(project_path) or
22
22
  raise "No project at #{project_path}"
23
23
  @dataset = project.dataset(dataset_name)
24
- @home = File.expand_path("data/09.distances", project.path)
24
+ @home = File.expand_path('data/09.distances', project.path)
25
25
  # Default opts
26
- @opts[:aai_save_rbm] ||= ENV.fetch("MIGA_AAI_SAVE_RBM") do
27
- project.is_clade? ? "save-rbm" : "no-save-rbm"
26
+ @opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
27
+ project.is_clade? ? 'save-rbm' : 'no-save-rbm'
28
28
  end
29
29
  @opts[:thr] ||= ENV.fetch("CORES"){ 2 }.to_i
30
30
  if opts[:run_taxonomy] && project.metadata[:ref_project]
31
+ @home = File.expand_path('05.taxonomy', @home)
31
32
  @ref_project = MiGA::Project.load(project.metadata[:ref_project])
32
33
  end
33
34
  @ref_project ||= project
@@ -85,7 +86,7 @@ class MiGA::DistanceRunner
85
86
  r = ln.chomp.split("\t")
86
87
  next unless r[1].to_i==val_cls
87
88
  target = ref_project.dataset(r[0])
88
- aai = (metric==:aai) ? aai(target) : 100.0
89
+ aai = (v[1]==:aai) ? aai(target) : 100.0
89
90
  ani(target) if aai >= 90.0
90
91
  end
91
92
  end
data/utils/subclades.R CHANGED
@@ -52,6 +52,7 @@ subclades <- function(ani_file, out_base, thr=1, ani=c()) {
52
52
  s.avg.z <- (s[1,]-mean(s[1,]))/(sd(s[1,])+0.0001)
53
53
  s.neg.z <- (s[2,]-mean(s[2,]))/(sd(s[2,])+0.01)
54
54
  ds <- s.avg.z - s.neg.z - 2/(1:length(k)) - (1:length(k))/50
55
+ if(mean(s[1,]<0)<0.75) ds[s[1,]<0] <- mean(ds) # <- k's with negative average
55
56
  top.n <- k[which.max(ds)]
56
57
 
57
58
  # Classify genomes
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2.1
4
+ version: 0.3.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-28 00:00:00.000000000 Z
11
+ date: 2018-07-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
@@ -167,9 +167,6 @@ files:
167
167
  - lib/miga/tax_index.rb
168
168
  - lib/miga/taxonomy.rb
169
169
  - lib/miga/version.rb
170
- - scripts/_distances_functions.bash
171
- - scripts/_distances_noref_nomulti.bash
172
- - scripts/_distances_ref_nomulti.bash
173
170
  - scripts/aai_distances.bash
174
171
  - scripts/ani_distances.bash
175
172
  - scripts/assembly.bash
@@ -1,104 +0,0 @@
1
- #!/bin/bash
2
- # Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
3
- # $NOMULTI, $REF
4
-
5
- set -e
6
-
7
- if [[ ! -n $MIGA_AAI_SAVE_RBM ]] ; then
8
- MIGA_AAI_SAVE_RBM="save-rbm"
9
- if [[ -n $PROJECT ]] ; then
10
- if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
11
- MIGA_AAI_SAVE_RBM="no-save-rbm"
12
- fi
13
- fi
14
- fi
15
-
16
- fx_exists miga-make_empty_aai_db || function miga-make_empty_aai_db {
17
- local DB=$1
18
- echo "create table if not exists aai(seq1 varchar(256), seq2 varchar(256)," \
19
- " aai float, sd float, n int, omega int);" | sqlite3 "$DB"
20
- }
21
-
22
- fx_exists miga-ds_name || function miga-ds_name {
23
- basename "$1" | perl -pe "s/[^A-Za-z0-9_].*//"
24
- }
25
-
26
- fx_exists miga-aai || function miga-aai {
27
- local F1=$1
28
- local F2=$2
29
- local TH=$3
30
- local DB=$4
31
- local N1
32
- N1=$(miga-ds_name "$F1")
33
- local N2
34
- N2=$(miga-ds_name "$F2")
35
- aai.rb -1 "$F1" -2 "$F2" -t "$TH" -a --lookup-first -S "$DB" --name1 "$N1" \
36
- --name2 "$N2" --$MIGA_AAI_SAVE_RBM || echo "0"
37
- }
38
-
39
- fx_exists miga-ani || function miga-ani {
40
- local F1=$1
41
- local F2=$2
42
- local TH=$3
43
- local DB=$4
44
- local N1
45
- N1=$(miga-ds_name "$F1")
46
- local N2
47
- N2=$(miga-ds_name "$F2")
48
- ani.rb -1 "$F1" -2 "$F2" -t "$TH" -a --no-save-regions --no-save-rbm \
49
- --lookup-first -S "$DB" --name1 "$N1" --name2 "$N2" || echo "0"
50
- }
51
-
52
- fx_exists miga-haai || function miga-haai {
53
- local F1=$1
54
- local F2=$2
55
- local TH=$3
56
- local DB=$4
57
- local AAI_DB=$5
58
- local N1
59
- N1=$(miga-ds_name "$F1")
60
- local N2
61
- N2=$(miga-ds_name "$F2")
62
- local HAAI
63
- HAAI=$(MIGA_AAI_SAVE_RBM="no-save-rbm" miga-aai "$F1" "$F2" "$TH" "$DB")
64
- if [[ "$HAAI" != "" && $(perl -e "print 1 if '$HAAI' <= 90") == "1" ]] ; then
65
- local AAI
66
- AAI=$(perl -e "print (100-exp(2.435076 + 0.4275193*log(100-$HAAI)))")
67
- [[ ! -s $AAI_DB ]] && miga-make_empty_aai_db "$AAI_DB"
68
- echo "insert into aai values('$N1','$N2','$AAI',0,0,0);" | sqlite3 "$AAI_DB"
69
- echo "$AAI"
70
- fi
71
- }
72
-
73
- fx_exists miga-haai_or_aai || function miga-haai_or_aai {
74
- local FH1=$1
75
- local FH2=$2
76
- local DBH=$3
77
- local F1=$4
78
- local F2=$5
79
- local DB=$6
80
- local TH=$7
81
- local AAI
82
- AAI=$(miga-haai "$FH1" "$FH2" "$TH" "$DBH" "$DB")
83
- [[ "${AAI%.*}" -le 0 ]] && AAI=$(miga-aai "$F1" "$F2" "$TH" "$DB")
84
- echo "$AAI"
85
- }
86
-
87
- fx_exists miga-val_from_db || function miga-val_from_db {
88
- local N1=$1
89
- local N2=$2
90
- local DB=$3
91
- local MT=$4
92
- if [[ -s $DB ]] ; then
93
- echo "select $MT from $MT where seq1='$N1' and seq2='$N2';" \
94
- | sqlite3 "$DB" || echo 0
95
- fi
96
- }
97
-
98
- fx_exists miga-aai_from_db || function miga-aai_from_db {
99
- miga-val_from_db "$1" "$2" "$3" aai
100
- }
101
-
102
- fx_exists miga-ani_from_db || function miga-ani_from_db {
103
- miga-val_from_db "$1" "$2" "$3" ani
104
- }
@@ -1,149 +0,0 @@
1
- #!/bin/bash
2
- # Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
3
- # $NOMULTI, $REF
4
-
5
- set -e
6
-
7
- # Deal with previous runs (if any)
8
- exists "$DATASET".haai.db && cp "$DATASET".haai.db "$TMPDIR"
9
- exists "$DATASET".a[an]i.db && cp "$DATASET".a[an]i.db "$TMPDIR"
10
- exists "$DATASET".a[an]i.9[05] && rm "$DATASET".a[an]i.9[05]
11
- N=0
12
- fx_exists miga-checkpoint_n || function miga-checkpoint_n {
13
- let N=$N+1
14
- if [[ $N -ge 10 ]] ; then
15
- for metric in haai aai ani ; do
16
- if [[ -s $TMPDIR/$DATASET.$metric.db ]] ; then
17
- echo "select count(*) from ${metric#h};" \
18
- | sqlite3 "$TMPDIR/$DATASET.$metric.db" \
19
- >/dev/null || exit 1
20
- cp "$TMPDIR/$DATASET.$metric.db" .
21
- fi
22
- done
23
- N=0
24
- fi
25
- }
26
-
27
- fx_exists miga-noref_haai_or_aai || function miga-noref_haai_or_aai {
28
- local Q=$1
29
- local S=$2
30
- [[ -s $TMPDIR/$Q.faa ]] \
31
- || cp "$PROJECT/data/06.cds/$Q.faa" "$TMPDIR/$Q.faa"
32
- miga-haai_or_aai "$PROJECT/$ESS/$Q.ess.faa" "$S_PROJ/$ESS/$S.ess.faa" \
33
- "$TMPDIR/$Q.haai.db" "$TMPDIR/$Q.faa" "$S_PROJ/data/06.cds/$S.faa" \
34
- "$TMPDIR/$Q.aai.db" "$CORES"
35
- }
36
-
37
- fx_exists miga-noref_ani || function miga-noref_ani {
38
- local Q=$1
39
- local S=$2
40
- [[ -s "$TMPDIR/$Q.LargeContigs.fna" ]] \
41
- || cp "$PROJECT/data/05.assembly/$Q.LargeContigs.fna" \
42
- "$TMPDIR/$Q.LargeContigs.fna"
43
- miga-ani "$TMPDIR/$Q.LargeContigs.fna" \
44
- "$S_PROJ/data/05.assembly/$S.LargeContigs.fna" \
45
- "$CORES" "$TMPDIR/$Q.ani.db"
46
- }
47
-
48
- # Calculate the classification-informed AAI/ANI traverse (if not classified)
49
- ESS="data/07.annotation/01.function/01.essential"
50
- if [[ $(miga about -P "$S_PROJ" -m type) != "clade" ]] ; then
51
- # Classify aai-clade (if project type is not clade)
52
- CLADES="$S_PROJ/data/10.clades/01.find"
53
- METRIC="aai"
54
- REF_TABLE="$S_PROJ/data/09.distances/02.aai/miga-project.txt.gz"
55
- else
56
- # Classify ani-clade (if project type is clade)
57
- CLADES="$S_PROJ/data/10.clades/02.ani"
58
- METRIC="ani"
59
- REF_TABLE="$S_PROJ/data/09.distances/03.ani/miga-project.txt.gz"
60
- fi
61
-
62
- CLASSIF="."
63
- [[ -e "$DATASET.$METRIC-medoids.tsv" ]] && rm "$DATASET.$METRIC-medoids.tsv"
64
- [[ -s "$CLADES/$CLASSIF/miga-project.medoids" ]] || \
65
- touch "$DATASET.$METRIC-medoids.tsv" "${DATASET}.${METRIC}.db"
66
- while [[ -s "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
67
- MAX_VAL=0
68
- VAL_MED=""
69
- VAL_CLS=""
70
- i_n=0
71
- while read -r i ; do
72
- let i_n=$i_n+1
73
- if [[ $METRIC == "aai" ]] ; then
74
- VAL=$(miga-noref_haai_or_aai "$DATASET" "$i")
75
- else
76
- VAL=$(miga-noref_ani "$DATASET" "$i")
77
- fi
78
- miga-checkpoint_n
79
- if [[ $(perl -e "print 1 if '$VAL' >= '$MAX_VAL'") == "1" ]] ; then
80
- MAX_VAL=$VAL
81
- VAL_MED=$i
82
- VAL_CLS=$i_n
83
- echo "[$CLASSIF] New max: $VAL_MED ($VAL_CLS): $MAX_VAL"
84
- fi
85
- done < "$CLADES/$CLASSIF/miga-project.medoids"
86
- CLASSIF="$CLASSIF/miga-project.sc-$VAL_CLS"
87
- echo "$VAL_CLS $VAL_MED $MAX_VAL $CLASSIF" \
88
- >> "$DATASET.$METRIC-medoids.tsv"
89
- done
90
-
91
- # Calculate all the AAIs/ANIs against the lowest subclade (if classified)
92
- if [[ "$CLASSIF" != "." ]] ; then
93
- PAR=$(dirname "$CLADES/$CLASSIF")/miga-project.classif
94
- if [[ -s "$PAR" ]] ; then
95
- while read -r i ; do
96
- if [[ $METRIC == "aai" ]] ; then
97
- AAI=$(miga-noref_haai_or_aai "$DATASET" "$i")
98
- else
99
- AAI=100
100
- fi
101
- if [[ $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
102
- miga-noref_ani "$DATASET" "$i"
103
- fi
104
- miga-checkpoint_n
105
- done < <(awk "\$2==$VAL_CLS{print \$1}" < "$PAR")
106
- fi
107
- fi
108
-
109
- # Finalize
110
- N=11
111
- miga-checkpoint_n
112
-
113
- # Build tree with medoids
114
- if [[ -s "${DATASET}.${METRIC}.db" ]] ; then
115
- echo "select seq2 from $METRIC;" | sqlite3 "${DATASET}.${METRIC}.db" \
116
- | sort | uniq > "${DATASET}.tmp0"
117
- perl -pe "s/^/^/" "${DATASET}.tmp0" | perl -pe "s/$/\\t/" \
118
- > "${DATASET}.tmp1"
119
- perl -pe "s/^/\\t/" "${DATASET}.tmp0" | perl -pe "s/$/\\t/" \
120
- > "${DATASET}.tmp2"
121
- echo "a b value" | tr " " "\\t" > "${DATASET}.txt"
122
- gzip -c -d "$REF_TABLE" | cut -f 2-4 \
123
- | grep -f "${DATASET}.tmp1" | grep -f "${DATASET}.tmp2" \
124
- >> "${DATASET}.txt"
125
- echo "select seq1, seq2, $METRIC from $METRIC;" \
126
- | sqlite3 "${DATASET}.${METRIC}.db" | tr "\\|" "\\t" \
127
- >> "${DATASET}.txt"
128
- "$MIGA/utils/ref-tree.R" "${DATASET}.txt" "$DATASET" "$DATASET"
129
- rm "$DATASET".tmp[012] "${DATASET}.txt"
130
- fi
131
-
132
- # Test taxonomy
133
- (
134
- trap 'rm "$DATASET.json" "$DATASET.done"' EXIT
135
- FLAGS=""
136
- [[ "$PROJECT" == "$S_PROJ" ]] || FLAGS="--ref-project"
137
- miga date > "$DATASET.done"
138
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
139
- miga tax_test -P "$PROJECT" -D "$DATASET" -t intax \
140
- $FLAGS > "$DATASET.intax.txt"
141
- )
142
-
143
- # Transfer taxonomy
144
- TAX_PVALUE=$(miga about -P "$PROJECT" -m tax_pvalue)
145
- [[ "$TAX_PVALUE" == "?" ]] && TAX_PVALUE="0.05"
146
- NEW_TAX=$(tail -n +6 "$DATASET.intax.txt" | head -n -3 \
147
- | awk '$3<'$TAX_PVALUE'{print $1":"$2}' | grep -v "?" \
148
- | tr "\\n" ' ' | perl -pe 's/ *$//')
149
- miga tax_set -P "$PROJECT" -D "$DATASET" -s "$NEW_TAX"
@@ -1,79 +0,0 @@
1
- #!/bin/bash
2
- # Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
3
- # $NOMULTI, $REF
4
-
5
- set -e
6
-
7
- fx_exists miga-checkpoint_n || function miga-checkpoint_n {
8
- if [[ $N -eq 10 ]] ; then
9
- for t in 01.haai 02.aai 03.ani ; do
10
- if [[ -s $TMPDIR/$t.db ]] ; then
11
- tab="aai"
12
- [[ "$t" == "03.ani" ]] && tab="ani"
13
- echo "select count(*) from $tab;" \
14
- | sqlite3 "$TMPDIR/$t.db" \
15
- >/dev/null || exit 1
16
- cp "$TMPDIR/$t.db" "$t/$DATASET.db"
17
- fi
18
- done
19
- N=0
20
- fi
21
- let N=$N+1
22
- }
23
-
24
- ESS="../07.annotation/01.function/01.essential"
25
-
26
- # Initialize temporals
27
- for t in 01.haai 02.aai 03.ani ; do
28
- [[ -s $t/$DATASET.db ]] && cp "$t/$DATASET.db" "$TMPDIR/$t.db"
29
- done
30
- N=1
31
-
32
- # Traverse "nearly-half" of the ref-datasets using first-come-first-served
33
- for i in $(miga list_datasets -P "$PROJECT" --ref --no-multi) ; do
34
- echo "[ $(date "+%Y-%m-%d %H:%M:%S %z") ] $i"
35
- AAI=""; ANI="";
36
- # Check if the i-th dataset is ready
37
- [[ -s $ESS/$i.done && -s $ESS/$i.json ]] || continue
38
- # Check if this is done (e.g., in a previous failed iteration)
39
- AAI=$(miga-aai_from_db "$DATASET" "$i" "$TMPDIR/02.aai.db")
40
- # Try the other direction
41
- [[ "${AAI%.*}" -le 0 ]] \
42
- && AAI=$(miga-aai_from_db "$i" "$DATASET" "02.aai/$i.db")
43
- # Try with hAAI
44
- if [[ "${AAI%.*}" -le 0 ]] ; then
45
- [[ -e "$TMPDIR/$DATASET.ess.faa" ]] \
46
- || cp "$ESS/$DATASET.ess.faa" "$TMPDIR/$DATASET.ess.faa"
47
- AAI=$(miga-haai "$TMPDIR/$DATASET.ess.faa" "$ESS/$i.ess.faa" \
48
- "$CORES" "$TMPDIR/01.haai.db" "$TMPDIR/02.aai.db")
49
- fi
50
- # Try with complete AAI
51
- if [[ "${AAI%.*}" -le 0 ]] ; then
52
- [[ -e "$TMPDIR/$DATASET.faa" ]] \
53
- || cp "../06.cds/$DATASET.faa" "$TMPDIR/$DATASET.faa"
54
- AAI=$(miga-aai "$TMPDIR/$DATASET.faa" "../06.cds/$i.faa" \
55
- "$CORES" "$TMPDIR/02.aai.db")
56
- fi
57
- # Check if ANI is meaningful
58
- if [[ -e "../05.assembly/$DATASET.LargeContigs.fna" \
59
- && -e "../05.assembly/$i.LargeContigs.fna" \
60
- && $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
61
- # Check if this is done (e.g., in a previous failed iteration)
62
- ANI=$(miga-ani_from_db "$DATASET" "$i" "$TMPDIR/03.ani.db")
63
- # Try the other direction
64
- [[ "${ANI%.*}" -le 0 ]] \
65
- && ANI=$(miga-ani_from_db "$i" "$DATASET" "03.ani/$i.db")
66
- # Calculate it
67
- if [[ "${ANI%.*}" -le 0 ]] ; then
68
- [[ -e "$TMPDIR/$DATASET.LargeContigs.fna" ]] \
69
- || cp "../05.assembly/$DATASET.LargeContigs.fna" \
70
- "$TMPDIR/$DATASET.LargeContigs.fna"
71
- ANI=$(miga-ani "$TMPDIR/$DATASET.LargeContigs.fna" \
72
- "../05.assembly/$i.LargeContigs.fna" "$CORES" "$TMPDIR/03.ani.db")
73
- fi
74
- fi
75
- miga-checkpoint_n
76
- done
77
- N=10
78
- miga-checkpoint_n
79
-