miga-base 0.3.2.1 → 0.3.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 82b444c1fd986a4bc95f5cca2e2485b2fb0dd29b82d7424ef7615e41d9f901d3
4
- data.tar.gz: 8bc59e799f8af6e13dc1a3e0653743490c44189e056fbe8a9f86dc53b029a6bf
3
+ metadata.gz: 83a243d552f8c0f850cd46e827069e2a0b6abb5b8ed3f2cda41cec21a5b93c85
4
+ data.tar.gz: 85d9838dc0f9708d50e7d4375b8e641bc85ef096d95c88fcc43e8b5d1421e449
5
5
  SHA512:
6
- metadata.gz: a3aabd7fa5fd037b4e427b131c311a66a103f68f5ff0aaf4907abf4663ed285e7010d89a10d2443e72f4d9c1f0a032ba112faf961972da34bc44b449a3531332
7
- data.tar.gz: 4b1f302f219addc604f66526f72171f711c1e89252a7f152d68e839d4235ab723a43811f16664b81dee77b91c149fd38e06e54cca7d8412bf84594f26de36dcb
6
+ metadata.gz: 7705f7987ae8a13a6664d8d3a9ca586bbbebc5447c5858fb5c9980c9d31d97a57e779e6987d4ca49dc06b7ccbb8602f22a2747cb1f55acfda3a5bf4edaa1798f
7
+ data.tar.gz: 3920ee4cdbd9b6666e0d0233c3557218ad30abe4a4e9276de8db9532da628626ba764080766bf27ca5cee335c892cf7c72d610d2fc356dd2da464b24c881c228
@@ -118,8 +118,8 @@ module MiGA::Project::Result
118
118
  return nil unless result_files_exist?(base, %w[.ogs .stats])
119
119
  r = MiGA::Result.new("#{base}.json")
120
120
  r.add_file(:ogs, "miga-project.ogs")
121
+ r.add_file(:abc, "miga-project.abc")
121
122
  r.add_file(:stats, "miga-project.stats")
122
- r.add_file(:rbm, "miga-project.rbm")
123
123
  r.add_file(:core_pan, "miga-project.core-pan.tsv")
124
124
  r.add_file(:core_pan_plot, "miga-project.core-pan.pdf")
125
125
  r
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 2, 1]
13
+ VERSION = [0.3, 2, 3]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -42,4 +42,4 @@ gzip -9 -f miga-project.txt
42
42
 
43
43
  # Finalize
44
44
  miga date > "miga-project.done"
45
- miga add_result -P "$PROJECT" -r "$SCRIPT"
45
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -41,4 +41,4 @@ gzip -9 -f miga-project.txt
41
41
 
42
42
  # Finalize
43
43
  miga date > "miga-project.done"
44
- miga add_result -P "$PROJECT" -r "$SCRIPT"
44
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -50,4 +50,4 @@ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
50
50
 
51
51
  # Finalize
52
52
  miga date > "$DATASET.done"
53
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
53
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
data/scripts/cds.bash CHANGED
@@ -42,4 +42,4 @@ gzip -9 -f "$DATASET.gff3"
42
42
 
43
43
  # Finalize
44
44
  miga date > "$DATASET.done"
45
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
45
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -44,4 +44,4 @@ fi
44
44
 
45
45
  # Finalize
46
46
  miga date > "miga-project.done"
47
- miga add_result -P "$PROJECT" -r "$SCRIPT"
47
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -17,4 +17,4 @@ ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
17
17
  # Finalize
18
18
  rm -R "$TMPDIR"
19
19
  miga date > "$DATASET.done"
20
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
20
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -45,4 +45,4 @@ fi
45
45
 
46
46
  # Finalize
47
47
  miga date > "$DATASET.done"
48
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
48
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -41,4 +41,4 @@ gzip -9 -f miga-project.txt
41
41
 
42
42
  # Finalize
43
43
  miga date > "miga-project.done"
44
- miga add_result -P "$PROJECT" -r "$SCRIPT"
44
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
data/scripts/miga.bash CHANGED
@@ -17,6 +17,8 @@ done
17
17
  [[ -n $DATASET ]] \
18
18
  && miga add -P "$PROJECT" -D "$DATASET" -m "_step=$SCRIPT" --update
19
19
 
20
+ true
21
+
20
22
  #if [[ "$RUNTYPE" == "qsub" ]] ; then
21
23
  #elif [[ "$RUNTYPE" == "msub" ]] ; then
22
24
  #fi
data/scripts/mytaxa.bash CHANGED
@@ -98,4 +98,4 @@ fi
98
98
 
99
99
  # Finalize
100
100
  miga date > "$DATASET.done"
101
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
101
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -92,4 +92,4 @@ fi
92
92
 
93
93
  # Finalize
94
94
  miga date > "$DATASET.done"
95
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
95
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
data/scripts/ogs.bash CHANGED
@@ -14,25 +14,28 @@ miga date > "miga-project.start"
14
14
  DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
15
15
  if [[ ! -s miga-project.ogs ]] ; then
16
16
  # Extract RBMs
17
- [[ -d miga-project.rbm ]] || mkdir miga-project.rbm
18
- echo -n "" > miga-project.log
19
- for i in $DS ; do
20
- for j in $DS ; do
21
- file="miga-project.rbm/$i-$j.rbm"
22
- [[ -s $file ]] && continue
23
- echo "SELECT id1,id2,id,0,0,0,0,0,0,0,evalue,bitscore from rbm" \
24
- "where seq1='$i' and seq2='$j' ;" \
25
- | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" "\\t" \
26
- > "$file"
27
- [[ -s "$file" ]] || rm "$file"
17
+ if [[ ! -s miga-project.abc ]] ; then
18
+ [[ -d miga-project.tmp ]] || mkdir miga-project.tmp
19
+ for i in $DS ; do
20
+ file="miga-project.tmp/$i.abc"
21
+ [[ -s "$file" ]] && continue
22
+ echo "SELECT seq1,id1,seq2,id2,bitscore from rbm;" \
23
+ | sqlite3 "../../09.distances/02.aai/$i.db" | tr "\\|" " " \
24
+ | awk '{ print $1">"$2"'"\\t"'"$3">"$4"'"\\t"'"$5 }' \
25
+ > "$file.tmp"
26
+ mv "$file.tmp" "$file"
28
27
  done
29
- echo "$i" >> miga-project.log
30
- done
28
+ cat miga-project.tmp/*.abc > miga-project.abc
29
+ fi
30
+ rm -rf miga-project.tmp
31
31
 
32
32
  # Estimate OGs and Clean RBMs
33
- ogs.mcl.rb -o miga-project.ogs -d miga-project.rbm -t "$CORES"
34
- [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] \
35
- || rm -rf miga-project.rbm
33
+ ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
34
+ if [[ $(miga about -P "$PROJECT" -m clean_ogs) == "false" ]] ; then
35
+ rm miga-project.abc
36
+ else
37
+ gzip -9 miga-project.abc
38
+ fi
36
39
  fi
37
40
 
38
41
  # Calculate Statistics
@@ -43,4 +46,4 @@ Rscript "$MIGA/utils/core-pan-plot.R" \
43
46
 
44
47
  # Finalize
45
48
  miga date > "miga-project.done"
46
- miga add_result -P "$PROJECT" -r "$SCRIPT"
49
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -22,4 +22,4 @@ ruby -I "$MIGA/lib" \
22
22
 
23
23
  # Finalize
24
24
  miga date > "miga-project.done"
25
- miga add_result -P "$PROJECT" -r "$SCRIPT"
25
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -31,4 +31,4 @@ rm -f "../02.trimmed_reads/$b".[12].fastq
31
31
 
32
32
  # Finalize
33
33
  miga date > "$DATASET.done"
34
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
34
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
data/scripts/ssu.bash CHANGED
@@ -33,4 +33,4 @@ fi
33
33
 
34
34
  # Finalize
35
35
  miga date > "$DATASET.done"
36
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
36
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
data/scripts/stats.bash CHANGED
@@ -21,4 +21,4 @@ done
21
21
 
22
22
  # Finalize
23
23
  miga date > "$DATASET.done"
24
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
24
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -24,4 +24,4 @@ ruby "$MIGA/utils/subclades-compile.rb" . \
24
24
 
25
25
  # Finalize
26
26
  miga date > "miga-project.done"
27
- miga add_result -P "$PROJECT" -r "$SCRIPT"
27
+ miga add_result -P "$PROJECT" -r "$SCRIPT" -f
@@ -14,8 +14,9 @@ cd "$DIR"
14
14
  miga date > "$DATASET.start"
15
15
 
16
16
  # Run
17
- ruby "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1
17
+ ruby -I "$MIGA/lib" \
18
+ "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1
18
19
 
19
20
  # Finalize
20
21
  miga date > "$DATASET.done"
21
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
22
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -47,4 +47,4 @@ done
47
47
 
48
48
  # Finalize
49
49
  miga date > "$DATASET.done"
50
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
50
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -57,4 +57,4 @@ rm -f "$b".[12].*.discard
57
57
 
58
58
  # Finalize
59
59
  miga date > "$DATASET.done"
60
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
60
+ miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
@@ -25,7 +25,7 @@ module MiGA::DistanceRunner::Pipeline
25
25
  end
26
26
  end
27
27
  end
28
- classif = File.expand_path("miga-project.sc-#{val_cls}", classif)
28
+ classif = "#{classif}/miga-project.sc-#{val_cls}"
29
29
  result_fh.puts [val_cls, val_med, max_val, classif].join("\t")
30
30
  classify(clades, classif, metric, result_fh, val_cls)
31
31
  end
@@ -21,13 +21,14 @@ class MiGA::DistanceRunner
21
21
  @project = MiGA::Project.load(project_path) or
22
22
  raise "No project at #{project_path}"
23
23
  @dataset = project.dataset(dataset_name)
24
- @home = File.expand_path("data/09.distances", project.path)
24
+ @home = File.expand_path('data/09.distances', project.path)
25
25
  # Default opts
26
- @opts[:aai_save_rbm] ||= ENV.fetch("MIGA_AAI_SAVE_RBM") do
27
- project.is_clade? ? "save-rbm" : "no-save-rbm"
26
+ @opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
27
+ project.is_clade? ? 'save-rbm' : 'no-save-rbm'
28
28
  end
29
29
  @opts[:thr] ||= ENV.fetch("CORES"){ 2 }.to_i
30
30
  if opts[:run_taxonomy] && project.metadata[:ref_project]
31
+ @home = File.expand_path('05.taxonomy', @home)
31
32
  @ref_project = MiGA::Project.load(project.metadata[:ref_project])
32
33
  end
33
34
  @ref_project ||= project
@@ -85,7 +86,7 @@ class MiGA::DistanceRunner
85
86
  r = ln.chomp.split("\t")
86
87
  next unless r[1].to_i==val_cls
87
88
  target = ref_project.dataset(r[0])
88
- aai = (metric==:aai) ? aai(target) : 100.0
89
+ aai = (v[1]==:aai) ? aai(target) : 100.0
89
90
  ani(target) if aai >= 90.0
90
91
  end
91
92
  end
data/utils/subclades.R CHANGED
@@ -52,6 +52,7 @@ subclades <- function(ani_file, out_base, thr=1, ani=c()) {
52
52
  s.avg.z <- (s[1,]-mean(s[1,]))/(sd(s[1,])+0.0001)
53
53
  s.neg.z <- (s[2,]-mean(s[2,]))/(sd(s[2,])+0.01)
54
54
  ds <- s.avg.z - s.neg.z - 2/(1:length(k)) - (1:length(k))/50
55
+ if(mean(s[1,]<0)<0.75) ds[s[1,]<0] <- mean(ds) # <- k's with negative average
55
56
  top.n <- k[which.max(ds)]
56
57
 
57
58
  # Classify genomes
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2.1
4
+ version: 0.3.2.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-28 00:00:00.000000000 Z
11
+ date: 2018-07-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
@@ -167,9 +167,6 @@ files:
167
167
  - lib/miga/tax_index.rb
168
168
  - lib/miga/taxonomy.rb
169
169
  - lib/miga/version.rb
170
- - scripts/_distances_functions.bash
171
- - scripts/_distances_noref_nomulti.bash
172
- - scripts/_distances_ref_nomulti.bash
173
170
  - scripts/aai_distances.bash
174
171
  - scripts/ani_distances.bash
175
172
  - scripts/assembly.bash
@@ -1,104 +0,0 @@
1
- #!/bin/bash
2
- # Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
3
- # $NOMULTI, $REF
4
-
5
- set -e
6
-
7
- if [[ ! -n $MIGA_AAI_SAVE_RBM ]] ; then
8
- MIGA_AAI_SAVE_RBM="save-rbm"
9
- if [[ -n $PROJECT ]] ; then
10
- if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
11
- MIGA_AAI_SAVE_RBM="no-save-rbm"
12
- fi
13
- fi
14
- fi
15
-
16
- fx_exists miga-make_empty_aai_db || function miga-make_empty_aai_db {
17
- local DB=$1
18
- echo "create table if not exists aai(seq1 varchar(256), seq2 varchar(256)," \
19
- " aai float, sd float, n int, omega int);" | sqlite3 "$DB"
20
- }
21
-
22
- fx_exists miga-ds_name || function miga-ds_name {
23
- basename "$1" | perl -pe "s/[^A-Za-z0-9_].*//"
24
- }
25
-
26
- fx_exists miga-aai || function miga-aai {
27
- local F1=$1
28
- local F2=$2
29
- local TH=$3
30
- local DB=$4
31
- local N1
32
- N1=$(miga-ds_name "$F1")
33
- local N2
34
- N2=$(miga-ds_name "$F2")
35
- aai.rb -1 "$F1" -2 "$F2" -t "$TH" -a --lookup-first -S "$DB" --name1 "$N1" \
36
- --name2 "$N2" --$MIGA_AAI_SAVE_RBM || echo "0"
37
- }
38
-
39
- fx_exists miga-ani || function miga-ani {
40
- local F1=$1
41
- local F2=$2
42
- local TH=$3
43
- local DB=$4
44
- local N1
45
- N1=$(miga-ds_name "$F1")
46
- local N2
47
- N2=$(miga-ds_name "$F2")
48
- ani.rb -1 "$F1" -2 "$F2" -t "$TH" -a --no-save-regions --no-save-rbm \
49
- --lookup-first -S "$DB" --name1 "$N1" --name2 "$N2" || echo "0"
50
- }
51
-
52
- fx_exists miga-haai || function miga-haai {
53
- local F1=$1
54
- local F2=$2
55
- local TH=$3
56
- local DB=$4
57
- local AAI_DB=$5
58
- local N1
59
- N1=$(miga-ds_name "$F1")
60
- local N2
61
- N2=$(miga-ds_name "$F2")
62
- local HAAI
63
- HAAI=$(MIGA_AAI_SAVE_RBM="no-save-rbm" miga-aai "$F1" "$F2" "$TH" "$DB")
64
- if [[ "$HAAI" != "" && $(perl -e "print 1 if '$HAAI' <= 90") == "1" ]] ; then
65
- local AAI
66
- AAI=$(perl -e "print (100-exp(2.435076 + 0.4275193*log(100-$HAAI)))")
67
- [[ ! -s $AAI_DB ]] && miga-make_empty_aai_db "$AAI_DB"
68
- echo "insert into aai values('$N1','$N2','$AAI',0,0,0);" | sqlite3 "$AAI_DB"
69
- echo "$AAI"
70
- fi
71
- }
72
-
73
- fx_exists miga-haai_or_aai || function miga-haai_or_aai {
74
- local FH1=$1
75
- local FH2=$2
76
- local DBH=$3
77
- local F1=$4
78
- local F2=$5
79
- local DB=$6
80
- local TH=$7
81
- local AAI
82
- AAI=$(miga-haai "$FH1" "$FH2" "$TH" "$DBH" "$DB")
83
- [[ "${AAI%.*}" -le 0 ]] && AAI=$(miga-aai "$F1" "$F2" "$TH" "$DB")
84
- echo "$AAI"
85
- }
86
-
87
- fx_exists miga-val_from_db || function miga-val_from_db {
88
- local N1=$1
89
- local N2=$2
90
- local DB=$3
91
- local MT=$4
92
- if [[ -s $DB ]] ; then
93
- echo "select $MT from $MT where seq1='$N1' and seq2='$N2';" \
94
- | sqlite3 "$DB" || echo 0
95
- fi
96
- }
97
-
98
- fx_exists miga-aai_from_db || function miga-aai_from_db {
99
- miga-val_from_db "$1" "$2" "$3" aai
100
- }
101
-
102
- fx_exists miga-ani_from_db || function miga-ani_from_db {
103
- miga-val_from_db "$1" "$2" "$3" ani
104
- }
@@ -1,149 +0,0 @@
1
- #!/bin/bash
2
- # Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
3
- # $NOMULTI, $REF
4
-
5
- set -e
6
-
7
- # Deal with previous runs (if any)
8
- exists "$DATASET".haai.db && cp "$DATASET".haai.db "$TMPDIR"
9
- exists "$DATASET".a[an]i.db && cp "$DATASET".a[an]i.db "$TMPDIR"
10
- exists "$DATASET".a[an]i.9[05] && rm "$DATASET".a[an]i.9[05]
11
- N=0
12
- fx_exists miga-checkpoint_n || function miga-checkpoint_n {
13
- let N=$N+1
14
- if [[ $N -ge 10 ]] ; then
15
- for metric in haai aai ani ; do
16
- if [[ -s $TMPDIR/$DATASET.$metric.db ]] ; then
17
- echo "select count(*) from ${metric#h};" \
18
- | sqlite3 "$TMPDIR/$DATASET.$metric.db" \
19
- >/dev/null || exit 1
20
- cp "$TMPDIR/$DATASET.$metric.db" .
21
- fi
22
- done
23
- N=0
24
- fi
25
- }
26
-
27
- fx_exists miga-noref_haai_or_aai || function miga-noref_haai_or_aai {
28
- local Q=$1
29
- local S=$2
30
- [[ -s $TMPDIR/$Q.faa ]] \
31
- || cp "$PROJECT/data/06.cds/$Q.faa" "$TMPDIR/$Q.faa"
32
- miga-haai_or_aai "$PROJECT/$ESS/$Q.ess.faa" "$S_PROJ/$ESS/$S.ess.faa" \
33
- "$TMPDIR/$Q.haai.db" "$TMPDIR/$Q.faa" "$S_PROJ/data/06.cds/$S.faa" \
34
- "$TMPDIR/$Q.aai.db" "$CORES"
35
- }
36
-
37
- fx_exists miga-noref_ani || function miga-noref_ani {
38
- local Q=$1
39
- local S=$2
40
- [[ -s "$TMPDIR/$Q.LargeContigs.fna" ]] \
41
- || cp "$PROJECT/data/05.assembly/$Q.LargeContigs.fna" \
42
- "$TMPDIR/$Q.LargeContigs.fna"
43
- miga-ani "$TMPDIR/$Q.LargeContigs.fna" \
44
- "$S_PROJ/data/05.assembly/$S.LargeContigs.fna" \
45
- "$CORES" "$TMPDIR/$Q.ani.db"
46
- }
47
-
48
- # Calculate the classification-informed AAI/ANI traverse (if not classified)
49
- ESS="data/07.annotation/01.function/01.essential"
50
- if [[ $(miga about -P "$S_PROJ" -m type) != "clade" ]] ; then
51
- # Classify aai-clade (if project type is not clade)
52
- CLADES="$S_PROJ/data/10.clades/01.find"
53
- METRIC="aai"
54
- REF_TABLE="$S_PROJ/data/09.distances/02.aai/miga-project.txt.gz"
55
- else
56
- # Classify ani-clade (if project type is clade)
57
- CLADES="$S_PROJ/data/10.clades/02.ani"
58
- METRIC="ani"
59
- REF_TABLE="$S_PROJ/data/09.distances/03.ani/miga-project.txt.gz"
60
- fi
61
-
62
- CLASSIF="."
63
- [[ -e "$DATASET.$METRIC-medoids.tsv" ]] && rm "$DATASET.$METRIC-medoids.tsv"
64
- [[ -s "$CLADES/$CLASSIF/miga-project.medoids" ]] || \
65
- touch "$DATASET.$METRIC-medoids.tsv" "${DATASET}.${METRIC}.db"
66
- while [[ -s "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
67
- MAX_VAL=0
68
- VAL_MED=""
69
- VAL_CLS=""
70
- i_n=0
71
- while read -r i ; do
72
- let i_n=$i_n+1
73
- if [[ $METRIC == "aai" ]] ; then
74
- VAL=$(miga-noref_haai_or_aai "$DATASET" "$i")
75
- else
76
- VAL=$(miga-noref_ani "$DATASET" "$i")
77
- fi
78
- miga-checkpoint_n
79
- if [[ $(perl -e "print 1 if '$VAL' >= '$MAX_VAL'") == "1" ]] ; then
80
- MAX_VAL=$VAL
81
- VAL_MED=$i
82
- VAL_CLS=$i_n
83
- echo "[$CLASSIF] New max: $VAL_MED ($VAL_CLS): $MAX_VAL"
84
- fi
85
- done < "$CLADES/$CLASSIF/miga-project.medoids"
86
- CLASSIF="$CLASSIF/miga-project.sc-$VAL_CLS"
87
- echo "$VAL_CLS $VAL_MED $MAX_VAL $CLASSIF" \
88
- >> "$DATASET.$METRIC-medoids.tsv"
89
- done
90
-
91
- # Calculate all the AAIs/ANIs against the lowest subclade (if classified)
92
- if [[ "$CLASSIF" != "." ]] ; then
93
- PAR=$(dirname "$CLADES/$CLASSIF")/miga-project.classif
94
- if [[ -s "$PAR" ]] ; then
95
- while read -r i ; do
96
- if [[ $METRIC == "aai" ]] ; then
97
- AAI=$(miga-noref_haai_or_aai "$DATASET" "$i")
98
- else
99
- AAI=100
100
- fi
101
- if [[ $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
102
- miga-noref_ani "$DATASET" "$i"
103
- fi
104
- miga-checkpoint_n
105
- done < <(awk "\$2==$VAL_CLS{print \$1}" < "$PAR")
106
- fi
107
- fi
108
-
109
- # Finalize
110
- N=11
111
- miga-checkpoint_n
112
-
113
- # Build tree with medoids
114
- if [[ -s "${DATASET}.${METRIC}.db" ]] ; then
115
- echo "select seq2 from $METRIC;" | sqlite3 "${DATASET}.${METRIC}.db" \
116
- | sort | uniq > "${DATASET}.tmp0"
117
- perl -pe "s/^/^/" "${DATASET}.tmp0" | perl -pe "s/$/\\t/" \
118
- > "${DATASET}.tmp1"
119
- perl -pe "s/^/\\t/" "${DATASET}.tmp0" | perl -pe "s/$/\\t/" \
120
- > "${DATASET}.tmp2"
121
- echo "a b value" | tr " " "\\t" > "${DATASET}.txt"
122
- gzip -c -d "$REF_TABLE" | cut -f 2-4 \
123
- | grep -f "${DATASET}.tmp1" | grep -f "${DATASET}.tmp2" \
124
- >> "${DATASET}.txt"
125
- echo "select seq1, seq2, $METRIC from $METRIC;" \
126
- | sqlite3 "${DATASET}.${METRIC}.db" | tr "\\|" "\\t" \
127
- >> "${DATASET}.txt"
128
- "$MIGA/utils/ref-tree.R" "${DATASET}.txt" "$DATASET" "$DATASET"
129
- rm "$DATASET".tmp[012] "${DATASET}.txt"
130
- fi
131
-
132
- # Test taxonomy
133
- (
134
- trap 'rm "$DATASET.json" "$DATASET.done"' EXIT
135
- FLAGS=""
136
- [[ "$PROJECT" == "$S_PROJ" ]] || FLAGS="--ref-project"
137
- miga date > "$DATASET.done"
138
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT"
139
- miga tax_test -P "$PROJECT" -D "$DATASET" -t intax \
140
- $FLAGS > "$DATASET.intax.txt"
141
- )
142
-
143
- # Transfer taxonomy
144
- TAX_PVALUE=$(miga about -P "$PROJECT" -m tax_pvalue)
145
- [[ "$TAX_PVALUE" == "?" ]] && TAX_PVALUE="0.05"
146
- NEW_TAX=$(tail -n +6 "$DATASET.intax.txt" | head -n -3 \
147
- | awk '$3<'$TAX_PVALUE'{print $1":"$2}' | grep -v "?" \
148
- | tr "\\n" ' ' | perl -pe 's/ *$//')
149
- miga tax_set -P "$PROJECT" -D "$DATASET" -s "$NEW_TAX"
@@ -1,79 +0,0 @@
1
- #!/bin/bash
2
- # Available variables: $PROJECT, $DATASET, $RUNTYPE, $MIGA, $CORES, $TMPDIR,
3
- # $NOMULTI, $REF
4
-
5
- set -e
6
-
7
- fx_exists miga-checkpoint_n || function miga-checkpoint_n {
8
- if [[ $N -eq 10 ]] ; then
9
- for t in 01.haai 02.aai 03.ani ; do
10
- if [[ -s $TMPDIR/$t.db ]] ; then
11
- tab="aai"
12
- [[ "$t" == "03.ani" ]] && tab="ani"
13
- echo "select count(*) from $tab;" \
14
- | sqlite3 "$TMPDIR/$t.db" \
15
- >/dev/null || exit 1
16
- cp "$TMPDIR/$t.db" "$t/$DATASET.db"
17
- fi
18
- done
19
- N=0
20
- fi
21
- let N=$N+1
22
- }
23
-
24
- ESS="../07.annotation/01.function/01.essential"
25
-
26
- # Initialize temporals
27
- for t in 01.haai 02.aai 03.ani ; do
28
- [[ -s $t/$DATASET.db ]] && cp "$t/$DATASET.db" "$TMPDIR/$t.db"
29
- done
30
- N=1
31
-
32
- # Traverse "nearly-half" of the ref-datasets using first-come-first-served
33
- for i in $(miga list_datasets -P "$PROJECT" --ref --no-multi) ; do
34
- echo "[ $(date "+%Y-%m-%d %H:%M:%S %z") ] $i"
35
- AAI=""; ANI="";
36
- # Check if the i-th dataset is ready
37
- [[ -s $ESS/$i.done && -s $ESS/$i.json ]] || continue
38
- # Check if this is done (e.g., in a previous failed iteration)
39
- AAI=$(miga-aai_from_db "$DATASET" "$i" "$TMPDIR/02.aai.db")
40
- # Try the other direction
41
- [[ "${AAI%.*}" -le 0 ]] \
42
- && AAI=$(miga-aai_from_db "$i" "$DATASET" "02.aai/$i.db")
43
- # Try with hAAI
44
- if [[ "${AAI%.*}" -le 0 ]] ; then
45
- [[ -e "$TMPDIR/$DATASET.ess.faa" ]] \
46
- || cp "$ESS/$DATASET.ess.faa" "$TMPDIR/$DATASET.ess.faa"
47
- AAI=$(miga-haai "$TMPDIR/$DATASET.ess.faa" "$ESS/$i.ess.faa" \
48
- "$CORES" "$TMPDIR/01.haai.db" "$TMPDIR/02.aai.db")
49
- fi
50
- # Try with complete AAI
51
- if [[ "${AAI%.*}" -le 0 ]] ; then
52
- [[ -e "$TMPDIR/$DATASET.faa" ]] \
53
- || cp "../06.cds/$DATASET.faa" "$TMPDIR/$DATASET.faa"
54
- AAI=$(miga-aai "$TMPDIR/$DATASET.faa" "../06.cds/$i.faa" \
55
- "$CORES" "$TMPDIR/02.aai.db")
56
- fi
57
- # Check if ANI is meaningful
58
- if [[ -e "../05.assembly/$DATASET.LargeContigs.fna" \
59
- && -e "../05.assembly/$i.LargeContigs.fna" \
60
- && $(perl -e "print 1 if '$AAI' >= 90") == "1" ]] ; then
61
- # Check if this is done (e.g., in a previous failed iteration)
62
- ANI=$(miga-ani_from_db "$DATASET" "$i" "$TMPDIR/03.ani.db")
63
- # Try the other direction
64
- [[ "${ANI%.*}" -le 0 ]] \
65
- && ANI=$(miga-ani_from_db "$i" "$DATASET" "03.ani/$i.db")
66
- # Calculate it
67
- if [[ "${ANI%.*}" -le 0 ]] ; then
68
- [[ -e "$TMPDIR/$DATASET.LargeContigs.fna" ]] \
69
- || cp "../05.assembly/$DATASET.LargeContigs.fna" \
70
- "$TMPDIR/$DATASET.LargeContigs.fna"
71
- ANI=$(miga-ani "$TMPDIR/$DATASET.LargeContigs.fna" \
72
- "../05.assembly/$i.LargeContigs.fna" "$CORES" "$TMPDIR/03.ani.db")
73
- fi
74
- fi
75
- miga-checkpoint_n
76
- done
77
- N=10
78
- miga-checkpoint_n
79
-