miga-base 0.7.21.0 → 0.7.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/README.md +1 -1
- data/Rakefile +1 -0
- data/lib/miga/cli/action/add.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +12 -11
- data/lib/miga/cli/action/derep_wf.rb +3 -9
- data/lib/miga/cli/action/edit.rb +0 -1
- data/lib/miga/cli/action/find.rb +1 -1
- data/lib/miga/cli/action/generic.rb +1 -1
- data/lib/miga/cli/action/get.rb +7 -2
- data/lib/miga/cli/action/get_db.rb +16 -21
- data/lib/miga/cli/action/init.rb +41 -93
- data/lib/miga/cli/action/init/daemon_helper.rb +1 -2
- data/lib/miga/cli/action/init/files_helper.rb +118 -0
- data/lib/miga/cli/action/ncbi_get.rb +1 -1
- data/lib/miga/cli/action/new.rb +15 -9
- data/lib/miga/cli/action/option.rb +44 -0
- data/lib/miga/cli/action/quality_wf.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +1 -1
- data/lib/miga/cli/action/tax_test.rb +1 -1
- data/lib/miga/cli/action/wf.rb +32 -30
- data/lib/miga/cli/base.rb +1 -0
- data/lib/miga/cli/objects_helper.rb +23 -18
- data/lib/miga/common.rb +4 -2
- data/lib/miga/common/net.rb +74 -0
- data/lib/miga/common/with_option.rb +83 -0
- data/lib/miga/common/with_result.rb +3 -2
- data/lib/miga/dataset/base.rb +20 -2
- data/lib/miga/dataset/result.rb +3 -2
- data/lib/miga/metadata.rb +25 -13
- data/lib/miga/project/base.rb +82 -2
- data/lib/miga/project/result.rb +4 -4
- data/lib/miga/remote_dataset.rb +2 -0
- data/lib/miga/result/stats.rb +2 -2
- data/lib/miga/version.rb +4 -2
- data/scripts/aai_distances.bash +1 -1
- data/scripts/ani_distances.bash +1 -1
- data/scripts/essential_genes.bash +1 -2
- data/scripts/haai_distances.bash +1 -1
- data/scripts/mytaxa.bash +6 -5
- data/scripts/mytaxa_scan.bash +8 -7
- data/scripts/ogs.bash +2 -3
- data/scripts/ssu.bash +16 -2
- data/test/dataset_test.rb +5 -5
- data/test/net_test.rb +34 -0
- data/test/with_option_test.rb +115 -0
- data/utils/cleanup-databases.rb +2 -3
- data/utils/distance/commands.rb +2 -2
- data/utils/distance/database.rb +1 -1
- data/utils/distance/pipeline.rb +2 -4
- data/utils/distance/runner.rb +15 -23
- data/utils/index_metadata.rb +1 -2
- data/utils/requirements.txt +6 -5
- data/utils/subclade/runner.rb +10 -11
- metadata +9 -3
data/scripts/aai_distances.bash
CHANGED
@@ -19,7 +19,7 @@ rm -f miga-project.txt
|
|
19
19
|
for i in $DS ; do
|
20
20
|
echo "SELECT CASE WHEN omega!=0 THEN 'AAI' ELSE 'hAAI_AAI' END," \
|
21
21
|
" seq1, seq2, aai, sd, n, omega from aai;" \
|
22
|
-
| sqlite3 "$i.db" | tr "\\|" "\\t"
|
22
|
+
| sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
|
23
23
|
echo "$i" >> miga-project.log
|
24
24
|
done
|
25
25
|
) | gzip -9c > miga-project.txt.gz
|
data/scripts/ani_distances.bash
CHANGED
@@ -18,7 +18,7 @@ rm -f miga-project.txt
|
|
18
18
|
echo "metric a b value sd n omega" | tr " " "\\t"
|
19
19
|
for i in $DS ; do
|
20
20
|
echo "SELECT 'ANI', seq1, seq2, ani, sd, n, omega from ani ;" \
|
21
|
-
| sqlite3 "$i.db" | tr "\\|" "\\t"
|
21
|
+
| sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
|
22
22
|
echo "$i" >> miga-project.log
|
23
23
|
done
|
24
24
|
) | gzip -9c > miga-project.txt.gz
|
data/scripts/essential_genes.bash
CHANGED
@@ -24,8 +24,7 @@ fi
|
|
24
24
|
mkdir "${DATASET}.ess"
|
25
25
|
TYPE=$(miga ls -P "$PROJECT" -D "$DATASET" \
|
26
26
|
--metadata "type" | awk '{print $2}')
|
27
|
-
COLL=$(miga
|
28
|
-
[[ "$COLL" == "?" ]] && COLL=dupont_2012
|
27
|
+
COLL=$(miga option -P "$PROJECT" --key ess_coll)
|
29
28
|
if [[ "$TYPE" == "metagenome" || "$TYPE" == "virome" ]] ; then
|
30
29
|
FLAGS="--metagenome"
|
31
30
|
else
|
data/scripts/haai_distances.bash
CHANGED
@@ -22,7 +22,7 @@ rm -f miga-project.txt
|
|
22
22
|
echo "metric a b value sd n omega" | tr " " "\\t"
|
23
23
|
for i in $DS ; do
|
24
24
|
echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
|
25
|
-
| sqlite3 "$i.db" | tr "\\|" "\\t"
|
25
|
+
| sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
|
26
26
|
echo "$i" >> miga-project.log
|
27
27
|
done
|
28
28
|
) | gzip -9c > miga-project.txt.gz
|
data/scripts/mytaxa.bash
CHANGED
@@ -14,15 +14,16 @@ if [[ "$MIGA_MYTAXA" == "no" ]] ; then
|
|
14
14
|
echo "This system doesn't currently support MyTaxa." \
|
15
15
|
> "$DATASET.nomytaxa.txt"
|
16
16
|
else
|
17
|
-
MT=$(dirname -- "$(which MyTaxa)")
|
18
|
-
|
19
17
|
# Check type of dataset
|
20
18
|
MULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --multi \
|
21
19
|
| wc -l | awk '{print $1}')
|
22
20
|
if [[ "$MULTI" -eq "1" ]] ; then
|
23
21
|
# Check requirements
|
24
|
-
|
25
|
-
|
22
|
+
MT=$(dirname -- "$(which MyTaxa)")
|
23
|
+
DB="$MIGA_HOME/.miga_db/AllGenomes.faa.dmnd"
|
24
|
+
[[ -e "$DB" ]] || DB="$MT/AllGenomes.faa.dmnd"
|
25
|
+
if [[ ! -e "$DB" ]] ; then
|
26
|
+
echo "Cannot locate the database: AllGenomes.faa.dmnd:" \
|
26
27
|
"no such file or directory" >&2
|
27
28
|
exit 1
|
28
29
|
fi
|
@@ -40,7 +41,7 @@ else
|
|
40
41
|
# Execute search
|
41
42
|
FAA="../../../06.cds/$DATASET.faa"
|
42
43
|
[[ -s "$FAA" ]] || FAA="${FAA}.gz"
|
43
|
-
diamond blastp -q "$FAA" -d "$
|
44
|
+
diamond blastp -q "$FAA" -d "$DB" \
|
44
45
|
-a "$DATASET.daa" -k 5 -p "$CORES" --min-score 60
|
45
46
|
diamond view -a "$DATASET.daa" -o "$DATASET.blast"
|
46
47
|
|
data/scripts/mytaxa_scan.bash
CHANGED
@@ -5,7 +5,6 @@ SCRIPT="mytaxa_scan"
|
|
5
5
|
# shellcheck source=scripts/miga.bash
|
6
6
|
. "$MIGA/scripts/miga.bash" || exit 1
|
7
7
|
DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
|
8
|
-
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
9
8
|
cd "$DIR"
|
10
9
|
|
11
10
|
# Initialize
|
@@ -14,17 +13,16 @@ if [[ "$MIGA_MYTAXA" == "no" ]] ; then
|
|
14
13
|
echo "This system doesn't currently support MyTaxa." \
|
15
14
|
> "$DATASET.nomytaxa.txt"
|
16
15
|
else
|
17
|
-
MT=$(dirname -- "$(which MyTaxa)")
|
18
|
-
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
|
19
|
-
trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
|
20
|
-
|
21
16
|
# Check type of dataset
|
22
17
|
NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
|
23
18
|
| wc -l | awk '{print $1}')
|
24
19
|
if [[ "$NOMULTI" -eq "1" ]] ; then
|
25
20
|
# Check requirements
|
21
|
+
MT=$(dirname -- "$(which MyTaxa)")
|
22
|
+
DB="$MIGA_HOME/.miga_db/AllGenomes.faa.dmnd"
|
23
|
+
[[ -e "$DB" ]] || DB="$MT/AllGenomes.faa.dmnd"
|
26
24
|
if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
|
27
|
-
echo "Cannot locate the database:
|
25
|
+
echo "Cannot locate the database: AllGenomes.faa.dmnd:" \
|
28
26
|
"no such file or directory" >&2
|
29
27
|
exit 1
|
30
28
|
fi
|
@@ -39,13 +37,16 @@ else
|
|
39
37
|
exit 1
|
40
38
|
fi
|
41
39
|
|
40
|
+
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
|
41
|
+
trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
|
42
|
+
|
42
43
|
FAA="../../../06.cds/$DATASET.faa"
|
43
44
|
[[ -s "$FAA" ]] || FAA="${FAA}.gz"
|
44
45
|
if [[ ! -s "$DATASET.mytaxa" ]] ; then
|
45
46
|
# Execute search
|
46
47
|
if [[ ! -s "$DATASET.blast" ]] ; then
|
47
48
|
diamond blastp -q "$FAA" -a "$DATASET.daa" -t "$TMPDIR" \
|
48
|
-
-d "$
|
49
|
+
-d "$DB" -k 5 -p "$CORES" --min-score 60
|
49
50
|
diamond view -a "$DATASET.daa" -o "$DATASET.blast" -t "$TMPDIR"
|
50
51
|
fi
|
51
52
|
|
data/scripts/ogs.bash
CHANGED
@@ -12,8 +12,7 @@ miga_start_project_step "$DIR"
|
|
12
12
|
DS=$(miga ls -P "$PROJECT" --ref --no-multi)
|
13
13
|
|
14
14
|
if [[ -n $DS ]] ; then
|
15
|
-
MIN_ID=$(miga
|
16
|
-
[[ $MIN_ID == "?" ]] && MIN_ID=80
|
15
|
+
MIN_ID=$(miga option -P "$PROJECT" --key ogs_identity)
|
17
16
|
if [[ ! -s miga-project.ogs ]] ; then
|
18
17
|
# Extract RBMs
|
19
18
|
if [[ ! -s miga-project.abc ]] ; then
|
@@ -34,7 +33,7 @@ if [[ -n $DS ]] ; then
|
|
34
33
|
|
35
34
|
# Estimate OGs and Clean RBMs
|
36
35
|
ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
|
37
|
-
if [[ $(miga
|
36
|
+
if [[ $(miga option -P "$PROJECT" --key clean_ogs) == "false" ]] ; then
|
38
37
|
gzip -9 miga-project.abc
|
39
38
|
else
|
40
39
|
rm miga-project.abc
|
data/scripts/ssu.bash
CHANGED
@@ -16,6 +16,7 @@ if [[ -s $fa ]] ; then
|
|
16
16
|
# Run barrnap
|
17
17
|
barrnap --quiet --threads "$CORES" "$fa" | grep "^##gff\\|;product=16S " \
|
18
18
|
> "$DATASET.ssu.gff"
|
19
|
+
|
19
20
|
# Extract
|
20
21
|
bedtools getfasta -s "-fi" "$fa" -bed "$DATASET.ssu.gff" \
|
21
22
|
-fo "$DATASET.ssu.all.fa"
|
@@ -24,9 +25,22 @@ if [[ -s $fa ]] ; then
|
|
24
25
|
FastA.filter.pl "$DATASET.ssu.fa.id" "$DATASET.ssu.all.fa" > "$DATASET.ssu.fa"
|
25
26
|
rm "$DATASET.ssu.fa.id"
|
26
27
|
[[ -e "$fa.fai" ]] && rm "$fa.fai"
|
28
|
+
|
29
|
+
# RDP classifier
|
30
|
+
if [[ "$MIGA_RDP" == "yes" && -s "$DATASET.ssu.all.fa" ]] ; then
|
31
|
+
java -jar "$MIGA_HOME/.miga_db/classifier.jar" classify \
|
32
|
+
-c 0.8 -f fixrank -g 16srrna -o "$DATASET.rdp.tsv" \
|
33
|
+
"$DATASET.ssu.all.fa"
|
34
|
+
echo "# Version: $(perl -pe 's/.*://' \
|
35
|
+
< "$MIGA_HOME/.miga_db/classifier.version.txt" \
|
36
|
+
| grep . | paste - - | perl -pe 's/\t/; /')" \
|
37
|
+
>> "$DATASET.rdp.tsv"
|
38
|
+
fi
|
39
|
+
|
27
40
|
# Gzip
|
28
|
-
|
29
|
-
|
41
|
+
for x in ssu.gff ssu.all.fa rdp.tsv ; do
|
42
|
+
[[ -e "${DATASET}.${x}" ]] && gzip -9 -f "${DATASET}.${x}"
|
43
|
+
done
|
30
44
|
fi
|
31
45
|
|
32
46
|
# Finalize
|
data/test/dataset_test.rb
CHANGED
@@ -29,7 +29,7 @@ class DatasetTest < Test::Unit::TestCase
|
|
29
29
|
assert_raise { MiGA::Dataset.new(project, 'dataset-1') }
|
30
30
|
assert_equal(project, dataset.project)
|
31
31
|
assert_equal('dataset0', dataset.name)
|
32
|
-
assert_predicate(dataset, :
|
32
|
+
assert_predicate(dataset, :ref?)
|
33
33
|
assert_equal(MiGA::Metadata, dataset.metadata.class)
|
34
34
|
assert_equal(:incomplete, dataset.status)
|
35
35
|
end
|
@@ -38,14 +38,14 @@ class DatasetTest < Test::Unit::TestCase
|
|
38
38
|
d2 = project.add_dataset('ds_save')
|
39
39
|
assert_respond_to(d2, :save)
|
40
40
|
d2.save
|
41
|
-
assert_not_predicate(d2, :
|
42
|
-
assert_not_predicate(d2, :
|
41
|
+
assert_not_predicate(d2, :multi?)
|
42
|
+
assert_not_predicate(d2, :nonmulti?)
|
43
43
|
assert_nil(d2.metadata[:type])
|
44
44
|
d2.metadata[:type] = :metagenome
|
45
45
|
d2.save
|
46
46
|
assert_equal(:metagenome, d2.metadata[:type])
|
47
|
-
assert_predicate(d2, :
|
48
|
-
assert_not_predicate(d2, :
|
47
|
+
assert_predicate(d2, :multi?)
|
48
|
+
assert_not_predicate(d2, :nonmulti?)
|
49
49
|
end
|
50
50
|
|
51
51
|
def test_remove
|
data/test/net_test.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class FormatTest < Test::Unit::TestCase
|
4
|
+
include TestHelper
|
5
|
+
|
6
|
+
def test_known_hosts
|
7
|
+
m = MiGA::MiGA
|
8
|
+
assert_not_nil(m.known_hosts(:miga_db))
|
9
|
+
assert_not_nil(m.known_hosts('miga_db'))
|
10
|
+
assert_not_nil(m.known_hosts(:miga_dist))
|
11
|
+
assert_raise { m.known_kosts(:not_a_host) }
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_remote_connection
|
15
|
+
declare_remote_access
|
16
|
+
m = MiGA::MiGA
|
17
|
+
assert_raise { m.remote_connection(:bad_descriptor) }
|
18
|
+
assert_raise { m.remote_connection('http://microbial-genomes.org/') }
|
19
|
+
c = m.remote_connection(:miga_db)
|
20
|
+
assert_equal(Net::FTP, c.class)
|
21
|
+
c.close
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_download_file_ftp
|
25
|
+
declare_remote_access
|
26
|
+
m = MiGA::MiGA
|
27
|
+
f = tmpfile('t/test.txt')
|
28
|
+
d = File.dirname(f)
|
29
|
+
assert(!Dir.exist?(d))
|
30
|
+
m.download_file_ftp(:miga_online_ftp, 'test.txt', f)
|
31
|
+
assert(Dir.exist?(d))
|
32
|
+
assert_equal('miga', File.read(f).chomp)
|
33
|
+
end
|
34
|
+
end
|
data/test/with_option_test.rb
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
require 'miga/common/with_option'
|
5
|
+
|
6
|
+
class WithDaemonTest < Test::Unit::TestCase
|
7
|
+
include TestHelper
|
8
|
+
|
9
|
+
class TestWithOption < MiGA::MiGA
|
10
|
+
include MiGA::Common::WithOption
|
11
|
+
|
12
|
+
attr_reader :metadata, :saved
|
13
|
+
|
14
|
+
def initialize
|
15
|
+
@metadata = { range: 0.9 }
|
16
|
+
@saved = false
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.OPTIONS
|
20
|
+
{
|
21
|
+
empty: {},
|
22
|
+
float: { type: Float },
|
23
|
+
range: { default: 1.0, in: -5.5..5.5, type: Float },
|
24
|
+
default: { default: 9, type: Integer },
|
25
|
+
token: { type: Integer, tokens: %w[yes no 0] },
|
26
|
+
proc: { default: proc { Date.today } },
|
27
|
+
bool: { in: [true, false] }
|
28
|
+
}
|
29
|
+
end
|
30
|
+
|
31
|
+
def save
|
32
|
+
@saved = true
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_with_option
|
37
|
+
o = TestWithOption.new
|
38
|
+
assert_respond_to(o, :option)
|
39
|
+
assert_equal(1, o.metadata.size)
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_option
|
43
|
+
o = TestWithOption.new
|
44
|
+
assert_equal(9, o.option(:default))
|
45
|
+
assert_nil(o.option(:bool))
|
46
|
+
assert_raise { o.option(:not_an_option) }
|
47
|
+
assert_nil(o.option(:empty))
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_set_bool
|
51
|
+
o = TestWithOption.new
|
52
|
+
assert_nil(o.option(:bool))
|
53
|
+
assert(!o.saved)
|
54
|
+
assert_raise { o.set_option(:bool, 'true') }
|
55
|
+
assert_nil(o.option(:bool))
|
56
|
+
assert(!o.saved)
|
57
|
+
assert_equal(true, o.set_option(:bool, 'true', true))
|
58
|
+
assert(o.saved)
|
59
|
+
assert_equal(false, o.set_option(:bool, false))
|
60
|
+
assert_equal(false, o.set_option(:bool, 'false', true))
|
61
|
+
assert_nil(o.set_option(:bool, nil))
|
62
|
+
end
|
63
|
+
|
64
|
+
def test_set_empty
|
65
|
+
o = TestWithOption.new
|
66
|
+
assert_nil(o.option(:empty))
|
67
|
+
assert_equal('a', o.set_option(:empty, 'a'))
|
68
|
+
assert_equal('1', o.set_option(:empty, '1', true))
|
69
|
+
end
|
70
|
+
|
71
|
+
def test_all_options
|
72
|
+
o = TestWithOption.new
|
73
|
+
assert(o.all_options.is_a?(Hash))
|
74
|
+
assert_include(o.all_options.keys, :bool)
|
75
|
+
assert_nil(o.all_options[:bool])
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_option?
|
79
|
+
o = TestWithOption.new
|
80
|
+
assert(o.option?(:range))
|
81
|
+
assert(!o.option?(:not_an_option))
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_option_metadata
|
85
|
+
o = TestWithOption.new
|
86
|
+
assert_equal(0.9, o.option(:range))
|
87
|
+
assert_equal(1.0, o.set_option(:range, nil))
|
88
|
+
assert_equal(2.0, o.set_option(:range, 2.0))
|
89
|
+
assert_equal(3.0, o.set_option(:range, '3', true))
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_option_range
|
93
|
+
o = TestWithOption.new
|
94
|
+
assert_raise { o.set_option(:range, 9.0) }
|
95
|
+
assert_raise { o.set_option(:range, 3) }
|
96
|
+
assert_raise { o.set_option(:range, true) }
|
97
|
+
end
|
98
|
+
|
99
|
+
def test_option_proc
|
100
|
+
o = TestWithOption.new
|
101
|
+
assert(o.option(:proc).is_a?(Date))
|
102
|
+
assert(o.set_option(:proc, 1).is_a?(Integer))
|
103
|
+
assert(o.set_option(:proc, nil).is_a?(Date))
|
104
|
+
end
|
105
|
+
|
106
|
+
def test_token
|
107
|
+
o = TestWithOption.new
|
108
|
+
assert_nil(o.option(:token))
|
109
|
+
assert_equal(1, o.set_option(:token, 1))
|
110
|
+
assert_equal(-2, o.set_option(:token, '-2', true))
|
111
|
+
assert_equal('yes', o.set_option(:token, 'yes'))
|
112
|
+
assert_equal('0', o.set_option(:token, '0', true))
|
113
|
+
assert_raise { o.set_option(:token, 'maybe') }
|
114
|
+
end
|
115
|
+
end
|
data/utils/cleanup-databases.rb
CHANGED
@@ -15,11 +15,11 @@ m.say 'Cleaning Databases'
|
|
15
15
|
(0..thr - 1).each do |t|
|
16
16
|
fork do
|
17
17
|
dsn.each_with_index do |i, idx|
|
18
|
-
m.advance('Dataset:',
|
18
|
+
m.advance('Dataset:', idx + 1, dsn.size) if t == 0
|
19
19
|
next unless (idx % thr) == t
|
20
20
|
|
21
21
|
d = p.dataset(i)
|
22
|
-
next unless d.
|
22
|
+
next unless d.ref? && d.active?
|
23
23
|
|
24
24
|
d.cleanup_distances!
|
25
25
|
end
|
@@ -28,4 +28,3 @@ end
|
|
28
28
|
Process.waitall
|
29
29
|
m.advance('Dataset:', dsn.size, dsn.size)
|
30
30
|
m.say
|
31
|
-
|
data/utils/distance/commands.rb
CHANGED
@@ -9,7 +9,7 @@ module MiGA::DistanceRunner::Commands
|
|
9
9
|
return y unless y.nil? || y.zero?
|
10
10
|
|
11
11
|
# Try hAAI (except in clade projects)
|
12
|
-
unless @ref_project.
|
12
|
+
unless @ref_project.clade?
|
13
13
|
y = haai(target)
|
14
14
|
return y unless y.nil? || y.zero?
|
15
15
|
end
|
@@ -75,7 +75,7 @@ module MiGA::DistanceRunner::Commands
|
|
75
75
|
v = `aai.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
|
76
76
|
--name1 "#{n1}" --name2 "#{n2}" \
|
77
77
|
-t "#{o[:thr]}" -a --lookup-first "--#{o[:aai_save_rbm]}" \
|
78
|
-
-p "#{o[:aai_p]
|
78
|
+
-p "#{o[:aai_p]}"`.chomp
|
79
79
|
(v.nil? || v.empty?) ? 0 : v.to_f
|
80
80
|
end
|
81
81
|
|
data/utils/distance/database.rb
CHANGED
@@ -66,7 +66,7 @@ module MiGA::DistanceRunner::Database
|
|
66
66
|
return y unless y.nil? || y.zero?
|
67
67
|
|
68
68
|
# Check if self.dataset <- target is done (another thread)
|
69
|
-
if dataset.
|
69
|
+
if dataset.ref? && project.path == ref_project.path
|
70
70
|
y = data_from_db(
|
71
71
|
target.name, dataset.name, ref_db(metric, target.name), metric
|
72
72
|
)
|
data/utils/distance/pipeline.rb
CHANGED
@@ -31,10 +31,8 @@ module MiGA::DistanceRunner::Pipeline
|
|
31
31
|
|
32
32
|
# Run distances against datasets listed in metadata's +:dist_req+
|
33
33
|
def distances_by_request(metric)
|
34
|
-
return unless dataset.metadata[:dist_req]
|
35
|
-
|
36
34
|
$stderr.puts 'Running distances by request'
|
37
|
-
dataset.
|
35
|
+
dataset.option(:dist_req).each do |target|
|
38
36
|
ds = ref_project.dataset(target) and send(metric, ds)
|
39
37
|
end
|
40
38
|
end
|
@@ -112,7 +110,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
112
110
|
$stderr.puts 'Transferring taxonomy'
|
113
111
|
return if tax.nil?
|
114
112
|
|
115
|
-
pval =
|
113
|
+
pval = project.option(:tax_pvalue)
|
116
114
|
tax_a = tax
|
117
115
|
.select { |i| i[1] != '?' && i[2] <= pval }
|
118
116
|
.map { |i| i[0, 2].join(':') }
|
data/utils/distance/runner.rb
CHANGED
@@ -21,47 +21,39 @@ class MiGA::DistanceRunner
|
|
21
21
|
@home = File.expand_path('data/09.distances', project.path)
|
22
22
|
|
23
23
|
# Default opts
|
24
|
-
|
25
|
-
|
26
|
-
end
|
27
|
-
@opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
|
28
|
-
project.is_clade? ? 'save-rbm' : 'no-save-rbm'
|
29
|
-
end
|
24
|
+
@opts[:aai_save_rbm] =
|
25
|
+
project.option(:aai_save_rbm) ? 'save-rbm' : 'no-save-rbm'
|
30
26
|
@opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
|
31
|
-
if opts[:run_taxonomy]
|
32
|
-
ref_path = project.
|
27
|
+
if opts[:run_taxonomy] && project.option(:ref_project)
|
28
|
+
ref_path = project.option(:ref_project)
|
33
29
|
@home = File.expand_path('05.taxonomy', @home)
|
34
30
|
@ref_project = MiGA::Project.load(ref_path)
|
35
31
|
raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
|
36
|
-
elsif !opts[:run_taxonomy]
|
37
|
-
ref_path = dataset.
|
38
|
-
if project.
|
39
|
-
ref_path = File.expand_path(ref_path, project.
|
32
|
+
elsif !opts[:run_taxonomy] && dataset.option(:db_project)
|
33
|
+
ref_path = dataset.option(:db_project)
|
34
|
+
if project.option(:db_proj_dir)
|
35
|
+
ref_path = File.expand_path(ref_path, project.option(:db_proj_dir))
|
40
36
|
end
|
41
37
|
@ref_project = MiGA::Project.load(ref_path)
|
42
38
|
raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
|
43
39
|
else
|
44
40
|
@ref_project = project
|
45
41
|
end
|
46
|
-
[
|
47
|
-
@opts[m] ||= ref_project.
|
42
|
+
%i[haai_p aai_p ani_p distances_checkpoint].each do |m|
|
43
|
+
@opts[m] ||= ref_project.option(m)
|
48
44
|
end
|
49
|
-
@opts[:aai_p] ||= 'blast+'
|
50
|
-
@opts[:ani_p] ||= 'blast+'
|
51
|
-
@opts[:distances_checkpoint] ||= 10
|
52
|
-
@opts[:distances_checkpoint] = @opts[:distances_checkpoint].to_i
|
53
45
|
$stderr.puts "Options: #{opts}"
|
54
46
|
end
|
55
47
|
|
56
48
|
# Launch the appropriate analysis
|
57
49
|
def go!
|
58
50
|
$stderr.puts "Launching analysis"
|
59
|
-
return if dataset.
|
51
|
+
return if dataset.multi?
|
60
52
|
|
61
53
|
Dir.mktmpdir do |tmp_dir|
|
62
54
|
@tmp = tmp_dir
|
63
55
|
create_temporals
|
64
|
-
opts[:run_taxonomy] ? go_taxonomy! : dataset.
|
56
|
+
opts[:run_taxonomy] ? go_taxonomy! : dataset.ref? ? go_ref! : go_query!
|
65
57
|
end
|
66
58
|
end
|
67
59
|
|
@@ -73,7 +65,7 @@ class MiGA::DistanceRunner
|
|
73
65
|
|
74
66
|
# first-come-first-serve traverse
|
75
67
|
ref_project.each_dataset do |ds|
|
76
|
-
next if !ds.
|
68
|
+
next if !ds.ref? or ds.multi? or ds.result(:essential_genes).nil?
|
77
69
|
|
78
70
|
puts "[ #{Time.now} ] #{ds.name}"
|
79
71
|
ani_after_aai(ds)
|
@@ -88,7 +80,7 @@ class MiGA::DistanceRunner
|
|
88
80
|
def go_query!
|
89
81
|
$stderr.puts 'Launching analysis for query dataset'
|
90
82
|
# Check if project is ready
|
91
|
-
tsk = ref_project.
|
83
|
+
tsk = ref_project.clade? ? [:subclades, :ani] : [:clade_finding, :aai]
|
92
84
|
res = ref_project.result(tsk[0])
|
93
85
|
return if res.nil?
|
94
86
|
|
@@ -137,7 +129,7 @@ class MiGA::DistanceRunner
|
|
137
129
|
# Launch analysis for taxonomy jobs
|
138
130
|
def go_taxonomy!
|
139
131
|
$stderr.puts 'Launching taxonomy analysis'
|
140
|
-
return unless project.
|
132
|
+
return unless project.option(:ref_project)
|
141
133
|
|
142
134
|
go_query! # <- yeah, it's actually the same, just different ref_project
|
143
135
|
end
|