miga-base 0.7.21.0 → 0.7.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/README.md +1 -1
- data/Rakefile +1 -0
- data/lib/miga/cli/action/add.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +12 -11
- data/lib/miga/cli/action/derep_wf.rb +3 -9
- data/lib/miga/cli/action/edit.rb +0 -1
- data/lib/miga/cli/action/find.rb +1 -1
- data/lib/miga/cli/action/generic.rb +1 -1
- data/lib/miga/cli/action/get.rb +7 -2
- data/lib/miga/cli/action/get_db.rb +16 -21
- data/lib/miga/cli/action/init.rb +41 -93
- data/lib/miga/cli/action/init/daemon_helper.rb +1 -2
- data/lib/miga/cli/action/init/files_helper.rb +118 -0
- data/lib/miga/cli/action/ncbi_get.rb +1 -1
- data/lib/miga/cli/action/new.rb +15 -9
- data/lib/miga/cli/action/option.rb +44 -0
- data/lib/miga/cli/action/quality_wf.rb +3 -3
- data/lib/miga/cli/action/tax_dist.rb +1 -1
- data/lib/miga/cli/action/tax_test.rb +1 -1
- data/lib/miga/cli/action/wf.rb +32 -30
- data/lib/miga/cli/base.rb +1 -0
- data/lib/miga/cli/objects_helper.rb +23 -18
- data/lib/miga/common.rb +4 -2
- data/lib/miga/common/net.rb +74 -0
- data/lib/miga/common/with_option.rb +83 -0
- data/lib/miga/common/with_result.rb +3 -2
- data/lib/miga/dataset/base.rb +20 -2
- data/lib/miga/dataset/result.rb +3 -2
- data/lib/miga/metadata.rb +25 -13
- data/lib/miga/project/base.rb +82 -2
- data/lib/miga/project/result.rb +4 -4
- data/lib/miga/remote_dataset.rb +2 -0
- data/lib/miga/result/stats.rb +2 -2
- data/lib/miga/version.rb +4 -2
- data/scripts/aai_distances.bash +1 -1
- data/scripts/ani_distances.bash +1 -1
- data/scripts/essential_genes.bash +1 -2
- data/scripts/haai_distances.bash +1 -1
- data/scripts/mytaxa.bash +6 -5
- data/scripts/mytaxa_scan.bash +8 -7
- data/scripts/ogs.bash +2 -3
- data/scripts/ssu.bash +16 -2
- data/test/dataset_test.rb +5 -5
- data/test/net_test.rb +34 -0
- data/test/with_option_test.rb +115 -0
- data/utils/cleanup-databases.rb +2 -3
- data/utils/distance/commands.rb +2 -2
- data/utils/distance/database.rb +1 -1
- data/utils/distance/pipeline.rb +2 -4
- data/utils/distance/runner.rb +15 -23
- data/utils/index_metadata.rb +1 -2
- data/utils/requirements.txt +6 -5
- data/utils/subclade/runner.rb +10 -11
- metadata +9 -3
data/scripts/aai_distances.bash
CHANGED
@@ -19,7 +19,7 @@ rm -f miga-project.txt
|
|
19
19
|
for i in $DS ; do
|
20
20
|
echo "SELECT CASE WHEN omega!=0 THEN 'AAI' ELSE 'hAAI_AAI' END," \
|
21
21
|
" seq1, seq2, aai, sd, n, omega from aai;" \
|
22
|
-
| sqlite3 "$i.db" | tr "\\|" "\\t"
|
22
|
+
| sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
|
23
23
|
echo "$i" >> miga-project.log
|
24
24
|
done
|
25
25
|
) | gzip -9c > miga-project.txt.gz
|
data/scripts/ani_distances.bash
CHANGED
@@ -18,7 +18,7 @@ rm -f miga-project.txt
|
|
18
18
|
echo "metric a b value sd n omega" | tr " " "\\t"
|
19
19
|
for i in $DS ; do
|
20
20
|
echo "SELECT 'ANI', seq1, seq2, ani, sd, n, omega from ani ;" \
|
21
|
-
| sqlite3 "$i.db" | tr "\\|" "\\t"
|
21
|
+
| sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
|
22
22
|
echo "$i" >> miga-project.log
|
23
23
|
done
|
24
24
|
) | gzip -9c > miga-project.txt.gz
|
data/scripts/essential_genes.bash
CHANGED
@@ -24,8 +24,7 @@ fi
|
|
24
24
|
mkdir "${DATASET}.ess"
|
25
25
|
TYPE=$(miga ls -P "$PROJECT" -D "$DATASET" \
|
26
26
|
--metadata "type" | awk '{print $2}')
|
27
|
-
COLL=$(miga
|
28
|
-
[[ "$COLL" == "?" ]] && COLL=dupont_2012
|
27
|
+
COLL=$(miga option -P "$PROJECT" --key ess_coll)
|
29
28
|
if [[ "$TYPE" == "metagenome" || "$TYPE" == "virome" ]] ; then
|
30
29
|
FLAGS="--metagenome"
|
31
30
|
else
|
data/scripts/haai_distances.bash
CHANGED
@@ -22,7 +22,7 @@ rm -f miga-project.txt
|
|
22
22
|
echo "metric a b value sd n omega" | tr " " "\\t"
|
23
23
|
for i in $DS ; do
|
24
24
|
echo "SELECT 'hAAI', seq1, seq2, aai, sd, n, omega from aai ;" \
|
25
|
-
| sqlite3 "$i.db" | tr "\\|" "\\t"
|
25
|
+
| sqlite3 "$DIR/$i.db" | tr "\\|" "\\t"
|
26
26
|
echo "$i" >> miga-project.log
|
27
27
|
done
|
28
28
|
) | gzip -9c > miga-project.txt.gz
|
data/scripts/mytaxa.bash
CHANGED
@@ -14,15 +14,16 @@ if [[ "$MIGA_MYTAXA" == "no" ]] ; then
|
|
14
14
|
echo "This system doesn't currently support MyTaxa." \
|
15
15
|
> "$DATASET.nomytaxa.txt"
|
16
16
|
else
|
17
|
-
MT=$(dirname -- "$(which MyTaxa)")
|
18
|
-
|
19
17
|
# Check type of dataset
|
20
18
|
MULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --multi \
|
21
19
|
| wc -l | awk '{print $1}')
|
22
20
|
if [[ "$MULTI" -eq "1" ]] ; then
|
23
21
|
# Check requirements
|
24
|
-
|
25
|
-
|
22
|
+
MT=$(dirname -- "$(which MyTaxa)")
|
23
|
+
DB="$MIGA_HOME/.miga_db/AllGenomes.faa.dmnd"
|
24
|
+
[[ -e "$DB" ]] || DB="$MT/AllGenomes.faa.dmnd"
|
25
|
+
if [[ ! -e "$DB" ]] ; then
|
26
|
+
echo "Cannot locate the database: AllGenomes.faa.dmnd:" \
|
26
27
|
"no such file or directory" >&2
|
27
28
|
exit 1
|
28
29
|
fi
|
@@ -40,7 +41,7 @@ else
|
|
40
41
|
# Execute search
|
41
42
|
FAA="../../../06.cds/$DATASET.faa"
|
42
43
|
[[ -s "$FAA" ]] || FAA="${FAA}.gz"
|
43
|
-
diamond blastp -q "$FAA" -d "$
|
44
|
+
diamond blastp -q "$FAA" -d "$DB" \
|
44
45
|
-a "$DATASET.daa" -k 5 -p "$CORES" --min-score 60
|
45
46
|
diamond view -a "$DATASET.daa" -o "$DATASET.blast"
|
46
47
|
|
data/scripts/mytaxa_scan.bash
CHANGED
@@ -5,7 +5,6 @@ SCRIPT="mytaxa_scan"
|
|
5
5
|
# shellcheck source=scripts/miga.bash
|
6
6
|
. "$MIGA/scripts/miga.bash" || exit 1
|
7
7
|
DIR="$PROJECT/data/07.annotation/03.qa/02.mytaxa_scan"
|
8
|
-
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
9
8
|
cd "$DIR"
|
10
9
|
|
11
10
|
# Initialize
|
@@ -14,17 +13,16 @@ if [[ "$MIGA_MYTAXA" == "no" ]] ; then
|
|
14
13
|
echo "This system doesn't currently support MyTaxa." \
|
15
14
|
> "$DATASET.nomytaxa.txt"
|
16
15
|
else
|
17
|
-
MT=$(dirname -- "$(which MyTaxa)")
|
18
|
-
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
|
19
|
-
trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
|
20
|
-
|
21
16
|
# Check type of dataset
|
22
17
|
NOMULTI=$(miga list_datasets -P "$PROJECT" -D "$DATASET" --no-multi \
|
23
18
|
| wc -l | awk '{print $1}')
|
24
19
|
if [[ "$NOMULTI" -eq "1" ]] ; then
|
25
20
|
# Check requirements
|
21
|
+
MT=$(dirname -- "$(which MyTaxa)")
|
22
|
+
DB="$MIGA_HOME/.miga_db/AllGenomes.faa.dmnd"
|
23
|
+
[[ -e "$DB" ]] || DB="$MT/AllGenomes.faa.dmnd"
|
26
24
|
if [[ ! -e "$MT/AllGenomes.faa.dmnd" ]] ; then
|
27
|
-
echo "Cannot locate the database:
|
25
|
+
echo "Cannot locate the database: AllGenomes.faa.dmnd:" \
|
28
26
|
"no such file or directory" >&2
|
29
27
|
exit 1
|
30
28
|
fi
|
@@ -39,13 +37,16 @@ else
|
|
39
37
|
exit 1
|
40
38
|
fi
|
41
39
|
|
40
|
+
TMPDIR=$(mktemp -d /tmp/MiGA.XXXXXXXXXXXX)
|
41
|
+
trap "rm -rf '$TMPDIR'; exit" SIGHUP SIGINT SIGTERM
|
42
|
+
|
42
43
|
FAA="../../../06.cds/$DATASET.faa"
|
43
44
|
[[ -s "$FAA" ]] || FAA="${FAA}.gz"
|
44
45
|
if [[ ! -s "$DATASET.mytaxa" ]] ; then
|
45
46
|
# Execute search
|
46
47
|
if [[ ! -s "$DATASET.blast" ]] ; then
|
47
48
|
diamond blastp -q "$FAA" -a "$DATASET.daa" -t "$TMPDIR" \
|
48
|
-
-d "$
|
49
|
+
-d "$DB" -k 5 -p "$CORES" --min-score 60
|
49
50
|
diamond view -a "$DATASET.daa" -o "$DATASET.blast" -t "$TMPDIR"
|
50
51
|
fi
|
51
52
|
|
data/scripts/ogs.bash
CHANGED
@@ -12,8 +12,7 @@ miga_start_project_step "$DIR"
|
|
12
12
|
DS=$(miga ls -P "$PROJECT" --ref --no-multi)
|
13
13
|
|
14
14
|
if [[ -n $DS ]] ; then
|
15
|
-
MIN_ID=$(miga
|
16
|
-
[[ $MIN_ID == "?" ]] && MIN_ID=80
|
15
|
+
MIN_ID=$(miga option -P "$PROJECT" --key ogs_identity)
|
17
16
|
if [[ ! -s miga-project.ogs ]] ; then
|
18
17
|
# Extract RBMs
|
19
18
|
if [[ ! -s miga-project.abc ]] ; then
|
@@ -34,7 +33,7 @@ if [[ -n $DS ]] ; then
|
|
34
33
|
|
35
34
|
# Estimate OGs and Clean RBMs
|
36
35
|
ogs.mcl.rb -o miga-project.ogs --abc miga-project.abc -t "$CORES"
|
37
|
-
if [[ $(miga
|
36
|
+
if [[ $(miga option -P "$PROJECT" --key clean_ogs) == "false" ]] ; then
|
38
37
|
gzip -9 miga-project.abc
|
39
38
|
else
|
40
39
|
rm miga-project.abc
|
data/scripts/ssu.bash
CHANGED
@@ -16,6 +16,7 @@ if [[ -s $fa ]] ; then
|
|
16
16
|
# Run barrnap
|
17
17
|
barrnap --quiet --threads "$CORES" "$fa" | grep "^##gff\\|;product=16S " \
|
18
18
|
> "$DATASET.ssu.gff"
|
19
|
+
|
19
20
|
# Extract
|
20
21
|
bedtools getfasta -s "-fi" "$fa" -bed "$DATASET.ssu.gff" \
|
21
22
|
-fo "$DATASET.ssu.all.fa"
|
@@ -24,9 +25,22 @@ if [[ -s $fa ]] ; then
|
|
24
25
|
FastA.filter.pl "$DATASET.ssu.fa.id" "$DATASET.ssu.all.fa" > "$DATASET.ssu.fa"
|
25
26
|
rm "$DATASET.ssu.fa.id"
|
26
27
|
[[ -e "$fa.fai" ]] && rm "$fa.fai"
|
28
|
+
|
29
|
+
# RDP classifier
|
30
|
+
if [[ "$MIGA_RDP" == "yes" && -s "$DATASET.ssu.all.fa" ]] ; then
|
31
|
+
java -jar "$MIGA_HOME/.miga_db/classifier.jar" classify \
|
32
|
+
-c 0.8 -f fixrank -g 16srrna -o "$DATASET.rdp.tsv" \
|
33
|
+
"$DATASET.ssu.all.fa"
|
34
|
+
echo "# Version: $(perl -pe 's/.*://' \
|
35
|
+
< "$MIGA_HOME/.miga_db/classifier.version.txt" \
|
36
|
+
| grep . | paste - - | perl -pe 's/\t/; /')" \
|
37
|
+
>> "$DATASET.rdp.tsv"
|
38
|
+
fi
|
39
|
+
|
27
40
|
# Gzip
|
28
|
-
|
29
|
-
|
41
|
+
for x in ssu.gff ssu.all.fa rdp.tsv ; do
|
42
|
+
[[ -e "${DATASET}.${x}" ]] && gzip -9 -f "${DATASET}.${x}"
|
43
|
+
done
|
30
44
|
fi
|
31
45
|
|
32
46
|
# Finalize
|
data/test/dataset_test.rb
CHANGED
@@ -29,7 +29,7 @@ class DatasetTest < Test::Unit::TestCase
|
|
29
29
|
assert_raise { MiGA::Dataset.new(project, 'dataset-1') }
|
30
30
|
assert_equal(project, dataset.project)
|
31
31
|
assert_equal('dataset0', dataset.name)
|
32
|
-
assert_predicate(dataset, :
|
32
|
+
assert_predicate(dataset, :ref?)
|
33
33
|
assert_equal(MiGA::Metadata, dataset.metadata.class)
|
34
34
|
assert_equal(:incomplete, dataset.status)
|
35
35
|
end
|
@@ -38,14 +38,14 @@ class DatasetTest < Test::Unit::TestCase
|
|
38
38
|
d2 = project.add_dataset('ds_save')
|
39
39
|
assert_respond_to(d2, :save)
|
40
40
|
d2.save
|
41
|
-
assert_not_predicate(d2, :
|
42
|
-
assert_not_predicate(d2, :
|
41
|
+
assert_not_predicate(d2, :multi?)
|
42
|
+
assert_not_predicate(d2, :nonmulti?)
|
43
43
|
assert_nil(d2.metadata[:type])
|
44
44
|
d2.metadata[:type] = :metagenome
|
45
45
|
d2.save
|
46
46
|
assert_equal(:metagenome, d2.metadata[:type])
|
47
|
-
assert_predicate(d2, :
|
48
|
-
assert_not_predicate(d2, :
|
47
|
+
assert_predicate(d2, :multi?)
|
48
|
+
assert_not_predicate(d2, :nonmulti?)
|
49
49
|
end
|
50
50
|
|
51
51
|
def test_remove
|
data/test/net_test.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
class FormatTest < Test::Unit::TestCase
|
4
|
+
include TestHelper
|
5
|
+
|
6
|
+
def test_known_hosts
|
7
|
+
m = MiGA::MiGA
|
8
|
+
assert_not_nil(m.known_hosts(:miga_db))
|
9
|
+
assert_not_nil(m.known_hosts('miga_db'))
|
10
|
+
assert_not_nil(m.known_hosts(:miga_dist))
|
11
|
+
assert_raise { m.known_kosts(:not_a_host) }
|
12
|
+
end
|
13
|
+
|
14
|
+
def test_remote_connection
|
15
|
+
declare_remote_access
|
16
|
+
m = MiGA::MiGA
|
17
|
+
assert_raise { m.remote_connection(:bad_descriptor) }
|
18
|
+
assert_raise { m.remote_connection('http://microbial-genomes.org/') }
|
19
|
+
c = m.remote_connection(:miga_db)
|
20
|
+
assert_equal(Net::FTP, c.class)
|
21
|
+
c.close
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_download_file_ftp
|
25
|
+
declare_remote_access
|
26
|
+
m = MiGA::MiGA
|
27
|
+
f = tmpfile('t/test.txt')
|
28
|
+
d = File.dirname(f)
|
29
|
+
assert(!Dir.exist?(d))
|
30
|
+
m.download_file_ftp(:miga_online_ftp, 'test.txt', f)
|
31
|
+
assert(Dir.exist?(d))
|
32
|
+
assert_equal('miga', File.read(f).chomp)
|
33
|
+
end
|
34
|
+
end
|
data/test/with_option_test.rb
ADDED
@@ -0,0 +1,115 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'test_helper'
|
4
|
+
require 'miga/common/with_option'
|
5
|
+
|
6
|
+
class WithDaemonTest < Test::Unit::TestCase
|
7
|
+
include TestHelper
|
8
|
+
|
9
|
+
class TestWithOption < MiGA::MiGA
|
10
|
+
include MiGA::Common::WithOption
|
11
|
+
|
12
|
+
attr_reader :metadata, :saved
|
13
|
+
|
14
|
+
def initialize
|
15
|
+
@metadata = { range: 0.9 }
|
16
|
+
@saved = false
|
17
|
+
end
|
18
|
+
|
19
|
+
def self.OPTIONS
|
20
|
+
{
|
21
|
+
empty: {},
|
22
|
+
float: { type: Float },
|
23
|
+
range: { default: 1.0, in: -5.5..5.5, type: Float },
|
24
|
+
default: { default: 9, type: Integer },
|
25
|
+
token: { type: Integer, tokens: %w[yes no 0] },
|
26
|
+
proc: { default: proc { Date.today } },
|
27
|
+
bool: { in: [true, false] }
|
28
|
+
}
|
29
|
+
end
|
30
|
+
|
31
|
+
def save
|
32
|
+
@saved = true
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_with_option
|
37
|
+
o = TestWithOption.new
|
38
|
+
assert_respond_to(o, :option)
|
39
|
+
assert_equal(1, o.metadata.size)
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_option
|
43
|
+
o = TestWithOption.new
|
44
|
+
assert_equal(9, o.option(:default))
|
45
|
+
assert_nil(o.option(:bool))
|
46
|
+
assert_raise { o.option(:not_an_option) }
|
47
|
+
assert_nil(o.option(:empty))
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_set_bool
|
51
|
+
o = TestWithOption.new
|
52
|
+
assert_nil(o.option(:bool))
|
53
|
+
assert(!o.saved)
|
54
|
+
assert_raise { o.set_option(:bool, 'true') }
|
55
|
+
assert_nil(o.option(:bool))
|
56
|
+
assert(!o.saved)
|
57
|
+
assert_equal(true, o.set_option(:bool, 'true', true))
|
58
|
+
assert(o.saved)
|
59
|
+
assert_equal(false, o.set_option(:bool, false))
|
60
|
+
assert_equal(false, o.set_option(:bool, 'false', true))
|
61
|
+
assert_nil(o.set_option(:bool, nil))
|
62
|
+
end
|
63
|
+
|
64
|
+
def test_set_empty
|
65
|
+
o = TestWithOption.new
|
66
|
+
assert_nil(o.option(:empty))
|
67
|
+
assert_equal('a', o.set_option(:empty, 'a'))
|
68
|
+
assert_equal('1', o.set_option(:empty, '1', true))
|
69
|
+
end
|
70
|
+
|
71
|
+
def test_all_options
|
72
|
+
o = TestWithOption.new
|
73
|
+
assert(o.all_options.is_a?(Hash))
|
74
|
+
assert_include(o.all_options.keys, :bool)
|
75
|
+
assert_nil(o.all_options[:bool])
|
76
|
+
end
|
77
|
+
|
78
|
+
def test_option?
|
79
|
+
o = TestWithOption.new
|
80
|
+
assert(o.option?(:range))
|
81
|
+
assert(!o.option?(:not_an_option))
|
82
|
+
end
|
83
|
+
|
84
|
+
def test_option_metadata
|
85
|
+
o = TestWithOption.new
|
86
|
+
assert_equal(0.9, o.option(:range))
|
87
|
+
assert_equal(1.0, o.set_option(:range, nil))
|
88
|
+
assert_equal(2.0, o.set_option(:range, 2.0))
|
89
|
+
assert_equal(3.0, o.set_option(:range, '3', true))
|
90
|
+
end
|
91
|
+
|
92
|
+
def test_option_range
|
93
|
+
o = TestWithOption.new
|
94
|
+
assert_raise { o.set_option(:range, 9.0) }
|
95
|
+
assert_raise { o.set_option(:range, 3) }
|
96
|
+
assert_raise { o.set_option(:range, true) }
|
97
|
+
end
|
98
|
+
|
99
|
+
def test_option_proc
|
100
|
+
o = TestWithOption.new
|
101
|
+
assert(o.option(:proc).is_a?(Date))
|
102
|
+
assert(o.set_option(:proc, 1).is_a?(Integer))
|
103
|
+
assert(o.set_option(:proc, nil).is_a?(Date))
|
104
|
+
end
|
105
|
+
|
106
|
+
def test_token
|
107
|
+
o = TestWithOption.new
|
108
|
+
assert_nil(o.option(:token))
|
109
|
+
assert_equal(1, o.set_option(:token, 1))
|
110
|
+
assert_equal(-2, o.set_option(:token, '-2', true))
|
111
|
+
assert_equal('yes', o.set_option(:token, 'yes'))
|
112
|
+
assert_equal('0', o.set_option(:token, '0', true))
|
113
|
+
assert_raise { o.set_option(:token, 'maybe') }
|
114
|
+
end
|
115
|
+
end
|
data/utils/cleanup-databases.rb
CHANGED
@@ -15,11 +15,11 @@ m.say 'Cleaning Databases'
|
|
15
15
|
(0..thr - 1).each do |t|
|
16
16
|
fork do
|
17
17
|
dsn.each_with_index do |i, idx|
|
18
|
-
m.advance('Dataset:',
|
18
|
+
m.advance('Dataset:', idx + 1, dsn.size) if t == 0
|
19
19
|
next unless (idx % thr) == t
|
20
20
|
|
21
21
|
d = p.dataset(i)
|
22
|
-
next unless d.
|
22
|
+
next unless d.ref? && d.active?
|
23
23
|
|
24
24
|
d.cleanup_distances!
|
25
25
|
end
|
@@ -28,4 +28,3 @@ end
|
|
28
28
|
Process.waitall
|
29
29
|
m.advance('Dataset:', dsn.size, dsn.size)
|
30
30
|
m.say
|
31
|
-
|
data/utils/distance/commands.rb
CHANGED
@@ -9,7 +9,7 @@ module MiGA::DistanceRunner::Commands
|
|
9
9
|
return y unless y.nil? || y.zero?
|
10
10
|
|
11
11
|
# Try hAAI (except in clade projects)
|
12
|
-
unless @ref_project.
|
12
|
+
unless @ref_project.clade?
|
13
13
|
y = haai(target)
|
14
14
|
return y unless y.nil? || y.zero?
|
15
15
|
end
|
@@ -75,7 +75,7 @@ module MiGA::DistanceRunner::Commands
|
|
75
75
|
v = `aai.rb -1 "#{f1}" -2 "#{f2}" -S "#{db}" \
|
76
76
|
--name1 "#{n1}" --name2 "#{n2}" \
|
77
77
|
-t "#{o[:thr]}" -a --lookup-first "--#{o[:aai_save_rbm]}" \
|
78
|
-
-p "#{o[:aai_p]
|
78
|
+
-p "#{o[:aai_p]}"`.chomp
|
79
79
|
(v.nil? || v.empty?) ? 0 : v.to_f
|
80
80
|
end
|
81
81
|
|
data/utils/distance/database.rb
CHANGED
@@ -66,7 +66,7 @@ module MiGA::DistanceRunner::Database
|
|
66
66
|
return y unless y.nil? || y.zero?
|
67
67
|
|
68
68
|
# Check if self.dataset <- target is done (another thread)
|
69
|
-
if dataset.
|
69
|
+
if dataset.ref? && project.path == ref_project.path
|
70
70
|
y = data_from_db(
|
71
71
|
target.name, dataset.name, ref_db(metric, target.name), metric
|
72
72
|
)
|
data/utils/distance/pipeline.rb
CHANGED
@@ -31,10 +31,8 @@ module MiGA::DistanceRunner::Pipeline
|
|
31
31
|
|
32
32
|
# Run distances against datasets listed in metadata's +:dist_req+
|
33
33
|
def distances_by_request(metric)
|
34
|
-
return unless dataset.metadata[:dist_req]
|
35
|
-
|
36
34
|
$stderr.puts 'Running distances by request'
|
37
|
-
dataset.
|
35
|
+
dataset.option(:dist_req).each do |target|
|
38
36
|
ds = ref_project.dataset(target) and send(metric, ds)
|
39
37
|
end
|
40
38
|
end
|
@@ -112,7 +110,7 @@ module MiGA::DistanceRunner::Pipeline
|
|
112
110
|
$stderr.puts 'Transferring taxonomy'
|
113
111
|
return if tax.nil?
|
114
112
|
|
115
|
-
pval =
|
113
|
+
pval = project.option(:tax_pvalue)
|
116
114
|
tax_a = tax
|
117
115
|
.select { |i| i[1] != '?' && i[2] <= pval }
|
118
116
|
.map { |i| i[0, 2].join(':') }
|
data/utils/distance/runner.rb
CHANGED
@@ -21,47 +21,39 @@ class MiGA::DistanceRunner
|
|
21
21
|
@home = File.expand_path('data/09.distances', project.path)
|
22
22
|
|
23
23
|
# Default opts
|
24
|
-
|
25
|
-
|
26
|
-
end
|
27
|
-
@opts[:aai_save_rbm] ||= ENV.fetch('MIGA_AAI_SAVE_RBM') do
|
28
|
-
project.is_clade? ? 'save-rbm' : 'no-save-rbm'
|
29
|
-
end
|
24
|
+
@opts[:aai_save_rbm] =
|
25
|
+
project.option(:aai_save_rbm) ? 'save-rbm' : 'no-save-rbm'
|
30
26
|
@opts[:thr] ||= ENV.fetch('CORES') { 2 }.to_i
|
31
|
-
if opts[:run_taxonomy]
|
32
|
-
ref_path = project.
|
27
|
+
if opts[:run_taxonomy] && project.option(:ref_project)
|
28
|
+
ref_path = project.option(:ref_project)
|
33
29
|
@home = File.expand_path('05.taxonomy', @home)
|
34
30
|
@ref_project = MiGA::Project.load(ref_path)
|
35
31
|
raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
|
36
|
-
elsif !opts[:run_taxonomy]
|
37
|
-
ref_path = dataset.
|
38
|
-
if project.
|
39
|
-
ref_path = File.expand_path(ref_path, project.
|
32
|
+
elsif !opts[:run_taxonomy] && dataset.option(:db_project)
|
33
|
+
ref_path = dataset.option(:db_project)
|
34
|
+
if project.option(:db_proj_dir)
|
35
|
+
ref_path = File.expand_path(ref_path, project.option(:db_proj_dir))
|
40
36
|
end
|
41
37
|
@ref_project = MiGA::Project.load(ref_path)
|
42
38
|
raise "Cannot load reference project: #{ref_path}" if @ref_project.nil?
|
43
39
|
else
|
44
40
|
@ref_project = project
|
45
41
|
end
|
46
|
-
[
|
47
|
-
@opts[m] ||= ref_project.
|
42
|
+
%i[haai_p aai_p ani_p distances_checkpoint].each do |m|
|
43
|
+
@opts[m] ||= ref_project.option(m)
|
48
44
|
end
|
49
|
-
@opts[:aai_p] ||= 'blast+'
|
50
|
-
@opts[:ani_p] ||= 'blast+'
|
51
|
-
@opts[:distances_checkpoint] ||= 10
|
52
|
-
@opts[:distances_checkpoint] = @opts[:distances_checkpoint].to_i
|
53
45
|
$stderr.puts "Options: #{opts}"
|
54
46
|
end
|
55
47
|
|
56
48
|
# Launch the appropriate analysis
|
57
49
|
def go!
|
58
50
|
$stderr.puts "Launching analysis"
|
59
|
-
return if dataset.
|
51
|
+
return if dataset.multi?
|
60
52
|
|
61
53
|
Dir.mktmpdir do |tmp_dir|
|
62
54
|
@tmp = tmp_dir
|
63
55
|
create_temporals
|
64
|
-
opts[:run_taxonomy] ? go_taxonomy! : dataset.
|
56
|
+
opts[:run_taxonomy] ? go_taxonomy! : dataset.ref? ? go_ref! : go_query!
|
65
57
|
end
|
66
58
|
end
|
67
59
|
|
@@ -73,7 +65,7 @@ class MiGA::DistanceRunner
|
|
73
65
|
|
74
66
|
# first-come-first-serve traverse
|
75
67
|
ref_project.each_dataset do |ds|
|
76
|
-
next if !ds.
|
68
|
+
next if !ds.ref? or ds.multi? or ds.result(:essential_genes).nil?
|
77
69
|
|
78
70
|
puts "[ #{Time.now} ] #{ds.name}"
|
79
71
|
ani_after_aai(ds)
|
@@ -88,7 +80,7 @@ class MiGA::DistanceRunner
|
|
88
80
|
def go_query!
|
89
81
|
$stderr.puts 'Launching analysis for query dataset'
|
90
82
|
# Check if project is ready
|
91
|
-
tsk = ref_project.
|
83
|
+
tsk = ref_project.clade? ? [:subclades, :ani] : [:clade_finding, :aai]
|
92
84
|
res = ref_project.result(tsk[0])
|
93
85
|
return if res.nil?
|
94
86
|
|
@@ -137,7 +129,7 @@ class MiGA::DistanceRunner
|
|
137
129
|
# Launch analysis for taxonomy jobs
|
138
130
|
def go_taxonomy!
|
139
131
|
$stderr.puts 'Launching taxonomy analysis'
|
140
|
-
return unless project.
|
132
|
+
return unless project.option(:ref_project)
|
141
133
|
|
142
134
|
go_query! # <- yeah, it's actually the same, just different ref_project
|
143
135
|
end
|