miga-base 1.3.8.2 → 1.3.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/miga/cli/action/add_result.rb +22 -1
- data/lib/miga/cli/action/browse/about.html +4 -2
- data/lib/miga/cli/action/download/gtdb.rb +1 -1
- data/lib/miga/cli/action/download/ncbi.rb +43 -68
- data/lib/miga/cli/action/download/seqcode.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +1 -8
- data/lib/miga/cli/action/wf.rb +15 -6
- data/lib/miga/cli/objects_helper.rb +3 -0
- data/lib/miga/cli/opt_helper.rb +8 -2
- data/lib/miga/common/net.rb +100 -18
- data/lib/miga/dataset/base.rb +40 -12
- data/lib/miga/dataset/hooks.rb +8 -0
- data/lib/miga/dataset/result/ignore.rb +14 -2
- data/lib/miga/dataset/type.rb +51 -0
- data/lib/miga/dataset.rb +3 -22
- data/lib/miga/json.rb +9 -0
- data/lib/miga/project/base.rb +15 -9
- data/lib/miga/project.rb +7 -1
- data/lib/miga/remote_dataset/base.rb +117 -36
- data/lib/miga/remote_dataset/download.rb +121 -54
- data/lib/miga/remote_dataset.rb +34 -13
- data/lib/miga/result/stats.rb +2 -0
- data/lib/miga/result/versions.rb +23 -0
- data/lib/miga/result.rb +7 -1
- data/lib/miga/taxonomy/base.rb +3 -2
- data/lib/miga/version.rb +2 -2
- data/scripts/assembly.bash +15 -1
- data/scripts/cds.bash +9 -3
- data/scripts/distances.bash +103 -5
- data/scripts/essential_genes.bash +14 -1
- data/scripts/mytaxa.bash +18 -3
- data/scripts/mytaxa_scan.bash +16 -3
- data/scripts/read_quality.bash +6 -2
- data/scripts/ssu.bash +19 -1
- data/scripts/stats.bash +9 -3
- data/scripts/taxonomy.bash +98 -2
- data/scripts/trimmed_fasta.bash +10 -2
- data/scripts/trimmed_reads.bash +26 -6
- data/test/dataset_test.rb +17 -2
- data/test/hook_test.rb +3 -2
- data/test/net_test.rb +21 -5
- data/test/project_test.rb +13 -0
- data/test/remote_dataset_test.rb +106 -7
- data/test/result_test.rb +47 -21
- data/test/taxonomy_test.rb +9 -3
- data/utils/distance/runner.rb +3 -1
- data/utils/distances.rb +1 -1
- metadata +4 -2
data/scripts/read_quality.bash
CHANGED
@@ -19,6 +19,10 @@ for s in 1 2 ; do
|
|
19
19
|
done
|
20
20
|
|
21
21
|
# Finalize
|
22
|
-
miga date > "$DATASET.done"
|
23
|
-
|
22
|
+
miga date > "${DATASET}.done"
|
23
|
+
cat <<VERSIONS \
|
24
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
25
|
+
=> MiGA
|
26
|
+
$(miga --version)
|
27
|
+
VERSIONS
|
24
28
|
|
data/scripts/ssu.bash
CHANGED
@@ -65,4 +65,22 @@ fi
|
|
65
65
|
|
66
66
|
# Finalize
|
67
67
|
miga date > "${DATASET}.done"
|
68
|
-
|
68
|
+
cat <<VERSIONS \
|
69
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
70
|
+
=> MiGA
|
71
|
+
$(miga --version)
|
72
|
+
$(
|
73
|
+
if [[ -s $fa ]] ; then
|
74
|
+
echo "=> barrnap"
|
75
|
+
barrnap --version 2>&1 | perl -pe 's/^barrnap //'
|
76
|
+
echo "=> bedtools"
|
77
|
+
bedtools --version 2>&1 | perl -pe 's/^bedtools //'
|
78
|
+
echo "=> Enveomics Collection"
|
79
|
+
echo "version unknown"
|
80
|
+
echo "=> RDP Naive Bayes Classifier"
|
81
|
+
gzip -cd "${DATASET}.rdp.tsv.gz" | tail -n 1 | perl -pe 's/.*: //'
|
82
|
+
echo "=> tRNAscan-SE"
|
83
|
+
tRNAscan-SE -h 2>&1 | head -n 2 | tail -n 1 | perl -pe 's/^tRNAscan-SE //'
|
84
|
+
fi
|
85
|
+
)
|
86
|
+
VERSIONS
|
data/scripts/stats.bash
CHANGED
@@ -12,11 +12,17 @@ cd "$DIR"
|
|
12
12
|
miga date > "${DATASET}.start"
|
13
13
|
|
14
14
|
# Calculate statistics
|
15
|
-
for i in raw_reads trimmed_fasta assembly
|
15
|
+
for i in raw_reads trimmed_fasta assembly \
|
16
|
+
cds essential_genes distances taxonomy ssu ; do
|
16
17
|
echo "# $i"
|
17
18
|
miga stats --compute-and-save --ignore-empty -P "$PROJECT" -D "$DATASET" -r $i
|
18
19
|
done
|
19
20
|
|
20
21
|
# Finalize
|
21
|
-
miga date > "$DATASET.done"
|
22
|
-
|
22
|
+
miga date > "${DATASET}.done"
|
23
|
+
cat <<VERSIONS \
|
24
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
25
|
+
=> MiGA
|
26
|
+
$(miga --version)
|
27
|
+
VERSIONS
|
28
|
+
|
data/scripts/taxonomy.bash
CHANGED
@@ -16,5 +16,101 @@ ruby -I "$MIGA/lib" \
|
|
16
16
|
"$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1
|
17
17
|
|
18
18
|
# Finalize
|
19
|
-
|
20
|
-
|
19
|
+
fastaai=no
|
20
|
+
aai=no
|
21
|
+
ani=no
|
22
|
+
blast=no
|
23
|
+
blat=no
|
24
|
+
diamond=no
|
25
|
+
fastani=no
|
26
|
+
REF_PROJECT=$(miga option -P "$PROJECT" -k ref_project)
|
27
|
+
if [[ -S "$REF_PROJECT" ]] ; then
|
28
|
+
case $(miga option -P "$REF_PROJECT" -k haai_p) in
|
29
|
+
fastaai)
|
30
|
+
fastaai=yes
|
31
|
+
;;
|
32
|
+
diamond)
|
33
|
+
diamond=yes
|
34
|
+
aai=yes
|
35
|
+
;;
|
36
|
+
blast)
|
37
|
+
blast=yes
|
38
|
+
aai=yes
|
39
|
+
;;
|
40
|
+
esac
|
41
|
+
|
42
|
+
case $(miga option -P "$REF_PROJECT" -k aai_p) in
|
43
|
+
diamond)
|
44
|
+
diamond=yes
|
45
|
+
aai=yes
|
46
|
+
;;
|
47
|
+
blast)
|
48
|
+
blast=yes
|
49
|
+
aai=yes
|
50
|
+
;;
|
51
|
+
esac
|
52
|
+
|
53
|
+
case $(miga option -P "$REF_PROJECT" -k ani_p) in
|
54
|
+
blast)
|
55
|
+
blast=yes
|
56
|
+
ani=yes
|
57
|
+
;;
|
58
|
+
blat)
|
59
|
+
blat=yes
|
60
|
+
ani=yes
|
61
|
+
;;
|
62
|
+
fastani)
|
63
|
+
fastani=yes
|
64
|
+
;;
|
65
|
+
esac
|
66
|
+
fi
|
67
|
+
|
68
|
+
miga date > "${DATASET}.done"
|
69
|
+
cat <<VERSIONS \
|
70
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
71
|
+
=> MiGA
|
72
|
+
$(miga --version)
|
73
|
+
$(
|
74
|
+
if [[ "$fastaai" == "yes" ]] ; then
|
75
|
+
echo "=> FastAAI"
|
76
|
+
fastaai version 2>&1 | perl -pe 's/.*=//'
|
77
|
+
fi
|
78
|
+
)
|
79
|
+
$(
|
80
|
+
if [[ "$fastani" == "yes" ]] ; then
|
81
|
+
echo "=> FastANI"
|
82
|
+
fastANI --version 2>&1 | grep . | perl -pe 's/^version //'
|
83
|
+
fi
|
84
|
+
)
|
85
|
+
$(
|
86
|
+
if [[ "$aai" == "yes" ]] ; then
|
87
|
+
echo "=> Enveomics Collection: aai.rb"
|
88
|
+
aai.rb --version 2>&1 | perl -pe 's/.*: //'
|
89
|
+
fi
|
90
|
+
)
|
91
|
+
$(
|
92
|
+
if [[ "$ani" == "yes" ]] ; then
|
93
|
+
echo "=> Enveomics Collection: ani.rb"
|
94
|
+
ani.rb --version 2>&1 | perl -pe 's/.*: //'
|
95
|
+
fi
|
96
|
+
)
|
97
|
+
$(
|
98
|
+
if [[ "$blast" == "yes" ]] ; then
|
99
|
+
echo "=> NCBI BLAST+"
|
100
|
+
blastp -version 2>&1 | tail -n 1 | perl -pe 's/.*: blast //'
|
101
|
+
fi
|
102
|
+
)
|
103
|
+
$(
|
104
|
+
if [[ "$blat" == "yes" ]] ; then
|
105
|
+
echo "=> BLAT"
|
106
|
+
blat 2>&1 | head -n 1 | perl -pe 's/.* v\. //' | perl -pe 's/ fast .*//'
|
107
|
+
fi
|
108
|
+
)
|
109
|
+
$(
|
110
|
+
if [[ "$diamond" == "yes" ]] ; then
|
111
|
+
echo "=> Diamond"
|
112
|
+
diamond --version 2>&1 | perl -pe 's/^diamond version //'
|
113
|
+
fi
|
114
|
+
)
|
115
|
+
VERSIONS
|
116
|
+
|
data/scripts/trimmed_fasta.bash
CHANGED
@@ -32,6 +32,14 @@ for x in 1.fasta 2.fasta SingleReads.fa CoupledReads.fa ; do
|
|
32
32
|
done
|
33
33
|
|
34
34
|
# Finalize
|
35
|
-
miga date > "$DATASET.done"
|
36
|
-
|
35
|
+
miga date > "${DATASET}.done"
|
36
|
+
cat <<VERSIONS \
|
37
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
38
|
+
=> MiGA
|
39
|
+
$(miga --version)
|
40
|
+
=> Enveomics Collection: FastQ.maskQual.rb
|
41
|
+
$(FastQ.maskQual.rb --version | perl -pe 's/.* //')
|
42
|
+
=> Enveomics Collection: FastA.interpose.pl
|
43
|
+
version unknown
|
44
|
+
VERSIONS
|
37
45
|
|
data/scripts/trimmed_reads.bash
CHANGED
@@ -36,15 +36,19 @@ if [[ -s "$b.2.fastq.gz" ]] ; then
|
|
36
36
|
$CMD -1 "$b.1.fastq.gz" -2 "$b.2.fastq.gz"
|
37
37
|
for s in 1 2 ; do
|
38
38
|
mv "$b/${s}.post_trim_${b}.${s}.fq.gz" "${b}.${s}.clipped.fastq.gz"
|
39
|
-
mv "$b/${s}.pre_trim_QC_${b}.${s}.html"
|
40
|
-
|
39
|
+
mv "$b/${s}.pre_trim_QC_${b}.${s}.html" \
|
40
|
+
"../03.read_quality/${b}.pre.${s}.html"
|
41
|
+
mv "$b/${s}.post_trim_QC_${b}.${s}.html" \
|
42
|
+
"../03.read_quality/${b}.post.${s}.html"
|
41
43
|
done
|
42
44
|
else
|
43
45
|
# Unpaired
|
44
46
|
$CMD -u "$b.1.fastq.gz"
|
45
47
|
mv "$b/unpaired.post_trim_${b}.1.fq.gz" "${b}.1.clipped.fastq.gz"
|
46
|
-
mv "$b/unpaired.pre_trim_QC_${b}.1.html"
|
47
|
-
|
48
|
+
mv "$b/unpaired.pre_trim_QC_${b}.1.html" \
|
49
|
+
"../03.read_quality/${b}.pre.1.html"
|
50
|
+
mv "$b/unpaired.post_trim_QC_${b}.1.html" \
|
51
|
+
"../03.read_quality/${b}.post.1.html"
|
48
52
|
fi
|
49
53
|
mv "$b/Subsample_Adapter_Detection.stats.txt" \
|
50
54
|
"../03.read_quality/$b.adapters.txt"
|
@@ -54,6 +58,22 @@ rm -r "$b"
|
|
54
58
|
rm -f "$b".[12].fastq.gz
|
55
59
|
|
56
60
|
# Finalize
|
57
|
-
miga date > "$DATASET.done"
|
58
|
-
|
61
|
+
miga date > "${DATASET}.done"
|
62
|
+
cat <<VERSIONS \
|
63
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
64
|
+
=> MiGA
|
65
|
+
$(miga --version)
|
66
|
+
=> Enveomics Collection: FastQ.tag.rb
|
67
|
+
$(FastQ.tag.rb --version | perl -pe 's/.* //')
|
68
|
+
=> Multitrim
|
69
|
+
version unknown
|
70
|
+
=> FaQCs
|
71
|
+
$(FaQCs --version 2>&1 | perl -pe 's/.*: //')
|
72
|
+
=> Seqtk
|
73
|
+
$(seqtk 2>&1 | grep Version | perl -pe 's/.*: //')
|
74
|
+
=> Fastp
|
75
|
+
$(fastp --version 2>&1 | perl -pe 's/^fastp //')
|
76
|
+
=> Falco
|
77
|
+
$(falco -V 2>&1 | tee)
|
78
|
+
VERSIONS
|
59
79
|
|
data/test/dataset_test.rb
CHANGED
@@ -40,7 +40,7 @@ class DatasetTest < Test::Unit::TestCase
|
|
40
40
|
d2.save
|
41
41
|
assert_not_predicate(d2, :multi?)
|
42
42
|
assert_not_predicate(d2, :nonmulti?)
|
43
|
-
|
43
|
+
assert_equal(:empty, d2.metadata[:type])
|
44
44
|
d2.metadata[:type] = :metagenome
|
45
45
|
d2.save
|
46
46
|
assert_equal(:metagenome, d2.metadata[:type])
|
@@ -89,25 +89,40 @@ class DatasetTest < Test::Unit::TestCase
|
|
89
89
|
assert_equal(:trimmed_reads, d2.first_preprocessing(true))
|
90
90
|
assert_equal(:read_quality, d2.next_preprocessing(true))
|
91
91
|
assert { !d2.done_preprocessing?(true) }
|
92
|
-
|
92
|
+
|
93
|
+
# Ref and undeclared type (empty)
|
93
94
|
assert { d2.ignore_task?(:mytaxa) }
|
94
95
|
assert { d2.ignore_task?(:mytaxa_scan) }
|
95
96
|
assert { d2.ignore_task?(:distances) }
|
97
|
+
assert { d2.ignore_task?(:essential_genes) }
|
98
|
+
|
96
99
|
# Ref and multi
|
97
100
|
d2.metadata[:type] = :metagenome
|
98
101
|
assert { !d2.ignore_task?(:mytaxa) }
|
99
102
|
assert { d2.ignore_task?(:mytaxa_scan) }
|
100
103
|
assert { d2.ignore_task?(:distances) }
|
104
|
+
assert { !d2.ignore_task?(:essential_genes) }
|
105
|
+
|
101
106
|
# Ref and nonmulti
|
102
107
|
d2.metadata[:type] = :genome
|
103
108
|
assert { d2.ignore_task?(:mytaxa) }
|
104
109
|
assert { !d2.ignore_task?(:mytaxa_scan) }
|
105
110
|
assert { !d2.ignore_task?(:distances) }
|
111
|
+
assert { !d2.ignore_task?(:essential_genes) }
|
112
|
+
|
106
113
|
# Qry and nonmulti
|
107
114
|
d2.metadata[:ref] = false
|
108
115
|
assert { d2.ignore_task?(:mytaxa) }
|
109
116
|
assert { d2.ignore_task?(:mytaxa_scan) }
|
110
117
|
assert { !d2.ignore_task?(:distances) }
|
118
|
+
assert { !d2.ignore_task?(:essential_genes) }
|
119
|
+
|
120
|
+
# Qry and plasmid
|
121
|
+
d2.metadata[:type] = :plasmid
|
122
|
+
assert { d2.ignore_task?(:mytaxa) }
|
123
|
+
assert { d2.ignore_task?(:mytaxa_scan) }
|
124
|
+
assert { !d2.ignore_task?(:distances) }
|
125
|
+
assert { d2.ignore_task?(:essential_genes) }
|
111
126
|
end
|
112
127
|
|
113
128
|
def test_profile_advance
|
data/test/hook_test.rb
CHANGED
@@ -9,9 +9,10 @@ class HookTest < Test::Unit::TestCase
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def test_add_hook
|
12
|
-
assert_nil(dataset.hooks[:
|
13
|
-
dataset.add_hook(:on_save, :run_lambda, Proc.new { $counter += 1 })
|
12
|
+
assert_nil(dataset.hooks[:on_remove])
|
14
13
|
assert_equal(1, dataset.hooks[:on_save].size)
|
14
|
+
dataset.add_hook(:on_save, :run_lambda, Proc.new { $counter += 1 })
|
15
|
+
assert_equal(2, dataset.hooks[:on_save].size)
|
15
16
|
$counter = 1
|
16
17
|
dataset.save
|
17
18
|
assert_equal(2, $counter)
|
data/test/net_test.rb
CHANGED
@@ -15,20 +15,36 @@ class FormatTest < Test::Unit::TestCase
|
|
15
15
|
declare_remote_access
|
16
16
|
m = MiGA::MiGA
|
17
17
|
assert_raise { m.remote_connection(:bad_descriptor) }
|
18
|
-
assert_raise { m.remote_connection('
|
18
|
+
assert_raise { m.remote_connection('ssh://microbial-genomes.org/') }
|
19
19
|
c = m.remote_connection(:miga_db)
|
20
20
|
assert_equal(Net::FTP, c.class)
|
21
21
|
c.close
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
24
|
+
def test_download_file_http
|
25
25
|
declare_remote_access
|
26
26
|
m = MiGA::MiGA
|
27
|
+
#o = m.http_request(:get, 'http://uibk.microbial-genomes.org/robots.txt')
|
28
|
+
o = m.http_request(:get, 'http://disc-genomics.uibk.ac.at/miga/robots.txt')
|
29
|
+
o = o.split(/\n/)
|
30
|
+
assert_equal(6, o.count)
|
31
|
+
assert_equal('#', o[1])
|
32
|
+
assert_equal('User-agent: *', o[2])
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_download_file_ftp
|
36
|
+
declare_remote_access
|
27
37
|
f = tmpfile('t/test.txt')
|
28
38
|
d = File.dirname(f)
|
29
39
|
assert(!Dir.exist?(d))
|
30
|
-
|
31
|
-
|
32
|
-
|
40
|
+
# TODO
|
41
|
+
# Bring back when I can connect to the Gatech's FTP
|
42
|
+
### m = MiGA::MiGA
|
43
|
+
### m.download_file_ftp(:miga_online_ftp, 'api_test.txt', f)
|
44
|
+
### assert(Dir.exist?(d))
|
45
|
+
### assert_equal('miga', File.read(f).chomp)
|
46
|
+
### File.unlink(f)
|
47
|
+
### m.download_file_ftp(:miga_db, '../api_test.txt', f)
|
48
|
+
### assert_equal('miga', File.read(f).chomp)
|
33
49
|
end
|
34
50
|
end
|
data/test/project_test.rb
CHANGED
@@ -8,6 +8,12 @@ class ProjectTest < Test::Unit::TestCase
|
|
8
8
|
initialize_miga_home
|
9
9
|
end
|
10
10
|
|
11
|
+
def test_class_variables
|
12
|
+
assert(MiGA::Project.INCLADE_TASKS.is_a? Array)
|
13
|
+
assert(MiGA::Project.DISTANCE_TASKS.is_a? Array)
|
14
|
+
assert(MiGA::Project.KNOWN_TYPES.is_a? Hash)
|
15
|
+
end
|
16
|
+
|
11
17
|
def create_result_files(project, res, exts)
|
12
18
|
d = MiGA::Project.RESULT_DIRS[res]
|
13
19
|
(['.done'] + exts).each do |x|
|
@@ -155,4 +161,11 @@ class ProjectTest < Test::Unit::TestCase
|
|
155
161
|
date3 = p1.add_result(:ogs, true, force: true)[:created]
|
156
162
|
assert_not_equal(date1, date3)
|
157
163
|
end
|
164
|
+
|
165
|
+
def test_options
|
166
|
+
assert_equal('fastaai', project.option(:haai_p))
|
167
|
+
assert_equal(false, project.option(:aai_save_rbm))
|
168
|
+
project.metadata[:type] = 'clade'
|
169
|
+
assert_equal(true, project.option(:aai_save_rbm))
|
170
|
+
end
|
158
171
|
end
|
data/test/remote_dataset_test.rb
CHANGED
@@ -7,6 +7,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
7
7
|
|
8
8
|
def setup
|
9
9
|
initialize_miga_home
|
10
|
+
ENV.delete('NCBI_API_KEY')
|
10
11
|
end
|
11
12
|
|
12
13
|
def test_class_universe
|
@@ -19,7 +20,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
19
20
|
assert_raise { MiGA::RemoteDataset.new('ids', :google, :ebi) }
|
20
21
|
end
|
21
22
|
|
22
|
-
def
|
23
|
+
def test_get
|
23
24
|
hiv2 = 'M30502.1'
|
24
25
|
{ embl: :ebi, nuccore: :ncbi }.each do |db, universe|
|
25
26
|
rd = MiGA::RemoteDataset.new(hiv2, db, universe)
|
@@ -47,7 +48,8 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
47
48
|
end
|
48
49
|
|
49
50
|
def test_net_ftp
|
50
|
-
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/
|
51
|
+
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/' \
|
52
|
+
'public/gap/GAPJ01.fasta.gz'
|
51
53
|
n = 'Cjac_L14'
|
52
54
|
rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
|
53
55
|
assert_equal([cjac], rd.ids)
|
@@ -86,26 +88,56 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
86
88
|
def test_type_status_asm
|
87
89
|
declare_remote_access
|
88
90
|
rd = MiGA::RemoteDataset.new('GCF_000018105.1', :assembly, :ncbi)
|
89
|
-
|
91
|
+
md = rd.get_metadata
|
92
|
+
assert(md[:is_type])
|
90
93
|
end
|
91
94
|
|
92
95
|
def test_nontype_status_asm
|
93
96
|
declare_remote_access
|
94
97
|
rd = MiGA::RemoteDataset.new('GCA_004684205.1', :assembly, :ncbi)
|
95
|
-
|
98
|
+
md = rd.get_metadata
|
99
|
+
assert(!md[:is_type])
|
96
100
|
end
|
97
101
|
|
98
102
|
def test_type_status_nuccore
|
99
103
|
declare_remote_access
|
100
104
|
rd = MiGA::RemoteDataset.new('NC_019748.1', :nuccore, :ncbi)
|
101
|
-
|
105
|
+
md = rd.get_metadata
|
106
|
+
assert(md[:is_type])
|
102
107
|
end
|
103
108
|
|
104
109
|
def test_ref_type_status
|
105
110
|
declare_remote_access
|
106
111
|
rd = MiGA::RemoteDataset.new('GCA_003144295.1', :assembly, :ncbi)
|
107
|
-
|
108
|
-
assert
|
112
|
+
md = rd.get_metadata
|
113
|
+
assert(!md[:is_type])
|
114
|
+
assert(md[:is_ref_type])
|
115
|
+
end
|
116
|
+
|
117
|
+
def test_gtdb_taxonomy
|
118
|
+
declare_remote_access
|
119
|
+
rd = MiGA::RemoteDataset.new('GCA_018200315.1', :assembly, :gtdb)
|
120
|
+
md = rd.get_metadata
|
121
|
+
assert(!md[:is_type])
|
122
|
+
assert_not_nil(md[:gtdb_release])
|
123
|
+
assert(md[:tax].is_a? MiGA::Taxonomy)
|
124
|
+
assert_equal('GCA_018200315.1', md[:gtdb_assembly])
|
125
|
+
assert_equal('gtdb', md[:tax][:ns])
|
126
|
+
assert_equal('Bacteroidia', md[:tax][:c])
|
127
|
+
end
|
128
|
+
|
129
|
+
def test_gtdb_alt_taxonomy
|
130
|
+
declare_remote_access
|
131
|
+
rd = MiGA::RemoteDataset.new('GCA_018200315.1', :assembly, :gtdb)
|
132
|
+
rd.metadata[:get_ncbi_taxonomy] = true
|
133
|
+
md = rd.get_metadata
|
134
|
+
assert(md[:tax].is_a? MiGA::Taxonomy)
|
135
|
+
assert_equal('ncbi', md[:tax][:ns])
|
136
|
+
assert_equal('Flavobacteriia', md[:tax][:c])
|
137
|
+
assert(md[:tax].alternative(1).is_a? MiGA::Taxonomy)
|
138
|
+
assert(md[:tax].alternative(:gtdb).is_a? MiGA::Taxonomy)
|
139
|
+
assert_equal('gtdb', md[:tax].alternative(1)[:ns])
|
140
|
+
assert_equal('gtdb', md[:tax].alternative(:gtdb)[:ns])
|
109
141
|
end
|
110
142
|
|
111
143
|
def test_missing_data
|
@@ -114,6 +146,73 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
114
146
|
assert_raise(MiGA::RemoteDataMissingError) { rd.save_to(project, 'bad') }
|
115
147
|
end
|
116
148
|
|
149
|
+
def test_gtdb_request
|
150
|
+
# No remote access needed
|
151
|
+
rd = MiGA::RemoteDataset.new('g__Macondimonas', :taxon, :gtdb)
|
152
|
+
u = rd.download_uri
|
153
|
+
h = rd.download_headers
|
154
|
+
|
155
|
+
assert(u.is_a? URI)
|
156
|
+
assert_equal('https', u.scheme)
|
157
|
+
assert_equal('genomes', File.basename(u.path))
|
158
|
+
|
159
|
+
assert(h.is_a? Hash)
|
160
|
+
assert_equal(1, h.size)
|
161
|
+
assert_equal('application/json', h['Accept'])
|
162
|
+
end
|
163
|
+
|
164
|
+
def test_ncbi_datasets_download_request
|
165
|
+
# No remote access needed
|
166
|
+
rd = MiGA::RemoteDataset.new(
|
167
|
+
'GCF_004684205.1', :genome, :ncbi_datasets_download
|
168
|
+
)
|
169
|
+
u = rd.download_uri
|
170
|
+
h = rd.download_headers
|
171
|
+
|
172
|
+
assert(u.is_a? URI)
|
173
|
+
assert_equal('https', u.scheme)
|
174
|
+
assert_equal('download', File.basename(u.path))
|
175
|
+
|
176
|
+
assert(h.is_a? Hash)
|
177
|
+
assert_equal(1, h.size)
|
178
|
+
assert_equal('application/zip', h['Accept'])
|
179
|
+
|
180
|
+
ENV['NCBI_API_KEY'] = 'Not-a-real-key'
|
181
|
+
h = rd.download_headers
|
182
|
+
ENV.delete('NCBI_API_KEY')
|
183
|
+
assert_equal(2, h.size)
|
184
|
+
assert_equal('Not-a-real-key', h['api-key'])
|
185
|
+
end
|
186
|
+
|
187
|
+
def test_seqcode_request
|
188
|
+
# No remote access needed
|
189
|
+
rd = MiGA::RemoteDataset.new(nil, 'type-genomes', :seqcode)
|
190
|
+
u = rd.download_uri
|
191
|
+
|
192
|
+
assert(u.is_a? URI)
|
193
|
+
assert_equal('https', u.scheme)
|
194
|
+
assert_equal('type-genomes.json', File.basename(u.path))
|
195
|
+
end
|
196
|
+
|
197
|
+
def test_ncbi_datasets_request
|
198
|
+
rd = MiGA::RemoteDataset.new({ taxons: 'Bos' }, :genome, :ncbi_datasets)
|
199
|
+
u = rd.download_uri
|
200
|
+
h = rd.download_headers
|
201
|
+
p = rd.download_payload
|
202
|
+
|
203
|
+
assert(u.is_a? URI)
|
204
|
+
assert_equal('https', u.scheme)
|
205
|
+
assert_equal('dataset_report', File.basename(u.path))
|
206
|
+
|
207
|
+
assert(h.is_a? Hash)
|
208
|
+
assert_equal(1, h.size)
|
209
|
+
assert_equal('application/json', h['Content-Type'])
|
210
|
+
|
211
|
+
assert(p.is_a? String)
|
212
|
+
assert_equal('{', p[0])
|
213
|
+
assert_equal('}', p[-1])
|
214
|
+
end
|
215
|
+
|
117
216
|
# This test is too expensive (too much time to run it!)
|
118
217
|
# def test_net_timeout
|
119
218
|
# declare_remote_access
|
data/test/result_test.rb
CHANGED
@@ -6,27 +6,15 @@ class ResultTest < Test::Unit::TestCase
|
|
6
6
|
|
7
7
|
def setup
|
8
8
|
initialize_miga_home
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
File.join(
|
17
|
-
|
18
|
-
)
|
19
|
-
)
|
20
|
-
FileUtils.touch(
|
21
|
-
File.join(
|
22
|
-
project.path, 'data', '10.clades', '01.find', 'miga-project.empty'
|
23
|
-
)
|
24
|
-
)
|
25
|
-
FileUtils.touch(
|
26
|
-
File.join(
|
27
|
-
project.path, 'data', '10.clades', '01.find', 'miga-project.done'
|
28
|
-
)
|
29
|
-
)
|
9
|
+
to_touch = [
|
10
|
+
['02.trimmed_reads', "#{dataset.name}.1.clipped.fastq"],
|
11
|
+
['02.trimmed_reads', "#{dataset.name}.done"],
|
12
|
+
['10.clades', '01.find', 'miga-project.empty'],
|
13
|
+
['10.clades', '01.find', 'miga-project.done']
|
14
|
+
]
|
15
|
+
to_touch.each do |path|
|
16
|
+
FileUtils.touch(File.join(project.path, 'data', *path))
|
17
|
+
end
|
30
18
|
end
|
31
19
|
|
32
20
|
def test_add_result
|
@@ -89,4 +77,42 @@ class ResultTest < Test::Unit::TestCase
|
|
89
77
|
r = dataset.add_result(:trimmed_reads)
|
90
78
|
assert_equal(5.0, r.running_time)
|
91
79
|
end
|
80
|
+
|
81
|
+
def test_status
|
82
|
+
d = dataset
|
83
|
+
assert_equal(:ignore_empty, d.result_status(:trimmed_reads))
|
84
|
+
d.add_result(:trimmed_reads)
|
85
|
+
assert_equal(:-, d.result_status(:raw_reads))
|
86
|
+
assert_equal(:complete, d.result_status(:trimmed_reads))
|
87
|
+
assert_equal(:pending, d.result_status(:read_quality))
|
88
|
+
assert_equal(:pending, d.result_status(:assembly))
|
89
|
+
|
90
|
+
h = d.results_status
|
91
|
+
assert(h.is_a? Hash)
|
92
|
+
assert_equal(:-, h[:raw_reads])
|
93
|
+
assert_equal(:complete, h[:trimmed_reads])
|
94
|
+
assert_equal(:pending, h[:read_quality])
|
95
|
+
|
96
|
+
# Test the "advance" interface from Project
|
97
|
+
a = project.profile_datasets_advance
|
98
|
+
assert(a.is_a? Array)
|
99
|
+
assert_equal(1, a.size)
|
100
|
+
assert(a[0].is_a? Array)
|
101
|
+
assert_equal([0, 1, 2, 2], a[0][0..3])
|
102
|
+
end
|
103
|
+
|
104
|
+
def test_versions
|
105
|
+
r = dataset.add_result(:trimmed_reads)
|
106
|
+
assert_respond_to(r, :add_versions)
|
107
|
+
assert_respond_to(r, :versions_md)
|
108
|
+
assert_equal(MiGA::VERSION.join('.'), r.versions[:MiGA])
|
109
|
+
assert_nil(r.versions[:GoodSoftware])
|
110
|
+
|
111
|
+
r.add_versions('GoodSoftware' => '1.2.3')
|
112
|
+
assert_equal('1.2.3', r.versions[:GoodSoftware])
|
113
|
+
|
114
|
+
md = r.versions_md
|
115
|
+
assert_equal('-', md[0])
|
116
|
+
assert_equal(2, md.split("\n").size)
|
117
|
+
end
|
92
118
|
end
|
data/test/taxonomy_test.rb
CHANGED
@@ -92,8 +92,12 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
92
92
|
end
|
93
93
|
|
94
94
|
def test_reset
|
95
|
-
tx = MiGA::Taxonomy.new(
|
96
|
-
|
95
|
+
tx = MiGA::Taxonomy.new(
|
96
|
+
'ns:Letters d:Latin s:A', nil,
|
97
|
+
['ns:Words d:English s:A', 'ns:Music d:Tone s:A']
|
98
|
+
)
|
99
|
+
assert_equal('Latin', tx.domain)
|
100
|
+
|
97
101
|
# Reset
|
98
102
|
assert_equal(2, tx.alternative.size)
|
99
103
|
assert_equal('Letters', tx.namespace)
|
@@ -102,11 +106,13 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
102
106
|
assert_nil(tx.namespace)
|
103
107
|
tx.reset('ns:Letters d:Latin s:A')
|
104
108
|
assert_equal('Letters', tx.namespace)
|
109
|
+
|
105
110
|
# Change of alternative
|
106
111
|
assert_equal('ns:Words d:English s:A', tx.alternative('Words').to_s)
|
107
112
|
tx.add_alternative(MiGA::Taxonomy.new('ns:Words d:Spanish s:A'))
|
108
113
|
assert_equal('ns:Words d:Spanish s:A', tx.alternative('Words').to_s)
|
109
|
-
|
114
|
+
|
115
|
+
# Change of main
|
110
116
|
assert_equal('ns:Letters d:Latin s:A', tx.to_s)
|
111
117
|
tx.add_alternative(MiGA::Taxonomy.new('ns:Letters d:Unicode s:A'))
|
112
118
|
assert_equal('ns:Letters d:Unicode s:A', tx.to_s)
|
data/utils/distance/runner.rb
CHANGED
@@ -54,7 +54,9 @@ class MiGA::DistanceRunner
|
|
54
54
|
# first-come-first-serve traverse
|
55
55
|
sbj = []
|
56
56
|
ref_project.each_dataset do |ds|
|
57
|
-
|
57
|
+
torun = ds.ref? && !ds.multi?
|
58
|
+
torun &&= ds.result(:essential_genes) || (!ds.markers? && ds.result(:cds))
|
59
|
+
sbj << ds if torun
|
58
60
|
end
|
59
61
|
ani_after_aai(sbj)
|
60
62
|
|
data/utils/distances.rb
CHANGED
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
|
|
4
4
|
|
5
5
|
project = ARGV.shift
|
6
6
|
dataset = ARGV.shift
|
7
|
-
opts = Hash[ARGV.map { |i| i.split(
|
7
|
+
opts = Hash[ARGV.map { |i| i.split('=', 2).tap { |j| j[0] = j[0].to_sym } }]
|
8
8
|
runner = MiGA::DistanceRunner.new(project, dataset, opts)
|
9
9
|
runner.go!
|