miga-base 1.3.8.2 → 1.3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/miga/cli/action/add_result.rb +22 -1
- data/lib/miga/cli/action/browse/about.html +4 -2
- data/lib/miga/cli/action/download/gtdb.rb +1 -1
- data/lib/miga/cli/action/download/ncbi.rb +43 -68
- data/lib/miga/cli/action/download/seqcode.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +1 -8
- data/lib/miga/cli/action/wf.rb +15 -6
- data/lib/miga/cli/objects_helper.rb +3 -0
- data/lib/miga/cli/opt_helper.rb +8 -2
- data/lib/miga/common/net.rb +100 -18
- data/lib/miga/dataset/base.rb +40 -12
- data/lib/miga/dataset/hooks.rb +8 -0
- data/lib/miga/dataset/result/ignore.rb +14 -2
- data/lib/miga/dataset/type.rb +51 -0
- data/lib/miga/dataset.rb +3 -22
- data/lib/miga/json.rb +9 -0
- data/lib/miga/project/base.rb +15 -9
- data/lib/miga/project.rb +7 -1
- data/lib/miga/remote_dataset/base.rb +117 -36
- data/lib/miga/remote_dataset/download.rb +121 -54
- data/lib/miga/remote_dataset.rb +34 -13
- data/lib/miga/result/stats.rb +2 -0
- data/lib/miga/result/versions.rb +23 -0
- data/lib/miga/result.rb +7 -1
- data/lib/miga/taxonomy/base.rb +3 -2
- data/lib/miga/version.rb +2 -2
- data/scripts/assembly.bash +15 -1
- data/scripts/cds.bash +9 -3
- data/scripts/distances.bash +103 -5
- data/scripts/essential_genes.bash +14 -1
- data/scripts/mytaxa.bash +18 -3
- data/scripts/mytaxa_scan.bash +16 -3
- data/scripts/read_quality.bash +6 -2
- data/scripts/ssu.bash +19 -1
- data/scripts/stats.bash +9 -3
- data/scripts/taxonomy.bash +98 -2
- data/scripts/trimmed_fasta.bash +10 -2
- data/scripts/trimmed_reads.bash +26 -6
- data/test/dataset_test.rb +17 -2
- data/test/hook_test.rb +3 -2
- data/test/net_test.rb +21 -5
- data/test/project_test.rb +13 -0
- data/test/remote_dataset_test.rb +106 -7
- data/test/result_test.rb +47 -21
- data/test/taxonomy_test.rb +9 -3
- data/utils/distance/runner.rb +3 -1
- data/utils/distances.rb +1 -1
- metadata +4 -2
data/scripts/read_quality.bash
CHANGED
@@ -19,6 +19,10 @@ for s in 1 2 ; do
|
|
19
19
|
done
|
20
20
|
|
21
21
|
# Finalize
|
22
|
-
miga date > "$DATASET.done"
|
23
|
-
|
22
|
+
miga date > "${DATASET}.done"
|
23
|
+
cat <<VERSIONS \
|
24
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
25
|
+
=> MiGA
|
26
|
+
$(miga --version)
|
27
|
+
VERSIONS
|
24
28
|
|
data/scripts/ssu.bash
CHANGED
@@ -65,4 +65,22 @@ fi
|
|
65
65
|
|
66
66
|
# Finalize
|
67
67
|
miga date > "${DATASET}.done"
|
68
|
-
|
68
|
+
cat <<VERSIONS \
|
69
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
70
|
+
=> MiGA
|
71
|
+
$(miga --version)
|
72
|
+
$(
|
73
|
+
if [[ -s $fa ]] ; then
|
74
|
+
echo "=> barrnap"
|
75
|
+
barrnap --version 2>&1 | perl -pe 's/^barrnap //'
|
76
|
+
echo "=> bedtools"
|
77
|
+
bedtools --version 2>&1 | perl -pe 's/^bedtools //'
|
78
|
+
echo "=> Enveomics Collection"
|
79
|
+
echo "version unknown"
|
80
|
+
echo "=> RDP Naive Bayes Classifier"
|
81
|
+
gzip -cd "${DATASET}.rdp.tsv.gz" | tail -n 1 | perl -pe 's/.*: //'
|
82
|
+
echo "=> tRNAscan-SE"
|
83
|
+
tRNAscan-SE -h 2>&1 | head -n 2 | tail -n 1 | perl -pe 's/^tRNAscan-SE //'
|
84
|
+
fi
|
85
|
+
)
|
86
|
+
VERSIONS
|
data/scripts/stats.bash
CHANGED
@@ -12,11 +12,17 @@ cd "$DIR"
|
|
12
12
|
miga date > "${DATASET}.start"
|
13
13
|
|
14
14
|
# Calculate statistics
|
15
|
-
for i in raw_reads trimmed_fasta assembly
|
15
|
+
for i in raw_reads trimmed_fasta assembly \
|
16
|
+
cds essential_genes distances taxonomy ssu ; do
|
16
17
|
echo "# $i"
|
17
18
|
miga stats --compute-and-save --ignore-empty -P "$PROJECT" -D "$DATASET" -r $i
|
18
19
|
done
|
19
20
|
|
20
21
|
# Finalize
|
21
|
-
miga date > "$DATASET.done"
|
22
|
-
|
22
|
+
miga date > "${DATASET}.done"
|
23
|
+
cat <<VERSIONS \
|
24
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
25
|
+
=> MiGA
|
26
|
+
$(miga --version)
|
27
|
+
VERSIONS
|
28
|
+
|
data/scripts/taxonomy.bash
CHANGED
@@ -16,5 +16,101 @@ ruby -I "$MIGA/lib" \
|
|
16
16
|
"$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1
|
17
17
|
|
18
18
|
# Finalize
|
19
|
-
|
20
|
-
|
19
|
+
fastaai=no
|
20
|
+
aai=no
|
21
|
+
ani=no
|
22
|
+
blast=no
|
23
|
+
blat=no
|
24
|
+
diamond=no
|
25
|
+
fastani=no
|
26
|
+
REF_PROJECT=$(miga option -P "$PROJECT" -k ref_project)
|
27
|
+
if [[ -S "$REF_PROJECT" ]] ; then
|
28
|
+
case $(miga option -P "$REF_PROJECT" -k haai_p) in
|
29
|
+
fastaai)
|
30
|
+
fastaai=yes
|
31
|
+
;;
|
32
|
+
diamond)
|
33
|
+
diamond=yes
|
34
|
+
aai=yes
|
35
|
+
;;
|
36
|
+
blast)
|
37
|
+
blast=yes
|
38
|
+
aai=yes
|
39
|
+
;;
|
40
|
+
esac
|
41
|
+
|
42
|
+
case $(miga option -P "$REF_PROJECT" -k aai_p) in
|
43
|
+
diamond)
|
44
|
+
diamond=yes
|
45
|
+
aai=yes
|
46
|
+
;;
|
47
|
+
blast)
|
48
|
+
blast=yes
|
49
|
+
aai=yes
|
50
|
+
;;
|
51
|
+
esac
|
52
|
+
|
53
|
+
case $(miga option -P "$REF_PROJECT" -k ani_p) in
|
54
|
+
blast)
|
55
|
+
blast=yes
|
56
|
+
ani=yes
|
57
|
+
;;
|
58
|
+
blat)
|
59
|
+
blat=yes
|
60
|
+
ani=yes
|
61
|
+
;;
|
62
|
+
fastani)
|
63
|
+
fastani=yes
|
64
|
+
;;
|
65
|
+
esac
|
66
|
+
fi
|
67
|
+
|
68
|
+
miga date > "${DATASET}.done"
|
69
|
+
cat <<VERSIONS \
|
70
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
71
|
+
=> MiGA
|
72
|
+
$(miga --version)
|
73
|
+
$(
|
74
|
+
if [[ "$fastaai" == "yes" ]] ; then
|
75
|
+
echo "=> FastAAI"
|
76
|
+
fastaai version 2>&1 | perl -pe 's/.*=//'
|
77
|
+
fi
|
78
|
+
)
|
79
|
+
$(
|
80
|
+
if [[ "$fastani" == "yes" ]] ; then
|
81
|
+
echo "=> FastANI"
|
82
|
+
fastANI --version 2>&1 | grep . | perl -pe 's/^version //'
|
83
|
+
fi
|
84
|
+
)
|
85
|
+
$(
|
86
|
+
if [[ "$aai" == "yes" ]] ; then
|
87
|
+
echo "=> Enveomics Collection: aai.rb"
|
88
|
+
aai.rb --version 2>&1 | perl -pe 's/.*: //'
|
89
|
+
fi
|
90
|
+
)
|
91
|
+
$(
|
92
|
+
if [[ "$ani" == "yes" ]] ; then
|
93
|
+
echo "=> Enveomics Collection: ani.rb"
|
94
|
+
ani.rb --version 2>&1 | perl -pe 's/.*: //'
|
95
|
+
fi
|
96
|
+
)
|
97
|
+
$(
|
98
|
+
if [[ "$blast" == "yes" ]] ; then
|
99
|
+
echo "=> NCBI BLAST+"
|
100
|
+
blastp -version 2>&1 | tail -n 1 | perl -pe 's/.*: blast //'
|
101
|
+
fi
|
102
|
+
)
|
103
|
+
$(
|
104
|
+
if [[ "$blat" == "yes" ]] ; then
|
105
|
+
echo "=> BLAT"
|
106
|
+
blat 2>&1 | head -n 1 | perl -pe 's/.* v\. //' | perl -pe 's/ fast .*//'
|
107
|
+
fi
|
108
|
+
)
|
109
|
+
$(
|
110
|
+
if [[ "$diamond" == "yes" ]] ; then
|
111
|
+
echo "=> Diamond"
|
112
|
+
diamond --version 2>&1 | perl -pe 's/^diamond version //'
|
113
|
+
fi
|
114
|
+
)
|
115
|
+
VERSIONS
|
116
|
+
|
data/scripts/trimmed_fasta.bash
CHANGED
@@ -32,6 +32,14 @@ for x in 1.fasta 2.fasta SingleReads.fa CoupledReads.fa ; do
|
|
32
32
|
done
|
33
33
|
|
34
34
|
# Finalize
|
35
|
-
miga date > "$DATASET.done"
|
36
|
-
|
35
|
+
miga date > "${DATASET}.done"
|
36
|
+
cat <<VERSIONS \
|
37
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
38
|
+
=> MiGA
|
39
|
+
$(miga --version)
|
40
|
+
=> Enveomics Collection: FastQ.maskQual.rb
|
41
|
+
$(FastQ.maskQual.rb --version | perl -pe 's/.* //')
|
42
|
+
=> Enveomics Collection: FastA.interpose.pl
|
43
|
+
version unknown
|
44
|
+
VERSIONS
|
37
45
|
|
data/scripts/trimmed_reads.bash
CHANGED
@@ -36,15 +36,19 @@ if [[ -s "$b.2.fastq.gz" ]] ; then
|
|
36
36
|
$CMD -1 "$b.1.fastq.gz" -2 "$b.2.fastq.gz"
|
37
37
|
for s in 1 2 ; do
|
38
38
|
mv "$b/${s}.post_trim_${b}.${s}.fq.gz" "${b}.${s}.clipped.fastq.gz"
|
39
|
-
mv "$b/${s}.pre_trim_QC_${b}.${s}.html"
|
40
|
-
|
39
|
+
mv "$b/${s}.pre_trim_QC_${b}.${s}.html" \
|
40
|
+
"../03.read_quality/${b}.pre.${s}.html"
|
41
|
+
mv "$b/${s}.post_trim_QC_${b}.${s}.html" \
|
42
|
+
"../03.read_quality/${b}.post.${s}.html"
|
41
43
|
done
|
42
44
|
else
|
43
45
|
# Unpaired
|
44
46
|
$CMD -u "$b.1.fastq.gz"
|
45
47
|
mv "$b/unpaired.post_trim_${b}.1.fq.gz" "${b}.1.clipped.fastq.gz"
|
46
|
-
mv "$b/unpaired.pre_trim_QC_${b}.1.html"
|
47
|
-
|
48
|
+
mv "$b/unpaired.pre_trim_QC_${b}.1.html" \
|
49
|
+
"../03.read_quality/${b}.pre.1.html"
|
50
|
+
mv "$b/unpaired.post_trim_QC_${b}.1.html" \
|
51
|
+
"../03.read_quality/${b}.post.1.html"
|
48
52
|
fi
|
49
53
|
mv "$b/Subsample_Adapter_Detection.stats.txt" \
|
50
54
|
"../03.read_quality/$b.adapters.txt"
|
@@ -54,6 +58,22 @@ rm -r "$b"
|
|
54
58
|
rm -f "$b".[12].fastq.gz
|
55
59
|
|
56
60
|
# Finalize
|
57
|
-
miga date > "$DATASET.done"
|
58
|
-
|
61
|
+
miga date > "${DATASET}.done"
|
62
|
+
cat <<VERSIONS \
|
63
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
64
|
+
=> MiGA
|
65
|
+
$(miga --version)
|
66
|
+
=> Enveomics Collection: FastQ.tag.rb
|
67
|
+
$(FastQ.tag.rb --version | perl -pe 's/.* //')
|
68
|
+
=> Multitrim
|
69
|
+
version unknown
|
70
|
+
=> FaQCs
|
71
|
+
$(FaQCs --version 2>&1 | perl -pe 's/.*: //')
|
72
|
+
=> Seqtk
|
73
|
+
$(seqtk 2>&1 | grep Version | perl -pe 's/.*: //')
|
74
|
+
=> Fastp
|
75
|
+
$(fastp --version 2>&1 | perl -pe 's/^fastp //')
|
76
|
+
=> Falco
|
77
|
+
$(falco -V 2>&1 | tee)
|
78
|
+
VERSIONS
|
59
79
|
|
data/test/dataset_test.rb
CHANGED
@@ -40,7 +40,7 @@ class DatasetTest < Test::Unit::TestCase
|
|
40
40
|
d2.save
|
41
41
|
assert_not_predicate(d2, :multi?)
|
42
42
|
assert_not_predicate(d2, :nonmulti?)
|
43
|
-
|
43
|
+
assert_equal(:empty, d2.metadata[:type])
|
44
44
|
d2.metadata[:type] = :metagenome
|
45
45
|
d2.save
|
46
46
|
assert_equal(:metagenome, d2.metadata[:type])
|
@@ -89,25 +89,40 @@ class DatasetTest < Test::Unit::TestCase
|
|
89
89
|
assert_equal(:trimmed_reads, d2.first_preprocessing(true))
|
90
90
|
assert_equal(:read_quality, d2.next_preprocessing(true))
|
91
91
|
assert { !d2.done_preprocessing?(true) }
|
92
|
-
|
92
|
+
|
93
|
+
# Ref and undeclared type (empty)
|
93
94
|
assert { d2.ignore_task?(:mytaxa) }
|
94
95
|
assert { d2.ignore_task?(:mytaxa_scan) }
|
95
96
|
assert { d2.ignore_task?(:distances) }
|
97
|
+
assert { d2.ignore_task?(:essential_genes) }
|
98
|
+
|
96
99
|
# Ref and multi
|
97
100
|
d2.metadata[:type] = :metagenome
|
98
101
|
assert { !d2.ignore_task?(:mytaxa) }
|
99
102
|
assert { d2.ignore_task?(:mytaxa_scan) }
|
100
103
|
assert { d2.ignore_task?(:distances) }
|
104
|
+
assert { !d2.ignore_task?(:essential_genes) }
|
105
|
+
|
101
106
|
# Ref and nonmulti
|
102
107
|
d2.metadata[:type] = :genome
|
103
108
|
assert { d2.ignore_task?(:mytaxa) }
|
104
109
|
assert { !d2.ignore_task?(:mytaxa_scan) }
|
105
110
|
assert { !d2.ignore_task?(:distances) }
|
111
|
+
assert { !d2.ignore_task?(:essential_genes) }
|
112
|
+
|
106
113
|
# Qry and nonmulti
|
107
114
|
d2.metadata[:ref] = false
|
108
115
|
assert { d2.ignore_task?(:mytaxa) }
|
109
116
|
assert { d2.ignore_task?(:mytaxa_scan) }
|
110
117
|
assert { !d2.ignore_task?(:distances) }
|
118
|
+
assert { !d2.ignore_task?(:essential_genes) }
|
119
|
+
|
120
|
+
# Qry and plasmid
|
121
|
+
d2.metadata[:type] = :plasmid
|
122
|
+
assert { d2.ignore_task?(:mytaxa) }
|
123
|
+
assert { d2.ignore_task?(:mytaxa_scan) }
|
124
|
+
assert { !d2.ignore_task?(:distances) }
|
125
|
+
assert { d2.ignore_task?(:essential_genes) }
|
111
126
|
end
|
112
127
|
|
113
128
|
def test_profile_advance
|
data/test/hook_test.rb
CHANGED
@@ -9,9 +9,10 @@ class HookTest < Test::Unit::TestCase
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def test_add_hook
|
12
|
-
assert_nil(dataset.hooks[:
|
13
|
-
dataset.add_hook(:on_save, :run_lambda, Proc.new { $counter += 1 })
|
12
|
+
assert_nil(dataset.hooks[:on_remove])
|
14
13
|
assert_equal(1, dataset.hooks[:on_save].size)
|
14
|
+
dataset.add_hook(:on_save, :run_lambda, Proc.new { $counter += 1 })
|
15
|
+
assert_equal(2, dataset.hooks[:on_save].size)
|
15
16
|
$counter = 1
|
16
17
|
dataset.save
|
17
18
|
assert_equal(2, $counter)
|
data/test/net_test.rb
CHANGED
@@ -15,20 +15,36 @@ class FormatTest < Test::Unit::TestCase
|
|
15
15
|
declare_remote_access
|
16
16
|
m = MiGA::MiGA
|
17
17
|
assert_raise { m.remote_connection(:bad_descriptor) }
|
18
|
-
assert_raise { m.remote_connection('
|
18
|
+
assert_raise { m.remote_connection('ssh://microbial-genomes.org/') }
|
19
19
|
c = m.remote_connection(:miga_db)
|
20
20
|
assert_equal(Net::FTP, c.class)
|
21
21
|
c.close
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
24
|
+
def test_download_file_http
|
25
25
|
declare_remote_access
|
26
26
|
m = MiGA::MiGA
|
27
|
+
#o = m.http_request(:get, 'http://uibk.microbial-genomes.org/robots.txt')
|
28
|
+
o = m.http_request(:get, 'http://disc-genomics.uibk.ac.at/miga/robots.txt')
|
29
|
+
o = o.split(/\n/)
|
30
|
+
assert_equal(6, o.count)
|
31
|
+
assert_equal('#', o[1])
|
32
|
+
assert_equal('User-agent: *', o[2])
|
33
|
+
end
|
34
|
+
|
35
|
+
def test_download_file_ftp
|
36
|
+
declare_remote_access
|
27
37
|
f = tmpfile('t/test.txt')
|
28
38
|
d = File.dirname(f)
|
29
39
|
assert(!Dir.exist?(d))
|
30
|
-
|
31
|
-
|
32
|
-
|
40
|
+
# TODO
|
41
|
+
# Bring back when I can connect to the Gatech's FTP
|
42
|
+
### m = MiGA::MiGA
|
43
|
+
### m.download_file_ftp(:miga_online_ftp, 'api_test.txt', f)
|
44
|
+
### assert(Dir.exist?(d))
|
45
|
+
### assert_equal('miga', File.read(f).chomp)
|
46
|
+
### File.unlink(f)
|
47
|
+
### m.download_file_ftp(:miga_db, '../api_test.txt', f)
|
48
|
+
### assert_equal('miga', File.read(f).chomp)
|
33
49
|
end
|
34
50
|
end
|
data/test/project_test.rb
CHANGED
@@ -8,6 +8,12 @@ class ProjectTest < Test::Unit::TestCase
|
|
8
8
|
initialize_miga_home
|
9
9
|
end
|
10
10
|
|
11
|
+
def test_class_variables
|
12
|
+
assert(MiGA::Project.INCLADE_TASKS.is_a? Array)
|
13
|
+
assert(MiGA::Project.DISTANCE_TASKS.is_a? Array)
|
14
|
+
assert(MiGA::Project.KNOWN_TYPES.is_a? Hash)
|
15
|
+
end
|
16
|
+
|
11
17
|
def create_result_files(project, res, exts)
|
12
18
|
d = MiGA::Project.RESULT_DIRS[res]
|
13
19
|
(['.done'] + exts).each do |x|
|
@@ -155,4 +161,11 @@ class ProjectTest < Test::Unit::TestCase
|
|
155
161
|
date3 = p1.add_result(:ogs, true, force: true)[:created]
|
156
162
|
assert_not_equal(date1, date3)
|
157
163
|
end
|
164
|
+
|
165
|
+
def test_options
|
166
|
+
assert_equal('fastaai', project.option(:haai_p))
|
167
|
+
assert_equal(false, project.option(:aai_save_rbm))
|
168
|
+
project.metadata[:type] = 'clade'
|
169
|
+
assert_equal(true, project.option(:aai_save_rbm))
|
170
|
+
end
|
158
171
|
end
|
data/test/remote_dataset_test.rb
CHANGED
@@ -7,6 +7,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
7
7
|
|
8
8
|
def setup
|
9
9
|
initialize_miga_home
|
10
|
+
ENV.delete('NCBI_API_KEY')
|
10
11
|
end
|
11
12
|
|
12
13
|
def test_class_universe
|
@@ -19,7 +20,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
19
20
|
assert_raise { MiGA::RemoteDataset.new('ids', :google, :ebi) }
|
20
21
|
end
|
21
22
|
|
22
|
-
def
|
23
|
+
def test_get
|
23
24
|
hiv2 = 'M30502.1'
|
24
25
|
{ embl: :ebi, nuccore: :ncbi }.each do |db, universe|
|
25
26
|
rd = MiGA::RemoteDataset.new(hiv2, db, universe)
|
@@ -47,7 +48,8 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
47
48
|
end
|
48
49
|
|
49
50
|
def test_net_ftp
|
50
|
-
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/
|
51
|
+
cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/' \
|
52
|
+
'public/gap/GAPJ01.fasta.gz'
|
51
53
|
n = 'Cjac_L14'
|
52
54
|
rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
|
53
55
|
assert_equal([cjac], rd.ids)
|
@@ -86,26 +88,56 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
86
88
|
def test_type_status_asm
|
87
89
|
declare_remote_access
|
88
90
|
rd = MiGA::RemoteDataset.new('GCF_000018105.1', :assembly, :ncbi)
|
89
|
-
|
91
|
+
md = rd.get_metadata
|
92
|
+
assert(md[:is_type])
|
90
93
|
end
|
91
94
|
|
92
95
|
def test_nontype_status_asm
|
93
96
|
declare_remote_access
|
94
97
|
rd = MiGA::RemoteDataset.new('GCA_004684205.1', :assembly, :ncbi)
|
95
|
-
|
98
|
+
md = rd.get_metadata
|
99
|
+
assert(!md[:is_type])
|
96
100
|
end
|
97
101
|
|
98
102
|
def test_type_status_nuccore
|
99
103
|
declare_remote_access
|
100
104
|
rd = MiGA::RemoteDataset.new('NC_019748.1', :nuccore, :ncbi)
|
101
|
-
|
105
|
+
md = rd.get_metadata
|
106
|
+
assert(md[:is_type])
|
102
107
|
end
|
103
108
|
|
104
109
|
def test_ref_type_status
|
105
110
|
declare_remote_access
|
106
111
|
rd = MiGA::RemoteDataset.new('GCA_003144295.1', :assembly, :ncbi)
|
107
|
-
|
108
|
-
assert
|
112
|
+
md = rd.get_metadata
|
113
|
+
assert(!md[:is_type])
|
114
|
+
assert(md[:is_ref_type])
|
115
|
+
end
|
116
|
+
|
117
|
+
def test_gtdb_taxonomy
|
118
|
+
declare_remote_access
|
119
|
+
rd = MiGA::RemoteDataset.new('GCA_018200315.1', :assembly, :gtdb)
|
120
|
+
md = rd.get_metadata
|
121
|
+
assert(!md[:is_type])
|
122
|
+
assert_not_nil(md[:gtdb_release])
|
123
|
+
assert(md[:tax].is_a? MiGA::Taxonomy)
|
124
|
+
assert_equal('GCA_018200315.1', md[:gtdb_assembly])
|
125
|
+
assert_equal('gtdb', md[:tax][:ns])
|
126
|
+
assert_equal('Bacteroidia', md[:tax][:c])
|
127
|
+
end
|
128
|
+
|
129
|
+
def test_gtdb_alt_taxonomy
|
130
|
+
declare_remote_access
|
131
|
+
rd = MiGA::RemoteDataset.new('GCA_018200315.1', :assembly, :gtdb)
|
132
|
+
rd.metadata[:get_ncbi_taxonomy] = true
|
133
|
+
md = rd.get_metadata
|
134
|
+
assert(md[:tax].is_a? MiGA::Taxonomy)
|
135
|
+
assert_equal('ncbi', md[:tax][:ns])
|
136
|
+
assert_equal('Flavobacteriia', md[:tax][:c])
|
137
|
+
assert(md[:tax].alternative(1).is_a? MiGA::Taxonomy)
|
138
|
+
assert(md[:tax].alternative(:gtdb).is_a? MiGA::Taxonomy)
|
139
|
+
assert_equal('gtdb', md[:tax].alternative(1)[:ns])
|
140
|
+
assert_equal('gtdb', md[:tax].alternative(:gtdb)[:ns])
|
109
141
|
end
|
110
142
|
|
111
143
|
def test_missing_data
|
@@ -114,6 +146,73 @@ class RemoteDatasetTest < Test::Unit::TestCase
|
|
114
146
|
assert_raise(MiGA::RemoteDataMissingError) { rd.save_to(project, 'bad') }
|
115
147
|
end
|
116
148
|
|
149
|
+
def test_gtdb_request
|
150
|
+
# No remote access needed
|
151
|
+
rd = MiGA::RemoteDataset.new('g__Macondimonas', :taxon, :gtdb)
|
152
|
+
u = rd.download_uri
|
153
|
+
h = rd.download_headers
|
154
|
+
|
155
|
+
assert(u.is_a? URI)
|
156
|
+
assert_equal('https', u.scheme)
|
157
|
+
assert_equal('genomes', File.basename(u.path))
|
158
|
+
|
159
|
+
assert(h.is_a? Hash)
|
160
|
+
assert_equal(1, h.size)
|
161
|
+
assert_equal('application/json', h['Accept'])
|
162
|
+
end
|
163
|
+
|
164
|
+
def test_ncbi_datasets_download_request
|
165
|
+
# No remote access needed
|
166
|
+
rd = MiGA::RemoteDataset.new(
|
167
|
+
'GCF_004684205.1', :genome, :ncbi_datasets_download
|
168
|
+
)
|
169
|
+
u = rd.download_uri
|
170
|
+
h = rd.download_headers
|
171
|
+
|
172
|
+
assert(u.is_a? URI)
|
173
|
+
assert_equal('https', u.scheme)
|
174
|
+
assert_equal('download', File.basename(u.path))
|
175
|
+
|
176
|
+
assert(h.is_a? Hash)
|
177
|
+
assert_equal(1, h.size)
|
178
|
+
assert_equal('application/zip', h['Accept'])
|
179
|
+
|
180
|
+
ENV['NCBI_API_KEY'] = 'Not-a-real-key'
|
181
|
+
h = rd.download_headers
|
182
|
+
ENV.delete('NCBI_API_KEY')
|
183
|
+
assert_equal(2, h.size)
|
184
|
+
assert_equal('Not-a-real-key', h['api-key'])
|
185
|
+
end
|
186
|
+
|
187
|
+
def test_seqcode_request
|
188
|
+
# No remote access needed
|
189
|
+
rd = MiGA::RemoteDataset.new(nil, 'type-genomes', :seqcode)
|
190
|
+
u = rd.download_uri
|
191
|
+
|
192
|
+
assert(u.is_a? URI)
|
193
|
+
assert_equal('https', u.scheme)
|
194
|
+
assert_equal('type-genomes.json', File.basename(u.path))
|
195
|
+
end
|
196
|
+
|
197
|
+
def test_ncbi_datasets_request
|
198
|
+
rd = MiGA::RemoteDataset.new({ taxons: 'Bos' }, :genome, :ncbi_datasets)
|
199
|
+
u = rd.download_uri
|
200
|
+
h = rd.download_headers
|
201
|
+
p = rd.download_payload
|
202
|
+
|
203
|
+
assert(u.is_a? URI)
|
204
|
+
assert_equal('https', u.scheme)
|
205
|
+
assert_equal('dataset_report', File.basename(u.path))
|
206
|
+
|
207
|
+
assert(h.is_a? Hash)
|
208
|
+
assert_equal(1, h.size)
|
209
|
+
assert_equal('application/json', h['Content-Type'])
|
210
|
+
|
211
|
+
assert(p.is_a? String)
|
212
|
+
assert_equal('{', p[0])
|
213
|
+
assert_equal('}', p[-1])
|
214
|
+
end
|
215
|
+
|
117
216
|
# This test is too expensive (too much time to run it!)
|
118
217
|
# def test_net_timeout
|
119
218
|
# declare_remote_access
|
data/test/result_test.rb
CHANGED
@@ -6,27 +6,15 @@ class ResultTest < Test::Unit::TestCase
|
|
6
6
|
|
7
7
|
def setup
|
8
8
|
initialize_miga_home
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
File.join(
|
17
|
-
|
18
|
-
)
|
19
|
-
)
|
20
|
-
FileUtils.touch(
|
21
|
-
File.join(
|
22
|
-
project.path, 'data', '10.clades', '01.find', 'miga-project.empty'
|
23
|
-
)
|
24
|
-
)
|
25
|
-
FileUtils.touch(
|
26
|
-
File.join(
|
27
|
-
project.path, 'data', '10.clades', '01.find', 'miga-project.done'
|
28
|
-
)
|
29
|
-
)
|
9
|
+
to_touch = [
|
10
|
+
['02.trimmed_reads', "#{dataset.name}.1.clipped.fastq"],
|
11
|
+
['02.trimmed_reads', "#{dataset.name}.done"],
|
12
|
+
['10.clades', '01.find', 'miga-project.empty'],
|
13
|
+
['10.clades', '01.find', 'miga-project.done']
|
14
|
+
]
|
15
|
+
to_touch.each do |path|
|
16
|
+
FileUtils.touch(File.join(project.path, 'data', *path))
|
17
|
+
end
|
30
18
|
end
|
31
19
|
|
32
20
|
def test_add_result
|
@@ -89,4 +77,42 @@ class ResultTest < Test::Unit::TestCase
|
|
89
77
|
r = dataset.add_result(:trimmed_reads)
|
90
78
|
assert_equal(5.0, r.running_time)
|
91
79
|
end
|
80
|
+
|
81
|
+
def test_status
|
82
|
+
d = dataset
|
83
|
+
assert_equal(:ignore_empty, d.result_status(:trimmed_reads))
|
84
|
+
d.add_result(:trimmed_reads)
|
85
|
+
assert_equal(:-, d.result_status(:raw_reads))
|
86
|
+
assert_equal(:complete, d.result_status(:trimmed_reads))
|
87
|
+
assert_equal(:pending, d.result_status(:read_quality))
|
88
|
+
assert_equal(:pending, d.result_status(:assembly))
|
89
|
+
|
90
|
+
h = d.results_status
|
91
|
+
assert(h.is_a? Hash)
|
92
|
+
assert_equal(:-, h[:raw_reads])
|
93
|
+
assert_equal(:complete, h[:trimmed_reads])
|
94
|
+
assert_equal(:pending, h[:read_quality])
|
95
|
+
|
96
|
+
# Test the "advance" interface from Project
|
97
|
+
a = project.profile_datasets_advance
|
98
|
+
assert(a.is_a? Array)
|
99
|
+
assert_equal(1, a.size)
|
100
|
+
assert(a[0].is_a? Array)
|
101
|
+
assert_equal([0, 1, 2, 2], a[0][0..3])
|
102
|
+
end
|
103
|
+
|
104
|
+
def test_versions
|
105
|
+
r = dataset.add_result(:trimmed_reads)
|
106
|
+
assert_respond_to(r, :add_versions)
|
107
|
+
assert_respond_to(r, :versions_md)
|
108
|
+
assert_equal(MiGA::VERSION.join('.'), r.versions[:MiGA])
|
109
|
+
assert_nil(r.versions[:GoodSoftware])
|
110
|
+
|
111
|
+
r.add_versions('GoodSoftware' => '1.2.3')
|
112
|
+
assert_equal('1.2.3', r.versions[:GoodSoftware])
|
113
|
+
|
114
|
+
md = r.versions_md
|
115
|
+
assert_equal('-', md[0])
|
116
|
+
assert_equal(2, md.split("\n").size)
|
117
|
+
end
|
92
118
|
end
|
data/test/taxonomy_test.rb
CHANGED
@@ -92,8 +92,12 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
92
92
|
end
|
93
93
|
|
94
94
|
def test_reset
|
95
|
-
tx = MiGA::Taxonomy.new(
|
96
|
-
|
95
|
+
tx = MiGA::Taxonomy.new(
|
96
|
+
'ns:Letters d:Latin s:A', nil,
|
97
|
+
['ns:Words d:English s:A', 'ns:Music d:Tone s:A']
|
98
|
+
)
|
99
|
+
assert_equal('Latin', tx.domain)
|
100
|
+
|
97
101
|
# Reset
|
98
102
|
assert_equal(2, tx.alternative.size)
|
99
103
|
assert_equal('Letters', tx.namespace)
|
@@ -102,11 +106,13 @@ class TaxonomyTest < Test::Unit::TestCase
|
|
102
106
|
assert_nil(tx.namespace)
|
103
107
|
tx.reset('ns:Letters d:Latin s:A')
|
104
108
|
assert_equal('Letters', tx.namespace)
|
109
|
+
|
105
110
|
# Change of alternative
|
106
111
|
assert_equal('ns:Words d:English s:A', tx.alternative('Words').to_s)
|
107
112
|
tx.add_alternative(MiGA::Taxonomy.new('ns:Words d:Spanish s:A'))
|
108
113
|
assert_equal('ns:Words d:Spanish s:A', tx.alternative('Words').to_s)
|
109
|
-
|
114
|
+
|
115
|
+
# Change of main
|
110
116
|
assert_equal('ns:Letters d:Latin s:A', tx.to_s)
|
111
117
|
tx.add_alternative(MiGA::Taxonomy.new('ns:Letters d:Unicode s:A'))
|
112
118
|
assert_equal('ns:Letters d:Unicode s:A', tx.to_s)
|
data/utils/distance/runner.rb
CHANGED
@@ -54,7 +54,9 @@ class MiGA::DistanceRunner
|
|
54
54
|
# first-come-first-serve traverse
|
55
55
|
sbj = []
|
56
56
|
ref_project.each_dataset do |ds|
|
57
|
-
|
57
|
+
torun = ds.ref? && !ds.multi?
|
58
|
+
torun &&= ds.result(:essential_genes) || (!ds.markers? && ds.result(:cds))
|
59
|
+
sbj << ds if torun
|
58
60
|
end
|
59
61
|
ani_after_aai(sbj)
|
60
62
|
|
data/utils/distances.rb
CHANGED
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
|
|
4
4
|
|
5
5
|
project = ARGV.shift
|
6
6
|
dataset = ARGV.shift
|
7
|
-
opts = Hash[ARGV.map { |i| i.split(
|
7
|
+
opts = Hash[ARGV.map { |i| i.split('=', 2).tap { |j| j[0] = j[0].to_sym } }]
|
8
8
|
runner = MiGA::DistanceRunner.new(project, dataset, opts)
|
9
9
|
runner.go!
|