miga-base 1.3.8.2 → 1.3.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/lib/miga/cli/action/add_result.rb +22 -1
  4. data/lib/miga/cli/action/browse/about.html +4 -2
  5. data/lib/miga/cli/action/download/gtdb.rb +1 -1
  6. data/lib/miga/cli/action/download/ncbi.rb +43 -68
  7. data/lib/miga/cli/action/download/seqcode.rb +1 -2
  8. data/lib/miga/cli/action/ncbi_get.rb +1 -8
  9. data/lib/miga/cli/action/wf.rb +15 -6
  10. data/lib/miga/cli/objects_helper.rb +3 -0
  11. data/lib/miga/cli/opt_helper.rb +8 -2
  12. data/lib/miga/common/net.rb +100 -18
  13. data/lib/miga/dataset/base.rb +40 -12
  14. data/lib/miga/dataset/hooks.rb +8 -0
  15. data/lib/miga/dataset/result/ignore.rb +14 -2
  16. data/lib/miga/dataset/type.rb +51 -0
  17. data/lib/miga/dataset.rb +3 -22
  18. data/lib/miga/json.rb +9 -0
  19. data/lib/miga/project/base.rb +15 -9
  20. data/lib/miga/project.rb +7 -1
  21. data/lib/miga/remote_dataset/base.rb +117 -36
  22. data/lib/miga/remote_dataset/download.rb +121 -54
  23. data/lib/miga/remote_dataset.rb +34 -13
  24. data/lib/miga/result/stats.rb +2 -0
  25. data/lib/miga/result/versions.rb +23 -0
  26. data/lib/miga/result.rb +7 -1
  27. data/lib/miga/taxonomy/base.rb +3 -2
  28. data/lib/miga/version.rb +2 -2
  29. data/scripts/assembly.bash +15 -1
  30. data/scripts/cds.bash +9 -3
  31. data/scripts/distances.bash +103 -5
  32. data/scripts/essential_genes.bash +14 -1
  33. data/scripts/mytaxa.bash +18 -3
  34. data/scripts/mytaxa_scan.bash +16 -3
  35. data/scripts/read_quality.bash +6 -2
  36. data/scripts/ssu.bash +19 -1
  37. data/scripts/stats.bash +9 -3
  38. data/scripts/taxonomy.bash +98 -2
  39. data/scripts/trimmed_fasta.bash +10 -2
  40. data/scripts/trimmed_reads.bash +26 -6
  41. data/test/dataset_test.rb +17 -2
  42. data/test/hook_test.rb +3 -2
  43. data/test/net_test.rb +21 -5
  44. data/test/project_test.rb +13 -0
  45. data/test/remote_dataset_test.rb +106 -7
  46. data/test/result_test.rb +47 -21
  47. data/test/taxonomy_test.rb +9 -3
  48. data/utils/distance/runner.rb +3 -1
  49. data/utils/distances.rb +1 -1
  50. metadata +4 -2
@@ -19,6 +19,10 @@ for s in 1 2 ; do
19
19
  done
20
20
 
21
21
  # Finalize
22
- miga date > "$DATASET.done"
23
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
22
+ miga date > "${DATASET}.done"
23
+ cat <<VERSIONS \
24
+ | miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
25
+ => MiGA
26
+ $(miga --version)
27
+ VERSIONS
24
28
 
data/scripts/ssu.bash CHANGED
@@ -65,4 +65,22 @@ fi
65
65
 
66
66
  # Finalize
67
67
  miga date > "${DATASET}.done"
68
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
68
+ cat <<VERSIONS \
69
+ | miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
70
+ => MiGA
71
+ $(miga --version)
72
+ $(
73
+ if [[ -s $fa ]] ; then
74
+ echo "=> barrnap"
75
+ barrnap --version 2>&1 | perl -pe 's/^barrnap //'
76
+ echo "=> bedtools"
77
+ bedtools --version 2>&1 | perl -pe 's/^bedtools //'
78
+ echo "=> Enveomics Collection"
79
+ echo "version unknown"
80
+ echo "=> RDP Naive Bayes Classifier"
81
+ gzip -cd "${DATASET}.rdp.tsv.gz" | tail -n 1 | perl -pe 's/.*: //'
82
+ echo "=> tRNAscan-SE"
83
+ tRNAscan-SE -h 2>&1 | head -n 2 | tail -n 1 | perl -pe 's/^tRNAscan-SE //'
84
+ fi
85
+ )
86
+ VERSIONS
data/scripts/stats.bash CHANGED
@@ -12,11 +12,17 @@ cd "$DIR"
12
12
  miga date > "${DATASET}.start"
13
13
 
14
14
  # Calculate statistics
15
- for i in raw_reads trimmed_fasta assembly cds essential_genes distances taxonomy ssu ; do
15
+ for i in raw_reads trimmed_fasta assembly \
16
+ cds essential_genes distances taxonomy ssu ; do
16
17
  echo "# $i"
17
18
  miga stats --compute-and-save --ignore-empty -P "$PROJECT" -D "$DATASET" -r $i
18
19
  done
19
20
 
20
21
  # Finalize
21
- miga date > "$DATASET.done"
22
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
22
+ miga date > "${DATASET}.done"
23
+ cat <<VERSIONS \
24
+ | miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
25
+ => MiGA
26
+ $(miga --version)
27
+ VERSIONS
28
+
@@ -16,5 +16,101 @@ ruby -I "$MIGA/lib" \
16
16
  "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1
17
17
 
18
18
  # Finalize
19
- miga date > "$DATASET.done"
20
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
19
+ fastaai=no
20
+ aai=no
21
+ ani=no
22
+ blast=no
23
+ blat=no
24
+ diamond=no
25
+ fastani=no
26
+ REF_PROJECT=$(miga option -P "$PROJECT" -k ref_project)
27
+ if [[ -S "$REF_PROJECT" ]] ; then
28
+ case $(miga option -P "$REF_PROJECT" -k haai_p) in
29
+ fastaai)
30
+ fastaai=yes
31
+ ;;
32
+ diamond)
33
+ diamond=yes
34
+ aai=yes
35
+ ;;
36
+ blast)
37
+ blast=yes
38
+ aai=yes
39
+ ;;
40
+ esac
41
+
42
+ case $(miga option -P "$REF_PROJECT" -k aai_p) in
43
+ diamond)
44
+ diamond=yes
45
+ aai=yes
46
+ ;;
47
+ blast)
48
+ blast=yes
49
+ aai=yes
50
+ ;;
51
+ esac
52
+
53
+ case $(miga option -P "$REF_PROJECT" -k ani_p) in
54
+ blast)
55
+ blast=yes
56
+ ani=yes
57
+ ;;
58
+ blat)
59
+ blat=yes
60
+ ani=yes
61
+ ;;
62
+ fastani)
63
+ fastani=yes
64
+ ;;
65
+ esac
66
+ fi
67
+
68
+ miga date > "${DATASET}.done"
69
+ cat <<VERSIONS \
70
+ | miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
71
+ => MiGA
72
+ $(miga --version)
73
+ $(
74
+ if [[ "$fastaai" == "yes" ]] ; then
75
+ echo "=> FastAAI"
76
+ fastaai version 2>&1 | perl -pe 's/.*=//'
77
+ fi
78
+ )
79
+ $(
80
+ if [[ "$fastani" == "yes" ]] ; then
81
+ echo "=> FastANI"
82
+ fastANI --version 2>&1 | grep . | perl -pe 's/^version //'
83
+ fi
84
+ )
85
+ $(
86
+ if [[ "$aai" == "yes" ]] ; then
87
+ echo "=> Enveomics Collection: aai.rb"
88
+ aai.rb --version 2>&1 | perl -pe 's/.*: //'
89
+ fi
90
+ )
91
+ $(
92
+ if [[ "$ani" == "yes" ]] ; then
93
+ echo "=> Enveomics Collection: ani.rb"
94
+ ani.rb --version 2>&1 | perl -pe 's/.*: //'
95
+ fi
96
+ )
97
+ $(
98
+ if [[ "$blast" == "yes" ]] ; then
99
+ echo "=> NCBI BLAST+"
100
+ blastp -version 2>&1 | tail -n 1 | perl -pe 's/.*: blast //'
101
+ fi
102
+ )
103
+ $(
104
+ if [[ "$blat" == "yes" ]] ; then
105
+ echo "=> BLAT"
106
+ blat 2>&1 | head -n 1 | perl -pe 's/.* v\. //' | perl -pe 's/ fast .*//'
107
+ fi
108
+ )
109
+ $(
110
+ if [[ "$diamond" == "yes" ]] ; then
111
+ echo "=> Diamond"
112
+ diamond --version 2>&1 | perl -pe 's/^diamond version //'
113
+ fi
114
+ )
115
+ VERSIONS
116
+
@@ -32,6 +32,14 @@ for x in 1.fasta 2.fasta SingleReads.fa CoupledReads.fa ; do
32
32
  done
33
33
 
34
34
  # Finalize
35
- miga date > "$DATASET.done"
36
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
35
+ miga date > "${DATASET}.done"
36
+ cat <<VERSIONS \
37
+ | miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
38
+ => MiGA
39
+ $(miga --version)
40
+ => Enveomics Collection: FastQ.maskQual.rb
41
+ $(FastQ.maskQual.rb --version | perl -pe 's/.* //')
42
+ => Enveomics Collection: FastA.interpose.pl
43
+ version unknown
44
+ VERSIONS
37
45
 
@@ -36,15 +36,19 @@ if [[ -s "$b.2.fastq.gz" ]] ; then
36
36
  $CMD -1 "$b.1.fastq.gz" -2 "$b.2.fastq.gz"
37
37
  for s in 1 2 ; do
38
38
  mv "$b/${s}.post_trim_${b}.${s}.fq.gz" "${b}.${s}.clipped.fastq.gz"
39
- mv "$b/${s}.pre_trim_QC_${b}.${s}.html" "../03.read_quality/${b}.pre.${s}.html"
40
- mv "$b/${s}.post_trim_QC_${b}.${s}.html" "../03.read_quality/${b}.post.${s}.html"
39
+ mv "$b/${s}.pre_trim_QC_${b}.${s}.html" \
40
+ "../03.read_quality/${b}.pre.${s}.html"
41
+ mv "$b/${s}.post_trim_QC_${b}.${s}.html" \
42
+ "../03.read_quality/${b}.post.${s}.html"
41
43
  done
42
44
  else
43
45
  # Unpaired
44
46
  $CMD -u "$b.1.fastq.gz"
45
47
  mv "$b/unpaired.post_trim_${b}.1.fq.gz" "${b}.1.clipped.fastq.gz"
46
- mv "$b/unpaired.pre_trim_QC_${b}.1.html" "../03.read_quality/${b}.pre.1.html"
47
- mv "$b/unpaired.post_trim_QC_${b}.1.html" "../03.read_quality/${b}.post.1.html"
48
+ mv "$b/unpaired.pre_trim_QC_${b}.1.html" \
49
+ "../03.read_quality/${b}.pre.1.html"
50
+ mv "$b/unpaired.post_trim_QC_${b}.1.html" \
51
+ "../03.read_quality/${b}.post.1.html"
48
52
  fi
49
53
  mv "$b/Subsample_Adapter_Detection.stats.txt" \
50
54
  "../03.read_quality/$b.adapters.txt"
@@ -54,6 +58,22 @@ rm -r "$b"
54
58
  rm -f "$b".[12].fastq.gz
55
59
 
56
60
  # Finalize
57
- miga date > "$DATASET.done"
58
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
61
+ miga date > "${DATASET}.done"
62
+ cat <<VERSIONS \
63
+ | miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
64
+ => MiGA
65
+ $(miga --version)
66
+ => Enveomics Collection: FastQ.tag.rb
67
+ $(FastQ.tag.rb --version | perl -pe 's/.* //')
68
+ => Multitrim
69
+ version unknown
70
+ => FaQCs
71
+ $(FaQCs --version 2>&1 | perl -pe 's/.*: //')
72
+ => Seqtk
73
+ $(seqtk 2>&1 | grep Version | perl -pe 's/.*: //')
74
+ => Fastp
75
+ $(fastp --version 2>&1 | perl -pe 's/^fastp //')
76
+ => Falco
77
+ $(falco -V 2>&1 | tee)
78
+ VERSIONS
59
79
 
data/test/dataset_test.rb CHANGED
@@ -40,7 +40,7 @@ class DatasetTest < Test::Unit::TestCase
40
40
  d2.save
41
41
  assert_not_predicate(d2, :multi?)
42
42
  assert_not_predicate(d2, :nonmulti?)
43
- assert_nil(d2.metadata[:type])
43
+ assert_equal(:empty, d2.metadata[:type])
44
44
  d2.metadata[:type] = :metagenome
45
45
  d2.save
46
46
  assert_equal(:metagenome, d2.metadata[:type])
@@ -89,25 +89,40 @@ class DatasetTest < Test::Unit::TestCase
89
89
  assert_equal(:trimmed_reads, d2.first_preprocessing(true))
90
90
  assert_equal(:read_quality, d2.next_preprocessing(true))
91
91
  assert { !d2.done_preprocessing?(true) }
92
- # Ref and undeclared multi
92
+
93
+ # Ref and undeclared type (empty)
93
94
  assert { d2.ignore_task?(:mytaxa) }
94
95
  assert { d2.ignore_task?(:mytaxa_scan) }
95
96
  assert { d2.ignore_task?(:distances) }
97
+ assert { d2.ignore_task?(:essential_genes) }
98
+
96
99
  # Ref and multi
97
100
  d2.metadata[:type] = :metagenome
98
101
  assert { !d2.ignore_task?(:mytaxa) }
99
102
  assert { d2.ignore_task?(:mytaxa_scan) }
100
103
  assert { d2.ignore_task?(:distances) }
104
+ assert { !d2.ignore_task?(:essential_genes) }
105
+
101
106
  # Ref and nonmulti
102
107
  d2.metadata[:type] = :genome
103
108
  assert { d2.ignore_task?(:mytaxa) }
104
109
  assert { !d2.ignore_task?(:mytaxa_scan) }
105
110
  assert { !d2.ignore_task?(:distances) }
111
+ assert { !d2.ignore_task?(:essential_genes) }
112
+
106
113
  # Qry and nonmulti
107
114
  d2.metadata[:ref] = false
108
115
  assert { d2.ignore_task?(:mytaxa) }
109
116
  assert { d2.ignore_task?(:mytaxa_scan) }
110
117
  assert { !d2.ignore_task?(:distances) }
118
+ assert { !d2.ignore_task?(:essential_genes) }
119
+
120
+ # Qry and plasmid
121
+ d2.metadata[:type] = :plasmid
122
+ assert { d2.ignore_task?(:mytaxa) }
123
+ assert { d2.ignore_task?(:mytaxa_scan) }
124
+ assert { !d2.ignore_task?(:distances) }
125
+ assert { d2.ignore_task?(:essential_genes) }
111
126
  end
112
127
 
113
128
  def test_profile_advance
data/test/hook_test.rb CHANGED
@@ -9,9 +9,10 @@ class HookTest < Test::Unit::TestCase
9
9
  end
10
10
 
11
11
  def test_add_hook
12
- assert_nil(dataset.hooks[:on_save])
13
- dataset.add_hook(:on_save, :run_lambda, Proc.new { $counter += 1 })
12
+ assert_nil(dataset.hooks[:on_remove])
14
13
  assert_equal(1, dataset.hooks[:on_save].size)
14
+ dataset.add_hook(:on_save, :run_lambda, Proc.new { $counter += 1 })
15
+ assert_equal(2, dataset.hooks[:on_save].size)
15
16
  $counter = 1
16
17
  dataset.save
17
18
  assert_equal(2, $counter)
data/test/net_test.rb CHANGED
@@ -15,20 +15,36 @@ class FormatTest < Test::Unit::TestCase
15
15
  declare_remote_access
16
16
  m = MiGA::MiGA
17
17
  assert_raise { m.remote_connection(:bad_descriptor) }
18
- assert_raise { m.remote_connection('http://microbial-genomes.org/') }
18
+ assert_raise { m.remote_connection('ssh://microbial-genomes.org/') }
19
19
  c = m.remote_connection(:miga_db)
20
20
  assert_equal(Net::FTP, c.class)
21
21
  c.close
22
22
  end
23
23
 
24
- def test_download_file_ftp
24
+ def test_download_file_http
25
25
  declare_remote_access
26
26
  m = MiGA::MiGA
27
+ #o = m.http_request(:get, 'http://uibk.microbial-genomes.org/robots.txt')
28
+ o = m.http_request(:get, 'http://disc-genomics.uibk.ac.at/miga/robots.txt')
29
+ o = o.split(/\n/)
30
+ assert_equal(6, o.count)
31
+ assert_equal('#', o[1])
32
+ assert_equal('User-agent: *', o[2])
33
+ end
34
+
35
+ def test_download_file_ftp
36
+ declare_remote_access
27
37
  f = tmpfile('t/test.txt')
28
38
  d = File.dirname(f)
29
39
  assert(!Dir.exist?(d))
30
- m.download_file_ftp(:miga_online_ftp, 'test.txt', f)
31
- assert(Dir.exist?(d))
32
- assert_equal('miga', File.read(f).chomp)
40
+ # TODO
41
+ # Bring back when I can connect to the Gatech's FTP
42
+ ### m = MiGA::MiGA
43
+ ### m.download_file_ftp(:miga_online_ftp, 'api_test.txt', f)
44
+ ### assert(Dir.exist?(d))
45
+ ### assert_equal('miga', File.read(f).chomp)
46
+ ### File.unlink(f)
47
+ ### m.download_file_ftp(:miga_db, '../api_test.txt', f)
48
+ ### assert_equal('miga', File.read(f).chomp)
33
49
  end
34
50
  end
data/test/project_test.rb CHANGED
@@ -8,6 +8,12 @@ class ProjectTest < Test::Unit::TestCase
8
8
  initialize_miga_home
9
9
  end
10
10
 
11
+ def test_class_variables
12
+ assert(MiGA::Project.INCLADE_TASKS.is_a? Array)
13
+ assert(MiGA::Project.DISTANCE_TASKS.is_a? Array)
14
+ assert(MiGA::Project.KNOWN_TYPES.is_a? Hash)
15
+ end
16
+
11
17
  def create_result_files(project, res, exts)
12
18
  d = MiGA::Project.RESULT_DIRS[res]
13
19
  (['.done'] + exts).each do |x|
@@ -155,4 +161,11 @@ class ProjectTest < Test::Unit::TestCase
155
161
  date3 = p1.add_result(:ogs, true, force: true)[:created]
156
162
  assert_not_equal(date1, date3)
157
163
  end
164
+
165
+ def test_options
166
+ assert_equal('fastaai', project.option(:haai_p))
167
+ assert_equal(false, project.option(:aai_save_rbm))
168
+ project.metadata[:type] = 'clade'
169
+ assert_equal(true, project.option(:aai_save_rbm))
170
+ end
158
171
  end
@@ -7,6 +7,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
7
7
 
8
8
  def setup
9
9
  initialize_miga_home
10
+ ENV.delete('NCBI_API_KEY')
10
11
  end
11
12
 
12
13
  def test_class_universe
@@ -19,7 +20,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
19
20
  assert_raise { MiGA::RemoteDataset.new('ids', :google, :ebi) }
20
21
  end
21
22
 
22
- def test_rest
23
+ def test_get
23
24
  hiv2 = 'M30502.1'
24
25
  { embl: :ebi, nuccore: :ncbi }.each do |db, universe|
25
26
  rd = MiGA::RemoteDataset.new(hiv2, db, universe)
@@ -47,7 +48,8 @@ class RemoteDatasetTest < Test::Unit::TestCase
47
48
  end
48
49
 
49
50
  def test_net_ftp
50
- cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/gap/GAPJ01.fasta.gz'
51
+ cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/' \
52
+ 'public/gap/GAPJ01.fasta.gz'
51
53
  n = 'Cjac_L14'
52
54
  rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
53
55
  assert_equal([cjac], rd.ids)
@@ -86,26 +88,56 @@ class RemoteDatasetTest < Test::Unit::TestCase
86
88
  def test_type_status_asm
87
89
  declare_remote_access
88
90
  rd = MiGA::RemoteDataset.new('GCF_000018105.1', :assembly, :ncbi)
89
- assert { rd.get_metadata[:is_type] }
91
+ md = rd.get_metadata
92
+ assert(md[:is_type])
90
93
  end
91
94
 
92
95
  def test_nontype_status_asm
93
96
  declare_remote_access
94
97
  rd = MiGA::RemoteDataset.new('GCA_004684205.1', :assembly, :ncbi)
95
- assert { !rd.get_metadata[:is_type] }
98
+ md = rd.get_metadata
99
+ assert(!md[:is_type])
96
100
  end
97
101
 
98
102
  def test_type_status_nuccore
99
103
  declare_remote_access
100
104
  rd = MiGA::RemoteDataset.new('NC_019748.1', :nuccore, :ncbi)
101
- assert { rd.get_metadata[:is_type] }
105
+ md = rd.get_metadata
106
+ assert(md[:is_type])
102
107
  end
103
108
 
104
109
  def test_ref_type_status
105
110
  declare_remote_access
106
111
  rd = MiGA::RemoteDataset.new('GCA_003144295.1', :assembly, :ncbi)
107
- assert { !rd.get_metadata[:is_type] }
108
- assert { rd.get_metadata[:is_ref_type] }
112
+ md = rd.get_metadata
113
+ assert(!md[:is_type])
114
+ assert(md[:is_ref_type])
115
+ end
116
+
117
+ def test_gtdb_taxonomy
118
+ declare_remote_access
119
+ rd = MiGA::RemoteDataset.new('GCA_018200315.1', :assembly, :gtdb)
120
+ md = rd.get_metadata
121
+ assert(!md[:is_type])
122
+ assert_not_nil(md[:gtdb_release])
123
+ assert(md[:tax].is_a? MiGA::Taxonomy)
124
+ assert_equal('GCA_018200315.1', md[:gtdb_assembly])
125
+ assert_equal('gtdb', md[:tax][:ns])
126
+ assert_equal('Bacteroidia', md[:tax][:c])
127
+ end
128
+
129
+ def test_gtdb_alt_taxonomy
130
+ declare_remote_access
131
+ rd = MiGA::RemoteDataset.new('GCA_018200315.1', :assembly, :gtdb)
132
+ rd.metadata[:get_ncbi_taxonomy] = true
133
+ md = rd.get_metadata
134
+ assert(md[:tax].is_a? MiGA::Taxonomy)
135
+ assert_equal('ncbi', md[:tax][:ns])
136
+ assert_equal('Flavobacteriia', md[:tax][:c])
137
+ assert(md[:tax].alternative(1).is_a? MiGA::Taxonomy)
138
+ assert(md[:tax].alternative(:gtdb).is_a? MiGA::Taxonomy)
139
+ assert_equal('gtdb', md[:tax].alternative(1)[:ns])
140
+ assert_equal('gtdb', md[:tax].alternative(:gtdb)[:ns])
109
141
  end
110
142
 
111
143
  def test_missing_data
@@ -114,6 +146,73 @@ class RemoteDatasetTest < Test::Unit::TestCase
114
146
  assert_raise(MiGA::RemoteDataMissingError) { rd.save_to(project, 'bad') }
115
147
  end
116
148
 
149
+ def test_gtdb_request
150
+ # No remote access needed
151
+ rd = MiGA::RemoteDataset.new('g__Macondimonas', :taxon, :gtdb)
152
+ u = rd.download_uri
153
+ h = rd.download_headers
154
+
155
+ assert(u.is_a? URI)
156
+ assert_equal('https', u.scheme)
157
+ assert_equal('genomes', File.basename(u.path))
158
+
159
+ assert(h.is_a? Hash)
160
+ assert_equal(1, h.size)
161
+ assert_equal('application/json', h['Accept'])
162
+ end
163
+
164
+ def test_ncbi_datasets_download_request
165
+ # No remote access needed
166
+ rd = MiGA::RemoteDataset.new(
167
+ 'GCF_004684205.1', :genome, :ncbi_datasets_download
168
+ )
169
+ u = rd.download_uri
170
+ h = rd.download_headers
171
+
172
+ assert(u.is_a? URI)
173
+ assert_equal('https', u.scheme)
174
+ assert_equal('download', File.basename(u.path))
175
+
176
+ assert(h.is_a? Hash)
177
+ assert_equal(1, h.size)
178
+ assert_equal('application/zip', h['Accept'])
179
+
180
+ ENV['NCBI_API_KEY'] = 'Not-a-real-key'
181
+ h = rd.download_headers
182
+ ENV.delete('NCBI_API_KEY')
183
+ assert_equal(2, h.size)
184
+ assert_equal('Not-a-real-key', h['api-key'])
185
+ end
186
+
187
+ def test_seqcode_request
188
+ # No remote access needed
189
+ rd = MiGA::RemoteDataset.new(nil, 'type-genomes', :seqcode)
190
+ u = rd.download_uri
191
+
192
+ assert(u.is_a? URI)
193
+ assert_equal('https', u.scheme)
194
+ assert_equal('type-genomes.json', File.basename(u.path))
195
+ end
196
+
197
+ def test_ncbi_datasets_request
198
+ rd = MiGA::RemoteDataset.new({ taxons: 'Bos' }, :genome, :ncbi_datasets)
199
+ u = rd.download_uri
200
+ h = rd.download_headers
201
+ p = rd.download_payload
202
+
203
+ assert(u.is_a? URI)
204
+ assert_equal('https', u.scheme)
205
+ assert_equal('dataset_report', File.basename(u.path))
206
+
207
+ assert(h.is_a? Hash)
208
+ assert_equal(1, h.size)
209
+ assert_equal('application/json', h['Content-Type'])
210
+
211
+ assert(p.is_a? String)
212
+ assert_equal('{', p[0])
213
+ assert_equal('}', p[-1])
214
+ end
215
+
117
216
  # This test is too expensive (too much time to run it!)
118
217
  # def test_net_timeout
119
218
  # declare_remote_access
data/test/result_test.rb CHANGED
@@ -6,27 +6,15 @@ class ResultTest < Test::Unit::TestCase
6
6
 
7
7
  def setup
8
8
  initialize_miga_home
9
- FileUtils.touch(
10
- File.join(
11
- project.path, 'data', '02.trimmed_reads',
12
- "#{dataset.name}.1.clipped.fastq"
13
- )
14
- )
15
- FileUtils.touch(
16
- File.join(
17
- project.path, 'data', '02.trimmed_reads', "#{dataset.name}.done"
18
- )
19
- )
20
- FileUtils.touch(
21
- File.join(
22
- project.path, 'data', '10.clades', '01.find', 'miga-project.empty'
23
- )
24
- )
25
- FileUtils.touch(
26
- File.join(
27
- project.path, 'data', '10.clades', '01.find', 'miga-project.done'
28
- )
29
- )
9
+ to_touch = [
10
+ ['02.trimmed_reads', "#{dataset.name}.1.clipped.fastq"],
11
+ ['02.trimmed_reads', "#{dataset.name}.done"],
12
+ ['10.clades', '01.find', 'miga-project.empty'],
13
+ ['10.clades', '01.find', 'miga-project.done']
14
+ ]
15
+ to_touch.each do |path|
16
+ FileUtils.touch(File.join(project.path, 'data', *path))
17
+ end
30
18
  end
31
19
 
32
20
  def test_add_result
@@ -89,4 +77,42 @@ class ResultTest < Test::Unit::TestCase
89
77
  r = dataset.add_result(:trimmed_reads)
90
78
  assert_equal(5.0, r.running_time)
91
79
  end
80
+
81
+ def test_status
82
+ d = dataset
83
+ assert_equal(:ignore_empty, d.result_status(:trimmed_reads))
84
+ d.add_result(:trimmed_reads)
85
+ assert_equal(:-, d.result_status(:raw_reads))
86
+ assert_equal(:complete, d.result_status(:trimmed_reads))
87
+ assert_equal(:pending, d.result_status(:read_quality))
88
+ assert_equal(:pending, d.result_status(:assembly))
89
+
90
+ h = d.results_status
91
+ assert(h.is_a? Hash)
92
+ assert_equal(:-, h[:raw_reads])
93
+ assert_equal(:complete, h[:trimmed_reads])
94
+ assert_equal(:pending, h[:read_quality])
95
+
96
+ # Test the "advance" interface from Project
97
+ a = project.profile_datasets_advance
98
+ assert(a.is_a? Array)
99
+ assert_equal(1, a.size)
100
+ assert(a[0].is_a? Array)
101
+ assert_equal([0, 1, 2, 2], a[0][0..3])
102
+ end
103
+
104
+ def test_versions
105
+ r = dataset.add_result(:trimmed_reads)
106
+ assert_respond_to(r, :add_versions)
107
+ assert_respond_to(r, :versions_md)
108
+ assert_equal(MiGA::VERSION.join('.'), r.versions[:MiGA])
109
+ assert_nil(r.versions[:GoodSoftware])
110
+
111
+ r.add_versions('GoodSoftware' => '1.2.3')
112
+ assert_equal('1.2.3', r.versions[:GoodSoftware])
113
+
114
+ md = r.versions_md
115
+ assert_equal('-', md[0])
116
+ assert_equal(2, md.split("\n").size)
117
+ end
92
118
  end
@@ -92,8 +92,12 @@ class TaxonomyTest < Test::Unit::TestCase
92
92
  end
93
93
 
94
94
  def test_reset
95
- tx = MiGA::Taxonomy.new('ns:Letters d:Latin s:A', nil,
96
- ['ns:Words d:English s:A', 'ns:Music d:Tone s:A'])
95
+ tx = MiGA::Taxonomy.new(
96
+ 'ns:Letters d:Latin s:A', nil,
97
+ ['ns:Words d:English s:A', 'ns:Music d:Tone s:A']
98
+ )
99
+ assert_equal('Latin', tx.domain)
100
+
97
101
  # Reset
98
102
  assert_equal(2, tx.alternative.size)
99
103
  assert_equal('Letters', tx.namespace)
@@ -102,11 +106,13 @@ class TaxonomyTest < Test::Unit::TestCase
102
106
  assert_nil(tx.namespace)
103
107
  tx.reset('ns:Letters d:Latin s:A')
104
108
  assert_equal('Letters', tx.namespace)
109
+
105
110
  # Change of alternative
106
111
  assert_equal('ns:Words d:English s:A', tx.alternative('Words').to_s)
107
112
  tx.add_alternative(MiGA::Taxonomy.new('ns:Words d:Spanish s:A'))
108
113
  assert_equal('ns:Words d:Spanish s:A', tx.alternative('Words').to_s)
109
- # Change of master
114
+
115
+ # Change of main
110
116
  assert_equal('ns:Letters d:Latin s:A', tx.to_s)
111
117
  tx.add_alternative(MiGA::Taxonomy.new('ns:Letters d:Unicode s:A'))
112
118
  assert_equal('ns:Letters d:Unicode s:A', tx.to_s)
@@ -54,7 +54,9 @@ class MiGA::DistanceRunner
54
54
  # first-come-first-serve traverse
55
55
  sbj = []
56
56
  ref_project.each_dataset do |ds|
57
- sbj << ds if ds.ref? && !ds.multi? && ds.result(:essential_genes)
57
+ torun = ds.ref? && !ds.multi?
58
+ torun &&= ds.result(:essential_genes) || (!ds.markers? && ds.result(:cds))
59
+ sbj << ds if torun
58
60
  end
59
61
  ani_after_aai(sbj)
60
62
 
data/utils/distances.rb CHANGED
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
4
4
 
5
5
  project = ARGV.shift
6
6
  dataset = ARGV.shift
7
- opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
7
+ opts = Hash[ARGV.map { |i| i.split('=', 2).tap { |j| j[0] = j[0].to_sym } }]
8
8
  runner = MiGA::DistanceRunner.new(project, dataset, opts)
9
9
  runner.go!