miga-base 1.3.8.2 → 1.3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/lib/miga/cli/action/add_result.rb +22 -1
  4. data/lib/miga/cli/action/browse/about.html +4 -2
  5. data/lib/miga/cli/action/download/gtdb.rb +1 -1
  6. data/lib/miga/cli/action/download/ncbi.rb +43 -68
  7. data/lib/miga/cli/action/download/seqcode.rb +1 -2
  8. data/lib/miga/cli/action/ncbi_get.rb +1 -8
  9. data/lib/miga/cli/action/wf.rb +15 -6
  10. data/lib/miga/cli/objects_helper.rb +3 -0
  11. data/lib/miga/cli/opt_helper.rb +8 -2
  12. data/lib/miga/common/net.rb +100 -18
  13. data/lib/miga/dataset/base.rb +40 -12
  14. data/lib/miga/dataset/hooks.rb +8 -0
  15. data/lib/miga/dataset/result/ignore.rb +14 -2
  16. data/lib/miga/dataset/type.rb +51 -0
  17. data/lib/miga/dataset.rb +3 -22
  18. data/lib/miga/json.rb +9 -0
  19. data/lib/miga/project/base.rb +15 -9
  20. data/lib/miga/project.rb +7 -1
  21. data/lib/miga/remote_dataset/base.rb +117 -36
  22. data/lib/miga/remote_dataset/download.rb +121 -54
  23. data/lib/miga/remote_dataset.rb +34 -13
  24. data/lib/miga/result/stats.rb +2 -0
  25. data/lib/miga/result/versions.rb +23 -0
  26. data/lib/miga/result.rb +7 -1
  27. data/lib/miga/taxonomy/base.rb +3 -2
  28. data/lib/miga/version.rb +2 -2
  29. data/scripts/assembly.bash +15 -1
  30. data/scripts/cds.bash +9 -3
  31. data/scripts/distances.bash +103 -5
  32. data/scripts/essential_genes.bash +14 -1
  33. data/scripts/mytaxa.bash +18 -3
  34. data/scripts/mytaxa_scan.bash +16 -3
  35. data/scripts/read_quality.bash +6 -2
  36. data/scripts/ssu.bash +19 -1
  37. data/scripts/stats.bash +9 -3
  38. data/scripts/taxonomy.bash +98 -2
  39. data/scripts/trimmed_fasta.bash +10 -2
  40. data/scripts/trimmed_reads.bash +26 -6
  41. data/test/dataset_test.rb +17 -2
  42. data/test/hook_test.rb +3 -2
  43. data/test/net_test.rb +21 -5
  44. data/test/project_test.rb +13 -0
  45. data/test/remote_dataset_test.rb +106 -7
  46. data/test/result_test.rb +47 -21
  47. data/test/taxonomy_test.rb +9 -3
  48. data/utils/distance/runner.rb +3 -1
  49. data/utils/distances.rb +1 -1
  50. metadata +4 -2
@@ -19,6 +19,10 @@ for s in 1 2 ; do
19
19
  done
20
20
 
21
21
  # Finalize
22
- miga date > "$DATASET.done"
23
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
22
+ miga date > "${DATASET}.done"
23
+ cat <<VERSIONS \
24
+ | miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
25
+ => MiGA
26
+ $(miga --version)
27
+ VERSIONS
24
28
 
data/scripts/ssu.bash CHANGED
@@ -65,4 +65,22 @@ fi
65
65
 
66
66
  # Finalize
67
67
  miga date > "${DATASET}.done"
68
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
68
+ cat <<VERSIONS \
69
+ | miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
70
+ => MiGA
71
+ $(miga --version)
72
+ $(
73
+ if [[ -s $fa ]] ; then
74
+ echo "=> barrnap"
75
+ barrnap --version 2>&1 | perl -pe 's/^barrnap //'
76
+ echo "=> bedtools"
77
+ bedtools --version 2>&1 | perl -pe 's/^bedtools //'
78
+ echo "=> Enveomics Collection"
79
+ echo "version unknown"
80
+ echo "=> RDP Naive Bayes Classifier"
81
+ gzip -cd "${DATASET}.rdp.tsv.gz" | tail -n 1 | perl -pe 's/.*: //'
82
+ echo "=> tRNAscan-SE"
83
+ tRNAscan-SE -h 2>&1 | head -n 2 | tail -n 1 | perl -pe 's/^tRNAscan-SE //'
84
+ fi
85
+ )
86
+ VERSIONS
data/scripts/stats.bash CHANGED
@@ -12,11 +12,17 @@ cd "$DIR"
12
12
  miga date > "${DATASET}.start"
13
13
 
14
14
  # Calculate statistics
15
- for i in raw_reads trimmed_fasta assembly cds essential_genes distances taxonomy ssu ; do
15
+ for i in raw_reads trimmed_fasta assembly \
16
+ cds essential_genes distances taxonomy ssu ; do
16
17
  echo "# $i"
17
18
  miga stats --compute-and-save --ignore-empty -P "$PROJECT" -D "$DATASET" -r $i
18
19
  done
19
20
 
20
21
  # Finalize
21
- miga date > "$DATASET.done"
22
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
22
+ miga date > "${DATASET}.done"
23
+ cat <<VERSIONS \
24
+ | miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
25
+ => MiGA
26
+ $(miga --version)
27
+ VERSIONS
28
+
@@ -16,5 +16,101 @@ ruby -I "$MIGA/lib" \
16
16
  "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET" run_taxonomy=1
17
17
 
18
18
  # Finalize
19
- miga date > "$DATASET.done"
20
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
19
+ fastaai=no
20
+ aai=no
21
+ ani=no
22
+ blast=no
23
+ blat=no
24
+ diamond=no
25
+ fastani=no
26
+ REF_PROJECT=$(miga option -P "$PROJECT" -k ref_project)
27
+ if [[ -S "$REF_PROJECT" ]] ; then
28
+ case $(miga option -P "$REF_PROJECT" -k haai_p) in
29
+ fastaai)
30
+ fastaai=yes
31
+ ;;
32
+ diamond)
33
+ diamond=yes
34
+ aai=yes
35
+ ;;
36
+ blast)
37
+ blast=yes
38
+ aai=yes
39
+ ;;
40
+ esac
41
+
42
+ case $(miga option -P "$REF_PROJECT" -k aai_p) in
43
+ diamond)
44
+ diamond=yes
45
+ aai=yes
46
+ ;;
47
+ blast)
48
+ blast=yes
49
+ aai=yes
50
+ ;;
51
+ esac
52
+
53
+ case $(miga option -P "$REF_PROJECT" -k ani_p) in
54
+ blast)
55
+ blast=yes
56
+ ani=yes
57
+ ;;
58
+ blat)
59
+ blat=yes
60
+ ani=yes
61
+ ;;
62
+ fastani)
63
+ fastani=yes
64
+ ;;
65
+ esac
66
+ fi
67
+
68
+ miga date > "${DATASET}.done"
69
+ cat <<VERSIONS \
70
+ | miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
71
+ => MiGA
72
+ $(miga --version)
73
+ $(
74
+ if [[ "$fastaai" == "yes" ]] ; then
75
+ echo "=> FastAAI"
76
+ fastaai version 2>&1 | perl -pe 's/.*=//'
77
+ fi
78
+ )
79
+ $(
80
+ if [[ "$fastani" == "yes" ]] ; then
81
+ echo "=> FastANI"
82
+ fastANI --version 2>&1 | grep . | perl -pe 's/^version //'
83
+ fi
84
+ )
85
+ $(
86
+ if [[ "$aai" == "yes" ]] ; then
87
+ echo "=> Enveomics Collection: aai.rb"
88
+ aai.rb --version 2>&1 | perl -pe 's/.*: //'
89
+ fi
90
+ )
91
+ $(
92
+ if [[ "$ani" == "yes" ]] ; then
93
+ echo "=> Enveomics Collection: ani.rb"
94
+ ani.rb --version 2>&1 | perl -pe 's/.*: //'
95
+ fi
96
+ )
97
+ $(
98
+ if [[ "$blast" == "yes" ]] ; then
99
+ echo "=> NCBI BLAST+"
100
+ blastp -version 2>&1 | tail -n 1 | perl -pe 's/.*: blast //'
101
+ fi
102
+ )
103
+ $(
104
+ if [[ "$blat" == "yes" ]] ; then
105
+ echo "=> BLAT"
106
+ blat 2>&1 | head -n 1 | perl -pe 's/.* v\. //' | perl -pe 's/ fast .*//'
107
+ fi
108
+ )
109
+ $(
110
+ if [[ "$diamond" == "yes" ]] ; then
111
+ echo "=> Diamond"
112
+ diamond --version 2>&1 | perl -pe 's/^diamond version //'
113
+ fi
114
+ )
115
+ VERSIONS
116
+
@@ -32,6 +32,14 @@ for x in 1.fasta 2.fasta SingleReads.fa CoupledReads.fa ; do
32
32
  done
33
33
 
34
34
  # Finalize
35
- miga date > "$DATASET.done"
36
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
35
+ miga date > "${DATASET}.done"
36
+ cat <<VERSIONS \
37
+ | miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
38
+ => MiGA
39
+ $(miga --version)
40
+ => Enveomics Collection: FastQ.maskQual.rb
41
+ $(FastQ.maskQual.rb --version | perl -pe 's/.* //')
42
+ => Enveomics Collection: FastA.interpose.pl
43
+ version unknown
44
+ VERSIONS
37
45
 
@@ -36,15 +36,19 @@ if [[ -s "$b.2.fastq.gz" ]] ; then
36
36
  $CMD -1 "$b.1.fastq.gz" -2 "$b.2.fastq.gz"
37
37
  for s in 1 2 ; do
38
38
  mv "$b/${s}.post_trim_${b}.${s}.fq.gz" "${b}.${s}.clipped.fastq.gz"
39
- mv "$b/${s}.pre_trim_QC_${b}.${s}.html" "../03.read_quality/${b}.pre.${s}.html"
40
- mv "$b/${s}.post_trim_QC_${b}.${s}.html" "../03.read_quality/${b}.post.${s}.html"
39
+ mv "$b/${s}.pre_trim_QC_${b}.${s}.html" \
40
+ "../03.read_quality/${b}.pre.${s}.html"
41
+ mv "$b/${s}.post_trim_QC_${b}.${s}.html" \
42
+ "../03.read_quality/${b}.post.${s}.html"
41
43
  done
42
44
  else
43
45
  # Unpaired
44
46
  $CMD -u "$b.1.fastq.gz"
45
47
  mv "$b/unpaired.post_trim_${b}.1.fq.gz" "${b}.1.clipped.fastq.gz"
46
- mv "$b/unpaired.pre_trim_QC_${b}.1.html" "../03.read_quality/${b}.pre.1.html"
47
- mv "$b/unpaired.post_trim_QC_${b}.1.html" "../03.read_quality/${b}.post.1.html"
48
+ mv "$b/unpaired.pre_trim_QC_${b}.1.html" \
49
+ "../03.read_quality/${b}.pre.1.html"
50
+ mv "$b/unpaired.post_trim_QC_${b}.1.html" \
51
+ "../03.read_quality/${b}.post.1.html"
48
52
  fi
49
53
  mv "$b/Subsample_Adapter_Detection.stats.txt" \
50
54
  "../03.read_quality/$b.adapters.txt"
@@ -54,6 +58,22 @@ rm -r "$b"
54
58
  rm -f "$b".[12].fastq.gz
55
59
 
56
60
  # Finalize
57
- miga date > "$DATASET.done"
58
- miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f
61
+ miga date > "${DATASET}.done"
62
+ cat <<VERSIONS \
63
+ | miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
64
+ => MiGA
65
+ $(miga --version)
66
+ => Enveomics Collection: FastQ.tag.rb
67
+ $(FastQ.tag.rb --version | perl -pe 's/.* //')
68
+ => Multitrim
69
+ version unknown
70
+ => FaQCs
71
+ $(FaQCs --version 2>&1 | perl -pe 's/.*: //')
72
+ => Seqtk
73
+ $(seqtk 2>&1 | grep Version | perl -pe 's/.*: //')
74
+ => Fastp
75
+ $(fastp --version 2>&1 | perl -pe 's/^fastp //')
76
+ => Falco
77
+ $(falco -V 2>&1 | tee)
78
+ VERSIONS
59
79
 
data/test/dataset_test.rb CHANGED
@@ -40,7 +40,7 @@ class DatasetTest < Test::Unit::TestCase
40
40
  d2.save
41
41
  assert_not_predicate(d2, :multi?)
42
42
  assert_not_predicate(d2, :nonmulti?)
43
- assert_nil(d2.metadata[:type])
43
+ assert_equal(:empty, d2.metadata[:type])
44
44
  d2.metadata[:type] = :metagenome
45
45
  d2.save
46
46
  assert_equal(:metagenome, d2.metadata[:type])
@@ -89,25 +89,40 @@ class DatasetTest < Test::Unit::TestCase
89
89
  assert_equal(:trimmed_reads, d2.first_preprocessing(true))
90
90
  assert_equal(:read_quality, d2.next_preprocessing(true))
91
91
  assert { !d2.done_preprocessing?(true) }
92
- # Ref and undeclared multi
92
+
93
+ # Ref and undeclared type (empty)
93
94
  assert { d2.ignore_task?(:mytaxa) }
94
95
  assert { d2.ignore_task?(:mytaxa_scan) }
95
96
  assert { d2.ignore_task?(:distances) }
97
+ assert { d2.ignore_task?(:essential_genes) }
98
+
96
99
  # Ref and multi
97
100
  d2.metadata[:type] = :metagenome
98
101
  assert { !d2.ignore_task?(:mytaxa) }
99
102
  assert { d2.ignore_task?(:mytaxa_scan) }
100
103
  assert { d2.ignore_task?(:distances) }
104
+ assert { !d2.ignore_task?(:essential_genes) }
105
+
101
106
  # Ref and nonmulti
102
107
  d2.metadata[:type] = :genome
103
108
  assert { d2.ignore_task?(:mytaxa) }
104
109
  assert { !d2.ignore_task?(:mytaxa_scan) }
105
110
  assert { !d2.ignore_task?(:distances) }
111
+ assert { !d2.ignore_task?(:essential_genes) }
112
+
106
113
  # Qry and nonmulti
107
114
  d2.metadata[:ref] = false
108
115
  assert { d2.ignore_task?(:mytaxa) }
109
116
  assert { d2.ignore_task?(:mytaxa_scan) }
110
117
  assert { !d2.ignore_task?(:distances) }
118
+ assert { !d2.ignore_task?(:essential_genes) }
119
+
120
+ # Qry and plasmid
121
+ d2.metadata[:type] = :plasmid
122
+ assert { d2.ignore_task?(:mytaxa) }
123
+ assert { d2.ignore_task?(:mytaxa_scan) }
124
+ assert { !d2.ignore_task?(:distances) }
125
+ assert { d2.ignore_task?(:essential_genes) }
111
126
  end
112
127
 
113
128
  def test_profile_advance
data/test/hook_test.rb CHANGED
@@ -9,9 +9,10 @@ class HookTest < Test::Unit::TestCase
9
9
  end
10
10
 
11
11
  def test_add_hook
12
- assert_nil(dataset.hooks[:on_save])
13
- dataset.add_hook(:on_save, :run_lambda, Proc.new { $counter += 1 })
12
+ assert_nil(dataset.hooks[:on_remove])
14
13
  assert_equal(1, dataset.hooks[:on_save].size)
14
+ dataset.add_hook(:on_save, :run_lambda, Proc.new { $counter += 1 })
15
+ assert_equal(2, dataset.hooks[:on_save].size)
15
16
  $counter = 1
16
17
  dataset.save
17
18
  assert_equal(2, $counter)
data/test/net_test.rb CHANGED
@@ -15,20 +15,36 @@ class FormatTest < Test::Unit::TestCase
15
15
  declare_remote_access
16
16
  m = MiGA::MiGA
17
17
  assert_raise { m.remote_connection(:bad_descriptor) }
18
- assert_raise { m.remote_connection('http://microbial-genomes.org/') }
18
+ assert_raise { m.remote_connection('ssh://microbial-genomes.org/') }
19
19
  c = m.remote_connection(:miga_db)
20
20
  assert_equal(Net::FTP, c.class)
21
21
  c.close
22
22
  end
23
23
 
24
- def test_download_file_ftp
24
+ def test_download_file_http
25
25
  declare_remote_access
26
26
  m = MiGA::MiGA
27
+ #o = m.http_request(:get, 'http://uibk.microbial-genomes.org/robots.txt')
28
+ o = m.http_request(:get, 'http://disc-genomics.uibk.ac.at/miga/robots.txt')
29
+ o = o.split(/\n/)
30
+ assert_equal(6, o.count)
31
+ assert_equal('#', o[1])
32
+ assert_equal('User-agent: *', o[2])
33
+ end
34
+
35
+ def test_download_file_ftp
36
+ declare_remote_access
27
37
  f = tmpfile('t/test.txt')
28
38
  d = File.dirname(f)
29
39
  assert(!Dir.exist?(d))
30
- m.download_file_ftp(:miga_online_ftp, 'test.txt', f)
31
- assert(Dir.exist?(d))
32
- assert_equal('miga', File.read(f).chomp)
40
+ # TODO
41
+ # Bring back when I can connect to the Gatech's FTP
42
+ ### m = MiGA::MiGA
43
+ ### m.download_file_ftp(:miga_online_ftp, 'api_test.txt', f)
44
+ ### assert(Dir.exist?(d))
45
+ ### assert_equal('miga', File.read(f).chomp)
46
+ ### File.unlink(f)
47
+ ### m.download_file_ftp(:miga_db, '../api_test.txt', f)
48
+ ### assert_equal('miga', File.read(f).chomp)
33
49
  end
34
50
  end
data/test/project_test.rb CHANGED
@@ -8,6 +8,12 @@ class ProjectTest < Test::Unit::TestCase
8
8
  initialize_miga_home
9
9
  end
10
10
 
11
+ def test_class_variables
12
+ assert(MiGA::Project.INCLADE_TASKS.is_a? Array)
13
+ assert(MiGA::Project.DISTANCE_TASKS.is_a? Array)
14
+ assert(MiGA::Project.KNOWN_TYPES.is_a? Hash)
15
+ end
16
+
11
17
  def create_result_files(project, res, exts)
12
18
  d = MiGA::Project.RESULT_DIRS[res]
13
19
  (['.done'] + exts).each do |x|
@@ -155,4 +161,11 @@ class ProjectTest < Test::Unit::TestCase
155
161
  date3 = p1.add_result(:ogs, true, force: true)[:created]
156
162
  assert_not_equal(date1, date3)
157
163
  end
164
+
165
+ def test_options
166
+ assert_equal('fastaai', project.option(:haai_p))
167
+ assert_equal(false, project.option(:aai_save_rbm))
168
+ project.metadata[:type] = 'clade'
169
+ assert_equal(true, project.option(:aai_save_rbm))
170
+ end
158
171
  end
@@ -7,6 +7,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
7
7
 
8
8
  def setup
9
9
  initialize_miga_home
10
+ ENV.delete('NCBI_API_KEY')
10
11
  end
11
12
 
12
13
  def test_class_universe
@@ -19,7 +20,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
19
20
  assert_raise { MiGA::RemoteDataset.new('ids', :google, :ebi) }
20
21
  end
21
22
 
22
- def test_rest
23
+ def test_get
23
24
  hiv2 = 'M30502.1'
24
25
  { embl: :ebi, nuccore: :ncbi }.each do |db, universe|
25
26
  rd = MiGA::RemoteDataset.new(hiv2, db, universe)
@@ -47,7 +48,8 @@ class RemoteDatasetTest < Test::Unit::TestCase
47
48
  end
48
49
 
49
50
  def test_net_ftp
50
- cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/gap/GAPJ01.fasta.gz'
51
+ cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/' \
52
+ 'public/gap/GAPJ01.fasta.gz'
51
53
  n = 'Cjac_L14'
52
54
  rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
53
55
  assert_equal([cjac], rd.ids)
@@ -86,26 +88,56 @@ class RemoteDatasetTest < Test::Unit::TestCase
86
88
  def test_type_status_asm
87
89
  declare_remote_access
88
90
  rd = MiGA::RemoteDataset.new('GCF_000018105.1', :assembly, :ncbi)
89
- assert { rd.get_metadata[:is_type] }
91
+ md = rd.get_metadata
92
+ assert(md[:is_type])
90
93
  end
91
94
 
92
95
  def test_nontype_status_asm
93
96
  declare_remote_access
94
97
  rd = MiGA::RemoteDataset.new('GCA_004684205.1', :assembly, :ncbi)
95
- assert { !rd.get_metadata[:is_type] }
98
+ md = rd.get_metadata
99
+ assert(!md[:is_type])
96
100
  end
97
101
 
98
102
  def test_type_status_nuccore
99
103
  declare_remote_access
100
104
  rd = MiGA::RemoteDataset.new('NC_019748.1', :nuccore, :ncbi)
101
- assert { rd.get_metadata[:is_type] }
105
+ md = rd.get_metadata
106
+ assert(md[:is_type])
102
107
  end
103
108
 
104
109
  def test_ref_type_status
105
110
  declare_remote_access
106
111
  rd = MiGA::RemoteDataset.new('GCA_003144295.1', :assembly, :ncbi)
107
- assert { !rd.get_metadata[:is_type] }
108
- assert { rd.get_metadata[:is_ref_type] }
112
+ md = rd.get_metadata
113
+ assert(!md[:is_type])
114
+ assert(md[:is_ref_type])
115
+ end
116
+
117
+ def test_gtdb_taxonomy
118
+ declare_remote_access
119
+ rd = MiGA::RemoteDataset.new('GCA_018200315.1', :assembly, :gtdb)
120
+ md = rd.get_metadata
121
+ assert(!md[:is_type])
122
+ assert_not_nil(md[:gtdb_release])
123
+ assert(md[:tax].is_a? MiGA::Taxonomy)
124
+ assert_equal('GCA_018200315.1', md[:gtdb_assembly])
125
+ assert_equal('gtdb', md[:tax][:ns])
126
+ assert_equal('Bacteroidia', md[:tax][:c])
127
+ end
128
+
129
+ def test_gtdb_alt_taxonomy
130
+ declare_remote_access
131
+ rd = MiGA::RemoteDataset.new('GCA_018200315.1', :assembly, :gtdb)
132
+ rd.metadata[:get_ncbi_taxonomy] = true
133
+ md = rd.get_metadata
134
+ assert(md[:tax].is_a? MiGA::Taxonomy)
135
+ assert_equal('ncbi', md[:tax][:ns])
136
+ assert_equal('Flavobacteriia', md[:tax][:c])
137
+ assert(md[:tax].alternative(1).is_a? MiGA::Taxonomy)
138
+ assert(md[:tax].alternative(:gtdb).is_a? MiGA::Taxonomy)
139
+ assert_equal('gtdb', md[:tax].alternative(1)[:ns])
140
+ assert_equal('gtdb', md[:tax].alternative(:gtdb)[:ns])
109
141
  end
110
142
 
111
143
  def test_missing_data
@@ -114,6 +146,73 @@ class RemoteDatasetTest < Test::Unit::TestCase
114
146
  assert_raise(MiGA::RemoteDataMissingError) { rd.save_to(project, 'bad') }
115
147
  end
116
148
 
149
+ def test_gtdb_request
150
+ # No remote access needed
151
+ rd = MiGA::RemoteDataset.new('g__Macondimonas', :taxon, :gtdb)
152
+ u = rd.download_uri
153
+ h = rd.download_headers
154
+
155
+ assert(u.is_a? URI)
156
+ assert_equal('https', u.scheme)
157
+ assert_equal('genomes', File.basename(u.path))
158
+
159
+ assert(h.is_a? Hash)
160
+ assert_equal(1, h.size)
161
+ assert_equal('application/json', h['Accept'])
162
+ end
163
+
164
+ def test_ncbi_datasets_download_request
165
+ # No remote access needed
166
+ rd = MiGA::RemoteDataset.new(
167
+ 'GCF_004684205.1', :genome, :ncbi_datasets_download
168
+ )
169
+ u = rd.download_uri
170
+ h = rd.download_headers
171
+
172
+ assert(u.is_a? URI)
173
+ assert_equal('https', u.scheme)
174
+ assert_equal('download', File.basename(u.path))
175
+
176
+ assert(h.is_a? Hash)
177
+ assert_equal(1, h.size)
178
+ assert_equal('application/zip', h['Accept'])
179
+
180
+ ENV['NCBI_API_KEY'] = 'Not-a-real-key'
181
+ h = rd.download_headers
182
+ ENV.delete('NCBI_API_KEY')
183
+ assert_equal(2, h.size)
184
+ assert_equal('Not-a-real-key', h['api-key'])
185
+ end
186
+
187
+ def test_seqcode_request
188
+ # No remote access needed
189
+ rd = MiGA::RemoteDataset.new(nil, 'type-genomes', :seqcode)
190
+ u = rd.download_uri
191
+
192
+ assert(u.is_a? URI)
193
+ assert_equal('https', u.scheme)
194
+ assert_equal('type-genomes.json', File.basename(u.path))
195
+ end
196
+
197
+ def test_ncbi_datasets_request
198
+ rd = MiGA::RemoteDataset.new({ taxons: 'Bos' }, :genome, :ncbi_datasets)
199
+ u = rd.download_uri
200
+ h = rd.download_headers
201
+ p = rd.download_payload
202
+
203
+ assert(u.is_a? URI)
204
+ assert_equal('https', u.scheme)
205
+ assert_equal('dataset_report', File.basename(u.path))
206
+
207
+ assert(h.is_a? Hash)
208
+ assert_equal(1, h.size)
209
+ assert_equal('application/json', h['Content-Type'])
210
+
211
+ assert(p.is_a? String)
212
+ assert_equal('{', p[0])
213
+ assert_equal('}', p[-1])
214
+ end
215
+
117
216
  # This test is too expensive (too much time to run it!)
118
217
  # def test_net_timeout
119
218
  # declare_remote_access
data/test/result_test.rb CHANGED
@@ -6,27 +6,15 @@ class ResultTest < Test::Unit::TestCase
6
6
 
7
7
  def setup
8
8
  initialize_miga_home
9
- FileUtils.touch(
10
- File.join(
11
- project.path, 'data', '02.trimmed_reads',
12
- "#{dataset.name}.1.clipped.fastq"
13
- )
14
- )
15
- FileUtils.touch(
16
- File.join(
17
- project.path, 'data', '02.trimmed_reads', "#{dataset.name}.done"
18
- )
19
- )
20
- FileUtils.touch(
21
- File.join(
22
- project.path, 'data', '10.clades', '01.find', 'miga-project.empty'
23
- )
24
- )
25
- FileUtils.touch(
26
- File.join(
27
- project.path, 'data', '10.clades', '01.find', 'miga-project.done'
28
- )
29
- )
9
+ to_touch = [
10
+ ['02.trimmed_reads', "#{dataset.name}.1.clipped.fastq"],
11
+ ['02.trimmed_reads', "#{dataset.name}.done"],
12
+ ['10.clades', '01.find', 'miga-project.empty'],
13
+ ['10.clades', '01.find', 'miga-project.done']
14
+ ]
15
+ to_touch.each do |path|
16
+ FileUtils.touch(File.join(project.path, 'data', *path))
17
+ end
30
18
  end
31
19
 
32
20
  def test_add_result
@@ -89,4 +77,42 @@ class ResultTest < Test::Unit::TestCase
89
77
  r = dataset.add_result(:trimmed_reads)
90
78
  assert_equal(5.0, r.running_time)
91
79
  end
80
+
81
+ def test_status
82
+ d = dataset
83
+ assert_equal(:ignore_empty, d.result_status(:trimmed_reads))
84
+ d.add_result(:trimmed_reads)
85
+ assert_equal(:-, d.result_status(:raw_reads))
86
+ assert_equal(:complete, d.result_status(:trimmed_reads))
87
+ assert_equal(:pending, d.result_status(:read_quality))
88
+ assert_equal(:pending, d.result_status(:assembly))
89
+
90
+ h = d.results_status
91
+ assert(h.is_a? Hash)
92
+ assert_equal(:-, h[:raw_reads])
93
+ assert_equal(:complete, h[:trimmed_reads])
94
+ assert_equal(:pending, h[:read_quality])
95
+
96
+ # Test the "advance" interface from Project
97
+ a = project.profile_datasets_advance
98
+ assert(a.is_a? Array)
99
+ assert_equal(1, a.size)
100
+ assert(a[0].is_a? Array)
101
+ assert_equal([0, 1, 2, 2], a[0][0..3])
102
+ end
103
+
104
+ def test_versions
105
+ r = dataset.add_result(:trimmed_reads)
106
+ assert_respond_to(r, :add_versions)
107
+ assert_respond_to(r, :versions_md)
108
+ assert_equal(MiGA::VERSION.join('.'), r.versions[:MiGA])
109
+ assert_nil(r.versions[:GoodSoftware])
110
+
111
+ r.add_versions('GoodSoftware' => '1.2.3')
112
+ assert_equal('1.2.3', r.versions[:GoodSoftware])
113
+
114
+ md = r.versions_md
115
+ assert_equal('-', md[0])
116
+ assert_equal(2, md.split("\n").size)
117
+ end
92
118
  end
@@ -92,8 +92,12 @@ class TaxonomyTest < Test::Unit::TestCase
92
92
  end
93
93
 
94
94
  def test_reset
95
- tx = MiGA::Taxonomy.new('ns:Letters d:Latin s:A', nil,
96
- ['ns:Words d:English s:A', 'ns:Music d:Tone s:A'])
95
+ tx = MiGA::Taxonomy.new(
96
+ 'ns:Letters d:Latin s:A', nil,
97
+ ['ns:Words d:English s:A', 'ns:Music d:Tone s:A']
98
+ )
99
+ assert_equal('Latin', tx.domain)
100
+
97
101
  # Reset
98
102
  assert_equal(2, tx.alternative.size)
99
103
  assert_equal('Letters', tx.namespace)
@@ -102,11 +106,13 @@ class TaxonomyTest < Test::Unit::TestCase
102
106
  assert_nil(tx.namespace)
103
107
  tx.reset('ns:Letters d:Latin s:A')
104
108
  assert_equal('Letters', tx.namespace)
109
+
105
110
  # Change of alternative
106
111
  assert_equal('ns:Words d:English s:A', tx.alternative('Words').to_s)
107
112
  tx.add_alternative(MiGA::Taxonomy.new('ns:Words d:Spanish s:A'))
108
113
  assert_equal('ns:Words d:Spanish s:A', tx.alternative('Words').to_s)
109
- # Change of master
114
+
115
+ # Change of main
110
116
  assert_equal('ns:Letters d:Latin s:A', tx.to_s)
111
117
  tx.add_alternative(MiGA::Taxonomy.new('ns:Letters d:Unicode s:A'))
112
118
  assert_equal('ns:Letters d:Unicode s:A', tx.to_s)
@@ -54,7 +54,9 @@ class MiGA::DistanceRunner
54
54
  # first-come-first-serve traverse
55
55
  sbj = []
56
56
  ref_project.each_dataset do |ds|
57
- sbj << ds if ds.ref? && !ds.multi? && ds.result(:essential_genes)
57
+ torun = ds.ref? && !ds.multi?
58
+ torun &&= ds.result(:essential_genes) || (!ds.markers? && ds.result(:cds))
59
+ sbj << ds if torun
58
60
  end
59
61
  ani_after_aai(sbj)
60
62
 
data/utils/distances.rb CHANGED
@@ -4,6 +4,6 @@ require_relative 'distance/runner.rb'
4
4
 
5
5
  project = ARGV.shift
6
6
  dataset = ARGV.shift
7
- opts = Hash[ARGV.map { |i| i.split("=", 2).tap { |j| j[0] = j[0].to_sym } }]
7
+ opts = Hash[ARGV.map { |i| i.split('=', 2).tap { |j| j[0] = j[0].to_sym } }]
8
8
  runner = MiGA::DistanceRunner.new(project, dataset, opts)
9
9
  runner.go!