miga-base 1.3.8.1 → 1.3.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/miga/cli/action/add_result.rb +22 -1
- data/lib/miga/cli/action/browse/about.html +4 -2
- data/lib/miga/cli/action/doctor.rb +1 -1
- data/lib/miga/cli/action/download/gtdb.rb +1 -1
- data/lib/miga/cli/action/download/ncbi.rb +43 -68
- data/lib/miga/cli/action/download/seqcode.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +1 -8
- data/lib/miga/cli/action/wf.rb +15 -6
- data/lib/miga/cli/objects_helper.rb +3 -0
- data/lib/miga/cli/opt_helper.rb +8 -2
- data/lib/miga/common/net.rb +100 -18
- data/lib/miga/dataset/base.rb +40 -12
- data/lib/miga/dataset/hooks.rb +8 -0
- data/lib/miga/dataset/result/ignore.rb +14 -2
- data/lib/miga/dataset/type.rb +51 -0
- data/lib/miga/dataset.rb +3 -22
- data/lib/miga/json.rb +9 -0
- data/lib/miga/project/base.rb +15 -9
- data/lib/miga/project.rb +7 -1
- data/lib/miga/remote_dataset/base.rb +117 -36
- data/lib/miga/remote_dataset/download.rb +121 -54
- data/lib/miga/remote_dataset.rb +34 -13
- data/lib/miga/result/stats.rb +2 -0
- data/lib/miga/result/versions.rb +23 -0
- data/lib/miga/result.rb +7 -1
- data/lib/miga/taxonomy/base.rb +3 -2
- data/lib/miga/version.rb +2 -2
- data/scripts/assembly.bash +15 -1
- data/scripts/cds.bash +9 -3
- data/scripts/distances.bash +103 -5
- data/scripts/essential_genes.bash +14 -1
- data/scripts/mytaxa.bash +18 -3
- data/scripts/mytaxa_scan.bash +16 -3
- data/scripts/read_quality.bash +6 -2
- data/scripts/ssu.bash +19 -1
- data/scripts/stats.bash +9 -3
- data/scripts/taxonomy.bash +98 -2
- data/scripts/trimmed_fasta.bash +10 -2
- data/scripts/trimmed_reads.bash +26 -6
- data/test/dataset_test.rb +17 -2
- data/test/hook_test.rb +3 -2
- data/test/net_test.rb +21 -5
- data/test/project_test.rb +13 -0
- data/test/remote_dataset_test.rb +106 -7
- data/test/result_test.rb +47 -21
- data/test/taxonomy_test.rb +9 -3
- data/utils/distance/runner.rb +3 -1
- data/utils/distances.rb +1 -1
- data/utils/subclades.R +15 -8
- metadata +4 -2
@@ -6,28 +6,40 @@ class MiGA::RemoteDataset
|
|
6
6
|
# Class-level
|
7
7
|
class << self
|
8
8
|
##
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
9
|
+
# Return hash of options used internally for the getter methods, including
|
10
|
+
# by +download+. The prepared request is for data from the +universe+ in the
|
11
|
+
# database +db+ with IDs +ids+ and in +format+. If passed, it saves the
|
12
|
+
# result in +file+. Additional parameters specific to the download method
|
13
|
+
# can be passed using +extra+. The +obj+ can also be passed as
|
14
|
+
# MiGA::RemoteDataset or MiGA::Dataset
|
15
|
+
def download_opts(
|
16
|
+
universe, db, ids, format, file = nil, extra = {}, obj = nil)
|
17
|
+
universe_hash = @@UNIVERSE[universe]
|
18
|
+
database_hash = universe_hash.dig(:dbs, db)
|
19
|
+
getter = database_hash[:getter] || :download
|
20
|
+
action = database_hash[:method] || universe_hash[:method]
|
21
|
+
|
22
|
+
{
|
23
|
+
universe: universe, db: db, ids: ids.is_a?(Array) ? ids : [ids],
|
24
|
+
format: format, file: file, obj: obj,
|
25
|
+
extra: (database_hash[:extra] || {}).merge(extra),
|
26
|
+
_fun: :"#{getter}_#{action}"
|
26
27
|
}
|
27
|
-
|
28
|
+
end
|
29
|
+
|
30
|
+
##
|
31
|
+
# Returns String. The prequired parameters (+params+) are identical to those
|
32
|
+
# of +download_opts+ (see for details)
|
33
|
+
def download(*params)
|
34
|
+
opts = download_opts(*params)
|
35
|
+
doc = send(opts[:_fun], opts)
|
36
|
+
|
28
37
|
unless opts[:file].nil?
|
29
38
|
ofh = File.open(opts[:file], 'w')
|
30
|
-
|
39
|
+
unless opts[:file] =~ /\.([gb]?z|tar|zip|rar)$/i
|
40
|
+
doc = normalize_encoding(doc)
|
41
|
+
end
|
42
|
+
ofh.print doc
|
31
43
|
ofh.close
|
32
44
|
end
|
33
45
|
doc
|
@@ -39,9 +51,9 @@ class MiGA::RemoteDataset
|
|
39
51
|
# +obj+ (mandatory): MiGA::RemoteDataset
|
40
52
|
# +ids+ (mandatory): String or Array of String
|
41
53
|
# +file+: String, passed to download
|
42
|
-
# +extra+:
|
54
|
+
# +extra+: Hash, passed to download
|
43
55
|
# +format+: String, passed to download
|
44
|
-
def
|
56
|
+
def ncbi_asm_get(opts)
|
45
57
|
url_dir = opts[:obj].ncbi_asm_json_doc&.dig('ftppath_genbank')
|
46
58
|
if url_dir.nil? || url_dir.empty?
|
47
59
|
raise MiGA::RemoteDataMissingError.new(
|
@@ -58,8 +70,8 @@ class MiGA::RemoteDataset
|
|
58
70
|
|
59
71
|
##
|
60
72
|
# Download data from NCBI GenBank (nuccore) database using the REST method.
|
61
|
-
# Supported +opts+ (Hash) are the same as #download_rest and #
|
62
|
-
def
|
73
|
+
# Supported +opts+ (Hash) are the same as #download_rest and #ncbi_asm_get.
|
74
|
+
def ncbi_gb_get(opts)
|
63
75
|
# Simply use defaults, but ensure that the URL can be properly formed
|
64
76
|
o = download_rest(opts.merge(universe: :ncbi, db: :nuccore))
|
65
77
|
return o unless o.strip.empty?
|
@@ -70,54 +82,83 @@ class MiGA::RemoteDataset
|
|
70
82
|
File.unlink(opts[:file]) if File.exist? opts[:file]
|
71
83
|
opts[:file] = "#{opts[:file]}.gz"
|
72
84
|
end
|
73
|
-
|
85
|
+
ncbi_asm_get(opts)
|
74
86
|
end
|
75
87
|
|
76
88
|
##
|
77
|
-
# Download data using the
|
89
|
+
# Download data using the GET method. Supported +opts+ (Hash) include:
|
78
90
|
# +universe+ (mandatory): Symbol
|
79
|
-
# +db
|
80
|
-
# +ids
|
91
|
+
# +db+: Symbol
|
92
|
+
# +ids+: Array of String
|
81
93
|
# +format+: String
|
82
|
-
# +extra+:
|
83
|
-
def
|
94
|
+
# +extra+: Hash
|
95
|
+
def download_get(opts)
|
84
96
|
u = @@UNIVERSE[opts[:universe]]
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
97
|
+
download_uri(u[:uri][opts], u[:headers] ? u[:headers][opts] : {})
|
98
|
+
end
|
99
|
+
|
100
|
+
##
|
101
|
+
# Download data using the POST method. Supported +opts+ (Hash) include:
|
102
|
+
# +universe+ (mandatory): Symbol
|
103
|
+
# +db+: Symbol
|
104
|
+
# +ids+: Array of String
|
105
|
+
# +format+: String
|
106
|
+
# +extra+: Hash
|
107
|
+
def download_post(opts)
|
108
|
+
u = @@UNIVERSE[opts[:universe]]
|
109
|
+
uri = u[:uri][opts]
|
110
|
+
payload = u[:payload] ? u[:payload][opts] : ''
|
111
|
+
headers = u[:headers] ? u[:headers][opts] : {}
|
112
|
+
net_method(:post, uri, payload, headers)
|
113
|
+
end
|
114
|
+
|
115
|
+
##
|
116
|
+
# Download data using the FTP protocol. Supported +opts+ (Hash) include:
|
117
|
+
# +universe+ (mandatory): Symbol
|
118
|
+
# +db+: Symbol
|
119
|
+
# +ids+: Array of String
|
120
|
+
# +format+: String
|
121
|
+
# +extra+: Hash
|
122
|
+
def download_ftp(opts)
|
123
|
+
u = @@UNIVERSE[opts[:universe]]
|
124
|
+
net_method(:ftp, u[:uri][opts])
|
125
|
+
end
|
126
|
+
|
127
|
+
##
|
128
|
+
# Redirects to +download_get+ or +download_ftp+, depending on the URI's
|
129
|
+
# protocol
|
130
|
+
def download_net(opts)
|
131
|
+
u = @@UNIVERSE[opts[:universe]]
|
132
|
+
if u[:scheme][opts] == 'ftp'
|
133
|
+
download_ftp(opts)
|
134
|
+
else
|
135
|
+
download_get(opts)
|
136
|
+
end
|
90
137
|
end
|
91
138
|
|
92
139
|
##
|
93
140
|
# Alias of download_rest
|
94
|
-
alias
|
141
|
+
alias download_rest download_get
|
142
|
+
|
143
|
+
##
|
144
|
+
# Download the given +URI+ and return the result regardless of response
|
145
|
+
# code. Attempts download up to three times before raising Net::ReadTimeout.
|
146
|
+
def download_uri(uri, headers = {})
|
147
|
+
net_method(:get, uri, headers)
|
148
|
+
end
|
95
149
|
|
96
150
|
##
|
97
151
|
# Download the given +url+ and return the result regardless of response
|
98
152
|
# code. Attempts download up to three times before raising Net::ReadTimeout.
|
99
|
-
def download_url(url)
|
100
|
-
|
101
|
-
@timeout_try = 0
|
102
|
-
begin
|
103
|
-
DEBUG 'GET: ' + url
|
104
|
-
URI.parse(url).open(read_timeout: 600) { |f| doc = f.read }
|
105
|
-
rescue => e
|
106
|
-
@timeout_try += 1
|
107
|
-
raise e if @timeout_try >= 3
|
108
|
-
|
109
|
-
sleep 5 # <- For: 429 Too Many Requests
|
110
|
-
DEBUG "RETRYING after: #{e}"
|
111
|
-
retry
|
112
|
-
end
|
113
|
-
doc
|
153
|
+
def download_url(url, headers = {})
|
154
|
+
download_uri(URI.parse(url), headers)
|
114
155
|
end
|
115
156
|
|
116
157
|
##
|
117
158
|
# Looks for the entry +id+ in +dbfrom+, and returns the linked
|
118
159
|
# identifier in +db+ (or nil).
|
119
160
|
def ncbi_map(id, dbfrom, db)
|
120
|
-
doc = download(:ncbi_map, dbfrom, id, :json, nil,
|
161
|
+
doc = download(:ncbi_map, dbfrom, id, :json, nil, db: db)
|
121
162
|
return if doc.empty?
|
122
163
|
|
123
164
|
tree = MiGA::Json.parse(doc, contents: true)
|
@@ -134,8 +175,34 @@ module MiGA::RemoteDataset::Download
|
|
134
175
|
##
|
135
176
|
# Download data into +file+
|
136
177
|
def download(file)
|
137
|
-
|
138
|
-
|
139
|
-
|
178
|
+
self.class.download(*download_params(file))
|
179
|
+
end
|
180
|
+
|
181
|
+
def universe_hash
|
182
|
+
self.class.UNIVERSE[universe]
|
183
|
+
end
|
184
|
+
|
185
|
+
def database_hash
|
186
|
+
universe_hash.dig(:dbs, db)
|
187
|
+
end
|
188
|
+
|
189
|
+
def download_params(file = nil)
|
190
|
+
[universe, db, ids, database_hash[:format], file, {}, self]
|
191
|
+
end
|
192
|
+
|
193
|
+
def download_opts(file = nil)
|
194
|
+
self.class.download_opts(*download_params(file))
|
195
|
+
end
|
196
|
+
|
197
|
+
def download_uri
|
198
|
+
universe_hash[:uri][download_opts]
|
199
|
+
end
|
200
|
+
|
201
|
+
def download_headers
|
202
|
+
universe_hash[:headers][download_opts]
|
203
|
+
end
|
204
|
+
|
205
|
+
def download_payload
|
206
|
+
universe_hash[:payload][download_opts]
|
140
207
|
end
|
141
208
|
end
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -193,7 +193,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
193
193
|
|
194
194
|
doc = MiGA::Json.parse(
|
195
195
|
MiGA::RemoteDataset.download(
|
196
|
-
:gtdb, :genome, gtdb_genome, 'taxon-history'
|
196
|
+
:gtdb, :genome, gtdb_genome, 'taxon-history'
|
197
197
|
),
|
198
198
|
contents: true
|
199
199
|
)
|
@@ -237,15 +237,24 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
237
237
|
private
|
238
238
|
|
239
239
|
def get_ncbi_taxid_from_web
|
240
|
-
|
240
|
+
# Check first if metadata was pulled from NCBI already
|
241
|
+
taxid = metadata.dig(:ncbi_dataset, :organism, :tax_id)
|
242
|
+
return taxid if taxid
|
241
243
|
|
242
|
-
|
244
|
+
# Otherwise, try to get the Assembly JSON document
|
245
|
+
ncbi_asm_json_doc&.dig('taxid')
|
243
246
|
end
|
244
247
|
|
245
248
|
def get_ncbi_taxid_from_ncbi
|
249
|
+
# Try first from Assembly data
|
246
250
|
return get_ncbi_taxid_from_web if db == :assembly
|
247
251
|
|
248
|
-
|
252
|
+
# Try from previously pulled NCBI data
|
253
|
+
taxid = metadata.dig(:ncbi_dataset, :organism, :tax_id)
|
254
|
+
return taxid if taxid
|
255
|
+
|
256
|
+
# Try from GenBank document (obtain it)
|
257
|
+
doc = self.class.download(:ncbi, db, ids, :gb, nil, {}, self).split(/\n/)
|
249
258
|
ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
|
250
259
|
return nil if ln.nil?
|
251
260
|
|
@@ -283,14 +292,25 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
283
292
|
end
|
284
293
|
|
285
294
|
def get_type_status_ncbi_asm(metadata)
|
286
|
-
|
295
|
+
from_type = nil
|
296
|
+
|
297
|
+
# Try first from previously pulled NCBI metadata
|
298
|
+
if metadata[:ncbi_dataset]
|
299
|
+
from_type = metadata.dig(
|
300
|
+
:ncbi_dataset, :type_material, :type_display_text
|
301
|
+
)
|
302
|
+
else
|
303
|
+
# Otherwise, check Assembly JSON document
|
304
|
+
return metadata if ncbi_asm_json_doc.nil?
|
305
|
+
|
306
|
+
metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
|
307
|
+
metadata[:suspect] = nil if metadata[:suspect].empty?
|
308
|
+
return metadata if metadata[:is_type] # If predefined, as in SeqCode
|
287
309
|
|
288
|
-
|
289
|
-
|
290
|
-
|
310
|
+
from_type = ncbi_asm_json_doc['from_type']
|
311
|
+
from_type = ncbi_asm_json_doc['fromtype'] if from_type.nil?
|
312
|
+
end
|
291
313
|
|
292
|
-
from_type = ncbi_asm_json_doc['from_type']
|
293
|
-
from_type = ncbi_asm_json_doc['fromtype'] if from_type.nil?
|
294
314
|
case from_type
|
295
315
|
when nil
|
296
316
|
# Do nothing
|
@@ -316,10 +336,11 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
316
336
|
a_ctg = "#{base}.AllContigs.fna"
|
317
337
|
File.open("#{base}.start", 'w') { |ofh| ofh.puts Time.now.to_s }
|
318
338
|
if udb[:format] == :fasta_gz
|
319
|
-
|
320
|
-
|
339
|
+
l_ctg_gz = "#{l_ctg}.gz"
|
340
|
+
download(l_ctg_gz)
|
341
|
+
self.class.run_cmd(['gzip', '-f', '-d', l_ctg_gz])
|
321
342
|
else
|
322
|
-
download
|
343
|
+
download(l_ctg)
|
323
344
|
end
|
324
345
|
File.unlink(a_ctg) if File.exist? a_ctg
|
325
346
|
File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
|
data/lib/miga/result/stats.rb
CHANGED
@@ -219,6 +219,8 @@ module MiGA::Result::Stats
|
|
219
219
|
|
220
220
|
def compute_stats_taxonomy
|
221
221
|
stats = {}
|
222
|
+
return stats unless file_path(:intax_test)
|
223
|
+
|
222
224
|
File.open(file_path(:intax_test), 'r') do |fh|
|
223
225
|
fh.gets.chomp =~ /Closest relative: (\S+) with AAI: (\S+)\.?/
|
224
226
|
stats[:closest_relative] = $1
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'miga/result/base'
|
2
|
+
|
3
|
+
##
|
4
|
+
# Helper module including functions for results to handle software versions
|
5
|
+
module MiGA::Result::Versions
|
6
|
+
##
|
7
|
+
# Return the versions hash
|
8
|
+
def versions
|
9
|
+
self[:versions]
|
10
|
+
end
|
11
|
+
|
12
|
+
##
|
13
|
+
# Add version information for the Software used by this result
|
14
|
+
def add_versions(versions)
|
15
|
+
versions.each { |k, v| self[:versions][k.to_sym] = v }
|
16
|
+
end
|
17
|
+
|
18
|
+
##
|
19
|
+
# Get list of software and their versions as raw text (Markdown)
|
20
|
+
def versions_md
|
21
|
+
versions.map { |k, v| "- #{k}: #{v}" }.join("\n")
|
22
|
+
end
|
23
|
+
end
|
data/lib/miga/result.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require 'miga/result/dates'
|
4
4
|
require 'miga/result/source'
|
5
5
|
require 'miga/result/stats'
|
6
|
+
require 'miga/result/versions'
|
6
7
|
|
7
8
|
##
|
8
9
|
# The result from a task run. It can be project-wide or dataset-specific.
|
@@ -10,6 +11,7 @@ class MiGA::Result < MiGA::MiGA
|
|
10
11
|
include MiGA::Result::Dates
|
11
12
|
include MiGA::Result::Source
|
12
13
|
include MiGA::Result::Stats
|
14
|
+
include MiGA::Result::Versions
|
13
15
|
|
14
16
|
# Class-level
|
15
17
|
class << self
|
@@ -151,7 +153,11 @@ class MiGA::Result < MiGA::MiGA
|
|
151
153
|
##
|
152
154
|
# Initialize and #save empty result
|
153
155
|
def create
|
154
|
-
@data = {
|
156
|
+
@data = {
|
157
|
+
created: Time.now.to_s,
|
158
|
+
stats: {}, files: {},
|
159
|
+
versions: { 'MiGA' => MiGA::VERSION.join('.') }
|
160
|
+
}
|
155
161
|
save
|
156
162
|
end
|
157
163
|
|
data/lib/miga/taxonomy/base.rb
CHANGED
@@ -6,14 +6,15 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
6
6
|
##
|
7
7
|
# Returns cannonical rank (Symbol) for the +rank+ String
|
8
8
|
def normalize_rank(rank)
|
9
|
+
return unless rank
|
9
10
|
return rank.to_sym if @@_KNOWN_RANKS_H[rank.to_sym]
|
10
11
|
|
11
12
|
rank = rank.to_s.downcase
|
12
|
-
return
|
13
|
+
return if rank == 'no rank'
|
13
14
|
|
14
15
|
rank = @@RANK_SYNONYMS[rank] unless @@RANK_SYNONYMS[rank].nil?
|
15
16
|
rank = rank.to_sym
|
16
|
-
return
|
17
|
+
return unless @@_KNOWN_RANKS_H[rank]
|
17
18
|
|
18
19
|
rank
|
19
20
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3,
|
15
|
+
VERSION = [1.3, 9, 0].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem relese.
|
23
|
-
VERSION_DATE = Date.new(
|
23
|
+
VERSION_DATE = Date.new(2024, 1, 22)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/scripts/assembly.bash
CHANGED
@@ -10,6 +10,7 @@ cd "$PROJECT/data/05.assembly"
|
|
10
10
|
miga date > "$DATASET.start"
|
11
11
|
|
12
12
|
# Interpose (if needed)
|
13
|
+
interpose=no
|
13
14
|
TF="../04.trimmed_fasta"
|
14
15
|
b=$DATASET
|
15
16
|
if [[ -s "$TF/${b}.2.fasta" || -s "$TF/${b}.2.fasta.gz" ]] ; then
|
@@ -22,6 +23,7 @@ if [[ -s "$TF/${b}.2.fasta" || -s "$TF/${b}.2.fasta.gz" ]] ; then
|
|
22
23
|
gzip -cd "$TF/${b}.${s}.fasta.gz" > "${b}.${s}.tmp"
|
23
24
|
fi
|
24
25
|
done
|
26
|
+
interpose=yes
|
25
27
|
FastA.interpose.pl "$cr" "$b".[12].tmp
|
26
28
|
rm "$b".[12].tmp
|
27
29
|
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f
|
@@ -62,5 +64,17 @@ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
|
|
62
64
|
|
63
65
|
# Finalize
|
64
66
|
miga date > "$DATASET.done"
|
65
|
-
|
67
|
+
cat <<VERSIONS \
|
68
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
69
|
+
=> MiGA
|
70
|
+
$(miga --version)
|
71
|
+
$(
|
72
|
+
if [[ "$interpose" == "yes" ]] ; then
|
73
|
+
echo "=> Enveomics Collection: FastA.interpose.pl"
|
74
|
+
echo "version unknown"
|
75
|
+
fi
|
76
|
+
)
|
77
|
+
=> IDBA-UD
|
78
|
+
version unknown
|
79
|
+
VERSIONS
|
66
80
|
|
data/scripts/cds.bash
CHANGED
@@ -19,7 +19,7 @@ fi
|
|
19
19
|
# Run Prodigal
|
20
20
|
TYPE=$(miga ls -P "$PROJECT" -D "$DATASET" -m type | cut -f 2)
|
21
21
|
case "$TYPE" in
|
22
|
-
metagenome|virome)
|
22
|
+
metagenome|virome|plasmid)
|
23
23
|
prodigal -a "${DATASET}.faa" -d "${DATASET}.fna" -o "${DATASET}.gff3" \
|
24
24
|
-f gff -q -i "../05.assembly/${DATASET}.LargeContigs.fna" -p meta
|
25
25
|
;;
|
@@ -68,6 +68,12 @@ for ext in gff3 faa fna ; do
|
|
68
68
|
done
|
69
69
|
|
70
70
|
# Finalize
|
71
|
-
miga date > "$DATASET.done"
|
72
|
-
|
71
|
+
miga date > "${DATASET}.done"
|
72
|
+
cat <<VERSIONS \
|
73
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
74
|
+
=> MiGA
|
75
|
+
$(miga --version)
|
76
|
+
=> Prodigal
|
77
|
+
$(prodigal -v 2>&1 | grep . | perl -pe 's/^Prodigal //')
|
78
|
+
VERSIONS
|
73
79
|
|
data/scripts/distances.bash
CHANGED
@@ -10,13 +10,111 @@ cd "$PROJECT/data/09.distances"
|
|
10
10
|
miga date > "$DATASET.start"
|
11
11
|
|
12
12
|
# Check quality
|
13
|
-
miga
|
14
|
-
|
15
|
-
[[ "$
|
13
|
+
MARKERS=$(miga ls -P "$PROJECT" -D "$DATASET" --markers \
|
14
|
+
| wc -l | awk '{print $1}')
|
15
|
+
if [[ "$MARKERS" -eq "1" ]] ; then
|
16
|
+
miga stats -P "$PROJECT" -D "$DATASET" -r essential_genes --compute-and-save
|
17
|
+
inactive=$(miga ls -P "$PROJECT" -D "$DATASET" -m inactive | cut -f 2)
|
18
|
+
[[ "$inactive" == "true" ]] && exit
|
19
|
+
fi
|
16
20
|
|
17
21
|
# Run distances
|
18
22
|
ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
|
19
23
|
|
20
24
|
# Finalize
|
21
|
-
|
22
|
-
|
25
|
+
fastaai=no
|
26
|
+
aai=no
|
27
|
+
ani=no
|
28
|
+
blast=no
|
29
|
+
blat=no
|
30
|
+
diamond=no
|
31
|
+
fastani=no
|
32
|
+
case $(miga option -P "$PROJECT" -k haai_p) in
|
33
|
+
fastaai)
|
34
|
+
fastaai=yes
|
35
|
+
;;
|
36
|
+
diamond)
|
37
|
+
diamond=yes
|
38
|
+
aai=yes
|
39
|
+
;;
|
40
|
+
blast)
|
41
|
+
blast=yes
|
42
|
+
aai=yes
|
43
|
+
;;
|
44
|
+
esac
|
45
|
+
|
46
|
+
case $(miga option -P "$PROJECT" -k aai_p) in
|
47
|
+
diamond)
|
48
|
+
diamond=yes
|
49
|
+
aai=yes
|
50
|
+
;;
|
51
|
+
blast)
|
52
|
+
blast=yes
|
53
|
+
aai=yes
|
54
|
+
;;
|
55
|
+
esac
|
56
|
+
|
57
|
+
case $(miga option -P "$PROJECT" -k ani_p) in
|
58
|
+
blast)
|
59
|
+
blast=yes
|
60
|
+
ani=yes
|
61
|
+
;;
|
62
|
+
blat)
|
63
|
+
blat=yes
|
64
|
+
ani=yes
|
65
|
+
;;
|
66
|
+
fastani)
|
67
|
+
fastani=yes
|
68
|
+
;;
|
69
|
+
esac
|
70
|
+
|
71
|
+
|
72
|
+
miga date > "${DATASET}.done"
|
73
|
+
cat <<VERSIONS \
|
74
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
75
|
+
=> MiGA
|
76
|
+
$(miga --version)
|
77
|
+
$(
|
78
|
+
if [[ "$fastaai" == "yes" ]] ; then
|
79
|
+
echo "=> FastAAI"
|
80
|
+
fastaai version 2>&1 | perl -pe 's/.*=//'
|
81
|
+
fi
|
82
|
+
)
|
83
|
+
$(
|
84
|
+
if [[ "$fastani" == "yes" ]] ; then
|
85
|
+
echo "=> FastANI"
|
86
|
+
fastANI --version 2>&1 | grep . | perl -pe 's/^version //'
|
87
|
+
fi
|
88
|
+
)
|
89
|
+
$(
|
90
|
+
if [[ "$aai" == "yes" ]] ; then
|
91
|
+
echo "=> Enveomics Collection: aai.rb"
|
92
|
+
aai.rb --version 2>&1 | perl -pe 's/.*: //'
|
93
|
+
fi
|
94
|
+
)
|
95
|
+
$(
|
96
|
+
if [[ "$ani" == "yes" ]] ; then
|
97
|
+
echo "=> Enveomics Collection: ani.rb"
|
98
|
+
ani.rb --version 2>&1 | perl -pe 's/.*: //'
|
99
|
+
fi
|
100
|
+
)
|
101
|
+
$(
|
102
|
+
if [[ "$blast" == "yes" ]] ; then
|
103
|
+
echo "=> NCBI BLAST+"
|
104
|
+
blastp -version 2>&1 | tail -n 1 | perl -pe 's/.*: blast //'
|
105
|
+
fi
|
106
|
+
)
|
107
|
+
$(
|
108
|
+
if [[ "$blat" == "yes" ]] ; then
|
109
|
+
echo "=> BLAT"
|
110
|
+
blat 2>&1 | head -n 1 | perl -pe 's/.* v\. //' | perl -pe 's/ fast .*//'
|
111
|
+
fi
|
112
|
+
)
|
113
|
+
$(
|
114
|
+
if [[ "$diamond" == "yes" ]] ; then
|
115
|
+
echo "=> Diamond"
|
116
|
+
diamond --version 2>&1 | perl -pe 's/^diamond version //'
|
117
|
+
fi
|
118
|
+
)
|
119
|
+
VERSIONS
|
120
|
+
|
@@ -70,4 +70,17 @@ fi
|
|
70
70
|
|
71
71
|
# Finalize
|
72
72
|
miga date > "${DATASET}.done"
|
73
|
-
|
73
|
+
cat <<VERSIONS \
|
74
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
75
|
+
=> MiGA
|
76
|
+
$(miga --version)
|
77
|
+
=> Enveomics Collection: HMM.essential.rb
|
78
|
+
$(HMM.essential.rb --version 2>&1 | perl -pe 's/.*: //')
|
79
|
+
$(
|
80
|
+
if [[ "$NOMULTI" -eq "1" ]] ; then
|
81
|
+
echo "=> FastAAI"
|
82
|
+
fastaai version 2>&1 | perl -pe 's/.*=//'
|
83
|
+
fi
|
84
|
+
)
|
85
|
+
VERSIONS
|
86
|
+
|
data/scripts/mytaxa.bash
CHANGED
@@ -15,7 +15,7 @@ if [[ "$MIGA_MYTAXA" == "no" ]] ; then
|
|
15
15
|
> "$DATASET.nomytaxa.txt"
|
16
16
|
else
|
17
17
|
# Check type of dataset
|
18
|
-
MULTI=$(miga
|
18
|
+
MULTI=$(miga ls -P "$PROJECT" -D "$DATASET" --multi \
|
19
19
|
| wc -l | awk '{print $1}')
|
20
20
|
if [[ "$MULTI" -eq "1" ]] ; then
|
21
21
|
# Check requirements
|
@@ -98,5 +98,20 @@ else
|
|
98
98
|
fi
|
99
99
|
|
100
100
|
# Finalize
|
101
|
-
miga date > "$DATASET.done"
|
102
|
-
|
101
|
+
miga date > "${DATASET}.done"
|
102
|
+
cat <<VERSIONS \
|
103
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
104
|
+
=> MiGA
|
105
|
+
$(miga --version)
|
106
|
+
$(
|
107
|
+
if [[ "$MIGA_MYTAXA" != "no" && "$MULTI" -eq "1" ]] ; then
|
108
|
+
echo "=> MyTaxa"
|
109
|
+
MyTaxa | grep Version: | perl -pe 's/.*: //'
|
110
|
+
echo "=> Diamond"
|
111
|
+
diamond --version 2>&1 | perl -pe 's/^diamond version //'
|
112
|
+
echo "=> Krona"
|
113
|
+
ktImportText | head -n 2 | tail -n 1 | awk '{ print $3 }'
|
114
|
+
fi
|
115
|
+
)
|
116
|
+
VERSIONS
|
117
|
+
|
data/scripts/mytaxa_scan.bash
CHANGED
@@ -14,7 +14,7 @@ if [[ "$MIGA_MYTAXA" == "no" ]] ; then
|
|
14
14
|
> "$DATASET.nomytaxa.txt"
|
15
15
|
else
|
16
16
|
# Check type of dataset
|
17
|
-
NOMULTI=$(miga
|
17
|
+
NOMULTI=$(miga ls -P "$PROJECT" -D "$DATASET" --no-multi \
|
18
18
|
| wc -l | awk '{print $1}')
|
19
19
|
if [[ "$NOMULTI" -eq "1" ]] ; then
|
20
20
|
# Check requirements
|
@@ -97,5 +97,18 @@ else
|
|
97
97
|
fi
|
98
98
|
|
99
99
|
# Finalize
|
100
|
-
miga date > "$DATASET.done"
|
101
|
-
|
100
|
+
miga date > "${DATASET}.done"
|
101
|
+
cat <<VERSIONS \
|
102
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
103
|
+
=> MiGA
|
104
|
+
$(miga --version)
|
105
|
+
$(
|
106
|
+
if [[ "$MIGA_MYTAXA" != "no" && "$NOMULTI" -eq "1" ]] ; then
|
107
|
+
echo "=> MyTaxa"
|
108
|
+
MyTaxa | grep Version: | perl -pe 's/.*: //'
|
109
|
+
echo "=> Diamond"
|
110
|
+
diamond --version 2>&1 | perl -pe 's/^diamond version //'
|
111
|
+
fi
|
112
|
+
)
|
113
|
+
VERSIONS
|
114
|
+
|