miga-base 1.3.8.2 → 1.3.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/miga/cli/action/add_result.rb +22 -1
- data/lib/miga/cli/action/browse/about.html +4 -2
- data/lib/miga/cli/action/download/gtdb.rb +1 -1
- data/lib/miga/cli/action/download/ncbi.rb +43 -68
- data/lib/miga/cli/action/download/seqcode.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +1 -8
- data/lib/miga/cli/action/wf.rb +15 -6
- data/lib/miga/cli/objects_helper.rb +3 -0
- data/lib/miga/cli/opt_helper.rb +8 -2
- data/lib/miga/common/net.rb +100 -18
- data/lib/miga/dataset/base.rb +40 -12
- data/lib/miga/dataset/hooks.rb +8 -0
- data/lib/miga/dataset/result/ignore.rb +14 -2
- data/lib/miga/dataset/type.rb +51 -0
- data/lib/miga/dataset.rb +3 -22
- data/lib/miga/json.rb +9 -0
- data/lib/miga/project/base.rb +15 -9
- data/lib/miga/project.rb +7 -1
- data/lib/miga/remote_dataset/base.rb +117 -36
- data/lib/miga/remote_dataset/download.rb +121 -54
- data/lib/miga/remote_dataset.rb +34 -13
- data/lib/miga/result/stats.rb +2 -0
- data/lib/miga/result/versions.rb +23 -0
- data/lib/miga/result.rb +7 -1
- data/lib/miga/taxonomy/base.rb +3 -2
- data/lib/miga/version.rb +2 -2
- data/scripts/assembly.bash +15 -1
- data/scripts/cds.bash +9 -3
- data/scripts/distances.bash +103 -5
- data/scripts/essential_genes.bash +14 -1
- data/scripts/mytaxa.bash +18 -3
- data/scripts/mytaxa_scan.bash +16 -3
- data/scripts/read_quality.bash +6 -2
- data/scripts/ssu.bash +19 -1
- data/scripts/stats.bash +9 -3
- data/scripts/taxonomy.bash +98 -2
- data/scripts/trimmed_fasta.bash +10 -2
- data/scripts/trimmed_reads.bash +26 -6
- data/test/dataset_test.rb +17 -2
- data/test/hook_test.rb +3 -2
- data/test/net_test.rb +21 -5
- data/test/project_test.rb +13 -0
- data/test/remote_dataset_test.rb +106 -7
- data/test/result_test.rb +47 -21
- data/test/taxonomy_test.rb +9 -3
- data/utils/distance/runner.rb +3 -1
- data/utils/distances.rb +1 -1
- metadata +4 -2
@@ -6,28 +6,40 @@ class MiGA::RemoteDataset
|
|
6
6
|
# Class-level
|
7
7
|
class << self
|
8
8
|
##
|
9
|
-
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
9
|
+
# Return hash of options used internally for the getter methods, including
|
10
|
+
# by +download+. The prepared request is for data from the +universe+ in the
|
11
|
+
# database +db+ with IDs +ids+ and in +format+. If passed, it saves the
|
12
|
+
# result in +file+. Additional parameters specific to the download method
|
13
|
+
# can be passed using +extra+. The +obj+ can also be passed as
|
14
|
+
# MiGA::RemoteDataset or MiGA::Dataset
|
15
|
+
def download_opts(
|
16
|
+
universe, db, ids, format, file = nil, extra = {}, obj = nil)
|
17
|
+
universe_hash = @@UNIVERSE[universe]
|
18
|
+
database_hash = universe_hash.dig(:dbs, db)
|
19
|
+
getter = database_hash[:getter] || :download
|
20
|
+
action = database_hash[:method] || universe_hash[:method]
|
21
|
+
|
22
|
+
{
|
23
|
+
universe: universe, db: db, ids: ids.is_a?(Array) ? ids : [ids],
|
24
|
+
format: format, file: file, obj: obj,
|
25
|
+
extra: (database_hash[:extra] || {}).merge(extra),
|
26
|
+
_fun: :"#{getter}_#{action}"
|
26
27
|
}
|
27
|
-
|
28
|
+
end
|
29
|
+
|
30
|
+
##
|
31
|
+
# Returns String. The required parameters (+params+) are identical to those
|
32
|
+
# of +download_opts+ (see for details)
|
33
|
+
def download(*params)
|
34
|
+
opts = download_opts(*params)
|
35
|
+
doc = send(opts[:_fun], opts)
|
36
|
+
|
28
37
|
unless opts[:file].nil?
|
29
38
|
ofh = File.open(opts[:file], 'w')
|
30
|
-
|
39
|
+
unless opts[:file] =~ /\.([gb]?z|tar|zip|rar)$/i
|
40
|
+
doc = normalize_encoding(doc)
|
41
|
+
end
|
42
|
+
ofh.print doc
|
31
43
|
ofh.close
|
32
44
|
end
|
33
45
|
doc
|
@@ -39,9 +51,9 @@ class MiGA::RemoteDataset
|
|
39
51
|
# +obj+ (mandatory): MiGA::RemoteDataset
|
40
52
|
# +ids+ (mandatory): String or Array of String
|
41
53
|
# +file+: String, passed to download
|
42
|
-
# +extra+:
|
54
|
+
# +extra+: Hash, passed to download
|
43
55
|
# +format+: String, passed to download
|
44
|
-
def
|
56
|
+
def ncbi_asm_get(opts)
|
45
57
|
url_dir = opts[:obj].ncbi_asm_json_doc&.dig('ftppath_genbank')
|
46
58
|
if url_dir.nil? || url_dir.empty?
|
47
59
|
raise MiGA::RemoteDataMissingError.new(
|
@@ -58,8 +70,8 @@ class MiGA::RemoteDataset
|
|
58
70
|
|
59
71
|
##
|
60
72
|
# Download data from NCBI GenBank (nuccore) database using the REST method.
|
61
|
-
# Supported +opts+ (Hash) are the same as #download_rest and #
|
62
|
-
def
|
73
|
+
# Supported +opts+ (Hash) are the same as #download_rest and #ncbi_asm_get.
|
74
|
+
def ncbi_gb_get(opts)
|
63
75
|
# Simply use defaults, but ensure that the URL can be properly formed
|
64
76
|
o = download_rest(opts.merge(universe: :ncbi, db: :nuccore))
|
65
77
|
return o unless o.strip.empty?
|
@@ -70,54 +82,83 @@ class MiGA::RemoteDataset
|
|
70
82
|
File.unlink(opts[:file]) if File.exist? opts[:file]
|
71
83
|
opts[:file] = "#{opts[:file]}.gz"
|
72
84
|
end
|
73
|
-
|
85
|
+
ncbi_asm_get(opts)
|
74
86
|
end
|
75
87
|
|
76
88
|
##
|
77
|
-
# Download data using the
|
89
|
+
# Download data using the GET method. Supported +opts+ (Hash) include:
|
78
90
|
# +universe+ (mandatory): Symbol
|
79
|
-
# +db
|
80
|
-
# +ids
|
91
|
+
# +db+: Symbol
|
92
|
+
# +ids+: Array of String
|
81
93
|
# +format+: String
|
82
|
-
# +extra+:
|
83
|
-
def
|
94
|
+
# +extra+: Hash
|
95
|
+
def download_get(opts)
|
84
96
|
u = @@UNIVERSE[opts[:universe]]
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
97
|
+
download_uri(u[:uri][opts], u[:headers] ? u[:headers][opts] : {})
|
98
|
+
end
|
99
|
+
|
100
|
+
##
|
101
|
+
# Download data using the POST method. Supported +opts+ (Hash) include:
|
102
|
+
# +universe+ (mandatory): Symbol
|
103
|
+
# +db+: Symbol
|
104
|
+
# +ids+: Array of String
|
105
|
+
# +format+: String
|
106
|
+
# +extra+: Hash
|
107
|
+
def download_post(opts)
|
108
|
+
u = @@UNIVERSE[opts[:universe]]
|
109
|
+
uri = u[:uri][opts]
|
110
|
+
payload = u[:payload] ? u[:payload][opts] : ''
|
111
|
+
headers = u[:headers] ? u[:headers][opts] : {}
|
112
|
+
net_method(:post, uri, payload, headers)
|
113
|
+
end
|
114
|
+
|
115
|
+
##
|
116
|
+
# Download data using the FTP protocol. Supported +opts+ (Hash) include:
|
117
|
+
# +universe+ (mandatory): Symbol
|
118
|
+
# +db+: Symbol
|
119
|
+
# +ids+: Array of String
|
120
|
+
# +format+: String
|
121
|
+
# +extra+: Hash
|
122
|
+
def download_ftp(opts)
|
123
|
+
u = @@UNIVERSE[opts[:universe]]
|
124
|
+
net_method(:ftp, u[:uri][opts])
|
125
|
+
end
|
126
|
+
|
127
|
+
##
|
128
|
+
# Redirects to +download_get+ or +download_ftp+, depending on the URI's
|
129
|
+
# protocol
|
130
|
+
def download_net(opts)
|
131
|
+
u = @@UNIVERSE[opts[:universe]]
|
132
|
+
if u[:scheme][opts] == 'ftp'
|
133
|
+
download_ftp(opts)
|
134
|
+
else
|
135
|
+
download_get(opts)
|
136
|
+
end
|
90
137
|
end
|
91
138
|
|
92
139
|
##
|
93
140
|
# +download_rest+ is an alias of +download_get+
|
94
|
-
alias
|
141
|
+
alias download_rest download_get
|
142
|
+
|
143
|
+
##
|
144
|
+
# Download the given +URI+ and return the result regardless of response
|
145
|
+
# code. Attempts download up to three times before raising Net::ReadTimeout.
|
146
|
+
def download_uri(uri, headers = {})
|
147
|
+
net_method(:get, uri, headers)
|
148
|
+
end
|
95
149
|
|
96
150
|
##
|
97
151
|
# Download the given +url+ and return the result regardless of response
|
98
152
|
# code. Attempts download up to three times before raising Net::ReadTimeout.
|
99
|
-
def download_url(url)
|
100
|
-
|
101
|
-
@timeout_try = 0
|
102
|
-
begin
|
103
|
-
DEBUG 'GET: ' + url
|
104
|
-
URI.parse(url).open(read_timeout: 600) { |f| doc = f.read }
|
105
|
-
rescue => e
|
106
|
-
@timeout_try += 1
|
107
|
-
raise e if @timeout_try >= 3
|
108
|
-
|
109
|
-
sleep 5 # <- For: 429 Too Many Requests
|
110
|
-
DEBUG "RETRYING after: #{e}"
|
111
|
-
retry
|
112
|
-
end
|
113
|
-
doc
|
153
|
+
def download_url(url, headers = {})
|
154
|
+
download_uri(URI.parse(url), headers)
|
114
155
|
end
|
115
156
|
|
116
157
|
##
|
117
158
|
# Looks for the entry +id+ in +dbfrom+, and returns the linked
|
118
159
|
# identifier in +db+ (or nil).
|
119
160
|
def ncbi_map(id, dbfrom, db)
|
120
|
-
doc = download(:ncbi_map, dbfrom, id, :json, nil,
|
161
|
+
doc = download(:ncbi_map, dbfrom, id, :json, nil, db: db)
|
121
162
|
return if doc.empty?
|
122
163
|
|
123
164
|
tree = MiGA::Json.parse(doc, contents: true)
|
@@ -134,8 +175,34 @@ module MiGA::RemoteDataset::Download
|
|
134
175
|
##
|
135
176
|
# Download data into +file+
|
136
177
|
def download(file)
|
137
|
-
|
138
|
-
|
139
|
-
|
178
|
+
self.class.download(*download_params(file))
|
179
|
+
end
|
180
|
+
|
181
|
+
def universe_hash
|
182
|
+
self.class.UNIVERSE[universe]
|
183
|
+
end
|
184
|
+
|
185
|
+
def database_hash
|
186
|
+
universe_hash.dig(:dbs, db)
|
187
|
+
end
|
188
|
+
|
189
|
+
def download_params(file = nil)
|
190
|
+
[universe, db, ids, database_hash[:format], file, {}, self]
|
191
|
+
end
|
192
|
+
|
193
|
+
def download_opts(file = nil)
|
194
|
+
self.class.download_opts(*download_params(file))
|
195
|
+
end
|
196
|
+
|
197
|
+
def download_uri
|
198
|
+
universe_hash[:uri][download_opts]
|
199
|
+
end
|
200
|
+
|
201
|
+
def download_headers
|
202
|
+
universe_hash[:headers][download_opts]
|
203
|
+
end
|
204
|
+
|
205
|
+
def download_payload
|
206
|
+
universe_hash[:payload][download_opts]
|
140
207
|
end
|
141
208
|
end
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -193,7 +193,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
193
193
|
|
194
194
|
doc = MiGA::Json.parse(
|
195
195
|
MiGA::RemoteDataset.download(
|
196
|
-
:gtdb, :genome, gtdb_genome, 'taxon-history'
|
196
|
+
:gtdb, :genome, gtdb_genome, 'taxon-history'
|
197
197
|
),
|
198
198
|
contents: true
|
199
199
|
)
|
@@ -237,15 +237,24 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
237
237
|
private
|
238
238
|
|
239
239
|
def get_ncbi_taxid_from_web
|
240
|
-
|
240
|
+
# Check first if metadata was pulled from NCBI already
|
241
|
+
taxid = metadata.dig(:ncbi_dataset, :organism, :tax_id)
|
242
|
+
return taxid if taxid
|
241
243
|
|
242
|
-
|
244
|
+
# Otherwise, try to get the Assembly JSON document
|
245
|
+
ncbi_asm_json_doc&.dig('taxid')
|
243
246
|
end
|
244
247
|
|
245
248
|
def get_ncbi_taxid_from_ncbi
|
249
|
+
# Try first from Assembly data
|
246
250
|
return get_ncbi_taxid_from_web if db == :assembly
|
247
251
|
|
248
|
-
|
252
|
+
# Try from previously pulled NCBI data
|
253
|
+
taxid = metadata.dig(:ncbi_dataset, :organism, :tax_id)
|
254
|
+
return taxid if taxid
|
255
|
+
|
256
|
+
# Try from GenBank document (obtain it)
|
257
|
+
doc = self.class.download(:ncbi, db, ids, :gb, nil, {}, self).split(/\n/)
|
249
258
|
ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
|
250
259
|
return nil if ln.nil?
|
251
260
|
|
@@ -283,14 +292,25 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
283
292
|
end
|
284
293
|
|
285
294
|
def get_type_status_ncbi_asm(metadata)
|
286
|
-
|
295
|
+
from_type = nil
|
296
|
+
|
297
|
+
# Try first from previously pulled NCBI metadata
|
298
|
+
if metadata[:ncbi_dataset]
|
299
|
+
from_type = metadata.dig(
|
300
|
+
:ncbi_dataset, :type_material, :type_display_text
|
301
|
+
)
|
302
|
+
else
|
303
|
+
# Otherwise, check Assembly JSON document
|
304
|
+
return metadata if ncbi_asm_json_doc.nil?
|
305
|
+
|
306
|
+
metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
|
307
|
+
metadata[:suspect] = nil if metadata[:suspect].empty?
|
308
|
+
return metadata if metadata[:is_type] # If predefined, as in SeqCode
|
287
309
|
|
288
|
-
|
289
|
-
|
290
|
-
|
310
|
+
from_type = ncbi_asm_json_doc['from_type']
|
311
|
+
from_type = ncbi_asm_json_doc['fromtype'] if from_type.nil?
|
312
|
+
end
|
291
313
|
|
292
|
-
from_type = ncbi_asm_json_doc['from_type']
|
293
|
-
from_type = ncbi_asm_json_doc['fromtype'] if from_type.nil?
|
294
314
|
case from_type
|
295
315
|
when nil
|
296
316
|
# Do nothing
|
@@ -316,10 +336,11 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
316
336
|
a_ctg = "#{base}.AllContigs.fna"
|
317
337
|
File.open("#{base}.start", 'w') { |ofh| ofh.puts Time.now.to_s }
|
318
338
|
if udb[:format] == :fasta_gz
|
319
|
-
|
320
|
-
|
339
|
+
l_ctg_gz = "#{l_ctg}.gz"
|
340
|
+
download(l_ctg_gz)
|
341
|
+
self.class.run_cmd(['gzip', '-f', '-d', l_ctg_gz])
|
321
342
|
else
|
322
|
-
download
|
343
|
+
download(l_ctg)
|
323
344
|
end
|
324
345
|
File.unlink(a_ctg) if File.exist? a_ctg
|
325
346
|
File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
|
data/lib/miga/result/stats.rb
CHANGED
@@ -219,6 +219,8 @@ module MiGA::Result::Stats
|
|
219
219
|
|
220
220
|
def compute_stats_taxonomy
|
221
221
|
stats = {}
|
222
|
+
return stats unless file_path(:intax_test)
|
223
|
+
|
222
224
|
File.open(file_path(:intax_test), 'r') do |fh|
|
223
225
|
fh.gets.chomp =~ /Closest relative: (\S+) with AAI: (\S+)\.?/
|
224
226
|
stats[:closest_relative] = $1
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'miga/result/base'
|
2
|
+
|
3
|
+
##
|
4
|
+
# Helper module including functions for results to handle software versions
|
5
|
+
module MiGA::Result::Versions
|
6
|
+
##
|
7
|
+
# Return the versions hash
|
8
|
+
def versions
|
9
|
+
self[:versions]
|
10
|
+
end
|
11
|
+
|
12
|
+
##
|
13
|
+
# Add version information for the Software used by this result
|
14
|
+
def add_versions(versions)
|
15
|
+
versions.each { |k, v| self[:versions][k.to_sym] = v }
|
16
|
+
end
|
17
|
+
|
18
|
+
##
|
19
|
+
# Get list of software and their versions as raw text (Markdown)
|
20
|
+
def versions_md
|
21
|
+
versions.map { |k, v| "- #{k}: #{v}" }.join("\n")
|
22
|
+
end
|
23
|
+
end
|
data/lib/miga/result.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require 'miga/result/dates'
|
4
4
|
require 'miga/result/source'
|
5
5
|
require 'miga/result/stats'
|
6
|
+
require 'miga/result/versions'
|
6
7
|
|
7
8
|
##
|
8
9
|
# The result from a task run. It can be project-wide or dataset-specific.
|
@@ -10,6 +11,7 @@ class MiGA::Result < MiGA::MiGA
|
|
10
11
|
include MiGA::Result::Dates
|
11
12
|
include MiGA::Result::Source
|
12
13
|
include MiGA::Result::Stats
|
14
|
+
include MiGA::Result::Versions
|
13
15
|
|
14
16
|
# Class-level
|
15
17
|
class << self
|
@@ -151,7 +153,11 @@ class MiGA::Result < MiGA::MiGA
|
|
151
153
|
##
|
152
154
|
# Initialize and #save empty result
|
153
155
|
def create
|
154
|
-
@data = {
|
156
|
+
@data = {
|
157
|
+
created: Time.now.to_s,
|
158
|
+
stats: {}, files: {},
|
159
|
+
versions: { 'MiGA' => MiGA::VERSION.join('.') }
|
160
|
+
}
|
155
161
|
save
|
156
162
|
end
|
157
163
|
|
data/lib/miga/taxonomy/base.rb
CHANGED
@@ -6,14 +6,15 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
6
6
|
##
|
7
7
|
# Returns canonical rank (Symbol) for the +rank+ String
|
8
8
|
def normalize_rank(rank)
|
9
|
+
return unless rank
|
9
10
|
return rank.to_sym if @@_KNOWN_RANKS_H[rank.to_sym]
|
10
11
|
|
11
12
|
rank = rank.to_s.downcase
|
12
|
-
return
|
13
|
+
return if rank == 'no rank'
|
13
14
|
|
14
15
|
rank = @@RANK_SYNONYMS[rank] unless @@RANK_SYNONYMS[rank].nil?
|
15
16
|
rank = rank.to_sym
|
16
|
-
return
|
17
|
+
return unless @@_KNOWN_RANKS_H[rank]
|
17
18
|
|
18
19
|
rank
|
19
20
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.3,
|
15
|
+
VERSION = [1.3, 9, 0].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(
|
23
|
+
VERSION_DATE = Date.new(2024, 1, 22)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/scripts/assembly.bash
CHANGED
@@ -10,6 +10,7 @@ cd "$PROJECT/data/05.assembly"
|
|
10
10
|
miga date > "$DATASET.start"
|
11
11
|
|
12
12
|
# Interpose (if needed)
|
13
|
+
interpose=no
|
13
14
|
TF="../04.trimmed_fasta"
|
14
15
|
b=$DATASET
|
15
16
|
if [[ -s "$TF/${b}.2.fasta" || -s "$TF/${b}.2.fasta.gz" ]] ; then
|
@@ -22,6 +23,7 @@ if [[ -s "$TF/${b}.2.fasta" || -s "$TF/${b}.2.fasta.gz" ]] ; then
|
|
22
23
|
gzip -cd "$TF/${b}.${s}.fasta.gz" > "${b}.${s}.tmp"
|
23
24
|
fi
|
24
25
|
done
|
26
|
+
interpose=yes
|
25
27
|
FastA.interpose.pl "$cr" "$b".[12].tmp
|
26
28
|
rm "$b".[12].tmp
|
27
29
|
miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f
|
@@ -62,5 +64,17 @@ FastA.length.pl "$DATASET.AllContigs.fna" | awk '$2>=1000{print $1}' \
|
|
62
64
|
|
63
65
|
# Finalize
|
64
66
|
miga date > "$DATASET.done"
|
65
|
-
|
67
|
+
cat <<VERSIONS \
|
68
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
69
|
+
=> MiGA
|
70
|
+
$(miga --version)
|
71
|
+
$(
|
72
|
+
if [[ "$interpose" == "yes" ]] ; then
|
73
|
+
echo "=> Enveomics Collection: FastA.interpose.pl"
|
74
|
+
echo "version unknown"
|
75
|
+
fi
|
76
|
+
)
|
77
|
+
=> IDBA-UD
|
78
|
+
version unknown
|
79
|
+
VERSIONS
|
66
80
|
|
data/scripts/cds.bash
CHANGED
@@ -19,7 +19,7 @@ fi
|
|
19
19
|
# Run Prodigal
|
20
20
|
TYPE=$(miga ls -P "$PROJECT" -D "$DATASET" -m type | cut -f 2)
|
21
21
|
case "$TYPE" in
|
22
|
-
metagenome|virome)
|
22
|
+
metagenome|virome|plasmid)
|
23
23
|
prodigal -a "${DATASET}.faa" -d "${DATASET}.fna" -o "${DATASET}.gff3" \
|
24
24
|
-f gff -q -i "../05.assembly/${DATASET}.LargeContigs.fna" -p meta
|
25
25
|
;;
|
@@ -68,6 +68,12 @@ for ext in gff3 faa fna ; do
|
|
68
68
|
done
|
69
69
|
|
70
70
|
# Finalize
|
71
|
-
miga date > "$DATASET.done"
|
72
|
-
|
71
|
+
miga date > "${DATASET}.done"
|
72
|
+
cat <<VERSIONS \
|
73
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
74
|
+
=> MiGA
|
75
|
+
$(miga --version)
|
76
|
+
=> Prodigal
|
77
|
+
$(prodigal -v 2>&1 | grep . | perl -pe 's/^Prodigal //')
|
78
|
+
VERSIONS
|
73
79
|
|
data/scripts/distances.bash
CHANGED
@@ -10,13 +10,111 @@ cd "$PROJECT/data/09.distances"
|
|
10
10
|
miga date > "$DATASET.start"
|
11
11
|
|
12
12
|
# Check quality
|
13
|
-
miga
|
14
|
-
|
15
|
-
[[ "$
|
13
|
+
MARKERS=$(miga ls -P "$PROJECT" -D "$DATASET" --markers \
|
14
|
+
| wc -l | awk '{print $1}')
|
15
|
+
if [[ "$MARKERS" -eq "1" ]] ; then
|
16
|
+
miga stats -P "$PROJECT" -D "$DATASET" -r essential_genes --compute-and-save
|
17
|
+
inactive=$(miga ls -P "$PROJECT" -D "$DATASET" -m inactive | cut -f 2)
|
18
|
+
[[ "$inactive" == "true" ]] && exit
|
19
|
+
fi
|
16
20
|
|
17
21
|
# Run distances
|
18
22
|
ruby -I "$MIGA/lib" "$MIGA/utils/distances.rb" "$PROJECT" "$DATASET"
|
19
23
|
|
20
24
|
# Finalize
|
21
|
-
|
22
|
-
|
25
|
+
fastaai=no
|
26
|
+
aai=no
|
27
|
+
ani=no
|
28
|
+
blast=no
|
29
|
+
blat=no
|
30
|
+
diamond=no
|
31
|
+
fastani=no
|
32
|
+
case $(miga option -P "$PROJECT" -k haai_p) in
|
33
|
+
fastaai)
|
34
|
+
fastaai=yes
|
35
|
+
;;
|
36
|
+
diamond)
|
37
|
+
diamond=yes
|
38
|
+
aai=yes
|
39
|
+
;;
|
40
|
+
blast)
|
41
|
+
blast=yes
|
42
|
+
aai=yes
|
43
|
+
;;
|
44
|
+
esac
|
45
|
+
|
46
|
+
case $(miga option -P "$PROJECT" -k aai_p) in
|
47
|
+
diamond)
|
48
|
+
diamond=yes
|
49
|
+
aai=yes
|
50
|
+
;;
|
51
|
+
blast)
|
52
|
+
blast=yes
|
53
|
+
aai=yes
|
54
|
+
;;
|
55
|
+
esac
|
56
|
+
|
57
|
+
case $(miga option -P "$PROJECT" -k ani_p) in
|
58
|
+
blast)
|
59
|
+
blast=yes
|
60
|
+
ani=yes
|
61
|
+
;;
|
62
|
+
blat)
|
63
|
+
blat=yes
|
64
|
+
ani=yes
|
65
|
+
;;
|
66
|
+
fastani)
|
67
|
+
fastani=yes
|
68
|
+
;;
|
69
|
+
esac
|
70
|
+
|
71
|
+
|
72
|
+
miga date > "${DATASET}.done"
|
73
|
+
cat <<VERSIONS \
|
74
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
75
|
+
=> MiGA
|
76
|
+
$(miga --version)
|
77
|
+
$(
|
78
|
+
if [[ "$fastaai" == "yes" ]] ; then
|
79
|
+
echo "=> FastAAI"
|
80
|
+
fastaai version 2>&1 | perl -pe 's/.*=//'
|
81
|
+
fi
|
82
|
+
)
|
83
|
+
$(
|
84
|
+
if [[ "$fastani" == "yes" ]] ; then
|
85
|
+
echo "=> FastANI"
|
86
|
+
fastANI --version 2>&1 | grep . | perl -pe 's/^version //'
|
87
|
+
fi
|
88
|
+
)
|
89
|
+
$(
|
90
|
+
if [[ "$aai" == "yes" ]] ; then
|
91
|
+
echo "=> Enveomics Collection: aai.rb"
|
92
|
+
aai.rb --version 2>&1 | perl -pe 's/.*: //'
|
93
|
+
fi
|
94
|
+
)
|
95
|
+
$(
|
96
|
+
if [[ "$ani" == "yes" ]] ; then
|
97
|
+
echo "=> Enveomics Collection: ani.rb"
|
98
|
+
ani.rb --version 2>&1 | perl -pe 's/.*: //'
|
99
|
+
fi
|
100
|
+
)
|
101
|
+
$(
|
102
|
+
if [[ "$blast" == "yes" ]] ; then
|
103
|
+
echo "=> NCBI BLAST+"
|
104
|
+
blastp -version 2>&1 | tail -n 1 | perl -pe 's/.*: blast //'
|
105
|
+
fi
|
106
|
+
)
|
107
|
+
$(
|
108
|
+
if [[ "$blat" == "yes" ]] ; then
|
109
|
+
echo "=> BLAT"
|
110
|
+
blat 2>&1 | head -n 1 | perl -pe 's/.* v\. //' | perl -pe 's/ fast .*//'
|
111
|
+
fi
|
112
|
+
)
|
113
|
+
$(
|
114
|
+
if [[ "$diamond" == "yes" ]] ; then
|
115
|
+
echo "=> Diamond"
|
116
|
+
diamond --version 2>&1 | perl -pe 's/^diamond version //'
|
117
|
+
fi
|
118
|
+
)
|
119
|
+
VERSIONS
|
120
|
+
|
@@ -70,4 +70,17 @@ fi
|
|
70
70
|
|
71
71
|
# Finalize
|
72
72
|
miga date > "${DATASET}.done"
|
73
|
-
|
73
|
+
cat <<VERSIONS \
|
74
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
75
|
+
=> MiGA
|
76
|
+
$(miga --version)
|
77
|
+
=> Enveomics Collection: HMM.essential.rb
|
78
|
+
$(HMM.essential.rb --version 2>&1 | perl -pe 's/.*: //')
|
79
|
+
$(
|
80
|
+
if [[ "$NOMULTI" -eq "1" ]] ; then
|
81
|
+
echo "=> FastAAI"
|
82
|
+
fastaai version 2>&1 | perl -pe 's/.*=//'
|
83
|
+
fi
|
84
|
+
)
|
85
|
+
VERSIONS
|
86
|
+
|
data/scripts/mytaxa.bash
CHANGED
@@ -15,7 +15,7 @@ if [[ "$MIGA_MYTAXA" == "no" ]] ; then
|
|
15
15
|
> "$DATASET.nomytaxa.txt"
|
16
16
|
else
|
17
17
|
# Check type of dataset
|
18
|
-
MULTI=$(miga
|
18
|
+
MULTI=$(miga ls -P "$PROJECT" -D "$DATASET" --multi \
|
19
19
|
| wc -l | awk '{print $1}')
|
20
20
|
if [[ "$MULTI" -eq "1" ]] ; then
|
21
21
|
# Check requirements
|
@@ -98,5 +98,20 @@ else
|
|
98
98
|
fi
|
99
99
|
|
100
100
|
# Finalize
|
101
|
-
miga date > "$DATASET.done"
|
102
|
-
|
101
|
+
miga date > "${DATASET}.done"
|
102
|
+
cat <<VERSIONS \
|
103
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
104
|
+
=> MiGA
|
105
|
+
$(miga --version)
|
106
|
+
$(
|
107
|
+
if [[ "$MIGA_MYTAXA" != "no" && "$MULTI" -eq "1" ]] ; then
|
108
|
+
echo "=> MyTaxa"
|
109
|
+
MyTaxa | grep Version: | perl -pe 's/.*: //'
|
110
|
+
echo "=> Diamond"
|
111
|
+
diamond --version 2>&1 | perl -pe 's/^diamond version //'
|
112
|
+
echo "=> Krona"
|
113
|
+
ktImportText | head -n 2 | tail -n 1 | awk '{ print $3 }'
|
114
|
+
fi
|
115
|
+
)
|
116
|
+
VERSIONS
|
117
|
+
|
data/scripts/mytaxa_scan.bash
CHANGED
@@ -14,7 +14,7 @@ if [[ "$MIGA_MYTAXA" == "no" ]] ; then
|
|
14
14
|
> "$DATASET.nomytaxa.txt"
|
15
15
|
else
|
16
16
|
# Check type of dataset
|
17
|
-
NOMULTI=$(miga
|
17
|
+
NOMULTI=$(miga ls -P "$PROJECT" -D "$DATASET" --no-multi \
|
18
18
|
| wc -l | awk '{print $1}')
|
19
19
|
if [[ "$NOMULTI" -eq "1" ]] ; then
|
20
20
|
# Check requirements
|
@@ -97,5 +97,18 @@ else
|
|
97
97
|
fi
|
98
98
|
|
99
99
|
# Finalize
|
100
|
-
miga date > "$DATASET.done"
|
101
|
-
|
100
|
+
miga date > "${DATASET}.done"
|
101
|
+
cat <<VERSIONS \
|
102
|
+
| miga add_result -P "$PROJECT" -D "$DATASET" -r "$SCRIPT" -f --stdin-versions
|
103
|
+
=> MiGA
|
104
|
+
$(miga --version)
|
105
|
+
$(
|
106
|
+
if [[ "$MIGA_MYTAXA" != "no" && "$NOMULTI" -eq "1" ]] ; then
|
107
|
+
echo "=> MyTaxa"
|
108
|
+
MyTaxa | grep Version: | perl -pe 's/.*: //'
|
109
|
+
echo "=> Diamond"
|
110
|
+
diamond --version 2>&1 | perl -pe 's/^diamond version //'
|
111
|
+
fi
|
112
|
+
)
|
113
|
+
VERSIONS
|
114
|
+
|