miga-base 0.7.3.1 → 0.7.8.0
- checksums.yaml +4 -4
- data/lib/miga/cli.rb +10 -8
- data/lib/miga/cli/action.rb +2 -3
- data/lib/miga/cli/action/about.rb +5 -6
- data/lib/miga/cli/action/add.rb +18 -12
- data/lib/miga/cli/action/add_result.rb +2 -3
- data/lib/miga/cli/action/archive.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +8 -6
- data/lib/miga/cli/action/console.rb +0 -1
- data/lib/miga/cli/action/daemon.rb +7 -7
- data/lib/miga/cli/action/date.rb +0 -1
- data/lib/miga/cli/action/derep_wf.rb +5 -4
- data/lib/miga/cli/action/doctor.rb +71 -82
- data/lib/miga/cli/action/doctor/base.rb +102 -0
- data/lib/miga/cli/action/edit.rb +14 -2
- data/lib/miga/cli/action/files.rb +8 -8
- data/lib/miga/cli/action/find.rb +5 -6
- data/lib/miga/cli/action/generic.rb +7 -7
- data/lib/miga/cli/action/get.rb +20 -17
- data/lib/miga/cli/action/get_db.rb +8 -2
- data/lib/miga/cli/action/index_wf.rb +1 -1
- data/lib/miga/cli/action/init.rb +53 -41
- data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
- data/lib/miga/cli/action/lair.rb +7 -7
- data/lib/miga/cli/action/ln.rb +6 -6
- data/lib/miga/cli/action/ls.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +11 -3
- data/lib/miga/cli/action/new.rb +4 -4
- data/lib/miga/cli/action/next_step.rb +0 -1
- data/lib/miga/cli/action/preproc_wf.rb +3 -3
- data/lib/miga/cli/action/quality_wf.rb +1 -1
- data/lib/miga/cli/action/rm.rb +2 -3
- data/lib/miga/cli/action/run.rb +8 -8
- data/lib/miga/cli/action/stats.rb +8 -4
- data/lib/miga/cli/action/summary.rb +7 -6
- data/lib/miga/cli/action/tax_dist.rb +8 -4
- data/lib/miga/cli/action/tax_index.rb +3 -4
- data/lib/miga/cli/action/tax_set.rb +7 -6
- data/lib/miga/cli/action/tax_test.rb +6 -5
- data/lib/miga/cli/action/wf.rb +25 -19
- data/lib/miga/cli/base.rb +34 -32
- data/lib/miga/cli/objects_helper.rb +27 -18
- data/lib/miga/cli/opt_helper.rb +3 -2
- data/lib/miga/common.rb +2 -5
- data/lib/miga/common/base.rb +15 -16
- data/lib/miga/common/format.rb +8 -5
- data/lib/miga/common/hooks.rb +1 -4
- data/lib/miga/common/path.rb +4 -9
- data/lib/miga/common/with_daemon.rb +5 -2
- data/lib/miga/common/with_daemon_class.rb +1 -1
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/daemon.rb +93 -44
- data/lib/miga/daemon/base.rb +30 -11
- data/lib/miga/dataset.rb +47 -37
- data/lib/miga/dataset/base.rb +52 -37
- data/lib/miga/dataset/hooks.rb +3 -4
- data/lib/miga/dataset/result.rb +17 -1
- data/lib/miga/dataset/status.rb +6 -5
- data/lib/miga/json.rb +5 -7
- data/lib/miga/lair.rb +4 -0
- data/lib/miga/metadata.rb +4 -3
- data/lib/miga/project.rb +29 -20
- data/lib/miga/project/base.rb +52 -37
- data/lib/miga/project/dataset.rb +33 -26
- data/lib/miga/project/hooks.rb +0 -3
- data/lib/miga/project/result.rb +14 -5
- data/lib/miga/remote_dataset.rb +85 -72
- data/lib/miga/remote_dataset/base.rb +11 -13
- data/lib/miga/remote_dataset/download.rb +34 -12
- data/lib/miga/result.rb +48 -53
- data/lib/miga/result/base.rb +0 -2
- data/lib/miga/result/dates.rb +1 -3
- data/lib/miga/result/source.rb +15 -16
- data/lib/miga/result/stats.rb +37 -27
- data/lib/miga/tax_dist.rb +6 -3
- data/lib/miga/tax_index.rb +17 -17
- data/lib/miga/taxonomy.rb +6 -1
- data/lib/miga/taxonomy/base.rb +19 -15
- data/lib/miga/version.rb +19 -16
- data/scripts/project_stats.bash +3 -0
- data/scripts/stats.bash +1 -1
- data/test/common_test.rb +3 -11
- data/test/daemon_helper.rb +38 -0
- data/test/daemon_test.rb +91 -99
- data/test/dataset_test.rb +63 -59
- data/test/format_test.rb +3 -11
- data/test/hook_test.rb +50 -55
- data/test/json_test.rb +7 -8
- data/test/lair_test.rb +22 -28
- data/test/metadata_test.rb +6 -14
- data/test/project_test.rb +33 -40
- data/test/remote_dataset_test.rb +26 -32
- data/test/result_stats_test.rb +17 -27
- data/test/result_test.rb +41 -34
- data/test/tax_dist_test.rb +2 -4
- data/test/tax_index_test.rb +4 -10
- data/test/taxonomy_test.rb +7 -9
- data/test/test_helper.rb +42 -1
- data/test/with_daemon_test.rb +14 -22
- data/utils/adapters.fa +13 -0
- data/utils/cleanup-databases.rb +6 -5
- data/utils/distance/base.rb +0 -1
- data/utils/distance/commands.rb +19 -12
- data/utils/distance/database.rb +24 -21
- data/utils/distance/pipeline.rb +23 -10
- data/utils/distance/runner.rb +20 -16
- data/utils/distance/temporal.rb +1 -3
- data/utils/distances.rb +1 -1
- data/utils/domain-ess-genes.rb +7 -7
- data/utils/index_metadata.rb +5 -4
- data/utils/mytaxa_scan.rb +18 -16
- data/utils/representatives.rb +5 -4
- data/utils/requirements.txt +1 -1
- data/utils/subclade/base.rb +0 -1
- data/utils/subclade/pipeline.rb +7 -6
- data/utils/subclade/runner.rb +9 -9
- data/utils/subclade/temporal.rb +0 -2
- data/utils/subclades-compile.rb +39 -37
- data/utils/subclades.rb +1 -1
- metadata +6 -4
data/lib/miga/project/hooks.rb
CHANGED
@@ -1,4 +1,3 @@
-
 require 'miga/common/hooks'
 
 ##
@@ -18,7 +17,6 @@ require 'miga/common/hooks'
 # Internal hooks:
 # - _pull_result_hooks()
 module MiGA::Project::Hooks
-
   include MiGA::Common::Hooks
 
   def default_hooks
@@ -47,5 +45,4 @@ module MiGA::Project::Hooks
     pull_hook(:"on_result_ready_#{event_args.first}", *event_args)
     pull_hook(:on_processing_ready) if next_task(nil, false).nil?
   end
-
 end
data/lib/miga/project/result.rb
CHANGED
@@ -62,6 +62,7 @@ module MiGA::Project::Result
   # Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
   def add_result_distances(base, _opts)
     return nil unless result_files_exist?(base, %w[.Rdata .log .txt])
+
     r = MiGA::Result.new("#{base}.json")
     r.add_file(:rdata, 'miga-project.Rdata')
     r.add_file(:matrix, 'miga-project.txt')
@@ -79,8 +80,13 @@ module MiGA::Project::Result
       return r
     end
     return nil unless result_files_exist?(base, %w[.proposed-clades])
-
-
+    unless is_clade? ||
+           result_files_exist?(
+             base, %w[.pdf .classif .medoids .class.tsv .class.nwk]
+           )
+      return nil
+    end
+
     r = add_result_iter_clades(base)
     r.add_file(:aai_tree, 'miga-project.aai.nwk')
     r.add_file(:proposal, 'miga-project.proposed-clades')
@@ -99,8 +105,10 @@ module MiGA::Project::Result
       r.add_file(:empty, 'miga-project.empty')
       return r
     end
-    return nil unless result_files_exist?(
-      %w[.pdf .classif .medoids .class.tsv .class.nwk]
+    return nil unless result_files_exist?(
+      base, %w[.pdf .classif .medoids .class.tsv .class.nwk]
+    )
+
     r = add_result_iter_clades(base)
     r.add_file(:ani_tree, 'miga-project.ani.nwk')
     r
@@ -127,6 +135,7 @@ module MiGA::Project::Result
       return r
     end
     return nil unless result_files_exist?(base, %w[.ogs .stats])
+
     r = MiGA::Result.new("#{base}.json")
     r.add_file(:ogs, 'miga-project.ogs')
     r.add_file(:abc, 'miga-project.abc')
@@ -141,6 +150,7 @@ module MiGA::Project::Result
   def add_result_project_stats(base, _opts)
     return nil unless
       result_files_exist?(base, %w[.taxonomy.json .metadata.db])
+
     r = MiGA::Result.new("#{base}.json")
     r.add_file(:taxonomy_index, 'miga-project.taxonomy.json')
     r.add_file(:metadata_index, 'miga-project.metadata.db')
@@ -151,5 +161,4 @@ module MiGA::Project::Result
   alias add_result_aai_distances add_result_distances
   alias add_result_ani_distances add_result_distances
   alias add_result_ssu_distances add_result_distances
-
 end
data/lib/miga/remote_dataset.rb
CHANGED
@@ -14,9 +14,11 @@ class MiGA::RemoteDataset < MiGA::MiGA
   class << self
     def ncbi_asm_acc2id(acc)
       return acc if acc =~ /^\d+$/
+
       search_doc = MiGA::Json.parse(
         download(:ncbi_search, :assembly, acc, :json),
-        symbolize: false, contents: true
+        symbolize: false, contents: true
+      )
       (search_doc['esearchresult']['idlist'] || []).first
     end
   end
@@ -90,14 +92,14 @@ class MiGA::RemoteDataset < MiGA::MiGA
   # and optionally the Hash +metadata+.
   def update_metadata(dataset, metadata = {})
     metadata = get_metadata(metadata)
-    metadata.each { |k,v| dataset.metadata[k] = v }
+    metadata.each { |k, v| dataset.metadata[k] = v }
     dataset.save
   end
 
   ##
   # Get metadata from the remote location.
   def get_metadata(metadata_def = {})
-    metadata_def.each { |k,v| @metadata[k] = v }
+    metadata_def.each { |k, v| @metadata[k] = v }
     case universe
     when :ebi, :ncbi, :web
       # Get taxonomy
@@ -131,7 +133,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
   def get_ncbi_taxonomy
     tax_id = get_ncbi_taxid
     return nil if tax_id.nil?
-
+
+    lineage = { ns: 'ncbi' }
     doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
     doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
       name = i.scan(%r{<ScientificName>(.*)</ScientificName>}).first.to_a.first
@@ -148,89 +151,99 @@ class MiGA::RemoteDataset < MiGA::MiGA
   # Get the JSON document describing an NCBI assembly entry.
   def ncbi_asm_json_doc
     return @_ncbi_asm_json_doc unless @_ncbi_asm_json_doc.nil?
+
     metadata[:ncbi_asm] ||= ids.first if universe == :ncbi and db == :assembly
     return nil unless metadata[:ncbi_asm]
+
     ncbi_asm_id = self.class.ncbi_asm_acc2id metadata[:ncbi_asm]
     doc = MiGA::Json.parse(
       self.class.download(:ncbi_summary, :assembly, ncbi_asm_id, :json),
-      symbolize: false, contents: true
+      symbolize: false, contents: true
+    )
     @_ncbi_asm_json_doc = doc['result'][ doc['result']['uids'].first ]
   end
 
-
   private
 
-
-
-    ncbi_asm_json_doc['taxid']
-  end
+  def get_ncbi_taxid_from_web
+    return nil if ncbi_asm_json_doc.nil?
 
-
-
-    ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
-    return nil if ln.nil?
-    ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
-    return nil unless ln =~ /^\d+$/
-    ln
-  end
+    ncbi_asm_json_doc['taxid']
+  end
 
-
-
-
-
-    return nil if ln.nil?
-    ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, '\\1')
-    return nil unless ln =~ /^\d+$/
-    ln
-  end
+  def get_ncbi_taxid_from_ncbi
+    doc = self.class.download(universe, db, ids, :gb).split(/\n/)
+    ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
+    return nil if ln.nil?
 
-
-
-    biosample = self.class.ncbi_map(metadata[:ncbi_nuccore],
-      :nuccore, :biosample)
-    return metadata if biosample.nil?
-    asm = self.class.ncbi_map(biosample, :biosample, :assembly)
-    metadata[:ncbi_asm] = asm.to_s unless asm.nil?
-    get_type_status_ncbi_asm metadata
-  end
+    ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
+    return nil unless ln =~ /^\d+$/
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    metadata[:
-
-
+    ln
+  end
+
+  def get_ncbi_taxid_from_ebi
+    doc = self.class.download(universe, db, ids, :annot).split(/\n/)
+    ln = doc.grep(%r{^FT\s+/db_xref="taxon:}).first
+    ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
+    return nil if ln.nil?
+
+    ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, '\\1')
+    return nil unless ln =~ /^\d+$/
+
+    ln
+  end
+
+  def get_type_status_ncbi_nuccore(metadata)
+    return metadata if metadata[:ncbi_nuccore].nil?
+
+    biosample =
+      self.class.ncbi_map(metadata[:ncbi_nuccore], :nuccore, :biosample)
+    return metadata if biosample.nil?
+
+    asm = self.class.ncbi_map(biosample, :biosample, :assembly)
+    metadata[:ncbi_asm] = asm.to_s unless asm.nil?
+    get_type_status_ncbi_asm metadata
+  end
+
+  def get_type_status_ncbi_asm(metadata)
+    return metadata if ncbi_asm_json_doc.nil?
+
+    from_type = ncbi_asm_json_doc['from_type']
+    from_type = ncbi_asm_json_doc['fromtype'] if from_type.nil?
+    case from_type
+    when nil
+      # Do nothing
+    when ''
+      metadata[:is_type] = false
+      metadata[:is_ref_type] = false
+    when 'assembly from reference material', 'assembly designated as reftype'
+      metadata[:is_type] = false
+      metadata[:is_ref_type] = true
+      metadata[:type_rel] = from_type
+    else
+      metadata[:is_type] = true
+      metadata[:type_rel] = from_type
     end
+    metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
+    metadata[:suspect] = nil if metadata[:suspect].empty?
+    MiGA.DEBUG "Got type: #{from_type}"
+    metadata
+  end
 
-
-
-
-
-
-
-
-
-
-
-
-  end
-    File.unlink(a_ctg) if File.exist? a_ctg
-    File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
+  def save_assembly_to(project, name, udb)
+    dir = MiGA::Dataset.RESULT_DIRS[:assembly]
+    base = "#{project.path}/data/#{dir}/#{name}"
+    l_ctg = "#{base}.LargeContigs.fna"
+    a_ctg = "#{base}.AllContigs.fna"
+    File.open("#{base}.start", 'w') { |ofh| ofh.puts Time.now.to_s }
+    if udb[:format] == :fasta_gz
+      download "#{l_ctg}.gz"
+      system "gzip -d '#{l_ctg}.gz'"
+    else
+      download l_ctg
     end
+    File.unlink(a_ctg) if File.exist? a_ctg
+    File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
+  end
 end
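For reference, a minimal usage sketch of the class-level helper shown above; the assembly accession is hypothetical and is only meant to illustrate the two branches of ncbi_asm_acc2id (numeric identifiers are returned unchanged, anything else goes through an NCBI esearch query):

    require 'miga/remote_dataset'

    # Already-numeric identifiers are returned as-is (no network access)
    puts MiGA::RemoteDataset.ncbi_asm_acc2id('12345')   # => "12345"

    # A hypothetical assembly accession is resolved through NCBI esearch
    puts MiGA::RemoteDataset.ncbi_asm_acc2id('GCA_000000000.1')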
data/lib/miga/remote_dataset/base.rb
CHANGED
@@ -1,21 +1,20 @@
-
 require 'open-uri'
 require 'cgi'
 
 class MiGA::RemoteDataset < MiGA::MiGA
-
   # Class-level
   class << self
-    def UNIVERSE
+    def UNIVERSE
+      @@UNIVERSE
+    end
   end
-
 end
 
 module MiGA::RemoteDataset::Base
-
   @@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
   @@_NCBI_API_KEY = lambda { |url|
-    ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
+    ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
+  }
 
   ##
   # Structure of the different database Universes or containers. The structure
@@ -43,13 +42,13 @@ module MiGA::RemoteDataset::Base
       method: :net
     },
     ebi: {
-      dbs: { embl: {stage: :assembly, format: :fasta} },
+      dbs: { embl: { stage: :assembly, format: :fasta } },
       url: 'https://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s',
       method: :rest
     },
    ncbi: {
      dbs: {
-        nuccore: { stage: :assembly, format: :fasta },
+        nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
        assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
        taxonomy: { stage: :metadata, format: :xml }
      },
@@ -59,9 +58,10 @@ module MiGA::RemoteDataset::Base
     },
     ncbi_map: {
       dbs: {
-        nuccore: {
-          format: :json
-
+        nuccore: {
+          stage: :metadata, map_to: [:biosample, :assembly], format: :json
+        },
+        biosample: { stage: :metadata, map_to: [:assembly], format: :json }
       },
       url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%4$s&retmode=%3$s",
       method: :net,
@@ -81,6 +81,4 @@ module MiGA::RemoteDataset::Base
       api_key: @@_NCBI_API_KEY
     }
   }
-
 end
-
data/lib/miga/remote_dataset/download.rb
CHANGED
@@ -1,4 +1,3 @@
-
 require 'miga/remote_dataset/base'
 
 class MiGA::RemoteDataset
@@ -26,8 +25,8 @@ class MiGA::RemoteDataset
       obj: obj
     }
     doc = send("#{getter}_#{method}", opts)
-    unless file.nil?
-      ofh = File.open(file, 'w')
+    unless opts[:file].nil?
+      ofh = File.open(opts[:file], 'w')
       ofh.print doc.force_encoding('UTF-8')
       ofh.close
     end
@@ -45,8 +44,26 @@ class MiGA::RemoteDataset
   def ncbi_asm_rest(opts)
     url_dir = opts[:obj].ncbi_asm_json_doc['ftppath_genbank']
     url = "#{url_dir}/#{File.basename url_dir}_genomic.fna.gz"
-    download(
-
+    download(
+      :web, :assembly_gz, url,
+      opts[:format], opts[:file], opts[:extra], opts[:obj]
+    )
+  end
+
+  ##
+  # Download data from NCBI GenBank (nuccore) database using the REST method.
+  # Supported +opts+ (Hash) are the same as #download_rest and #ncbi_asm_rest.
+  def ncbi_gb_rest(opts)
+    o = download_rest(opts)
+    return o unless o.strip.empty?
+
+    MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
+    opts[:format] = :fasta_gz
+    if opts[:file]
+      File.unlink(opts[:file]) if File.exist? opts[:file]
+      opts[:file] = "#{opts[:file]}.gz"
+    end
+    ncbi_asm_rest(opts)
  end
 
  ##
@@ -58,8 +75,9 @@ class MiGA::RemoteDataset
   # +extra+: Array
   def download_rest(opts)
     u = @@UNIVERSE[opts[:universe]]
-    url = sprintf(
-      opts[:db], opts[:ids].join(','), opts[:format], *opts[:extra]
+    url = sprintf(
+      u[:url], opts[:db], opts[:ids].join(','), opts[:format], *opts[:extra]
+    )
     url = u[:api_key][url] unless u[:api_key].nil?
     download_url url
   end
@@ -80,17 +98,20 @@ class MiGA::RemoteDataset
     rescue => e
       @timeout_try += 1
       raise e if @timeout_try >= 3
+
+      sleep 5 # <- For: 429 Too Many Requests
       retry
     end
     doc
   end
-
+
   ##
   # Looks for the entry +id+ in +dbfrom+, and returns the linked
   # identifier in +db+ (or nil).
   def ncbi_map(id, dbfrom, db)
     doc = download(:ncbi_map, dbfrom, id, :json, nil, [db])
     return if doc.empty?
+
     tree = MiGA::Json.parse(doc, contents: true)
     [:linksets, 0, :linksetdbs, 0, :links, 0].each do |i|
       tree = tree[i]
@@ -102,11 +123,12 @@ class MiGA::RemoteDataset
 end
 
 module MiGA::RemoteDataset::Download
-
   ##
-  # Download data into +file
+  # Download data into +file+
   def download(file)
-    self.class.download(
-
+    self.class.download(
+      universe, db, ids, self.class.UNIVERSE[universe][:dbs][db][:format],
+      file, [], self
+    )
   end
 end
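As a rough usage sketch of the mapping helper wired through the ncbi_map universe above (the nuccore identifier below is hypothetical; each call performs a live NCBI elink query and returns nil when no linked record exists):

    require 'miga/remote_dataset'

    # Map a hypothetical nuccore entry to its linked biosample, then to the
    # assembly, mirroring the chain used by get_type_status_ncbi_nuccore
    biosample = MiGA::RemoteDataset.ncbi_map('NZ_CP000000', :nuccore, :biosample)
    assembly = biosample && MiGA::RemoteDataset.ncbi_map(biosample, :biosample, :assembly)
    puts assembly.inspect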
data/lib/miga/result.rb
CHANGED
@@ -8,7 +8,6 @@ require 'miga/result/stats'
 ##
 # The result from a task run. It can be project-wide or dataset-specific.
 class MiGA::Result < MiGA::MiGA
-
   include MiGA::Result::Dates
   include MiGA::Result::Source
   include MiGA::Result::Stats
@@ -26,6 +25,7 @@ class MiGA::Result < MiGA::MiGA
   # Returns MiGA::Result if it already exists, nil otherwise.
   def load(path)
     return nil unless MiGA::Result.exist? path
+
     MiGA::Result.new(path)
   end
 
@@ -33,6 +33,7 @@ class MiGA::Result < MiGA::MiGA
     FileUtils.rm(path) if force && File.exist?(path)
     r_pre = self.load(path)
     return r_pre unless r_pre.nil?
+
     yield
     self.load(path)
   end
@@ -41,67 +42,72 @@ class MiGA::Result < MiGA::MiGA
   # Instance-level
 
   ##
-  # Hash with the result metadata
+  # Hash with the result metadata
   attr_reader :data
 
   ##
-  #
-  attr_reader :results
-
-  ##
-  # Load or create the MiGA::Result described by the JSON file +path+.
+  # Load or create the MiGA::Result described by the JSON file +path+
   def initialize(path)
     @path = File.absolute_path(path)
     MiGA::Result.exist?(@path) ? self.load : create
   end
 
   ##
-  # Is the result clean? Returns Boolean
-  def clean?
+  # Is the result clean? Returns Boolean
+  def clean?
+    !!self[:clean]
+  end
 
   ##
-  # Register the result as cleaned
-  def clean!
+  # Register the result as cleaned
+  def clean!
+    self[:clean] = true
+  end
 
   ##
   # Path to the standard files of the result. +which+ must be one of:
   # - :json (default) : JSON file describing the result.
   # - :start : File with the date when the processing started.
   # - :done : File with the date when the processing ended.
-  def path(which
+  def path(which = :json)
     case which.to_sym
     when :json
       @path
     when :start
-      @path.sub(/\.json$/,
+      @path.sub(/\.json$/, '.start')
     when :done
-      @path.sub(/\.json$/,
+      @path.sub(/\.json$/, '.done')
     end
   end
 
   ##
-  # Directory containing the result
+  # Directory containing the result
   def dir
     File.dirname(path)
   end
 
   ##
-  # Absolute path to the file(s) defined by symbol +k
+  # Absolute path to the file(s) defined by symbol +k+
   def file_path(k)
     k = k.to_sym
     f = self[:files].nil? ? nil : self[:files][k]
     return nil if f.nil?
     return File.expand_path(f, dir) unless f.is_a? Array
-
+
+    f.map { |fi| File.expand_path(fi, dir) }
   end
 
   ##
-  # Entry with symbol +k
-  def [](k)
+  # Entry with symbol +k+
+  def [](k)
+    data[k.to_sym]
+  end
 
   ##
-  # Adds value +v+ to entry with symbol +k
-  def []=(k,v)
+  # Adds value +v+ to entry with symbol +k+
+  def []=(k, v)
+    data[k.to_sym] = v
+  end
 
   ##
   # Register +file+ (path relative to #dir) with the symbol +k+. If the file
@@ -116,20 +122,20 @@ class MiGA::Result < MiGA::MiGA
   end
 
   ##
-  # #add_file for each key-value pair in the +files+ Hash
+  # #add_file for each key-value pair in the +files+ Hash
   def add_files(files)
     files.each { |k, v| add_file(k, v) }
   end
 
   ##
-  # Initialize and #save empty result
+  # Initialize and #save empty result
   def create
-    @data = { created: Time.now.to_s,
+    @data = { created: Time.now.to_s, stats: {}, files: {} }
     save
   end
 
   ##
-  # Save the result persistently (in the JSON file #path)
+  # Save the result persistently (in the JSON file #path)
   def save
     @data[:updated] = Time.now.to_s
     s = path(:start)
@@ -142,24 +148,23 @@ class MiGA::Result < MiGA::MiGA
   end
 
   ##
-  # Load (or reload) result data in the JSON file #path
+  # Load (or reload) result data in the JSON file #path
   def load
     @data = MiGA::Json.parse(path)
     @data[:files] ||= {}
-    @results = (self[:results] || []).map{ |rs| MiGA::Result.new rs }
   end
 
   ##
-  # Remove result, including all associated files
+  # Remove result, including all associated files
   def remove!
-    each_file
-
-
-
-
-
-
-
+    each_file { |file| FileUtils.rm_rf(File.join(dir, file)) }
+    unlink
+  end
+
+  # Unlink result by removing the .done and .start timestamps and the
+  # .json descriptor, but don't remove any other associated files
+  def unlink
+    %i(start done).each { |i| f = path(i) and File.unlink(f) }
     File.unlink path
   end
 
@@ -172,29 +177,19 @@ class MiGA::Result < MiGA::MiGA
   # Note that multiple files may have the same symbol (file_sym), since
   # arrays of files are supported.
   def each_file(&blk)
+    return to_enum(:each_file) unless block_given?
+
     @data[:files] ||= {}
-    self[:files].each do |k,files|
+    self[:files].each do |k, files|
       files = [files] unless files.kind_of? Array
       files.each do |file|
         case blk.arity
-        when 1
-
-        when
-
-        when 3
-          blk.call(k, file, File.expand_path(file, dir))
-        else
-          raise "Wrong number of arguments: #{blk.arity} for 1..3"
+        when 1; blk.call(file)
+        when 2; blk.call(k, file)
+        when 3; blk.call(k, file, File.expand_path(file, dir))
+        else; raise "Wrong number of arguments: #{blk.arity} for 1..3"
         end
       end
     end
   end
-
-  ##
-  # Add the MiGA::Result +result+ as part of the current result.
-  def add_result(result)
-    @data[:results] << result.path
-    save
-  end
-
 end
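A minimal usage sketch against the updated MiGA::Result API shown above (the JSON path is hypothetical; Result.load returns nil when the descriptor does not exist):

    require 'miga/result'

    res = MiGA::Result.load('data/09.distances/miga-project.json')
    unless res.nil?
      # each_file now returns an Enumerator when called without a block
      res.each_file { |file| puts file }

      # The new clean?/clean! accessors persist a :clean flag in the result data
      res.clean! unless res.clean?
      res.save

      # res.unlink  removes only the .json/.start/.done descriptors
      # res.remove! additionally deletes every registered file
    end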