miga-base 0.7.3.0 → 0.7.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli.rb +10 -8
- data/lib/miga/cli/action.rb +2 -3
- data/lib/miga/cli/action/about.rb +5 -6
- data/lib/miga/cli/action/add.rb +18 -12
- data/lib/miga/cli/action/add_result.rb +2 -3
- data/lib/miga/cli/action/archive.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +8 -6
- data/lib/miga/cli/action/console.rb +0 -1
- data/lib/miga/cli/action/daemon.rb +7 -7
- data/lib/miga/cli/action/date.rb +0 -1
- data/lib/miga/cli/action/derep_wf.rb +5 -4
- data/lib/miga/cli/action/doctor.rb +71 -82
- data/lib/miga/cli/action/doctor/base.rb +102 -0
- data/lib/miga/cli/action/edit.rb +14 -2
- data/lib/miga/cli/action/files.rb +8 -8
- data/lib/miga/cli/action/find.rb +5 -6
- data/lib/miga/cli/action/generic.rb +7 -7
- data/lib/miga/cli/action/get.rb +20 -17
- data/lib/miga/cli/action/get_db.rb +8 -2
- data/lib/miga/cli/action/index_wf.rb +1 -1
- data/lib/miga/cli/action/init.rb +53 -41
- data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
- data/lib/miga/cli/action/lair.rb +7 -7
- data/lib/miga/cli/action/ln.rb +6 -6
- data/lib/miga/cli/action/ls.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +11 -3
- data/lib/miga/cli/action/new.rb +4 -4
- data/lib/miga/cli/action/next_step.rb +0 -1
- data/lib/miga/cli/action/preproc_wf.rb +3 -3
- data/lib/miga/cli/action/quality_wf.rb +1 -1
- data/lib/miga/cli/action/rm.rb +2 -3
- data/lib/miga/cli/action/run.rb +8 -8
- data/lib/miga/cli/action/stats.rb +8 -4
- data/lib/miga/cli/action/summary.rb +7 -6
- data/lib/miga/cli/action/tax_dist.rb +8 -4
- data/lib/miga/cli/action/tax_index.rb +3 -4
- data/lib/miga/cli/action/tax_set.rb +7 -6
- data/lib/miga/cli/action/tax_test.rb +6 -5
- data/lib/miga/cli/action/wf.rb +21 -19
- data/lib/miga/cli/base.rb +34 -32
- data/lib/miga/cli/objects_helper.rb +27 -18
- data/lib/miga/cli/opt_helper.rb +3 -2
- data/lib/miga/common.rb +2 -5
- data/lib/miga/common/base.rb +15 -16
- data/lib/miga/common/format.rb +8 -5
- data/lib/miga/common/hooks.rb +1 -4
- data/lib/miga/common/path.rb +4 -9
- data/lib/miga/common/with_daemon.rb +6 -3
- data/lib/miga/common/with_daemon_class.rb +3 -2
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/daemon.rb +93 -44
- data/lib/miga/daemon/base.rb +30 -11
- data/lib/miga/dataset.rb +47 -37
- data/lib/miga/dataset/base.rb +52 -37
- data/lib/miga/dataset/hooks.rb +3 -4
- data/lib/miga/dataset/result.rb +17 -1
- data/lib/miga/dataset/status.rb +6 -5
- data/lib/miga/json.rb +5 -7
- data/lib/miga/lair.rb +4 -0
- data/lib/miga/metadata.rb +4 -3
- data/lib/miga/project.rb +29 -20
- data/lib/miga/project/base.rb +52 -37
- data/lib/miga/project/dataset.rb +33 -26
- data/lib/miga/project/hooks.rb +0 -3
- data/lib/miga/project/result.rb +14 -5
- data/lib/miga/remote_dataset.rb +85 -72
- data/lib/miga/remote_dataset/base.rb +11 -13
- data/lib/miga/remote_dataset/download.rb +34 -12
- data/lib/miga/result.rb +34 -25
- data/lib/miga/result/base.rb +0 -2
- data/lib/miga/result/dates.rb +1 -3
- data/lib/miga/result/source.rb +15 -16
- data/lib/miga/result/stats.rb +37 -27
- data/lib/miga/tax_dist.rb +6 -4
- data/lib/miga/tax_index.rb +17 -17
- data/lib/miga/taxonomy.rb +6 -1
- data/lib/miga/taxonomy/base.rb +19 -15
- data/lib/miga/version.rb +19 -16
- data/scripts/project_stats.bash +3 -0
- data/scripts/stats.bash +1 -1
- data/test/common_test.rb +3 -11
- data/test/daemon_helper.rb +38 -0
- data/test/daemon_test.rb +91 -99
- data/test/dataset_test.rb +63 -59
- data/test/format_test.rb +3 -11
- data/test/hook_test.rb +50 -55
- data/test/json_test.rb +7 -8
- data/test/lair_test.rb +22 -28
- data/test/metadata_test.rb +6 -14
- data/test/project_test.rb +33 -40
- data/test/remote_dataset_test.rb +26 -32
- data/test/result_stats_test.rb +17 -27
- data/test/result_test.rb +41 -34
- data/test/tax_dist_test.rb +2 -4
- data/test/tax_index_test.rb +4 -10
- data/test/taxonomy_test.rb +7 -9
- data/test/test_helper.rb +42 -1
- data/test/with_daemon_test.rb +14 -22
- data/utils/adapters.fa +13 -0
- data/utils/cleanup-databases.rb +6 -5
- data/utils/distance/base.rb +0 -1
- data/utils/distance/commands.rb +19 -12
- data/utils/distance/database.rb +25 -21
- data/utils/distance/pipeline.rb +16 -10
- data/utils/distance/runner.rb +19 -13
- data/utils/distance/temporal.rb +7 -4
- data/utils/distances.rb +1 -1
- data/utils/domain-ess-genes.rb +7 -7
- data/utils/index_metadata.rb +5 -4
- data/utils/mytaxa_scan.rb +18 -16
- data/utils/representatives.rb +5 -4
- data/utils/requirements.txt +1 -1
- data/utils/subclade/base.rb +0 -1
- data/utils/subclade/pipeline.rb +7 -6
- data/utils/subclade/runner.rb +9 -9
- data/utils/subclade/temporal.rb +0 -2
- data/utils/subclades-compile.rb +39 -37
- data/utils/subclades.rb +1 -1
- metadata +6 -4
data/lib/miga/project/hooks.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
require 'miga/common/hooks'
|
3
2
|
|
4
3
|
##
|
@@ -18,7 +17,6 @@ require 'miga/common/hooks'
|
|
18
17
|
# Internal hooks:
|
19
18
|
# - _pull_result_hooks()
|
20
19
|
module MiGA::Project::Hooks
|
21
|
-
|
22
20
|
include MiGA::Common::Hooks
|
23
21
|
|
24
22
|
def default_hooks
|
@@ -47,5 +45,4 @@ module MiGA::Project::Hooks
|
|
47
45
|
pull_hook(:"on_result_ready_#{event_args.first}", *event_args)
|
48
46
|
pull_hook(:on_processing_ready) if next_task(nil, false).nil?
|
49
47
|
end
|
50
|
-
|
51
48
|
end
|
data/lib/miga/project/result.rb
CHANGED
@@ -62,6 +62,7 @@ module MiGA::Project::Result
|
|
62
62
|
# Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
|
63
63
|
def add_result_distances(base, _opts)
|
64
64
|
return nil unless result_files_exist?(base, %w[.Rdata .log .txt])
|
65
|
+
|
65
66
|
r = MiGA::Result.new("#{base}.json")
|
66
67
|
r.add_file(:rdata, 'miga-project.Rdata')
|
67
68
|
r.add_file(:matrix, 'miga-project.txt')
|
@@ -79,8 +80,13 @@ module MiGA::Project::Result
|
|
79
80
|
return r
|
80
81
|
end
|
81
82
|
return nil unless result_files_exist?(base, %w[.proposed-clades])
|
82
|
-
|
83
|
-
|
83
|
+
unless is_clade? ||
|
84
|
+
result_files_exist?(
|
85
|
+
base, %w[.pdf .classif .medoids .class.tsv .class.nwk]
|
86
|
+
)
|
87
|
+
return nil
|
88
|
+
end
|
89
|
+
|
84
90
|
r = add_result_iter_clades(base)
|
85
91
|
r.add_file(:aai_tree, 'miga-project.aai.nwk')
|
86
92
|
r.add_file(:proposal, 'miga-project.proposed-clades')
|
@@ -99,8 +105,10 @@ module MiGA::Project::Result
|
|
99
105
|
r.add_file(:empty, 'miga-project.empty')
|
100
106
|
return r
|
101
107
|
end
|
102
|
-
return nil unless result_files_exist?(
|
103
|
-
%w[.pdf .classif .medoids .class.tsv .class.nwk]
|
108
|
+
return nil unless result_files_exist?(
|
109
|
+
base, %w[.pdf .classif .medoids .class.tsv .class.nwk]
|
110
|
+
)
|
111
|
+
|
104
112
|
r = add_result_iter_clades(base)
|
105
113
|
r.add_file(:ani_tree, 'miga-project.ani.nwk')
|
106
114
|
r
|
@@ -127,6 +135,7 @@ module MiGA::Project::Result
|
|
127
135
|
return r
|
128
136
|
end
|
129
137
|
return nil unless result_files_exist?(base, %w[.ogs .stats])
|
138
|
+
|
130
139
|
r = MiGA::Result.new("#{base}.json")
|
131
140
|
r.add_file(:ogs, 'miga-project.ogs')
|
132
141
|
r.add_file(:abc, 'miga-project.abc')
|
@@ -141,6 +150,7 @@ module MiGA::Project::Result
|
|
141
150
|
def add_result_project_stats(base, _opts)
|
142
151
|
return nil unless
|
143
152
|
result_files_exist?(base, %w[.taxonomy.json .metadata.db])
|
153
|
+
|
144
154
|
r = MiGA::Result.new("#{base}.json")
|
145
155
|
r.add_file(:taxonomy_index, 'miga-project.taxonomy.json')
|
146
156
|
r.add_file(:metadata_index, 'miga-project.metadata.db')
|
@@ -151,5 +161,4 @@ module MiGA::Project::Result
|
|
151
161
|
alias add_result_aai_distances add_result_distances
|
152
162
|
alias add_result_ani_distances add_result_distances
|
153
163
|
alias add_result_ssu_distances add_result_distances
|
154
|
-
|
155
164
|
end
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -14,9 +14,11 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
14
14
|
class << self
|
15
15
|
def ncbi_asm_acc2id(acc)
|
16
16
|
return acc if acc =~ /^\d+$/
|
17
|
+
|
17
18
|
search_doc = MiGA::Json.parse(
|
18
19
|
download(:ncbi_search, :assembly, acc, :json),
|
19
|
-
symbolize: false, contents: true
|
20
|
+
symbolize: false, contents: true
|
21
|
+
)
|
20
22
|
(search_doc['esearchresult']['idlist'] || []).first
|
21
23
|
end
|
22
24
|
end
|
@@ -90,14 +92,14 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
90
92
|
# and optionally the Hash +metadata+.
|
91
93
|
def update_metadata(dataset, metadata = {})
|
92
94
|
metadata = get_metadata(metadata)
|
93
|
-
metadata.each { |k,v| dataset.metadata[k] = v }
|
95
|
+
metadata.each { |k, v| dataset.metadata[k] = v }
|
94
96
|
dataset.save
|
95
97
|
end
|
96
98
|
|
97
99
|
##
|
98
100
|
# Get metadata from the remote location.
|
99
101
|
def get_metadata(metadata_def = {})
|
100
|
-
metadata_def.each { |k,v| @metadata[k] = v }
|
102
|
+
metadata_def.each { |k, v| @metadata[k] = v }
|
101
103
|
case universe
|
102
104
|
when :ebi, :ncbi, :web
|
103
105
|
# Get taxonomy
|
@@ -131,7 +133,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
131
133
|
def get_ncbi_taxonomy
|
132
134
|
tax_id = get_ncbi_taxid
|
133
135
|
return nil if tax_id.nil?
|
134
|
-
|
136
|
+
|
137
|
+
lineage = { ns: 'ncbi' }
|
135
138
|
doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
|
136
139
|
doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
|
137
140
|
name = i.scan(%r{<ScientificName>(.*)</ScientificName>}).first.to_a.first
|
@@ -148,89 +151,99 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
148
151
|
# Get the JSON document describing an NCBI assembly entry.
|
149
152
|
def ncbi_asm_json_doc
|
150
153
|
return @_ncbi_asm_json_doc unless @_ncbi_asm_json_doc.nil?
|
154
|
+
|
151
155
|
metadata[:ncbi_asm] ||= ids.first if universe == :ncbi and db == :assembly
|
152
156
|
return nil unless metadata[:ncbi_asm]
|
157
|
+
|
153
158
|
ncbi_asm_id = self.class.ncbi_asm_acc2id metadata[:ncbi_asm]
|
154
159
|
doc = MiGA::Json.parse(
|
155
160
|
self.class.download(:ncbi_summary, :assembly, ncbi_asm_id, :json),
|
156
|
-
symbolize: false, contents: true
|
161
|
+
symbolize: false, contents: true
|
162
|
+
)
|
157
163
|
@_ncbi_asm_json_doc = doc['result'][ doc['result']['uids'].first ]
|
158
164
|
end
|
159
165
|
|
160
|
-
|
161
166
|
private
|
162
167
|
|
163
|
-
|
164
|
-
|
165
|
-
ncbi_asm_json_doc['taxid']
|
166
|
-
end
|
168
|
+
def get_ncbi_taxid_from_web
|
169
|
+
return nil if ncbi_asm_json_doc.nil?
|
167
170
|
|
168
|
-
|
169
|
-
|
170
|
-
ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
|
171
|
-
return nil if ln.nil?
|
172
|
-
ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
|
173
|
-
return nil unless ln =~ /^\d+$/
|
174
|
-
ln
|
175
|
-
end
|
171
|
+
ncbi_asm_json_doc['taxid']
|
172
|
+
end
|
176
173
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
return nil if ln.nil?
|
182
|
-
ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, '\\1')
|
183
|
-
return nil unless ln =~ /^\d+$/
|
184
|
-
ln
|
185
|
-
end
|
174
|
+
def get_ncbi_taxid_from_ncbi
|
175
|
+
doc = self.class.download(universe, db, ids, :gb).split(/\n/)
|
176
|
+
ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
|
177
|
+
return nil if ln.nil?
|
186
178
|
|
187
|
-
|
188
|
-
|
189
|
-
biosample = self.class.ncbi_map(metadata[:ncbi_nuccore],
|
190
|
-
:nuccore, :biosample)
|
191
|
-
return metadata if biosample.nil?
|
192
|
-
asm = self.class.ncbi_map(biosample, :biosample, :assembly)
|
193
|
-
metadata[:ncbi_asm] = asm.to_s unless asm.nil?
|
194
|
-
get_type_status_ncbi_asm metadata
|
195
|
-
end
|
179
|
+
ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
|
180
|
+
return nil unless ln =~ /^\d+$/
|
196
181
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
metadata[:
|
217
|
-
|
218
|
-
|
182
|
+
ln
|
183
|
+
end
|
184
|
+
|
185
|
+
def get_ncbi_taxid_from_ebi
|
186
|
+
doc = self.class.download(universe, db, ids, :annot).split(/\n/)
|
187
|
+
ln = doc.grep(%r{^FT\s+/db_xref="taxon:}).first
|
188
|
+
ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
|
189
|
+
return nil if ln.nil?
|
190
|
+
|
191
|
+
ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, '\\1')
|
192
|
+
return nil unless ln =~ /^\d+$/
|
193
|
+
|
194
|
+
ln
|
195
|
+
end
|
196
|
+
|
197
|
+
def get_type_status_ncbi_nuccore(metadata)
|
198
|
+
return metadata if metadata[:ncbi_nuccore].nil?
|
199
|
+
|
200
|
+
biosample =
|
201
|
+
self.class.ncbi_map(metadata[:ncbi_nuccore], :nuccore, :biosample)
|
202
|
+
return metadata if biosample.nil?
|
203
|
+
|
204
|
+
asm = self.class.ncbi_map(biosample, :biosample, :assembly)
|
205
|
+
metadata[:ncbi_asm] = asm.to_s unless asm.nil?
|
206
|
+
get_type_status_ncbi_asm metadata
|
207
|
+
end
|
208
|
+
|
209
|
+
def get_type_status_ncbi_asm(metadata)
|
210
|
+
return metadata if ncbi_asm_json_doc.nil?
|
211
|
+
|
212
|
+
from_type = ncbi_asm_json_doc['from_type']
|
213
|
+
from_type = ncbi_asm_json_doc['fromtype'] if from_type.nil?
|
214
|
+
case from_type
|
215
|
+
when nil
|
216
|
+
# Do nothing
|
217
|
+
when ''
|
218
|
+
metadata[:is_type] = false
|
219
|
+
metadata[:is_ref_type] = false
|
220
|
+
when 'assembly from reference material', 'assembly designated as reftype'
|
221
|
+
metadata[:is_type] = false
|
222
|
+
metadata[:is_ref_type] = true
|
223
|
+
metadata[:type_rel] = from_type
|
224
|
+
else
|
225
|
+
metadata[:is_type] = true
|
226
|
+
metadata[:type_rel] = from_type
|
219
227
|
end
|
228
|
+
metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
|
229
|
+
metadata[:suspect] = nil if metadata[:suspect].empty?
|
230
|
+
MiGA.DEBUG "Got type: #{from_type}"
|
231
|
+
metadata
|
232
|
+
end
|
220
233
|
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
end
|
233
|
-
File.unlink(a_ctg) if File.exist? a_ctg
|
234
|
-
File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
|
234
|
+
def save_assembly_to(project, name, udb)
|
235
|
+
dir = MiGA::Dataset.RESULT_DIRS[:assembly]
|
236
|
+
base = "#{project.path}/data/#{dir}/#{name}"
|
237
|
+
l_ctg = "#{base}.LargeContigs.fna"
|
238
|
+
a_ctg = "#{base}.AllContigs.fna"
|
239
|
+
File.open("#{base}.start", 'w') { |ofh| ofh.puts Time.now.to_s }
|
240
|
+
if udb[:format] == :fasta_gz
|
241
|
+
download "#{l_ctg}.gz"
|
242
|
+
system "gzip -d '#{l_ctg}.gz'"
|
243
|
+
else
|
244
|
+
download l_ctg
|
235
245
|
end
|
246
|
+
File.unlink(a_ctg) if File.exist? a_ctg
|
247
|
+
File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
|
248
|
+
end
|
236
249
|
end
|
@@ -1,21 +1,20 @@
|
|
1
|
-
|
2
1
|
require 'open-uri'
|
3
2
|
require 'cgi'
|
4
3
|
|
5
4
|
class MiGA::RemoteDataset < MiGA::MiGA
|
6
|
-
|
7
5
|
# Class-level
|
8
6
|
class << self
|
9
|
-
def UNIVERSE
|
7
|
+
def UNIVERSE
|
8
|
+
@@UNIVERSE
|
9
|
+
end
|
10
10
|
end
|
11
|
-
|
12
11
|
end
|
13
12
|
|
14
13
|
module MiGA::RemoteDataset::Base
|
15
|
-
|
16
14
|
@@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
|
17
15
|
@@_NCBI_API_KEY = lambda { |url|
|
18
|
-
ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
|
16
|
+
ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
|
17
|
+
}
|
19
18
|
|
20
19
|
##
|
21
20
|
# Structure of the different database Universes or containers. The structure
|
@@ -43,13 +42,13 @@ module MiGA::RemoteDataset::Base
|
|
43
42
|
method: :net
|
44
43
|
},
|
45
44
|
ebi: {
|
46
|
-
dbs: { embl: {stage: :assembly, format: :fasta} },
|
45
|
+
dbs: { embl: { stage: :assembly, format: :fasta } },
|
47
46
|
url: 'https://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s',
|
48
47
|
method: :rest
|
49
48
|
},
|
50
49
|
ncbi: {
|
51
50
|
dbs: {
|
52
|
-
nuccore: { stage: :assembly, format: :fasta },
|
51
|
+
nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
|
53
52
|
assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
|
54
53
|
taxonomy: { stage: :metadata, format: :xml }
|
55
54
|
},
|
@@ -59,9 +58,10 @@ module MiGA::RemoteDataset::Base
|
|
59
58
|
},
|
60
59
|
ncbi_map: {
|
61
60
|
dbs: {
|
62
|
-
nuccore: {
|
63
|
-
format: :json
|
64
|
-
|
61
|
+
nuccore: {
|
62
|
+
stage: :metadata, map_to: [:biosample, :assembly], format: :json
|
63
|
+
},
|
64
|
+
biosample: { stage: :metadata, map_to: [:assembly], format: :json }
|
65
65
|
},
|
66
66
|
url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%4$s&retmode=%3$s",
|
67
67
|
method: :net,
|
@@ -81,6 +81,4 @@ module MiGA::RemoteDataset::Base
|
|
81
81
|
api_key: @@_NCBI_API_KEY
|
82
82
|
}
|
83
83
|
}
|
84
|
-
|
85
84
|
end
|
86
|
-
|
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
require 'miga/remote_dataset/base'
|
3
2
|
|
4
3
|
class MiGA::RemoteDataset
|
@@ -26,8 +25,8 @@ class MiGA::RemoteDataset
|
|
26
25
|
obj: obj
|
27
26
|
}
|
28
27
|
doc = send("#{getter}_#{method}", opts)
|
29
|
-
unless file.nil?
|
30
|
-
ofh = File.open(file, 'w')
|
28
|
+
unless opts[:file].nil?
|
29
|
+
ofh = File.open(opts[:file], 'w')
|
31
30
|
ofh.print doc.force_encoding('UTF-8')
|
32
31
|
ofh.close
|
33
32
|
end
|
@@ -45,8 +44,26 @@ class MiGA::RemoteDataset
|
|
45
44
|
def ncbi_asm_rest(opts)
|
46
45
|
url_dir = opts[:obj].ncbi_asm_json_doc['ftppath_genbank']
|
47
46
|
url = "#{url_dir}/#{File.basename url_dir}_genomic.fna.gz"
|
48
|
-
download(
|
49
|
-
|
47
|
+
download(
|
48
|
+
:web, :assembly_gz, url,
|
49
|
+
opts[:format], opts[:file], opts[:extra], opts[:obj]
|
50
|
+
)
|
51
|
+
end
|
52
|
+
|
53
|
+
##
|
54
|
+
# Download data from NCBI GenBank (nuccore) database using the REST method.
|
55
|
+
# Supported +opts+ (Hash) are the same as #download_rest and #ncbi_asm_rest.
|
56
|
+
def ncbi_gb_rest(opts)
|
57
|
+
o = download_rest(opts)
|
58
|
+
return o unless o.strip.empty?
|
59
|
+
|
60
|
+
MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
|
61
|
+
opts[:format] = :fasta_gz
|
62
|
+
if opts[:file]
|
63
|
+
File.unlink(opts[:file]) if File.exist? opts[:file]
|
64
|
+
opts[:file] = "#{opts[:file]}.gz"
|
65
|
+
end
|
66
|
+
ncbi_asm_rest(opts)
|
50
67
|
end
|
51
68
|
|
52
69
|
##
|
@@ -58,8 +75,9 @@ class MiGA::RemoteDataset
|
|
58
75
|
# +extra+: Array
|
59
76
|
def download_rest(opts)
|
60
77
|
u = @@UNIVERSE[opts[:universe]]
|
61
|
-
url = sprintf(
|
62
|
-
opts[:db], opts[:ids].join(','), opts[:format], *opts[:extra]
|
78
|
+
url = sprintf(
|
79
|
+
u[:url], opts[:db], opts[:ids].join(','), opts[:format], *opts[:extra]
|
80
|
+
)
|
63
81
|
url = u[:api_key][url] unless u[:api_key].nil?
|
64
82
|
download_url url
|
65
83
|
end
|
@@ -80,17 +98,20 @@ class MiGA::RemoteDataset
|
|
80
98
|
rescue => e
|
81
99
|
@timeout_try += 1
|
82
100
|
raise e if @timeout_try >= 3
|
101
|
+
|
102
|
+
sleep 5 # <- For: 429 Too Many Requests
|
83
103
|
retry
|
84
104
|
end
|
85
105
|
doc
|
86
106
|
end
|
87
|
-
|
107
|
+
|
88
108
|
##
|
89
109
|
# Looks for the entry +id+ in +dbfrom+, and returns the linked
|
90
110
|
# identifier in +db+ (or nil).
|
91
111
|
def ncbi_map(id, dbfrom, db)
|
92
112
|
doc = download(:ncbi_map, dbfrom, id, :json, nil, [db])
|
93
113
|
return if doc.empty?
|
114
|
+
|
94
115
|
tree = MiGA::Json.parse(doc, contents: true)
|
95
116
|
[:linksets, 0, :linksetdbs, 0, :links, 0].each do |i|
|
96
117
|
tree = tree[i]
|
@@ -102,11 +123,12 @@ class MiGA::RemoteDataset
|
|
102
123
|
end
|
103
124
|
|
104
125
|
module MiGA::RemoteDataset::Download
|
105
|
-
|
106
126
|
##
|
107
|
-
# Download data into +file
|
127
|
+
# Download data into +file+
|
108
128
|
def download(file)
|
109
|
-
self.class.download(
|
110
|
-
|
129
|
+
self.class.download(
|
130
|
+
universe, db, ids, self.class.UNIVERSE[universe][:dbs][db][:format],
|
131
|
+
file, [], self
|
132
|
+
)
|
111
133
|
end
|
112
134
|
end
|
data/lib/miga/result.rb
CHANGED
@@ -8,7 +8,6 @@ require 'miga/result/stats'
|
|
8
8
|
##
|
9
9
|
# The result from a task run. It can be project-wide or dataset-specific.
|
10
10
|
class MiGA::Result < MiGA::MiGA
|
11
|
-
|
12
11
|
include MiGA::Result::Dates
|
13
12
|
include MiGA::Result::Source
|
14
13
|
include MiGA::Result::Stats
|
@@ -26,6 +25,7 @@ class MiGA::Result < MiGA::MiGA
|
|
26
25
|
# Returns MiGA::Result if it already exists, nil otherwise.
|
27
26
|
def load(path)
|
28
27
|
return nil unless MiGA::Result.exist? path
|
28
|
+
|
29
29
|
MiGA::Result.new(path)
|
30
30
|
end
|
31
31
|
|
@@ -33,6 +33,7 @@ class MiGA::Result < MiGA::MiGA
|
|
33
33
|
FileUtils.rm(path) if force && File.exist?(path)
|
34
34
|
r_pre = self.load(path)
|
35
35
|
return r_pre unless r_pre.nil?
|
36
|
+
|
36
37
|
yield
|
37
38
|
self.load(path)
|
38
39
|
end
|
@@ -41,34 +42,38 @@ class MiGA::Result < MiGA::MiGA
|
|
41
42
|
# Instance-level
|
42
43
|
|
43
44
|
##
|
44
|
-
# Hash with the result metadata
|
45
|
+
# Hash with the result metadata
|
45
46
|
attr_reader :data
|
46
47
|
|
47
48
|
##
|
48
|
-
# Array of MiGA::Result objects nested within the result (if any)
|
49
|
+
# Array of MiGA::Result objects nested within the result (if any)
|
49
50
|
attr_reader :results
|
50
51
|
|
51
52
|
##
|
52
|
-
# Load or create the MiGA::Result described by the JSON file +path
|
53
|
+
# Load or create the MiGA::Result described by the JSON file +path+
|
53
54
|
def initialize(path)
|
54
55
|
@path = File.absolute_path(path)
|
55
56
|
MiGA::Result.exist?(@path) ? self.load : create
|
56
57
|
end
|
57
58
|
|
58
59
|
##
|
59
|
-
# Is the result clean? Returns Boolean
|
60
|
-
def clean?
|
60
|
+
# Is the result clean? Returns Boolean
|
61
|
+
def clean?
|
62
|
+
!!self[:clean]
|
63
|
+
end
|
61
64
|
|
62
65
|
##
|
63
|
-
# Register the result as cleaned
|
64
|
-
def clean!
|
66
|
+
# Register the result as cleaned
|
67
|
+
def clean!
|
68
|
+
self[:clean] = true
|
69
|
+
end
|
65
70
|
|
66
71
|
##
|
67
72
|
# Path to the standard files of the result. +which+ must be one of:
|
68
73
|
# - :json (default) : JSON file describing the result.
|
69
74
|
# - :start : File with the date when the processing started.
|
70
75
|
# - :done : File with the date when the processing ended.
|
71
|
-
def path(which
|
76
|
+
def path(which = :json)
|
72
77
|
case which.to_sym
|
73
78
|
when :json
|
74
79
|
@path
|
@@ -80,28 +85,33 @@ class MiGA::Result < MiGA::MiGA
|
|
80
85
|
end
|
81
86
|
|
82
87
|
##
|
83
|
-
# Directory containing the result
|
88
|
+
# Directory containing the result
|
84
89
|
def dir
|
85
90
|
File.dirname(path)
|
86
91
|
end
|
87
92
|
|
88
93
|
##
|
89
|
-
# Absolute path to the file(s) defined by symbol +k
|
94
|
+
# Absolute path to the file(s) defined by symbol +k+
|
90
95
|
def file_path(k)
|
91
96
|
k = k.to_sym
|
92
97
|
f = self[:files].nil? ? nil : self[:files][k]
|
93
98
|
return nil if f.nil?
|
94
99
|
return File.expand_path(f, dir) unless f.is_a? Array
|
95
|
-
|
100
|
+
|
101
|
+
f.map { |fi| File.expand_path(fi, dir) }
|
96
102
|
end
|
97
103
|
|
98
104
|
##
|
99
|
-
# Entry with symbol +k
|
100
|
-
def [](k)
|
105
|
+
# Entry with symbol +k+
|
106
|
+
def [](k)
|
107
|
+
data[k.to_sym]
|
108
|
+
end
|
101
109
|
|
102
110
|
##
|
103
|
-
# Adds value +v+ to entry with symbol +k
|
104
|
-
def []=(k,v)
|
111
|
+
# Adds value +v+ to entry with symbol +k+
|
112
|
+
def []=(k, v)
|
113
|
+
data[k.to_sym] = v
|
114
|
+
end
|
105
115
|
|
106
116
|
##
|
107
117
|
# Register +file+ (path relative to #dir) with the symbol +k+. If the file
|
@@ -116,20 +126,20 @@ class MiGA::Result < MiGA::MiGA
|
|
116
126
|
end
|
117
127
|
|
118
128
|
##
|
119
|
-
# #add_file for each key-value pair in the +files+ Hash
|
129
|
+
# #add_file for each key-value pair in the +files+ Hash
|
120
130
|
def add_files(files)
|
121
131
|
files.each { |k, v| add_file(k, v) }
|
122
132
|
end
|
123
133
|
|
124
134
|
##
|
125
|
-
# Initialize and #save empty result
|
135
|
+
# Initialize and #save empty result
|
126
136
|
def create
|
127
137
|
@data = { created: Time.now.to_s, results: [], stats: {}, files: {} }
|
128
138
|
save
|
129
139
|
end
|
130
140
|
|
131
141
|
##
|
132
|
-
# Save the result persistently (in the JSON file #path)
|
142
|
+
# Save the result persistently (in the JSON file #path)
|
133
143
|
def save
|
134
144
|
@data[:updated] = Time.now.to_s
|
135
145
|
s = path(:start)
|
@@ -142,15 +152,15 @@ class MiGA::Result < MiGA::MiGA
|
|
142
152
|
end
|
143
153
|
|
144
154
|
##
|
145
|
-
# Load (or reload) result data in the JSON file #path
|
155
|
+
# Load (or reload) result data in the JSON file #path
|
146
156
|
def load
|
147
157
|
@data = MiGA::Json.parse(path)
|
148
158
|
@data[:files] ||= {}
|
149
|
-
@results = (self[:results] || []).map{ |rs| MiGA::Result.new rs }
|
159
|
+
@results = (self[:results] || []).map { |rs| MiGA::Result.new rs }
|
150
160
|
end
|
151
161
|
|
152
162
|
##
|
153
|
-
# Remove result, including all associated files
|
163
|
+
# Remove result, including all associated files
|
154
164
|
def remove!
|
155
165
|
each_file do |file|
|
156
166
|
f = File.expand_path(file, dir)
|
@@ -173,7 +183,7 @@ class MiGA::Result < MiGA::MiGA
|
|
173
183
|
# arrays of files are supported.
|
174
184
|
def each_file(&blk)
|
175
185
|
@data[:files] ||= {}
|
176
|
-
self[:files].each do |k,files|
|
186
|
+
self[:files].each do |k, files|
|
177
187
|
files = [files] unless files.kind_of? Array
|
178
188
|
files.each do |file|
|
179
189
|
case blk.arity
|
@@ -191,10 +201,9 @@ class MiGA::Result < MiGA::MiGA
|
|
191
201
|
end
|
192
202
|
|
193
203
|
##
|
194
|
-
# Add the MiGA::Result +result+ as part of the current result
|
204
|
+
# Add the MiGA::Result +result+ as part of the current result
|
195
205
|
def add_result(result)
|
196
206
|
@data[:results] << result.path
|
197
207
|
save
|
198
208
|
end
|
199
|
-
|
200
209
|
end
|