miga-base 0.7.3.1 → 0.7.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli.rb +10 -8
- data/lib/miga/cli/action.rb +2 -3
- data/lib/miga/cli/action/about.rb +5 -6
- data/lib/miga/cli/action/add.rb +18 -12
- data/lib/miga/cli/action/add_result.rb +2 -3
- data/lib/miga/cli/action/archive.rb +1 -2
- data/lib/miga/cli/action/classify_wf.rb +8 -6
- data/lib/miga/cli/action/console.rb +0 -1
- data/lib/miga/cli/action/daemon.rb +7 -7
- data/lib/miga/cli/action/date.rb +0 -1
- data/lib/miga/cli/action/derep_wf.rb +5 -4
- data/lib/miga/cli/action/doctor.rb +71 -82
- data/lib/miga/cli/action/doctor/base.rb +102 -0
- data/lib/miga/cli/action/edit.rb +14 -2
- data/lib/miga/cli/action/files.rb +8 -8
- data/lib/miga/cli/action/find.rb +5 -6
- data/lib/miga/cli/action/generic.rb +7 -7
- data/lib/miga/cli/action/get.rb +20 -17
- data/lib/miga/cli/action/get_db.rb +8 -2
- data/lib/miga/cli/action/index_wf.rb +1 -1
- data/lib/miga/cli/action/init.rb +53 -41
- data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
- data/lib/miga/cli/action/lair.rb +7 -7
- data/lib/miga/cli/action/ln.rb +6 -6
- data/lib/miga/cli/action/ls.rb +1 -2
- data/lib/miga/cli/action/ncbi_get.rb +11 -3
- data/lib/miga/cli/action/new.rb +4 -4
- data/lib/miga/cli/action/next_step.rb +0 -1
- data/lib/miga/cli/action/preproc_wf.rb +3 -3
- data/lib/miga/cli/action/quality_wf.rb +1 -1
- data/lib/miga/cli/action/rm.rb +2 -3
- data/lib/miga/cli/action/run.rb +8 -8
- data/lib/miga/cli/action/stats.rb +8 -4
- data/lib/miga/cli/action/summary.rb +7 -6
- data/lib/miga/cli/action/tax_dist.rb +8 -4
- data/lib/miga/cli/action/tax_index.rb +3 -4
- data/lib/miga/cli/action/tax_set.rb +7 -6
- data/lib/miga/cli/action/tax_test.rb +6 -5
- data/lib/miga/cli/action/wf.rb +25 -19
- data/lib/miga/cli/base.rb +34 -32
- data/lib/miga/cli/objects_helper.rb +27 -18
- data/lib/miga/cli/opt_helper.rb +3 -2
- data/lib/miga/common.rb +2 -5
- data/lib/miga/common/base.rb +15 -16
- data/lib/miga/common/format.rb +8 -5
- data/lib/miga/common/hooks.rb +1 -4
- data/lib/miga/common/path.rb +4 -9
- data/lib/miga/common/with_daemon.rb +5 -2
- data/lib/miga/common/with_daemon_class.rb +1 -1
- data/lib/miga/common/with_result.rb +2 -1
- data/lib/miga/daemon.rb +93 -44
- data/lib/miga/daemon/base.rb +30 -11
- data/lib/miga/dataset.rb +47 -37
- data/lib/miga/dataset/base.rb +52 -37
- data/lib/miga/dataset/hooks.rb +3 -4
- data/lib/miga/dataset/result.rb +17 -1
- data/lib/miga/dataset/status.rb +6 -5
- data/lib/miga/json.rb +5 -7
- data/lib/miga/lair.rb +4 -0
- data/lib/miga/metadata.rb +4 -3
- data/lib/miga/project.rb +29 -20
- data/lib/miga/project/base.rb +52 -37
- data/lib/miga/project/dataset.rb +33 -26
- data/lib/miga/project/hooks.rb +0 -3
- data/lib/miga/project/result.rb +14 -5
- data/lib/miga/remote_dataset.rb +85 -72
- data/lib/miga/remote_dataset/base.rb +11 -13
- data/lib/miga/remote_dataset/download.rb +34 -12
- data/lib/miga/result.rb +48 -53
- data/lib/miga/result/base.rb +0 -2
- data/lib/miga/result/dates.rb +1 -3
- data/lib/miga/result/source.rb +15 -16
- data/lib/miga/result/stats.rb +37 -27
- data/lib/miga/tax_dist.rb +6 -3
- data/lib/miga/tax_index.rb +17 -17
- data/lib/miga/taxonomy.rb +6 -1
- data/lib/miga/taxonomy/base.rb +19 -15
- data/lib/miga/version.rb +19 -16
- data/scripts/project_stats.bash +3 -0
- data/scripts/stats.bash +1 -1
- data/test/common_test.rb +3 -11
- data/test/daemon_helper.rb +38 -0
- data/test/daemon_test.rb +91 -99
- data/test/dataset_test.rb +63 -59
- data/test/format_test.rb +3 -11
- data/test/hook_test.rb +50 -55
- data/test/json_test.rb +7 -8
- data/test/lair_test.rb +22 -28
- data/test/metadata_test.rb +6 -14
- data/test/project_test.rb +33 -40
- data/test/remote_dataset_test.rb +26 -32
- data/test/result_stats_test.rb +17 -27
- data/test/result_test.rb +41 -34
- data/test/tax_dist_test.rb +2 -4
- data/test/tax_index_test.rb +4 -10
- data/test/taxonomy_test.rb +7 -9
- data/test/test_helper.rb +42 -1
- data/test/with_daemon_test.rb +14 -22
- data/utils/adapters.fa +13 -0
- data/utils/cleanup-databases.rb +6 -5
- data/utils/distance/base.rb +0 -1
- data/utils/distance/commands.rb +19 -12
- data/utils/distance/database.rb +24 -21
- data/utils/distance/pipeline.rb +23 -10
- data/utils/distance/runner.rb +20 -16
- data/utils/distance/temporal.rb +1 -3
- data/utils/distances.rb +1 -1
- data/utils/domain-ess-genes.rb +7 -7
- data/utils/index_metadata.rb +5 -4
- data/utils/mytaxa_scan.rb +18 -16
- data/utils/representatives.rb +5 -4
- data/utils/requirements.txt +1 -1
- data/utils/subclade/base.rb +0 -1
- data/utils/subclade/pipeline.rb +7 -6
- data/utils/subclade/runner.rb +9 -9
- data/utils/subclade/temporal.rb +0 -2
- data/utils/subclades-compile.rb +39 -37
- data/utils/subclades.rb +1 -1
- metadata +6 -4
data/lib/miga/project/hooks.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
require 'miga/common/hooks'
|
3
2
|
|
4
3
|
##
|
@@ -18,7 +17,6 @@ require 'miga/common/hooks'
|
|
18
17
|
# Internal hooks:
|
19
18
|
# - _pull_result_hooks()
|
20
19
|
module MiGA::Project::Hooks
|
21
|
-
|
22
20
|
include MiGA::Common::Hooks
|
23
21
|
|
24
22
|
def default_hooks
|
@@ -47,5 +45,4 @@ module MiGA::Project::Hooks
|
|
47
45
|
pull_hook(:"on_result_ready_#{event_args.first}", *event_args)
|
48
46
|
pull_hook(:on_processing_ready) if next_task(nil, false).nil?
|
49
47
|
end
|
50
|
-
|
51
48
|
end
|
data/lib/miga/project/result.rb
CHANGED
@@ -62,6 +62,7 @@ module MiGA::Project::Result
|
|
62
62
|
# Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
|
63
63
|
def add_result_distances(base, _opts)
|
64
64
|
return nil unless result_files_exist?(base, %w[.Rdata .log .txt])
|
65
|
+
|
65
66
|
r = MiGA::Result.new("#{base}.json")
|
66
67
|
r.add_file(:rdata, 'miga-project.Rdata')
|
67
68
|
r.add_file(:matrix, 'miga-project.txt')
|
@@ -79,8 +80,13 @@ module MiGA::Project::Result
|
|
79
80
|
return r
|
80
81
|
end
|
81
82
|
return nil unless result_files_exist?(base, %w[.proposed-clades])
|
82
|
-
|
83
|
-
|
83
|
+
unless is_clade? ||
|
84
|
+
result_files_exist?(
|
85
|
+
base, %w[.pdf .classif .medoids .class.tsv .class.nwk]
|
86
|
+
)
|
87
|
+
return nil
|
88
|
+
end
|
89
|
+
|
84
90
|
r = add_result_iter_clades(base)
|
85
91
|
r.add_file(:aai_tree, 'miga-project.aai.nwk')
|
86
92
|
r.add_file(:proposal, 'miga-project.proposed-clades')
|
@@ -99,8 +105,10 @@ module MiGA::Project::Result
|
|
99
105
|
r.add_file(:empty, 'miga-project.empty')
|
100
106
|
return r
|
101
107
|
end
|
102
|
-
return nil unless result_files_exist?(
|
103
|
-
%w[.pdf .classif .medoids .class.tsv .class.nwk]
|
108
|
+
return nil unless result_files_exist?(
|
109
|
+
base, %w[.pdf .classif .medoids .class.tsv .class.nwk]
|
110
|
+
)
|
111
|
+
|
104
112
|
r = add_result_iter_clades(base)
|
105
113
|
r.add_file(:ani_tree, 'miga-project.ani.nwk')
|
106
114
|
r
|
@@ -127,6 +135,7 @@ module MiGA::Project::Result
|
|
127
135
|
return r
|
128
136
|
end
|
129
137
|
return nil unless result_files_exist?(base, %w[.ogs .stats])
|
138
|
+
|
130
139
|
r = MiGA::Result.new("#{base}.json")
|
131
140
|
r.add_file(:ogs, 'miga-project.ogs')
|
132
141
|
r.add_file(:abc, 'miga-project.abc')
|
@@ -141,6 +150,7 @@ module MiGA::Project::Result
|
|
141
150
|
def add_result_project_stats(base, _opts)
|
142
151
|
return nil unless
|
143
152
|
result_files_exist?(base, %w[.taxonomy.json .metadata.db])
|
153
|
+
|
144
154
|
r = MiGA::Result.new("#{base}.json")
|
145
155
|
r.add_file(:taxonomy_index, 'miga-project.taxonomy.json')
|
146
156
|
r.add_file(:metadata_index, 'miga-project.metadata.db')
|
@@ -151,5 +161,4 @@ module MiGA::Project::Result
|
|
151
161
|
alias add_result_aai_distances add_result_distances
|
152
162
|
alias add_result_ani_distances add_result_distances
|
153
163
|
alias add_result_ssu_distances add_result_distances
|
154
|
-
|
155
164
|
end
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -14,9 +14,11 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
14
14
|
class << self
|
15
15
|
def ncbi_asm_acc2id(acc)
|
16
16
|
return acc if acc =~ /^\d+$/
|
17
|
+
|
17
18
|
search_doc = MiGA::Json.parse(
|
18
19
|
download(:ncbi_search, :assembly, acc, :json),
|
19
|
-
symbolize: false, contents: true
|
20
|
+
symbolize: false, contents: true
|
21
|
+
)
|
20
22
|
(search_doc['esearchresult']['idlist'] || []).first
|
21
23
|
end
|
22
24
|
end
|
@@ -90,14 +92,14 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
90
92
|
# and optionally the Hash +metadata+.
|
91
93
|
def update_metadata(dataset, metadata = {})
|
92
94
|
metadata = get_metadata(metadata)
|
93
|
-
metadata.each { |k,v| dataset.metadata[k] = v }
|
95
|
+
metadata.each { |k, v| dataset.metadata[k] = v }
|
94
96
|
dataset.save
|
95
97
|
end
|
96
98
|
|
97
99
|
##
|
98
100
|
# Get metadata from the remote location.
|
99
101
|
def get_metadata(metadata_def = {})
|
100
|
-
metadata_def.each { |k,v| @metadata[k] = v }
|
102
|
+
metadata_def.each { |k, v| @metadata[k] = v }
|
101
103
|
case universe
|
102
104
|
when :ebi, :ncbi, :web
|
103
105
|
# Get taxonomy
|
@@ -131,7 +133,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
131
133
|
def get_ncbi_taxonomy
|
132
134
|
tax_id = get_ncbi_taxid
|
133
135
|
return nil if tax_id.nil?
|
134
|
-
|
136
|
+
|
137
|
+
lineage = { ns: 'ncbi' }
|
135
138
|
doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
|
136
139
|
doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
|
137
140
|
name = i.scan(%r{<ScientificName>(.*)</ScientificName>}).first.to_a.first
|
@@ -148,89 +151,99 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
148
151
|
# Get the JSON document describing an NCBI assembly entry.
|
149
152
|
def ncbi_asm_json_doc
|
150
153
|
return @_ncbi_asm_json_doc unless @_ncbi_asm_json_doc.nil?
|
154
|
+
|
151
155
|
metadata[:ncbi_asm] ||= ids.first if universe == :ncbi and db == :assembly
|
152
156
|
return nil unless metadata[:ncbi_asm]
|
157
|
+
|
153
158
|
ncbi_asm_id = self.class.ncbi_asm_acc2id metadata[:ncbi_asm]
|
154
159
|
doc = MiGA::Json.parse(
|
155
160
|
self.class.download(:ncbi_summary, :assembly, ncbi_asm_id, :json),
|
156
|
-
symbolize: false, contents: true
|
161
|
+
symbolize: false, contents: true
|
162
|
+
)
|
157
163
|
@_ncbi_asm_json_doc = doc['result'][ doc['result']['uids'].first ]
|
158
164
|
end
|
159
165
|
|
160
|
-
|
161
166
|
private
|
162
167
|
|
163
|
-
|
164
|
-
|
165
|
-
ncbi_asm_json_doc['taxid']
|
166
|
-
end
|
168
|
+
def get_ncbi_taxid_from_web
|
169
|
+
return nil if ncbi_asm_json_doc.nil?
|
167
170
|
|
168
|
-
|
169
|
-
|
170
|
-
ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
|
171
|
-
return nil if ln.nil?
|
172
|
-
ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
|
173
|
-
return nil unless ln =~ /^\d+$/
|
174
|
-
ln
|
175
|
-
end
|
171
|
+
ncbi_asm_json_doc['taxid']
|
172
|
+
end
|
176
173
|
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
return nil if ln.nil?
|
182
|
-
ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, '\\1')
|
183
|
-
return nil unless ln =~ /^\d+$/
|
184
|
-
ln
|
185
|
-
end
|
174
|
+
def get_ncbi_taxid_from_ncbi
|
175
|
+
doc = self.class.download(universe, db, ids, :gb).split(/\n/)
|
176
|
+
ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
|
177
|
+
return nil if ln.nil?
|
186
178
|
|
187
|
-
|
188
|
-
|
189
|
-
biosample = self.class.ncbi_map(metadata[:ncbi_nuccore],
|
190
|
-
:nuccore, :biosample)
|
191
|
-
return metadata if biosample.nil?
|
192
|
-
asm = self.class.ncbi_map(biosample, :biosample, :assembly)
|
193
|
-
metadata[:ncbi_asm] = asm.to_s unless asm.nil?
|
194
|
-
get_type_status_ncbi_asm metadata
|
195
|
-
end
|
179
|
+
ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
|
180
|
+
return nil unless ln =~ /^\d+$/
|
196
181
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
metadata[:
|
217
|
-
|
218
|
-
|
182
|
+
ln
|
183
|
+
end
|
184
|
+
|
185
|
+
def get_ncbi_taxid_from_ebi
|
186
|
+
doc = self.class.download(universe, db, ids, :annot).split(/\n/)
|
187
|
+
ln = doc.grep(%r{^FT\s+/db_xref="taxon:}).first
|
188
|
+
ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
|
189
|
+
return nil if ln.nil?
|
190
|
+
|
191
|
+
ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, '\\1')
|
192
|
+
return nil unless ln =~ /^\d+$/
|
193
|
+
|
194
|
+
ln
|
195
|
+
end
|
196
|
+
|
197
|
+
def get_type_status_ncbi_nuccore(metadata)
|
198
|
+
return metadata if metadata[:ncbi_nuccore].nil?
|
199
|
+
|
200
|
+
biosample =
|
201
|
+
self.class.ncbi_map(metadata[:ncbi_nuccore], :nuccore, :biosample)
|
202
|
+
return metadata if biosample.nil?
|
203
|
+
|
204
|
+
asm = self.class.ncbi_map(biosample, :biosample, :assembly)
|
205
|
+
metadata[:ncbi_asm] = asm.to_s unless asm.nil?
|
206
|
+
get_type_status_ncbi_asm metadata
|
207
|
+
end
|
208
|
+
|
209
|
+
def get_type_status_ncbi_asm(metadata)
|
210
|
+
return metadata if ncbi_asm_json_doc.nil?
|
211
|
+
|
212
|
+
from_type = ncbi_asm_json_doc['from_type']
|
213
|
+
from_type = ncbi_asm_json_doc['fromtype'] if from_type.nil?
|
214
|
+
case from_type
|
215
|
+
when nil
|
216
|
+
# Do nothing
|
217
|
+
when ''
|
218
|
+
metadata[:is_type] = false
|
219
|
+
metadata[:is_ref_type] = false
|
220
|
+
when 'assembly from reference material', 'assembly designated as reftype'
|
221
|
+
metadata[:is_type] = false
|
222
|
+
metadata[:is_ref_type] = true
|
223
|
+
metadata[:type_rel] = from_type
|
224
|
+
else
|
225
|
+
metadata[:is_type] = true
|
226
|
+
metadata[:type_rel] = from_type
|
219
227
|
end
|
228
|
+
metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
|
229
|
+
metadata[:suspect] = nil if metadata[:suspect].empty?
|
230
|
+
MiGA.DEBUG "Got type: #{from_type}"
|
231
|
+
metadata
|
232
|
+
end
|
220
233
|
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
end
|
233
|
-
File.unlink(a_ctg) if File.exist? a_ctg
|
234
|
-
File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
|
234
|
+
def save_assembly_to(project, name, udb)
|
235
|
+
dir = MiGA::Dataset.RESULT_DIRS[:assembly]
|
236
|
+
base = "#{project.path}/data/#{dir}/#{name}"
|
237
|
+
l_ctg = "#{base}.LargeContigs.fna"
|
238
|
+
a_ctg = "#{base}.AllContigs.fna"
|
239
|
+
File.open("#{base}.start", 'w') { |ofh| ofh.puts Time.now.to_s }
|
240
|
+
if udb[:format] == :fasta_gz
|
241
|
+
download "#{l_ctg}.gz"
|
242
|
+
system "gzip -d '#{l_ctg}.gz'"
|
243
|
+
else
|
244
|
+
download l_ctg
|
235
245
|
end
|
246
|
+
File.unlink(a_ctg) if File.exist? a_ctg
|
247
|
+
File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
|
248
|
+
end
|
236
249
|
end
|
@@ -1,21 +1,20 @@
|
|
1
|
-
|
2
1
|
require 'open-uri'
|
3
2
|
require 'cgi'
|
4
3
|
|
5
4
|
class MiGA::RemoteDataset < MiGA::MiGA
|
6
|
-
|
7
5
|
# Class-level
|
8
6
|
class << self
|
9
|
-
def UNIVERSE
|
7
|
+
def UNIVERSE
|
8
|
+
@@UNIVERSE
|
9
|
+
end
|
10
10
|
end
|
11
|
-
|
12
11
|
end
|
13
12
|
|
14
13
|
module MiGA::RemoteDataset::Base
|
15
|
-
|
16
14
|
@@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
|
17
15
|
@@_NCBI_API_KEY = lambda { |url|
|
18
|
-
ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
|
16
|
+
ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
|
17
|
+
}
|
19
18
|
|
20
19
|
##
|
21
20
|
# Structure of the different database Universes or containers. The structure
|
@@ -43,13 +42,13 @@ module MiGA::RemoteDataset::Base
|
|
43
42
|
method: :net
|
44
43
|
},
|
45
44
|
ebi: {
|
46
|
-
dbs: { embl: {stage: :assembly, format: :fasta} },
|
45
|
+
dbs: { embl: { stage: :assembly, format: :fasta } },
|
47
46
|
url: 'https://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s',
|
48
47
|
method: :rest
|
49
48
|
},
|
50
49
|
ncbi: {
|
51
50
|
dbs: {
|
52
|
-
nuccore: { stage: :assembly, format: :fasta },
|
51
|
+
nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
|
53
52
|
assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
|
54
53
|
taxonomy: { stage: :metadata, format: :xml }
|
55
54
|
},
|
@@ -59,9 +58,10 @@ module MiGA::RemoteDataset::Base
|
|
59
58
|
},
|
60
59
|
ncbi_map: {
|
61
60
|
dbs: {
|
62
|
-
nuccore: {
|
63
|
-
format: :json
|
64
|
-
|
61
|
+
nuccore: {
|
62
|
+
stage: :metadata, map_to: [:biosample, :assembly], format: :json
|
63
|
+
},
|
64
|
+
biosample: { stage: :metadata, map_to: [:assembly], format: :json }
|
65
65
|
},
|
66
66
|
url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%4$s&retmode=%3$s",
|
67
67
|
method: :net,
|
@@ -81,6 +81,4 @@ module MiGA::RemoteDataset::Base
|
|
81
81
|
api_key: @@_NCBI_API_KEY
|
82
82
|
}
|
83
83
|
}
|
84
|
-
|
85
84
|
end
|
86
|
-
|
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
require 'miga/remote_dataset/base'
|
3
2
|
|
4
3
|
class MiGA::RemoteDataset
|
@@ -26,8 +25,8 @@ class MiGA::RemoteDataset
|
|
26
25
|
obj: obj
|
27
26
|
}
|
28
27
|
doc = send("#{getter}_#{method}", opts)
|
29
|
-
unless file.nil?
|
30
|
-
ofh = File.open(file, 'w')
|
28
|
+
unless opts[:file].nil?
|
29
|
+
ofh = File.open(opts[:file], 'w')
|
31
30
|
ofh.print doc.force_encoding('UTF-8')
|
32
31
|
ofh.close
|
33
32
|
end
|
@@ -45,8 +44,26 @@ class MiGA::RemoteDataset
|
|
45
44
|
def ncbi_asm_rest(opts)
|
46
45
|
url_dir = opts[:obj].ncbi_asm_json_doc['ftppath_genbank']
|
47
46
|
url = "#{url_dir}/#{File.basename url_dir}_genomic.fna.gz"
|
48
|
-
download(
|
49
|
-
|
47
|
+
download(
|
48
|
+
:web, :assembly_gz, url,
|
49
|
+
opts[:format], opts[:file], opts[:extra], opts[:obj]
|
50
|
+
)
|
51
|
+
end
|
52
|
+
|
53
|
+
##
|
54
|
+
# Download data from NCBI GenBank (nuccore) database using the REST method.
|
55
|
+
# Supported +opts+ (Hash) are the same as #download_rest and #ncbi_asm_rest.
|
56
|
+
def ncbi_gb_rest(opts)
|
57
|
+
o = download_rest(opts)
|
58
|
+
return o unless o.strip.empty?
|
59
|
+
|
60
|
+
MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
|
61
|
+
opts[:format] = :fasta_gz
|
62
|
+
if opts[:file]
|
63
|
+
File.unlink(opts[:file]) if File.exist? opts[:file]
|
64
|
+
opts[:file] = "#{opts[:file]}.gz"
|
65
|
+
end
|
66
|
+
ncbi_asm_rest(opts)
|
50
67
|
end
|
51
68
|
|
52
69
|
##
|
@@ -58,8 +75,9 @@ class MiGA::RemoteDataset
|
|
58
75
|
# +extra+: Array
|
59
76
|
def download_rest(opts)
|
60
77
|
u = @@UNIVERSE[opts[:universe]]
|
61
|
-
url = sprintf(
|
62
|
-
opts[:db], opts[:ids].join(','), opts[:format], *opts[:extra]
|
78
|
+
url = sprintf(
|
79
|
+
u[:url], opts[:db], opts[:ids].join(','), opts[:format], *opts[:extra]
|
80
|
+
)
|
63
81
|
url = u[:api_key][url] unless u[:api_key].nil?
|
64
82
|
download_url url
|
65
83
|
end
|
@@ -80,17 +98,20 @@ class MiGA::RemoteDataset
|
|
80
98
|
rescue => e
|
81
99
|
@timeout_try += 1
|
82
100
|
raise e if @timeout_try >= 3
|
101
|
+
|
102
|
+
sleep 5 # <- For: 429 Too Many Requests
|
83
103
|
retry
|
84
104
|
end
|
85
105
|
doc
|
86
106
|
end
|
87
|
-
|
107
|
+
|
88
108
|
##
|
89
109
|
# Looks for the entry +id+ in +dbfrom+, and returns the linked
|
90
110
|
# identifier in +db+ (or nil).
|
91
111
|
def ncbi_map(id, dbfrom, db)
|
92
112
|
doc = download(:ncbi_map, dbfrom, id, :json, nil, [db])
|
93
113
|
return if doc.empty?
|
114
|
+
|
94
115
|
tree = MiGA::Json.parse(doc, contents: true)
|
95
116
|
[:linksets, 0, :linksetdbs, 0, :links, 0].each do |i|
|
96
117
|
tree = tree[i]
|
@@ -102,11 +123,12 @@ class MiGA::RemoteDataset
|
|
102
123
|
end
|
103
124
|
|
104
125
|
module MiGA::RemoteDataset::Download
|
105
|
-
|
106
126
|
##
|
107
|
-
# Download data into +file
|
127
|
+
# Download data into +file+
|
108
128
|
def download(file)
|
109
|
-
self.class.download(
|
110
|
-
|
129
|
+
self.class.download(
|
130
|
+
universe, db, ids, self.class.UNIVERSE[universe][:dbs][db][:format],
|
131
|
+
file, [], self
|
132
|
+
)
|
111
133
|
end
|
112
134
|
end
|
data/lib/miga/result.rb
CHANGED
@@ -8,7 +8,6 @@ require 'miga/result/stats'
|
|
8
8
|
##
|
9
9
|
# The result from a task run. It can be project-wide or dataset-specific.
|
10
10
|
class MiGA::Result < MiGA::MiGA
|
11
|
-
|
12
11
|
include MiGA::Result::Dates
|
13
12
|
include MiGA::Result::Source
|
14
13
|
include MiGA::Result::Stats
|
@@ -26,6 +25,7 @@ class MiGA::Result < MiGA::MiGA
|
|
26
25
|
# Returns MiGA::Result if it already exists, nil otherwise.
|
27
26
|
def load(path)
|
28
27
|
return nil unless MiGA::Result.exist? path
|
28
|
+
|
29
29
|
MiGA::Result.new(path)
|
30
30
|
end
|
31
31
|
|
@@ -33,6 +33,7 @@ class MiGA::Result < MiGA::MiGA
|
|
33
33
|
FileUtils.rm(path) if force && File.exist?(path)
|
34
34
|
r_pre = self.load(path)
|
35
35
|
return r_pre unless r_pre.nil?
|
36
|
+
|
36
37
|
yield
|
37
38
|
self.load(path)
|
38
39
|
end
|
@@ -41,67 +42,72 @@ class MiGA::Result < MiGA::MiGA
|
|
41
42
|
# Instance-level
|
42
43
|
|
43
44
|
##
|
44
|
-
# Hash with the result metadata
|
45
|
+
# Hash with the result metadata
|
45
46
|
attr_reader :data
|
46
47
|
|
47
48
|
##
|
48
|
-
#
|
49
|
-
attr_reader :results
|
50
|
-
|
51
|
-
##
|
52
|
-
# Load or create the MiGA::Result described by the JSON file +path+.
|
49
|
+
# Load or create the MiGA::Result described by the JSON file +path+
|
53
50
|
def initialize(path)
|
54
51
|
@path = File.absolute_path(path)
|
55
52
|
MiGA::Result.exist?(@path) ? self.load : create
|
56
53
|
end
|
57
54
|
|
58
55
|
##
|
59
|
-
# Is the result clean? Returns Boolean
|
60
|
-
def clean?
|
56
|
+
# Is the result clean? Returns Boolean
|
57
|
+
def clean?
|
58
|
+
!!self[:clean]
|
59
|
+
end
|
61
60
|
|
62
61
|
##
|
63
|
-
# Register the result as cleaned
|
64
|
-
def clean!
|
62
|
+
# Register the result as cleaned
|
63
|
+
def clean!
|
64
|
+
self[:clean] = true
|
65
|
+
end
|
65
66
|
|
66
67
|
##
|
67
68
|
# Path to the standard files of the result. +which+ must be one of:
|
68
69
|
# - :json (default) : JSON file describing the result.
|
69
70
|
# - :start : File with the date when the processing started.
|
70
71
|
# - :done : File with the date when the processing ended.
|
71
|
-
def path(which
|
72
|
+
def path(which = :json)
|
72
73
|
case which.to_sym
|
73
74
|
when :json
|
74
75
|
@path
|
75
76
|
when :start
|
76
|
-
@path.sub(/\.json$/,
|
77
|
+
@path.sub(/\.json$/, '.start')
|
77
78
|
when :done
|
78
|
-
@path.sub(/\.json$/,
|
79
|
+
@path.sub(/\.json$/, '.done')
|
79
80
|
end
|
80
81
|
end
|
81
82
|
|
82
83
|
##
|
83
|
-
# Directory containing the result
|
84
|
+
# Directory containing the result
|
84
85
|
def dir
|
85
86
|
File.dirname(path)
|
86
87
|
end
|
87
88
|
|
88
89
|
##
|
89
|
-
# Absolute path to the file(s) defined by symbol +k
|
90
|
+
# Absolute path to the file(s) defined by symbol +k+
|
90
91
|
def file_path(k)
|
91
92
|
k = k.to_sym
|
92
93
|
f = self[:files].nil? ? nil : self[:files][k]
|
93
94
|
return nil if f.nil?
|
94
95
|
return File.expand_path(f, dir) unless f.is_a? Array
|
95
|
-
|
96
|
+
|
97
|
+
f.map { |fi| File.expand_path(fi, dir) }
|
96
98
|
end
|
97
99
|
|
98
100
|
##
|
99
|
-
# Entry with symbol +k
|
100
|
-
def [](k)
|
101
|
+
# Entry with symbol +k+
|
102
|
+
def [](k)
|
103
|
+
data[k.to_sym]
|
104
|
+
end
|
101
105
|
|
102
106
|
##
|
103
|
-
# Adds value +v+ to entry with symbol +k
|
104
|
-
def []=(k,v)
|
107
|
+
# Adds value +v+ to entry with symbol +k+
|
108
|
+
def []=(k, v)
|
109
|
+
data[k.to_sym] = v
|
110
|
+
end
|
105
111
|
|
106
112
|
##
|
107
113
|
# Register +file+ (path relative to #dir) with the symbol +k+. If the file
|
@@ -116,20 +122,20 @@ class MiGA::Result < MiGA::MiGA
|
|
116
122
|
end
|
117
123
|
|
118
124
|
##
|
119
|
-
# #add_file for each key-value pair in the +files+ Hash
|
125
|
+
# #add_file for each key-value pair in the +files+ Hash
|
120
126
|
def add_files(files)
|
121
127
|
files.each { |k, v| add_file(k, v) }
|
122
128
|
end
|
123
129
|
|
124
130
|
##
|
125
|
-
# Initialize and #save empty result
|
131
|
+
# Initialize and #save empty result
|
126
132
|
def create
|
127
|
-
@data = { created: Time.now.to_s,
|
133
|
+
@data = { created: Time.now.to_s, stats: {}, files: {} }
|
128
134
|
save
|
129
135
|
end
|
130
136
|
|
131
137
|
##
|
132
|
-
# Save the result persistently (in the JSON file #path)
|
138
|
+
# Save the result persistently (in the JSON file #path)
|
133
139
|
def save
|
134
140
|
@data[:updated] = Time.now.to_s
|
135
141
|
s = path(:start)
|
@@ -142,24 +148,23 @@ class MiGA::Result < MiGA::MiGA
|
|
142
148
|
end
|
143
149
|
|
144
150
|
##
|
145
|
-
# Load (or reload) result data in the JSON file #path
|
151
|
+
# Load (or reload) result data in the JSON file #path
|
146
152
|
def load
|
147
153
|
@data = MiGA::Json.parse(path)
|
148
154
|
@data[:files] ||= {}
|
149
|
-
@results = (self[:results] || []).map{ |rs| MiGA::Result.new rs }
|
150
155
|
end
|
151
156
|
|
152
157
|
##
|
153
|
-
# Remove result, including all associated files
|
158
|
+
# Remove result, including all associated files
|
154
159
|
def remove!
|
155
|
-
each_file
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
160
|
+
each_file { |file| FileUtils.rm_rf(File.join(dir, file)) }
|
161
|
+
unlink
|
162
|
+
end
|
163
|
+
|
164
|
+
# Unlink result by removing the .done and .start timestamps and the
|
165
|
+
# .json descriptor, but don't remove any other associated files
|
166
|
+
def unlink
|
167
|
+
%i(start done).each { |i| f = path(i) and File.unlink(f) }
|
163
168
|
File.unlink path
|
164
169
|
end
|
165
170
|
|
@@ -172,29 +177,19 @@ class MiGA::Result < MiGA::MiGA
|
|
172
177
|
# Note that multiple files may have the same symbol (file_sym), since
|
173
178
|
# arrays of files are supported.
|
174
179
|
def each_file(&blk)
|
180
|
+
return to_enum(:each_file) unless block_given?
|
181
|
+
|
175
182
|
@data[:files] ||= {}
|
176
|
-
self[:files].each do |k,files|
|
183
|
+
self[:files].each do |k, files|
|
177
184
|
files = [files] unless files.kind_of? Array
|
178
185
|
files.each do |file|
|
179
186
|
case blk.arity
|
180
|
-
when 1
|
181
|
-
|
182
|
-
when
|
183
|
-
|
184
|
-
when 3
|
185
|
-
blk.call(k, file, File.expand_path(file, dir))
|
186
|
-
else
|
187
|
-
raise "Wrong number of arguments: #{blk.arity} for 1..3"
|
187
|
+
when 1; blk.call(file)
|
188
|
+
when 2; blk.call(k, file)
|
189
|
+
when 3; blk.call(k, file, File.expand_path(file, dir))
|
190
|
+
else; raise "Wrong number of arguments: #{blk.arity} for 1..3"
|
188
191
|
end
|
189
192
|
end
|
190
193
|
end
|
191
194
|
end
|
192
|
-
|
193
|
-
##
|
194
|
-
# Add the MiGA::Result +result+ as part of the current result.
|
195
|
-
def add_result(result)
|
196
|
-
@data[:results] << result.path
|
197
|
-
save
|
198
|
-
end
|
199
|
-
|
200
195
|
end
|