miga-base 0.7.3.1 → 0.7.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli.rb +10 -8
  3. data/lib/miga/cli/action.rb +2 -3
  4. data/lib/miga/cli/action/about.rb +5 -6
  5. data/lib/miga/cli/action/add.rb +18 -12
  6. data/lib/miga/cli/action/add_result.rb +2 -3
  7. data/lib/miga/cli/action/archive.rb +1 -2
  8. data/lib/miga/cli/action/classify_wf.rb +8 -6
  9. data/lib/miga/cli/action/console.rb +0 -1
  10. data/lib/miga/cli/action/daemon.rb +7 -7
  11. data/lib/miga/cli/action/date.rb +0 -1
  12. data/lib/miga/cli/action/derep_wf.rb +5 -4
  13. data/lib/miga/cli/action/doctor.rb +71 -82
  14. data/lib/miga/cli/action/doctor/base.rb +102 -0
  15. data/lib/miga/cli/action/edit.rb +14 -2
  16. data/lib/miga/cli/action/files.rb +8 -8
  17. data/lib/miga/cli/action/find.rb +5 -6
  18. data/lib/miga/cli/action/generic.rb +7 -7
  19. data/lib/miga/cli/action/get.rb +20 -17
  20. data/lib/miga/cli/action/get_db.rb +8 -2
  21. data/lib/miga/cli/action/index_wf.rb +1 -1
  22. data/lib/miga/cli/action/init.rb +53 -41
  23. data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
  24. data/lib/miga/cli/action/lair.rb +7 -7
  25. data/lib/miga/cli/action/ln.rb +6 -6
  26. data/lib/miga/cli/action/ls.rb +1 -2
  27. data/lib/miga/cli/action/ncbi_get.rb +11 -3
  28. data/lib/miga/cli/action/new.rb +4 -4
  29. data/lib/miga/cli/action/next_step.rb +0 -1
  30. data/lib/miga/cli/action/preproc_wf.rb +3 -3
  31. data/lib/miga/cli/action/quality_wf.rb +1 -1
  32. data/lib/miga/cli/action/rm.rb +2 -3
  33. data/lib/miga/cli/action/run.rb +8 -8
  34. data/lib/miga/cli/action/stats.rb +8 -4
  35. data/lib/miga/cli/action/summary.rb +7 -6
  36. data/lib/miga/cli/action/tax_dist.rb +8 -4
  37. data/lib/miga/cli/action/tax_index.rb +3 -4
  38. data/lib/miga/cli/action/tax_set.rb +7 -6
  39. data/lib/miga/cli/action/tax_test.rb +6 -5
  40. data/lib/miga/cli/action/wf.rb +25 -19
  41. data/lib/miga/cli/base.rb +34 -32
  42. data/lib/miga/cli/objects_helper.rb +27 -18
  43. data/lib/miga/cli/opt_helper.rb +3 -2
  44. data/lib/miga/common.rb +2 -5
  45. data/lib/miga/common/base.rb +15 -16
  46. data/lib/miga/common/format.rb +8 -5
  47. data/lib/miga/common/hooks.rb +1 -4
  48. data/lib/miga/common/path.rb +4 -9
  49. data/lib/miga/common/with_daemon.rb +5 -2
  50. data/lib/miga/common/with_daemon_class.rb +1 -1
  51. data/lib/miga/common/with_result.rb +2 -1
  52. data/lib/miga/daemon.rb +93 -44
  53. data/lib/miga/daemon/base.rb +30 -11
  54. data/lib/miga/dataset.rb +47 -37
  55. data/lib/miga/dataset/base.rb +52 -37
  56. data/lib/miga/dataset/hooks.rb +3 -4
  57. data/lib/miga/dataset/result.rb +17 -1
  58. data/lib/miga/dataset/status.rb +6 -5
  59. data/lib/miga/json.rb +5 -7
  60. data/lib/miga/lair.rb +4 -0
  61. data/lib/miga/metadata.rb +4 -3
  62. data/lib/miga/project.rb +29 -20
  63. data/lib/miga/project/base.rb +52 -37
  64. data/lib/miga/project/dataset.rb +33 -26
  65. data/lib/miga/project/hooks.rb +0 -3
  66. data/lib/miga/project/result.rb +14 -5
  67. data/lib/miga/remote_dataset.rb +85 -72
  68. data/lib/miga/remote_dataset/base.rb +11 -13
  69. data/lib/miga/remote_dataset/download.rb +34 -12
  70. data/lib/miga/result.rb +48 -53
  71. data/lib/miga/result/base.rb +0 -2
  72. data/lib/miga/result/dates.rb +1 -3
  73. data/lib/miga/result/source.rb +15 -16
  74. data/lib/miga/result/stats.rb +37 -27
  75. data/lib/miga/tax_dist.rb +6 -3
  76. data/lib/miga/tax_index.rb +17 -17
  77. data/lib/miga/taxonomy.rb +6 -1
  78. data/lib/miga/taxonomy/base.rb +19 -15
  79. data/lib/miga/version.rb +19 -16
  80. data/scripts/project_stats.bash +3 -0
  81. data/scripts/stats.bash +1 -1
  82. data/test/common_test.rb +3 -11
  83. data/test/daemon_helper.rb +38 -0
  84. data/test/daemon_test.rb +91 -99
  85. data/test/dataset_test.rb +63 -59
  86. data/test/format_test.rb +3 -11
  87. data/test/hook_test.rb +50 -55
  88. data/test/json_test.rb +7 -8
  89. data/test/lair_test.rb +22 -28
  90. data/test/metadata_test.rb +6 -14
  91. data/test/project_test.rb +33 -40
  92. data/test/remote_dataset_test.rb +26 -32
  93. data/test/result_stats_test.rb +17 -27
  94. data/test/result_test.rb +41 -34
  95. data/test/tax_dist_test.rb +2 -4
  96. data/test/tax_index_test.rb +4 -10
  97. data/test/taxonomy_test.rb +7 -9
  98. data/test/test_helper.rb +42 -1
  99. data/test/with_daemon_test.rb +14 -22
  100. data/utils/adapters.fa +13 -0
  101. data/utils/cleanup-databases.rb +6 -5
  102. data/utils/distance/base.rb +0 -1
  103. data/utils/distance/commands.rb +19 -12
  104. data/utils/distance/database.rb +24 -21
  105. data/utils/distance/pipeline.rb +23 -10
  106. data/utils/distance/runner.rb +20 -16
  107. data/utils/distance/temporal.rb +1 -3
  108. data/utils/distances.rb +1 -1
  109. data/utils/domain-ess-genes.rb +7 -7
  110. data/utils/index_metadata.rb +5 -4
  111. data/utils/mytaxa_scan.rb +18 -16
  112. data/utils/representatives.rb +5 -4
  113. data/utils/requirements.txt +1 -1
  114. data/utils/subclade/base.rb +0 -1
  115. data/utils/subclade/pipeline.rb +7 -6
  116. data/utils/subclade/runner.rb +9 -9
  117. data/utils/subclade/temporal.rb +0 -2
  118. data/utils/subclades-compile.rb +39 -37
  119. data/utils/subclades.rb +1 -1
  120. metadata +6 -4
@@ -1,4 +1,3 @@
1
-
2
1
  require 'miga/common/hooks'
3
2
 
4
3
  ##
@@ -18,7 +17,6 @@ require 'miga/common/hooks'
18
17
  # Internal hooks:
19
18
  # - _pull_result_hooks()
20
19
  module MiGA::Project::Hooks
21
-
22
20
  include MiGA::Common::Hooks
23
21
 
24
22
  def default_hooks
@@ -47,5 +45,4 @@ module MiGA::Project::Hooks
47
45
  pull_hook(:"on_result_ready_#{event_args.first}", *event_args)
48
46
  pull_hook(:on_processing_ready) if next_task(nil, false).nil?
49
47
  end
50
-
51
48
  end
@@ -62,6 +62,7 @@ module MiGA::Project::Result
62
62
  # Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
63
63
  def add_result_distances(base, _opts)
64
64
  return nil unless result_files_exist?(base, %w[.Rdata .log .txt])
65
+
65
66
  r = MiGA::Result.new("#{base}.json")
66
67
  r.add_file(:rdata, 'miga-project.Rdata')
67
68
  r.add_file(:matrix, 'miga-project.txt')
@@ -79,8 +80,13 @@ module MiGA::Project::Result
79
80
  return r
80
81
  end
81
82
  return nil unless result_files_exist?(base, %w[.proposed-clades])
82
- return nil unless is_clade? or result_files_exist?(base,
83
- %w[.pdf .classif .medoids .class.tsv .class.nwk])
83
+ unless is_clade? ||
84
+ result_files_exist?(
85
+ base, %w[.pdf .classif .medoids .class.tsv .class.nwk]
86
+ )
87
+ return nil
88
+ end
89
+
84
90
  r = add_result_iter_clades(base)
85
91
  r.add_file(:aai_tree, 'miga-project.aai.nwk')
86
92
  r.add_file(:proposal, 'miga-project.proposed-clades')
@@ -99,8 +105,10 @@ module MiGA::Project::Result
99
105
  r.add_file(:empty, 'miga-project.empty')
100
106
  return r
101
107
  end
102
- return nil unless result_files_exist?(base,
103
- %w[.pdf .classif .medoids .class.tsv .class.nwk])
108
+ return nil unless result_files_exist?(
109
+ base, %w[.pdf .classif .medoids .class.tsv .class.nwk]
110
+ )
111
+
104
112
  r = add_result_iter_clades(base)
105
113
  r.add_file(:ani_tree, 'miga-project.ani.nwk')
106
114
  r
@@ -127,6 +135,7 @@ module MiGA::Project::Result
127
135
  return r
128
136
  end
129
137
  return nil unless result_files_exist?(base, %w[.ogs .stats])
138
+
130
139
  r = MiGA::Result.new("#{base}.json")
131
140
  r.add_file(:ogs, 'miga-project.ogs')
132
141
  r.add_file(:abc, 'miga-project.abc')
@@ -141,6 +150,7 @@ module MiGA::Project::Result
141
150
  def add_result_project_stats(base, _opts)
142
151
  return nil unless
143
152
  result_files_exist?(base, %w[.taxonomy.json .metadata.db])
153
+
144
154
  r = MiGA::Result.new("#{base}.json")
145
155
  r.add_file(:taxonomy_index, 'miga-project.taxonomy.json')
146
156
  r.add_file(:metadata_index, 'miga-project.metadata.db')
@@ -151,5 +161,4 @@ module MiGA::Project::Result
151
161
  alias add_result_aai_distances add_result_distances
152
162
  alias add_result_ani_distances add_result_distances
153
163
  alias add_result_ssu_distances add_result_distances
154
-
155
164
  end
@@ -14,9 +14,11 @@ class MiGA::RemoteDataset < MiGA::MiGA
14
14
  class << self
15
15
  def ncbi_asm_acc2id(acc)
16
16
  return acc if acc =~ /^\d+$/
17
+
17
18
  search_doc = MiGA::Json.parse(
18
19
  download(:ncbi_search, :assembly, acc, :json),
19
- symbolize: false, contents: true)
20
+ symbolize: false, contents: true
21
+ )
20
22
  (search_doc['esearchresult']['idlist'] || []).first
21
23
  end
22
24
  end
@@ -90,14 +92,14 @@ class MiGA::RemoteDataset < MiGA::MiGA
90
92
  # and optionally the Hash +metadata+.
91
93
  def update_metadata(dataset, metadata = {})
92
94
  metadata = get_metadata(metadata)
93
- metadata.each { |k,v| dataset.metadata[k] = v }
95
+ metadata.each { |k, v| dataset.metadata[k] = v }
94
96
  dataset.save
95
97
  end
96
98
 
97
99
  ##
98
100
  # Get metadata from the remote location.
99
101
  def get_metadata(metadata_def = {})
100
- metadata_def.each { |k,v| @metadata[k] = v }
102
+ metadata_def.each { |k, v| @metadata[k] = v }
101
103
  case universe
102
104
  when :ebi, :ncbi, :web
103
105
  # Get taxonomy
@@ -131,7 +133,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
131
133
  def get_ncbi_taxonomy
132
134
  tax_id = get_ncbi_taxid
133
135
  return nil if tax_id.nil?
134
- lineage = {ns: 'ncbi'}
136
+
137
+ lineage = { ns: 'ncbi' }
135
138
  doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
136
139
  doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
137
140
  name = i.scan(%r{<ScientificName>(.*)</ScientificName>}).first.to_a.first
@@ -148,89 +151,99 @@ class MiGA::RemoteDataset < MiGA::MiGA
148
151
  # Get the JSON document describing an NCBI assembly entry.
149
152
  def ncbi_asm_json_doc
150
153
  return @_ncbi_asm_json_doc unless @_ncbi_asm_json_doc.nil?
154
+
151
155
  metadata[:ncbi_asm] ||= ids.first if universe == :ncbi and db == :assembly
152
156
  return nil unless metadata[:ncbi_asm]
157
+
153
158
  ncbi_asm_id = self.class.ncbi_asm_acc2id metadata[:ncbi_asm]
154
159
  doc = MiGA::Json.parse(
155
160
  self.class.download(:ncbi_summary, :assembly, ncbi_asm_id, :json),
156
- symbolize: false, contents: true)
161
+ symbolize: false, contents: true
162
+ )
157
163
  @_ncbi_asm_json_doc = doc['result'][ doc['result']['uids'].first ]
158
164
  end
159
165
 
160
-
161
166
  private
162
167
 
163
- def get_ncbi_taxid_from_web
164
- return nil if ncbi_asm_json_doc.nil?
165
- ncbi_asm_json_doc['taxid']
166
- end
168
+ def get_ncbi_taxid_from_web
169
+ return nil if ncbi_asm_json_doc.nil?
167
170
 
168
- def get_ncbi_taxid_from_ncbi
169
- doc = self.class.download(universe, db, ids, :gb).split(/\n/)
170
- ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
171
- return nil if ln.nil?
172
- ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
173
- return nil unless ln =~ /^\d+$/
174
- ln
175
- end
171
+ ncbi_asm_json_doc['taxid']
172
+ end
176
173
 
177
- def get_ncbi_taxid_from_ebi
178
- doc = self.class.download(universe, db, ids, :annot).split(/\n/)
179
- ln = doc.grep(%r{^FT\s+/db_xref="taxon:}).first
180
- ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
181
- return nil if ln.nil?
182
- ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, '\\1')
183
- return nil unless ln =~ /^\d+$/
184
- ln
185
- end
174
+ def get_ncbi_taxid_from_ncbi
175
+ doc = self.class.download(universe, db, ids, :gb).split(/\n/)
176
+ ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
177
+ return nil if ln.nil?
186
178
 
187
- def get_type_status_ncbi_nuccore(metadata)
188
- return metadata if metadata[:ncbi_nuccore].nil?
189
- biosample = self.class.ncbi_map(metadata[:ncbi_nuccore],
190
- :nuccore, :biosample)
191
- return metadata if biosample.nil?
192
- asm = self.class.ncbi_map(biosample, :biosample, :assembly)
193
- metadata[:ncbi_asm] = asm.to_s unless asm.nil?
194
- get_type_status_ncbi_asm metadata
195
- end
179
+ ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
180
+ return nil unless ln =~ /^\d+$/
196
181
 
197
- def get_type_status_ncbi_asm(metadata)
198
- return metadata if ncbi_asm_json_doc.nil?
199
- from_type = ncbi_asm_json_doc['from_type']
200
- from_type = ncbi_asm_json_doc['fromtype'] if from_type.nil?
201
- case from_type
202
- when nil
203
- # Do nothing
204
- when ''
205
- metadata[:is_type] = false
206
- metadata[:is_ref_type] = false
207
- when 'assembly from reference material', 'assembly designated as reftype'
208
- metadata[:is_type] = false
209
- metadata[:is_ref_type] = true
210
- metadata[:type_rel] = from_type
211
- else
212
- metadata[:is_type] = true
213
- metadata[:type_rel] = from_type
214
- end
215
- metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
216
- metadata[:suspect] = nil if metadata[:suspect].empty?
217
- MiGA.DEBUG "Got type: #{from_type}"
218
- metadata
182
+ ln
183
+ end
184
+
185
+ def get_ncbi_taxid_from_ebi
186
+ doc = self.class.download(universe, db, ids, :annot).split(/\n/)
187
+ ln = doc.grep(%r{^FT\s+/db_xref="taxon:}).first
188
+ ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
189
+ return nil if ln.nil?
190
+
191
+ ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, '\\1')
192
+ return nil unless ln =~ /^\d+$/
193
+
194
+ ln
195
+ end
196
+
197
+ def get_type_status_ncbi_nuccore(metadata)
198
+ return metadata if metadata[:ncbi_nuccore].nil?
199
+
200
+ biosample =
201
+ self.class.ncbi_map(metadata[:ncbi_nuccore], :nuccore, :biosample)
202
+ return metadata if biosample.nil?
203
+
204
+ asm = self.class.ncbi_map(biosample, :biosample, :assembly)
205
+ metadata[:ncbi_asm] = asm.to_s unless asm.nil?
206
+ get_type_status_ncbi_asm metadata
207
+ end
208
+
209
+ def get_type_status_ncbi_asm(metadata)
210
+ return metadata if ncbi_asm_json_doc.nil?
211
+
212
+ from_type = ncbi_asm_json_doc['from_type']
213
+ from_type = ncbi_asm_json_doc['fromtype'] if from_type.nil?
214
+ case from_type
215
+ when nil
216
+ # Do nothing
217
+ when ''
218
+ metadata[:is_type] = false
219
+ metadata[:is_ref_type] = false
220
+ when 'assembly from reference material', 'assembly designated as reftype'
221
+ metadata[:is_type] = false
222
+ metadata[:is_ref_type] = true
223
+ metadata[:type_rel] = from_type
224
+ else
225
+ metadata[:is_type] = true
226
+ metadata[:type_rel] = from_type
219
227
  end
228
+ metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
229
+ metadata[:suspect] = nil if metadata[:suspect].empty?
230
+ MiGA.DEBUG "Got type: #{from_type}"
231
+ metadata
232
+ end
220
233
 
221
- def save_assembly_to(project, name, udb)
222
- dir = MiGA::Dataset.RESULT_DIRS[:assembly]
223
- base = "#{project.path}/data/#{dir}/#{name}"
224
- l_ctg = "#{base}.LargeContigs.fna"
225
- a_ctg = "#{base}.AllContigs.fna"
226
- File.open("#{base}.start", 'w') { |ofh| ofh.puts Time.now.to_s }
227
- if udb[:format] == :fasta_gz
228
- download "#{l_ctg}.gz"
229
- system "gzip -d '#{l_ctg}.gz'"
230
- else
231
- download l_ctg
232
- end
233
- File.unlink(a_ctg) if File.exist? a_ctg
234
- File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
234
+ def save_assembly_to(project, name, udb)
235
+ dir = MiGA::Dataset.RESULT_DIRS[:assembly]
236
+ base = "#{project.path}/data/#{dir}/#{name}"
237
+ l_ctg = "#{base}.LargeContigs.fna"
238
+ a_ctg = "#{base}.AllContigs.fna"
239
+ File.open("#{base}.start", 'w') { |ofh| ofh.puts Time.now.to_s }
240
+ if udb[:format] == :fasta_gz
241
+ download "#{l_ctg}.gz"
242
+ system "gzip -d '#{l_ctg}.gz'"
243
+ else
244
+ download l_ctg
235
245
  end
246
+ File.unlink(a_ctg) if File.exist? a_ctg
247
+ File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
248
+ end
236
249
  end
@@ -1,21 +1,20 @@
1
-
2
1
  require 'open-uri'
3
2
  require 'cgi'
4
3
 
5
4
  class MiGA::RemoteDataset < MiGA::MiGA
6
-
7
5
  # Class-level
8
6
  class << self
9
- def UNIVERSE ; @@UNIVERSE ; end
7
+ def UNIVERSE
8
+ @@UNIVERSE
9
+ end
10
10
  end
11
-
12
11
  end
13
12
 
14
13
  module MiGA::RemoteDataset::Base
15
-
16
14
  @@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
17
15
  @@_NCBI_API_KEY = lambda { |url|
18
- ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}" }
16
+ ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
17
+ }
19
18
 
20
19
  ##
21
20
  # Structure of the different database Universes or containers. The structure
@@ -43,13 +42,13 @@ module MiGA::RemoteDataset::Base
43
42
  method: :net
44
43
  },
45
44
  ebi: {
46
- dbs: { embl: {stage: :assembly, format: :fasta} },
45
+ dbs: { embl: { stage: :assembly, format: :fasta } },
47
46
  url: 'https://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s',
48
47
  method: :rest
49
48
  },
50
49
  ncbi: {
51
50
  dbs: {
52
- nuccore: { stage: :assembly, format: :fasta },
51
+ nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
53
52
  assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
54
53
  taxonomy: { stage: :metadata, format: :xml }
55
54
  },
@@ -59,9 +58,10 @@ module MiGA::RemoteDataset::Base
59
58
  },
60
59
  ncbi_map: {
61
60
  dbs: {
62
- nuccore: { stage: :metadata, map_to: [:biosample, :assembly],
63
- format: :json },
64
- biosample: {stage: :metadata, map_to: [:assembly], format: :json}
61
+ nuccore: {
62
+ stage: :metadata, map_to: [:biosample, :assembly], format: :json
63
+ },
64
+ biosample: { stage: :metadata, map_to: [:assembly], format: :json }
65
65
  },
66
66
  url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%4$s&retmode=%3$s",
67
67
  method: :net,
@@ -81,6 +81,4 @@ module MiGA::RemoteDataset::Base
81
81
  api_key: @@_NCBI_API_KEY
82
82
  }
83
83
  }
84
-
85
84
  end
86
-
@@ -1,4 +1,3 @@
1
-
2
1
  require 'miga/remote_dataset/base'
3
2
 
4
3
  class MiGA::RemoteDataset
@@ -26,8 +25,8 @@ class MiGA::RemoteDataset
26
25
  obj: obj
27
26
  }
28
27
  doc = send("#{getter}_#{method}", opts)
29
- unless file.nil?
30
- ofh = File.open(file, 'w')
28
+ unless opts[:file].nil?
29
+ ofh = File.open(opts[:file], 'w')
31
30
  ofh.print doc.force_encoding('UTF-8')
32
31
  ofh.close
33
32
  end
@@ -45,8 +44,26 @@ class MiGA::RemoteDataset
45
44
  def ncbi_asm_rest(opts)
46
45
  url_dir = opts[:obj].ncbi_asm_json_doc['ftppath_genbank']
47
46
  url = "#{url_dir}/#{File.basename url_dir}_genomic.fna.gz"
48
- download(:web, :assembly_gz, url,
49
- opts[:format], opts[:file], opts[:extra], opts[:obj])
47
+ download(
48
+ :web, :assembly_gz, url,
49
+ opts[:format], opts[:file], opts[:extra], opts[:obj]
50
+ )
51
+ end
52
+
53
+ ##
54
+ # Download data from NCBI GenBank (nuccore) database using the REST method.
55
+ # Supported +opts+ (Hash) are the same as #download_rest and #ncbi_asm_rest.
56
+ def ncbi_gb_rest(opts)
57
+ o = download_rest(opts)
58
+ return o unless o.strip.empty?
59
+
60
+ MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
61
+ opts[:format] = :fasta_gz
62
+ if opts[:file]
63
+ File.unlink(opts[:file]) if File.exist? opts[:file]
64
+ opts[:file] = "#{opts[:file]}.gz"
65
+ end
66
+ ncbi_asm_rest(opts)
50
67
  end
51
68
 
52
69
  ##
@@ -58,8 +75,9 @@ class MiGA::RemoteDataset
58
75
  # +extra+: Array
59
76
  def download_rest(opts)
60
77
  u = @@UNIVERSE[opts[:universe]]
61
- url = sprintf(u[:url],
62
- opts[:db], opts[:ids].join(','), opts[:format], *opts[:extra])
78
+ url = sprintf(
79
+ u[:url], opts[:db], opts[:ids].join(','), opts[:format], *opts[:extra]
80
+ )
63
81
  url = u[:api_key][url] unless u[:api_key].nil?
64
82
  download_url url
65
83
  end
@@ -80,17 +98,20 @@ class MiGA::RemoteDataset
80
98
  rescue => e
81
99
  @timeout_try += 1
82
100
  raise e if @timeout_try >= 3
101
+
102
+ sleep 5 # <- For: 429 Too Many Requests
83
103
  retry
84
104
  end
85
105
  doc
86
106
  end
87
-
107
+
88
108
  ##
89
109
  # Looks for the entry +id+ in +dbfrom+, and returns the linked
90
110
  # identifier in +db+ (or nil).
91
111
  def ncbi_map(id, dbfrom, db)
92
112
  doc = download(:ncbi_map, dbfrom, id, :json, nil, [db])
93
113
  return if doc.empty?
114
+
94
115
  tree = MiGA::Json.parse(doc, contents: true)
95
116
  [:linksets, 0, :linksetdbs, 0, :links, 0].each do |i|
96
117
  tree = tree[i]
@@ -102,11 +123,12 @@ class MiGA::RemoteDataset
102
123
  end
103
124
 
104
125
  module MiGA::RemoteDataset::Download
105
-
106
126
  ##
107
- # Download data into +file+.
127
+ # Download data into +file+
108
128
  def download(file)
109
- self.class.download(universe, db, ids,
110
- self.class.UNIVERSE[universe][:dbs][db][:format], file, [], self)
129
+ self.class.download(
130
+ universe, db, ids, self.class.UNIVERSE[universe][:dbs][db][:format],
131
+ file, [], self
132
+ )
111
133
  end
112
134
  end
@@ -8,7 +8,6 @@ require 'miga/result/stats'
8
8
  ##
9
9
  # The result from a task run. It can be project-wide or dataset-specific.
10
10
  class MiGA::Result < MiGA::MiGA
11
-
12
11
  include MiGA::Result::Dates
13
12
  include MiGA::Result::Source
14
13
  include MiGA::Result::Stats
@@ -26,6 +25,7 @@ class MiGA::Result < MiGA::MiGA
26
25
  # Returns MiGA::Result if it already exists, nil otherwise.
27
26
  def load(path)
28
27
  return nil unless MiGA::Result.exist? path
28
+
29
29
  MiGA::Result.new(path)
30
30
  end
31
31
 
@@ -33,6 +33,7 @@ class MiGA::Result < MiGA::MiGA
33
33
  FileUtils.rm(path) if force && File.exist?(path)
34
34
  r_pre = self.load(path)
35
35
  return r_pre unless r_pre.nil?
36
+
36
37
  yield
37
38
  self.load(path)
38
39
  end
@@ -41,67 +42,72 @@ class MiGA::Result < MiGA::MiGA
41
42
  # Instance-level
42
43
 
43
44
  ##
44
- # Hash with the result metadata.
45
+ # Hash with the result metadata
45
46
  attr_reader :data
46
47
 
47
48
  ##
48
- # Array of MiGA::Result objects nested within the result (if any).
49
- attr_reader :results
50
-
51
- ##
52
- # Load or create the MiGA::Result described by the JSON file +path+.
49
+ # Load or create the MiGA::Result described by the JSON file +path+
53
50
  def initialize(path)
54
51
  @path = File.absolute_path(path)
55
52
  MiGA::Result.exist?(@path) ? self.load : create
56
53
  end
57
54
 
58
55
  ##
59
- # Is the result clean? Returns Boolean.
60
- def clean? ; !! self[:clean] ; end
56
+ # Is the result clean? Returns Boolean
57
+ def clean?
58
+ !!self[:clean]
59
+ end
61
60
 
62
61
  ##
63
- # Register the result as cleaned.
64
- def clean! ; self[:clean] = true ; end
62
+ # Register the result as cleaned
63
+ def clean!
64
+ self[:clean] = true
65
+ end
65
66
 
66
67
  ##
67
68
  # Path to the standard files of the result. +which+ must be one of:
68
69
  # - :json (default) : JSON file describing the result.
69
70
  # - :start : File with the date when the processing started.
70
71
  # - :done : File with the date when the processing ended.
71
- def path(which=:json)
72
+ def path(which = :json)
72
73
  case which.to_sym
73
74
  when :json
74
75
  @path
75
76
  when :start
76
- @path.sub(/\.json$/, ".start")
77
+ @path.sub(/\.json$/, '.start')
77
78
  when :done
78
- @path.sub(/\.json$/, ".done")
79
+ @path.sub(/\.json$/, '.done')
79
80
  end
80
81
  end
81
82
 
82
83
  ##
83
- # Directory containing the result.
84
+ # Directory containing the result
84
85
  def dir
85
86
  File.dirname(path)
86
87
  end
87
88
 
88
89
  ##
89
- # Absolute path to the file(s) defined by symbol +k+.
90
+ # Absolute path to the file(s) defined by symbol +k+
90
91
  def file_path(k)
91
92
  k = k.to_sym
92
93
  f = self[:files].nil? ? nil : self[:files][k]
93
94
  return nil if f.nil?
94
95
  return File.expand_path(f, dir) unless f.is_a? Array
95
- f.map{ |fi| File.expand_path(fi, dir) }
96
+
97
+ f.map { |fi| File.expand_path(fi, dir) }
96
98
  end
97
99
 
98
100
  ##
99
- # Entry with symbol +k+.
100
- def [](k) data[k.to_sym] ; end
101
+ # Entry with symbol +k+
102
+ def [](k)
103
+ data[k.to_sym]
104
+ end
101
105
 
102
106
  ##
103
- # Adds value +v+ to entry with symbol +k+.
104
- def []=(k,v) data[k.to_sym]=v ; end
107
+ # Adds value +v+ to entry with symbol +k+
108
+ def []=(k, v)
109
+ data[k.to_sym] = v
110
+ end
105
111
 
106
112
  ##
107
113
  # Register +file+ (path relative to #dir) with the symbol +k+. If the file
@@ -116,20 +122,20 @@ class MiGA::Result < MiGA::MiGA
116
122
  end
117
123
 
118
124
  ##
119
- # #add_file for each key-value pair in the +files+ Hash.
125
+ # #add_file for each key-value pair in the +files+ Hash
120
126
  def add_files(files)
121
127
  files.each { |k, v| add_file(k, v) }
122
128
  end
123
129
 
124
130
  ##
125
- # Initialize and #save empty result.
131
+ # Initialize and #save empty result
126
132
  def create
127
- @data = { created: Time.now.to_s, results: [], stats: {}, files: {} }
133
+ @data = { created: Time.now.to_s, stats: {}, files: {} }
128
134
  save
129
135
  end
130
136
 
131
137
  ##
132
- # Save the result persistently (in the JSON file #path).
138
+ # Save the result persistently (in the JSON file #path)
133
139
  def save
134
140
  @data[:updated] = Time.now.to_s
135
141
  s = path(:start)
@@ -142,24 +148,23 @@ class MiGA::Result < MiGA::MiGA
142
148
  end
143
149
 
144
150
  ##
145
- # Load (or reload) result data in the JSON file #path.
151
+ # Load (or reload) result data in the JSON file #path
146
152
  def load
147
153
  @data = MiGA::Json.parse(path)
148
154
  @data[:files] ||= {}
149
- @results = (self[:results] || []).map{ |rs| MiGA::Result.new rs }
150
155
  end
151
156
 
152
157
  ##
153
- # Remove result, including all associated files.
158
+ # Remove result, including all associated files
154
159
  def remove!
155
- each_file do |file|
156
- f = File.expand_path(file, dir)
157
- FileUtils.rm_rf(f)
158
- end
159
- %w(.start .done).each do |ext|
160
- f = path.sub(/\.json$/, ext)
161
- File.unlink f if File.exist? f
162
- end
160
+ each_file { |file| FileUtils.rm_rf(File.join(dir, file)) }
161
+ unlink
162
+ end
163
+
164
+ # Unlink result by removing the .done and .start timestamps and the
165
+ # .json descriptor, but don't remove any other associated files
166
+ def unlink
167
+ %i(start done).each { |i| f = path(i) and File.unlink(f) }
163
168
  File.unlink path
164
169
  end
165
170
 
@@ -172,29 +177,19 @@ class MiGA::Result < MiGA::MiGA
172
177
  # Note that multiple files may have the same symbol (file_sym), since
173
178
  # arrays of files are supported.
174
179
  def each_file(&blk)
180
+ return to_enum(:each_file) unless block_given?
181
+
175
182
  @data[:files] ||= {}
176
- self[:files].each do |k,files|
183
+ self[:files].each do |k, files|
177
184
  files = [files] unless files.kind_of? Array
178
185
  files.each do |file|
179
186
  case blk.arity
180
- when 1
181
- blk.call(file)
182
- when 2
183
- blk.call(k, file)
184
- when 3
185
- blk.call(k, file, File.expand_path(file, dir))
186
- else
187
- raise "Wrong number of arguments: #{blk.arity} for 1..3"
187
+ when 1; blk.call(file)
188
+ when 2; blk.call(k, file)
189
+ when 3; blk.call(k, file, File.expand_path(file, dir))
190
+ else; raise "Wrong number of arguments: #{blk.arity} for 1..3"
188
191
  end
189
192
  end
190
193
  end
191
194
  end
192
-
193
- ##
194
- # Add the MiGA::Result +result+ as part of the current result.
195
- def add_result(result)
196
- @data[:results] << result.path
197
- save
198
- end
199
-
200
195
  end