miga-base 0.7.3.1 → 0.7.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120) hide show
  1. checksums.yaml +4 -4
  2. data/lib/miga/cli.rb +10 -8
  3. data/lib/miga/cli/action.rb +2 -3
  4. data/lib/miga/cli/action/about.rb +5 -6
  5. data/lib/miga/cli/action/add.rb +18 -12
  6. data/lib/miga/cli/action/add_result.rb +2 -3
  7. data/lib/miga/cli/action/archive.rb +1 -2
  8. data/lib/miga/cli/action/classify_wf.rb +8 -6
  9. data/lib/miga/cli/action/console.rb +0 -1
  10. data/lib/miga/cli/action/daemon.rb +7 -7
  11. data/lib/miga/cli/action/date.rb +0 -1
  12. data/lib/miga/cli/action/derep_wf.rb +5 -4
  13. data/lib/miga/cli/action/doctor.rb +71 -82
  14. data/lib/miga/cli/action/doctor/base.rb +102 -0
  15. data/lib/miga/cli/action/edit.rb +14 -2
  16. data/lib/miga/cli/action/files.rb +8 -8
  17. data/lib/miga/cli/action/find.rb +5 -6
  18. data/lib/miga/cli/action/generic.rb +7 -7
  19. data/lib/miga/cli/action/get.rb +20 -17
  20. data/lib/miga/cli/action/get_db.rb +8 -2
  21. data/lib/miga/cli/action/index_wf.rb +1 -1
  22. data/lib/miga/cli/action/init.rb +53 -41
  23. data/lib/miga/cli/action/init/daemon_helper.rb +65 -43
  24. data/lib/miga/cli/action/lair.rb +7 -7
  25. data/lib/miga/cli/action/ln.rb +6 -6
  26. data/lib/miga/cli/action/ls.rb +1 -2
  27. data/lib/miga/cli/action/ncbi_get.rb +11 -3
  28. data/lib/miga/cli/action/new.rb +4 -4
  29. data/lib/miga/cli/action/next_step.rb +0 -1
  30. data/lib/miga/cli/action/preproc_wf.rb +3 -3
  31. data/lib/miga/cli/action/quality_wf.rb +1 -1
  32. data/lib/miga/cli/action/rm.rb +2 -3
  33. data/lib/miga/cli/action/run.rb +8 -8
  34. data/lib/miga/cli/action/stats.rb +8 -4
  35. data/lib/miga/cli/action/summary.rb +7 -6
  36. data/lib/miga/cli/action/tax_dist.rb +8 -4
  37. data/lib/miga/cli/action/tax_index.rb +3 -4
  38. data/lib/miga/cli/action/tax_set.rb +7 -6
  39. data/lib/miga/cli/action/tax_test.rb +6 -5
  40. data/lib/miga/cli/action/wf.rb +25 -19
  41. data/lib/miga/cli/base.rb +34 -32
  42. data/lib/miga/cli/objects_helper.rb +27 -18
  43. data/lib/miga/cli/opt_helper.rb +3 -2
  44. data/lib/miga/common.rb +2 -5
  45. data/lib/miga/common/base.rb +15 -16
  46. data/lib/miga/common/format.rb +8 -5
  47. data/lib/miga/common/hooks.rb +1 -4
  48. data/lib/miga/common/path.rb +4 -9
  49. data/lib/miga/common/with_daemon.rb +5 -2
  50. data/lib/miga/common/with_daemon_class.rb +1 -1
  51. data/lib/miga/common/with_result.rb +2 -1
  52. data/lib/miga/daemon.rb +93 -44
  53. data/lib/miga/daemon/base.rb +30 -11
  54. data/lib/miga/dataset.rb +47 -37
  55. data/lib/miga/dataset/base.rb +52 -37
  56. data/lib/miga/dataset/hooks.rb +3 -4
  57. data/lib/miga/dataset/result.rb +17 -1
  58. data/lib/miga/dataset/status.rb +6 -5
  59. data/lib/miga/json.rb +5 -7
  60. data/lib/miga/lair.rb +4 -0
  61. data/lib/miga/metadata.rb +4 -3
  62. data/lib/miga/project.rb +29 -20
  63. data/lib/miga/project/base.rb +52 -37
  64. data/lib/miga/project/dataset.rb +33 -26
  65. data/lib/miga/project/hooks.rb +0 -3
  66. data/lib/miga/project/result.rb +14 -5
  67. data/lib/miga/remote_dataset.rb +85 -72
  68. data/lib/miga/remote_dataset/base.rb +11 -13
  69. data/lib/miga/remote_dataset/download.rb +34 -12
  70. data/lib/miga/result.rb +48 -53
  71. data/lib/miga/result/base.rb +0 -2
  72. data/lib/miga/result/dates.rb +1 -3
  73. data/lib/miga/result/source.rb +15 -16
  74. data/lib/miga/result/stats.rb +37 -27
  75. data/lib/miga/tax_dist.rb +6 -3
  76. data/lib/miga/tax_index.rb +17 -17
  77. data/lib/miga/taxonomy.rb +6 -1
  78. data/lib/miga/taxonomy/base.rb +19 -15
  79. data/lib/miga/version.rb +19 -16
  80. data/scripts/project_stats.bash +3 -0
  81. data/scripts/stats.bash +1 -1
  82. data/test/common_test.rb +3 -11
  83. data/test/daemon_helper.rb +38 -0
  84. data/test/daemon_test.rb +91 -99
  85. data/test/dataset_test.rb +63 -59
  86. data/test/format_test.rb +3 -11
  87. data/test/hook_test.rb +50 -55
  88. data/test/json_test.rb +7 -8
  89. data/test/lair_test.rb +22 -28
  90. data/test/metadata_test.rb +6 -14
  91. data/test/project_test.rb +33 -40
  92. data/test/remote_dataset_test.rb +26 -32
  93. data/test/result_stats_test.rb +17 -27
  94. data/test/result_test.rb +41 -34
  95. data/test/tax_dist_test.rb +2 -4
  96. data/test/tax_index_test.rb +4 -10
  97. data/test/taxonomy_test.rb +7 -9
  98. data/test/test_helper.rb +42 -1
  99. data/test/with_daemon_test.rb +14 -22
  100. data/utils/adapters.fa +13 -0
  101. data/utils/cleanup-databases.rb +6 -5
  102. data/utils/distance/base.rb +0 -1
  103. data/utils/distance/commands.rb +19 -12
  104. data/utils/distance/database.rb +24 -21
  105. data/utils/distance/pipeline.rb +23 -10
  106. data/utils/distance/runner.rb +20 -16
  107. data/utils/distance/temporal.rb +1 -3
  108. data/utils/distances.rb +1 -1
  109. data/utils/domain-ess-genes.rb +7 -7
  110. data/utils/index_metadata.rb +5 -4
  111. data/utils/mytaxa_scan.rb +18 -16
  112. data/utils/representatives.rb +5 -4
  113. data/utils/requirements.txt +1 -1
  114. data/utils/subclade/base.rb +0 -1
  115. data/utils/subclade/pipeline.rb +7 -6
  116. data/utils/subclade/runner.rb +9 -9
  117. data/utils/subclade/temporal.rb +0 -2
  118. data/utils/subclades-compile.rb +39 -37
  119. data/utils/subclades.rb +1 -1
  120. metadata +6 -4
@@ -1,4 +1,3 @@
1
-
2
1
  require 'miga/common/hooks'
3
2
 
4
3
  ##
@@ -18,7 +17,6 @@ require 'miga/common/hooks'
18
17
  # Internal hooks:
19
18
  # - _pull_result_hooks()
20
19
  module MiGA::Project::Hooks
21
-
22
20
  include MiGA::Common::Hooks
23
21
 
24
22
  def default_hooks
@@ -47,5 +45,4 @@ module MiGA::Project::Hooks
47
45
  pull_hook(:"on_result_ready_#{event_args.first}", *event_args)
48
46
  pull_hook(:on_processing_ready) if next_task(nil, false).nil?
49
47
  end
50
-
51
48
  end
@@ -62,6 +62,7 @@ module MiGA::Project::Result
62
62
  # Add result of any type +:*_distances+ at +base+ (no +_opts+ supported).
63
63
  def add_result_distances(base, _opts)
64
64
  return nil unless result_files_exist?(base, %w[.Rdata .log .txt])
65
+
65
66
  r = MiGA::Result.new("#{base}.json")
66
67
  r.add_file(:rdata, 'miga-project.Rdata')
67
68
  r.add_file(:matrix, 'miga-project.txt')
@@ -79,8 +80,13 @@ module MiGA::Project::Result
79
80
  return r
80
81
  end
81
82
  return nil unless result_files_exist?(base, %w[.proposed-clades])
82
- return nil unless is_clade? or result_files_exist?(base,
83
- %w[.pdf .classif .medoids .class.tsv .class.nwk])
83
+ unless is_clade? ||
84
+ result_files_exist?(
85
+ base, %w[.pdf .classif .medoids .class.tsv .class.nwk]
86
+ )
87
+ return nil
88
+ end
89
+
84
90
  r = add_result_iter_clades(base)
85
91
  r.add_file(:aai_tree, 'miga-project.aai.nwk')
86
92
  r.add_file(:proposal, 'miga-project.proposed-clades')
@@ -99,8 +105,10 @@ module MiGA::Project::Result
99
105
  r.add_file(:empty, 'miga-project.empty')
100
106
  return r
101
107
  end
102
- return nil unless result_files_exist?(base,
103
- %w[.pdf .classif .medoids .class.tsv .class.nwk])
108
+ return nil unless result_files_exist?(
109
+ base, %w[.pdf .classif .medoids .class.tsv .class.nwk]
110
+ )
111
+
104
112
  r = add_result_iter_clades(base)
105
113
  r.add_file(:ani_tree, 'miga-project.ani.nwk')
106
114
  r
@@ -127,6 +135,7 @@ module MiGA::Project::Result
127
135
  return r
128
136
  end
129
137
  return nil unless result_files_exist?(base, %w[.ogs .stats])
138
+
130
139
  r = MiGA::Result.new("#{base}.json")
131
140
  r.add_file(:ogs, 'miga-project.ogs')
132
141
  r.add_file(:abc, 'miga-project.abc')
@@ -141,6 +150,7 @@ module MiGA::Project::Result
141
150
  def add_result_project_stats(base, _opts)
142
151
  return nil unless
143
152
  result_files_exist?(base, %w[.taxonomy.json .metadata.db])
153
+
144
154
  r = MiGA::Result.new("#{base}.json")
145
155
  r.add_file(:taxonomy_index, 'miga-project.taxonomy.json')
146
156
  r.add_file(:metadata_index, 'miga-project.metadata.db')
@@ -151,5 +161,4 @@ module MiGA::Project::Result
151
161
  alias add_result_aai_distances add_result_distances
152
162
  alias add_result_ani_distances add_result_distances
153
163
  alias add_result_ssu_distances add_result_distances
154
-
155
164
  end
@@ -14,9 +14,11 @@ class MiGA::RemoteDataset < MiGA::MiGA
14
14
  class << self
15
15
  def ncbi_asm_acc2id(acc)
16
16
  return acc if acc =~ /^\d+$/
17
+
17
18
  search_doc = MiGA::Json.parse(
18
19
  download(:ncbi_search, :assembly, acc, :json),
19
- symbolize: false, contents: true)
20
+ symbolize: false, contents: true
21
+ )
20
22
  (search_doc['esearchresult']['idlist'] || []).first
21
23
  end
22
24
  end
@@ -90,14 +92,14 @@ class MiGA::RemoteDataset < MiGA::MiGA
90
92
  # and optionally the Hash +metadata+.
91
93
  def update_metadata(dataset, metadata = {})
92
94
  metadata = get_metadata(metadata)
93
- metadata.each { |k,v| dataset.metadata[k] = v }
95
+ metadata.each { |k, v| dataset.metadata[k] = v }
94
96
  dataset.save
95
97
  end
96
98
 
97
99
  ##
98
100
  # Get metadata from the remote location.
99
101
  def get_metadata(metadata_def = {})
100
- metadata_def.each { |k,v| @metadata[k] = v }
102
+ metadata_def.each { |k, v| @metadata[k] = v }
101
103
  case universe
102
104
  when :ebi, :ncbi, :web
103
105
  # Get taxonomy
@@ -131,7 +133,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
131
133
  def get_ncbi_taxonomy
132
134
  tax_id = get_ncbi_taxid
133
135
  return nil if tax_id.nil?
134
- lineage = {ns: 'ncbi'}
136
+
137
+ lineage = { ns: 'ncbi' }
135
138
  doc = MiGA::RemoteDataset.download(:ncbi, :taxonomy, tax_id, :xml)
136
139
  doc.scan(%r{<Taxon>(.*?)</Taxon>}m).map(&:first).each do |i|
137
140
  name = i.scan(%r{<ScientificName>(.*)</ScientificName>}).first.to_a.first
@@ -148,89 +151,99 @@ class MiGA::RemoteDataset < MiGA::MiGA
148
151
  # Get the JSON document describing an NCBI assembly entry.
149
152
  def ncbi_asm_json_doc
150
153
  return @_ncbi_asm_json_doc unless @_ncbi_asm_json_doc.nil?
154
+
151
155
  metadata[:ncbi_asm] ||= ids.first if universe == :ncbi and db == :assembly
152
156
  return nil unless metadata[:ncbi_asm]
157
+
153
158
  ncbi_asm_id = self.class.ncbi_asm_acc2id metadata[:ncbi_asm]
154
159
  doc = MiGA::Json.parse(
155
160
  self.class.download(:ncbi_summary, :assembly, ncbi_asm_id, :json),
156
- symbolize: false, contents: true)
161
+ symbolize: false, contents: true
162
+ )
157
163
  @_ncbi_asm_json_doc = doc['result'][ doc['result']['uids'].first ]
158
164
  end
159
165
 
160
-
161
166
  private
162
167
 
163
- def get_ncbi_taxid_from_web
164
- return nil if ncbi_asm_json_doc.nil?
165
- ncbi_asm_json_doc['taxid']
166
- end
168
+ def get_ncbi_taxid_from_web
169
+ return nil if ncbi_asm_json_doc.nil?
167
170
 
168
- def get_ncbi_taxid_from_ncbi
169
- doc = self.class.download(universe, db, ids, :gb).split(/\n/)
170
- ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
171
- return nil if ln.nil?
172
- ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
173
- return nil unless ln =~ /^\d+$/
174
- ln
175
- end
171
+ ncbi_asm_json_doc['taxid']
172
+ end
176
173
 
177
- def get_ncbi_taxid_from_ebi
178
- doc = self.class.download(universe, db, ids, :annot).split(/\n/)
179
- ln = doc.grep(%r{^FT\s+/db_xref="taxon:}).first
180
- ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
181
- return nil if ln.nil?
182
- ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, '\\1')
183
- return nil unless ln =~ /^\d+$/
184
- ln
185
- end
174
+ def get_ncbi_taxid_from_ncbi
175
+ doc = self.class.download(universe, db, ids, :gb).split(/\n/)
176
+ ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
177
+ return nil if ln.nil?
186
178
 
187
- def get_type_status_ncbi_nuccore(metadata)
188
- return metadata if metadata[:ncbi_nuccore].nil?
189
- biosample = self.class.ncbi_map(metadata[:ncbi_nuccore],
190
- :nuccore, :biosample)
191
- return metadata if biosample.nil?
192
- asm = self.class.ncbi_map(biosample, :biosample, :assembly)
193
- metadata[:ncbi_asm] = asm.to_s unless asm.nil?
194
- get_type_status_ncbi_asm metadata
195
- end
179
+ ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
180
+ return nil unless ln =~ /^\d+$/
196
181
 
197
- def get_type_status_ncbi_asm(metadata)
198
- return metadata if ncbi_asm_json_doc.nil?
199
- from_type = ncbi_asm_json_doc['from_type']
200
- from_type = ncbi_asm_json_doc['fromtype'] if from_type.nil?
201
- case from_type
202
- when nil
203
- # Do nothing
204
- when ''
205
- metadata[:is_type] = false
206
- metadata[:is_ref_type] = false
207
- when 'assembly from reference material', 'assembly designated as reftype'
208
- metadata[:is_type] = false
209
- metadata[:is_ref_type] = true
210
- metadata[:type_rel] = from_type
211
- else
212
- metadata[:is_type] = true
213
- metadata[:type_rel] = from_type
214
- end
215
- metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
216
- metadata[:suspect] = nil if metadata[:suspect].empty?
217
- MiGA.DEBUG "Got type: #{from_type}"
218
- metadata
182
+ ln
183
+ end
184
+
185
+ def get_ncbi_taxid_from_ebi
186
+ doc = self.class.download(universe, db, ids, :annot).split(/\n/)
187
+ ln = doc.grep(%r{^FT\s+/db_xref="taxon:}).first
188
+ ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
189
+ return nil if ln.nil?
190
+
191
+ ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, '\\1')
192
+ return nil unless ln =~ /^\d+$/
193
+
194
+ ln
195
+ end
196
+
197
+ def get_type_status_ncbi_nuccore(metadata)
198
+ return metadata if metadata[:ncbi_nuccore].nil?
199
+
200
+ biosample =
201
+ self.class.ncbi_map(metadata[:ncbi_nuccore], :nuccore, :biosample)
202
+ return metadata if biosample.nil?
203
+
204
+ asm = self.class.ncbi_map(biosample, :biosample, :assembly)
205
+ metadata[:ncbi_asm] = asm.to_s unless asm.nil?
206
+ get_type_status_ncbi_asm metadata
207
+ end
208
+
209
+ def get_type_status_ncbi_asm(metadata)
210
+ return metadata if ncbi_asm_json_doc.nil?
211
+
212
+ from_type = ncbi_asm_json_doc['from_type']
213
+ from_type = ncbi_asm_json_doc['fromtype'] if from_type.nil?
214
+ case from_type
215
+ when nil
216
+ # Do nothing
217
+ when ''
218
+ metadata[:is_type] = false
219
+ metadata[:is_ref_type] = false
220
+ when 'assembly from reference material', 'assembly designated as reftype'
221
+ metadata[:is_type] = false
222
+ metadata[:is_ref_type] = true
223
+ metadata[:type_rel] = from_type
224
+ else
225
+ metadata[:is_type] = true
226
+ metadata[:type_rel] = from_type
219
227
  end
228
+ metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
229
+ metadata[:suspect] = nil if metadata[:suspect].empty?
230
+ MiGA.DEBUG "Got type: #{from_type}"
231
+ metadata
232
+ end
220
233
 
221
- def save_assembly_to(project, name, udb)
222
- dir = MiGA::Dataset.RESULT_DIRS[:assembly]
223
- base = "#{project.path}/data/#{dir}/#{name}"
224
- l_ctg = "#{base}.LargeContigs.fna"
225
- a_ctg = "#{base}.AllContigs.fna"
226
- File.open("#{base}.start", 'w') { |ofh| ofh.puts Time.now.to_s }
227
- if udb[:format] == :fasta_gz
228
- download "#{l_ctg}.gz"
229
- system "gzip -d '#{l_ctg}.gz'"
230
- else
231
- download l_ctg
232
- end
233
- File.unlink(a_ctg) if File.exist? a_ctg
234
- File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
234
+ def save_assembly_to(project, name, udb)
235
+ dir = MiGA::Dataset.RESULT_DIRS[:assembly]
236
+ base = "#{project.path}/data/#{dir}/#{name}"
237
+ l_ctg = "#{base}.LargeContigs.fna"
238
+ a_ctg = "#{base}.AllContigs.fna"
239
+ File.open("#{base}.start", 'w') { |ofh| ofh.puts Time.now.to_s }
240
+ if udb[:format] == :fasta_gz
241
+ download "#{l_ctg}.gz"
242
+ system "gzip -d '#{l_ctg}.gz'"
243
+ else
244
+ download l_ctg
235
245
  end
246
+ File.unlink(a_ctg) if File.exist? a_ctg
247
+ File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
248
+ end
236
249
  end
@@ -1,21 +1,20 @@
1
-
2
1
  require 'open-uri'
3
2
  require 'cgi'
4
3
 
5
4
  class MiGA::RemoteDataset < MiGA::MiGA
6
-
7
5
  # Class-level
8
6
  class << self
9
- def UNIVERSE ; @@UNIVERSE ; end
7
+ def UNIVERSE
8
+ @@UNIVERSE
9
+ end
10
10
  end
11
-
12
11
  end
13
12
 
14
13
  module MiGA::RemoteDataset::Base
15
-
16
14
  @@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
17
15
  @@_NCBI_API_KEY = lambda { |url|
18
- ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}" }
16
+ ENV['NCBI_API_KEY'].nil? ? url : "#{url}&api_key=#{ENV['NCBI_API_KEY']}"
17
+ }
19
18
 
20
19
  ##
21
20
  # Structure of the different database Universes or containers. The structure
@@ -43,13 +42,13 @@ module MiGA::RemoteDataset::Base
43
42
  method: :net
44
43
  },
45
44
  ebi: {
46
- dbs: { embl: {stage: :assembly, format: :fasta} },
45
+ dbs: { embl: { stage: :assembly, format: :fasta } },
47
46
  url: 'https://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s',
48
47
  method: :rest
49
48
  },
50
49
  ncbi: {
51
50
  dbs: {
52
- nuccore: { stage: :assembly, format: :fasta },
51
+ nuccore: { stage: :assembly, format: :fasta, getter: :ncbi_gb },
53
52
  assembly: { stage: :assembly, format: :fasta_gz, getter: :ncbi_asm },
54
53
  taxonomy: { stage: :metadata, format: :xml }
55
54
  },
@@ -59,9 +58,10 @@ module MiGA::RemoteDataset::Base
59
58
  },
60
59
  ncbi_map: {
61
60
  dbs: {
62
- nuccore: { stage: :metadata, map_to: [:biosample, :assembly],
63
- format: :json },
64
- biosample: {stage: :metadata, map_to: [:assembly], format: :json}
61
+ nuccore: {
62
+ stage: :metadata, map_to: [:biosample, :assembly], format: :json
63
+ },
64
+ biosample: { stage: :metadata, map_to: [:assembly], format: :json }
65
65
  },
66
66
  url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%4$s&retmode=%3$s",
67
67
  method: :net,
@@ -81,6 +81,4 @@ module MiGA::RemoteDataset::Base
81
81
  api_key: @@_NCBI_API_KEY
82
82
  }
83
83
  }
84
-
85
84
  end
86
-
@@ -1,4 +1,3 @@
1
-
2
1
  require 'miga/remote_dataset/base'
3
2
 
4
3
  class MiGA::RemoteDataset
@@ -26,8 +25,8 @@ class MiGA::RemoteDataset
26
25
  obj: obj
27
26
  }
28
27
  doc = send("#{getter}_#{method}", opts)
29
- unless file.nil?
30
- ofh = File.open(file, 'w')
28
+ unless opts[:file].nil?
29
+ ofh = File.open(opts[:file], 'w')
31
30
  ofh.print doc.force_encoding('UTF-8')
32
31
  ofh.close
33
32
  end
@@ -45,8 +44,26 @@ class MiGA::RemoteDataset
45
44
  def ncbi_asm_rest(opts)
46
45
  url_dir = opts[:obj].ncbi_asm_json_doc['ftppath_genbank']
47
46
  url = "#{url_dir}/#{File.basename url_dir}_genomic.fna.gz"
48
- download(:web, :assembly_gz, url,
49
- opts[:format], opts[:file], opts[:extra], opts[:obj])
47
+ download(
48
+ :web, :assembly_gz, url,
49
+ opts[:format], opts[:file], opts[:extra], opts[:obj]
50
+ )
51
+ end
52
+
53
+ ##
54
+ # Download data from NCBI GenBank (nuccore) database using the REST method.
55
+ # Supported +opts+ (Hash) are the same as #download_rest and #ncbi_asm_rest.
56
+ def ncbi_gb_rest(opts)
57
+ o = download_rest(opts)
58
+ return o unless o.strip.empty?
59
+
60
+ MiGA::MiGA.DEBUG 'Empty sequence, attempting download from NCBI assembly'
61
+ opts[:format] = :fasta_gz
62
+ if opts[:file]
63
+ File.unlink(opts[:file]) if File.exist? opts[:file]
64
+ opts[:file] = "#{opts[:file]}.gz"
65
+ end
66
+ ncbi_asm_rest(opts)
50
67
  end
51
68
 
52
69
  ##
@@ -58,8 +75,9 @@ class MiGA::RemoteDataset
58
75
  # +extra+: Array
59
76
  def download_rest(opts)
60
77
  u = @@UNIVERSE[opts[:universe]]
61
- url = sprintf(u[:url],
62
- opts[:db], opts[:ids].join(','), opts[:format], *opts[:extra])
78
+ url = sprintf(
79
+ u[:url], opts[:db], opts[:ids].join(','), opts[:format], *opts[:extra]
80
+ )
63
81
  url = u[:api_key][url] unless u[:api_key].nil?
64
82
  download_url url
65
83
  end
@@ -80,17 +98,20 @@ class MiGA::RemoteDataset
80
98
  rescue => e
81
99
  @timeout_try += 1
82
100
  raise e if @timeout_try >= 3
101
+
102
+ sleep 5 # <- For: 429 Too Many Requests
83
103
  retry
84
104
  end
85
105
  doc
86
106
  end
87
-
107
+
88
108
  ##
89
109
  # Looks for the entry +id+ in +dbfrom+, and returns the linked
90
110
  # identifier in +db+ (or nil).
91
111
  def ncbi_map(id, dbfrom, db)
92
112
  doc = download(:ncbi_map, dbfrom, id, :json, nil, [db])
93
113
  return if doc.empty?
114
+
94
115
  tree = MiGA::Json.parse(doc, contents: true)
95
116
  [:linksets, 0, :linksetdbs, 0, :links, 0].each do |i|
96
117
  tree = tree[i]
@@ -102,11 +123,12 @@ class MiGA::RemoteDataset
102
123
  end
103
124
 
104
125
  module MiGA::RemoteDataset::Download
105
-
106
126
  ##
107
- # Download data into +file+.
127
+ # Download data into +file+
108
128
  def download(file)
109
- self.class.download(universe, db, ids,
110
- self.class.UNIVERSE[universe][:dbs][db][:format], file, [], self)
129
+ self.class.download(
130
+ universe, db, ids, self.class.UNIVERSE[universe][:dbs][db][:format],
131
+ file, [], self
132
+ )
111
133
  end
112
134
  end
@@ -8,7 +8,6 @@ require 'miga/result/stats'
8
8
  ##
9
9
  # The result from a task run. It can be project-wide or dataset-specific.
10
10
  class MiGA::Result < MiGA::MiGA
11
-
12
11
  include MiGA::Result::Dates
13
12
  include MiGA::Result::Source
14
13
  include MiGA::Result::Stats
@@ -26,6 +25,7 @@ class MiGA::Result < MiGA::MiGA
26
25
  # Returns MiGA::Result if it already exists, nil otherwise.
27
26
  def load(path)
28
27
  return nil unless MiGA::Result.exist? path
28
+
29
29
  MiGA::Result.new(path)
30
30
  end
31
31
 
@@ -33,6 +33,7 @@ class MiGA::Result < MiGA::MiGA
33
33
  FileUtils.rm(path) if force && File.exist?(path)
34
34
  r_pre = self.load(path)
35
35
  return r_pre unless r_pre.nil?
36
+
36
37
  yield
37
38
  self.load(path)
38
39
  end
@@ -41,67 +42,72 @@ class MiGA::Result < MiGA::MiGA
41
42
  # Instance-level
42
43
 
43
44
  ##
44
- # Hash with the result metadata.
45
+ # Hash with the result metadata
45
46
  attr_reader :data
46
47
 
47
48
  ##
48
- # Array of MiGA::Result objects nested within the result (if any).
49
- attr_reader :results
50
-
51
- ##
52
- # Load or create the MiGA::Result described by the JSON file +path+.
49
+ # Load or create the MiGA::Result described by the JSON file +path+
53
50
  def initialize(path)
54
51
  @path = File.absolute_path(path)
55
52
  MiGA::Result.exist?(@path) ? self.load : create
56
53
  end
57
54
 
58
55
  ##
59
- # Is the result clean? Returns Boolean.
60
- def clean? ; !! self[:clean] ; end
56
+ # Is the result clean? Returns Boolean
57
+ def clean?
58
+ !!self[:clean]
59
+ end
61
60
 
62
61
  ##
63
- # Register the result as cleaned.
64
- def clean! ; self[:clean] = true ; end
62
+ # Register the result as cleaned
63
+ def clean!
64
+ self[:clean] = true
65
+ end
65
66
 
66
67
  ##
67
68
  # Path to the standard files of the result. +which+ must be one of:
68
69
  # - :json (default) : JSON file describing the result.
69
70
  # - :start : File with the date when the processing started.
70
71
  # - :done : File with the date when the processing ended.
71
- def path(which=:json)
72
+ def path(which = :json)
72
73
  case which.to_sym
73
74
  when :json
74
75
  @path
75
76
  when :start
76
- @path.sub(/\.json$/, ".start")
77
+ @path.sub(/\.json$/, '.start')
77
78
  when :done
78
- @path.sub(/\.json$/, ".done")
79
+ @path.sub(/\.json$/, '.done')
79
80
  end
80
81
  end
81
82
 
82
83
  ##
83
- # Directory containing the result.
84
+ # Directory containing the result
84
85
  def dir
85
86
  File.dirname(path)
86
87
  end
87
88
 
88
89
  ##
89
- # Absolute path to the file(s) defined by symbol +k+.
90
+ # Absolute path to the file(s) defined by symbol +k+
90
91
  def file_path(k)
91
92
  k = k.to_sym
92
93
  f = self[:files].nil? ? nil : self[:files][k]
93
94
  return nil if f.nil?
94
95
  return File.expand_path(f, dir) unless f.is_a? Array
95
- f.map{ |fi| File.expand_path(fi, dir) }
96
+
97
+ f.map { |fi| File.expand_path(fi, dir) }
96
98
  end
97
99
 
98
100
  ##
99
- # Entry with symbol +k+.
100
- def [](k) data[k.to_sym] ; end
101
+ # Entry with symbol +k+
102
+ def [](k)
103
+ data[k.to_sym]
104
+ end
101
105
 
102
106
  ##
103
- # Adds value +v+ to entry with symbol +k+.
104
- def []=(k,v) data[k.to_sym]=v ; end
107
+ # Adds value +v+ to entry with symbol +k+
108
+ def []=(k, v)
109
+ data[k.to_sym] = v
110
+ end
105
111
 
106
112
  ##
107
113
  # Register +file+ (path relative to #dir) with the symbol +k+. If the file
@@ -116,20 +122,20 @@ class MiGA::Result < MiGA::MiGA
116
122
  end
117
123
 
118
124
  ##
119
- # #add_file for each key-value pair in the +files+ Hash.
125
+ # #add_file for each key-value pair in the +files+ Hash
120
126
  def add_files(files)
121
127
  files.each { |k, v| add_file(k, v) }
122
128
  end
123
129
 
124
130
  ##
125
- # Initialize and #save empty result.
131
+ # Initialize and #save empty result
126
132
  def create
127
- @data = { created: Time.now.to_s, results: [], stats: {}, files: {} }
133
+ @data = { created: Time.now.to_s, stats: {}, files: {} }
128
134
  save
129
135
  end
130
136
 
131
137
  ##
132
- # Save the result persistently (in the JSON file #path).
138
+ # Save the result persistently (in the JSON file #path)
133
139
  def save
134
140
  @data[:updated] = Time.now.to_s
135
141
  s = path(:start)
@@ -142,24 +148,23 @@ class MiGA::Result < MiGA::MiGA
142
148
  end
143
149
 
144
150
  ##
145
- # Load (or reload) result data in the JSON file #path.
151
+ # Load (or reload) result data in the JSON file #path
146
152
  def load
147
153
  @data = MiGA::Json.parse(path)
148
154
  @data[:files] ||= {}
149
- @results = (self[:results] || []).map{ |rs| MiGA::Result.new rs }
150
155
  end
151
156
 
152
157
  ##
153
- # Remove result, including all associated files.
158
+ # Remove result, including all associated files
154
159
  def remove!
155
- each_file do |file|
156
- f = File.expand_path(file, dir)
157
- FileUtils.rm_rf(f)
158
- end
159
- %w(.start .done).each do |ext|
160
- f = path.sub(/\.json$/, ext)
161
- File.unlink f if File.exist? f
162
- end
160
+ each_file { |file| FileUtils.rm_rf(File.join(dir, file)) }
161
+ unlink
162
+ end
163
+
164
+ # Unlink result by removing the .done and .start timestamps and the
165
+ # .json descriptor, but don't remove any other associated files
166
+ def unlink
167
+ %i(start done).each { |i| f = path(i) and File.unlink(f) }
163
168
  File.unlink path
164
169
  end
165
170
 
@@ -172,29 +177,19 @@ class MiGA::Result < MiGA::MiGA
172
177
  # Note that multiple files may have the same symbol (file_sym), since
173
178
  # arrays of files are supported.
174
179
  def each_file(&blk)
180
+ return to_enum(:each_file) unless block_given?
181
+
175
182
  @data[:files] ||= {}
176
- self[:files].each do |k,files|
183
+ self[:files].each do |k, files|
177
184
  files = [files] unless files.kind_of? Array
178
185
  files.each do |file|
179
186
  case blk.arity
180
- when 1
181
- blk.call(file)
182
- when 2
183
- blk.call(k, file)
184
- when 3
185
- blk.call(k, file, File.expand_path(file, dir))
186
- else
187
- raise "Wrong number of arguments: #{blk.arity} for 1..3"
187
+ when 1; blk.call(file)
188
+ when 2; blk.call(k, file)
189
+ when 3; blk.call(k, file, File.expand_path(file, dir))
190
+ else; raise "Wrong number of arguments: #{blk.arity} for 1..3"
188
191
  end
189
192
  end
190
193
  end
191
194
  end
192
-
193
- ##
194
- # Add the MiGA::Result +result+ as part of the current result.
195
- def add_result(result)
196
- @data[:results] << result.path
197
- save
198
- end
199
-
200
195
  end