miga-base 0.3.3.1 → 0.3.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b1b32b7800278dc330c5c8e01f4b94dfd1d97750
4
- data.tar.gz: 2c3b6ef0e73568df8775fb98c65d454cdcf0f411
3
+ metadata.gz: b7140af2e2cb8525f5231ea065c79cb362348a9f
4
+ data.tar.gz: 257de78dbf14f7e01f4c239e7311883f98ccab21
5
5
  SHA512:
6
- metadata.gz: 590a41c7bc94f5d36a53e0b9eb4f096211ccdae8724e63948480e0b57c8b7fa24a5779534868c7cb13405b3360f35be00d977728886ba7b7491ef5aeebb0bc0d
7
- data.tar.gz: 64a273f14eea3aec6f9c8cfb388bdae4bdf2027d7ce95d89b1e8a27799e51420d2f58b42a58871037ee4a0e7f616f92e8ac485a12dbe226d7ab5cda99792f286
6
+ metadata.gz: 9a08c21c03a045d369078614dc41223db249b4105d20ece5198e4ef126dab468f7490278f50d7101652bba5433e4eee4653ea0fd67e85de55a78881d842d25ac
7
+ data.tar.gz: 62b07a2e62dbf801f5afbe87ac6bbb7c5a004ccc52153c640daf675e5e50e211fd568b7496d884a67759006d47e4ffd264dc280d28e299fb8351ba5ba14e5a6f
data/actions/about.rb CHANGED
@@ -27,7 +27,7 @@ raise "Impossible to load project: #{o[:project]}" if p.nil?
27
27
 
28
28
  if not o[:datum].nil?
29
29
  v = p.metadata[ o[:datum] ]
30
- puts v.nil? ? "?" : v
30
+ puts v.nil? ? '?' : v
31
31
  elsif o[:processing]
32
32
  keys = MiGA::Project.DISTANCE_TASKS + MiGA::Project.INCLADE_TASKS
33
33
  puts MiGA::MiGA.tabulate([:task, :status], keys.map do |k|
data/actions/ls.rb CHANGED
@@ -13,7 +13,7 @@ OptionParser.new do |opt|
13
13
  opt.on("-p", "--processing",
14
14
  "Print information on processing advance."){ |v| o[:processing]=v }
15
15
  opt.on("-m", "--metadata STRING",
16
- "Print name and metadata field only. If set, ignores -i."
16
+ "Print name and metadata field only. If set, ignores -i and assumes --tab."
17
17
  ){ |v| o[:datum]=v }
18
18
  opt.on("--tab",
19
19
  "Returns a tab-delimited table."){ |v| o[:tabular] = v }
@@ -43,7 +43,10 @@ ds = filter_datasets!(ds, o)
43
43
  exit(1) if o[:silent] and ds.empty?
44
44
 
45
45
  if not o[:datum].nil?
46
- ds.each{|d| puts "#{d.name}\t#{d.metadata[ o[:datum] ] || "?"}"}
46
+ ds.each do |d|
47
+ v = d.metadata[ o[:datum] ]
48
+ puts "#{d.name}\t#{v.nil? ? '?' : v}"
49
+ end
47
50
  elsif o[:info]
48
51
  puts MiGA::MiGA.tabulate(
49
52
  MiGA::Dataset.INFO_FIELDS, ds.map{ |d| d.info }, o[:tabular])
data/actions/ncbi_get.rb CHANGED
@@ -8,7 +8,8 @@ require 'miga/remote_dataset'
8
8
  o = {q:true, query:false, unlink:false,
9
9
  reference: false, ignore_plasmids: false,
10
10
  complete: false, chromosome: false,
11
- scaffold: false, contig: false, add_version: true, dry: false}
11
+ scaffold: false, contig: false, add_version: true, dry: false,
12
+ get_md: false}
12
13
  OptionParser.new do |opt|
13
14
  opt_banner(opt)
14
15
  opt_object(opt, o, [:project])
@@ -37,6 +38,9 @@ OptionParser.new do |opt|
37
38
  opt.on('--blacklist PATH',
38
39
  'A file with dataset names to blacklist.'){ |v| o[:blacklist] = v }
39
40
  opt.on('--dry', 'Do not download or save the datasets.'){ |v| o[:dry] = v }
41
+ opt.on('--get-metadata',
42
+ 'Only download and update metadata for existing datasets'
43
+ ){ |v| o[:get_md] = v }
40
44
  opt.on('-q', '--query',
41
45
  'Register the datasets as queries, not reference datasets.'
42
46
  ){ |v| o[:query]=v }
@@ -131,8 +135,7 @@ if o[:scaffold] or o[:contig]
131
135
  map{ |i| "#{i}/#{File.basename(i)}_genomic.fna.gz" }
132
136
  next if ids.empty?
133
137
  n = "#{r[0]}_#{asm}".miga_name
134
- comm = "Assembly: #{asm}"
135
- ds[n] = {ids: ids, md: {type: :genome, comments: comm},
138
+ ds[n] = {ids: ids, md: {type: :genome, ncbi_asm: asm},
136
139
  db: :assembly_gz, universe: :web}
137
140
  end
138
141
  end
@@ -144,23 +147,30 @@ unless o[:blacklist].nil?
144
147
  end
145
148
 
146
149
  # Download entries
147
- $stderr.puts "Downloading #{ds.size} #{ds.size>1 ? "entries" : "entry"}." unless o[:q]
150
+ $stderr.puts "Downloading #{ds.size} " +
151
+ (ds.size > 1 ? "entries" : "entry") unless o[:q]
148
152
  ds.each do |name,body|
149
153
  d << name
150
154
  puts name
151
- next unless p.dataset(name).nil?
155
+ next if p.dataset(name).nil? == o[:get_md]
152
156
  downloaded += 1
153
157
  next if o[:dry]
154
158
  $stderr.puts ' Locating remote dataset.' unless o[:q]
155
159
  rd = MiGA::RemoteDataset.new(body[:ids], body[:db], body[:universe])
156
- $stderr.puts ' Creating dataset.' unless o[:q]
157
- rd.save_to(p, name, !o[:query], body[:md])
158
- p.add_dataset(name)
160
+ if o[:get_md]
161
+ $stderr.puts ' Updating dataset.' unless o[:q]
162
+ rd.update_metadata(p.dataset(name), body[:md])
163
+ else
164
+ $stderr.puts ' Creating dataset.' unless o[:q]
165
+ rd.save_to(p, name, !o[:query], body[:md])
166
+ p.add_dataset(name)
167
+ end
159
168
  end
160
169
 
161
170
  # Finalize
162
171
  $stderr.puts "Datasets listed: #{d.size}" unless o[:q]
163
- $stderr.puts "Datasets #{"to be " if o[:dry]}downloaded: #{downloaded}" unless o[:q]
172
+ $stderr.puts "Datasets #{o[:dry] ? 'to download' : 'downloaded'}: " +
173
+ downloaded.to_s unless o[:q]
164
174
  unless o[:remote_list].nil?
165
175
  File.open(o[:remote_list], 'w') do |fh|
166
176
  d.each { |i| fh.puts i }
data/actions/stats.rb CHANGED
@@ -45,14 +45,16 @@ if o[:compute]
45
45
  when :raw_reads
46
46
  if r[:files][:pair1].nil?
47
47
  s = MiGA::MiGA.seqs_length(r.file_path(:single), :fastq, gc: true)
48
- stats = {reads: s[:n],
48
+ stats = {
49
+ reads: s[:n],
49
50
  length_average: [s[:avg], "bp"],
50
51
  length_standard_deviation: [s[:sd], "bp"],
51
52
  g_c_content: [s[:gc], "%"]}
52
53
  else
53
54
  s1 = MiGA::MiGA.seqs_length(r.file_path(:pair1), :fastq, gc: true)
54
55
  s2 = MiGA::MiGA.seqs_length(r.file_path(:pair2), :fastq, gc: true)
55
- stats = {read_pairs: s1[:n],
56
+ stats = {
57
+ read_pairs: s1[:n],
56
58
  forward_length_average: [s1[:avg], "bp"],
57
59
  forward_length_standard_deviation: [s1[:sd], "bp"],
58
60
  forward_g_c_content: [s1[:gc], "%"],
@@ -63,22 +65,28 @@ if o[:compute]
63
65
  when :trimmed_fasta
64
66
  f = r[:files][:coupled].nil? ? r.file_path(:single) : r.file_path(:coupled)
65
67
  s = MiGA::MiGA.seqs_length(f, :fasta, gc: true)
66
- stats = {reads: s[:n],
68
+ stats = {
69
+ reads: s[:n],
67
70
  length_average: [s[:avg], "bp"],
68
71
  length_standard_deviation: [s[:sd], "bp"],
69
72
  g_c_content: [s[:gc], "%"]}
70
73
  when :assembly
71
74
  s = MiGA::MiGA.seqs_length(r.file_path(:largecontigs), :fasta,
72
- n50:true, gc:true)
73
- stats = {contigs: s[:n], n50: [s[:n50], "bp"],
74
- total_length: [s[:tot], "bp"], g_c_content: [s[:gc], "%"]}
75
+ n50: true, gc: true)
76
+ stats = {
77
+ contigs: s[:n],
78
+ n50: [s[:n50], "bp"],
79
+ total_length: [s[:tot], "bp"],
80
+ g_c_content: [s[:gc], "%"]}
75
81
  when :cds
76
82
  s = MiGA::MiGA.seqs_length(r.file_path(:proteins), :fasta)
77
- stats = {predicted_proteins: s[:n], average_length: [s[:avg], "aa"]}
83
+ stats = {
84
+ predicted_proteins: s[:n],
85
+ average_length: [s[:avg], "aa"]}
78
86
  asm = d.add_result(:assembly, false)
79
87
  unless asm.nil? or asm[:stats][:total_length].nil?
80
88
  stats[:coding_density] =
81
- [300.0*s[:tot]/asm[:stats][:total_length][0], "%"]
89
+ [300.0 * s[:tot] / asm[:stats][:total_length][0], "%"]
82
90
  end
83
91
  when :essential_genes
84
92
  if d.is_multi?
@@ -102,7 +110,7 @@ if o[:compute]
102
110
  r.add_file(:report, "#{d.name}.ess/log.archaea")
103
111
  end
104
112
  # Extract/compute quality values
105
- stats = {completeness:[0.0,"%"], contamination:[0.0,"%"]}
113
+ stats = {completeness: [0.0,"%"], contamination: [0.0,"%"]}
106
114
  File.open(r.file_path(:report), "r") do |fh|
107
115
  fh.each_line do |ln|
108
116
  if /^! (Completeness|Contamination): (.*)%/.match(ln)
@@ -110,7 +118,7 @@ if o[:compute]
110
118
  end
111
119
  end
112
120
  end
113
- stats[:quality] = stats[:completeness][0] - stats[:contamination][0]*5
121
+ stats[:quality] = stats[:completeness][0] - stats[:contamination][0] * 5
114
122
  d.metadata[:quality] = case stats[:quality]
115
123
  when 80..100 ; :excellent
116
124
  when 50..80 ; :high
data/lib/miga.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require "json"
5
- require "fileutils"
6
- require "miga/common"
7
- require "miga/project"
8
- require "miga/taxonomy"
4
+ require 'json'
5
+ require 'fileutils'
6
+ require 'miga/common'
7
+ require 'miga/project'
8
+ require 'miga/taxonomy'
@@ -2,18 +2,20 @@
2
2
  require 'tempfile'
3
3
  require 'zlib'
4
4
 
5
+ ##
6
+ # General formatting functions shared throughout MiGA.
5
7
  module MiGA::Common::Format
6
8
  ##
7
9
  # Tabulates +values+, an Array of Arrays, all with the same number of
8
10
  # entries as +header+. Returns an Array of String, one per line.
9
- def tabulate(header, values, tabular=false)
11
+ def tabulate(header, values, tabular = false)
10
12
  fields = [header.map(&:to_s)]
11
13
  fields << fields.first.map { |h| h.gsub(/\S/, '-') } unless tabular
12
14
  fields += values.map { |r| r.map { |cell| cell.nil? ? '?' : cell.to_s } }
13
15
  clen = tabular ? Array.new(header.size, 0) :
14
16
  fields.map { |r| r.map(&:length) }.transpose.map(&:max)
15
17
  fields.map do |r|
16
- (0 .. clen.size - 1).map do |col_n|
18
+ (0..(clen.size - 1)).map do |col_n|
17
19
  col_n == 0 ? r[col_n].rjust(clen[col_n]) : r[col_n].ljust(clen[col_n])
18
20
  end.join(tabular ? "\t" : ' ')
19
21
  end
@@ -37,7 +39,7 @@ module MiGA::Common::Format
37
39
  fh.each_line do |ln|
38
40
  ln.chomp!
39
41
  if ln =~ /^>\s*(\S+)(.*)/
40
- (id, df) = [$1, $2]
42
+ id, df = $1, $2
41
43
  tmp_fh.print buffer.wrap_width(80)
42
44
  buffer = ''
43
45
  tmp_fh.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, '_')}#{df}"
@@ -66,16 +68,17 @@ module MiGA::Common::Format
66
68
  # - +:n50+: If true, it also returns the N50 and the median (in bp).
67
69
  # - +:gc+: If true, it also returns the G+C content (in %).
68
70
  def seqs_length(file, format, opts = {})
69
- fh = (file =~ /\.gz/) ? Zlib::GzipReader.open(file) : File.open(file, 'r')
71
+ fh = file =~ /\.gz/ ? Zlib::GzipReader.open(file) : File.open(file, 'r')
70
72
  l = []
71
73
  gc = 0
72
- i = 0 # <- Zlib::GzipReader doesn't set $.
74
+ i = 0 # <- Zlib::GzipReader doesn't set `$.`
73
75
  fh.each_line do |ln|
74
76
  i += 1
75
- if (format == :fasta and ln =~ /^>/) or (format == :fastq and (i % 4)==1)
77
+ if (format == :fasta and ln =~ /^>/) or
78
+ (format == :fastq and (i % 4) == 1)
76
79
  l << 0
77
80
  elsif format == :fasta or (i % 4) == 2
78
- l[l.size-1] += ln.chomp.size
81
+ l[l.size - 1] += ln.chomp.size
79
82
  gc += ln.scan(/[GCgc]/).count if opts[:gc]
80
83
  end
81
84
  end
@@ -131,4 +134,3 @@ class String
131
134
  gsub(/([^\n\r]{1,#{width}})/, "\\1\n")
132
135
  end
133
136
  end
134
-
@@ -92,7 +92,7 @@ module MiGA::Dataset::Result
92
92
 
93
93
  ##
94
94
  # Are all the dataset-specific tasks done? Passes +save+ to #add_result.
95
- def done_preprocessing?(save=false)
95
+ def done_preprocessing?(save = false)
96
96
  !first_preprocessing(save).nil? and next_preprocessing(save).nil?
97
97
  end
98
98
 
@@ -103,7 +103,7 @@ module MiGA::Dataset::Result
103
103
  # - 1 for a registered result (a completed task).
104
104
  # - 2 for a queued result (a task yet to be executed).
105
105
  # It passes +save+ to #add_result
106
- def profile_advance(save=false)
106
+ def profile_advance(save = false)
107
107
  first_task = first_preprocessing(save)
108
108
  return Array.new(@@PREPROCESSING_TASKS.size, 0) if first_task.nil?
109
109
  adv = []
@@ -141,12 +141,12 @@ module MiGA::Dataset::Result
141
141
  ##
142
142
  # Add result type +:raw_reads+ at +base+ (no +_opts+ supported).
143
143
  def add_result_raw_reads(base, _opts)
144
- return nil unless result_files_exist?(base, ".1.fastq")
144
+ return nil unless result_files_exist?(base, '.1.fastq')
145
145
  r = MiGA::Result.new("#{base}.json")
146
146
  add_files_to_ds_result(r, name,
147
- ( result_files_exist?(base, ".2.fastq") ?
148
- {pair1:".1.fastq", pair2:".2.fastq"} :
149
- {single:".1.fastq"} ))
147
+ ( result_files_exist?(base, '.2.fastq') ?
148
+ {pair1: '.1.fastq', pair2: '.2.fastq'} :
149
+ {single: '.1.fastq'} ))
150
150
  end
151
151
 
152
152
  ##
@@ -156,13 +156,12 @@ module MiGA::Dataset::Result
156
156
  r = MiGA::Result.new("#{base}.json")
157
157
  if result_files_exist?(base, ".2.clipped.fastq")
158
158
  r = add_files_to_ds_result(r, name,
159
- pair1:".1.clipped.fastq", pair2:".2.clipped.fastq",
160
- single:".1.clipped.single.fastq")
159
+ pair1: ".1.clipped.fastq", pair2: ".2.clipped.fastq",
160
+ single: ".1.clipped.single.fastq")
161
161
  else
162
- r = add_files_to_ds_result(r, name, single:".1.clipped.fastq")
162
+ r = add_files_to_ds_result(r, name, single: ".1.clipped.fastq")
163
163
  end
164
164
  r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
165
- add_result(:raw_reads) #-> Post gunzip
166
165
  r
167
166
  end
168
167
 
@@ -171,10 +170,8 @@ module MiGA::Dataset::Result
171
170
  def add_result_read_quality(base, _opts)
172
171
  return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
173
172
  r = MiGA::Result.new("#{base}.json")
174
- r = add_files_to_ds_result(r, name,
175
- solexaqa:".solexaqa", fastqc:".fastqc")
176
- add_result(:trimmed_reads) #-> Post cleaning
177
- r
173
+ add_files_to_ds_result(r, name,
174
+ solexaqa: ".solexaqa", fastqc: ".fastqc")
178
175
  end
179
176
 
180
177
  ##
@@ -185,10 +182,8 @@ module MiGA::Dataset::Result
185
182
  result_files_exist?(base, ".SingleReads.fa") or
186
183
  result_files_exist?(base, %w[.1.fasta .2.fasta])
187
184
  r = MiGA::Result.new("#{base}.json")
188
- r = add_files_to_ds_result(r, name, coupled:".CoupledReads.fa",
189
- single:".SingleReads.fa", pair1:".1.fasta", pair2:".2.fasta")
190
- add_result(:raw_reads) #-> Post gzip
191
- r
185
+ add_files_to_ds_result(r, name, coupled: ".CoupledReads.fa",
186
+ single: ".SingleReads.fa", pair1: ".1.fasta", pair2: ".2.fasta")
192
187
  end
193
188
 
194
189
  ##
@@ -197,15 +192,14 @@ module MiGA::Dataset::Result
197
192
  def add_result_assembly(base, opts)
198
193
  return nil unless result_files_exist?(base, ".LargeContigs.fna")
199
194
  r = MiGA::Result.new("#{base}.json")
200
- r = add_files_to_ds_result(r, name, largecontigs:".LargeContigs.fna",
201
- allcontigs:".AllContigs.fna", assembly_data:"")
195
+ r = add_files_to_ds_result(r, name, largecontigs: ".LargeContigs.fna",
196
+ allcontigs: ".AllContigs.fna", assembly_data: '')
202
197
  opts[:is_clean] ||= false
203
198
  r.clean! if opts[:is_clean]
204
199
  unless r.clean?
205
200
  MiGA::MiGA.clean_fasta_file(r.file_path :largecontigs)
206
201
  r.clean!
207
202
  end
208
- add_result(:trimmed_fasta) #-> Post interposing
209
203
  r
210
204
  end
211
205
 
@@ -214,8 +208,8 @@ module MiGA::Dataset::Result
214
208
  def add_result_cds(base, opts)
215
209
  return nil unless result_files_exist?(base, %w[.faa])
216
210
  r = MiGA::Result.new("#{base}.json")
217
- r = add_files_to_ds_result(r, name, proteins:".faa", genes:".fna",
218
- gff2:".gff2", gff3:".gff3", tab:".tab")
211
+ r = add_files_to_ds_result(r, name, proteins: ".faa", genes: ".fna",
212
+ gff2: ".gff2", gff3: ".gff3", tab: ".tab")
219
213
  opts[:is_clean] ||= false
220
214
  r.clean! if opts[:is_clean]
221
215
  unless r.clean?
@@ -231,8 +225,8 @@ module MiGA::Dataset::Result
231
225
  def add_result_essential_genes(base, _opts)
232
226
  return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
233
227
  r = MiGA::Result.new("#{base}.json")
234
- add_files_to_ds_result(r, name, ess_genes:".ess.faa",
235
- collection:".ess", report:".ess/log")
228
+ add_files_to_ds_result(r, name, ess_genes: ".ess.faa",
229
+ collection: ".ess", report: ".ess/log")
236
230
  end
237
231
 
238
232
  ##
@@ -241,8 +235,8 @@ module MiGA::Dataset::Result
241
235
  return MiGA::Result.new("#{base}.json") if result(:assembly).nil?
242
236
  return nil unless result_files_exist?(base, ".ssu.fa")
243
237
  r = MiGA::Result.new("#{base}.json")
244
- r = add_files_to_ds_result(r, name, longest_ssu_gene:".ssu.fa",
245
- gff:".ssu.gff", all_ssu_genes:".ssu.all.fa")
238
+ r = add_files_to_ds_result(r, name, longest_ssu_gene: ".ssu.fa",
239
+ gff: ".ssu.gff", all_ssu_genes: ".ssu.all.fa")
246
240
  opts[:is_clean] ||= false
247
241
  r.clean! if opts[:is_clean]
248
242
  unless r.clean?
@@ -259,11 +253,11 @@ module MiGA::Dataset::Result
259
253
  return nil unless result_files_exist?(base, ".mytaxa") or
260
254
  result_files_exist?(base, ".nomytaxa.txt")
261
255
  r = MiGA::Result.new("#{base}.json")
262
- add_files_to_ds_result(r, name, mytaxa:".mytaxa", blast:".blast",
263
- mytaxain:".mytaxain", nomytaxa:".nomytaxa.txt",
264
- species:".mytaxa.Species.txt", genus:".mytaxa.Genus.txt",
265
- phylum:".mytaxa.Phylum.txt", innominate:".mytaxa.innominate",
266
- kronain:".mytaxa.krona", krona:".html")
256
+ add_files_to_ds_result(r, name, mytaxa: ".mytaxa", blast: ".blast",
257
+ mytaxain: ".mytaxain", nomytaxa: ".nomytaxa.txt",
258
+ species: ".mytaxa.Species.txt", genus: ".mytaxa.Genus.txt",
259
+ phylum: ".mytaxa.Phylum.txt", innominate: ".mytaxa.innominate",
260
+ kronain: ".mytaxa.krona", krona: ".html")
267
261
  else
268
262
  MiGA::Result.new("#{base}.json")
269
263
  end
@@ -327,8 +321,8 @@ module MiGA::Dataset::Result
327
321
  return nil unless
328
322
  File.exist?("#{pref}/01.haai/#{name}.db")
329
323
  r = MiGA::Result.new("#{base}.json")
330
- r.add_files(haai_db:"01.haai/#{name}.db", aai_db:"02.aai/#{name}.db",
331
- ani_db:"03.ani/#{name}.db")
324
+ r.add_files(haai_db: "01.haai/#{name}.db", aai_db: "02.aai/#{name}.db",
325
+ ani_db: "03.ani/#{name}.db")
332
326
  r
333
327
  end
334
328
 
@@ -339,10 +333,10 @@ module MiGA::Dataset::Result
339
333
  result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
340
334
  result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
341
335
  r = MiGA::Result.new("#{base}.json")
342
- add_files_to_ds_result(r, name, aai_medoids:".aai-medoids.tsv",
343
- haai_db:".haai.db", aai_db:".aai.db", ani_medoids:".ani-medoids.tsv",
344
- ani_db:".ani.db", ref_tree:".nwk", ref_tree_pdf:".nwk.pdf",
345
- intax_test:".intax.txt")
336
+ add_files_to_ds_result(r, name, aai_medoids: ".aai-medoids.tsv",
337
+ haai_db: ".haai.db", aai_db: ".aai.db", ani_medoids: ".ani-medoids.tsv",
338
+ ani_db: ".ani.db", ref_tree: ".nwk", ref_tree_pdf: ".nwk.pdf",
339
+ intax_test: ".intax.txt")
346
340
  end
347
341
 
348
342
  ##
@@ -1,68 +1,12 @@
1
1
  # @package MiGA
2
2
  # @license Artistic-2.0
3
3
 
4
- require 'miga/remote_dataset/base'
4
+ require 'miga/remote_dataset/download'
5
5
 
6
6
  ##
7
7
  # MiGA representation of datasets with data in remote locations.
8
8
  class MiGA::RemoteDataset < MiGA::MiGA
9
-
10
- include MiGA::RemoteDataset::Base
11
-
12
- # Class-level
13
-
14
- ##
15
- # Download data from the +universe+ in the database +db+ with IDs +ids+ and
16
- # in +format+. If passed, it saves the result in +file+. Returns String.
17
- def self.download(universe, db, ids, format, file=nil)
18
- ids = [ids] unless ids.is_a? Array
19
- case @@UNIVERSE[universe][:method]
20
- when :rest
21
- doc = download_rest(universe, db, ids, format)
22
- when :net
23
- doc = download_net(universe, db, ids, format)
24
- end
25
- unless file.nil?
26
- ofh = File.open(file, "w")
27
- ofh.print doc
28
- ofh.close
29
- end
30
- doc
31
- end
32
-
33
- ##
34
- # Download data using a REST method from the +universe+ in the database +db+
35
- # with IDs +ids+ and in +format+. Returns the doc as String.
36
- def self.download_rest(universe, db, ids, format)
37
- u = @@UNIVERSE[universe]
38
- map_to = u[:dbs][db].nil? ? nil : u[:dbs][db][:map_to]
39
- url = sprintf(u[:url], db, ids.join(","), format, map_to)
40
- response = RestClient::Request.execute(method: :get, url:url, timeout:600)
41
- unless response.code == 200
42
- raise "Unable to reach #{universe} client, error code #{response.code}."
43
- end
44
- response.to_s
45
- end
46
-
47
- ##
48
- # Download data using a GET request from the +universe+ in the database +db+
49
- # with IDs +ids+ and in +format+. Returns the doc as String.
50
- def self.download_net(universe, db, ids, format)
51
- u = @@UNIVERSE[universe]
52
- map_to = u[:dbs][db].nil? ? nil : u[:dbs][db][:map_to]
53
- url = sprintf(u[:url], db, ids.join(","), format, map_to)
54
- doc = ""
55
- @timeout_try = 0
56
- begin
57
- open(url) { |f| doc = f.read }
58
- rescue Net::ReadTimeout
59
- @timeout_try += 1
60
- if @timeout_try > 3 ; raise Net::ReadTimeout
61
- else ; retry
62
- end
63
- end
64
- doc
65
- end
9
+ include MiGA::RemoteDataset::Download
66
10
 
67
11
  # Instance-level
68
12
 
@@ -71,7 +15,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
71
15
  attr_reader :universe
72
16
  # Database storing the dataset.
73
17
  attr_reader :db
74
- # IDs of the entries composing the dataset.
18
+ # Array of IDs of the entries composing the dataset.
75
19
  attr_reader :ids
76
20
 
77
21
  ##
@@ -81,92 +25,91 @@ class MiGA::RemoteDataset < MiGA::MiGA
81
25
  @ids = (ids.is_a?(Array) ? ids : [ids])
82
26
  @db = db.to_sym
83
27
  @universe = universe.to_sym
84
- raise "Unknown Universe: #{@universe}. Try one of: "+
85
- "#{@@UNIVERSE.keys}" unless @@UNIVERSE.keys.include? @universe
86
- raise "Unknown Database: #{@db}. Try one of: "+
87
- "#{@@UNIVERSE[@universe][:dbs]}" unless
88
- @@UNIVERSE[@universe][:dbs].include? @db
89
- # FIXME Part of the +map_to+ support:
90
- #unless @@UNIVERSE[@universe][:dbs][@db][:map_to].nil?
91
- # MiGA::RemoteDataset.download
92
- #end
28
+ @@UNIVERSE.keys.include?(@universe) or
29
+ raise "Unknown Universe: #{@universe}. Try: #{@@UNIVERSE.keys}"
30
+ @@UNIVERSE[@universe][:dbs].include?(@db) or
31
+ raise "Unknown Database: #{@db}. Try: #{@@UNIVERSE[@universe][:dbs]}"
32
+ # FIXME: Part of the +map_to+ support:
33
+ # unless @@UNIVERSE[@universe][:dbs][@db][:map_to].nil?
34
+ # MiGA::RemoteDataset.download
35
+ # end
93
36
  end
94
37
 
95
38
  ##
96
39
  # Save dataset to the MiGA::Project +project+ identified with +name+. +is_ref+
97
40
  # indicates if it should be a reference dataset, and contains +metadata+.
98
- def save_to(project, name=nil, is_ref=true, metadata={})
99
- name ||= ids.join("_").miga_name
41
+ def save_to(project, name = nil, is_ref = true, metadata = {})
42
+ name ||= ids.join('_').miga_name
100
43
  project = MiGA::Project.new(project) if project.is_a? String
101
- if MiGA::Dataset.exist?(project, name)
44
+ MiGA::Dataset.exist?(project, name) and
102
45
  raise "Dataset #{name} exists in the project, aborting..."
103
- end
104
46
  metadata = get_metadata(metadata)
105
47
  udb = @@UNIVERSE[universe][:dbs][db]
106
- metadata["#{universe}_#{db}"] = ids.join(",")
107
- case udb[:stage]
108
- when :assembly
109
- dir = MiGA::Dataset.RESULT_DIRS[:assembly]
110
- base = "#{project.path}/data/#{dir}/#{name}"
111
- l_ctg = "#{base}.LargeContigs.fna"
112
- a_ctg = "#{base}.AllContigs.fna"
113
- File.open("#{base}.start", "w") { |ofh| ofh.puts Time.now.to_s }
114
- if udb[:format] == :fasta_gz
115
- download "#{l_ctg}.gz"
116
- system "gzip -d '#{l_ctg}.gz'"
117
- else
118
- download l_ctg
119
- end
120
- File.unlink(a_ctg) if File.exist? a_ctg
121
- File.symlink(File.basename(l_ctg), a_ctg)
122
- File.open("#{base}.done", "w") { |ofh| ofh.puts Time.now.to_s }
123
- else
124
- raise "Unexpected error: Unsupported result for database #{db}."
125
- end
48
+ metadata["#{universe}_#{db}"] = ids.join(',')
49
+ respond_to?("save_#{udb[:stage]}_to", true) or
50
+ raise "Unexpected error: Unsupported stage #{udb[:stage]} for #{db}."
51
+ send "save_#{udb[:stage]}_to", project, name, udb
126
52
  dataset = MiGA::Dataset.new(project, name, is_ref, metadata)
127
53
  project.add_dataset(dataset.name)
128
- result = dataset.add_result(udb[:stage], true, is_clean:true)
129
- raise "Empty dataset created: seed result was not added due to " +
130
- "incomplete files." if result.nil?
54
+ result = dataset.add_result(udb[:stage], true, is_clean: true)
55
+ result.nil? and
56
+ raise 'Empty dataset: seed result not added due to incomplete files.'
131
57
  result.clean!
132
58
  result.save
133
59
  dataset
134
60
  end
135
61
 
62
+ ##
63
+ # Updates the MiGA::Dataset +dataset+ with the remotely available metadata,
64
+ # and optionally the Hash +metadata+.
65
+ def update_metadata(dataset, metadata = {})
66
+ metadata = get_metadata(metadata)
67
+ metadata.each { |k,v| dataset.metadata[k] = v }
68
+ dataset.save
69
+ end
70
+
136
71
  ##
137
72
  # Get metadata from the remote location.
138
- def get_metadata(metadata={})
73
+ def get_metadata(metadata = {})
139
74
  case universe
140
75
  when :ebi, :ncbi
141
76
  # Get taxonomy
142
77
  metadata[:tax] = get_ncbi_taxonomy
143
78
  end
79
+ metadata[:"#{universe}_#{db}"] = ids.join(",")
80
+ metadata = get_type_status(metadata)
144
81
  metadata
145
82
  end
146
83
 
147
- ##
148
- # Download data into +file+.
149
- def download(file)
150
- MiGA::RemoteDataset.download(universe, db, ids,
151
- @@UNIVERSE[universe][:dbs][db][:format], file)
152
- end
153
-
154
84
  ##
155
85
  # Get NCBI Taxonomy ID.
156
86
  def get_ncbi_taxid
157
87
  send("get_ncbi_taxid_from_#{universe}")
158
88
  end
159
89
 
90
+ ##
91
+ # Get the type material status and return an (updated)
92
+ # +metadata+ hash.
93
+ def get_type_status(metadata)
94
+ if metadata[:ncbi_asm]
95
+ get_type_status_ncbi_asm metadata
96
+ elsif metadata[:ncbi_nuccore]
97
+ get_type_status_ncbi_nuccore metadata
98
+ else
99
+ metadata
100
+ end
101
+ end
102
+
160
103
  ##
161
104
  # Get NCBI taxonomy as MiGA::Taxonomy.
162
105
  def get_ncbi_taxonomy
163
106
  lineage = {}
164
107
  tax_id = get_ncbi_taxid
165
- while !(tax_id.nil? or %w{0 1}.include? tax_id)
166
- doc = MiGA::RemoteDataset.download(:ebi, :taxonomy, tax_id, "")
108
+ until [nil, '0', '1'].include? tax_id
109
+ doc = MiGA::RemoteDataset.download(:ebi, :taxonomy, tax_id, '')
167
110
  name = doc.scan(/SCIENTIFIC NAME\s+:\s+(.+)/).first.to_a.first
168
111
  rank = doc.scan(/RANK\s+:\s+(.+)/).first.to_a.first
169
- rank = "dataset" if lineage.empty? and rank=="no rank"
112
+ rank = 'dataset' if lineage.empty? and rank == 'no rank'
170
113
  lineage[rank] = name unless rank.nil?
171
114
  tax_id = doc.scan(/PARENT ID\s+:\s+(.+)/).first.to_a.first
172
115
  end
@@ -174,24 +117,72 @@ class MiGA::RemoteDataset < MiGA::MiGA
174
117
  end
175
118
 
176
119
  private
177
-
120
+
178
121
  def get_ncbi_taxid_from_ncbi
179
- doc = MiGA::RemoteDataset.download(universe, db, ids, :gb).split(/\n/)
180
- ln = doc.grep(/^\s+\/db_xref="taxon:/).first
122
+ doc = self.class.download(universe, db, ids, :gb).split(/\n/)
123
+ ln = doc.grep(%r{^\s+/db_xref="taxon:}).first
181
124
  return nil if ln.nil?
182
- ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, "\\1")
125
+ ln.sub!(/.*(?:"taxon:)(\d+)["; ].*/, '\\1')
183
126
  return nil unless ln =~ /^\d+$/
184
127
  ln
185
128
  end
186
129
 
187
130
  def get_ncbi_taxid_from_ebi
188
- doc = MiGA::RemoteDataset.download(universe, db, ids, :annot).split(/\n/)
189
- ln = doc.grep(/^FT\s+\/db_xref="taxon:/).first
131
+ doc = self.class.download(universe, db, ids, :annot).split(/\n/)
132
+ ln = doc.grep(%r{^FT\s+/db_xref="taxon:}).first
190
133
  ln = doc.grep(/^OX\s+NCBI_TaxID=/).first if ln.nil?
191
134
  return nil if ln.nil?
192
- ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, "\\1")
135
+ ln.sub!(/.*(?:"taxon:|NCBI_TaxID=)(\d+)["; ].*/, '\\1')
193
136
  return nil unless ln =~ /^\d+$/
194
137
  ln
195
138
  end
196
139
 
140
+ def get_type_status_ncbi_nuccore(metadata)
141
+ return metadata if metadata[:ncbi_nuccore].nil?
142
+ biosample = self.class.ncbi_map(metadata[:ncbi_nuccore],
143
+ :nuccore, :biosample)
144
+ return metadata if biosample.nil?
145
+ asm = self.class.ncbi_map(biosample,
146
+ :biosample, :assembly)
147
+ metadata[:ncbi_asm] = asm.to_s unless asm.nil?
148
+ get_type_status_ncbi_asm metadata
149
+ end
150
+
151
+ def get_type_status_ncbi_asm(metadata)
152
+ return metadata if metadata[:ncbi_asm].nil?
153
+ doc = CGI.unescapeHTML(self.class.download(:web, :text,
154
+ "https://www.ncbi.nlm.nih.gov/assembly/" \
155
+ "#{metadata[:ncbi_asm]}?report=xml", :xml)).each_line
156
+ from_type = doc.grep(%r{<FromType/?>}).first or return metadata
157
+ if from_type =~ %r{<FromType/>}
158
+ metadata[:is_type] = false
159
+ metadata[:is_ref_type] = false
160
+ elsif from_type =~ %r{<FromType>(.*)</FromType>}
161
+ if $1 == 'assembly from reference material'
162
+ metadata[:is_type] = false
163
+ metadata[:is_ref_type] = true
164
+ else
165
+ metadata[:is_type] = true
166
+ end
167
+ metadata[:type_rel] = $1
168
+ end
169
+ metadata
170
+ end
171
+
172
+ def save_assembly_to(project, name, udb)
173
+ dir = MiGA::Dataset.RESULT_DIRS[:assembly]
174
+ base = "#{project.path}/data/#{dir}/#{name}"
175
+ l_ctg = "#{base}.LargeContigs.fna"
176
+ a_ctg = "#{base}.AllContigs.fna"
177
+ File.open("#{base}.start", 'w') { |ofh| ofh.puts Time.now.to_s }
178
+ if udb[:format] == :fasta_gz
179
+ download "#{l_ctg}.gz"
180
+ system "gzip -d '#{l_ctg}.gz'"
181
+ else
182
+ download l_ctg
183
+ end
184
+ File.unlink(a_ctg) if File.exist? a_ctg
185
+ File.symlink(File.basename(l_ctg), a_ctg)
186
+ File.open("#{base}.done", 'w') { |ofh| ofh.puts Time.now.to_s }
187
+ end
197
188
  end
@@ -1,6 +1,7 @@
1
1
 
2
- require 'restclient'
2
+ require 'rest-client'
3
3
  require 'open-uri'
4
+ require 'cgi'
4
5
 
5
6
  class MiGA::RemoteDataset < MiGA::MiGA
6
7
 
@@ -13,7 +14,7 @@ end
13
14
 
14
15
  module MiGA::RemoteDataset::Base
15
16
 
16
- @@_EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
17
+ @@_EUTILS = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
17
18
 
18
19
  ##
19
20
  # Structure of the different database Universes or containers. The structure
@@ -23,33 +24,38 @@ module MiGA::RemoteDataset::Base
23
24
  # properties such as +stage+, +format+, and +map_to+.
24
25
  # - +url+ => Pattern of the URL where the data can be obtained, where +%1$s+
25
26
  # is the name of the database, +%2$s+ is the IDs, and +%3$s+ is format.
26
- # - +method+ => Method used to query the URL. Only +:rest+ is currently
27
- # supported.
27
+ # Additional parameters can be passed to certain functions using the +extra+
28
+ # option.
29
+ # - +method+ => Method used to query the URL. Only +:rest+ and +:net+ are
30
+ # currently supported.
28
31
  # - +map_to_universe+ => Universe where results map to. Currently unsupported.
29
32
  @@UNIVERSE = {
30
- web:{
33
+ web: {
31
34
  dbs: {
32
- assembly:{stage: :assembly, format: :fasta},
33
- assembly_gz:{stage: :assembly, format: :fasta_gz}
35
+ assembly: {stage: :assembly, format: :fasta},
36
+ assembly_gz: {stage: :assembly, format: :fasta_gz},
37
+ text: {stage: :metadata, format: :text}
34
38
  },
35
39
  url: "%2$s",
36
40
  method: :net
37
41
  },
38
- ebi:{
39
- dbs: { embl:{stage: :assembly, format: :fasta} },
40
- url: "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s",
42
+ ebi: {
43
+ dbs: { embl: {stage: :assembly, format: :fasta} },
44
+ url: "https://www.ebi.ac.uk/Tools/dbfetch/dbfetch/%1$s/%2$s/%3$s",
41
45
  method: :rest
42
46
  },
43
- ncbi:{
44
- dbs: { nuccore:{stage: :assembly, format: :fasta} },
47
+ ncbi: {
48
+ dbs: { nuccore: {stage: :assembly, format: :fasta} },
45
49
  url: "#{@@_EUTILS}efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
46
50
  method: :rest
47
51
  },
48
- ncbi_map:{
49
- dbs: { assembly:{map_to: :nuccore, format: :text} },
50
- # FIXME ncbi_map is intended to do internal NCBI mapping between
51
- # databases.
52
- url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%3$s - - - - -",
52
+ ncbi_map: {
53
+ dbs: {
54
+ nuccore: {stage: :metadata, map_to: [:biosample, :assembly],
55
+ format: :json},
56
+ biosample: {stage: :metadata, map_to: [:assembly], format: :json}
57
+ },
58
+ url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%4$s&retmode=%3$s",
53
59
  method: :rest,
54
60
  map_to_universe: :ncbi
55
61
  }
@@ -0,0 +1,84 @@
1
+
2
+ require 'miga/remote_dataset/base'
3
+
4
+ class MiGA::RemoteDataset
5
+ include MiGA::RemoteDataset::Base
6
+
7
+ # Class-level
8
+ class << self
9
+ ##
10
+ # Download data from the +universe+ in the database +db+ with IDs +ids+ and
11
+ # in +format+. If passed, it saves the result in +file+. Additional
12
+ # parameters specific to the download method can be passed using +extra+.
13
+ # Returns String.
14
+ def download(universe, db, ids, format, file = nil, extra = [])
15
+ ids = [ids] unless ids.is_a? Array
16
+ case @@UNIVERSE[universe][:method]
17
+ when :rest
18
+ doc = download_rest(universe, db, ids, format, extra)
19
+ when :net
20
+ doc = download_net(universe, db, ids, format, extra)
21
+ end
22
+ unless file.nil?
23
+ ofh = File.open(file, 'w')
24
+ ofh.print doc
25
+ ofh.close
26
+ end
27
+ doc
28
+ end
29
+
30
+ ##
31
+ # Download data using a REST method from the +universe+ in the database +db+
32
+ # with IDs +ids+ and in +format+. Additional URL parameters can be passed
33
+ # using +extra+. Returns the doc as String.
34
+ def download_rest(universe, db, ids, format, extra = [])
35
+ u = @@UNIVERSE[universe]
36
+ url ||= sprintf(u[:url], db, ids.join(","), format, *extra)
37
+ response = RestClient::Request.execute(method: :get, url:url, timeout:600)
38
+ unless response.code == 200
39
+ raise "Unable to reach #{universe} client, error code #{response.code}."
40
+ end
41
+ response.to_s
42
+ end
43
+
44
+ ##
45
+ # Download data using a GET request from the +universe+ in the database +db+
46
+ # with IDs +ids+ and in +format+. Additional URL parameters can be passed
47
+ # using +extra+. Returns the doc as String.
48
+ def download_net(universe, db, ids, format, extra = [])
49
+ u = @@UNIVERSE[universe]
50
+ url = sprintf(u[:url], db, ids.join(","), format, *extra)
51
+ doc = ""
52
+ @timeout_try = 0
53
+ begin
54
+ open(url) { |f| doc = f.read }
55
+ rescue Net::ReadTimeout
56
+ @timeout_try += 1
57
+ if @timeout_try > 3 ; raise Net::ReadTimeout
58
+ else ; retry
59
+ end
60
+ end
61
+ doc
62
+ end
63
+
64
+ ##
65
+ # Looks for the entry +id+ in +dbfrom+, and returns the linked
66
+ # identifier in +db+ (or nil).
67
+ def ncbi_map(id, dbfrom, db)
68
+ doc = download(:ncbi_map, dbfrom, id, :json, nil, [db])
69
+ return if doc.empty?
70
+ tree = JSON.parse(doc, symbolize_names: true)
71
+ tree.dig(:linksets, 0, :linksetdbs, 0, :links, 0)
72
+ end
73
+ end
74
+ end
75
+
76
+ module MiGA::RemoteDataset::Download
77
+
78
+ ##
79
+ # Download data into +file+.
80
+ def download(file)
81
+ self.class.download(universe, db, ids,
82
+ self.class.UNIVERSE[universe][:dbs][db][:format], file)
83
+ end
84
+ end
@@ -1,24 +1,25 @@
1
1
 
2
- require "miga/result/base"
2
+ require 'miga/result/base'
3
3
 
4
4
  ##
5
5
  # Helper module including date-specific functions for results.
6
6
  module MiGA::Result::Dates
7
-
8
7
  include MiGA::Result::Base
9
-
8
+
10
9
  ##
11
- # Returns the start date of processing as DateTime or +nil+ if it doesn't exist.
10
+ # Returns the start date of processing as DateTime or +nil+ if it doesn't
11
+ # exist.
12
12
  def started_at
13
13
  date_at :start
14
14
  end
15
15
 
16
16
  ##
17
- # Returns the end (done) date of processing as DateTime or +nil+ if it doesn't exist.
17
+ # Returns the end (done) date of processing as DateTime or +nil+ if it doesn't
18
+ # exist.
18
19
  def done_at
19
20
  date_at :done
20
21
  end
21
-
22
+
22
23
  ##
23
24
  # Time it took for the result to complete as Float in minutes.
24
25
  def running_time
@@ -27,16 +28,17 @@ module MiGA::Result::Dates
27
28
  (b - a).to_f * 24 * 60
28
29
  end
29
30
 
30
-
31
31
  private
32
32
 
33
33
  ##
34
34
  # Internal function to detect start and end dates
35
35
  def date_at(event)
36
- f = path event
37
- return nil unless File.size? f
38
- DateTime.parse File.read(f)
36
+ date = self[event]
37
+ if date.nil?
38
+ f = path event
39
+ date = File.read(f) if File.size? f
40
+ end
41
+ date.nil? ? nil : DateTime.parse(date)
39
42
  end
40
-
41
43
  end
42
44
 
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.3, 3, 1]
13
+ VERSION = [0.3, 4, 1]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -19,7 +19,7 @@ if [[ -s "$TF/$DATASET.1.fasta" \
19
19
  FastA.interpose.pl "$TF/$DATASET.CoupledReads.fa" "$TF/$DATASET".[12].fasta
20
20
  gzip -9 -f "$TF/$DATASET.1.fasta"
21
21
  gzip -9 -f "$TF/$DATASET.2.fasta"
22
- miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta
22
+ miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_fasta -f
23
23
  fi
24
24
 
25
25
  # Assemble
@@ -28,6 +28,7 @@ rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed.paired
28
28
  rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed.single
29
29
  rm -f "../02.trimmed_reads/$b".[12].fastq.trimmed
30
30
  rm -f "../02.trimmed_reads/$b".[12].fastq
31
+ miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads -f
31
32
 
32
33
  # Finalize
33
34
  miga date > "$DATASET.done"
@@ -19,6 +19,7 @@ for sis in 1 2 ; do
19
19
  && ! -e "../02.trimmed_reads/$b.$sis.clipped.fastq" ]] \
20
20
  && gunzip "../02.trimmed_reads/$b.$sis.clipped.fastq.gz"
21
21
  done
22
+ miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads -f
22
23
 
23
24
  # FastQ -> FastA
24
25
  FQ2A="$MIGA/utils/enveomics/Scripts/FastQ.toFastA.awk"
@@ -44,6 +45,8 @@ for sis in 1 2 ; do
44
45
  [[ -e "../02.trimmed_reads/$b.$sis.clipped.single.fastq" ]] \
45
46
  && gzip -9 -f "../02.trimmed_reads/$b.$sis.clipped.single.fastq"
46
47
  done
48
+ miga add_result -P "$PROJECT" -D "$DATASET" -r raw_reads -f
49
+ miga add_result -P "$PROJECT" -D "$DATASET" -r trimmed_reads -f
47
50
 
48
51
  # Finalize
49
52
  miga date > "$DATASET.done"
@@ -18,6 +18,7 @@ miga date > "$DATASET.start"
18
18
  && gunzip "../01.raw_reads/$b.1.fastq.gz"
19
19
  [[ -e "../01.raw_reads/$b.2.fastq.gz" && ! -e "../01.raw_reads/$b.2.fastq" ]] \
20
20
  && gunzip "../01.raw_reads/$b.2.fastq.gz"
21
+ miga add_result -P "$PROJECT" -D "$DATASET" -r raw_reads -f
21
22
 
22
23
  # Clean existing files
23
24
  exists "$b".[12].* && rm "$b".[12].*
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3.1
4
+ version: 0.3.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-29 00:00:00.000000000 Z
11
+ date: 2018-09-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rest-client
@@ -162,6 +162,7 @@ files:
162
162
  - lib/miga/project/result.rb
163
163
  - lib/miga/remote_dataset.rb
164
164
  - lib/miga/remote_dataset/base.rb
165
+ - lib/miga/remote_dataset/download.rb
165
166
  - lib/miga/result.rb
166
167
  - lib/miga/result/base.rb
167
168
  - lib/miga/result/dates.rb
@@ -499,7 +500,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
499
500
  requirements:
500
501
  - - ">="
501
502
  - !ruby/object:Gem::Version
502
- version: '1.9'
503
+ version: '2.3'
503
504
  required_rubygems_version: !ruby/object:Gem::Requirement
504
505
  requirements:
505
506
  - - ">="