miga-base 0.2.5.2 → 0.2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/actions/create_dataset.rb +2 -0
- data/actions/run_local.rb +43 -0
- data/bin/miga +1 -0
- data/lib/miga/common.rb +3 -3
- data/lib/miga/daemon.rb +29 -22
- data/lib/miga/dataset.rb +21 -20
- data/lib/miga/dataset_result.rb +50 -43
- data/lib/miga/metadata.rb +24 -18
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project_result.rb +12 -3
- data/lib/miga/remote_dataset.rb +9 -7
- data/lib/miga/result.rb +3 -5
- data/lib/miga/tax_index.rb +5 -5
- data/lib/miga/taxonomy.rb +9 -7
- data/lib/miga/version.rb +3 -4
- data/scripts/essential_genes.bash +1 -1
- data/scripts/project_stats.bash +24 -0
- data/test/dataset_test.rb +11 -0
- data/utils/index_metadata.rb +28 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 86777e753b2985fdceae0ee27c2d6270baf23e0e
|
4
|
+
data.tar.gz: bc5353979542701b28f35ad0fdc6bc68ee4d0c08
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9493d3aec9de2f8ba8131ca475bc9d982686168cf64c8c565c64215ecdb34978235419e5134674c9e4d625edf000868f9bd140b7df00cd29aa16f8120d2e27b3
|
7
|
+
data.tar.gz: 9f9125548016a2b29f840c8b8e120527eb3e9dcbbf7cc5a9c464e1bfcbc4e223bca4a3dce3c1e820afe0e7b45cca3307ec53241f5fd3cdb390112714c4353f87
|
data/actions/create_dataset.rb
CHANGED
@@ -44,6 +44,8 @@ end.parse!
|
|
44
44
|
##=> Main <=
|
45
45
|
opt_require(o)
|
46
46
|
opt_require(o, type:"-t") unless o[:update]
|
47
|
+
raise "Unrecognized dataset type: #{o[:type]}." if
|
48
|
+
(not o[:update]) and MiGA::Dataset.KNOWN_TYPES[o[:type]].nil?
|
47
49
|
|
48
50
|
$stderr.puts "Loading project." unless o[:q]
|
49
51
|
p = MiGA::Project.load(o[:project])
|
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# @package MiGA
|
4
|
+
# @license Artistic-2.0
|
5
|
+
|
6
|
+
require "shellwords"
|
7
|
+
|
8
|
+
o = {q:true, try_load:false, thr:1}
|
9
|
+
opts = OptionParser.new do |opt|
|
10
|
+
opt_banner(opt)
|
11
|
+
opt_object(opt, o, [:project, :dataset_opt, :result])
|
12
|
+
opt.on("-t", "--threads INT",
|
13
|
+
"Threads to use in the local run (by default: #{o[:thr]})."
|
14
|
+
){ |v| o[:thr] = v.to_i }
|
15
|
+
opt_common(opt, o)
|
16
|
+
end.parse!
|
17
|
+
|
18
|
+
##=> Main <=
|
19
|
+
opts.parse!
|
20
|
+
opt_require(o, project:"-P", name:"-r")
|
21
|
+
|
22
|
+
$stderr.puts "Loading project." unless o[:q]
|
23
|
+
p = MiGA::Project.load(o[:project])
|
24
|
+
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
25
|
+
|
26
|
+
miga = File.expand_path("../..", __FILE__)
|
27
|
+
cmd = ["PROJECT=#{p.path.shellescape}", "RUNTYPE=bash",
|
28
|
+
"MIGA=#{miga.shellescape}", "CORES=#{o[:thr]}"]
|
29
|
+
if o[:dataset].nil?
|
30
|
+
type = MiGA::Project
|
31
|
+
else
|
32
|
+
d = p.dataset(o[:dataset])
|
33
|
+
raise "Cannot load dataset." if d.nil?
|
34
|
+
cmd << "DATASET=#{d.name.shellescape}"
|
35
|
+
type = MiGA::Dataset
|
36
|
+
end
|
37
|
+
raise "Unsupported #{type.to_s.gsub(/.*::/,"")} result: #{o[:name]}." if
|
38
|
+
type.RESULT_DIRS[o[:name].to_sym].nil?
|
39
|
+
cmd << "#{miga}/scripts/#{o[:name]}.bash".shellescape
|
40
|
+
pid = spawn cmd.join(" ")
|
41
|
+
Process.wait pid
|
42
|
+
|
43
|
+
$stderr.puts "Done." unless o[:q]
|
data/bin/miga
CHANGED
@@ -28,6 +28,7 @@ $task_desc = {
|
|
28
28
|
result_stats: "Extracts statistics for the given result.",
|
29
29
|
list_files: "Lists all registered files from the results of a dataset or a "+
|
30
30
|
"project.",
|
31
|
+
run_local: "Executes locally one step analysis producing the given result.",
|
31
32
|
# System
|
32
33
|
daemon: "Controls the daemon of a MiGA project.",
|
33
34
|
date: "Returns the current date in standard MiGA format.",
|
data/lib/miga/common.rb
CHANGED
@@ -91,7 +91,7 @@ class MiGA::MiGA
|
|
91
91
|
buffer = ""
|
92
92
|
tmp.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, "_")}#{df}"
|
93
93
|
else
|
94
|
-
buffer
|
94
|
+
buffer << ln.gsub(/[^A-Za-z\.\-]/, "")
|
95
95
|
end
|
96
96
|
end
|
97
97
|
tmp.print buffer.wrap_width(80)
|
@@ -111,7 +111,7 @@ class MiGA::MiGA
|
|
111
111
|
def result_files_exist?(base, ext)
|
112
112
|
ext = [ext] unless ext.kind_of? Array
|
113
113
|
ext.all? do |f|
|
114
|
-
File.exist?(base + f) or File.exist?(base
|
114
|
+
File.exist?(base + f) or File.exist?("#{base}#{f}.gz")
|
115
115
|
end
|
116
116
|
end
|
117
117
|
|
@@ -156,7 +156,7 @@ class String
|
|
156
156
|
def miga_name? ; not(self !~ /^[A-Za-z0-9_]+$/) ; end
|
157
157
|
|
158
158
|
##
|
159
|
-
# Replace underscores by spaces.
|
159
|
+
# Replace underscores by spaces or dots (depending on context).
|
160
160
|
def unmiga_name ; gsub(/_(str|sp|subsp|pv)__/,"_\\1._").tr("_", " ") ; end
|
161
161
|
|
162
162
|
##
|
data/lib/miga/daemon.rb
CHANGED
@@ -14,7 +14,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
14
14
|
# active? Returns DateTime.
|
15
15
|
def self.last_alive(project)
|
16
16
|
f = File.expand_path("daemon/alive", project.path)
|
17
|
-
return nil unless File.
|
17
|
+
return nil unless File.exist? f
|
18
18
|
DateTime.parse(File.read(f))
|
19
19
|
end
|
20
20
|
|
@@ -63,10 +63,9 @@ class MiGA::Daemon < MiGA::MiGA
|
|
63
63
|
def runopts(k, v=nil, force=false)
|
64
64
|
k = k.to_sym
|
65
65
|
unless v.nil?
|
66
|
-
v =
|
67
|
-
|
68
|
-
|
69
|
-
v = !!v if [:shutdown_when_done].include? k
|
66
|
+
v = [:latency, :maxjobs, :ppn].include?(k) ? v.to_i :
|
67
|
+
[:shutdown_when_done].include?(k) ? !!v : v
|
68
|
+
raise "Daemon's #{k} cannot be set to zero." if !force and v==0
|
70
69
|
@runopts[k] = v
|
71
70
|
end
|
72
71
|
@runopts[k]
|
@@ -209,23 +208,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
209
208
|
# Launch as many +jobs_to_run+ as possible
|
210
209
|
while jobs_running.size < maxjobs
|
211
210
|
break if jobs_to_run.empty?
|
212
|
-
|
213
|
-
# Launch job
|
214
|
-
if runopts(:type) == "bash"
|
215
|
-
job[:pid] = spawn job[:cmd]
|
216
|
-
Process.detach job[:pid] unless [nil, "", 0].include? job[:pid]
|
217
|
-
else
|
218
|
-
job[:pid] = `#{job[:cmd]}`.chomp
|
219
|
-
end
|
220
|
-
# Check if registered
|
221
|
-
if [nil, "", 0].include? job[:pid].nil?
|
222
|
-
job[:pid] = nil
|
223
|
-
@jobs_to_run << job
|
224
|
-
say "Unsuccessful #{job[:task_name]}, rescheduling."
|
225
|
-
else
|
226
|
-
@jobs_running << job
|
227
|
-
say "Spawned pid:#{job[:pid]} for #{job[:task_name]}."
|
228
|
-
end
|
211
|
+
launch_job @jobs_to_run.shift
|
229
212
|
end
|
230
213
|
end
|
231
214
|
|
@@ -270,4 +253,28 @@ class MiGA::Daemon < MiGA::MiGA
|
|
270
253
|
print "[#{Time.new.inspect}] ", *opts, "\n"
|
271
254
|
end
|
272
255
|
|
256
|
+
private
|
257
|
+
|
258
|
+
def launch_job(job)
|
259
|
+
# Execute job
|
260
|
+
if runopts(:type) == "bash"
|
261
|
+
# Local job
|
262
|
+
job[:pid] = spawn job[:cmd]
|
263
|
+
Process.detach job[:pid] unless [nil, "", 0].include?(job[:pid])
|
264
|
+
else
|
265
|
+
# Schedule cluster job
|
266
|
+
job[:pid] = `#{job[:cmd]}`.chomp
|
267
|
+
end
|
268
|
+
|
269
|
+
# Check if registered
|
270
|
+
if [nil, "", 0].include?(job[:pid])
|
271
|
+
job[:pid] = nil
|
272
|
+
@jobs_to_run << job
|
273
|
+
say "Unsuccessful #{job[:task_name]}, rescheduling."
|
274
|
+
else
|
275
|
+
@jobs_running << job
|
276
|
+
say "Spawned pid:#{job[:pid]} for #{job[:task_name]}."
|
277
|
+
end
|
278
|
+
end
|
279
|
+
|
273
280
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -26,9 +26,6 @@ class MiGA::Dataset < MiGA::MiGA
|
|
26
26
|
ssu: "07.annotation/01.function/02.ssu",
|
27
27
|
mytaxa: "07.annotation/02.taxonomy/01.mytaxa",
|
28
28
|
mytaxa_scan: "07.annotation/03.qa/02.mytaxa_scan",
|
29
|
-
# Mapping
|
30
|
-
mapping_on_contigs: "08.mapping/01.read-ctg",
|
31
|
-
mapping_on_genes: "08.mapping/02.read-gene",
|
32
29
|
# Distances (for single-species datasets)
|
33
30
|
distances: "09.distances",
|
34
31
|
# General statistics
|
@@ -59,21 +56,24 @@ class MiGA::Dataset < MiGA::MiGA
|
|
59
56
|
##
|
60
57
|
# Tasks to be excluded from query datasets.
|
61
58
|
@@EXCLUDE_NOREF_TASKS = [:mytaxa_scan]
|
59
|
+
@@_EXCLUDE_NOREF_TASKS_H = Hash[@@EXCLUDE_NOREF_TASKS.map{ |i| [i,true] }]
|
62
60
|
|
63
61
|
##
|
64
62
|
# Tasks to be executed only in datasets that are not multi-organism. These
|
65
63
|
# tasks are ignored for multi-organism datasets or for unknown types.
|
66
64
|
@@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances]
|
65
|
+
@@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map{ |i| [i,true] }]
|
67
66
|
|
68
67
|
##
|
69
68
|
# Tasks to be executed only in datasets that are multi-organism. These
|
70
69
|
# tasks are ignored for single-organism datasets or for unknwon types.
|
71
70
|
@@ONLY_MULTI_TASKS = [:mytaxa]
|
71
|
+
@@_ONLY_MULTI_TASKS_H = Hash[@@ONLY_MULTI_TASKS.map{ |i| [i,true] }]
|
72
72
|
|
73
73
|
##
|
74
74
|
# Does the +project+ already have a dataset with that +name+?
|
75
75
|
def self.exist?(project, name)
|
76
|
-
File.exist? project.path
|
76
|
+
File.exist? "#{project.path}/metadata/#{name}.json"
|
77
77
|
end
|
78
78
|
|
79
79
|
##
|
@@ -109,8 +109,6 @@ class MiGA::Dataset < MiGA::MiGA
|
|
109
109
|
metadata[:ref] = is_ref
|
110
110
|
@metadata = MiGA::Metadata.new(
|
111
111
|
File.expand_path("metadata/#{name}.json", project.path), metadata )
|
112
|
-
warn "Warning: Unrecognized dataset type: #{type}." if
|
113
|
-
!type.nil? and @@KNOWN_TYPES[type].nil?
|
114
112
|
end
|
115
113
|
|
116
114
|
##
|
@@ -165,8 +163,8 @@ class MiGA::Dataset < MiGA::MiGA
|
|
165
163
|
# Get the result MiGA::Result in this dataset identified by the symbol +k+.
|
166
164
|
def result(k)
|
167
165
|
return nil if @@RESULT_DIRS[k.to_sym].nil?
|
168
|
-
MiGA::Result.load(
|
169
|
-
"/
|
166
|
+
MiGA::Result.load(
|
167
|
+
"#{project.path}/data/#{@@RESULT_DIRS[k.to_sym]}/#{name}.json" )
|
170
168
|
end
|
171
169
|
|
172
170
|
##
|
@@ -184,16 +182,18 @@ class MiGA::Dataset < MiGA::MiGA
|
|
184
182
|
##
|
185
183
|
# Look for the result with symbol key +result_type+ and register it in the
|
186
184
|
# dataset. If +save+ is false, it doesn't register the result, but it still
|
187
|
-
# returns a result if the expected files are complete.
|
188
|
-
#
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
185
|
+
# returns a result if the expected files are complete. The +opts+ array
|
186
|
+
# controls result creation (if necessary). Supported values include:
|
187
|
+
# - +is_clean+: A Boolean indicating if the input files are clean.
|
188
|
+
# Returns MiGA::Result or nil.
|
189
|
+
def add_result(result_type, save=true, opts={})
|
190
|
+
dir = @@RESULT_DIRS[result_type]
|
191
|
+
return nil if dir.nil?
|
192
|
+
base = File.expand_path("data/#{dir}/#{name}", project.path)
|
193
193
|
r_pre = MiGA::Result.load("#{base}.json")
|
194
194
|
return r_pre if (r_pre.nil? and not save) or not r_pre.nil?
|
195
|
-
|
196
|
-
|
195
|
+
r = File.exist?("#{base}.done") ?
|
196
|
+
self.send("add_result_#{result_type}", base, opts) : nil
|
197
197
|
r.save unless r.nil?
|
198
198
|
r
|
199
199
|
end
|
@@ -232,11 +232,12 @@ class MiGA::Dataset < MiGA::MiGA
|
|
232
232
|
# Should I ignore +task+ for this dataset?
|
233
233
|
def ignore_task?(task)
|
234
234
|
return !metadata["run_#{task}"] unless metadata["run_#{task}"].nil?
|
235
|
-
|
236
|
-
|
237
|
-
|
235
|
+
pattern = [true, false]
|
236
|
+
( [@@_EXCLUDE_NOREF_TASKS_H[task], is_ref? ]==pattern or
|
237
|
+
[@@_ONLY_MULTI_TASKS_H[task], is_multi? ]==pattern or
|
238
|
+
[@@_ONLY_NONMULTI_TASKS_H[task], is_nonmulti?]==pattern )
|
238
239
|
end
|
239
|
-
|
240
|
+
|
240
241
|
##
|
241
242
|
# Are all the dataset-specific tasks done? Passes +save+ to #add_result.
|
242
243
|
def done_preprocessing?(save=false)
|
data/lib/miga/dataset_result.rb
CHANGED
@@ -27,10 +27,10 @@ module MiGA::DatasetResult
|
|
27
27
|
private
|
28
28
|
|
29
29
|
##
|
30
|
-
# Add result type +:raw_reads+ at +base
|
31
|
-
def add_result_raw_reads(base)
|
30
|
+
# Add result type +:raw_reads+ at +base+ (no +opts+ supported).
|
31
|
+
def add_result_raw_reads(base, opts)
|
32
32
|
return nil unless result_files_exist?(base, ".1.fastq")
|
33
|
-
r = MiGA::Result.new(base
|
33
|
+
r = MiGA::Result.new("#{base}.json")
|
34
34
|
r = add_files_to_ds_result(r, name,
|
35
35
|
( result_files_exist?(base, ".2.fastq") ?
|
36
36
|
{:pair1=>".1.fastq", :pair2=>".2.fastq"} :
|
@@ -38,24 +38,24 @@ module MiGA::DatasetResult
|
|
38
38
|
end
|
39
39
|
|
40
40
|
##
|
41
|
-
# Add result type +:trimmed_reads+ at +base
|
42
|
-
def add_result_trimmed_reads(base)
|
41
|
+
# Add result type +:trimmed_reads+ at +base+ (no +opts+ supported).
|
42
|
+
def add_result_trimmed_reads(base, opts)
|
43
43
|
return nil unless result_files_exist?(base, ".1.clipped.fastq")
|
44
|
-
r = MiGA::Result.new
|
44
|
+
r = MiGA::Result.new("#{base}.json")
|
45
45
|
r = add_files_to_ds_result(r, name,
|
46
46
|
{:pair1=>".1.clipped.fastq", :pair2=>".2.clipped.fastq"}) if
|
47
47
|
result_files_exist?(base, ".2.clipped.fastq")
|
48
|
-
r.add_file(:single, name
|
49
|
-
r.add_file(:trimming_sumary, name
|
48
|
+
r.add_file(:single, "#{name}.1.clipped.single.fastq")
|
49
|
+
r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
|
50
50
|
add_result(:raw_reads) #-> Post gunzip
|
51
51
|
r
|
52
52
|
end
|
53
53
|
|
54
54
|
##
|
55
|
-
# Add result type +:read_quality+ at +base
|
56
|
-
def add_result_read_quality(base)
|
55
|
+
# Add result type +:read_quality+ at +base+ (no +opts+ supported).
|
56
|
+
def add_result_read_quality(base, opts)
|
57
57
|
return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
|
58
|
-
r = MiGA::Result.new(base
|
58
|
+
r = MiGA::Result.new("#{base}.json")
|
59
59
|
r = add_files_to_ds_result(r, name,
|
60
60
|
{:solexaqa=>".solexaqa", :fastqc=>".fastqc"})
|
61
61
|
add_result(:trimmed_reads) #-> Post cleaning
|
@@ -63,13 +63,13 @@ module MiGA::DatasetResult
|
|
63
63
|
end
|
64
64
|
|
65
65
|
##
|
66
|
-
# Add result type +:trimmed_fasta+ at +base
|
67
|
-
def add_result_trimmed_fasta(base)
|
66
|
+
# Add result type +:trimmed_fasta+ at +base+ (no +opts+ supported).
|
67
|
+
def add_result_trimmed_fasta(base, opts)
|
68
68
|
return nil unless
|
69
69
|
result_files_exist?(base, ".CoupledReads.fa") or
|
70
70
|
result_files_exist?(base, ".SingleReads.fa") or
|
71
71
|
result_files_exist?(base, %w[.1.fasta .2.fasta])
|
72
|
-
r = MiGA::Result.new
|
72
|
+
r = MiGA::Result.new("#{base}.json")
|
73
73
|
r = add_files_to_ds_result(r, name, {:coupled=>".CoupledReads.fa",
|
74
74
|
:single=>".SingleReads.fa", :pair1=>".1.fasta", :pair2=>".2.fasta"})
|
75
75
|
add_result(:raw_reads) #-> Post gzip
|
@@ -77,12 +77,15 @@ module MiGA::DatasetResult
|
|
77
77
|
end
|
78
78
|
|
79
79
|
##
|
80
|
-
# Add result type +:assembly+ at +base+.
|
81
|
-
|
80
|
+
# Add result type +:assembly+ at +base+. Hash +opts+ supports
|
81
|
+
# +is_clean: Boolean+.
|
82
|
+
def add_result_assembly(base, opts)
|
82
83
|
return nil unless result_files_exist?(base, ".LargeContigs.fna")
|
83
|
-
r = MiGA::Result.new(base
|
84
|
+
r = MiGA::Result.new("#{base}.json")
|
84
85
|
r = add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
|
85
86
|
:allcontigs=>".AllContigs.fna", :assembly_data=>""})
|
87
|
+
opts[:is_clean] ||= false
|
88
|
+
r.clean! if opts[:is_clean]
|
86
89
|
unless r.clean?
|
87
90
|
MiGA::MiGA.clean_fasta_file(r.file_path :largecontigs)
|
88
91
|
r.clean!
|
@@ -92,12 +95,14 @@ module MiGA::DatasetResult
|
|
92
95
|
end
|
93
96
|
|
94
97
|
##
|
95
|
-
# Add result type +:cds+ at +base+.
|
96
|
-
def add_result_cds(base)
|
98
|
+
# Add result type +:cds+ at +base+. Hash +opts+ supports +is_clean: Boolean+
|
99
|
+
def add_result_cds(base, opts)
|
97
100
|
return nil unless result_files_exist?(base, %w[.faa .fna])
|
98
|
-
r = MiGA::Result.new(base
|
101
|
+
r = MiGA::Result.new("#{base}.json")
|
99
102
|
r = add_files_to_ds_result(r, name, {:proteins=>".faa", :genes=>".fna",
|
100
103
|
:gff2=>".gff2", :gff3=>".gff3", :tab=>".tab"})
|
104
|
+
opts[:is_clean] ||= false
|
105
|
+
r.clean! if opts[:is_clean]
|
101
106
|
unless r.clean?
|
102
107
|
MiGA::MiGA.clean_fasta_file(r.file_path :proteins)
|
103
108
|
MiGA::MiGA.clean_fasta_file(r.file_path :genes)
|
@@ -107,22 +112,24 @@ module MiGA::DatasetResult
|
|
107
112
|
end
|
108
113
|
|
109
114
|
##
|
110
|
-
# Add result type +:essential_genes+ at +base
|
111
|
-
def add_result_essential_genes(base)
|
115
|
+
# Add result type +:essential_genes+ at +base+ (no +opts+ supported).
|
116
|
+
def add_result_essential_genes(base, opts)
|
112
117
|
return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
|
113
|
-
r = MiGA::Result.new(base
|
118
|
+
r = MiGA::Result.new("#{base}.json")
|
114
119
|
r = add_files_to_ds_result(r, name, {:ess_genes=>".ess.faa",
|
115
120
|
:collection=>".ess", :report=>".ess/log"})
|
116
121
|
end
|
117
122
|
|
118
123
|
##
|
119
|
-
# Add result type +:ssu+ at +base+.
|
120
|
-
def add_result_ssu(base)
|
121
|
-
return MiGA::Result.new(base
|
124
|
+
# Add result type +:ssu+ at +base+. Hash +opts+ supports +is_clean: Boolean+
|
125
|
+
def add_result_ssu(base, opts)
|
126
|
+
return MiGA::Result.new("#{base}.json") if result(:assembly).nil?
|
122
127
|
return nil unless result_files_exist?(base, ".ssu.fa")
|
123
|
-
r = MiGA::Result.new(base
|
128
|
+
r = MiGA::Result.new("#{base}.json")
|
124
129
|
r = add_files_to_ds_result(r, name, {:longest_ssu_gene=>".ssu.fa",
|
125
130
|
:gff=>".ssu.gff", :all_ssu_genes=>".ssu.all.fa"})
|
131
|
+
opts[:is_clean] ||= false
|
132
|
+
r.clean! if opts[:is_clean]
|
126
133
|
unless r.clean?
|
127
134
|
MiGA::MiGA.clean_fasta_file(r.file_path :longest_ssu_gene)
|
128
135
|
r.clean!
|
@@ -131,37 +138,37 @@ module MiGA::DatasetResult
|
|
131
138
|
end
|
132
139
|
|
133
140
|
##
|
134
|
-
# Add result type +:mytaxa+ at +base
|
135
|
-
def add_result_mytaxa(base)
|
141
|
+
# Add result type +:mytaxa+ at +base+ (no +opts+ supported).
|
142
|
+
def add_result_mytaxa(base, opts)
|
136
143
|
if is_multi?
|
137
144
|
return nil unless result_files_exist?(base, ".mytaxa")
|
138
|
-
r = MiGA::Result.new(base
|
145
|
+
r = MiGA::Result.new("#{base}.json")
|
139
146
|
add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :blast=>".blast",
|
140
147
|
:mytaxain=>".mytaxain"})
|
141
148
|
else
|
142
|
-
MiGA::Result.new(base
|
149
|
+
MiGA::Result.new("#{base}.json")
|
143
150
|
end
|
144
151
|
end
|
145
152
|
|
146
153
|
##
|
147
|
-
# Add result type +:mytaxa_scan+ at +base
|
148
|
-
def add_result_mytaxa_scan(base)
|
154
|
+
# Add result type +:mytaxa_scan+ at +base+ (no +opts+ supported).
|
155
|
+
def add_result_mytaxa_scan(base, opts)
|
149
156
|
if is_nonmulti?
|
150
157
|
return nil unless
|
151
158
|
result_files_exist?(base, %w[.pdf .wintax .mytaxa .reg])
|
152
|
-
r = MiGA::Result.new(base
|
159
|
+
r = MiGA::Result.new("#{base}.json")
|
153
160
|
add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :wintax=>".wintax",
|
154
161
|
:blast=>".blast", :mytaxain=>".mytaxain", :report=>".pdf",
|
155
162
|
:regions=>".reg", :gene_ids=>".wintax.genes",
|
156
163
|
:region_ids=>".wintax.regions"})
|
157
164
|
else
|
158
|
-
MiGA::Result.new
|
165
|
+
MiGA::Result.new("#{base}.json")
|
159
166
|
end
|
160
167
|
end
|
161
168
|
|
162
169
|
##
|
163
|
-
# Add result type +:distances+ at +base
|
164
|
-
def add_result_distances(base)
|
170
|
+
# Add result type +:distances+ at +base+ (no +opts+ supported).
|
171
|
+
def add_result_distances(base, opts)
|
165
172
|
if is_nonmulti?
|
166
173
|
if is_ref?
|
167
174
|
add_result_distances_ref(base)
|
@@ -174,15 +181,15 @@ module MiGA::DatasetResult
|
|
174
181
|
end
|
175
182
|
|
176
183
|
##
|
177
|
-
# Add result type +:stats+ at +base
|
178
|
-
def add_result_stats(base)
|
179
|
-
MiGA::Result.new
|
184
|
+
# Add result type +:stats+ at +base+ (no +opts+ supported).
|
185
|
+
def add_result_stats(base, opts)
|
186
|
+
MiGA::Result.new("#{base}.json")
|
180
187
|
end
|
181
188
|
|
182
189
|
##
|
183
190
|
# Add result type +:distances+ for _multi_ datasets at +base+.
|
184
191
|
def add_result_distances_multi(base)
|
185
|
-
MiGA::Result.new
|
192
|
+
MiGA::Result.new("#{base}.json")
|
186
193
|
end
|
187
194
|
|
188
195
|
##
|
@@ -191,7 +198,7 @@ module MiGA::DatasetResult
|
|
191
198
|
pref = File.dirname(base)
|
192
199
|
return nil unless
|
193
200
|
File.exist?("#{pref}/01.haai/#{name}.db")
|
194
|
-
r = MiGA::Result.new(base
|
201
|
+
r = MiGA::Result.new("#{base}.json")
|
195
202
|
r.add_files({:haai_db=>"01.haai/#{name}.db",
|
196
203
|
:aai_db=>"02.aai/#{name}.db", :ani_db=>"03.ani/#{name}.db"})
|
197
204
|
r
|
@@ -203,7 +210,7 @@ module MiGA::DatasetResult
|
|
203
210
|
return nil unless
|
204
211
|
result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
|
205
212
|
result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
|
206
|
-
r = MiGA::Result.new(base
|
213
|
+
r = MiGA::Result.new("#{base}.json")
|
207
214
|
r = add_files_to_ds_result(r, name, {
|
208
215
|
:aai_medoids=>".aai-medoids.tsv",
|
209
216
|
:haai_db=>".haai.db", :aai_db=>".aai.db",
|
data/lib/miga/metadata.rb
CHANGED
@@ -9,7 +9,7 @@ class MiGA::Metadata < MiGA::MiGA
|
|
9
9
|
|
10
10
|
##
|
11
11
|
# Does the metadata described in +path+ already exist?
|
12
|
-
def self.exist?(path) File.
|
12
|
+
def self.exist?(path) File.exist? path end
|
13
13
|
|
14
14
|
##
|
15
15
|
# Load the metadata described in +path+ and return MiGA::Metadata if it
|
@@ -24,34 +24,39 @@ class MiGA::Metadata < MiGA::MiGA
|
|
24
24
|
##
|
25
25
|
# Path to the JSON file describing the metadata.
|
26
26
|
attr_reader :path
|
27
|
-
|
28
|
-
##
|
29
|
-
# Parsed data as a Hash.
|
30
|
-
attr_reader :data
|
31
27
|
|
32
28
|
##
|
33
29
|
# Initiate a MiGA::Metadata object with description in +path+. It will create
|
34
30
|
# it if it doesn't exist.
|
35
31
|
def initialize(path, defaults={})
|
32
|
+
@data = nil
|
36
33
|
@path = File.absolute_path(path)
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
34
|
+
unless File.exist? path
|
35
|
+
@data = {}
|
36
|
+
defaults.each_pair{ |k,v| self[k]=v }
|
37
|
+
create
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
##
|
42
|
+
# Parsed data as a Hash.
|
43
|
+
def data
|
44
|
+
self.load if @data.nil?
|
45
|
+
@data
|
41
46
|
end
|
42
47
|
|
43
48
|
##
|
44
49
|
# Reset :created field and save the current data.
|
45
50
|
def create
|
46
|
-
|
47
|
-
|
51
|
+
self[:created] = Time.now.to_s
|
52
|
+
save
|
48
53
|
end
|
49
54
|
|
50
55
|
##
|
51
56
|
# Save the metadata into #path.
|
52
57
|
def save
|
53
58
|
MiGA.DEBUG "Metadata.save #{path}"
|
54
|
-
|
59
|
+
self[:updated] = Time.now.to_s
|
55
60
|
json = JSON.pretty_generate(data)
|
56
61
|
sleeper = 0.0
|
57
62
|
while File.exist?(lock_file)
|
@@ -59,12 +64,12 @@ class MiGA::Metadata < MiGA::MiGA
|
|
59
64
|
sleep(sleeper.to_i)
|
60
65
|
end
|
61
66
|
FileUtils.touch lock_file
|
62
|
-
ofh = File.open(path
|
67
|
+
ofh = File.open("#{path}.tmp", "w")
|
63
68
|
ofh.puts json
|
64
69
|
ofh.close
|
65
70
|
raise "Lock-racing detected for #{path}." unless
|
66
|
-
File.exist?(path
|
67
|
-
File.rename(path
|
71
|
+
File.exist?("#{path}.tmp") and File.exist?(lock_file)
|
72
|
+
File.rename("#{path}.tmp", path)
|
68
73
|
File.unlink(lock_file)
|
69
74
|
end
|
70
75
|
|
@@ -86,14 +91,14 @@ class MiGA::Metadata < MiGA::MiGA
|
|
86
91
|
##
|
87
92
|
# Delete file at #path.
|
88
93
|
def remove!
|
89
|
-
MiGA.DEBUG "Metadata.remove! #{
|
90
|
-
File.unlink(
|
94
|
+
MiGA.DEBUG "Metadata.remove! #{path}"
|
95
|
+
File.unlink(path)
|
91
96
|
nil
|
92
97
|
end
|
93
98
|
|
94
99
|
##
|
95
100
|
# Lock file for the metadata.
|
96
|
-
def lock_file ; path
|
101
|
+
def lock_file ; "#{path}.lock" ; end
|
97
102
|
|
98
103
|
##
|
99
104
|
# Return the value of +k+ in #data.
|
@@ -102,6 +107,7 @@ class MiGA::Metadata < MiGA::MiGA
|
|
102
107
|
##
|
103
108
|
# Set the value of +k+ to +v+.
|
104
109
|
def []=(k,v)
|
110
|
+
self.load if @data.nil?
|
105
111
|
k = k.to_sym
|
106
112
|
# Protect the special field :name
|
107
113
|
v=v.miga_name if k==:name
|
data/lib/miga/project.rb
CHANGED
@@ -49,11 +49,11 @@ class MiGA::Project < MiGA::MiGA
|
|
49
49
|
clade_finding: "10.clades/01.find",
|
50
50
|
# Clade analysis
|
51
51
|
subclades: "10.clades/02.ani",
|
52
|
-
ogs: "10.clades/03.ogs"
|
52
|
+
ogs: "10.clades/03.ogs",
|
53
53
|
#ess_phylogeny: "10.clades/04.phylogeny/01.essential",
|
54
54
|
#core_phylogeny: "10.clades/04.phylogeny/02.core",
|
55
55
|
#clade_metadata: "10.clades/05.metadata"
|
56
|
-
|
56
|
+
project_stats: "90.stats"
|
57
57
|
}
|
58
58
|
|
59
59
|
##
|
@@ -85,7 +85,7 @@ class MiGA::Project < MiGA::MiGA
|
|
85
85
|
##
|
86
86
|
# Does the project at +path+ exist?
|
87
87
|
def self.exist?(path)
|
88
|
-
Dir.exist?(path) and File.exist?(path
|
88
|
+
Dir.exist?(path) and File.exist?("#{path}/miga.project.json")
|
89
89
|
end
|
90
90
|
|
91
91
|
##
|
@@ -130,7 +130,7 @@ class MiGA::Project < MiGA::MiGA
|
|
130
130
|
dirs.each{ |d| Dir.mkdir(d) unless Dir.exist? d }
|
131
131
|
@metadata = MiGA::Metadata.new(self.path + "/miga.project.json",
|
132
132
|
{datasets: [], name: File.basename(path)})
|
133
|
-
FileUtils.cp(ENV["MIGA_HOME"]
|
133
|
+
FileUtils.cp("#{ENV["MIGA_HOME"]}/.miga_daemon.json",
|
134
134
|
"#{path}/daemon/daemon.json") unless
|
135
135
|
File.exist? "#{path}/daemon/daemon.json"
|
136
136
|
self.load
|
@@ -252,9 +252,9 @@ class MiGA::Project < MiGA::MiGA
|
|
252
252
|
##
|
253
253
|
# Get result identified by Symbol +name+, returns MiGA::Result.
|
254
254
|
def result(name)
|
255
|
-
|
256
|
-
|
257
|
-
|
255
|
+
dir = @@RESULT_DIRS[name.to_sym]
|
256
|
+
return nil if dir.nil?
|
257
|
+
MiGA::Result.load("#{path}/data/#{dir}/miga-project.json")
|
258
258
|
end
|
259
259
|
|
260
260
|
##
|
@@ -269,7 +269,7 @@ class MiGA::Project < MiGA::MiGA
|
|
269
269
|
def add_result(name, save=true)
|
270
270
|
return nil if @@RESULT_DIRS[name].nil?
|
271
271
|
base = "#{path}/data/#{@@RESULT_DIRS[name]}/miga-project"
|
272
|
-
return MiGA::Result.load(base
|
272
|
+
return MiGA::Result.load("#{base}.json") unless save
|
273
273
|
return nil unless result_files_exist?(base, ".done")
|
274
274
|
r = send("add_result_#{name}", base)
|
275
275
|
r.save unless r.nil?
|
data/lib/miga/project_result.rb
CHANGED
@@ -11,7 +11,7 @@ module MiGA::ProjectResult
|
|
11
11
|
# Internal alias for all add_result_*_distances.
|
12
12
|
def add_result_distances(base)
|
13
13
|
return nil unless result_files_exist?(base, %w[.Rdata .log .txt])
|
14
|
-
r = MiGA::Result.new(base
|
14
|
+
r = MiGA::Result.new("#{base}.json")
|
15
15
|
r.add_file(:rdata, "miga-project.Rdata")
|
16
16
|
r.add_file(:matrix, "miga-project.txt")
|
17
17
|
r.add_file(:log, "miga-project.log")
|
@@ -40,7 +40,7 @@ module MiGA::ProjectResult
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def add_result_iter_clades(base)
|
43
|
-
r = MiGA::Result.new(base
|
43
|
+
r = MiGA::Result.new("#{base}.json")
|
44
44
|
r.add_file(:report, "miga-project.pdf")
|
45
45
|
r.add_file(:class_table, "miga-project.class.tsv")
|
46
46
|
r.add_file(:class_tree, "miga-project.class.nwk")
|
@@ -51,13 +51,22 @@ module MiGA::ProjectResult
|
|
51
51
|
|
52
52
|
def add_result_ogs(base)
|
53
53
|
return nil unless result_files_exist?(base, %w[.ogs .stats])
|
54
|
-
r = MiGA::Result.new(base
|
54
|
+
r = MiGA::Result.new("#{base}.json")
|
55
55
|
r.add_file(:ogs, "miga-project.ogs")
|
56
56
|
r.add_file(:stats, "miga-project.stats")
|
57
57
|
r.add_file(:rbm, "miga-project.rbm")
|
58
58
|
r
|
59
59
|
end
|
60
60
|
|
61
|
+
def add_result_project_stats(base)
|
62
|
+
return nil unless
|
63
|
+
result_files_exist?(base, %w[.taxonomy.json .metadata.db])
|
64
|
+
r = MiGA::Result.new("#{base}.json")
|
65
|
+
r.add_file(:taxonomy_index, "miga-project.taxonomy.json")
|
66
|
+
r.add_file(:metadata_index, "miga-project.metadata.db")
|
67
|
+
r
|
68
|
+
end
|
69
|
+
|
61
70
|
alias add_result_haai_distances add_result_distances
|
62
71
|
alias add_result_aai_distances add_result_distances
|
63
72
|
alias add_result_ani_distances add_result_distances
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -9,6 +9,7 @@ require "open-uri"
|
|
9
9
|
class MiGA::RemoteDataset < MiGA::MiGA
|
10
10
|
# Class-level
|
11
11
|
|
12
|
+
@@_EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
|
12
13
|
##
|
13
14
|
# Structure of the different database Universes or containers. The structure
|
14
15
|
# is a Hash with universe names as keys as Symbol and values being a Hash with
|
@@ -37,16 +38,14 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
37
38
|
},
|
38
39
|
ncbi:{
|
39
40
|
dbs: { nuccore:{stage: :assembly, format: :fasta} },
|
40
|
-
url: "
|
41
|
-
"efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
|
41
|
+
url: "#{@@_EUTILS}efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
|
42
42
|
method: :rest
|
43
43
|
},
|
44
44
|
ncbi_map:{
|
45
45
|
dbs: { assembly:{map_to: :nuccore, format: :text} },
|
46
|
-
url: "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" +
|
47
46
|
# FIXME ncbi_map is intended to do internal NCBI mapping between
|
48
47
|
# databases.
|
49
|
-
|
48
|
+
url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%3$s - - - - -",
|
50
49
|
method: :rest,
|
51
50
|
map_to_universe: :ncbi
|
52
51
|
}
|
@@ -127,8 +126,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
127
126
|
metadata = get_metadata(metadata)
|
128
127
|
case @@UNIVERSE[universe][:dbs][db][:stage]
|
129
128
|
when :assembly
|
130
|
-
|
131
|
-
|
129
|
+
dir = MiGA::Dataset.RESULT_DIRS[:assembly]
|
130
|
+
base = "#{project.path}/data/#{dir}/#{name}"
|
132
131
|
File.open("#{base}.start", "w") { |ofh| ofh.puts Time.now.to_s }
|
133
132
|
if @@UNIVERSE[universe][:dbs][db][:format] == :fasta_gz
|
134
133
|
download("#{base}.LargeContigs.fna.gz")
|
@@ -144,9 +143,12 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
144
143
|
end
|
145
144
|
dataset = MiGA::Dataset.new(project, name, is_ref, metadata)
|
146
145
|
project.add_dataset(dataset.name)
|
147
|
-
result = dataset.add_result
|
146
|
+
result = dataset.add_result(@@UNIVERSE[universe][:dbs][db][:stage],
|
147
|
+
true, is_clean:true)
|
148
148
|
raise "Empty dataset created: seed result was not added due to "+
|
149
149
|
"incomplete files." if result.nil?
|
150
|
+
result.clean!
|
151
|
+
result.save
|
150
152
|
dataset
|
151
153
|
end
|
152
154
|
|
data/lib/miga/result.rb
CHANGED
@@ -9,9 +9,7 @@ class MiGA::Result < MiGA::MiGA
|
|
9
9
|
|
10
10
|
##
|
11
11
|
# Check if the result described by the JSON in +path+ already exists.
|
12
|
-
def self.exist?(path)
|
13
|
-
!!(File.size? path)
|
14
|
-
end
|
12
|
+
def self.exist?(path) File.exist? path end
|
15
13
|
|
16
14
|
##
|
17
15
|
# Load the result described by the JSON in +path+. Returns MiGA::Result if it
|
@@ -82,8 +80,8 @@ class MiGA::Result < MiGA::MiGA
|
|
82
80
|
k = k.to_sym
|
83
81
|
@data[:files] ||= {}
|
84
82
|
@data[:files][k] = file if File.exist? File.expand_path(file, dir)
|
85
|
-
@data[:files][k] = file
|
86
|
-
File.exist? File.expand_path(file
|
83
|
+
@data[:files][k] = "#{file}.gz" if
|
84
|
+
File.exist? File.expand_path("#{file}.gz", dir)
|
87
85
|
end
|
88
86
|
|
89
87
|
##
|
data/lib/miga/tax_index.rb
CHANGED
@@ -115,13 +115,13 @@ class MiGA::TaxIndexTaxon < MiGA::MiGA
|
|
115
115
|
##
|
116
116
|
# Get the number of datasets in the taxon (including children).
|
117
117
|
def datasets_count
|
118
|
-
|
118
|
+
children.map{ |it| it.datasets_count }.reduce(datasets.size, :+)
|
119
119
|
end
|
120
120
|
|
121
121
|
##
|
122
122
|
# Get all the datasets in the taxon (including children).
|
123
123
|
def all_datasets
|
124
|
-
|
124
|
+
children.map{ |it| it.datasets }.reduce(datasets, :+)
|
125
125
|
end
|
126
126
|
|
127
127
|
##
|
@@ -142,11 +142,11 @@ class MiGA::TaxIndexTaxon < MiGA::MiGA
|
|
142
142
|
# Tabular String of the taxon.
|
143
143
|
def to_tab(unknown, indent=0)
|
144
144
|
o = ""
|
145
|
-
o =
|
145
|
+
o = "#{" " * indent}#{tax_str}: #{datasets_count}\n" if
|
146
146
|
unknown or not datasets.empty? or not name.nil?
|
147
147
|
indent += 2
|
148
|
-
datasets.each{ |ds| o
|
149
|
-
children.each{ |it| o
|
148
|
+
datasets.each{ |ds| o << "#{" " * indent}# #{ds.name}\n" }
|
149
|
+
children.each{ |it| o << it.to_tab(unknown, indent) }
|
150
150
|
o
|
151
151
|
end
|
152
152
|
|
data/lib/miga/taxonomy.rb
CHANGED
@@ -10,6 +10,7 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
10
10
|
# Cannonical ranks.
|
11
11
|
def self.KNOWN_RANKS() @@KNOWN_RANKS ; end
|
12
12
|
@@KNOWN_RANKS = %w{ns d k p c o f g s ssp str ds}.map{|r| r.to_sym}
|
13
|
+
@@_KNOWN_RANKS_H = Hash[ @@KNOWN_RANKS.map{ |i| [i,true] } ]
|
13
14
|
|
14
15
|
##
|
15
16
|
# Long names of the cannonical ranks.
|
@@ -42,11 +43,12 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
42
43
|
##
|
43
44
|
# Returns cannonical rank (Symbol) for the +rank+ String.
|
44
45
|
def self.normalize_rank(rank)
|
46
|
+
return rank.to_sym if @@_KNOWN_RANKS_H[rank.to_sym]
|
45
47
|
rank = rank.to_s.downcase
|
46
48
|
return nil if rank=="no rank"
|
47
49
|
rank = @@RANK_SYNONYMS[rank] unless @@RANK_SYNONYMS[rank].nil?
|
48
50
|
rank = rank.to_sym
|
49
|
-
return nil unless @@
|
51
|
+
return nil unless @@_KNOWN_RANKS_H[rank]
|
50
52
|
rank
|
51
53
|
end
|
52
54
|
|
@@ -84,16 +86,16 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
84
86
|
# Add +value+ to the hierarchy, that can be an Array, a String, or a Hash, as
|
85
87
|
# described in #initialize.
|
86
88
|
def <<(value)
|
87
|
-
if value.is_a?
|
88
|
-
value.each{ |v| self << v }
|
89
|
-
elsif value.is_a? String
|
90
|
-
(rank, name) = value.split(/:/)
|
91
|
-
self << { rank => name }
|
92
|
-
elsif value.is_a? Hash
|
89
|
+
if value.is_a? Hash
|
93
90
|
value.each_pair do |rank_i, name_i|
|
94
91
|
next if name_i.nil? or name_i == ""
|
95
92
|
@ranks[ Taxonomy.normalize_rank rank_i ] = name_i.tr("_"," ")
|
96
93
|
end
|
94
|
+
elsif value.is_a? Array
|
95
|
+
value.each{ |v| self << v }
|
96
|
+
elsif value.is_a? String
|
97
|
+
(rank, name) = value.split(/:/)
|
98
|
+
self << { rank => name }
|
97
99
|
else
|
98
100
|
raise "Unsupported class: #{value.class.name}."
|
99
101
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.2,
|
13
|
+
VERSION = [0.2, 6, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2017, 4,
|
21
|
+
VERSION_DATE = Date.new(2017, 4, 14)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
@@ -42,8 +42,7 @@ class MiGA::MiGA
|
|
42
42
|
##
|
43
43
|
# Complete version with nickname and date as string.
|
44
44
|
def self.LONG_VERSION
|
45
|
-
"MiGA
|
46
|
-
VERSION_DATE.to_s
|
45
|
+
"MiGA #{VERSION.join(".")} - #{VERSION_NAME} - #{VERSION_DATE}"
|
47
46
|
end
|
48
47
|
|
49
48
|
##
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
|
+
set -e
|
4
|
+
SCRIPT="project_stats"
|
5
|
+
echo "MiGA: $MIGA"
|
6
|
+
echo "Project: $PROJECT"
|
7
|
+
source "$MIGA/scripts/miga.bash" || exit 1
|
8
|
+
DIR="$PROJECT/data/90.stats"
|
9
|
+
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
10
|
+
cd "$DIR"
|
11
|
+
|
12
|
+
# Initialize
|
13
|
+
miga date > "miga-project.start"
|
14
|
+
|
15
|
+
# Index taxonomy
|
16
|
+
miga index_taxonomy -P "$PROJECT" -i "miga-project.taxonomy.json" --ref
|
17
|
+
|
18
|
+
# Index metadata
|
19
|
+
ruby -I "$MIGA/lib"
|
20
|
+
"$MIGA/utils/index_metadata.rb" "$PROJECT" "miga-project.metadata.db"
|
21
|
+
|
22
|
+
# Finalize
|
23
|
+
miga date > "miga-project.done"
|
24
|
+
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/test/dataset_test.rb
CHANGED
@@ -92,13 +92,24 @@ class DatasetTest < Test::Unit::TestCase
|
|
92
92
|
assert_equal(:trimmed_reads, d2.first_preprocessing(true))
|
93
93
|
assert_equal(:read_quality, d2.next_preprocessing(true))
|
94
94
|
assert(! d2.done_preprocessing?(true))
|
95
|
+
# Ref and undeclared multi
|
95
96
|
assert(d2.ignore_task?(:mytaxa))
|
97
|
+
assert(d2.ignore_task?(:mytaxa_scan))
|
96
98
|
assert(d2.ignore_task?(:distances))
|
99
|
+
# Ref and multi
|
97
100
|
d2.metadata[:type] = :metagenome
|
98
101
|
assert(! d2.ignore_task?(:mytaxa))
|
102
|
+
assert(d2.ignore_task?(:mytaxa_scan))
|
99
103
|
assert(d2.ignore_task?(:distances))
|
104
|
+
# Ref and nonmulti
|
100
105
|
d2.metadata[:type] = :genome
|
101
106
|
assert(d2.ignore_task?(:mytaxa))
|
107
|
+
assert(! d2.ignore_task?(:mytaxa_scan))
|
108
|
+
assert(! d2.ignore_task?(:distances))
|
109
|
+
# Qry and nonmulti
|
110
|
+
d2.metadata[:ref] = false
|
111
|
+
assert(d2.ignore_task?(:mytaxa))
|
112
|
+
assert(d2.ignore_task?(:mytaxa_scan))
|
102
113
|
assert(! d2.ignore_task?(:distances))
|
103
114
|
end
|
104
115
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "miga"
|
4
|
+
require "sqlite3"
|
5
|
+
|
6
|
+
p = MiGA::Project.load(ARGV[0])
|
7
|
+
raise "Impossible to load project: #{ARGV[0]}." if p.nil?
|
8
|
+
|
9
|
+
File.unlink(ARGV[1]) if File.exist? ARGV[1]
|
10
|
+
db = SQLite3::Database.new(ARGV[1])
|
11
|
+
db.execute "create table metadata(" +
|
12
|
+
"`name` varchar(256), `field` varchar(256), `value` text)"
|
13
|
+
|
14
|
+
def searchable(db, k, v)
|
15
|
+
db.execute "insert into metadata values(?,?,?)",
|
16
|
+
k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, " ")} "
|
17
|
+
end
|
18
|
+
|
19
|
+
p.each_dataset do |name, d|
|
20
|
+
next unless d.is_ref?
|
21
|
+
searchable(db, :name, d.name)
|
22
|
+
d.metadata.each do |k, v|
|
23
|
+
next if [:created, :updated].include? k
|
24
|
+
v = v.sorted_ranks.map{ |r| r[1] }.join(" ") if k==:tax
|
25
|
+
searchable(db, k, v)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-04-
|
11
|
+
date: 2017-04-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -149,6 +149,7 @@ files:
|
|
149
149
|
- scripts/mytaxa.bash
|
150
150
|
- scripts/mytaxa_scan.bash
|
151
151
|
- scripts/ogs.bash
|
152
|
+
- scripts/project_stats.bash
|
152
153
|
- scripts/read_quality.bash
|
153
154
|
- scripts/ssu.bash
|
154
155
|
- scripts/stats.bash
|
@@ -156,6 +157,7 @@ files:
|
|
156
157
|
- scripts/trimmed_fasta.bash
|
157
158
|
- scripts/trimmed_reads.bash
|
158
159
|
- utils/adapters.fa
|
160
|
+
- utils/index_metadata.rb
|
159
161
|
- utils/mytaxa_scan.R
|
160
162
|
- utils/mytaxa_scan.rb
|
161
163
|
- utils/plot-taxdist.R
|
@@ -179,6 +181,7 @@ files:
|
|
179
181
|
- actions/plugins.rb
|
180
182
|
- actions/project_info.rb
|
181
183
|
- actions/result_stats.rb
|
184
|
+
- actions/run_local.rb
|
182
185
|
- actions/tax_distributions.rb
|
183
186
|
- actions/unlink_dataset.rb
|
184
187
|
- Gemfile
|