miga-base 0.2.5.2 → 0.2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/actions/create_dataset.rb +2 -0
- data/actions/run_local.rb +43 -0
- data/bin/miga +1 -0
- data/lib/miga/common.rb +3 -3
- data/lib/miga/daemon.rb +29 -22
- data/lib/miga/dataset.rb +21 -20
- data/lib/miga/dataset_result.rb +50 -43
- data/lib/miga/metadata.rb +24 -18
- data/lib/miga/project.rb +8 -8
- data/lib/miga/project_result.rb +12 -3
- data/lib/miga/remote_dataset.rb +9 -7
- data/lib/miga/result.rb +3 -5
- data/lib/miga/tax_index.rb +5 -5
- data/lib/miga/taxonomy.rb +9 -7
- data/lib/miga/version.rb +3 -4
- data/scripts/essential_genes.bash +1 -1
- data/scripts/project_stats.bash +24 -0
- data/test/dataset_test.rb +11 -0
- data/utils/index_metadata.rb +28 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 86777e753b2985fdceae0ee27c2d6270baf23e0e
|
4
|
+
data.tar.gz: bc5353979542701b28f35ad0fdc6bc68ee4d0c08
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9493d3aec9de2f8ba8131ca475bc9d982686168cf64c8c565c64215ecdb34978235419e5134674c9e4d625edf000868f9bd140b7df00cd29aa16f8120d2e27b3
|
7
|
+
data.tar.gz: 9f9125548016a2b29f840c8b8e120527eb3e9dcbbf7cc5a9c464e1bfcbc4e223bca4a3dce3c1e820afe0e7b45cca3307ec53241f5fd3cdb390112714c4353f87
|
data/actions/create_dataset.rb
CHANGED
@@ -44,6 +44,8 @@ end.parse!
|
|
44
44
|
##=> Main <=
|
45
45
|
opt_require(o)
|
46
46
|
opt_require(o, type:"-t") unless o[:update]
|
47
|
+
raise "Unrecognized dataset type: #{o[:type]}." if
|
48
|
+
(not o[:update]) and MiGA::Dataset.KNOWN_TYPES[o[:type]].nil?
|
47
49
|
|
48
50
|
$stderr.puts "Loading project." unless o[:q]
|
49
51
|
p = MiGA::Project.load(o[:project])
|
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# @package MiGA
|
4
|
+
# @license Artistic-2.0
|
5
|
+
|
6
|
+
require "shellwords"
|
7
|
+
|
8
|
+
o = {q:true, try_load:false, thr:1}
|
9
|
+
opts = OptionParser.new do |opt|
|
10
|
+
opt_banner(opt)
|
11
|
+
opt_object(opt, o, [:project, :dataset_opt, :result])
|
12
|
+
opt.on("-t", "--threads INT",
|
13
|
+
"Threads to use in the local run (by default: #{o[:thr]})."
|
14
|
+
){ |v| o[:thr] = v.to_i }
|
15
|
+
opt_common(opt, o)
|
16
|
+
end.parse!
|
17
|
+
|
18
|
+
##=> Main <=
|
19
|
+
opts.parse!
|
20
|
+
opt_require(o, project:"-P", name:"-r")
|
21
|
+
|
22
|
+
$stderr.puts "Loading project." unless o[:q]
|
23
|
+
p = MiGA::Project.load(o[:project])
|
24
|
+
raise "Impossible to load project: #{o[:project]}" if p.nil?
|
25
|
+
|
26
|
+
miga = File.expand_path("../..", __FILE__)
|
27
|
+
cmd = ["PROJECT=#{p.path.shellescape}", "RUNTYPE=bash",
|
28
|
+
"MIGA=#{miga.shellescape}", "CORES=#{o[:thr]}"]
|
29
|
+
if o[:dataset].nil?
|
30
|
+
type = MiGA::Project
|
31
|
+
else
|
32
|
+
d = p.dataset(o[:dataset])
|
33
|
+
raise "Cannot load dataset." if d.nil?
|
34
|
+
cmd << "DATASET=#{d.name.shellescape}"
|
35
|
+
type = MiGA::Dataset
|
36
|
+
end
|
37
|
+
raise "Unsupported #{type.to_s.gsub(/.*::/,"")} result: #{o[:name]}." if
|
38
|
+
type.RESULT_DIRS[o[:name].to_sym].nil?
|
39
|
+
cmd << "#{miga}/scripts/#{o[:name]}.bash".shellescape
|
40
|
+
pid = spawn cmd.join(" ")
|
41
|
+
Process.wait pid
|
42
|
+
|
43
|
+
$stderr.puts "Done." unless o[:q]
|
data/bin/miga
CHANGED
@@ -28,6 +28,7 @@ $task_desc = {
|
|
28
28
|
result_stats: "Extracts statistics for the given result.",
|
29
29
|
list_files: "Lists all registered files from the results of a dataset or a "+
|
30
30
|
"project.",
|
31
|
+
run_local: "Executes locally one step analysis producing the given result.",
|
31
32
|
# System
|
32
33
|
daemon: "Controls the daemon of a MiGA project.",
|
33
34
|
date: "Returns the current date in standard MiGA format.",
|
data/lib/miga/common.rb
CHANGED
@@ -91,7 +91,7 @@ class MiGA::MiGA
|
|
91
91
|
buffer = ""
|
92
92
|
tmp.puts ">#{id.gsub(/[^A-Za-z0-9_\|\.]/, "_")}#{df}"
|
93
93
|
else
|
94
|
-
buffer
|
94
|
+
buffer << ln.gsub(/[^A-Za-z\.\-]/, "")
|
95
95
|
end
|
96
96
|
end
|
97
97
|
tmp.print buffer.wrap_width(80)
|
@@ -111,7 +111,7 @@ class MiGA::MiGA
|
|
111
111
|
def result_files_exist?(base, ext)
|
112
112
|
ext = [ext] unless ext.kind_of? Array
|
113
113
|
ext.all? do |f|
|
114
|
-
File.exist?(base + f) or File.exist?(base
|
114
|
+
File.exist?(base + f) or File.exist?("#{base}#{f}.gz")
|
115
115
|
end
|
116
116
|
end
|
117
117
|
|
@@ -156,7 +156,7 @@ class String
|
|
156
156
|
def miga_name? ; not(self !~ /^[A-Za-z0-9_]+$/) ; end
|
157
157
|
|
158
158
|
##
|
159
|
-
# Replace underscores by spaces.
|
159
|
+
# Replace underscores by spaces or dots (depending on context).
|
160
160
|
def unmiga_name ; gsub(/_(str|sp|subsp|pv)__/,"_\\1._").tr("_", " ") ; end
|
161
161
|
|
162
162
|
##
|
data/lib/miga/daemon.rb
CHANGED
@@ -14,7 +14,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
14
14
|
# active? Returns DateTime.
|
15
15
|
def self.last_alive(project)
|
16
16
|
f = File.expand_path("daemon/alive", project.path)
|
17
|
-
return nil unless File.
|
17
|
+
return nil unless File.exist? f
|
18
18
|
DateTime.parse(File.read(f))
|
19
19
|
end
|
20
20
|
|
@@ -63,10 +63,9 @@ class MiGA::Daemon < MiGA::MiGA
|
|
63
63
|
def runopts(k, v=nil, force=false)
|
64
64
|
k = k.to_sym
|
65
65
|
unless v.nil?
|
66
|
-
v =
|
67
|
-
|
68
|
-
|
69
|
-
v = !!v if [:shutdown_when_done].include? k
|
66
|
+
v = [:latency, :maxjobs, :ppn].include?(k) ? v.to_i :
|
67
|
+
[:shutdown_when_done].include?(k) ? !!v : v
|
68
|
+
raise "Daemon's #{k} cannot be set to zero." if !force and v==0
|
70
69
|
@runopts[k] = v
|
71
70
|
end
|
72
71
|
@runopts[k]
|
@@ -209,23 +208,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
209
208
|
# Launch as many +jobs_to_run+ as possible
|
210
209
|
while jobs_running.size < maxjobs
|
211
210
|
break if jobs_to_run.empty?
|
212
|
-
|
213
|
-
# Launch job
|
214
|
-
if runopts(:type) == "bash"
|
215
|
-
job[:pid] = spawn job[:cmd]
|
216
|
-
Process.detach job[:pid] unless [nil, "", 0].include? job[:pid]
|
217
|
-
else
|
218
|
-
job[:pid] = `#{job[:cmd]}`.chomp
|
219
|
-
end
|
220
|
-
# Check if registered
|
221
|
-
if [nil, "", 0].include? job[:pid].nil?
|
222
|
-
job[:pid] = nil
|
223
|
-
@jobs_to_run << job
|
224
|
-
say "Unsuccessful #{job[:task_name]}, rescheduling."
|
225
|
-
else
|
226
|
-
@jobs_running << job
|
227
|
-
say "Spawned pid:#{job[:pid]} for #{job[:task_name]}."
|
228
|
-
end
|
211
|
+
launch_job @jobs_to_run.shift
|
229
212
|
end
|
230
213
|
end
|
231
214
|
|
@@ -270,4 +253,28 @@ class MiGA::Daemon < MiGA::MiGA
|
|
270
253
|
print "[#{Time.new.inspect}] ", *opts, "\n"
|
271
254
|
end
|
272
255
|
|
256
|
+
private
|
257
|
+
|
258
|
+
def launch_job(job)
|
259
|
+
# Execute job
|
260
|
+
if runopts(:type) == "bash"
|
261
|
+
# Local job
|
262
|
+
job[:pid] = spawn job[:cmd]
|
263
|
+
Process.detach job[:pid] unless [nil, "", 0].include?(job[:pid])
|
264
|
+
else
|
265
|
+
# Schedule cluster job
|
266
|
+
job[:pid] = `#{job[:cmd]}`.chomp
|
267
|
+
end
|
268
|
+
|
269
|
+
# Check if registered
|
270
|
+
if [nil, "", 0].include?(job[:pid])
|
271
|
+
job[:pid] = nil
|
272
|
+
@jobs_to_run << job
|
273
|
+
say "Unsuccessful #{job[:task_name]}, rescheduling."
|
274
|
+
else
|
275
|
+
@jobs_running << job
|
276
|
+
say "Spawned pid:#{job[:pid]} for #{job[:task_name]}."
|
277
|
+
end
|
278
|
+
end
|
279
|
+
|
273
280
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -26,9 +26,6 @@ class MiGA::Dataset < MiGA::MiGA
|
|
26
26
|
ssu: "07.annotation/01.function/02.ssu",
|
27
27
|
mytaxa: "07.annotation/02.taxonomy/01.mytaxa",
|
28
28
|
mytaxa_scan: "07.annotation/03.qa/02.mytaxa_scan",
|
29
|
-
# Mapping
|
30
|
-
mapping_on_contigs: "08.mapping/01.read-ctg",
|
31
|
-
mapping_on_genes: "08.mapping/02.read-gene",
|
32
29
|
# Distances (for single-species datasets)
|
33
30
|
distances: "09.distances",
|
34
31
|
# General statistics
|
@@ -59,21 +56,24 @@ class MiGA::Dataset < MiGA::MiGA
|
|
59
56
|
##
|
60
57
|
# Tasks to be excluded from query datasets.
|
61
58
|
@@EXCLUDE_NOREF_TASKS = [:mytaxa_scan]
|
59
|
+
@@_EXCLUDE_NOREF_TASKS_H = Hash[@@EXCLUDE_NOREF_TASKS.map{ |i| [i,true] }]
|
62
60
|
|
63
61
|
##
|
64
62
|
# Tasks to be executed only in datasets that are not multi-organism. These
|
65
63
|
# tasks are ignored for multi-organism datasets or for unknown types.
|
66
64
|
@@ONLY_NONMULTI_TASKS = [:mytaxa_scan, :distances]
|
65
|
+
@@_ONLY_NONMULTI_TASKS_H = Hash[@@ONLY_NONMULTI_TASKS.map{ |i| [i,true] }]
|
67
66
|
|
68
67
|
##
|
69
68
|
# Tasks to be executed only in datasets that are multi-organism. These
|
70
69
|
# tasks are ignored for single-organism datasets or for unknown types.
|
71
70
|
@@ONLY_MULTI_TASKS = [:mytaxa]
|
71
|
+
@@_ONLY_MULTI_TASKS_H = Hash[@@ONLY_MULTI_TASKS.map{ |i| [i,true] }]
|
72
72
|
|
73
73
|
##
|
74
74
|
# Does the +project+ already have a dataset with that +name+?
|
75
75
|
def self.exist?(project, name)
|
76
|
-
File.exist? project.path
|
76
|
+
File.exist? "#{project.path}/metadata/#{name}.json"
|
77
77
|
end
|
78
78
|
|
79
79
|
##
|
@@ -109,8 +109,6 @@ class MiGA::Dataset < MiGA::MiGA
|
|
109
109
|
metadata[:ref] = is_ref
|
110
110
|
@metadata = MiGA::Metadata.new(
|
111
111
|
File.expand_path("metadata/#{name}.json", project.path), metadata )
|
112
|
-
warn "Warning: Unrecognized dataset type: #{type}." if
|
113
|
-
!type.nil? and @@KNOWN_TYPES[type].nil?
|
114
112
|
end
|
115
113
|
|
116
114
|
##
|
@@ -165,8 +163,8 @@ class MiGA::Dataset < MiGA::MiGA
|
|
165
163
|
# Get the result MiGA::Result in this dataset identified by the symbol +k+.
|
166
164
|
def result(k)
|
167
165
|
return nil if @@RESULT_DIRS[k.to_sym].nil?
|
168
|
-
MiGA::Result.load(
|
169
|
-
"/
|
166
|
+
MiGA::Result.load(
|
167
|
+
"#{project.path}/data/#{@@RESULT_DIRS[k.to_sym]}/#{name}.json" )
|
170
168
|
end
|
171
169
|
|
172
170
|
##
|
@@ -184,16 +182,18 @@ class MiGA::Dataset < MiGA::MiGA
|
|
184
182
|
##
|
185
183
|
# Look for the result with symbol key +result_type+ and register it in the
|
186
184
|
# dataset. If +save+ is false, it doesn't register the result, but it still
|
187
|
-
# returns a result if the expected files are complete.
|
188
|
-
#
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
185
|
+
# returns a result if the expected files are complete. The +opts+ Hash
|
186
|
+
# controls result creation (if necessary). Supported values include:
|
187
|
+
# - +is_clean+: A Boolean indicating if the input files are clean.
|
188
|
+
# Returns MiGA::Result or nil.
|
189
|
+
def add_result(result_type, save=true, opts={})
|
190
|
+
dir = @@RESULT_DIRS[result_type]
|
191
|
+
return nil if dir.nil?
|
192
|
+
base = File.expand_path("data/#{dir}/#{name}", project.path)
|
193
193
|
r_pre = MiGA::Result.load("#{base}.json")
|
194
194
|
return r_pre if (r_pre.nil? and not save) or not r_pre.nil?
|
195
|
-
|
196
|
-
|
195
|
+
r = File.exist?("#{base}.done") ?
|
196
|
+
self.send("add_result_#{result_type}", base, opts) : nil
|
197
197
|
r.save unless r.nil?
|
198
198
|
r
|
199
199
|
end
|
@@ -232,11 +232,12 @@ class MiGA::Dataset < MiGA::MiGA
|
|
232
232
|
# Should I ignore +task+ for this dataset?
|
233
233
|
def ignore_task?(task)
|
234
234
|
return !metadata["run_#{task}"] unless metadata["run_#{task}"].nil?
|
235
|
-
|
236
|
-
|
237
|
-
|
235
|
+
pattern = [true, false]
|
236
|
+
( [@@_EXCLUDE_NOREF_TASKS_H[task], is_ref? ]==pattern or
|
237
|
+
[@@_ONLY_MULTI_TASKS_H[task], is_multi? ]==pattern or
|
238
|
+
[@@_ONLY_NONMULTI_TASKS_H[task], is_nonmulti?]==pattern )
|
238
239
|
end
|
239
|
-
|
240
|
+
|
240
241
|
##
|
241
242
|
# Are all the dataset-specific tasks done? Passes +save+ to #add_result.
|
242
243
|
def done_preprocessing?(save=false)
|
data/lib/miga/dataset_result.rb
CHANGED
@@ -27,10 +27,10 @@ module MiGA::DatasetResult
|
|
27
27
|
private
|
28
28
|
|
29
29
|
##
|
30
|
-
# Add result type +:raw_reads+ at +base
|
31
|
-
def add_result_raw_reads(base)
|
30
|
+
# Add result type +:raw_reads+ at +base+ (no +opts+ supported).
|
31
|
+
def add_result_raw_reads(base, opts)
|
32
32
|
return nil unless result_files_exist?(base, ".1.fastq")
|
33
|
-
r = MiGA::Result.new(base
|
33
|
+
r = MiGA::Result.new("#{base}.json")
|
34
34
|
r = add_files_to_ds_result(r, name,
|
35
35
|
( result_files_exist?(base, ".2.fastq") ?
|
36
36
|
{:pair1=>".1.fastq", :pair2=>".2.fastq"} :
|
@@ -38,24 +38,24 @@ module MiGA::DatasetResult
|
|
38
38
|
end
|
39
39
|
|
40
40
|
##
|
41
|
-
# Add result type +:trimmed_reads+ at +base
|
42
|
-
def add_result_trimmed_reads(base)
|
41
|
+
# Add result type +:trimmed_reads+ at +base+ (no +opts+ supported).
|
42
|
+
def add_result_trimmed_reads(base, opts)
|
43
43
|
return nil unless result_files_exist?(base, ".1.clipped.fastq")
|
44
|
-
r = MiGA::Result.new
|
44
|
+
r = MiGA::Result.new("#{base}.json")
|
45
45
|
r = add_files_to_ds_result(r, name,
|
46
46
|
{:pair1=>".1.clipped.fastq", :pair2=>".2.clipped.fastq"}) if
|
47
47
|
result_files_exist?(base, ".2.clipped.fastq")
|
48
|
-
r.add_file(:single, name
|
49
|
-
r.add_file(:trimming_sumary, name
|
48
|
+
r.add_file(:single, "#{name}.1.clipped.single.fastq")
|
49
|
+
r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
|
50
50
|
add_result(:raw_reads) #-> Post gunzip
|
51
51
|
r
|
52
52
|
end
|
53
53
|
|
54
54
|
##
|
55
|
-
# Add result type +:read_quality+ at +base
|
56
|
-
def add_result_read_quality(base)
|
55
|
+
# Add result type +:read_quality+ at +base+ (no +opts+ supported).
|
56
|
+
def add_result_read_quality(base, opts)
|
57
57
|
return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
|
58
|
-
r = MiGA::Result.new(base
|
58
|
+
r = MiGA::Result.new("#{base}.json")
|
59
59
|
r = add_files_to_ds_result(r, name,
|
60
60
|
{:solexaqa=>".solexaqa", :fastqc=>".fastqc"})
|
61
61
|
add_result(:trimmed_reads) #-> Post cleaning
|
@@ -63,13 +63,13 @@ module MiGA::DatasetResult
|
|
63
63
|
end
|
64
64
|
|
65
65
|
##
|
66
|
-
# Add result type +:trimmed_fasta+ at +base
|
67
|
-
def add_result_trimmed_fasta(base)
|
66
|
+
# Add result type +:trimmed_fasta+ at +base+ (no +opts+ supported).
|
67
|
+
def add_result_trimmed_fasta(base, opts)
|
68
68
|
return nil unless
|
69
69
|
result_files_exist?(base, ".CoupledReads.fa") or
|
70
70
|
result_files_exist?(base, ".SingleReads.fa") or
|
71
71
|
result_files_exist?(base, %w[.1.fasta .2.fasta])
|
72
|
-
r = MiGA::Result.new
|
72
|
+
r = MiGA::Result.new("#{base}.json")
|
73
73
|
r = add_files_to_ds_result(r, name, {:coupled=>".CoupledReads.fa",
|
74
74
|
:single=>".SingleReads.fa", :pair1=>".1.fasta", :pair2=>".2.fasta"})
|
75
75
|
add_result(:raw_reads) #-> Post gzip
|
@@ -77,12 +77,15 @@ module MiGA::DatasetResult
|
|
77
77
|
end
|
78
78
|
|
79
79
|
##
|
80
|
-
# Add result type +:assembly+ at +base+.
|
81
|
-
|
80
|
+
# Add result type +:assembly+ at +base+. Hash +opts+ supports
|
81
|
+
# +is_clean: Boolean+.
|
82
|
+
def add_result_assembly(base, opts)
|
82
83
|
return nil unless result_files_exist?(base, ".LargeContigs.fna")
|
83
|
-
r = MiGA::Result.new(base
|
84
|
+
r = MiGA::Result.new("#{base}.json")
|
84
85
|
r = add_files_to_ds_result(r, name, {:largecontigs=>".LargeContigs.fna",
|
85
86
|
:allcontigs=>".AllContigs.fna", :assembly_data=>""})
|
87
|
+
opts[:is_clean] ||= false
|
88
|
+
r.clean! if opts[:is_clean]
|
86
89
|
unless r.clean?
|
87
90
|
MiGA::MiGA.clean_fasta_file(r.file_path :largecontigs)
|
88
91
|
r.clean!
|
@@ -92,12 +95,14 @@ module MiGA::DatasetResult
|
|
92
95
|
end
|
93
96
|
|
94
97
|
##
|
95
|
-
# Add result type +:cds+ at +base+.
|
96
|
-
def add_result_cds(base)
|
98
|
+
# Add result type +:cds+ at +base+. Hash +opts+ supports +is_clean: Boolean+
|
99
|
+
def add_result_cds(base, opts)
|
97
100
|
return nil unless result_files_exist?(base, %w[.faa .fna])
|
98
|
-
r = MiGA::Result.new(base
|
101
|
+
r = MiGA::Result.new("#{base}.json")
|
99
102
|
r = add_files_to_ds_result(r, name, {:proteins=>".faa", :genes=>".fna",
|
100
103
|
:gff2=>".gff2", :gff3=>".gff3", :tab=>".tab"})
|
104
|
+
opts[:is_clean] ||= false
|
105
|
+
r.clean! if opts[:is_clean]
|
101
106
|
unless r.clean?
|
102
107
|
MiGA::MiGA.clean_fasta_file(r.file_path :proteins)
|
103
108
|
MiGA::MiGA.clean_fasta_file(r.file_path :genes)
|
@@ -107,22 +112,24 @@ module MiGA::DatasetResult
|
|
107
112
|
end
|
108
113
|
|
109
114
|
##
|
110
|
-
# Add result type +:essential_genes+ at +base
|
111
|
-
def add_result_essential_genes(base)
|
115
|
+
# Add result type +:essential_genes+ at +base+ (no +opts+ supported).
|
116
|
+
def add_result_essential_genes(base, opts)
|
112
117
|
return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
|
113
|
-
r = MiGA::Result.new(base
|
118
|
+
r = MiGA::Result.new("#{base}.json")
|
114
119
|
r = add_files_to_ds_result(r, name, {:ess_genes=>".ess.faa",
|
115
120
|
:collection=>".ess", :report=>".ess/log"})
|
116
121
|
end
|
117
122
|
|
118
123
|
##
|
119
|
-
# Add result type +:ssu+ at +base+.
|
120
|
-
def add_result_ssu(base)
|
121
|
-
return MiGA::Result.new(base
|
124
|
+
# Add result type +:ssu+ at +base+. Hash +opts+ supports +is_clean: Boolean+
|
125
|
+
def add_result_ssu(base, opts)
|
126
|
+
return MiGA::Result.new("#{base}.json") if result(:assembly).nil?
|
122
127
|
return nil unless result_files_exist?(base, ".ssu.fa")
|
123
|
-
r = MiGA::Result.new(base
|
128
|
+
r = MiGA::Result.new("#{base}.json")
|
124
129
|
r = add_files_to_ds_result(r, name, {:longest_ssu_gene=>".ssu.fa",
|
125
130
|
:gff=>".ssu.gff", :all_ssu_genes=>".ssu.all.fa"})
|
131
|
+
opts[:is_clean] ||= false
|
132
|
+
r.clean! if opts[:is_clean]
|
126
133
|
unless r.clean?
|
127
134
|
MiGA::MiGA.clean_fasta_file(r.file_path :longest_ssu_gene)
|
128
135
|
r.clean!
|
@@ -131,37 +138,37 @@ module MiGA::DatasetResult
|
|
131
138
|
end
|
132
139
|
|
133
140
|
##
|
134
|
-
# Add result type +:mytaxa+ at +base
|
135
|
-
def add_result_mytaxa(base)
|
141
|
+
# Add result type +:mytaxa+ at +base+ (no +opts+ supported).
|
142
|
+
def add_result_mytaxa(base, opts)
|
136
143
|
if is_multi?
|
137
144
|
return nil unless result_files_exist?(base, ".mytaxa")
|
138
|
-
r = MiGA::Result.new(base
|
145
|
+
r = MiGA::Result.new("#{base}.json")
|
139
146
|
add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :blast=>".blast",
|
140
147
|
:mytaxain=>".mytaxain"})
|
141
148
|
else
|
142
|
-
MiGA::Result.new(base
|
149
|
+
MiGA::Result.new("#{base}.json")
|
143
150
|
end
|
144
151
|
end
|
145
152
|
|
146
153
|
##
|
147
|
-
# Add result type +:mytaxa_scan+ at +base
|
148
|
-
def add_result_mytaxa_scan(base)
|
154
|
+
# Add result type +:mytaxa_scan+ at +base+ (no +opts+ supported).
|
155
|
+
def add_result_mytaxa_scan(base, opts)
|
149
156
|
if is_nonmulti?
|
150
157
|
return nil unless
|
151
158
|
result_files_exist?(base, %w[.pdf .wintax .mytaxa .reg])
|
152
|
-
r = MiGA::Result.new(base
|
159
|
+
r = MiGA::Result.new("#{base}.json")
|
153
160
|
add_files_to_ds_result(r, name, {:mytaxa=>".mytaxa", :wintax=>".wintax",
|
154
161
|
:blast=>".blast", :mytaxain=>".mytaxain", :report=>".pdf",
|
155
162
|
:regions=>".reg", :gene_ids=>".wintax.genes",
|
156
163
|
:region_ids=>".wintax.regions"})
|
157
164
|
else
|
158
|
-
MiGA::Result.new
|
165
|
+
MiGA::Result.new("#{base}.json")
|
159
166
|
end
|
160
167
|
end
|
161
168
|
|
162
169
|
##
|
163
|
-
# Add result type +:distances+ at +base
|
164
|
-
def add_result_distances(base)
|
170
|
+
# Add result type +:distances+ at +base+ (no +opts+ supported).
|
171
|
+
def add_result_distances(base, opts)
|
165
172
|
if is_nonmulti?
|
166
173
|
if is_ref?
|
167
174
|
add_result_distances_ref(base)
|
@@ -174,15 +181,15 @@ module MiGA::DatasetResult
|
|
174
181
|
end
|
175
182
|
|
176
183
|
##
|
177
|
-
# Add result type +:stats+ at +base
|
178
|
-
def add_result_stats(base)
|
179
|
-
MiGA::Result.new
|
184
|
+
# Add result type +:stats+ at +base+ (no +opts+ supported).
|
185
|
+
def add_result_stats(base, opts)
|
186
|
+
MiGA::Result.new("#{base}.json")
|
180
187
|
end
|
181
188
|
|
182
189
|
##
|
183
190
|
# Add result type +:distances+ for _multi_ datasets at +base+.
|
184
191
|
def add_result_distances_multi(base)
|
185
|
-
MiGA::Result.new
|
192
|
+
MiGA::Result.new("#{base}.json")
|
186
193
|
end
|
187
194
|
|
188
195
|
##
|
@@ -191,7 +198,7 @@ module MiGA::DatasetResult
|
|
191
198
|
pref = File.dirname(base)
|
192
199
|
return nil unless
|
193
200
|
File.exist?("#{pref}/01.haai/#{name}.db")
|
194
|
-
r = MiGA::Result.new(base
|
201
|
+
r = MiGA::Result.new("#{base}.json")
|
195
202
|
r.add_files({:haai_db=>"01.haai/#{name}.db",
|
196
203
|
:aai_db=>"02.aai/#{name}.db", :ani_db=>"03.ani/#{name}.db"})
|
197
204
|
r
|
@@ -203,7 +210,7 @@ module MiGA::DatasetResult
|
|
203
210
|
return nil unless
|
204
211
|
result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
|
205
212
|
result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
|
206
|
-
r = MiGA::Result.new(base
|
213
|
+
r = MiGA::Result.new("#{base}.json")
|
207
214
|
r = add_files_to_ds_result(r, name, {
|
208
215
|
:aai_medoids=>".aai-medoids.tsv",
|
209
216
|
:haai_db=>".haai.db", :aai_db=>".aai.db",
|
data/lib/miga/metadata.rb
CHANGED
@@ -9,7 +9,7 @@ class MiGA::Metadata < MiGA::MiGA
|
|
9
9
|
|
10
10
|
##
|
11
11
|
# Does the metadata described in +path+ already exist?
|
12
|
-
def self.exist?(path) File.
|
12
|
+
def self.exist?(path) File.exist? path end
|
13
13
|
|
14
14
|
##
|
15
15
|
# Load the metadata described in +path+ and return MiGA::Metadata if it
|
@@ -24,34 +24,39 @@ class MiGA::Metadata < MiGA::MiGA
|
|
24
24
|
##
|
25
25
|
# Path to the JSON file describing the metadata.
|
26
26
|
attr_reader :path
|
27
|
-
|
28
|
-
##
|
29
|
-
# Parsed data as a Hash.
|
30
|
-
attr_reader :data
|
31
27
|
|
32
28
|
##
|
33
29
|
# Initiate a MiGA::Metadata object with description in +path+. It will create
|
34
30
|
# it if it doesn't exist.
|
35
31
|
def initialize(path, defaults={})
|
32
|
+
@data = nil
|
36
33
|
@path = File.absolute_path(path)
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
34
|
+
unless File.exist? path
|
35
|
+
@data = {}
|
36
|
+
defaults.each_pair{ |k,v| self[k]=v }
|
37
|
+
create
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
##
|
42
|
+
# Parsed data as a Hash.
|
43
|
+
def data
|
44
|
+
self.load if @data.nil?
|
45
|
+
@data
|
41
46
|
end
|
42
47
|
|
43
48
|
##
|
44
49
|
# Reset :created field and save the current data.
|
45
50
|
def create
|
46
|
-
|
47
|
-
|
51
|
+
self[:created] = Time.now.to_s
|
52
|
+
save
|
48
53
|
end
|
49
54
|
|
50
55
|
##
|
51
56
|
# Save the metadata into #path.
|
52
57
|
def save
|
53
58
|
MiGA.DEBUG "Metadata.save #{path}"
|
54
|
-
|
59
|
+
self[:updated] = Time.now.to_s
|
55
60
|
json = JSON.pretty_generate(data)
|
56
61
|
sleeper = 0.0
|
57
62
|
while File.exist?(lock_file)
|
@@ -59,12 +64,12 @@ class MiGA::Metadata < MiGA::MiGA
|
|
59
64
|
sleep(sleeper.to_i)
|
60
65
|
end
|
61
66
|
FileUtils.touch lock_file
|
62
|
-
ofh = File.open(path
|
67
|
+
ofh = File.open("#{path}.tmp", "w")
|
63
68
|
ofh.puts json
|
64
69
|
ofh.close
|
65
70
|
raise "Lock-racing detected for #{path}." unless
|
66
|
-
File.exist?(path
|
67
|
-
File.rename(path
|
71
|
+
File.exist?("#{path}.tmp") and File.exist?(lock_file)
|
72
|
+
File.rename("#{path}.tmp", path)
|
68
73
|
File.unlink(lock_file)
|
69
74
|
end
|
70
75
|
|
@@ -86,14 +91,14 @@ class MiGA::Metadata < MiGA::MiGA
|
|
86
91
|
##
|
87
92
|
# Delete file at #path.
|
88
93
|
def remove!
|
89
|
-
MiGA.DEBUG "Metadata.remove! #{
|
90
|
-
File.unlink(
|
94
|
+
MiGA.DEBUG "Metadata.remove! #{path}"
|
95
|
+
File.unlink(path)
|
91
96
|
nil
|
92
97
|
end
|
93
98
|
|
94
99
|
##
|
95
100
|
# Lock file for the metadata.
|
96
|
-
def lock_file ; path
|
101
|
+
def lock_file ; "#{path}.lock" ; end
|
97
102
|
|
98
103
|
##
|
99
104
|
# Return the value of +k+ in #data.
|
@@ -102,6 +107,7 @@ class MiGA::Metadata < MiGA::MiGA
|
|
102
107
|
##
|
103
108
|
# Set the value of +k+ to +v+.
|
104
109
|
def []=(k,v)
|
110
|
+
self.load if @data.nil?
|
105
111
|
k = k.to_sym
|
106
112
|
# Protect the special field :name
|
107
113
|
v=v.miga_name if k==:name
|
data/lib/miga/project.rb
CHANGED
@@ -49,11 +49,11 @@ class MiGA::Project < MiGA::MiGA
|
|
49
49
|
clade_finding: "10.clades/01.find",
|
50
50
|
# Clade analysis
|
51
51
|
subclades: "10.clades/02.ani",
|
52
|
-
ogs: "10.clades/03.ogs"
|
52
|
+
ogs: "10.clades/03.ogs",
|
53
53
|
#ess_phylogeny: "10.clades/04.phylogeny/01.essential",
|
54
54
|
#core_phylogeny: "10.clades/04.phylogeny/02.core",
|
55
55
|
#clade_metadata: "10.clades/05.metadata"
|
56
|
-
|
56
|
+
project_stats: "90.stats"
|
57
57
|
}
|
58
58
|
|
59
59
|
##
|
@@ -85,7 +85,7 @@ class MiGA::Project < MiGA::MiGA
|
|
85
85
|
##
|
86
86
|
# Does the project at +path+ exist?
|
87
87
|
def self.exist?(path)
|
88
|
-
Dir.exist?(path) and File.exist?(path
|
88
|
+
Dir.exist?(path) and File.exist?("#{path}/miga.project.json")
|
89
89
|
end
|
90
90
|
|
91
91
|
##
|
@@ -130,7 +130,7 @@ class MiGA::Project < MiGA::MiGA
|
|
130
130
|
dirs.each{ |d| Dir.mkdir(d) unless Dir.exist? d }
|
131
131
|
@metadata = MiGA::Metadata.new(self.path + "/miga.project.json",
|
132
132
|
{datasets: [], name: File.basename(path)})
|
133
|
-
FileUtils.cp(ENV["MIGA_HOME"]
|
133
|
+
FileUtils.cp("#{ENV["MIGA_HOME"]}/.miga_daemon.json",
|
134
134
|
"#{path}/daemon/daemon.json") unless
|
135
135
|
File.exist? "#{path}/daemon/daemon.json"
|
136
136
|
self.load
|
@@ -252,9 +252,9 @@ class MiGA::Project < MiGA::MiGA
|
|
252
252
|
##
|
253
253
|
# Get result identified by Symbol +name+, returns MiGA::Result.
|
254
254
|
def result(name)
|
255
|
-
|
256
|
-
|
257
|
-
|
255
|
+
dir = @@RESULT_DIRS[name.to_sym]
|
256
|
+
return nil if dir.nil?
|
257
|
+
MiGA::Result.load("#{path}/data/#{dir}/miga-project.json")
|
258
258
|
end
|
259
259
|
|
260
260
|
##
|
@@ -269,7 +269,7 @@ class MiGA::Project < MiGA::MiGA
|
|
269
269
|
def add_result(name, save=true)
|
270
270
|
return nil if @@RESULT_DIRS[name].nil?
|
271
271
|
base = "#{path}/data/#{@@RESULT_DIRS[name]}/miga-project"
|
272
|
-
return MiGA::Result.load(base
|
272
|
+
return MiGA::Result.load("#{base}.json") unless save
|
273
273
|
return nil unless result_files_exist?(base, ".done")
|
274
274
|
r = send("add_result_#{name}", base)
|
275
275
|
r.save unless r.nil?
|
data/lib/miga/project_result.rb
CHANGED
@@ -11,7 +11,7 @@ module MiGA::ProjectResult
|
|
11
11
|
# Internal alias for all add_result_*_distances.
|
12
12
|
def add_result_distances(base)
|
13
13
|
return nil unless result_files_exist?(base, %w[.Rdata .log .txt])
|
14
|
-
r = MiGA::Result.new(base
|
14
|
+
r = MiGA::Result.new("#{base}.json")
|
15
15
|
r.add_file(:rdata, "miga-project.Rdata")
|
16
16
|
r.add_file(:matrix, "miga-project.txt")
|
17
17
|
r.add_file(:log, "miga-project.log")
|
@@ -40,7 +40,7 @@ module MiGA::ProjectResult
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def add_result_iter_clades(base)
|
43
|
-
r = MiGA::Result.new(base
|
43
|
+
r = MiGA::Result.new("#{base}.json")
|
44
44
|
r.add_file(:report, "miga-project.pdf")
|
45
45
|
r.add_file(:class_table, "miga-project.class.tsv")
|
46
46
|
r.add_file(:class_tree, "miga-project.class.nwk")
|
@@ -51,13 +51,22 @@ module MiGA::ProjectResult
|
|
51
51
|
|
52
52
|
def add_result_ogs(base)
|
53
53
|
return nil unless result_files_exist?(base, %w[.ogs .stats])
|
54
|
-
r = MiGA::Result.new(base
|
54
|
+
r = MiGA::Result.new("#{base}.json")
|
55
55
|
r.add_file(:ogs, "miga-project.ogs")
|
56
56
|
r.add_file(:stats, "miga-project.stats")
|
57
57
|
r.add_file(:rbm, "miga-project.rbm")
|
58
58
|
r
|
59
59
|
end
|
60
60
|
|
61
|
+
def add_result_project_stats(base)
|
62
|
+
return nil unless
|
63
|
+
result_files_exist?(base, %w[.taxonomy.json .metadata.db])
|
64
|
+
r = MiGA::Result.new("#{base}.json")
|
65
|
+
r.add_file(:taxonomy_index, "miga-project.taxonomy.json")
|
66
|
+
r.add_file(:metadata_index, "miga-project.metadata.db")
|
67
|
+
r
|
68
|
+
end
|
69
|
+
|
61
70
|
alias add_result_haai_distances add_result_distances
|
62
71
|
alias add_result_aai_distances add_result_distances
|
63
72
|
alias add_result_ani_distances add_result_distances
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -9,6 +9,7 @@ require "open-uri"
|
|
9
9
|
class MiGA::RemoteDataset < MiGA::MiGA
|
10
10
|
# Class-level
|
11
11
|
|
12
|
+
@@_EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
|
12
13
|
##
|
13
14
|
# Structure of the different database Universes or containers. The structure
|
14
15
|
# is a Hash with universe names as keys as Symbol and values being a Hash with
|
@@ -37,16 +38,14 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
37
38
|
},
|
38
39
|
ncbi:{
|
39
40
|
dbs: { nuccore:{stage: :assembly, format: :fasta} },
|
40
|
-
url: "
|
41
|
-
"efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
|
41
|
+
url: "#{@@_EUTILS}efetch.fcgi?db=%1$s&id=%2$s&rettype=%3$s&retmode=text",
|
42
42
|
method: :rest
|
43
43
|
},
|
44
44
|
ncbi_map:{
|
45
45
|
dbs: { assembly:{map_to: :nuccore, format: :text} },
|
46
|
-
url: "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/" +
|
47
46
|
# FIXME ncbi_map is intended to do internal NCBI mapping between
|
48
47
|
# databases.
|
49
|
-
|
48
|
+
url: "#{@@_EUTILS}elink.fcgi?dbfrom=%1$s&id=%2$s&db=%3$s - - - - -",
|
50
49
|
method: :rest,
|
51
50
|
map_to_universe: :ncbi
|
52
51
|
}
|
@@ -127,8 +126,8 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
127
126
|
metadata = get_metadata(metadata)
|
128
127
|
case @@UNIVERSE[universe][:dbs][db][:stage]
|
129
128
|
when :assembly
|
130
|
-
|
131
|
-
|
129
|
+
dir = MiGA::Dataset.RESULT_DIRS[:assembly]
|
130
|
+
base = "#{project.path}/data/#{dir}/#{name}"
|
132
131
|
File.open("#{base}.start", "w") { |ofh| ofh.puts Time.now.to_s }
|
133
132
|
if @@UNIVERSE[universe][:dbs][db][:format] == :fasta_gz
|
134
133
|
download("#{base}.LargeContigs.fna.gz")
|
@@ -144,9 +143,12 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
144
143
|
end
|
145
144
|
dataset = MiGA::Dataset.new(project, name, is_ref, metadata)
|
146
145
|
project.add_dataset(dataset.name)
|
147
|
-
result = dataset.add_result
|
146
|
+
result = dataset.add_result(@@UNIVERSE[universe][:dbs][db][:stage],
|
147
|
+
true, is_clean:true)
|
148
148
|
raise "Empty dataset created: seed result was not added due to "+
|
149
149
|
"incomplete files." if result.nil?
|
150
|
+
result.clean!
|
151
|
+
result.save
|
150
152
|
dataset
|
151
153
|
end
|
152
154
|
|
data/lib/miga/result.rb
CHANGED
@@ -9,9 +9,7 @@ class MiGA::Result < MiGA::MiGA
|
|
9
9
|
|
10
10
|
##
|
11
11
|
# Check if the result described by the JSON in +path+ already exists.
|
12
|
-
def self.exist?(path)
|
13
|
-
!!(File.size? path)
|
14
|
-
end
|
12
|
+
def self.exist?(path) File.exist? path end
|
15
13
|
|
16
14
|
##
|
17
15
|
# Load the result described by the JSON in +path+. Returns MiGA::Result if it
|
@@ -82,8 +80,8 @@ class MiGA::Result < MiGA::MiGA
|
|
82
80
|
k = k.to_sym
|
83
81
|
@data[:files] ||= {}
|
84
82
|
@data[:files][k] = file if File.exist? File.expand_path(file, dir)
|
85
|
-
@data[:files][k] = file
|
86
|
-
File.exist? File.expand_path(file
|
83
|
+
@data[:files][k] = "#{file}.gz" if
|
84
|
+
File.exist? File.expand_path("#{file}.gz", dir)
|
87
85
|
end
|
88
86
|
|
89
87
|
##
|
data/lib/miga/tax_index.rb
CHANGED
@@ -115,13 +115,13 @@ class MiGA::TaxIndexTaxon < MiGA::MiGA
|
|
115
115
|
##
|
116
116
|
# Get the number of datasets in the taxon (including children).
|
117
117
|
def datasets_count
|
118
|
-
|
118
|
+
children.map{ |it| it.datasets_count }.reduce(datasets.size, :+)
|
119
119
|
end
|
120
120
|
|
121
121
|
##
|
122
122
|
# Get all the datasets in the taxon (including children).
|
123
123
|
def all_datasets
|
124
|
-
|
124
|
+
children.map{ |it| it.datasets }.reduce(datasets, :+)
|
125
125
|
end
|
126
126
|
|
127
127
|
##
|
@@ -142,11 +142,11 @@ class MiGA::TaxIndexTaxon < MiGA::MiGA
|
|
142
142
|
# Tabular String of the taxon.
|
143
143
|
def to_tab(unknown, indent=0)
|
144
144
|
o = ""
|
145
|
-
o =
|
145
|
+
o = "#{" " * indent}#{tax_str}: #{datasets_count}\n" if
|
146
146
|
unknown or not datasets.empty? or not name.nil?
|
147
147
|
indent += 2
|
148
|
-
datasets.each{ |ds| o
|
149
|
-
children.each{ |it| o
|
148
|
+
datasets.each{ |ds| o << "#{" " * indent}# #{ds.name}\n" }
|
149
|
+
children.each{ |it| o << it.to_tab(unknown, indent) }
|
150
150
|
o
|
151
151
|
end
|
152
152
|
|
data/lib/miga/taxonomy.rb
CHANGED
@@ -10,6 +10,7 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
10
10
|
# Cannonical ranks.
|
11
11
|
def self.KNOWN_RANKS() @@KNOWN_RANKS ; end
|
12
12
|
@@KNOWN_RANKS = %w{ns d k p c o f g s ssp str ds}.map{|r| r.to_sym}
|
13
|
+
@@_KNOWN_RANKS_H = Hash[ @@KNOWN_RANKS.map{ |i| [i,true] } ]
|
13
14
|
|
14
15
|
##
|
15
16
|
# Long names of the cannonical ranks.
|
@@ -42,11 +43,12 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
42
43
|
##
|
43
44
|
# Returns cannonical rank (Symbol) for the +rank+ String.
|
44
45
|
def self.normalize_rank(rank)
|
46
|
+
return rank.to_sym if @@_KNOWN_RANKS_H[rank.to_sym]
|
45
47
|
rank = rank.to_s.downcase
|
46
48
|
return nil if rank=="no rank"
|
47
49
|
rank = @@RANK_SYNONYMS[rank] unless @@RANK_SYNONYMS[rank].nil?
|
48
50
|
rank = rank.to_sym
|
49
|
-
return nil unless @@
|
51
|
+
return nil unless @@_KNOWN_RANKS_H[rank]
|
50
52
|
rank
|
51
53
|
end
|
52
54
|
|
@@ -84,16 +86,16 @@ class MiGA::Taxonomy < MiGA::MiGA
|
|
84
86
|
# Add +value+ to the hierarchy, that can be an Array, a String, or a Hash, as
|
85
87
|
# described in #initialize.
|
86
88
|
def <<(value)
|
87
|
-
if value.is_a?
|
88
|
-
value.each{ |v| self << v }
|
89
|
-
elsif value.is_a? String
|
90
|
-
(rank, name) = value.split(/:/)
|
91
|
-
self << { rank => name }
|
92
|
-
elsif value.is_a? Hash
|
89
|
+
if value.is_a? Hash
|
93
90
|
value.each_pair do |rank_i, name_i|
|
94
91
|
next if name_i.nil? or name_i == ""
|
95
92
|
@ranks[ Taxonomy.normalize_rank rank_i ] = name_i.tr("_"," ")
|
96
93
|
end
|
94
|
+
elsif value.is_a? Array
|
95
|
+
value.each{ |v| self << v }
|
96
|
+
elsif value.is_a? String
|
97
|
+
(rank, name) = value.split(/:/)
|
98
|
+
self << { rank => name }
|
97
99
|
else
|
98
100
|
raise "Unsupported class: #{value.class.name}."
|
99
101
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.2,
|
13
|
+
VERSION = [0.2, 6, 0]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2017, 4,
|
21
|
+
VERSION_DATE = Date.new(2017, 4, 14)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
@@ -42,8 +42,7 @@ class MiGA::MiGA
|
|
42
42
|
##
|
43
43
|
# Complete version with nickname and date as string.
|
44
44
|
def self.LONG_VERSION
|
45
|
-
"MiGA
|
46
|
-
VERSION_DATE.to_s
|
45
|
+
"MiGA #{VERSION.join(".")} - #{VERSION_NAME} - #{VERSION_DATE}"
|
47
46
|
end
|
48
47
|
|
49
48
|
##
|
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
# Available variables: $PROJECT, $RUNTYPE, $MIGA, $CORES
|
3
|
+
set -e
|
4
|
+
SCRIPT="project_stats"
|
5
|
+
echo "MiGA: $MIGA"
|
6
|
+
echo "Project: $PROJECT"
|
7
|
+
source "$MIGA/scripts/miga.bash" || exit 1
|
8
|
+
DIR="$PROJECT/data/90.stats"
|
9
|
+
[[ -d "$DIR" ]] || mkdir -p "$DIR"
|
10
|
+
cd "$DIR"
|
11
|
+
|
12
|
+
# Initialize
|
13
|
+
miga date > "miga-project.start"
|
14
|
+
|
15
|
+
# Index taxonomy
|
16
|
+
miga index_taxonomy -P "$PROJECT" -i "miga-project.taxonomy.json" --ref
|
17
|
+
|
18
|
+
# Index metadata
|
19
|
+
ruby -I "$MIGA/lib"
|
20
|
+
"$MIGA/utils/index_metadata.rb" "$PROJECT" "miga-project.metadata.db"
|
21
|
+
|
22
|
+
# Finalize
|
23
|
+
miga date > "miga-project.done"
|
24
|
+
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/test/dataset_test.rb
CHANGED
@@ -92,13 +92,24 @@ class DatasetTest < Test::Unit::TestCase
|
|
92
92
|
assert_equal(:trimmed_reads, d2.first_preprocessing(true))
|
93
93
|
assert_equal(:read_quality, d2.next_preprocessing(true))
|
94
94
|
assert(! d2.done_preprocessing?(true))
|
95
|
+
# Ref and undeclared multi
|
95
96
|
assert(d2.ignore_task?(:mytaxa))
|
97
|
+
assert(d2.ignore_task?(:mytaxa_scan))
|
96
98
|
assert(d2.ignore_task?(:distances))
|
99
|
+
# Ref and multi
|
97
100
|
d2.metadata[:type] = :metagenome
|
98
101
|
assert(! d2.ignore_task?(:mytaxa))
|
102
|
+
assert(d2.ignore_task?(:mytaxa_scan))
|
99
103
|
assert(d2.ignore_task?(:distances))
|
104
|
+
# Ref and nonmulti
|
100
105
|
d2.metadata[:type] = :genome
|
101
106
|
assert(d2.ignore_task?(:mytaxa))
|
107
|
+
assert(! d2.ignore_task?(:mytaxa_scan))
|
108
|
+
assert(! d2.ignore_task?(:distances))
|
109
|
+
# Qry and nonmulti
|
110
|
+
d2.metadata[:ref] = false
|
111
|
+
assert(d2.ignore_task?(:mytaxa))
|
112
|
+
assert(d2.ignore_task?(:mytaxa_scan))
|
102
113
|
assert(! d2.ignore_task?(:distances))
|
103
114
|
end
|
104
115
|
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "miga"
|
4
|
+
require "sqlite3"
|
5
|
+
|
6
|
+
p = MiGA::Project.load(ARGV[0])
|
7
|
+
raise "Impossible to load project: #{ARGV[0]}." if p.nil?
|
8
|
+
|
9
|
+
File.unlink(ARGV[1]) if File.exist? ARGV[1]
|
10
|
+
db = SQLite3::Database.new(ARGV[1])
|
11
|
+
db.execute "create table metadata(" +
|
12
|
+
"`name` varchar(256), `field` varchar(256), `value` text)"
|
13
|
+
|
14
|
+
def searchable(db, k, v)
|
15
|
+
db.execute "insert into metadata values(?,?,?)",
|
16
|
+
k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, " ")} "
|
17
|
+
end
|
18
|
+
|
19
|
+
p.each_dataset do |name, d|
|
20
|
+
next unless d.is_ref?
|
21
|
+
searchable(db, :name, d.name)
|
22
|
+
d.metadata.each do |k, v|
|
23
|
+
next if [:created, :updated].include? k
|
24
|
+
v = v.sorted_ranks.map{ |r| r[1] }.join(" ") if k==:tax
|
25
|
+
searchable(db, k, v)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-04-
|
11
|
+
date: 2017-04-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rest-client
|
@@ -149,6 +149,7 @@ files:
|
|
149
149
|
- scripts/mytaxa.bash
|
150
150
|
- scripts/mytaxa_scan.bash
|
151
151
|
- scripts/ogs.bash
|
152
|
+
- scripts/project_stats.bash
|
152
153
|
- scripts/read_quality.bash
|
153
154
|
- scripts/ssu.bash
|
154
155
|
- scripts/stats.bash
|
@@ -156,6 +157,7 @@ files:
|
|
156
157
|
- scripts/trimmed_fasta.bash
|
157
158
|
- scripts/trimmed_reads.bash
|
158
159
|
- utils/adapters.fa
|
160
|
+
- utils/index_metadata.rb
|
159
161
|
- utils/mytaxa_scan.R
|
160
162
|
- utils/mytaxa_scan.rb
|
161
163
|
- utils/plot-taxdist.R
|
@@ -179,6 +181,7 @@ files:
|
|
179
181
|
- actions/plugins.rb
|
180
182
|
- actions/project_info.rb
|
181
183
|
- actions/result_stats.rb
|
184
|
+
- actions/run_local.rb
|
182
185
|
- actions/tax_distributions.rb
|
183
186
|
- actions/unlink_dataset.rb
|
184
187
|
- Gemfile
|