miga-base 0.2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +351 -0
- data/actions/add_result +61 -0
- data/actions/add_taxonomy +86 -0
- data/actions/create_dataset +62 -0
- data/actions/create_project +70 -0
- data/actions/daemon +69 -0
- data/actions/download_dataset +77 -0
- data/actions/find_datasets +63 -0
- data/actions/import_datasets +86 -0
- data/actions/index_taxonomy +71 -0
- data/actions/list_datasets +83 -0
- data/actions/list_files +67 -0
- data/actions/unlink_dataset +52 -0
- data/bin/miga +48 -0
- data/lib/miga/daemon.rb +178 -0
- data/lib/miga/dataset.rb +286 -0
- data/lib/miga/gui.rb +289 -0
- data/lib/miga/metadata.rb +74 -0
- data/lib/miga/project.rb +268 -0
- data/lib/miga/remote_dataset.rb +154 -0
- data/lib/miga/result.rb +102 -0
- data/lib/miga/tax_index.rb +70 -0
- data/lib/miga/taxonomy.rb +107 -0
- data/lib/miga.rb +83 -0
- data/scripts/_distances_noref_nomulti.bash +86 -0
- data/scripts/_distances_ref_nomulti.bash +105 -0
- data/scripts/aai_distances.bash +40 -0
- data/scripts/ani_distances.bash +39 -0
- data/scripts/assembly.bash +38 -0
- data/scripts/cds.bash +45 -0
- data/scripts/clade_finding.bash +27 -0
- data/scripts/distances.bash +30 -0
- data/scripts/essential_genes.bash +29 -0
- data/scripts/haai_distances.bash +39 -0
- data/scripts/init.bash +211 -0
- data/scripts/miga.bash +12 -0
- data/scripts/mytaxa.bash +93 -0
- data/scripts/mytaxa_scan.bash +85 -0
- data/scripts/ogs.bash +36 -0
- data/scripts/read_quality.bash +37 -0
- data/scripts/ssu.bash +35 -0
- data/scripts/subclades.bash +26 -0
- data/scripts/trimmed_fasta.bash +47 -0
- data/scripts/trimmed_reads.bash +57 -0
- data/utils/adapters.fa +302 -0
- data/utils/mytaxa_scan.R +89 -0
- data/utils/mytaxa_scan.rb +58 -0
- data/utils/requirements.txt +19 -0
- data/utils/subclades-compile.rb +48 -0
- data/utils/subclades.R +171 -0
- metadata +185 -0
data/lib/miga/gui.rb
ADDED
@@ -0,0 +1,289 @@
|
|
1
|
+
#
|
2
|
+
# @package MiGA
|
3
|
+
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
4
|
+
# @license artistic license 2.0
|
5
|
+
# @update Jul-11-2015
|
6
|
+
#
|
7
|
+
|
8
|
+
require 'miga/project'
|
9
|
+
require 'shoes'
|
10
|
+
require 'shoes/swt'
|
11
|
+
|
12
|
+
Shoes::Swt.initialize_backend
|
13
|
+
|
14
|
+
module MiGA
|
15
|
+
# Holder of the application-wide status message.
#
# The message is kept in the class variable +@@STATUS+ and is read and
# written through the upper-case class-level accessors below.
class MiGA
  @@STATUS = "Initializing MiGA..."

  # Replaces the current status message with +status+.
  def self.STATUS=(status)
    @@STATUS = status
  end

  # Returns the current status message.
  def self.STATUS()
    @@STATUS
  end

  # Sets the status message to the idle text and returns that text.
  def self.RESET_STATUS()
    @@STATUS = "MiGA is ready to go!"
  end
end
|
21
|
+
class GUI < Shoes
|
22
|
+
# Route table: every URL pattern is bound, in this order, to the GUI method
# that renders it.
{
  '/'             => :index,
  '/project'      => :project,
  '/datasets'     => :datasets,
  '/dataset-(.*)' => :dataset,
  '/report'       => :report
}.each { |pattern, view| url pattern, view }

# Folder two levels above this file; the view methods build image paths
# relative to it.
$miga_path = File.expand_path("../..", File.dirname(__FILE__))
|
28
|
+
|
29
|
+
# Creates a 750x400 Shoes application titled "MiGA | Microbial Genomes Atlas"
# and forwards +block+ to Shoes.app.
def self.init(&block)
  Shoes.app(
    title: "MiGA | Microbial Genomes Atlas",
    width: 750,
    height: 400,
    &block
  )
end
|
32
|
+
|
33
|
+
# =====================[ View : Windows ]
|
34
|
+
# Main window
|
35
|
+
# Main window: status bar, welcome title, the open/new/help menu, the logo
# with the citation text, and keyboard shortcuts ("o" or Ctrl-O opens a
# project, "n" or Ctrl-N creates one).
def index
  status_bar
  stack(margin: 40) do
    title("Welcome to MiGA!", align: "center")
    para("")
    flow { menu_bar([:open_project, :new_project, :help]) }
    para("")
    stack do
      logo = $miga_path + "/gui/img/MiGA.png"
      image(logo, width: 150, height: 50)
      para("")
      para("If you use MiGA in your research, please consider citing:")
      para(MiGA.CITATION)
      para("")
    end
  end
  MiGA.RESET_STATUS
  keypress do |key|
    case key
    when :control_o, "o"
      MiGA.STATUS = "Calling open_project..."
      open_project
    when :control_n, "n"
      MiGA.STATUS = "Calling new_project..."
      new_project
    end
  end
end
|
61
|
+
|
62
|
+
# Project window
|
63
|
+
# Project window: project name and path, the project-level menu, the number
# of datasets followed by every other metadata field, and keyboard shortcuts
# ("r" or Ctrl-R for the progress report, "d" or Ctrl-D for the dataset list).
# Reads the project to display from the global $project.
def project
  stack(margin: 40) do
    title($project.name.unmiga_name, align: "center")
    caption($project.path, align: "center")
    para("")
    stack { menu_bar([:list_datasets, :new_dataset, :progress_report, :help]) }
    para("")
    stack do
      para(strong("Datasets"), ": ", $project.metadata[:datasets].size)
      $project.metadata.each do |field, value|
        # The dataset list is summarised by its size on the line above.
        next if field == :datasets
        para(strong(field), ": ", value)
      end
    end
    para("")
  end
  MiGA.RESET_STATUS
  keypress do |key|
    case key
    when :control_r, "r"
      MiGA.STATUS = "Calling progress_report..."
      progress_report
    when :control_d, "d"
      MiGA.STATUS = "Calling list_datasets..."
      list_datasets
    end
  end
end
|
87
|
+
|
88
|
+
# Datasets list window
|
89
|
+
# Datasets list window: project name and path followed by one link per
# dataset name registered in the metadata of $project; each link targets
# "/dataset-<name>".
def datasets
  stack(margin: 40) do
    title($project.name.unmiga_name, align: "center")
    caption($project.path, align: "center")
    para("")
    stack do
      names = $project.metadata[:datasets]
      para("Displaying #{names.size} datasets:")
      para("")
      names.each do |ds_name|
        para(link(ds_name, :click => "/dataset-#{ds_name}"))
      end
    end
    para("")
    MiGA.RESET_STATUS
  end
end
|
105
|
+
|
106
|
+
# Dataset details window
|
107
|
+
# Dataset details window for the dataset called +name+ in $project: its
# metadata fields, a graphic of its task advance with the completed
# percentage, and a "Task" panel that is redrawn on every animation frame
# with the current value of the global $task.
def dataset(name)
  stack(margin: 40) do
    ds = $project.dataset(name)
    title(ds.name.unmiga_name, align: "center")
    caption(
      "A dataset in ",
      strong(link($project.name.unmiga_name, :click => "/datasets")),
      align: "center"
    )
    para("")
    stack do
      ds.metadata.each { |field, value| para(strong(field), ": ", value) }
    end
    para("")
    flow do
      # Width of the advance column: 30 px per task plus 40 px of room.
      w = 40 + 30 * Dataset.PREPROCESSING_TASKS.size
      stack(width: w) do
        subtitle("Advance")
        done = self.graphic_advance(ds)
        para(sprintf("%.1f%% Complete", done * 100))
      end
      stack(width: -w) do
        subtitle("Task")
        $task_name_field = stack { para("") }
        animate do
          $task_name_field.clear { para($task) }
        end
      end
    end
    para("")
    MiGA.RESET_STATUS
  end
end
|
134
|
+
|
135
|
+
# Project report window
|
136
|
+
# Project report window.
#
# Draws one advance bar per dataset of $project, shows the average completed
# fraction, and (only when that average is exactly 1.0) lists the
# project-wide tasks as "Pending" or "Done" according to
# $project.add_result. Hovering the bars fills a side box with the task and
# dataset stored in $task / $dataset by graphic_advance; clicking opens the
# hovered dataset in a new window.
#
# Uses the globals $done, $me, $adv_logo and $task_ds_box to share state
# with the event blocks.
def report
  stack(margin: 40) do
    title $project.name.unmiga_name, align: "center"
    $done = 0.0
    $me = self
    flow do
      para ""
      w = 40 + 30 * Dataset.PREPROCESSING_TASKS.size
      stack(width: w) do
        para ""
        subtitle "Dataset tasks advance:"
        caption link("toggle") { $adv_logo.toggle }
        para ""
        $adv_logo = stack do
          $project.each_dataset do |ds|
            $done += $me.graphic_advance(ds, 1)
          end
          motion do |_x, y|
            unless $task.nil?
              $task_ds_box.clear do
                subtitle "Task"
                para $task
                subtitle "Dataset"
                para $dataset
              end
              $task_ds_box.show
              $task_ds_box.move(w, y - 150)
            end
          end
          click do
            GUI.init { visit "/dataset-#{$dataset}" } unless $dataset.nil?
          end
          leave do
            $task = nil
            $dataset = nil
            $task_ds_box.hide
          end
        end
        # Average over datasets. The division is skipped for a project
        # without datasets, where 0.0 / 0 would be NaN and the label below
        # would read "NaN% Complete".
        n_datasets = $project.metadata[:datasets].size
        $done /= n_datasets unless n_datasets.zero?
        para sprintf("%.1f%% Complete", $done * 100)
      end
      $task_ds_box = stack(width: -w)
    end
    if $done == 1.0
      para ""
      stack do
        subtitle "Project-wide tasks:"
        # += builds a new array, so the task list owned by Project is not
        # modified.
        tasks = Project.DISTANCE_TASKS
        tasks += Project.INCLADE_TASKS if $project.metadata[:type] == :clade
        tasks.each do |t|
          state = $project.add_result(t).nil? ? "Pending" : "Done"
          para strong(t), ": ", state
        end
      end
    end
    para ""
    MiGA.RESET_STATUS
  end
end
|
193
|
+
|
194
|
+
# =====================[ View : Elements ]
|
195
|
+
# Menu bar
|
196
|
+
# Menu bar: draws one icon plus button per entry of +actions+.
#
# +actions+ is an array of symbols; each must be a key of the table below
# and the name of a method reachable from the button's block. Pressing a
# button sets the status message to "Calling <action>..." and invokes that
# method.
#
# NOTE(review): :new_dataset and :help are offered here but no method with
# those names is defined in the visible part of this class; confirm they
# exist elsewhere.
def menu_bar actions
  flow do
    # action => [button label, icon file name without extension]
    buttons = {
      open_project:    ["Open project", "iconmonstr-archive-5-icon-256"],
      new_project:     ["New project", "iconmonstr-plus-5-icon-256"],
      list_datasets:   ["List datasets", "iconmonstr-note-10-icon-256"],
      new_dataset:     ["New dataset", "iconmonstr-note-25-icon-256"],
      progress_report: ["Progress report", "iconmonstr-bar-chart-2-icon-256"],
      help:            ["Help", "iconmonstr-help-3-icon-256"]
    }
    actions.each do |action|
      label, icon = buttons[action]
      flow(margin: 0, width: 200) do
        image $miga_path + "/gui/img/#{icon}.png", width: 40, height: 40, margin: 2
        button(label, top: 5) do
          MiGA.STATUS = "Calling #{action}..."
          # Direct dispatch instead of eval'ing the action name as source.
          send(action)
        end
      end
    end
  end
end # menu_bar
|
218
|
+
|
219
|
+
# Status bar
|
220
|
+
# Status bar anchored to the bottom of the window: a grey strip holding the
# current MiGA.STATUS message (redrawn by a Shoes `every` timer, called here
# without an explicit interval) and MiGA.LONG_VERSION aligned to the right,
# plus the square logo in the bottom-left corner.
def status_bar
  stack(bottom: 0) do
    flow(bottom: 0, height: 20, margin: 0) do
      background("#CCC")
      stack(width: 50)
      $status_cont = stack(width: -300, height: 1.0)
      every do |_tick|
        $status_cont.clear { inscription(MiGA.STATUS, margin: 5) }
      end
      stack(width: 250, height: 1.0) do
        inscription(MiGA.LONG_VERSION, align: "right", margin: 5)
      end
    end
    logo = $miga_path + "/gui/img/MiGA-sq.png"
    image(logo, left: 0, bottom: 0, width: 30, height: 32)
  end
end # status_bar
|
236
|
+
|
237
|
+
# Draws the advance bar of dataset +ds+ and returns its completed fraction.
#
# One 28 px wide cell of height +h+ is drawn per entry of
# ds.profile_advance, 30 px apart; the entry value (0, 1 or 2) selects the
# cell colour from a three-colour palette. Hovering a cell stores the
# matching task in $task and the dataset name in $dataset.
#
# Returns 0.0 when at most one entry is greater than zero; otherwise
# (entries equal to 1, minus one) / (entries greater than 0, minus one),
# as a Float.
def graphic_advance(ds, h=10)
  ds_adv = ds.profile_advance
  flow(width: 30 * Dataset.PREPROCESSING_TASKS.size) do
    nostroke
    palette = ["#CCC", rgb(119,130,61), rgb(160,41,50)]
    ds_adv.each_with_index do |state, idx|
      stack(width: 28, margin: 0, top: 0, left: idx * 30, height: h) do
        background palette[state]
        task = Dataset.PREPROCESSING_TASKS[idx]
        hover do
          $task = task
          $dataset = ds.name.unmiga_name
        end
      end
    end
    nofill
  end
  started = ds_adv.count { |state| state > 0 }
  return 0.0 if started <= 1
  finished = ds_adv.count { |state| state == 1 }
  (finished.to_f - 1.0) / (started.to_f - 1.0)
end # graphic_advance
|
259
|
+
|
260
|
+
# =====================[ Controller : Projects ]
|
261
|
+
# Asks for a folder inside a new window and, when it holds a MiGA project
# (Project.exist?), loads it into $project and visits '/project'.
# A cancelled dialog returns silently; a folder without a project shows an
# alert first. The `return`s are issued from inside the block given to
# GUI.init.
def open_project
  GUI.init do
    folder = ask_open_folder
    return if folder.nil?
    unless Project.exist? folder
      alert "Cannot find a MiGA project at #{folder}!"
      return
    end
    $project = Project.new folder
    visit '/project'
  end
end # open_project
|
273
|
+
# Asks for a destination folder inside a new window and, unless it already
# holds a MiGA project (Project.exist?), creates the project through
# Project.new, stores it in $project and visits "/project".
# A cancelled dialog returns silently; an existing project shows an alert
# first. The `return`s are issued from inside the block given to GUI.init.
def new_project
  GUI.init do
    folder = ask_save_folder
    return if folder.nil?
    if Project.exist? folder
      alert "Cannot overwrite existent MiGA project at #{folder}!"
      return
    end
    $project = Project.new folder
    visit "/project"
  end
end # new_project
|
285
|
+
# Opens a new window showing the datasets list ("/datasets").
def list_datasets
  GUI.init do
    visit "/datasets"
  end
end # list_datasets
|
286
|
+
# Opens a new window showing the project report ("/report").
def progress_report
  GUI.init do
    visit "/report"
  end
end # progress_report
|
287
|
+
end
|
288
|
+
end
|
289
|
+
|
data/lib/miga/metadata.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
#
|
2
|
+
# @package MiGA
|
3
|
+
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
4
|
+
# @license artistic license 2.0
|
5
|
+
# @update Jul-06-2015
|
6
|
+
#
|
7
|
+
|
8
|
+
module MiGA
|
9
|
+
# Key/value metadata persisted as a pretty-printed JSON file.
#
# Keys are stored as symbols. Writers and readers coordinate through a
# "<path>.lock" file, and data is written to "<path>.tmp" before being
# renamed over the real file.
class Metadata
  # Class

  # Returns the size of the file at +path+ if it exists and is not empty,
  # nil otherwise.
  def self.exist?(path)
    File.size? path
  end

  # Returns the Metadata stored at +path+, or nil when there is no
  # non-empty file there.
  def self.load path
    return nil unless Metadata.exist? path
    Metadata.new path
  end

  # Instance
  attr_reader :path, :data

  # Opens the metadata file at +path+, creating it first (seeded with
  # +defaults+) when it is missing or empty. When the file already has
  # content, +defaults+ are discarded by the final #load.
  def initialize(path, defaults={})
    @path = File.absolute_path(path)
    @data = {}
    defaults.each_pair{ |k,v| self[k]=v }
    self.create unless File.size? self.path
    self.load
  end

  # Stamps the :created field and writes the file.
  def create
    @data[:created] = Time.now.to_s
    self.save
  end

  # Stamps the :updated field and writes the data to disk.
  #
  # Waits until no lock file exists, creates the lock, writes the JSON to
  # the temporary file and renames it into place. The lock is removed even
  # if writing or renaming fails, so that a failed save cannot block every
  # later #save and #load.
  #
  # Raises RuntimeError ("Lock-racing detected ...") if the temporary file
  # or the lock is gone by the time the rename is due.
  def save
    MiGA.DEBUG "Metadata.save #{self.path}"
    self.data[:updated] = Time.now.to_s
    json = JSON.pretty_generate(self.data)
    lock = self.path + ".lock"
    tmp = self.path + ".tmp"
    wait_for_lock
    FileUtils.touch lock
    begin
      # Block form closes the handle even when the write raises.
      File.open(tmp, "w") { |ofh| ofh.puts json }
      raise "Lock-racing detected for #{self.path}." unless
        File.exist?(tmp) and File.exist?(lock)
      File.rename tmp, self.path
    ensure
      File.unlink lock if File.exist? lock
    end
  end

  # Replaces the in-memory data with the content of the file, waiting first
  # until no lock file exists. Returns the parsed hash (string keys).
  def load
    wait_for_lock
    # :symbolize_names does not play nicely with :create_additions
    # NOTE: :create_additions lets the JSON content instantiate Ruby
    # objects; only load metadata files from trusted locations.
    tmp = JSON.parse(File.read(self.path),
      :symbolize_names => false, :create_additions => true)
    @data = {}
    tmp.each_pair{ |k,v| self[k] = v }
  end

  # Deletes the metadata file.
  def remove!
    MiGA.DEBUG "Metadata.remove! #{self.path}"
    File.unlink self.path
  end

  # Returns the value stored under +k+ (string or symbol).
  def [](k)
    self.data[k.to_sym]
  end

  # Stores +v+ under +k+ (string or symbol) and returns the stored value.
  # The special field :name is passed through String#miga_name and the
  # special field :type is symbolized; nil is stored as-is for both.
  def []=(k,v)
    k = k.to_sym
    unless v.nil?
      # Protect the special field :name
      v = v.miga_name if k == :name
      # Symbolize the special field :type
      v = v.to_sym if k == :type
    end
    # Register and return
    self.data[k] = v
  end

  # Calls +blk+ with every key and value. Without a block, returns an
  # enumerator over the underlying hash.
  def each(&blk)
    return self.data.each if blk.nil?
    self.data.each{ |k,v| blk.call(k,v) }
  end

  private

  # Blocks while "<path>.lock" exists, sleeping 0.1 s longer after each
  # check until the pause stops growing at about 10 s.
  def wait_for_lock
    sleeper = 0.0
    while File.exist? self.path + ".lock"
      sleeper += 0.1 if sleeper <= 10.0
      sleep(sleeper)
    end
  end
end
|
73
|
+
end
|
74
|
+
|
data/lib/miga/project.rb
ADDED
@@ -0,0 +1,268 @@
|
|
1
|
+
#
|
2
|
+
# @package MiGA
|
3
|
+
# @author Luis M. Rodriguez-R <lmrodriguezr at gmail dot com>
|
4
|
+
# @license artistic license 2.0
|
5
|
+
# @update Jan-18-2016
|
6
|
+
#
|
7
|
+
|
8
|
+
require "miga/dataset"
|
9
|
+
|
10
|
+
module MiGA
|
11
|
+
# A MiGA project: a directory tree with "data", "metadata" and "daemon"
# folders, described by the file "miga.project.json" at its root.
class Project
  # Class

  # Folders created at the root of every project.
  @@FOLDERS = %w(data metadata daemon)
  # Folders created under "data/".
  @@DATA_FOLDERS = %w(
    01.raw_reads 02.trimmed_reads 03.read_quality 04.trimmed_fasta
    05.assembly 06.cds
    07.annotation 07.annotation/01.function 07.annotation/02.taxonomy
    07.annotation/01.function/01.essential
    07.annotation/01.function/02.ssu
    07.annotation/02.taxonomy/01.mytaxa
    07.annotation/03.qa 07.annotation/03.qa/01.checkm
    07.annotation/03.qa/02.mytaxa_scan
    08.mapping 08.mapping/01.read-ctg 08.mapping/02.read-gene
    09.distances 09.distances/01.haai 09.distances/02.aai
    09.distances/03.ani 09.distances/04.ssu
    10.clades 10.clades/01.find 10.clades/02.ani 10.clades/03.ogs
    10.clades/04.phylogeny 10.clades/04.phylogeny/01.essential
    10.clades/04.phylogeny/02.core 10.clades/05.metadata)
  # Project-wide result type => folder (relative to "data/") holding it.
  @@RESULT_DIRS = {
    # Distances
    haai_distances: "09.distances/01.haai",
    aai_distances: "09.distances/02.aai",
    ani_distances: "09.distances/03.ani",
    #ssu_distances: "09.distances/04.ssu",
    # Clade identification
    clade_finding: "10.clades/01.find",
    # Clade analysis
    subclades: "10.clades/02.ani",
    ogs: "10.clades/03.ogs",
    ess_phylogeny: "10.clades/04.phylogeny/01.essential",
    core_phylogeny: "10.clades/04.phylogeny/02.core",
    clade_metadata: "10.clades/05.metadata"
  }
  # Project type => description and the kinds of datasets it accepts.
  @@KNOWN_TYPES = {
    mixed: {description: "Mixed collection of genomes, metagenomes, " +
      "and viromes.",
      single: true, multi: true},
    genomes: {description: "Collection of genomes.",
      single: true, multi: false},
    clade: {description: "Collection of closely-related genomes " +
      "(ANI >= 90%).",
      single: true, multi: false},
    metagenomes: {description: "Collection of metagenomes and/or " +
      "viromes.",
      single: false, multi: true}
  }
  # Tasks checked, in this order, by #next_distances.
  @@DISTANCE_TASKS = [:haai_distances, :aai_distances, :ani_distances,
    :clade_finding]
  # Tasks checked, in this order, by #next_inclade (type :clade only).
  @@INCLADE_TASKS = [:subclades, :ogs, :ess_phylogeny, :core_phylogeny,
    :clade_metadata]

  def self.DISTANCE_TASKS ; @@DISTANCE_TASKS ; end
  def self.INCLADE_TASKS ; @@INCLADE_TASKS ; end
  def self.RESULT_DIRS ; @@RESULT_DIRS ; end
  def self.KNOWN_TYPES ; @@KNOWN_TYPES ; end

  # Is there a project at +path+? True when +path+ is a directory that
  # contains "miga.project.json".
  def self.exist?(path)
    Dir.exist?(path) and File.exist?(path + "/miga.project.json")
  end

  # Returns the project at +path+, or nil when none exists there.
  def self.load(path)
    return nil unless Project.exist? path
    Project.new path
  end

  # Instance
  attr_reader :path, :metadata

  # Opens the project at +path+, creating it when it does not exist or when
  # +update+ is true.
  #
  # Raises RuntimeError unless both "~/.miga_rc" and "~/.miga_daemon.json"
  # exist (HOME taken from the environment).
  def initialize(path, update=false)
    home = ENV["HOME"]
    initialized = (File.exist?("#{home}/.miga_rc") and
      File.exist?("#{home}/.miga_daemon.json"))
    raise "Impossible to create project in uninitialized MiGA." unless
      initialized
    @path = File.absolute_path(path)
    self.create if update or not Project.exist? self.path
    self.load if self.metadata.nil?
  end

  # Creates any missing project folder (including missing parents of the
  # project path), the project metadata, and a copy of
  # "~/.miga_daemon.json" as "daemon/daemon.json" unless one is already
  # there; then reloads the metadata.
  def create
    FileUtils.mkdir_p self.path
    @@FOLDERS.each do |dir|
      FileUtils.mkdir_p self.path + "/" + dir
    end
    @@DATA_FOLDERS.each do |dir|
      FileUtils.mkdir_p self.path + "/data/" + dir
    end
    @metadata = Metadata.new(self.path + "/miga.project.json",
      {datasets: [], name: File.basename(self.path)})
    daemon_json = self.path + "/daemon/daemon.json"
    FileUtils.cp(ENV["HOME"] + "/.miga_daemon.json", daemon_json) unless
      File.exist? daemon_json
    self.load
  end

  # Writes the metadata to disk and reloads it.
  def save
    self.metadata.save
    self.load
  end

  # (Re)loads the metadata from "miga.project.json".
  # Raises RuntimeError when the file cannot be found.
  def load
    @metadata = Metadata.load self.path + "/miga.project.json"
    raise "Couldn't find project metadata at #{self.path}" if
      self.metadata.nil?
  end

  # Name of the project, from its metadata.
  def name ; self.metadata[:name] ; end

  # Array with the Dataset object of every registered dataset name.
  def datasets
    self.metadata[:datasets].map{ |name| self.dataset name }
  end

  # Returns the Dataset called +name+ (normalized with String#miga_name).
  # Objects are created on first use and cached per project instance.
  def dataset(name)
    name = name.miga_name
    @datasets = {} if @datasets.nil?
    @datasets[name] = Dataset.new(self, name) if @datasets[name].nil?
    @datasets[name]
  end

  # Calls +blk+ with the Dataset object of every registered dataset.
  def each_dataset(&blk)
    self.metadata[:datasets].each{ |name| blk.call(self.dataset name) }
  end

  # Registers +name+ in the project (once), saves, and returns its Dataset.
  def add_dataset(name)
    self.metadata[:datasets] << name unless
      self.metadata[:datasets].include? name
    self.save
    self.dataset(name)
  end

  # Removes +name+ from the registered datasets, saves, and returns the
  # Dataset object.
  # NOTE(review): #dataset builds an object on demand, so the nil guard
  # below looks unreachable and unknown names are not reported; confirm the
  # intended contract with the callers.
  def unlink_dataset(name)
    d = self.dataset name
    return nil if d.nil?
    self.metadata[:datasets].delete(name)
    self.save
    d
  end

  # Copies dataset +ds+ from another project into this one: every result
  # file, the "json", "start" and "done" markers of every result, and the
  # dataset metadata, each through File.generic_transfer with +method+.
  # Finally registers the dataset and returns it.
  #
  # Raises RuntimeError if a dataset with that name already exists here.
  def import_dataset(ds, method=:hardlink)
    raise "Impossible to import dataset, it already exists: #{ds.name}." if
      Dataset.exist?(self, ds.name)
    # Import dataset results
    ds.each_result do |task, result|
      target_dir = "#{self.path}/data/#{Dataset.RESULT_DIRS[task]}"
      # import result files
      result.each_file do |file|
        File.generic_transfer("#{result.dir}/#{file}",
          "#{target_dir}/#{file}", method)
      end
      # import result metadata
      %w(json start done).each do |suffix|
        source = "#{result.dir}/#{ds.name}.#{suffix}"
        next unless File.exist? source
        File.generic_transfer(source,
          "#{target_dir}/#{ds.name}.#{suffix}", method)
      end
    end
    # Import dataset metadata
    File.generic_transfer("#{ds.project.path}/metadata/#{ds.name}.json",
      "#{self.path}/metadata/#{ds.name}.json", method)
    # Save dataset
    self.add_dataset ds.name
  end

  # Returns the stored project-wide result called +name+ (string or
  # symbol) via Result.load, or nil for an unknown result type.
  def result(name)
    return nil if @@RESULT_DIRS[name.to_sym].nil?
    Result.load self.path + "/data/" + @@RESULT_DIRS[name.to_sym] +
      "/miga-project.json"
  end

  # Array with every project-wide result for which #result is not nil.
  def results
    @@RESULT_DIRS.keys.map{ |k| self.result k }.reject{ |r| r.nil? }
  end

  # Registers the project-wide result +result_type+ (string or symbol) if
  # its files are in place, saves it, and returns the Result.
  #
  # Returns nil when the type is unknown, when the "miga-project.done"
  # marker or any required file is missing, or when this method has no
  # registration rule for the type (ess_phylogeny, core_phylogeny and
  # clade_metadata at present).
  def add_result result_type
    result_type = result_type.to_sym if result_type.respond_to? :to_sym
    dir = @@RESULT_DIRS[result_type]
    return nil if dir.nil?
    base = self.path + "/data/" + dir + "/miga-project"
    return nil unless File.exist? base + ".done"
    r = nil
    case result_type
    when :haai_distances, :aai_distances, :ani_distances, :ssu_distances
      return nil unless
        File.exist? base + ".Rdata" and
        File.exist? base + ".log" and
        (File.exist?(base + ".txt") or File.exist?(base + ".txt.gz"))
      r = Result.new base + ".json"
      r.add_file :rdata, "miga-project.Rdata"
      r.add_file :matrix, "miga-project.txt"
      r.add_file :log, "miga-project.log"
      r.add_file :hist, "miga-project.hist"
      r.data[:gz] = File.exist?(base + ".txt.gz")
    when :clade_finding
      return nil unless File.exist? base + ".proposed-clades"
      r = Result.new base + ".json"
      r.add_file :proposal, "miga-project.proposed-clades"
      r.add_file :rbm_aai90, "genome-genome.aai90.rbm"
      # NOTE(review): key says aai90 but the file name says ani; confirm
      # against the clade_finding script.
      r.add_file :clades_aai90, "miga-project.ani-clades"
      r.add_file :rbm_ani95, "genome-genome.ani95.rbm"
      r.add_file :clades_ani95, "miga-project.ani95-clades"
    when :subclades
      return nil unless
        File.exist?(base+".pdf") and
        File.exist?(base+".1.classif") and
        File.exist?(base+".1.medoids") and
        File.exist?(base+".class.tsv") and
        File.exist?(base+".class.nwk")
      r = Result.new base + ".json"
      r.add_file :report, "miga-project.pdf"
      (1..6).each do |i|
        %w{classif medoids}.each do |m|
          r.add_file "#{m}_#{i}".to_sym, "miga-project.#{i}.#{m}"
        end
      end
      r.add_file :class_table, "miga-project.class.tsv"
      r.add_file :class_tree, "miga-project.class.nwk"
      r.add_file :ani_tree, "miga-project.ani.nwk"
    when :ogs
      return nil unless
        File.exist?(base+".ogs") and
        File.exist?(base+".stats")
      r = Result.new base + ".json"
      r.add_file :ogs, "miga-project.ogs"
      r.add_file :stats, "miga-project.stats"
      r.add_file :rbm, "miga-project.rbm"
    end
    # Types without a branch above leave r unset; report them as not
    # available instead of calling #save on nil.
    return nil if r.nil?
    r.save
    r
  end

  # First distance task for which #add_result is nil, or nil if none.
  def next_distances
    @@DISTANCE_TASKS.find{ |t| self.add_result(t).nil? }
  end

  # First in-clade task for which #add_result is nil. Always nil unless the
  # project type is :clade.
  def next_inclade
    return nil unless self.metadata[:type]==:clade
    @@INCLADE_TASKS.find{ |t| self.add_result(t).nil? }
  end

  # Names of datasets that have files in the dataset result folders but are
  # not registered in the project metadata. The name is the part of the
  # file name before the first dot; "miga-project" files are ignored, and
  # so are result folders that do not exist.
  def unregistered_datasets
    found = []
    Dataset.RESULT_DIRS.each do |_res, dir|
      folder = self.path + "/data/" + dir
      next unless Dir.exist? folder
      Dir.entries(folder).each do |file|
        next unless
          file =~ %r{
            \.(fa(a|sta|stqc?)?|fna|solexaqa|gff[23]?|done|ess)(\.gz)?$
          }x
        m = /([^\.]+)/.match(file)
        found << m[1] unless m.nil? or m[1] == "miga-project"
      end
    end
    found.uniq - self.metadata[:datasets]
  end

  # True when every dataset is either not a reference dataset or has
  # finished preprocessing. Every dataset is evaluated (no short-circuit).
  def done_preprocessing?
    self.datasets.map{|ds| (not ds.is_ref?) or ds.done_preprocessing?}.all?
  end

  ## Generates a two-dimensional matrix (array of arrays) where the first
  ## index corresponds to the dataset, the second index corresponds to the
  ## dataset task, and the value corresponds to:
  ## 0: Before execution.
  ## 1: Done (or not required).
  ## 2: To do.
  def profile_datasets_advance
    advance = []
    self.each_dataset_profile_advance do |ds_adv|
      advance << ds_adv
    end
    advance
  end

  # Calls +blk+ with the profile_advance array of every dataset.
  def each_dataset_profile_advance(&blk)
    self.each_dataset do |ds|
      blk.call(ds.profile_advance)
    end
  end
end
|
267
|
+
end
|
268
|
+
|