miga-base 0.2.6.4 → 0.2.6.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/actions/list_datasets.rb +6 -1
- data/actions/run_local.rb +1 -1
- data/actions/tax_distributions.rb +4 -4
- data/lib/miga/common.rb +18 -0
- data/lib/miga/daemon.rb +1 -1
- data/lib/miga/dataset_result.rb +46 -47
- data/lib/miga/remote_dataset.rb +52 -32
- data/lib/miga/tax_dist.rb +2 -2
- data/lib/miga/tax_index.rb +1 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/_distances_functions.bash +17 -8
- data/scripts/_distances_noref_nomulti.bash +26 -7
- data/scripts/aai_distances.bash +3 -2
- data/scripts/ani_distances.bash +3 -2
- data/scripts/assembly.bash +24 -24
- data/scripts/cds.bash +22 -30
- data/scripts/clade_finding.bash +5 -4
- data/scripts/distances.bash +13 -9
- data/scripts/essential_genes.bash +12 -11
- data/scripts/haai_distances.bash +3 -2
- data/scripts/init.bash +100 -108
- data/scripts/miga.bash +4 -2
- data/scripts/mytaxa.bash +72 -71
- data/scripts/mytaxa_scan.bash +62 -61
- data/scripts/ogs.bash +14 -13
- data/scripts/project_stats.bash +1 -0
- data/scripts/read_quality.bash +12 -16
- data/scripts/ssu.bash +18 -18
- data/scripts/stats.bash +3 -2
- data/scripts/subclades.bash +7 -6
- data/scripts/trimmed_fasta.bash +22 -21
- data/scripts/trimmed_reads.bash +34 -32
- data/utils/index_metadata.rb +4 -4
- data/utils/ref-tree.R +65 -0
- data/utils/requirements.txt +1 -1
- metadata +57 -56
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a69a12511e98d2cc6751efa31592661b664e9d33
|
4
|
+
data.tar.gz: 807b0a13efce0367c88369a2f389fffff34ee58d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bdc1db4adec179da57c45ab29a41ec36ef2895f34d8159792420560bd0bc15910052fe6e2949ee0a992500e0001a7db4d2bbbf4f4d5e1daefb4c1c0fa3bd4ebb
|
7
|
+
data.tar.gz: 40d5a05ec2c154b50eb8952b87212106461080740e8986f982f44fa79cea3100d31beaff1c647afa83892b3e2cde37464ddd11568ce658addcb60992b6395de3
|
data/actions/list_datasets.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
# @package MiGA
|
4
4
|
# @license Artistic-2.0
|
5
5
|
|
6
|
-
o = {q:true, info:false, processing:false}
|
6
|
+
o = {q:true, info:false, processing:false, silent:false}
|
7
7
|
OptionParser.new do |opt|
|
8
8
|
opt_banner(opt)
|
9
9
|
opt_object(opt, o, [:project, :dataset_opt])
|
@@ -15,11 +15,15 @@ OptionParser.new do |opt|
|
|
15
15
|
opt.on("-m", "--metadata STRING",
|
16
16
|
"Print name and metadata field only. If set, ignores -i."
|
17
17
|
){ |v| o[:datum]=v }
|
18
|
+
opt.on("-s", "--silent",
|
19
|
+
"No output and exit with non-zero status if the dataset list is empty."
|
20
|
+
){ |v| o[:silent] = v }
|
18
21
|
opt_common(opt, o)
|
19
22
|
end.parse!
|
20
23
|
|
21
24
|
##=> Main <=
|
22
25
|
opt_require(o, project:"-P")
|
26
|
+
o[:q] = true if o[:silent]
|
23
27
|
|
24
28
|
$stderr.puts "Loading project." unless o[:q]
|
25
29
|
p = MiGA::Project.load(o[:project])
|
@@ -34,6 +38,7 @@ else
|
|
34
38
|
ds = []
|
35
39
|
end
|
36
40
|
ds = filter_datasets!(ds, o)
|
41
|
+
exit(1) if o[:silent] and ds.empty?
|
37
42
|
|
38
43
|
if not o[:datum].nil?
|
39
44
|
ds.each{|d| puts "#{d.name}\t#{d.metadata[ o[:datum] ] || "?"}"}
|
data/actions/run_local.rb
CHANGED
@@ -36,7 +36,7 @@ else
|
|
36
36
|
end
|
37
37
|
raise "Unsupported #{type.to_s.gsub(/.*::/,"")} result: #{o[:name]}." if
|
38
38
|
type.RESULT_DIRS[o[:name].to_sym].nil?
|
39
|
-
cmd <<
|
39
|
+
cmd << MiGA::MiGA.script_path(o[:name], miga:miga, project:p).shellescape
|
40
40
|
pid = spawn cmd.join(" ")
|
41
41
|
Process.wait pid
|
42
42
|
|
@@ -38,13 +38,13 @@ raise "#{res_n} not yet calculated." if res.nil?
|
|
38
38
|
matrix = res.file_path(:matrix)
|
39
39
|
raise "#{res_n} has no matrix." if matrix.nil?
|
40
40
|
dist = {}
|
41
|
-
|
42
|
-
|
43
|
-
next if
|
41
|
+
mfh = matrix=~/\.gz$/ ? Zlib::GzipReader.open(matrix) : File.open(matrix,"r")
|
42
|
+
mfh.each_line do |ln|
|
43
|
+
next if mfh.lineno==1
|
44
44
|
row = ln.chomp.split(/\t/)
|
45
45
|
dist[cannid(row[1], row[2])] = [row[3], 0, ["root:biota"]]
|
46
46
|
end
|
47
|
-
|
47
|
+
mfh.close
|
48
48
|
|
49
49
|
Dir.mktmpdir do |dir|
|
50
50
|
if o[:index].nil?
|
data/lib/miga/common.rb
CHANGED
@@ -103,6 +103,24 @@ class MiGA::MiGA
|
|
103
103
|
tmp.unlink
|
104
104
|
end
|
105
105
|
end
|
106
|
+
|
107
|
+
##
|
108
|
+
# Path to a script to be executed for +task+. Supported +opts+ are:
|
109
|
+
# - +:miga+ Path to the MiGA home to use. If not passed, the home of the
|
110
|
+
# library is used.
|
111
|
+
# - +:project+ MiGA::Project object to check within plugins. If not passed,
|
112
|
+
# only core scripts are supported.
|
113
|
+
def self.script_path(task, opts={})
|
114
|
+
opts[:miga] ||= root_path
|
115
|
+
unless opts[:project].nil?
|
116
|
+
opts[:project].plugins.each do |pl|
|
117
|
+
if File.exist? File.expand_path("scripts/#{task}.bash", pl)
|
118
|
+
opts[:miga] = pl
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
File.expand_path("scripts/#{task}.bash", opts[:miga])
|
123
|
+
end
|
106
124
|
|
107
125
|
|
108
126
|
##
|
data/lib/miga/daemon.rb
CHANGED
@@ -167,7 +167,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
167
167
|
to_run = {ds: ds, job: job, task_name: task_name,
|
168
168
|
cmd: sprintf(runopts(:cmd),
|
169
169
|
# 1: script
|
170
|
-
|
170
|
+
MiGA::MiGA.script_path(job, miga:vars["MIGA"], project:project),
|
171
171
|
# 2: vars
|
172
172
|
vars.keys.map { |k|
|
173
173
|
sprintf(runopts(:var), k, vars[k]) }.join(runopts(:varsep)),
|
data/lib/miga/dataset_result.rb
CHANGED
@@ -27,24 +27,25 @@ module MiGA::DatasetResult
|
|
27
27
|
private
|
28
28
|
|
29
29
|
##
|
30
|
-
# Add result type +:raw_reads+ at +base+ (no +
|
31
|
-
def add_result_raw_reads(base,
|
30
|
+
# Add result type +:raw_reads+ at +base+ (no +_opts+ supported).
|
31
|
+
def add_result_raw_reads(base, _opts)
|
32
32
|
return nil unless result_files_exist?(base, ".1.fastq")
|
33
33
|
r = MiGA::Result.new("#{base}.json")
|
34
|
-
|
34
|
+
add_files_to_ds_result(r, name,
|
35
35
|
( result_files_exist?(base, ".2.fastq") ?
|
36
|
-
{:
|
37
|
-
{:
|
36
|
+
{pair1:".1.fastq", pair2:".2.fastq"} :
|
37
|
+
{single:".1.fastq"} ))
|
38
38
|
end
|
39
39
|
|
40
40
|
##
|
41
|
-
# Add result type +:trimmed_reads+ at +base+ (no +
|
42
|
-
def add_result_trimmed_reads(base,
|
41
|
+
# Add result type +:trimmed_reads+ at +base+ (no +_opts+ supported).
|
42
|
+
def add_result_trimmed_reads(base, _opts)
|
43
43
|
return nil unless result_files_exist?(base, ".1.clipped.fastq")
|
44
44
|
r = MiGA::Result.new("#{base}.json")
|
45
|
-
|
46
|
-
|
47
|
-
|
45
|
+
if result_files_exist?(base, ".2.clipped.fastq")
|
46
|
+
r = add_files_to_ds_result(r, name,
|
47
|
+
pair1:".1.clipped.fastq", pair2:".2.clipped.fastq")
|
48
|
+
end
|
48
49
|
r.add_file(:single, "#{name}.1.clipped.single.fastq")
|
49
50
|
r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
|
50
51
|
add_result(:raw_reads) #-> Post gunzip
|
@@ -52,26 +53,26 @@ module MiGA::DatasetResult
|
|
52
53
|
end
|
53
54
|
|
54
55
|
##
|
55
|
-
# Add result type +:read_quality+ at +base+ (no +
|
56
|
-
def add_result_read_quality(base,
|
56
|
+
# Add result type +:read_quality+ at +base+ (no +_opts+ supported).
|
57
|
+
def add_result_read_quality(base, _opts)
|
57
58
|
return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
|
58
59
|
r = MiGA::Result.new("#{base}.json")
|
59
60
|
r = add_files_to_ds_result(r, name,
|
60
|
-
|
61
|
+
solexaqa:".solexaqa", fastqc:".fastqc")
|
61
62
|
add_result(:trimmed_reads) #-> Post cleaning
|
62
63
|
r
|
63
64
|
end
|
64
65
|
|
65
66
|
##
|
66
|
-
# Add result type +:trimmed_fasta+ at +base+ (no +
|
67
|
-
def add_result_trimmed_fasta(base,
|
67
|
+
# Add result type +:trimmed_fasta+ at +base+ (no +_opts+ supported).
|
68
|
+
def add_result_trimmed_fasta(base, _opts)
|
68
69
|
return nil unless
|
69
70
|
result_files_exist?(base, ".CoupledReads.fa") or
|
70
71
|
result_files_exist?(base, ".SingleReads.fa") or
|
71
72
|
result_files_exist?(base, %w[.1.fasta .2.fasta])
|
72
73
|
r = MiGA::Result.new("#{base}.json")
|
73
|
-
r = add_files_to_ds_result(r, name,
|
74
|
-
:
|
74
|
+
r = add_files_to_ds_result(r, name, coupled:".CoupledReads.fa",
|
75
|
+
single:".SingleReads.fa", pair1:".1.fasta", pair2:".2.fasta")
|
75
76
|
add_result(:raw_reads) #-> Post gzip
|
76
77
|
r
|
77
78
|
end
|
@@ -82,8 +83,8 @@ module MiGA::DatasetResult
|
|
82
83
|
def add_result_assembly(base, opts)
|
83
84
|
return nil unless result_files_exist?(base, ".LargeContigs.fna")
|
84
85
|
r = MiGA::Result.new("#{base}.json")
|
85
|
-
r = add_files_to_ds_result(r, name,
|
86
|
-
:
|
86
|
+
r = add_files_to_ds_result(r, name, largecontigs:".LargeContigs.fna",
|
87
|
+
allcontigs:".AllContigs.fna", assembly_data:"")
|
87
88
|
opts[:is_clean] ||= false
|
88
89
|
r.clean! if opts[:is_clean]
|
89
90
|
unless r.clean?
|
@@ -99,8 +100,8 @@ module MiGA::DatasetResult
|
|
99
100
|
def add_result_cds(base, opts)
|
100
101
|
return nil unless result_files_exist?(base, %w[.faa .fna])
|
101
102
|
r = MiGA::Result.new("#{base}.json")
|
102
|
-
r = add_files_to_ds_result(r, name,
|
103
|
-
:
|
103
|
+
r = add_files_to_ds_result(r, name, proteins:".faa", genes:".fna",
|
104
|
+
gff2:".gff2", gff3:".gff3", tab:".tab")
|
104
105
|
opts[:is_clean] ||= false
|
105
106
|
r.clean! if opts[:is_clean]
|
106
107
|
unless r.clean?
|
@@ -112,12 +113,12 @@ module MiGA::DatasetResult
|
|
112
113
|
end
|
113
114
|
|
114
115
|
##
|
115
|
-
# Add result type +:essential_genes+ at +base+ (no +
|
116
|
-
def add_result_essential_genes(base,
|
116
|
+
# Add result type +:essential_genes+ at +base+ (no +_opts+ supported).
|
117
|
+
def add_result_essential_genes(base, _opts)
|
117
118
|
return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
|
118
119
|
r = MiGA::Result.new("#{base}.json")
|
119
|
-
|
120
|
-
:
|
120
|
+
add_files_to_ds_result(r, name, ess_genes:".ess.faa",
|
121
|
+
collection:".ess", report:".ess/log")
|
121
122
|
end
|
122
123
|
|
123
124
|
##
|
@@ -126,8 +127,8 @@ module MiGA::DatasetResult
|
|
126
127
|
return MiGA::Result.new("#{base}.json") if result(:assembly).nil?
|
127
128
|
return nil unless result_files_exist?(base, ".ssu.fa")
|
128
129
|
r = MiGA::Result.new("#{base}.json")
|
129
|
-
r = add_files_to_ds_result(r, name,
|
130
|
-
:
|
130
|
+
r = add_files_to_ds_result(r, name, longest_ssu_gene:".ssu.fa",
|
131
|
+
gff:".ssu.gff", all_ssu_genes:".ssu.all.fa")
|
131
132
|
opts[:is_clean] ||= false
|
132
133
|
r.clean! if opts[:is_clean]
|
133
134
|
unless r.clean?
|
@@ -138,37 +139,36 @@ module MiGA::DatasetResult
|
|
138
139
|
end
|
139
140
|
|
140
141
|
##
|
141
|
-
# Add result type +:mytaxa+ at +base+ (no +
|
142
|
-
def add_result_mytaxa(base,
|
142
|
+
# Add result type +:mytaxa+ at +base+ (no +_opts+ supported).
|
143
|
+
def add_result_mytaxa(base, _opts)
|
143
144
|
if is_multi?
|
144
145
|
return nil unless result_files_exist?(base, ".mytaxa")
|
145
146
|
r = MiGA::Result.new("#{base}.json")
|
146
|
-
add_files_to_ds_result(r, name,
|
147
|
-
:
|
147
|
+
add_files_to_ds_result(r, name, mytaxa:".mytaxa", blast:".blast",
|
148
|
+
mytaxain:".mytaxain")
|
148
149
|
else
|
149
150
|
MiGA::Result.new("#{base}.json")
|
150
151
|
end
|
151
152
|
end
|
152
153
|
|
153
154
|
##
|
154
|
-
# Add result type +:mytaxa_scan+ at +base+ (no +
|
155
|
-
def add_result_mytaxa_scan(base,
|
155
|
+
# Add result type +:mytaxa_scan+ at +base+ (no +_opts+ supported).
|
156
|
+
def add_result_mytaxa_scan(base, _opts)
|
156
157
|
if is_nonmulti?
|
157
158
|
return nil unless
|
158
159
|
result_files_exist?(base, %w[.pdf .wintax .mytaxa .reg])
|
159
160
|
r = MiGA::Result.new("#{base}.json")
|
160
|
-
add_files_to_ds_result(r, name,
|
161
|
-
:
|
162
|
-
:
|
163
|
-
:region_ids=>".wintax.regions"})
|
161
|
+
add_files_to_ds_result(r, name, mytaxa:".mytaxa", wintax:".wintax",
|
162
|
+
blast:".blast", mytaxain:".mytaxain", report:".pdf", regions:".reg",
|
163
|
+
gene_ids:".wintax.genes", region_ids:".wintax.regions")
|
164
164
|
else
|
165
165
|
MiGA::Result.new("#{base}.json")
|
166
166
|
end
|
167
167
|
end
|
168
168
|
|
169
169
|
##
|
170
|
-
# Add result type +:distances+ at +base+ (no +
|
171
|
-
def add_result_distances(base,
|
170
|
+
# Add result type +:distances+ at +base+ (no +_opts+ supported).
|
171
|
+
def add_result_distances(base, _opts)
|
172
172
|
if is_nonmulti?
|
173
173
|
if is_ref?
|
174
174
|
add_result_distances_ref(base)
|
@@ -181,8 +181,8 @@ module MiGA::DatasetResult
|
|
181
181
|
end
|
182
182
|
|
183
183
|
##
|
184
|
-
# Add result type +:stats+ at +base+ (no +
|
185
|
-
def add_result_stats(base,
|
184
|
+
# Add result type +:stats+ at +base+ (no +_opts+ supported).
|
185
|
+
def add_result_stats(base, _opts)
|
186
186
|
MiGA::Result.new("#{base}.json")
|
187
187
|
end
|
188
188
|
|
@@ -199,8 +199,8 @@ module MiGA::DatasetResult
|
|
199
199
|
return nil unless
|
200
200
|
File.exist?("#{pref}/01.haai/#{name}.db")
|
201
201
|
r = MiGA::Result.new("#{base}.json")
|
202
|
-
r.add_files(
|
203
|
-
:
|
202
|
+
r.add_files(haai_db:"01.haai/#{name}.db", aai_db:"02.aai/#{name}.db",
|
203
|
+
ani_db:"03.ani/#{name}.db")
|
204
204
|
r
|
205
205
|
end
|
206
206
|
|
@@ -211,10 +211,9 @@ module MiGA::DatasetResult
|
|
211
211
|
result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
|
212
212
|
result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
|
213
213
|
r = MiGA::Result.new("#{base}.json")
|
214
|
-
|
215
|
-
:
|
216
|
-
:
|
217
|
-
:ani_medoids=>".ani-medoids.tsv", :ani_db=>".ani.db"})
|
214
|
+
add_files_to_ds_result(r, name, aai_medoids:".aai-medoids.tsv",
|
215
|
+
haai_db:".haai.db", aai_db:".aai.db", ani_medoids:".ani-medoids.tsv",
|
216
|
+
ani_db:".ani.db", ref_tree:".nwk", ref_tree_pdf:".nwk.pdf")
|
218
217
|
end
|
219
218
|
|
220
219
|
##
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -58,26 +58,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
58
58
|
ids = [ids] unless ids.is_a? Array
|
59
59
|
case @@UNIVERSE[universe][:method]
|
60
60
|
when :rest
|
61
|
-
|
62
|
-
@@UNIVERSE[universe][:dbs][db][:map_to]
|
63
|
-
url = sprintf @@UNIVERSE[universe][:url],
|
64
|
-
db, ids.join(","), format, map_to
|
65
|
-
response = RestClient::Request.execute(method: :get, url:url, timeout:600)
|
66
|
-
raise "Unable to reach #{universe} client, error code " +
|
67
|
-
"#{response.code}." unless response.code == 200
|
68
|
-
doc = response.to_s
|
61
|
+
doc = download_rest(universe, db, ids, format)
|
69
62
|
when :net
|
70
|
-
|
71
|
-
doc = ""
|
72
|
-
@timeout_try = 0
|
73
|
-
begin
|
74
|
-
open(url) { |f| doc = f.read }
|
75
|
-
rescue Net::ReadTimeout
|
76
|
-
@timeout_try += 1
|
77
|
-
if @timeout_try > 3 ; raise Net::ReadTimeout
|
78
|
-
else ; retry
|
79
|
-
end
|
80
|
-
end
|
63
|
+
doc = download_net(universe, db, ids, format)
|
81
64
|
end
|
82
65
|
unless file.nil?
|
83
66
|
ofh = File.open(file, "w")
|
@@ -87,6 +70,38 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
87
70
|
doc
|
88
71
|
end
|
89
72
|
|
73
|
+
##
|
74
|
+
# Download data using a REST method from the +universe+ in the database +db+
|
75
|
+
# with IDs +ids+ and in +format+. Returns the doc as String.
|
76
|
+
def self.download_rest(universe, db, ids, format)
|
77
|
+
u = @@UNIVERSE[universe]
|
78
|
+
map_to = u[:dbs][db].nil? ? nil : u[:dbs][db][:map_to]
|
79
|
+
url = sprintf(u[:url], db, ids.join(","), format, map_to)
|
80
|
+
response = RestClient::Request.execute(method: :get, url:url, timeout:600)
|
81
|
+
unless response.code == 200
|
82
|
+
raise "Unable to reach #{universe} client, error code #{response.code}."
|
83
|
+
end
|
84
|
+
response.to_s
|
85
|
+
end
|
86
|
+
|
87
|
+
##
|
88
|
+
# Download data using the +:net+ method (direct URL fetch) from the +universe+ in the database +db+
|
89
|
+
# with IDs +ids+ and in +format+. Returns the doc as String.
|
90
|
+
def self.download_net(universe, db, ids, format)
|
91
|
+
url = sprintf(@@UNIVERSE[universe][:url], db, ids.join(","), format, map_to)
|
92
|
+
doc = ""
|
93
|
+
@timeout_try = 0
|
94
|
+
begin
|
95
|
+
open(url) { |f| doc = f.read }
|
96
|
+
rescue Net::ReadTimeout
|
97
|
+
@timeout_try += 1
|
98
|
+
if @timeout_try > 3 ; raise Net::ReadTimeout
|
99
|
+
else ; retry
|
100
|
+
end
|
101
|
+
end
|
102
|
+
doc
|
103
|
+
end
|
104
|
+
|
90
105
|
# Instance-level
|
91
106
|
|
92
107
|
##
|
@@ -119,33 +134,37 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
119
134
|
# Save dataset to the MiGA::Project +project+ identified with +name+. +is_ref+
|
120
135
|
# indicates if it should be a reference dataset, and contains +metadata+.
|
121
136
|
def save_to(project, name=nil, is_ref=true, metadata={})
|
122
|
-
name
|
137
|
+
name ||= ids.join("_").miga_name
|
123
138
|
project = MiGA::Project.new(project) if project.is_a? String
|
124
|
-
|
125
|
-
|
139
|
+
if MiGA::Dataset.exist?(project, name)
|
140
|
+
raise "Dataset #{name} exists in the project, aborting..."
|
141
|
+
end
|
126
142
|
metadata = get_metadata(metadata)
|
127
|
-
|
143
|
+
udb = @@UNIVERSE[universe][:dbs][db]
|
144
|
+
metadata["#{universe}_#{db}"] = ids.join(",")
|
145
|
+
case udb[:stage]
|
128
146
|
when :assembly
|
129
147
|
dir = MiGA::Dataset.RESULT_DIRS[:assembly]
|
130
148
|
base = "#{project.path}/data/#{dir}/#{name}"
|
149
|
+
l_ctg = "#{base}.LargeContigs.fna"
|
150
|
+
a_ctg = "#{base}.AllContigs.fna"
|
131
151
|
File.open("#{base}.start", "w") { |ofh| ofh.puts Time.now.to_s }
|
132
|
-
if
|
133
|
-
download
|
134
|
-
system
|
152
|
+
if udb[:format] == :fasta_gz
|
153
|
+
download "#{l_ctg}.gz"
|
154
|
+
system "gzip -d '#{l_ctg}.gz'"
|
135
155
|
else
|
136
|
-
download
|
156
|
+
download l_ctg
|
137
157
|
end
|
138
|
-
File.
|
139
|
-
|
158
|
+
File.unlink(a_ctg) if File.exist? a_ctg
|
159
|
+
File.symlink(File.basename(l_ctg), a_ctg)
|
140
160
|
File.open("#{base}.done", "w") { |ofh| ofh.puts Time.now.to_s }
|
141
161
|
else
|
142
162
|
raise "Unexpected error: Unsupported result for database #{db}."
|
143
163
|
end
|
144
164
|
dataset = MiGA::Dataset.new(project, name, is_ref, metadata)
|
145
165
|
project.add_dataset(dataset.name)
|
146
|
-
result = dataset.add_result(
|
147
|
-
|
148
|
-
raise "Empty dataset created: seed result was not added due to "+
|
166
|
+
result = dataset.add_result(udb[:stage], true, is_clean:true)
|
167
|
+
raise "Empty dataset created: seed result was not added due to " +
|
149
168
|
"incomplete files." if result.nil?
|
150
169
|
result.clean!
|
151
170
|
result.save
|
@@ -212,4 +231,5 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
212
231
|
return nil unless ln =~ /^\d+$/
|
213
232
|
ln
|
214
233
|
end
|
234
|
+
|
215
235
|
end
|
data/lib/miga/tax_dist.rb
CHANGED
@@ -25,7 +25,7 @@ module MiGA::TaxDist
|
|
25
25
|
Zlib::GzipReader.open(aai_path(test)) do |fh|
|
26
26
|
keys = nil
|
27
27
|
fh.each_line do |ln|
|
28
|
-
row = ln.chomp.split
|
28
|
+
row = ln.chomp.split(/\t/)
|
29
29
|
if fh.lineno==1
|
30
30
|
keys = row[1, row.size-1].map{ |i| i.to_i }
|
31
31
|
elsif row.shift.to_f >= aai
|
@@ -56,7 +56,7 @@ module MiGA::TaxDist
|
|
56
56
|
min = pv.values.select{ |v| v < upr }.max
|
57
57
|
return out if min.nil?
|
58
58
|
if min >= lwr
|
59
|
-
v = pv.select{ |_,
|
59
|
+
v = pv.select{ |_,vj| vj==min }
|
60
60
|
out[phrase] = (test==:intax ? v.reverse_each : v).first
|
61
61
|
end
|
62
62
|
end
|
data/lib/miga/tax_index.rb
CHANGED
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.2, 6,
|
13
|
+
VERSION = [0.2, 6, 5]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2017,
|
21
|
+
VERSION_DATE = Date.new(2017, 5, 22)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
@@ -28,8 +28,10 @@ fx_exists miga-aai || function miga-aai {
|
|
28
28
|
local F2=$2
|
29
29
|
local TH=$3
|
30
30
|
local DB=$4
|
31
|
-
local N1
|
32
|
-
|
31
|
+
local N1
|
32
|
+
N1=$(miga-ds_name "$F1")
|
33
|
+
local N2
|
34
|
+
N2=$(miga-ds_name "$F2")
|
33
35
|
aai.rb -1 "$F1" -2 "$F2" -t "$TH" -a --lookup-first -S "$DB" --name1 "$N1" \
|
34
36
|
--name2 "$N2" --$MIGA_AAI_SAVE_RBM || echo "0"
|
35
37
|
}
|
@@ -39,8 +41,10 @@ fx_exists miga-ani || function miga-ani {
|
|
39
41
|
local F2=$2
|
40
42
|
local TH=$3
|
41
43
|
local DB=$4
|
42
|
-
local N1
|
43
|
-
|
44
|
+
local N1
|
45
|
+
N1=$(miga-ds_name "$F1")
|
46
|
+
local N2
|
47
|
+
N2=$(miga-ds_name "$F2")
|
44
48
|
ani.rb -1 "$F1" -2 "$F2" -t "$TH" -a --no-save-regions --no-save-rbm \
|
45
49
|
--lookup-first -S "$DB" --name1 "$N1" --name2 "$N2" || echo "0"
|
46
50
|
}
|
@@ -51,11 +55,15 @@ fx_exists miga-haai || function miga-haai {
|
|
51
55
|
local TH=$3
|
52
56
|
local DB=$4
|
53
57
|
local AAI_DB=$5
|
54
|
-
local N1
|
55
|
-
|
56
|
-
local
|
58
|
+
local N1
|
59
|
+
N1=$(miga-ds_name "$F1")
|
60
|
+
local N2
|
61
|
+
N2=$(miga-ds_name "$F2")
|
62
|
+
local HAAI
|
63
|
+
HAAI=$(MIGA_AAI_SAVE_RBM="no-save-rbm" miga-aai "$F1" "$F2" "$TH" "$DB")
|
57
64
|
if [[ "$HAAI" != "" && $(perl -e "print 1 if '$HAAI' <= 90") == "1" ]] ; then
|
58
|
-
local AAI
|
65
|
+
local AAI
|
66
|
+
AAI=$(perl -e "print (100-exp(2.435076 + 0.4275193*log(100-$HAAI)))")
|
59
67
|
[[ ! -s $AAI_DB ]] && miga-make_empty_aai_db "$AAI_DB"
|
60
68
|
echo "insert into aai values('$N1','$N2','$AAI',0,0,0);" | sqlite3 "$AAI_DB"
|
61
69
|
echo "$AAI"
|
@@ -70,6 +78,7 @@ fx_exists miga-haai_or_aai || function miga-haai_or_aai {
|
|
70
78
|
local F2=$5
|
71
79
|
local DB=$6
|
72
80
|
local TH=$7
|
81
|
+
local AAI
|
73
82
|
AAI=$(miga-haai "$FH1" "$FH2" "$TH" "$DBH" "$DB")
|
74
83
|
[[ "${AAI%.*}" -le 0 ]] && AAI=$(miga-aai "$F1" "$F2" "$TH" "$DB")
|
75
84
|
echo "$AAI"
|
@@ -36,24 +36,24 @@ fx_exists miga-noref_haai_or_aai || function miga-noref_haai_or_aai {
|
|
36
36
|
fx_exists miga-noref_ani || function miga-noref_ani {
|
37
37
|
local Q=$1
|
38
38
|
local S=$2
|
39
|
-
[[ -s $TMPDIR/$Q.LargeContigs.fna ]] \
|
40
|
-
|| cp ../05.assembly/$Q.LargeContigs.fna "$TMPDIR/$Q.LargeContigs.fna"
|
39
|
+
[[ -s "$TMPDIR/$Q.LargeContigs.fna" ]] \
|
40
|
+
|| cp "../05.assembly/$Q.LargeContigs.fna" "$TMPDIR/$Q.LargeContigs.fna"
|
41
41
|
miga-ani "$TMPDIR/$Q.LargeContigs.fna" "../05.assembly/$S.LargeContigs.fna" \
|
42
42
|
"$CORES" "$TMPDIR/$Q.ani.db"
|
43
43
|
}
|
44
44
|
|
45
|
-
|
46
|
-
|
47
45
|
# Calculate the classification-informed AAI/ANI traverse (if not classified)
|
48
46
|
ESS="../07.annotation/01.function/01.essential"
|
49
47
|
if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
|
50
48
|
# Classify aai-clade (if project type is not clade)
|
51
49
|
CLADES="../10.clades/01.find"
|
52
50
|
METRIC="aai"
|
51
|
+
REF_TABLE="02.aai/miga-project.txt.gz"
|
53
52
|
else
|
54
53
|
# Classify ani-clade (if project type is clade)
|
55
54
|
CLADES="../10.clades/02.ani"
|
56
55
|
METRIC="ani"
|
56
|
+
REF_TABLE="03.ani/miga-project.txt.gz"
|
57
57
|
fi
|
58
58
|
|
59
59
|
CLASSIF="."
|
@@ -63,7 +63,7 @@ while [[ -e "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
|
|
63
63
|
VAL_MED=""
|
64
64
|
VAL_CLS=""
|
65
65
|
i_n=0
|
66
|
-
while read i ; do
|
66
|
+
while read -r i ; do
|
67
67
|
let i_n=$i_n+1
|
68
68
|
if [[ $METRIC == "aai" ]] ; then
|
69
69
|
VAL=$(miga-noref_haai_or_aai "$DATASET" "$i")
|
@@ -87,7 +87,7 @@ done
|
|
87
87
|
if [[ "$CLASSIF" != "." ]] ; then
|
88
88
|
PAR=$(dirname "$CLADES/$CLASSIF")/miga-project.classif
|
89
89
|
if [[ -s "$PAR" ]] ; then
|
90
|
-
while read i ; do
|
90
|
+
while read -r i ; do
|
91
91
|
if [[ $METRIC == "aai" ]] ; then
|
92
92
|
AAI=$(miga-noref_haai_or_aai "$DATASET" "$i")
|
93
93
|
else
|
@@ -101,6 +101,25 @@ if [[ "$CLASSIF" != "." ]] ; then
|
|
101
101
|
fi
|
102
102
|
fi
|
103
103
|
|
104
|
-
#Finalize
|
104
|
+
# Finalize
|
105
105
|
N=11
|
106
106
|
miga-checkpoint_n
|
107
|
+
|
108
|
+
# Build tree with medoids
|
109
|
+
if [[ -s "${DATASET}.${METRIC}.db" ]] ; then
|
110
|
+
echo "select seq2 from $METRIC;" | sqlite3 "${DATASET}.${METRIC}.db" \
|
111
|
+
| sort | uniq > "${DATASET}.tmp0"
|
112
|
+
perl -pe "s/^/^/" "${DATASET}.tmp0" | perl -pe "s/$/\\t/" \
|
113
|
+
> "${DATASET}.tmp1"
|
114
|
+
perl -pe "s/^/\\t/" "${DATASET}.tmp0" | perl -pe "s/$/\\t/" \
|
115
|
+
> "${DATASET}.tmp2"
|
116
|
+
echo "a b value" | tr " " "\\t" > "${DATASET}.txt"
|
117
|
+
gzip -c -d "$REF_TABLE" | cut -f 2-4 \
|
118
|
+
| grep -f "${DATASET}.tmp1" | grep -f "${DATASET}.tmp2" \
|
119
|
+
>> "${DATASET}.txt"
|
120
|
+
echo "select seq1, seq2, $METRIC from $METRIC;" \
|
121
|
+
| sqlite3 "${DATASET}.${METRIC}.db" | tr "\\|" "\\t" \
|
122
|
+
>> "${DATASET}.txt"
|
123
|
+
"$MIGA/utils/ref-tree.R" "${DATASET}.txt" "$DATASET" "$DATASET"
|
124
|
+
rm "$DATASET".tmp[012] "${DATASET}.txt"
|
125
|
+
fi
|
data/scripts/aai_distances.bash
CHANGED
@@ -4,11 +4,12 @@ set -e
|
|
4
4
|
SCRIPT="aai_distances"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/09.distances/02.aai"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "miga-project.start"
|
12
13
|
|
13
14
|
echo -n "" > miga-project.log
|
14
15
|
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
@@ -40,5 +41,5 @@ if(sum(aai[,'a'] != aai[,'b']) > 0){
|
|
40
41
|
gzip -9 -f miga-project.txt
|
41
42
|
|
42
43
|
# Finalize
|
43
|
-
date
|
44
|
+
miga date > "miga-project.done"
|
44
45
|
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/ani_distances.bash
CHANGED
@@ -4,11 +4,12 @@ set -e
|
|
4
4
|
SCRIPT="ani_distances"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/09.distances/03.ani"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "miga-project.start"
|
12
13
|
|
13
14
|
echo -n "" > miga-project.log
|
14
15
|
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
@@ -39,5 +40,5 @@ if(sum(ani[,'a'] != ani[,'b']) > 0){
|
|
39
40
|
gzip -9 -f miga-project.txt
|
40
41
|
|
41
42
|
# Finalize
|
42
|
-
date
|
43
|
+
miga date > "miga-project.done"
|
43
44
|
miga add_result -P "$PROJECT" -r "$SCRIPT"
|