miga-base 0.2.6.4 → 0.2.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/actions/list_datasets.rb +6 -1
- data/actions/run_local.rb +1 -1
- data/actions/tax_distributions.rb +4 -4
- data/lib/miga/common.rb +18 -0
- data/lib/miga/daemon.rb +1 -1
- data/lib/miga/dataset_result.rb +46 -47
- data/lib/miga/remote_dataset.rb +52 -32
- data/lib/miga/tax_dist.rb +2 -2
- data/lib/miga/tax_index.rb +1 -1
- data/lib/miga/version.rb +2 -2
- data/scripts/_distances_functions.bash +17 -8
- data/scripts/_distances_noref_nomulti.bash +26 -7
- data/scripts/aai_distances.bash +3 -2
- data/scripts/ani_distances.bash +3 -2
- data/scripts/assembly.bash +24 -24
- data/scripts/cds.bash +22 -30
- data/scripts/clade_finding.bash +5 -4
- data/scripts/distances.bash +13 -9
- data/scripts/essential_genes.bash +12 -11
- data/scripts/haai_distances.bash +3 -2
- data/scripts/init.bash +100 -108
- data/scripts/miga.bash +4 -2
- data/scripts/mytaxa.bash +72 -71
- data/scripts/mytaxa_scan.bash +62 -61
- data/scripts/ogs.bash +14 -13
- data/scripts/project_stats.bash +1 -0
- data/scripts/read_quality.bash +12 -16
- data/scripts/ssu.bash +18 -18
- data/scripts/stats.bash +3 -2
- data/scripts/subclades.bash +7 -6
- data/scripts/trimmed_fasta.bash +22 -21
- data/scripts/trimmed_reads.bash +34 -32
- data/utils/index_metadata.rb +4 -4
- data/utils/ref-tree.R +65 -0
- data/utils/requirements.txt +1 -1
- metadata +57 -56
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a69a12511e98d2cc6751efa31592661b664e9d33
|
4
|
+
data.tar.gz: 807b0a13efce0367c88369a2f389fffff34ee58d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bdc1db4adec179da57c45ab29a41ec36ef2895f34d8159792420560bd0bc15910052fe6e2949ee0a992500e0001a7db4d2bbbf4f4d5e1daefb4c1c0fa3bd4ebb
|
7
|
+
data.tar.gz: 40d5a05ec2c154b50eb8952b87212106461080740e8986f982f44fa79cea3100d31beaff1c647afa83892b3e2cde37464ddd11568ce658addcb60992b6395de3
|
data/actions/list_datasets.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
# @package MiGA
|
4
4
|
# @license Artistic-2.0
|
5
5
|
|
6
|
-
o = {q:true, info:false, processing:false}
|
6
|
+
o = {q:true, info:false, processing:false, silent:false}
|
7
7
|
OptionParser.new do |opt|
|
8
8
|
opt_banner(opt)
|
9
9
|
opt_object(opt, o, [:project, :dataset_opt])
|
@@ -15,11 +15,15 @@ OptionParser.new do |opt|
|
|
15
15
|
opt.on("-m", "--metadata STRING",
|
16
16
|
"Print name and metadata field only. If set, ignores -i."
|
17
17
|
){ |v| o[:datum]=v }
|
18
|
+
opt.on("-s", "--silent",
|
19
|
+
"No output and exit with non-zero status if the dataset list is empty."
|
20
|
+
){ |v| o[:silent] = v }
|
18
21
|
opt_common(opt, o)
|
19
22
|
end.parse!
|
20
23
|
|
21
24
|
##=> Main <=
|
22
25
|
opt_require(o, project:"-P")
|
26
|
+
o[:q] = true if o[:silent]
|
23
27
|
|
24
28
|
$stderr.puts "Loading project." unless o[:q]
|
25
29
|
p = MiGA::Project.load(o[:project])
|
@@ -34,6 +38,7 @@ else
|
|
34
38
|
ds = []
|
35
39
|
end
|
36
40
|
ds = filter_datasets!(ds, o)
|
41
|
+
exit(1) if o[:silent] and ds.empty?
|
37
42
|
|
38
43
|
if not o[:datum].nil?
|
39
44
|
ds.each{|d| puts "#{d.name}\t#{d.metadata[ o[:datum] ] || "?"}"}
|
data/actions/run_local.rb
CHANGED
@@ -36,7 +36,7 @@ else
|
|
36
36
|
end
|
37
37
|
raise "Unsupported #{type.to_s.gsub(/.*::/,"")} result: #{o[:name]}." if
|
38
38
|
type.RESULT_DIRS[o[:name].to_sym].nil?
|
39
|
-
cmd <<
|
39
|
+
cmd << MiGA::MiGA.script_path(o[:name], miga:miga, project:p).shellescape
|
40
40
|
pid = spawn cmd.join(" ")
|
41
41
|
Process.wait pid
|
42
42
|
|
@@ -38,13 +38,13 @@ raise "#{res_n} not yet calculated." if res.nil?
|
|
38
38
|
matrix = res.file_path(:matrix)
|
39
39
|
raise "#{res_n} has no matrix." if matrix.nil?
|
40
40
|
dist = {}
|
41
|
-
|
42
|
-
|
43
|
-
next if
|
41
|
+
mfh = matrix=~/\.gz$/ ? Zlib::GzipReader.open(matrix) : File.open(matrix,"r")
|
42
|
+
mfh.each_line do |ln|
|
43
|
+
next if mfh.lineno==1
|
44
44
|
row = ln.chomp.split(/\t/)
|
45
45
|
dist[cannid(row[1], row[2])] = [row[3], 0, ["root:biota"]]
|
46
46
|
end
|
47
|
-
|
47
|
+
mfh.close
|
48
48
|
|
49
49
|
Dir.mktmpdir do |dir|
|
50
50
|
if o[:index].nil?
|
data/lib/miga/common.rb
CHANGED
@@ -103,6 +103,24 @@ class MiGA::MiGA
|
|
103
103
|
tmp.unlink
|
104
104
|
end
|
105
105
|
end
|
106
|
+
|
107
|
+
##
|
108
|
+
# Path to a script to be executed for +task+. Supported +opts+ are:
|
109
|
+
# - +:miga+ Path to the MiGA home to use. If not passed, the home of the
|
110
|
+
# library is used.
|
111
|
+
# - +:project+ MiGA::Project object to check within plugins. If not passed,
|
112
|
+
# only core scripts are supported.
|
113
|
+
def self.script_path(task, opts={})
|
114
|
+
opts[:miga] ||= root_path
|
115
|
+
unless opts[:project].nil?
|
116
|
+
opts[:project].plugins.each do |pl|
|
117
|
+
if File.exist? File.expand_path("scripts/#{task}.bash", pl)
|
118
|
+
opts[:miga] = pl
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
File.expand_path("scripts/#{task}.bash", opts[:miga])
|
123
|
+
end
|
106
124
|
|
107
125
|
|
108
126
|
##
|
data/lib/miga/daemon.rb
CHANGED
@@ -167,7 +167,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
167
167
|
to_run = {ds: ds, job: job, task_name: task_name,
|
168
168
|
cmd: sprintf(runopts(:cmd),
|
169
169
|
# 1: script
|
170
|
-
|
170
|
+
MiGA::MiGA.script_path(job, miga:vars["MIGA"], project:project),
|
171
171
|
# 2: vars
|
172
172
|
vars.keys.map { |k|
|
173
173
|
sprintf(runopts(:var), k, vars[k]) }.join(runopts(:varsep)),
|
data/lib/miga/dataset_result.rb
CHANGED
@@ -27,24 +27,25 @@ module MiGA::DatasetResult
|
|
27
27
|
private
|
28
28
|
|
29
29
|
##
|
30
|
-
# Add result type +:raw_reads+ at +base+ (no +
|
31
|
-
def add_result_raw_reads(base,
|
30
|
+
# Add result type +:raw_reads+ at +base+ (no +_opts+ supported).
|
31
|
+
def add_result_raw_reads(base, _opts)
|
32
32
|
return nil unless result_files_exist?(base, ".1.fastq")
|
33
33
|
r = MiGA::Result.new("#{base}.json")
|
34
|
-
|
34
|
+
add_files_to_ds_result(r, name,
|
35
35
|
( result_files_exist?(base, ".2.fastq") ?
|
36
|
-
{:
|
37
|
-
{:
|
36
|
+
{pair1:".1.fastq", pair2:".2.fastq"} :
|
37
|
+
{single:".1.fastq"} ))
|
38
38
|
end
|
39
39
|
|
40
40
|
##
|
41
|
-
# Add result type +:trimmed_reads+ at +base+ (no +
|
42
|
-
def add_result_trimmed_reads(base,
|
41
|
+
# Add result type +:trimmed_reads+ at +base+ (no +_opts+ supported).
|
42
|
+
def add_result_trimmed_reads(base, _opts)
|
43
43
|
return nil unless result_files_exist?(base, ".1.clipped.fastq")
|
44
44
|
r = MiGA::Result.new("#{base}.json")
|
45
|
-
|
46
|
-
|
47
|
-
|
45
|
+
if result_files_exist?(base, ".2.clipped.fastq")
|
46
|
+
r = add_files_to_ds_result(r, name,
|
47
|
+
pair1:".1.clipped.fastq", pair2:".2.clipped.fastq")
|
48
|
+
end
|
48
49
|
r.add_file(:single, "#{name}.1.clipped.single.fastq")
|
49
50
|
r.add_file(:trimming_sumary, "#{name}.1.fastq.trimmed.summary.txt")
|
50
51
|
add_result(:raw_reads) #-> Post gunzip
|
@@ -52,26 +53,26 @@ module MiGA::DatasetResult
|
|
52
53
|
end
|
53
54
|
|
54
55
|
##
|
55
|
-
# Add result type +:read_quality+ at +base+ (no +
|
56
|
-
def add_result_read_quality(base,
|
56
|
+
# Add result type +:read_quality+ at +base+ (no +_opts+ supported).
|
57
|
+
def add_result_read_quality(base, _opts)
|
57
58
|
return nil unless result_files_exist?(base, %w[.solexaqa .fastqc])
|
58
59
|
r = MiGA::Result.new("#{base}.json")
|
59
60
|
r = add_files_to_ds_result(r, name,
|
60
|
-
|
61
|
+
solexaqa:".solexaqa", fastqc:".fastqc")
|
61
62
|
add_result(:trimmed_reads) #-> Post cleaning
|
62
63
|
r
|
63
64
|
end
|
64
65
|
|
65
66
|
##
|
66
|
-
# Add result type +:trimmed_fasta+ at +base+ (no +
|
67
|
-
def add_result_trimmed_fasta(base,
|
67
|
+
# Add result type +:trimmed_fasta+ at +base+ (no +_opts+ supported).
|
68
|
+
def add_result_trimmed_fasta(base, _opts)
|
68
69
|
return nil unless
|
69
70
|
result_files_exist?(base, ".CoupledReads.fa") or
|
70
71
|
result_files_exist?(base, ".SingleReads.fa") or
|
71
72
|
result_files_exist?(base, %w[.1.fasta .2.fasta])
|
72
73
|
r = MiGA::Result.new("#{base}.json")
|
73
|
-
r = add_files_to_ds_result(r, name,
|
74
|
-
:
|
74
|
+
r = add_files_to_ds_result(r, name, coupled:".CoupledReads.fa",
|
75
|
+
single:".SingleReads.fa", pair1:".1.fasta", pair2:".2.fasta")
|
75
76
|
add_result(:raw_reads) #-> Post gzip
|
76
77
|
r
|
77
78
|
end
|
@@ -82,8 +83,8 @@ module MiGA::DatasetResult
|
|
82
83
|
def add_result_assembly(base, opts)
|
83
84
|
return nil unless result_files_exist?(base, ".LargeContigs.fna")
|
84
85
|
r = MiGA::Result.new("#{base}.json")
|
85
|
-
r = add_files_to_ds_result(r, name,
|
86
|
-
:
|
86
|
+
r = add_files_to_ds_result(r, name, largecontigs:".LargeContigs.fna",
|
87
|
+
allcontigs:".AllContigs.fna", assembly_data:"")
|
87
88
|
opts[:is_clean] ||= false
|
88
89
|
r.clean! if opts[:is_clean]
|
89
90
|
unless r.clean?
|
@@ -99,8 +100,8 @@ module MiGA::DatasetResult
|
|
99
100
|
def add_result_cds(base, opts)
|
100
101
|
return nil unless result_files_exist?(base, %w[.faa .fna])
|
101
102
|
r = MiGA::Result.new("#{base}.json")
|
102
|
-
r = add_files_to_ds_result(r, name,
|
103
|
-
:
|
103
|
+
r = add_files_to_ds_result(r, name, proteins:".faa", genes:".fna",
|
104
|
+
gff2:".gff2", gff3:".gff3", tab:".tab")
|
104
105
|
opts[:is_clean] ||= false
|
105
106
|
r.clean! if opts[:is_clean]
|
106
107
|
unless r.clean?
|
@@ -112,12 +113,12 @@ module MiGA::DatasetResult
|
|
112
113
|
end
|
113
114
|
|
114
115
|
##
|
115
|
-
# Add result type +:essential_genes+ at +base+ (no +
|
116
|
-
def add_result_essential_genes(base,
|
116
|
+
# Add result type +:essential_genes+ at +base+ (no +_opts+ supported).
|
117
|
+
def add_result_essential_genes(base, _opts)
|
117
118
|
return nil unless result_files_exist?(base, %w[.ess.faa .ess .ess/log])
|
118
119
|
r = MiGA::Result.new("#{base}.json")
|
119
|
-
|
120
|
-
:
|
120
|
+
add_files_to_ds_result(r, name, ess_genes:".ess.faa",
|
121
|
+
collection:".ess", report:".ess/log")
|
121
122
|
end
|
122
123
|
|
123
124
|
##
|
@@ -126,8 +127,8 @@ module MiGA::DatasetResult
|
|
126
127
|
return MiGA::Result.new("#{base}.json") if result(:assembly).nil?
|
127
128
|
return nil unless result_files_exist?(base, ".ssu.fa")
|
128
129
|
r = MiGA::Result.new("#{base}.json")
|
129
|
-
r = add_files_to_ds_result(r, name,
|
130
|
-
:
|
130
|
+
r = add_files_to_ds_result(r, name, longest_ssu_gene:".ssu.fa",
|
131
|
+
gff:".ssu.gff", all_ssu_genes:".ssu.all.fa")
|
131
132
|
opts[:is_clean] ||= false
|
132
133
|
r.clean! if opts[:is_clean]
|
133
134
|
unless r.clean?
|
@@ -138,37 +139,36 @@ module MiGA::DatasetResult
|
|
138
139
|
end
|
139
140
|
|
140
141
|
##
|
141
|
-
# Add result type +:mytaxa+ at +base+ (no +
|
142
|
-
def add_result_mytaxa(base,
|
142
|
+
# Add result type +:mytaxa+ at +base+ (no +_opts+ supported).
|
143
|
+
def add_result_mytaxa(base, _opts)
|
143
144
|
if is_multi?
|
144
145
|
return nil unless result_files_exist?(base, ".mytaxa")
|
145
146
|
r = MiGA::Result.new("#{base}.json")
|
146
|
-
add_files_to_ds_result(r, name,
|
147
|
-
:
|
147
|
+
add_files_to_ds_result(r, name, mytaxa:".mytaxa", blast:".blast",
|
148
|
+
mytaxain:".mytaxain")
|
148
149
|
else
|
149
150
|
MiGA::Result.new("#{base}.json")
|
150
151
|
end
|
151
152
|
end
|
152
153
|
|
153
154
|
##
|
154
|
-
# Add result type +:mytaxa_scan+ at +base+ (no +
|
155
|
-
def add_result_mytaxa_scan(base,
|
155
|
+
# Add result type +:mytaxa_scan+ at +base+ (no +_opts+ supported).
|
156
|
+
def add_result_mytaxa_scan(base, _opts)
|
156
157
|
if is_nonmulti?
|
157
158
|
return nil unless
|
158
159
|
result_files_exist?(base, %w[.pdf .wintax .mytaxa .reg])
|
159
160
|
r = MiGA::Result.new("#{base}.json")
|
160
|
-
add_files_to_ds_result(r, name,
|
161
|
-
:
|
162
|
-
:
|
163
|
-
:region_ids=>".wintax.regions"})
|
161
|
+
add_files_to_ds_result(r, name, mytaxa:".mytaxa", wintax:".wintax",
|
162
|
+
blast:".blast", mytaxain:".mytaxain", report:".pdf", regions:".reg",
|
163
|
+
gene_ids:".wintax.genes", region_ids:".wintax.regions")
|
164
164
|
else
|
165
165
|
MiGA::Result.new("#{base}.json")
|
166
166
|
end
|
167
167
|
end
|
168
168
|
|
169
169
|
##
|
170
|
-
# Add result type +:distances+ at +base+ (no +
|
171
|
-
def add_result_distances(base,
|
170
|
+
# Add result type +:distances+ at +base+ (no +_opts+ supported).
|
171
|
+
def add_result_distances(base, _opts)
|
172
172
|
if is_nonmulti?
|
173
173
|
if is_ref?
|
174
174
|
add_result_distances_ref(base)
|
@@ -181,8 +181,8 @@ module MiGA::DatasetResult
|
|
181
181
|
end
|
182
182
|
|
183
183
|
##
|
184
|
-
# Add result type +:stats+ at +base+ (no +
|
185
|
-
def add_result_stats(base,
|
184
|
+
# Add result type +:stats+ at +base+ (no +_opts+ supported).
|
185
|
+
def add_result_stats(base, _opts)
|
186
186
|
MiGA::Result.new("#{base}.json")
|
187
187
|
end
|
188
188
|
|
@@ -199,8 +199,8 @@ module MiGA::DatasetResult
|
|
199
199
|
return nil unless
|
200
200
|
File.exist?("#{pref}/01.haai/#{name}.db")
|
201
201
|
r = MiGA::Result.new("#{base}.json")
|
202
|
-
r.add_files(
|
203
|
-
:
|
202
|
+
r.add_files(haai_db:"01.haai/#{name}.db", aai_db:"02.aai/#{name}.db",
|
203
|
+
ani_db:"03.ani/#{name}.db")
|
204
204
|
r
|
205
205
|
end
|
206
206
|
|
@@ -211,10 +211,9 @@ module MiGA::DatasetResult
|
|
211
211
|
result_files_exist?(base, %w[.aai-medoids.tsv .aai.db]) or
|
212
212
|
result_files_exist?(base, %w[.ani-medoids.tsv .ani.db])
|
213
213
|
r = MiGA::Result.new("#{base}.json")
|
214
|
-
|
215
|
-
:
|
216
|
-
:
|
217
|
-
:ani_medoids=>".ani-medoids.tsv", :ani_db=>".ani.db"})
|
214
|
+
add_files_to_ds_result(r, name, aai_medoids:".aai-medoids.tsv",
|
215
|
+
haai_db:".haai.db", aai_db:".aai.db", ani_medoids:".ani-medoids.tsv",
|
216
|
+
ani_db:".ani.db", ref_tree:".nwk", ref_tree_pdf:".nwk.pdf")
|
218
217
|
end
|
219
218
|
|
220
219
|
##
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -58,26 +58,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
58
58
|
ids = [ids] unless ids.is_a? Array
|
59
59
|
case @@UNIVERSE[universe][:method]
|
60
60
|
when :rest
|
61
|
-
|
62
|
-
@@UNIVERSE[universe][:dbs][db][:map_to]
|
63
|
-
url = sprintf @@UNIVERSE[universe][:url],
|
64
|
-
db, ids.join(","), format, map_to
|
65
|
-
response = RestClient::Request.execute(method: :get, url:url, timeout:600)
|
66
|
-
raise "Unable to reach #{universe} client, error code " +
|
67
|
-
"#{response.code}." unless response.code == 200
|
68
|
-
doc = response.to_s
|
61
|
+
doc = download_rest(universe, db, ids, format)
|
69
62
|
when :net
|
70
|
-
|
71
|
-
doc = ""
|
72
|
-
@timeout_try = 0
|
73
|
-
begin
|
74
|
-
open(url) { |f| doc = f.read }
|
75
|
-
rescue Net::ReadTimeout
|
76
|
-
@timeout_try += 1
|
77
|
-
if @timeout_try > 3 ; raise Net::ReadTimeout
|
78
|
-
else ; retry
|
79
|
-
end
|
80
|
-
end
|
63
|
+
doc = download_net(universe, db, ids, format)
|
81
64
|
end
|
82
65
|
unless file.nil?
|
83
66
|
ofh = File.open(file, "w")
|
@@ -87,6 +70,38 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
87
70
|
doc
|
88
71
|
end
|
89
72
|
|
73
|
+
##
|
74
|
+
# Download data using a REST method from the +universe+ in the database +db+
|
75
|
+
# with IDs +ids+ and in +format+. Returns the doc as String.
|
76
|
+
def self.download_rest(universe, db, ids, format)
|
77
|
+
u = @@UNIVERSE[universe]
|
78
|
+
map_to = u[:dbs][db].nil? ? nil : u[:dbs][db][:map_to]
|
79
|
+
url = sprintf(u[:url], db, ids.join(","), format, map_to)
|
80
|
+
response = RestClient::Request.execute(method: :get, url:url, timeout:600)
|
81
|
+
unless response.code == 200
|
82
|
+
raise "Unable to reach #{universe} client, error code #{response.code}."
|
83
|
+
end
|
84
|
+
response.to_s
|
85
|
+
end
|
86
|
+
|
87
|
+
##
|
88
|
+
# Download data using the +:net+ method from the +universe+ in the database +db+
|
89
|
+
# with IDs +ids+ and in +format+. Returns the doc as String.
|
90
|
+
def self.download_net(universe, db, ids, format)
|
91
|
+
url = sprintf(@@UNIVERSE[universe][:url], db, ids.join(","), format, map_to)
|
92
|
+
doc = ""
|
93
|
+
@timeout_try = 0
|
94
|
+
begin
|
95
|
+
open(url) { |f| doc = f.read }
|
96
|
+
rescue Net::ReadTimeout
|
97
|
+
@timeout_try += 1
|
98
|
+
if @timeout_try > 3 ; raise Net::ReadTimeout
|
99
|
+
else ; retry
|
100
|
+
end
|
101
|
+
end
|
102
|
+
doc
|
103
|
+
end
|
104
|
+
|
90
105
|
# Instance-level
|
91
106
|
|
92
107
|
##
|
@@ -119,33 +134,37 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
119
134
|
# Save dataset to the MiGA::Project +project+ identified with +name+. +is_ref+
|
120
135
|
# indicates if it should be a reference dataset, and contains +metadata+.
|
121
136
|
def save_to(project, name=nil, is_ref=true, metadata={})
|
122
|
-
name
|
137
|
+
name ||= ids.join("_").miga_name
|
123
138
|
project = MiGA::Project.new(project) if project.is_a? String
|
124
|
-
|
125
|
-
|
139
|
+
if MiGA::Dataset.exist?(project, name)
|
140
|
+
raise "Dataset #{name} exists in the project, aborting..."
|
141
|
+
end
|
126
142
|
metadata = get_metadata(metadata)
|
127
|
-
|
143
|
+
udb = @@UNIVERSE[universe][:dbs][db]
|
144
|
+
metadata["#{universe}_#{db}"] = ids.join(",")
|
145
|
+
case udb[:stage]
|
128
146
|
when :assembly
|
129
147
|
dir = MiGA::Dataset.RESULT_DIRS[:assembly]
|
130
148
|
base = "#{project.path}/data/#{dir}/#{name}"
|
149
|
+
l_ctg = "#{base}.LargeContigs.fna"
|
150
|
+
a_ctg = "#{base}.AllContigs.fna"
|
131
151
|
File.open("#{base}.start", "w") { |ofh| ofh.puts Time.now.to_s }
|
132
|
-
if
|
133
|
-
download
|
134
|
-
system
|
152
|
+
if udb[:format] == :fasta_gz
|
153
|
+
download "#{l_ctg}.gz"
|
154
|
+
system "gzip -d '#{l_ctg}.gz'"
|
135
155
|
else
|
136
|
-
download
|
156
|
+
download l_ctg
|
137
157
|
end
|
138
|
-
File.
|
139
|
-
|
158
|
+
File.unlink(a_ctg) if File.exist? a_ctg
|
159
|
+
File.symlink(File.basename(l_ctg), a_ctg)
|
140
160
|
File.open("#{base}.done", "w") { |ofh| ofh.puts Time.now.to_s }
|
141
161
|
else
|
142
162
|
raise "Unexpected error: Unsupported result for database #{db}."
|
143
163
|
end
|
144
164
|
dataset = MiGA::Dataset.new(project, name, is_ref, metadata)
|
145
165
|
project.add_dataset(dataset.name)
|
146
|
-
result = dataset.add_result(
|
147
|
-
|
148
|
-
raise "Empty dataset created: seed result was not added due to "+
|
166
|
+
result = dataset.add_result(udb[:stage], true, is_clean:true)
|
167
|
+
raise "Empty dataset created: seed result was not added due to " +
|
149
168
|
"incomplete files." if result.nil?
|
150
169
|
result.clean!
|
151
170
|
result.save
|
@@ -212,4 +231,5 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
212
231
|
return nil unless ln =~ /^\d+$/
|
213
232
|
ln
|
214
233
|
end
|
234
|
+
|
215
235
|
end
|
data/lib/miga/tax_dist.rb
CHANGED
@@ -25,7 +25,7 @@ module MiGA::TaxDist
|
|
25
25
|
Zlib::GzipReader.open(aai_path(test)) do |fh|
|
26
26
|
keys = nil
|
27
27
|
fh.each_line do |ln|
|
28
|
-
row = ln.chomp.split
|
28
|
+
row = ln.chomp.split(/\t/)
|
29
29
|
if fh.lineno==1
|
30
30
|
keys = row[1, row.size-1].map{ |i| i.to_i }
|
31
31
|
elsif row.shift.to_f >= aai
|
@@ -56,7 +56,7 @@ module MiGA::TaxDist
|
|
56
56
|
min = pv.values.select{ |v| v < upr }.max
|
57
57
|
return out if min.nil?
|
58
58
|
if min >= lwr
|
59
|
-
v = pv.select{ |_,
|
59
|
+
v = pv.select{ |_,vj| vj==min }
|
60
60
|
out[phrase] = (test==:intax ? v.reverse_each : v).first
|
61
61
|
end
|
62
62
|
end
|
data/lib/miga/tax_index.rb
CHANGED
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.2, 6,
|
13
|
+
VERSION = [0.2, 6, 5]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2017,
|
21
|
+
VERSION_DATE = Date.new(2017, 5, 22)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
@@ -28,8 +28,10 @@ fx_exists miga-aai || function miga-aai {
|
|
28
28
|
local F2=$2
|
29
29
|
local TH=$3
|
30
30
|
local DB=$4
|
31
|
-
local N1
|
32
|
-
|
31
|
+
local N1
|
32
|
+
N1=$(miga-ds_name "$F1")
|
33
|
+
local N2
|
34
|
+
N2=$(miga-ds_name "$F2")
|
33
35
|
aai.rb -1 "$F1" -2 "$F2" -t "$TH" -a --lookup-first -S "$DB" --name1 "$N1" \
|
34
36
|
--name2 "$N2" --$MIGA_AAI_SAVE_RBM || echo "0"
|
35
37
|
}
|
@@ -39,8 +41,10 @@ fx_exists miga-ani || function miga-ani {
|
|
39
41
|
local F2=$2
|
40
42
|
local TH=$3
|
41
43
|
local DB=$4
|
42
|
-
local N1
|
43
|
-
|
44
|
+
local N1
|
45
|
+
N1=$(miga-ds_name "$F1")
|
46
|
+
local N2
|
47
|
+
N2=$(miga-ds_name "$F2")
|
44
48
|
ani.rb -1 "$F1" -2 "$F2" -t "$TH" -a --no-save-regions --no-save-rbm \
|
45
49
|
--lookup-first -S "$DB" --name1 "$N1" --name2 "$N2" || echo "0"
|
46
50
|
}
|
@@ -51,11 +55,15 @@ fx_exists miga-haai || function miga-haai {
|
|
51
55
|
local TH=$3
|
52
56
|
local DB=$4
|
53
57
|
local AAI_DB=$5
|
54
|
-
local N1
|
55
|
-
|
56
|
-
local
|
58
|
+
local N1
|
59
|
+
N1=$(miga-ds_name "$F1")
|
60
|
+
local N2
|
61
|
+
N2=$(miga-ds_name "$F2")
|
62
|
+
local HAAI
|
63
|
+
HAAI=$(MIGA_AAI_SAVE_RBM="no-save-rbm" miga-aai "$F1" "$F2" "$TH" "$DB")
|
57
64
|
if [[ "$HAAI" != "" && $(perl -e "print 1 if '$HAAI' <= 90") == "1" ]] ; then
|
58
|
-
local AAI
|
65
|
+
local AAI
|
66
|
+
AAI=$(perl -e "print (100-exp(2.435076 + 0.4275193*log(100-$HAAI)))")
|
59
67
|
[[ ! -s $AAI_DB ]] && miga-make_empty_aai_db "$AAI_DB"
|
60
68
|
echo "insert into aai values('$N1','$N2','$AAI',0,0,0);" | sqlite3 "$AAI_DB"
|
61
69
|
echo "$AAI"
|
@@ -70,6 +78,7 @@ fx_exists miga-haai_or_aai || function miga-haai_or_aai {
|
|
70
78
|
local F2=$5
|
71
79
|
local DB=$6
|
72
80
|
local TH=$7
|
81
|
+
local AAI
|
73
82
|
AAI=$(miga-haai "$FH1" "$FH2" "$TH" "$DBH" "$DB")
|
74
83
|
[[ "${AAI%.*}" -le 0 ]] && AAI=$(miga-aai "$F1" "$F2" "$TH" "$DB")
|
75
84
|
echo "$AAI"
|
@@ -36,24 +36,24 @@ fx_exists miga-noref_haai_or_aai || function miga-noref_haai_or_aai {
|
|
36
36
|
fx_exists miga-noref_ani || function miga-noref_ani {
|
37
37
|
local Q=$1
|
38
38
|
local S=$2
|
39
|
-
[[ -s $TMPDIR/$Q.LargeContigs.fna ]] \
|
40
|
-
|| cp ../05.assembly/$Q.LargeContigs.fna "$TMPDIR/$Q.LargeContigs.fna"
|
39
|
+
[[ -s "$TMPDIR/$Q.LargeContigs.fna" ]] \
|
40
|
+
|| cp "../05.assembly/$Q.LargeContigs.fna" "$TMPDIR/$Q.LargeContigs.fna"
|
41
41
|
miga-ani "$TMPDIR/$Q.LargeContigs.fna" "../05.assembly/$S.LargeContigs.fna" \
|
42
42
|
"$CORES" "$TMPDIR/$Q.ani.db"
|
43
43
|
}
|
44
44
|
|
45
|
-
|
46
|
-
|
47
45
|
# Calculate the classification-informed AAI/ANI traverse (if not classified)
|
48
46
|
ESS="../07.annotation/01.function/01.essential"
|
49
47
|
if [[ $(miga project_info -P "$PROJECT" -m type) != "clade" ]] ; then
|
50
48
|
# Classify aai-clade (if project type is not clade)
|
51
49
|
CLADES="../10.clades/01.find"
|
52
50
|
METRIC="aai"
|
51
|
+
REF_TABLE="02.aai/miga-project.txt.gz"
|
53
52
|
else
|
54
53
|
# Classify ani-clade (if project type is clade)
|
55
54
|
CLADES="../10.clades/02.ani"
|
56
55
|
METRIC="ani"
|
56
|
+
REF_TABLE="03.ani/miga-project.txt.gz"
|
57
57
|
fi
|
58
58
|
|
59
59
|
CLASSIF="."
|
@@ -63,7 +63,7 @@ while [[ -e "$CLADES/$CLASSIF/miga-project.medoids" ]] ; do
|
|
63
63
|
VAL_MED=""
|
64
64
|
VAL_CLS=""
|
65
65
|
i_n=0
|
66
|
-
while read i ; do
|
66
|
+
while read -r i ; do
|
67
67
|
let i_n=$i_n+1
|
68
68
|
if [[ $METRIC == "aai" ]] ; then
|
69
69
|
VAL=$(miga-noref_haai_or_aai "$DATASET" "$i")
|
@@ -87,7 +87,7 @@ done
|
|
87
87
|
if [[ "$CLASSIF" != "." ]] ; then
|
88
88
|
PAR=$(dirname "$CLADES/$CLASSIF")/miga-project.classif
|
89
89
|
if [[ -s "$PAR" ]] ; then
|
90
|
-
while read i ; do
|
90
|
+
while read -r i ; do
|
91
91
|
if [[ $METRIC == "aai" ]] ; then
|
92
92
|
AAI=$(miga-noref_haai_or_aai "$DATASET" "$i")
|
93
93
|
else
|
@@ -101,6 +101,25 @@ if [[ "$CLASSIF" != "." ]] ; then
|
|
101
101
|
fi
|
102
102
|
fi
|
103
103
|
|
104
|
-
#Finalize
|
104
|
+
# Finalize
|
105
105
|
N=11
|
106
106
|
miga-checkpoint_n
|
107
|
+
|
108
|
+
# Build tree with medoids
|
109
|
+
if [[ -s "${DATASET}.${METRIC}.db" ]] ; then
|
110
|
+
echo "select seq2 from $METRIC;" | sqlite3 "${DATASET}.${METRIC}.db" \
|
111
|
+
| sort | uniq > "${DATASET}.tmp0"
|
112
|
+
perl -pe "s/^/^/" "${DATASET}.tmp0" | perl -pe "s/$/\\t/" \
|
113
|
+
> "${DATASET}.tmp1"
|
114
|
+
perl -pe "s/^/\\t/" "${DATASET}.tmp0" | perl -pe "s/$/\\t/" \
|
115
|
+
> "${DATASET}.tmp2"
|
116
|
+
echo "a b value" | tr " " "\\t" > "${DATASET}.txt"
|
117
|
+
gzip -c -d "$REF_TABLE" | cut -f 2-4 \
|
118
|
+
| grep -f "${DATASET}.tmp1" | grep -f "${DATASET}.tmp2" \
|
119
|
+
>> "${DATASET}.txt"
|
120
|
+
echo "select seq1, seq2, $METRIC from $METRIC;" \
|
121
|
+
| sqlite3 "${DATASET}.${METRIC}.db" | tr "\\|" "\\t" \
|
122
|
+
>> "${DATASET}.txt"
|
123
|
+
"$MIGA/utils/ref-tree.R" "${DATASET}.txt" "$DATASET" "$DATASET"
|
124
|
+
rm "$DATASET".tmp[012] "${DATASET}.txt"
|
125
|
+
fi
|
data/scripts/aai_distances.bash
CHANGED
@@ -4,11 +4,12 @@ set -e
|
|
4
4
|
SCRIPT="aai_distances"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/09.distances/02.aai"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "miga-project.start"
|
12
13
|
|
13
14
|
echo -n "" > miga-project.log
|
14
15
|
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
@@ -40,5 +41,5 @@ if(sum(aai[,'a'] != aai[,'b']) > 0){
|
|
40
41
|
gzip -9 -f miga-project.txt
|
41
42
|
|
42
43
|
# Finalize
|
43
|
-
date
|
44
|
+
miga date > "miga-project.done"
|
44
45
|
miga add_result -P "$PROJECT" -r "$SCRIPT"
|
data/scripts/ani_distances.bash
CHANGED
@@ -4,11 +4,12 @@ set -e
|
|
4
4
|
SCRIPT="ani_distances"
|
5
5
|
echo "MiGA: $MIGA"
|
6
6
|
echo "Project: $PROJECT"
|
7
|
+
# shellcheck source=scripts/miga.bash
|
7
8
|
source "$MIGA/scripts/miga.bash" || exit 1
|
8
9
|
cd "$PROJECT/data/09.distances/03.ani"
|
9
10
|
|
10
11
|
# Initialize
|
11
|
-
date
|
12
|
+
miga date > "miga-project.start"
|
12
13
|
|
13
14
|
echo -n "" > miga-project.log
|
14
15
|
DS=$(miga list_datasets -P "$PROJECT" --ref --no-multi)
|
@@ -39,5 +40,5 @@ if(sum(ani[,'a'] != ani[,'b']) > 0){
|
|
39
40
|
gzip -9 -f miga-project.txt
|
40
41
|
|
41
42
|
# Finalize
|
42
|
-
date
|
43
|
+
miga date > "miga-project.done"
|
43
44
|
miga add_result -P "$PROJECT" -r "$SCRIPT"
|