miga-base 0.5.5.1 → 0.5.7.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/cli/action.rb +1 -0
- data/lib/miga/cli/action/wf.rb +3 -0
- data/lib/miga/cli/opt_helper.rb +1 -1
- data/lib/miga/daemon.rb +16 -3
- data/lib/miga/dataset.rb +3 -4
- data/lib/miga/dataset/result.rb +42 -0
- data/lib/miga/remote_dataset.rb +4 -0
- data/lib/miga/version.rb +2 -2
- data/scripts/project_stats.bash +1 -1
- data/test/daemon_test.rb +1 -1
- data/test/dataset_test.rb +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
- data/utils/find-medoid.R +7 -2
- data/utils/index_metadata.rb +7 -6
- data/utils/subclade/pipeline.rb +3 -0
- data/utils/subclades.R +6 -6
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2898bb1227f7e473f1b11d89de6cec0dea005573374c2dac19d7dcaf21143a4b
|
4
|
+
data.tar.gz: f6dcbfeeccf9cbcc3d8103f2c27babb47f1594f43252e131dbc67ca9ff2ef294
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 42c0349e42347f2bd5bdbab6eb6dd7520335c5b3e1bb107685a80691d5ed0cc385f0a475328183db3c5e40e8e6d434495794696804d4c1c6c9c550fc0891f05a
|
7
|
+
data.tar.gz: 75b711267b414526af80641a8ef8cd308c0b7e7593091772fbf3ea6916da7360d3ddd3ff980df3d7e19db621731596ab284a32f7f18b05241e23606d673340ab
|
data/lib/miga/cli/action.rb
CHANGED
data/lib/miga/cli/action/wf.rb
CHANGED
@@ -120,6 +120,7 @@ module MiGA::Cli::Action::Wf
|
|
120
120
|
# Define project metadata
|
121
121
|
p = cli.load_project(:outdir, '-o')
|
122
122
|
[:haai_p, :aai_p, :ani_p, :ess_coll].each { |i| p_metadata[i] = cli[i] }
|
123
|
+
p_metadata[:type] = cli[:project_type]
|
123
124
|
transfer_metadata(p, p_metadata)
|
124
125
|
# Download datasets
|
125
126
|
call_cli([
|
@@ -138,6 +139,7 @@ module MiGA::Cli::Action::Wf
|
|
138
139
|
] + cli.files) unless cli.files.empty?
|
139
140
|
# Define datasets metadata
|
140
141
|
p.load
|
142
|
+
d_metadata[:type] = cli[:dataset_type]
|
141
143
|
p.each_dataset { |d| transfer_metadata(d, d_metadata) }
|
142
144
|
p
|
143
145
|
end
|
@@ -165,6 +167,7 @@ module MiGA::Cli::Action::Wf
|
|
165
167
|
|
166
168
|
def call_cli(cmd)
|
167
169
|
cmd << '-v' if cli[:verbose]
|
170
|
+
MiGA::MiGA.DEBUG "Cli::Action::Wf.call_cli #{cmd}"
|
168
171
|
MiGA::Cli.new(cmd.map(&:to_s)).launch
|
169
172
|
end
|
170
173
|
|
data/lib/miga/cli/opt_helper.rb
CHANGED
@@ -155,6 +155,6 @@ module MiGA::Cli::OptHelper
|
|
155
155
|
# If +sym+ is nil, +flag+ is used as Symbol
|
156
156
|
def opt_flag(opt, flag, description, sym = nil)
|
157
157
|
sym = flag.to_sym if sym.nil?
|
158
|
-
opt.on("--#{flag}", description) { |v| self[sym] = v }
|
158
|
+
opt.on("--#{flag.to_s.gsub('_','-')}", description) { |v| self[sym] = v }
|
159
159
|
end
|
160
160
|
end
|
data/lib/miga/daemon.rb
CHANGED
@@ -71,12 +71,18 @@ class MiGA::Daemon < MiGA::MiGA
|
|
71
71
|
# Launches the +task+ with options +opts+ (as command-line arguments).
|
72
72
|
# Supported tasks include: start, stop, restart, status.
|
73
73
|
def daemon(task, opts=[])
|
74
|
+
MiGA.DEBUG "Daemon.daemon #{task} #{opts}"
|
74
75
|
options = default_options
|
75
76
|
opts.unshift(task)
|
76
77
|
options[:ARGV] = opts
|
77
|
-
|
78
|
-
|
78
|
+
# This additional degree of separation below was introduced so the Daemons
|
79
|
+
# package doesn't kill the parent process in workflows.
|
80
|
+
pid = fork do
|
81
|
+
Daemons.run_proc("MiGA:#{project.name}", options) do
|
82
|
+
loop { break unless in_loop }
|
83
|
+
end
|
79
84
|
end
|
85
|
+
Process.wait pid
|
80
86
|
end
|
81
87
|
|
82
88
|
##
|
@@ -321,7 +327,14 @@ class MiGA::Daemon < MiGA::MiGA
|
|
321
327
|
var: %w[key value],
|
322
328
|
alive: %w[pid],
|
323
329
|
kill: %w[pid]
|
324
|
-
}.each
|
330
|
+
}.each do |k,v|
|
331
|
+
runopts(
|
332
|
+
k, sprintf(
|
333
|
+
runopts(k).gsub(/%(\d+)\$d/, '%\\1$s'),
|
334
|
+
*v.map{ |i| "{{#{i}}}" }
|
335
|
+
)
|
336
|
+
)
|
337
|
+
end
|
325
338
|
runopts(:format_version, 1)
|
326
339
|
end
|
327
340
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -55,6 +55,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
55
55
|
File.expand_path("metadata/#{name}.json", project.path),
|
56
56
|
metadata
|
57
57
|
]
|
58
|
+
save unless File.exist? @metadata_future[0]
|
58
59
|
end
|
59
60
|
|
60
61
|
##
|
@@ -66,10 +67,8 @@ class MiGA::Dataset < MiGA::MiGA
|
|
66
67
|
##
|
67
68
|
# Save any changes you've made in the dataset.
|
68
69
|
def save
|
69
|
-
|
70
|
-
|
71
|
-
end
|
72
|
-
self.metadata.save
|
70
|
+
MiGA.DEBUG "Dataset.metadata: #{metadata.data}"
|
71
|
+
metadata.save
|
73
72
|
end
|
74
73
|
|
75
74
|
##
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -116,6 +116,48 @@ module MiGA::Dataset::Result
|
|
116
116
|
end
|
117
117
|
adv
|
118
118
|
end
|
119
|
+
|
120
|
+
##
|
121
|
+
# Returns a Hash with tasks as key and status as value.
|
122
|
+
# See +result_status+ for possible values
|
123
|
+
def results_status
|
124
|
+
Hash[@@PREPROCESSING_TASKS.map { |task| [task, result_status(task)] }]
|
125
|
+
end
|
126
|
+
|
127
|
+
##
|
128
|
+
# Returns the status of +task+. The status values are symbols:
|
129
|
+
# - ignore_inactive: the dataset is inactive
|
130
|
+
# - ignore_force: forced to ignore by metadata
|
131
|
+
# - ignore_project: incompatible project
|
132
|
+
# - ignore_noref: incompatible dataset, only for reference
|
133
|
+
# - ignore_multi: incompatible dataset, only for multi
|
134
|
+
# - ignore_nonmulti: incompatible dataset, only for nonmulti
|
135
|
+
# - ignore: incompatible dataset, unknown reason
|
136
|
+
# - complete: a task with registered results
|
137
|
+
# - pending: a task queued to be performed
|
138
|
+
def result_status(task)
|
139
|
+
if not get_result(task).nil?
|
140
|
+
:complete
|
141
|
+
elsif ignore_task?(task)
|
142
|
+
if not is_active?
|
143
|
+
:ignore_inactive
|
144
|
+
elsif metadata["run_#{task}"]
|
145
|
+
:ignore_force
|
146
|
+
elsif task == :taxonomy and project.metadata[:ref_project].nil?
|
147
|
+
:ignore_project
|
148
|
+
elsif @@_EXCLUDE_NOREF_TASKS_H[task] && ! is_ref?
|
149
|
+
:ignore_noref
|
150
|
+
elsif @@_ONLY_MULTI_TASKS_H[task] && ! is_multi?
|
151
|
+
:ignore_multi
|
152
|
+
elsif @@_ONLY_NONMULTI_TASKS_H[task] && ! is_nonmulti?
|
153
|
+
:ignore_nonmulti
|
154
|
+
else
|
155
|
+
:ignore
|
156
|
+
end
|
157
|
+
else
|
158
|
+
:pending
|
159
|
+
end
|
160
|
+
end
|
119
161
|
|
120
162
|
##
|
121
163
|
# Clean-up all the stored distances, removing values for datasets no longer in
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -139,6 +139,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
139
139
|
rank = 'dataset' if lineage.empty? and rank.nil?
|
140
140
|
lineage[rank] = name unless rank.nil? or rank.nil?
|
141
141
|
end
|
142
|
+
MiGA.DEBUG "Got lineage: #{lineage}"
|
142
143
|
MiGA::Taxonomy.new(lineage)
|
143
144
|
end
|
144
145
|
|
@@ -210,6 +211,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
210
211
|
metadata[:is_type] = true
|
211
212
|
metadata[:type_rel] = from_type
|
212
213
|
end
|
214
|
+
metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
|
215
|
+
metadata[:suspect] = nil if metadata[:suspect].empty?
|
216
|
+
MiGA.DEBUG "Got type: #{from_type}"
|
213
217
|
metadata
|
214
218
|
end
|
215
219
|
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.5,
|
13
|
+
VERSION = [0.5, 7, 2]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2020, 2,
|
21
|
+
VERSION_DATE = Date.new(2020, 2, 8)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
data/scripts/project_stats.bash
CHANGED
@@ -12,7 +12,7 @@ cd "$DIR"
|
|
12
12
|
miga date > "miga-project.start"
|
13
13
|
|
14
14
|
# Index taxonomy
|
15
|
-
miga
|
15
|
+
miga tax_index -P "$PROJECT" -i "miga-project.taxonomy.json" --ref --active
|
16
16
|
|
17
17
|
# Index metadata
|
18
18
|
ruby -I "$MIGA/lib" \
|
data/test/daemon_test.rb
CHANGED
@@ -79,7 +79,7 @@ class DaemonTest < Test::Unit::TestCase
|
|
79
79
|
dpath = File.expand_path("daemon/MiGA:#{p.name}",p.path)
|
80
80
|
assert(File.exist?("#{dpath}.pid"))
|
81
81
|
out = capture_stdout { d.stop }
|
82
|
-
|
82
|
+
assert_equal('', out.string)
|
83
83
|
assert(!File.exist?("#{dpath}.pid"))
|
84
84
|
assert(File.exist?("#{dpath}.output"))
|
85
85
|
File.open("#{dpath}.output", "r") do |fh|
|
data/test/dataset_test.rb
CHANGED
@@ -50,7 +50,7 @@ class DatasetTest < Test::Unit::TestCase
|
|
50
50
|
assert(!d2.is_multi?)
|
51
51
|
assert(!d2.is_nonmulti?)
|
52
52
|
assert_nil(d2.metadata[:type])
|
53
|
-
d2.metadata[:
|
53
|
+
d2.metadata[:type] = :metagenome
|
54
54
|
d2.save
|
55
55
|
assert_equal(:metagenome, d2.metadata[:type])
|
56
56
|
assert(d2.is_multi?)
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.N50.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.filterN.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.length.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.split.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../enveomics.R
|
data/utils/find-medoid.R
CHANGED
@@ -20,18 +20,23 @@ find_medoids <- function(ani.df, out, clades) {
|
|
20
20
|
dist <- enve.df2dist(ani.df, 'a', 'b', 'd', default.d = max(ani.df$d)*1.2)
|
21
21
|
dist <- as.matrix(dist)
|
22
22
|
cl <- read.table(clades, header = FALSE, sep = '\t', as.is = TRUE)[,1]
|
23
|
+
cl.s <- c()
|
23
24
|
medoids <- c()
|
24
25
|
for(i in cl){
|
25
26
|
lab <- strsplit(i, ',')[[1]]
|
26
27
|
cat('Clade of:', lab[1], '\n')
|
27
28
|
if(length(lab) == 1) {
|
28
|
-
|
29
|
+
lab.s <- lab
|
29
30
|
} else {
|
30
|
-
|
31
|
+
lab.s <- lab[order(colSums(dist[lab, lab], na.rm = TRUE))]
|
31
32
|
}
|
33
|
+
med <- lab.s[1]
|
32
34
|
medoids <- c(medoids, med)
|
35
|
+
cl.s <- c(cl.s, paste(lab.s, collapse = ','))
|
33
36
|
}
|
34
37
|
write.table(medoids, out, quote = FALSE, row.names = FALSE, col.names = FALSE)
|
38
|
+
write.table(cl.s, paste(clades, '.sorted', sep = ''), quote = FALSE,
|
39
|
+
row.names = FALSE, col.names = FALSE)
|
35
40
|
}
|
36
41
|
|
37
42
|
#= Main
|
data/utils/index_metadata.rb
CHANGED
@@ -1,23 +1,24 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'miga'
|
4
|
+
require 'sqlite3'
|
5
5
|
|
6
6
|
p = MiGA::Project.load(ARGV[0])
|
7
7
|
raise "Impossible to load project: #{ARGV[0]}." if p.nil?
|
8
8
|
|
9
9
|
File.unlink(ARGV[1]) if File.exist? ARGV[1]
|
10
10
|
db = SQLite3::Database.new(ARGV[1])
|
11
|
-
db.execute
|
12
|
-
|
11
|
+
db.execute 'create table metadata(' \
|
12
|
+
'`name` varchar(256), `field` varchar(256), `value` text)'
|
13
13
|
|
14
14
|
def searchable(db, d, k, v)
|
15
|
-
db.execute
|
16
|
-
d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/,
|
15
|
+
db.execute 'insert into metadata values(?,?,?)',
|
16
|
+
d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, ' ')} "
|
17
17
|
end
|
18
18
|
|
19
19
|
p.each_dataset do |name, d|
|
20
20
|
next unless d.is_ref?
|
21
|
+
next unless d.is_active?
|
21
22
|
searchable(db, d, :name, d.name)
|
22
23
|
d.metadata.each do |k, v|
|
23
24
|
next if [:created, :updated].include? k
|
data/utils/subclade/pipeline.rb
CHANGED
@@ -47,6 +47,9 @@ module MiGA::SubcladeRunner::Pipeline
|
|
47
47
|
dir = opts[:gsp_metric] == 'aai' ? '02.aai' : '03.ani'
|
48
48
|
`Rscript '#{src}' ../../09.distances/#{dir}/miga-project.Rdata \
|
49
49
|
miga-project.gsp-medoids miga-project.gsp-clades`
|
50
|
+
if File.exist? 'miga-project.gsp-clades.sorted'
|
51
|
+
File.rename 'miga-project.gsp-clades.sorted', 'miga-project.gsp-clades'
|
52
|
+
end
|
50
53
|
|
51
54
|
# Propose clades
|
52
55
|
ofh = File.open('miga-project.proposed-clades', 'w')
|
data/utils/subclades.R
CHANGED
@@ -113,7 +113,7 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
|
|
113
113
|
}
|
114
114
|
write.tree(ani.ph, paste(out_base, ".nwk", sep=""))
|
115
115
|
options(expressions=express.ori)
|
116
|
-
|
116
|
+
|
117
117
|
# Silhouette
|
118
118
|
say("Silhouette")
|
119
119
|
nn <- length(labels(ani.d))
|
@@ -130,13 +130,13 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
|
|
130
130
|
ds <- s.avg.z - s.neg.z - 2/(1:length(k)) - (1:length(k))/50
|
131
131
|
if(mean(s[1,]<0)<0.75) ds[s[1,]<0] <- mean(ds) # <- k's with negative average
|
132
132
|
top.n <- k[which.max(ds)]
|
133
|
-
|
133
|
+
|
134
134
|
# Classify genomes
|
135
135
|
say("Classify => k :", top.n, "| n :", length(labels(ani.d)))
|
136
136
|
ani.cl <- pam(ani.d, top.n, pamonce=1)
|
137
137
|
ani.types <- ani.cl$clustering
|
138
138
|
ani.medoids <- ani.cl$medoids
|
139
|
-
|
139
|
+
|
140
140
|
# Generate graphic report
|
141
141
|
say("Graphic report")
|
142
142
|
pdf(paste(out_base, ".pdf", sep=""), 7, 12)
|
@@ -149,7 +149,7 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
|
|
149
149
|
|
150
150
|
# Save results
|
151
151
|
write_text_report(out_base, ani.d, ani.medoids, ani.types)
|
152
|
-
|
152
|
+
|
153
153
|
# Return data
|
154
154
|
say("Cluster ready")
|
155
155
|
return(list(
|
@@ -172,8 +172,8 @@ generate_empty_files <- function(out_base) {
|
|
172
172
|
}
|
173
173
|
|
174
174
|
write_text_report <- function(out_base, ani.d, ani.medoids, ani.types){
|
175
|
-
say(
|
176
|
-
write.table(ani.medoids, paste(out_base,
|
175
|
+
say('Text report')
|
176
|
+
write.table(ani.medoids, paste(out_base, 'medoids', sep='.'),
|
177
177
|
quote=FALSE, col.names=FALSE, row.names=FALSE)
|
178
178
|
classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
|
179
179
|
ani.d.m <- 100 - as.matrix(ani.d)*100
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-02-
|
11
|
+
date: 2020-02-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -515,8 +515,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
515
515
|
- !ruby/object:Gem::Version
|
516
516
|
version: '0'
|
517
517
|
requirements: []
|
518
|
-
|
519
|
-
rubygems_version: 2.7.6
|
518
|
+
rubygems_version: 3.0.3
|
520
519
|
signing_key:
|
521
520
|
specification_version: 4
|
522
521
|
summary: MiGA
|