miga-base 0.5.5.1 → 0.5.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/cli/action.rb +1 -0
- data/lib/miga/cli/action/wf.rb +3 -0
- data/lib/miga/cli/opt_helper.rb +1 -1
- data/lib/miga/daemon.rb +16 -3
- data/lib/miga/dataset.rb +3 -4
- data/lib/miga/dataset/result.rb +42 -0
- data/lib/miga/remote_dataset.rb +4 -0
- data/lib/miga/version.rb +2 -2
- data/scripts/project_stats.bash +1 -1
- data/test/daemon_test.rb +1 -1
- data/test/dataset_test.rb +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.N50.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.filterN.pl +1 -1
- data/utils/enveomics/Pipelines/assembly.pbs/FastA.length.pl +1 -1
- data/utils/enveomics/Pipelines/blast.pbs/FastA.split.pl +1 -1
- data/utils/enveomics/Scripts/lib/enveomics.R +1 -1
- data/utils/find-medoid.R +7 -2
- data/utils/index_metadata.rb +7 -6
- data/utils/subclade/pipeline.rb +3 -0
- data/utils/subclades.R +6 -6
- metadata +3 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2898bb1227f7e473f1b11d89de6cec0dea005573374c2dac19d7dcaf21143a4b
|
4
|
+
data.tar.gz: f6dcbfeeccf9cbcc3d8103f2c27babb47f1594f43252e131dbc67ca9ff2ef294
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 42c0349e42347f2bd5bdbab6eb6dd7520335c5b3e1bb107685a80691d5ed0cc385f0a475328183db3c5e40e8e6d434495794696804d4c1c6c9c550fc0891f05a
|
7
|
+
data.tar.gz: 75b711267b414526af80641a8ef8cd308c0b7e7593091772fbf3ea6916da7360d3ddd3ff980df3d7e19db621731596ab284a32f7f18b05241e23606d673340ab
|
data/lib/miga/cli/action.rb
CHANGED
data/lib/miga/cli/action/wf.rb
CHANGED
@@ -120,6 +120,7 @@ module MiGA::Cli::Action::Wf
|
|
120
120
|
# Define project metadata
|
121
121
|
p = cli.load_project(:outdir, '-o')
|
122
122
|
[:haai_p, :aai_p, :ani_p, :ess_coll].each { |i| p_metadata[i] = cli[i] }
|
123
|
+
p_metadata[:type] = cli[:project_type]
|
123
124
|
transfer_metadata(p, p_metadata)
|
124
125
|
# Download datasets
|
125
126
|
call_cli([
|
@@ -138,6 +139,7 @@ module MiGA::Cli::Action::Wf
|
|
138
139
|
] + cli.files) unless cli.files.empty?
|
139
140
|
# Define datasets metadata
|
140
141
|
p.load
|
142
|
+
d_metadata[:type] = cli[:dataset_type]
|
141
143
|
p.each_dataset { |d| transfer_metadata(d, d_metadata) }
|
142
144
|
p
|
143
145
|
end
|
@@ -165,6 +167,7 @@ module MiGA::Cli::Action::Wf
|
|
165
167
|
|
166
168
|
def call_cli(cmd)
|
167
169
|
cmd << '-v' if cli[:verbose]
|
170
|
+
MiGA::MiGA.DEBUG "Cli::Action::Wf.call_cli #{cmd}"
|
168
171
|
MiGA::Cli.new(cmd.map(&:to_s)).launch
|
169
172
|
end
|
170
173
|
|
data/lib/miga/cli/opt_helper.rb
CHANGED
@@ -155,6 +155,6 @@ module MiGA::Cli::OptHelper
|
|
155
155
|
# If +sym+ is nil, +flag+ is used as Symbol
|
156
156
|
def opt_flag(opt, flag, description, sym = nil)
|
157
157
|
sym = flag.to_sym if sym.nil?
|
158
|
-
opt.on("--#{flag}", description) { |v| self[sym] = v }
|
158
|
+
opt.on("--#{flag.to_s.gsub('_','-')}", description) { |v| self[sym] = v }
|
159
159
|
end
|
160
160
|
end
|
data/lib/miga/daemon.rb
CHANGED
@@ -71,12 +71,18 @@ class MiGA::Daemon < MiGA::MiGA
|
|
71
71
|
# Launches the +task+ with options +opts+ (as command-line arguments).
|
72
72
|
# Supported tasks include: start, stop, restart, status.
|
73
73
|
def daemon(task, opts=[])
|
74
|
+
MiGA.DEBUG "Daemon.daemon #{task} #{opts}"
|
74
75
|
options = default_options
|
75
76
|
opts.unshift(task)
|
76
77
|
options[:ARGV] = opts
|
77
|
-
|
78
|
-
|
78
|
+
# This additional degree of separation below was introduced so the Daemons
|
79
|
+
# package doesn't kill the parent process in workflows.
|
80
|
+
pid = fork do
|
81
|
+
Daemons.run_proc("MiGA:#{project.name}", options) do
|
82
|
+
loop { break unless in_loop }
|
83
|
+
end
|
79
84
|
end
|
85
|
+
Process.wait pid
|
80
86
|
end
|
81
87
|
|
82
88
|
##
|
@@ -321,7 +327,14 @@ class MiGA::Daemon < MiGA::MiGA
|
|
321
327
|
var: %w[key value],
|
322
328
|
alive: %w[pid],
|
323
329
|
kill: %w[pid]
|
324
|
-
}.each
|
330
|
+
}.each do |k,v|
|
331
|
+
runopts(
|
332
|
+
k, sprintf(
|
333
|
+
runopts(k).gsub(/%(\d+)\$d/, '%\\1$s'),
|
334
|
+
*v.map{ |i| "{{#{i}}}" }
|
335
|
+
)
|
336
|
+
)
|
337
|
+
end
|
325
338
|
runopts(:format_version, 1)
|
326
339
|
end
|
327
340
|
end
|
data/lib/miga/dataset.rb
CHANGED
@@ -55,6 +55,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
55
55
|
File.expand_path("metadata/#{name}.json", project.path),
|
56
56
|
metadata
|
57
57
|
]
|
58
|
+
save unless File.exist? @metadata_future[0]
|
58
59
|
end
|
59
60
|
|
60
61
|
##
|
@@ -66,10 +67,8 @@ class MiGA::Dataset < MiGA::MiGA
|
|
66
67
|
##
|
67
68
|
# Save any changes you've made in the dataset.
|
68
69
|
def save
|
69
|
-
|
70
|
-
|
71
|
-
end
|
72
|
-
self.metadata.save
|
70
|
+
MiGA.DEBUG "Dataset.metadata: #{metadata.data}"
|
71
|
+
metadata.save
|
73
72
|
end
|
74
73
|
|
75
74
|
##
|
data/lib/miga/dataset/result.rb
CHANGED
@@ -116,6 +116,48 @@ module MiGA::Dataset::Result
|
|
116
116
|
end
|
117
117
|
adv
|
118
118
|
end
|
119
|
+
|
120
|
+
##
|
121
|
+
# Returns a Hash with tasks as key and status as value.
|
122
|
+
# See +result_status+ for possible values
|
123
|
+
def results_status
|
124
|
+
Hash[@@PREPROCESSING_TASKS.map { |task| [task, result_status(task)] }]
|
125
|
+
end
|
126
|
+
|
127
|
+
##
|
128
|
+
# Returns the status of +task+. The status values are symbols:
|
129
|
+
# - ignore_inactive: the dataset is inactive
|
130
|
+
# - ignore_force: forced to ignore by metadata
|
131
|
+
# - ignore_project: incompatible project
|
132
|
+
# - ignore_noref: incompatible dataset, only for reference
|
133
|
+
# - ignore_multi: incompatible dataset, only for multi
|
134
|
+
# - ignore_nonmulti: incompatible dataset, only for nonmulti
|
135
|
+
# - ignore: incompatible dataset, unknown reason
|
136
|
+
# - complete: a task with registered results
|
137
|
+
# - pending: a task queued to be performed
|
138
|
+
def result_status(task)
|
139
|
+
if not get_result(task).nil?
|
140
|
+
:complete
|
141
|
+
elsif ignore_task?(task)
|
142
|
+
if not is_active?
|
143
|
+
:ignore_inactive
|
144
|
+
elsif metadata["run_#{task}"]
|
145
|
+
:ignore_force
|
146
|
+
elsif task == :taxonomy and project.metadata[:ref_project].nil?
|
147
|
+
:ignore_project
|
148
|
+
elsif @@_EXCLUDE_NOREF_TASKS_H[task] && ! is_ref?
|
149
|
+
:ignore_noref
|
150
|
+
elsif @@_ONLY_MULTI_TASKS_H[task] && ! is_multi?
|
151
|
+
:ignore_multi
|
152
|
+
elsif @@_ONLY_NONMULTI_TASKS_H[task] && ! is_nonmulti?
|
153
|
+
:ignore_nonmulti
|
154
|
+
else
|
155
|
+
:ignore
|
156
|
+
end
|
157
|
+
else
|
158
|
+
:pending
|
159
|
+
end
|
160
|
+
end
|
119
161
|
|
120
162
|
##
|
121
163
|
# Clean-up all the stored distances, removing values for datasets no longer in
|
data/lib/miga/remote_dataset.rb
CHANGED
@@ -139,6 +139,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
139
139
|
rank = 'dataset' if lineage.empty? and rank.nil?
|
140
140
|
lineage[rank] = name unless rank.nil? or rank.nil?
|
141
141
|
end
|
142
|
+
MiGA.DEBUG "Got lineage: #{lineage}"
|
142
143
|
MiGA::Taxonomy.new(lineage)
|
143
144
|
end
|
144
145
|
|
@@ -210,6 +211,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
|
|
210
211
|
metadata[:is_type] = true
|
211
212
|
metadata[:type_rel] = from_type
|
212
213
|
end
|
214
|
+
metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
|
215
|
+
metadata[:suspect] = nil if metadata[:suspect].empty?
|
216
|
+
MiGA.DEBUG "Got type: #{from_type}"
|
213
217
|
metadata
|
214
218
|
end
|
215
219
|
|
data/lib/miga/version.rb
CHANGED
@@ -10,7 +10,7 @@ module MiGA
|
|
10
10
|
# - Float representing the major.minor version.
|
11
11
|
# - Integer representing gem releases of the current version.
|
12
12
|
# - Integer representing minor changes that require new version number.
|
13
|
-
VERSION = [0.5,
|
13
|
+
VERSION = [0.5, 7, 2]
|
14
14
|
|
15
15
|
##
|
16
16
|
# Nickname for the current major.minor version.
|
@@ -18,7 +18,7 @@ module MiGA
|
|
18
18
|
|
19
19
|
##
|
20
20
|
# Date of the current gem release.
|
21
|
-
VERSION_DATE = Date.new(2020, 2,
|
21
|
+
VERSION_DATE = Date.new(2020, 2, 8)
|
22
22
|
|
23
23
|
##
|
24
24
|
# Reference of MiGA.
|
data/scripts/project_stats.bash
CHANGED
@@ -12,7 +12,7 @@ cd "$DIR"
|
|
12
12
|
miga date > "miga-project.start"
|
13
13
|
|
14
14
|
# Index taxonomy
|
15
|
-
miga
|
15
|
+
miga tax_index -P "$PROJECT" -i "miga-project.taxonomy.json" --ref --active
|
16
16
|
|
17
17
|
# Index metadata
|
18
18
|
ruby -I "$MIGA/lib" \
|
data/test/daemon_test.rb
CHANGED
@@ -79,7 +79,7 @@ class DaemonTest < Test::Unit::TestCase
|
|
79
79
|
dpath = File.expand_path("daemon/MiGA:#{p.name}",p.path)
|
80
80
|
assert(File.exist?("#{dpath}.pid"))
|
81
81
|
out = capture_stdout { d.stop }
|
82
|
-
|
82
|
+
assert_equal('', out.string)
|
83
83
|
assert(!File.exist?("#{dpath}.pid"))
|
84
84
|
assert(File.exist?("#{dpath}.output"))
|
85
85
|
File.open("#{dpath}.output", "r") do |fh|
|
data/test/dataset_test.rb
CHANGED
@@ -50,7 +50,7 @@ class DatasetTest < Test::Unit::TestCase
|
|
50
50
|
assert(!d2.is_multi?)
|
51
51
|
assert(!d2.is_nonmulti?)
|
52
52
|
assert_nil(d2.metadata[:type])
|
53
|
-
d2.metadata[:
|
53
|
+
d2.metadata[:type] = :metagenome
|
54
54
|
d2.save
|
55
55
|
assert_equal(:metagenome, d2.metadata[:type])
|
56
56
|
assert(d2.is_multi?)
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.N50.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.filterN.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.length.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../Scripts/FastA.split.pl
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
../../enveomics.R
|
data/utils/find-medoid.R
CHANGED
@@ -20,18 +20,23 @@ find_medoids <- function(ani.df, out, clades) {
|
|
20
20
|
dist <- enve.df2dist(ani.df, 'a', 'b', 'd', default.d = max(ani.df$d)*1.2)
|
21
21
|
dist <- as.matrix(dist)
|
22
22
|
cl <- read.table(clades, header = FALSE, sep = '\t', as.is = TRUE)[,1]
|
23
|
+
cl.s <- c()
|
23
24
|
medoids <- c()
|
24
25
|
for(i in cl){
|
25
26
|
lab <- strsplit(i, ',')[[1]]
|
26
27
|
cat('Clade of:', lab[1], '\n')
|
27
28
|
if(length(lab) == 1) {
|
28
|
-
|
29
|
+
lab.s <- lab
|
29
30
|
} else {
|
30
|
-
|
31
|
+
lab.s <- lab[order(colSums(dist[lab, lab], na.rm = TRUE))]
|
31
32
|
}
|
33
|
+
med <- lab.s[1]
|
32
34
|
medoids <- c(medoids, med)
|
35
|
+
cl.s <- c(cl.s, paste(lab.s, collapse = ','))
|
33
36
|
}
|
34
37
|
write.table(medoids, out, quote = FALSE, row.names = FALSE, col.names = FALSE)
|
38
|
+
write.table(cl.s, paste(clades, '.sorted', sep = ''), quote = FALSE,
|
39
|
+
row.names = FALSE, col.names = FALSE)
|
35
40
|
}
|
36
41
|
|
37
42
|
#= Main
|
data/utils/index_metadata.rb
CHANGED
@@ -1,23 +1,24 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'miga'
|
4
|
+
require 'sqlite3'
|
5
5
|
|
6
6
|
p = MiGA::Project.load(ARGV[0])
|
7
7
|
raise "Impossible to load project: #{ARGV[0]}." if p.nil?
|
8
8
|
|
9
9
|
File.unlink(ARGV[1]) if File.exist? ARGV[1]
|
10
10
|
db = SQLite3::Database.new(ARGV[1])
|
11
|
-
db.execute
|
12
|
-
|
11
|
+
db.execute 'create table metadata(' \
|
12
|
+
'`name` varchar(256), `field` varchar(256), `value` text)'
|
13
13
|
|
14
14
|
def searchable(db, d, k, v)
|
15
|
-
db.execute
|
16
|
-
d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/,
|
15
|
+
db.execute 'insert into metadata values(?,?,?)',
|
16
|
+
d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, ' ')} "
|
17
17
|
end
|
18
18
|
|
19
19
|
p.each_dataset do |name, d|
|
20
20
|
next unless d.is_ref?
|
21
|
+
next unless d.is_active?
|
21
22
|
searchable(db, d, :name, d.name)
|
22
23
|
d.metadata.each do |k, v|
|
23
24
|
next if [:created, :updated].include? k
|
data/utils/subclade/pipeline.rb
CHANGED
@@ -47,6 +47,9 @@ module MiGA::SubcladeRunner::Pipeline
|
|
47
47
|
dir = opts[:gsp_metric] == 'aai' ? '02.aai' : '03.ani'
|
48
48
|
`Rscript '#{src}' ../../09.distances/#{dir}/miga-project.Rdata \
|
49
49
|
miga-project.gsp-medoids miga-project.gsp-clades`
|
50
|
+
if File.exist? 'miga-project.gsp-clades.sorted'
|
51
|
+
File.rename 'miga-project.gsp-clades.sorted', 'miga-project.gsp-clades'
|
52
|
+
end
|
50
53
|
|
51
54
|
# Propose clades
|
52
55
|
ofh = File.open('miga-project.proposed-clades', 'w')
|
data/utils/subclades.R
CHANGED
@@ -113,7 +113,7 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
|
|
113
113
|
}
|
114
114
|
write.tree(ani.ph, paste(out_base, ".nwk", sep=""))
|
115
115
|
options(expressions=express.ori)
|
116
|
-
|
116
|
+
|
117
117
|
# Silhouette
|
118
118
|
say("Silhouette")
|
119
119
|
nn <- length(labels(ani.d))
|
@@ -130,13 +130,13 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
|
|
130
130
|
ds <- s.avg.z - s.neg.z - 2/(1:length(k)) - (1:length(k))/50
|
131
131
|
if(mean(s[1,]<0)<0.75) ds[s[1,]<0] <- mean(ds) # <- k's with negative average
|
132
132
|
top.n <- k[which.max(ds)]
|
133
|
-
|
133
|
+
|
134
134
|
# Classify genomes
|
135
135
|
say("Classify => k :", top.n, "| n :", length(labels(ani.d)))
|
136
136
|
ani.cl <- pam(ani.d, top.n, pamonce=1)
|
137
137
|
ani.types <- ani.cl$clustering
|
138
138
|
ani.medoids <- ani.cl$medoids
|
139
|
-
|
139
|
+
|
140
140
|
# Generate graphic report
|
141
141
|
say("Graphic report")
|
142
142
|
pdf(paste(out_base, ".pdf", sep=""), 7, 12)
|
@@ -149,7 +149,7 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
|
|
149
149
|
|
150
150
|
# Save results
|
151
151
|
write_text_report(out_base, ani.d, ani.medoids, ani.types)
|
152
|
-
|
152
|
+
|
153
153
|
# Return data
|
154
154
|
say("Cluster ready")
|
155
155
|
return(list(
|
@@ -172,8 +172,8 @@ generate_empty_files <- function(out_base) {
|
|
172
172
|
}
|
173
173
|
|
174
174
|
write_text_report <- function(out_base, ani.d, ani.medoids, ani.types){
|
175
|
-
say(
|
176
|
-
write.table(ani.medoids, paste(out_base,
|
175
|
+
say('Text report')
|
176
|
+
write.table(ani.medoids, paste(out_base, 'medoids', sep='.'),
|
177
177
|
quote=FALSE, col.names=FALSE, row.names=FALSE)
|
178
178
|
classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
|
179
179
|
ani.d.m <- 100 - as.matrix(ani.d)*100
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-02-
|
11
|
+
date: 2020-02-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|
@@ -515,8 +515,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
515
515
|
- !ruby/object:Gem::Version
|
516
516
|
version: '0'
|
517
517
|
requirements: []
|
518
|
-
|
519
|
-
rubygems_version: 2.7.6
|
518
|
+
rubygems_version: 3.0.3
|
520
519
|
signing_key:
|
521
520
|
specification_version: 4
|
522
521
|
summary: MiGA
|