miga-base 0.5.5.1 → 0.5.7.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 405d3cf6f84fa7f78f8026d1392be3fd8c32b3caea3d9e2c4c29a511ffeccb6c
4
- data.tar.gz: 29108306ca8358a1155f791a4a586fce73eb9dccbbf2841869ed450064508774
3
+ metadata.gz: 2898bb1227f7e473f1b11d89de6cec0dea005573374c2dac19d7dcaf21143a4b
4
+ data.tar.gz: f6dcbfeeccf9cbcc3d8103f2c27babb47f1594f43252e131dbc67ca9ff2ef294
5
5
  SHA512:
6
- metadata.gz: 84e3264d6d55ff810b6ba0ae415abac248b65ec411fde5780107c1974923c2ef7da2cd1ae86c74f36aad94af03534b1f326327f797eaf6aec1c2a20c444dfa8d
7
- data.tar.gz: cff5d84655677d1eabe298478d099074c1769cb4c74a753b10e3cbd62fb9fb0ed3b4df2ccc8924799c04d8a2e6be6ac7b3720f8ccbf7d83885cf17c5537fcd97
6
+ metadata.gz: 42c0349e42347f2bd5bdbab6eb6dd7520335c5b3e1bb107685a80691d5ed0cc385f0a475328183db3c5e40e8e6d434495794696804d4c1c6c9c550fc0891f05a
7
+ data.tar.gz: 75b711267b414526af80641a8ef8cd308c0b7e7593091772fbf3ea6916da7360d3ddd3ff980df3d7e19db621731596ab284a32f7f18b05241e23606d673340ab
@@ -27,6 +27,7 @@ class MiGA::Cli::Action < MiGA::MiGA
27
27
  ##
28
28
  # Launch the sequence
29
29
  def launch
30
+ MiGA.DEBUG 'Cli::Action.launch'
30
31
  empty_action if cli.argv.empty?
31
32
  parse_cli
32
33
  perform
@@ -120,6 +120,7 @@ module MiGA::Cli::Action::Wf
120
120
  # Define project metadata
121
121
  p = cli.load_project(:outdir, '-o')
122
122
  [:haai_p, :aai_p, :ani_p, :ess_coll].each { |i| p_metadata[i] = cli[i] }
123
+ p_metadata[:type] = cli[:project_type]
123
124
  transfer_metadata(p, p_metadata)
124
125
  # Download datasets
125
126
  call_cli([
@@ -138,6 +139,7 @@ module MiGA::Cli::Action::Wf
138
139
  ] + cli.files) unless cli.files.empty?
139
140
  # Define datasets metadata
140
141
  p.load
142
+ d_metadata[:type] = cli[:dataset_type]
141
143
  p.each_dataset { |d| transfer_metadata(d, d_metadata) }
142
144
  p
143
145
  end
@@ -165,6 +167,7 @@ module MiGA::Cli::Action::Wf
165
167
 
166
168
  def call_cli(cmd)
167
169
  cmd << '-v' if cli[:verbose]
170
+ MiGA::MiGA.DEBUG "Cli::Action::Wf.call_cli #{cmd}"
168
171
  MiGA::Cli.new(cmd.map(&:to_s)).launch
169
172
  end
170
173
 
@@ -155,6 +155,6 @@ module MiGA::Cli::OptHelper
155
155
  # If +sym+ is nil, +flag+ is used as Symbol
156
156
  def opt_flag(opt, flag, description, sym = nil)
157
157
  sym = flag.to_sym if sym.nil?
158
- opt.on("--#{flag}", description) { |v| self[sym] = v }
158
+ opt.on("--#{flag.to_s.gsub('_','-')}", description) { |v| self[sym] = v }
159
159
  end
160
160
  end
data/lib/miga/daemon.rb CHANGED
@@ -71,12 +71,18 @@ class MiGA::Daemon < MiGA::MiGA
71
71
  # Launches the +task+ with options +opts+ (as command-line arguments).
72
72
  # Supported tasks include: start, stop, restart, status.
73
73
  def daemon(task, opts=[])
74
+ MiGA.DEBUG "Daemon.daemon #{task} #{opts}"
74
75
  options = default_options
75
76
  opts.unshift(task)
76
77
  options[:ARGV] = opts
77
- Daemons.run_proc("MiGA:#{project.name}", options) do
78
- loop { break unless in_loop }
78
+ # This additional degree of separation below was introduced so the Daemons
79
+ # package doesn't kill the parent process in workflows.
80
+ pid = fork do
81
+ Daemons.run_proc("MiGA:#{project.name}", options) do
82
+ loop { break unless in_loop }
83
+ end
79
84
  end
85
+ Process.wait pid
80
86
  end
81
87
 
82
88
  ##
@@ -321,7 +327,14 @@ class MiGA::Daemon < MiGA::MiGA
321
327
  var: %w[key value],
322
328
  alive: %w[pid],
323
329
  kill: %w[pid]
324
- }.each { |k,v| runopts(k, sprintf(runopts(k), *v.map{ |i| "{{#{i}}}" })) }
330
+ }.each do |k,v|
331
+ runopts(
332
+ k, sprintf(
333
+ runopts(k).gsub(/%(\d+)\$d/, '%\\1$s'),
334
+ *v.map{ |i| "{{#{i}}}" }
335
+ )
336
+ )
337
+ end
325
338
  runopts(:format_version, 1)
326
339
  end
327
340
  end
data/lib/miga/dataset.rb CHANGED
@@ -55,6 +55,7 @@ class MiGA::Dataset < MiGA::MiGA
55
55
  File.expand_path("metadata/#{name}.json", project.path),
56
56
  metadata
57
57
  ]
58
+ save unless File.exist? @metadata_future[0]
58
59
  end
59
60
 
60
61
  ##
@@ -66,10 +67,8 @@ class MiGA::Dataset < MiGA::MiGA
66
67
  ##
67
68
  # Save any changes you've made in the dataset.
68
69
  def save
69
- if t = metadata[:tax] and n = t[:ns] and n == 'COMMUNITY'
70
- self.metadata[:type] = :metagenome
71
- end
72
- self.metadata.save
70
+ MiGA.DEBUG "Dataset.metadata: #{metadata.data}"
71
+ metadata.save
73
72
  end
74
73
 
75
74
  ##
@@ -116,6 +116,48 @@ module MiGA::Dataset::Result
116
116
  end
117
117
  adv
118
118
  end
119
+
120
+ ##
121
+ # Returns a Hash with tasks as key and status as value.
122
+ # See +result_status+ for possible values
123
+ def results_status
124
+ Hash[@@PREPROCESSING_TASKS.map { |task| [task, result_status(task)] }]
125
+ end
126
+
127
+ ##
128
+ # Returns the status of +task+. The status values are symbols:
129
+ # - ignore_inactive: the dataset is inactive
130
+ # - ignore_force: forced to ignore by metadata
131
+ # - ignore_project: incompatible project
132
+ # - ignore_noref: incompatible dataset, only for reference
133
+ # - ignore_multi: incompatible dataset, only for multi
134
+ # - ignore_nonmulti: incompatible dataset, only for nonmulti
135
+ # - ignore: incompatible dataset, unknown reason
136
+ # - complete: a task with registered results
137
+ # - pending: a task queued to be performed
138
+ def result_status(task)
139
+ if not get_result(task).nil?
140
+ :complete
141
+ elsif ignore_task?(task)
142
+ if not is_active?
143
+ :ignore_inactive
144
+ elsif metadata["run_#{task}"]
145
+ :ignore_force
146
+ elsif task == :taxonomy and project.metadata[:ref_project].nil?
147
+ :ignore_project
148
+ elsif @@_EXCLUDE_NOREF_TASKS_H[task] && ! is_ref?
149
+ :ignore_noref
150
+ elsif @@_ONLY_MULTI_TASKS_H[task] && ! is_multi?
151
+ :ignore_multi
152
+ elsif @@_ONLY_NONMULTI_TASKS_H[task] && ! is_nonmulti?
153
+ :ignore_nonmulti
154
+ else
155
+ :ignore
156
+ end
157
+ else
158
+ :pending
159
+ end
160
+ end
119
161
 
120
162
  ##
121
163
  # Clean-up all the stored distances, removing values for datasets no longer in
@@ -139,6 +139,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
139
139
  rank = 'dataset' if lineage.empty? and rank.nil?
140
140
  lineage[rank] = name unless rank.nil? or rank.nil?
141
141
  end
142
+ MiGA.DEBUG "Got lineage: #{lineage}"
142
143
  MiGA::Taxonomy.new(lineage)
143
144
  end
144
145
 
@@ -210,6 +211,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
210
211
  metadata[:is_type] = true
211
212
  metadata[:type_rel] = from_type
212
213
  end
214
+ metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
215
+ metadata[:suspect] = nil if metadata[:suspect].empty?
216
+ MiGA.DEBUG "Got type: #{from_type}"
213
217
  metadata
214
218
  end
215
219
 
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.5, 5, 1]
13
+ VERSION = [0.5, 7, 2]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2020, 2, 4)
21
+ VERSION_DATE = Date.new(2020, 2, 8)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -12,7 +12,7 @@ cd "$DIR"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  # Index taxonomy
15
- miga index_taxonomy -P "$PROJECT" -i "miga-project.taxonomy.json" --ref
15
+ miga tax_index -P "$PROJECT" -i "miga-project.taxonomy.json" --ref --active
16
16
 
17
17
  # Index metadata
18
18
  ruby -I "$MIGA/lib" \
data/test/daemon_test.rb CHANGED
@@ -79,7 +79,7 @@ class DaemonTest < Test::Unit::TestCase
79
79
  dpath = File.expand_path("daemon/MiGA:#{p.name}",p.path)
80
80
  assert(File.exist?("#{dpath}.pid"))
81
81
  out = capture_stdout { d.stop }
82
- assert(out.string =~ /MiGA:#{p.name}: trying to stop process with pid \d+/)
82
+ assert_equal('', out.string)
83
83
  assert(!File.exist?("#{dpath}.pid"))
84
84
  assert(File.exist?("#{dpath}.output"))
85
85
  File.open("#{dpath}.output", "r") do |fh|
data/test/dataset_test.rb CHANGED
@@ -50,7 +50,7 @@ class DatasetTest < Test::Unit::TestCase
50
50
  assert(!d2.is_multi?)
51
51
  assert(!d2.is_nonmulti?)
52
52
  assert_nil(d2.metadata[:type])
53
- d2.metadata[:tax] = {:ns=>"COMMUNITY"}
53
+ d2.metadata[:type] = :metagenome
54
54
  d2.save
55
55
  assert_equal(:metagenome, d2.metadata[:type])
56
56
  assert(d2.is_multi?)
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
1
+ ../../Scripts/FastA.N50.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
1
+ ../../Scripts/FastA.filterN.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
1
+ ../../Scripts/FastA.length.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
1
+ ../../Scripts/FastA.split.pl
@@ -1 +1 @@
1
- utils/enveomics/Scripts/lib/../../enveomics.R
1
+ ../../enveomics.R
data/utils/find-medoid.R CHANGED
@@ -20,18 +20,23 @@ find_medoids <- function(ani.df, out, clades) {
20
20
  dist <- enve.df2dist(ani.df, 'a', 'b', 'd', default.d = max(ani.df$d)*1.2)
21
21
  dist <- as.matrix(dist)
22
22
  cl <- read.table(clades, header = FALSE, sep = '\t', as.is = TRUE)[,1]
23
+ cl.s <- c()
23
24
  medoids <- c()
24
25
  for(i in cl){
25
26
  lab <- strsplit(i, ',')[[1]]
26
27
  cat('Clade of:', lab[1], '\n')
27
28
  if(length(lab) == 1) {
28
- med <- lab
29
+ lab.s <- lab
29
30
  } else {
30
- med <- lab[which.min(colSums(dist[lab, lab], na.rm = TRUE))]
31
+ lab.s <- lab[order(colSums(dist[lab, lab], na.rm = TRUE))]
31
32
  }
33
+ med <- lab.s[1]
32
34
  medoids <- c(medoids, med)
35
+ cl.s <- c(cl.s, paste(lab.s, collapse = ','))
33
36
  }
34
37
  write.table(medoids, out, quote = FALSE, row.names = FALSE, col.names = FALSE)
38
+ write.table(cl.s, paste(clades, '.sorted', sep = ''), quote = FALSE,
39
+ row.names = FALSE, col.names = FALSE)
35
40
  }
36
41
 
37
42
  #= Main
@@ -1,23 +1,24 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "miga"
4
- require "sqlite3"
3
+ require 'miga'
4
+ require 'sqlite3'
5
5
 
6
6
  p = MiGA::Project.load(ARGV[0])
7
7
  raise "Impossible to load project: #{ARGV[0]}." if p.nil?
8
8
 
9
9
  File.unlink(ARGV[1]) if File.exist? ARGV[1]
10
10
  db = SQLite3::Database.new(ARGV[1])
11
- db.execute "create table metadata(" +
12
- "`name` varchar(256), `field` varchar(256), `value` text)"
11
+ db.execute 'create table metadata(' \
12
+ '`name` varchar(256), `field` varchar(256), `value` text)'
13
13
 
14
14
  def searchable(db, d, k, v)
15
- db.execute "insert into metadata values(?,?,?)",
16
- d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, " ")} "
15
+ db.execute 'insert into metadata values(?,?,?)',
16
+ d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, ' ')} "
17
17
  end
18
18
 
19
19
  p.each_dataset do |name, d|
20
20
  next unless d.is_ref?
21
+ next unless d.is_active?
21
22
  searchable(db, d, :name, d.name)
22
23
  d.metadata.each do |k, v|
23
24
  next if [:created, :updated].include? k
@@ -47,6 +47,9 @@ module MiGA::SubcladeRunner::Pipeline
47
47
  dir = opts[:gsp_metric] == 'aai' ? '02.aai' : '03.ani'
48
48
  `Rscript '#{src}' ../../09.distances/#{dir}/miga-project.Rdata \
49
49
  miga-project.gsp-medoids miga-project.gsp-clades`
50
+ if File.exist? 'miga-project.gsp-clades.sorted'
51
+ File.rename 'miga-project.gsp-clades.sorted', 'miga-project.gsp-clades'
52
+ end
50
53
 
51
54
  # Propose clades
52
55
  ofh = File.open('miga-project.proposed-clades', 'w')
data/utils/subclades.R CHANGED
@@ -113,7 +113,7 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
113
113
  }
114
114
  write.tree(ani.ph, paste(out_base, ".nwk", sep=""))
115
115
  options(expressions=express.ori)
116
-
116
+
117
117
  # Silhouette
118
118
  say("Silhouette")
119
119
  nn <- length(labels(ani.d))
@@ -130,13 +130,13 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
130
130
  ds <- s.avg.z - s.neg.z - 2/(1:length(k)) - (1:length(k))/50
131
131
  if(mean(s[1,]<0)<0.75) ds[s[1,]<0] <- mean(ds) # <- k's with negative average
132
132
  top.n <- k[which.max(ds)]
133
-
133
+
134
134
  # Classify genomes
135
135
  say("Classify => k :", top.n, "| n :", length(labels(ani.d)))
136
136
  ani.cl <- pam(ani.d, top.n, pamonce=1)
137
137
  ani.types <- ani.cl$clustering
138
138
  ani.medoids <- ani.cl$medoids
139
-
139
+
140
140
  # Generate graphic report
141
141
  say("Graphic report")
142
142
  pdf(paste(out_base, ".pdf", sep=""), 7, 12)
@@ -149,7 +149,7 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
149
149
 
150
150
  # Save results
151
151
  write_text_report(out_base, ani.d, ani.medoids, ani.types)
152
-
152
+
153
153
  # Return data
154
154
  say("Cluster ready")
155
155
  return(list(
@@ -172,8 +172,8 @@ generate_empty_files <- function(out_base) {
172
172
  }
173
173
 
174
174
  write_text_report <- function(out_base, ani.d, ani.medoids, ani.types){
175
- say("Text report")
176
- write.table(ani.medoids, paste(out_base, "medoids", sep="."),
175
+ say('Text report')
176
+ write.table(ani.medoids, paste(out_base, 'medoids', sep='.'),
177
177
  quote=FALSE, col.names=FALSE, row.names=FALSE)
178
178
  classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
179
179
  ani.d.m <- 100 - as.matrix(ani.d)*100
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5.1
4
+ version: 0.5.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-04 00:00:00.000000000 Z
11
+ date: 2020-02-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -515,8 +515,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
515
515
  - !ruby/object:Gem::Version
516
516
  version: '0'
517
517
  requirements: []
518
- rubyforge_project:
519
- rubygems_version: 2.7.6
518
+ rubygems_version: 3.0.3
520
519
  signing_key:
521
520
  specification_version: 4
522
521
  summary: MiGA