miga-base 0.5.5.1 → 0.5.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 405d3cf6f84fa7f78f8026d1392be3fd8c32b3caea3d9e2c4c29a511ffeccb6c
4
- data.tar.gz: 29108306ca8358a1155f791a4a586fce73eb9dccbbf2841869ed450064508774
3
+ metadata.gz: 2898bb1227f7e473f1b11d89de6cec0dea005573374c2dac19d7dcaf21143a4b
4
+ data.tar.gz: f6dcbfeeccf9cbcc3d8103f2c27babb47f1594f43252e131dbc67ca9ff2ef294
5
5
  SHA512:
6
- metadata.gz: 84e3264d6d55ff810b6ba0ae415abac248b65ec411fde5780107c1974923c2ef7da2cd1ae86c74f36aad94af03534b1f326327f797eaf6aec1c2a20c444dfa8d
7
- data.tar.gz: cff5d84655677d1eabe298478d099074c1769cb4c74a753b10e3cbd62fb9fb0ed3b4df2ccc8924799c04d8a2e6be6ac7b3720f8ccbf7d83885cf17c5537fcd97
6
+ metadata.gz: 42c0349e42347f2bd5bdbab6eb6dd7520335c5b3e1bb107685a80691d5ed0cc385f0a475328183db3c5e40e8e6d434495794696804d4c1c6c9c550fc0891f05a
7
+ data.tar.gz: 75b711267b414526af80641a8ef8cd308c0b7e7593091772fbf3ea6916da7360d3ddd3ff980df3d7e19db621731596ab284a32f7f18b05241e23606d673340ab
@@ -27,6 +27,7 @@ class MiGA::Cli::Action < MiGA::MiGA
27
27
  ##
28
28
  # Launch the sequence
29
29
  def launch
30
+ MiGA.DEBUG 'Cli::Action.launch'
30
31
  empty_action if cli.argv.empty?
31
32
  parse_cli
32
33
  perform
@@ -120,6 +120,7 @@ module MiGA::Cli::Action::Wf
120
120
  # Define project metadata
121
121
  p = cli.load_project(:outdir, '-o')
122
122
  [:haai_p, :aai_p, :ani_p, :ess_coll].each { |i| p_metadata[i] = cli[i] }
123
+ p_metadata[:type] = cli[:project_type]
123
124
  transfer_metadata(p, p_metadata)
124
125
  # Download datasets
125
126
  call_cli([
@@ -138,6 +139,7 @@ module MiGA::Cli::Action::Wf
138
139
  ] + cli.files) unless cli.files.empty?
139
140
  # Define datasets metadata
140
141
  p.load
142
+ d_metadata[:type] = cli[:dataset_type]
141
143
  p.each_dataset { |d| transfer_metadata(d, d_metadata) }
142
144
  p
143
145
  end
@@ -165,6 +167,7 @@ module MiGA::Cli::Action::Wf
165
167
 
166
168
  def call_cli(cmd)
167
169
  cmd << '-v' if cli[:verbose]
170
+ MiGA::MiGA.DEBUG "Cli::Action::Wf.call_cli #{cmd}"
168
171
  MiGA::Cli.new(cmd.map(&:to_s)).launch
169
172
  end
170
173
 
@@ -155,6 +155,6 @@ module MiGA::Cli::OptHelper
155
155
  # If +sym+ is nil, +flag+ is used as Symbol
156
156
  def opt_flag(opt, flag, description, sym = nil)
157
157
  sym = flag.to_sym if sym.nil?
158
- opt.on("--#{flag}", description) { |v| self[sym] = v }
158
+ opt.on("--#{flag.to_s.gsub('_','-')}", description) { |v| self[sym] = v }
159
159
  end
160
160
  end
data/lib/miga/daemon.rb CHANGED
@@ -71,12 +71,18 @@ class MiGA::Daemon < MiGA::MiGA
71
71
  # Launches the +task+ with options +opts+ (as command-line arguments).
72
72
  # Supported tasks include: start, stop, restart, status.
73
73
  def daemon(task, opts=[])
74
+ MiGA.DEBUG "Daemon.daemon #{task} #{opts}"
74
75
  options = default_options
75
76
  opts.unshift(task)
76
77
  options[:ARGV] = opts
77
- Daemons.run_proc("MiGA:#{project.name}", options) do
78
- loop { break unless in_loop }
78
+ # This additional degree of separation below was introduced so the Daemons
79
+ # package doesn't kill the parent process in workflows.
80
+ pid = fork do
81
+ Daemons.run_proc("MiGA:#{project.name}", options) do
82
+ loop { break unless in_loop }
83
+ end
79
84
  end
85
+ Process.wait pid
80
86
  end
81
87
 
82
88
  ##
@@ -321,7 +327,14 @@ class MiGA::Daemon < MiGA::MiGA
321
327
  var: %w[key value],
322
328
  alive: %w[pid],
323
329
  kill: %w[pid]
324
- }.each { |k,v| runopts(k, sprintf(runopts(k), *v.map{ |i| "{{#{i}}}" })) }
330
+ }.each do |k,v|
331
+ runopts(
332
+ k, sprintf(
333
+ runopts(k).gsub(/%(\d+)\$d/, '%\\1$s'),
334
+ *v.map{ |i| "{{#{i}}}" }
335
+ )
336
+ )
337
+ end
325
338
  runopts(:format_version, 1)
326
339
  end
327
340
  end
data/lib/miga/dataset.rb CHANGED
@@ -55,6 +55,7 @@ class MiGA::Dataset < MiGA::MiGA
55
55
  File.expand_path("metadata/#{name}.json", project.path),
56
56
  metadata
57
57
  ]
58
+ save unless File.exist? @metadata_future[0]
58
59
  end
59
60
 
60
61
  ##
@@ -66,10 +67,8 @@ class MiGA::Dataset < MiGA::MiGA
66
67
  ##
67
68
  # Save any changes you've made in the dataset.
68
69
  def save
69
- if t = metadata[:tax] and n = t[:ns] and n == 'COMMUNITY'
70
- self.metadata[:type] = :metagenome
71
- end
72
- self.metadata.save
70
+ MiGA.DEBUG "Dataset.metadata: #{metadata.data}"
71
+ metadata.save
73
72
  end
74
73
 
75
74
  ##
@@ -116,6 +116,48 @@ module MiGA::Dataset::Result
116
116
  end
117
117
  adv
118
118
  end
119
+
120
+ ##
121
+ # Returns a Hash with tasks as key and status as value.
122
+ # See +result_status+ for possible values
123
+ def results_status
124
+ Hash[@@PREPROCESSING_TASKS.map { |task| [task, result_status(task)] }]
125
+ end
126
+
127
+ ##
128
+ # Returns the status of +task+. The status values are symbols:
129
+ # - ignore_inactive: the dataset is inactive
130
+ # - ignore_force: forced to ignore by metadata
131
+ # - ignore_project: incompatible project
132
+ # - ignore_noref: incompatible dataset, only for reference
133
+ # - ignore_multi: incompatible dataset, only for multi
134
+ # - ignore_nonmulti: incompatible dataset, only for nonmulti
135
+ # - ignore: incompatible dataset, unknown reason
136
+ # - complete: a task with registered results
137
+ # - pending: a task queued to be performed
138
+ def result_status(task)
139
+ if not get_result(task).nil?
140
+ :complete
141
+ elsif ignore_task?(task)
142
+ if not is_active?
143
+ :ignore_inactive
144
+ elsif metadata["run_#{task}"]
145
+ :ignore_force
146
+ elsif task == :taxonomy and project.metadata[:ref_project].nil?
147
+ :ignore_project
148
+ elsif @@_EXCLUDE_NOREF_TASKS_H[task] && ! is_ref?
149
+ :ignore_noref
150
+ elsif @@_ONLY_MULTI_TASKS_H[task] && ! is_multi?
151
+ :ignore_multi
152
+ elsif @@_ONLY_NONMULTI_TASKS_H[task] && ! is_nonmulti?
153
+ :ignore_nonmulti
154
+ else
155
+ :ignore
156
+ end
157
+ else
158
+ :pending
159
+ end
160
+ end
119
161
 
120
162
  ##
121
163
  # Clean-up all the stored distances, removing values for datasets no longer in
@@ -139,6 +139,7 @@ class MiGA::RemoteDataset < MiGA::MiGA
139
139
  rank = 'dataset' if lineage.empty? and rank.nil?
140
140
  lineage[rank] = name unless rank.nil? or rank.nil?
141
141
  end
142
+ MiGA.DEBUG "Got lineage: #{lineage}"
142
143
  MiGA::Taxonomy.new(lineage)
143
144
  end
144
145
 
@@ -210,6 +211,9 @@ class MiGA::RemoteDataset < MiGA::MiGA
210
211
  metadata[:is_type] = true
211
212
  metadata[:type_rel] = from_type
212
213
  end
214
+ metadata[:suspect] = (ncbi_asm_json_doc['exclfromrefseq'] || [])
215
+ metadata[:suspect] = nil if metadata[:suspect].empty?
216
+ MiGA.DEBUG "Got type: #{from_type}"
213
217
  metadata
214
218
  end
215
219
 
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.5, 5, 1]
13
+ VERSION = [0.5, 7, 2]
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2020, 2, 4)
21
+ VERSION_DATE = Date.new(2020, 2, 8)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -12,7 +12,7 @@ cd "$DIR"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  # Index taxonomy
15
- miga index_taxonomy -P "$PROJECT" -i "miga-project.taxonomy.json" --ref
15
+ miga tax_index -P "$PROJECT" -i "miga-project.taxonomy.json" --ref --active
16
16
 
17
17
  # Index metadata
18
18
  ruby -I "$MIGA/lib" \
data/test/daemon_test.rb CHANGED
@@ -79,7 +79,7 @@ class DaemonTest < Test::Unit::TestCase
79
79
  dpath = File.expand_path("daemon/MiGA:#{p.name}",p.path)
80
80
  assert(File.exist?("#{dpath}.pid"))
81
81
  out = capture_stdout { d.stop }
82
- assert(out.string =~ /MiGA:#{p.name}: trying to stop process with pid \d+/)
82
+ assert_equal('', out.string)
83
83
  assert(!File.exist?("#{dpath}.pid"))
84
84
  assert(File.exist?("#{dpath}.output"))
85
85
  File.open("#{dpath}.output", "r") do |fh|
data/test/dataset_test.rb CHANGED
@@ -50,7 +50,7 @@ class DatasetTest < Test::Unit::TestCase
50
50
  assert(!d2.is_multi?)
51
51
  assert(!d2.is_nonmulti?)
52
52
  assert_nil(d2.metadata[:type])
53
- d2.metadata[:tax] = {:ns=>"COMMUNITY"}
53
+ d2.metadata[:type] = :metagenome
54
54
  d2.save
55
55
  assert_equal(:metagenome, d2.metadata[:type])
56
56
  assert(d2.is_multi?)
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.N50.pl
1
+ ../../Scripts/FastA.N50.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.filterN.pl
1
+ ../../Scripts/FastA.filterN.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/assembly.pbs/../../Scripts/FastA.length.pl
1
+ ../../Scripts/FastA.length.pl
@@ -1 +1 @@
1
- utils/enveomics/Pipelines/blast.pbs/../../Scripts/FastA.split.pl
1
+ ../../Scripts/FastA.split.pl
@@ -1 +1 @@
1
- utils/enveomics/Scripts/lib/../../enveomics.R
1
+ ../../enveomics.R
data/utils/find-medoid.R CHANGED
@@ -20,18 +20,23 @@ find_medoids <- function(ani.df, out, clades) {
20
20
  dist <- enve.df2dist(ani.df, 'a', 'b', 'd', default.d = max(ani.df$d)*1.2)
21
21
  dist <- as.matrix(dist)
22
22
  cl <- read.table(clades, header = FALSE, sep = '\t', as.is = TRUE)[,1]
23
+ cl.s <- c()
23
24
  medoids <- c()
24
25
  for(i in cl){
25
26
  lab <- strsplit(i, ',')[[1]]
26
27
  cat('Clade of:', lab[1], '\n')
27
28
  if(length(lab) == 1) {
28
- med <- lab
29
+ lab.s <- lab
29
30
  } else {
30
- med <- lab[which.min(colSums(dist[lab, lab], na.rm = TRUE))]
31
+ lab.s <- lab[order(colSums(dist[lab, lab], na.rm = TRUE))]
31
32
  }
33
+ med <- lab.s[1]
32
34
  medoids <- c(medoids, med)
35
+ cl.s <- c(cl.s, paste(lab.s, collapse = ','))
33
36
  }
34
37
  write.table(medoids, out, quote = FALSE, row.names = FALSE, col.names = FALSE)
38
+ write.table(cl.s, paste(clades, '.sorted', sep = ''), quote = FALSE,
39
+ row.names = FALSE, col.names = FALSE)
35
40
  }
36
41
 
37
42
  #= Main
@@ -1,23 +1,24 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "miga"
4
- require "sqlite3"
3
+ require 'miga'
4
+ require 'sqlite3'
5
5
 
6
6
  p = MiGA::Project.load(ARGV[0])
7
7
  raise "Impossible to load project: #{ARGV[0]}." if p.nil?
8
8
 
9
9
  File.unlink(ARGV[1]) if File.exist? ARGV[1]
10
10
  db = SQLite3::Database.new(ARGV[1])
11
- db.execute "create table metadata(" +
12
- "`name` varchar(256), `field` varchar(256), `value` text)"
11
+ db.execute 'create table metadata(' \
12
+ '`name` varchar(256), `field` varchar(256), `value` text)'
13
13
 
14
14
  def searchable(db, d, k, v)
15
- db.execute "insert into metadata values(?,?,?)",
16
- d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, " ")} "
15
+ db.execute 'insert into metadata values(?,?,?)',
16
+ d.name, k.to_s, " #{v.to_s.downcase.gsub(/[^A-Za-z0-9\-]+/, ' ')} "
17
17
  end
18
18
 
19
19
  p.each_dataset do |name, d|
20
20
  next unless d.is_ref?
21
+ next unless d.is_active?
21
22
  searchable(db, d, :name, d.name)
22
23
  d.metadata.each do |k, v|
23
24
  next if [:created, :updated].include? k
@@ -47,6 +47,9 @@ module MiGA::SubcladeRunner::Pipeline
47
47
  dir = opts[:gsp_metric] == 'aai' ? '02.aai' : '03.ani'
48
48
  `Rscript '#{src}' ../../09.distances/#{dir}/miga-project.Rdata \
49
49
  miga-project.gsp-medoids miga-project.gsp-clades`
50
+ if File.exist? 'miga-project.gsp-clades.sorted'
51
+ File.rename 'miga-project.gsp-clades.sorted', 'miga-project.gsp-clades'
52
+ end
50
53
 
51
54
  # Propose clades
52
55
  ofh = File.open('miga-project.proposed-clades', 'w')
data/utils/subclades.R CHANGED
@@ -113,7 +113,7 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
113
113
  }
114
114
  write.tree(ani.ph, paste(out_base, ".nwk", sep=""))
115
115
  options(expressions=express.ori)
116
-
116
+
117
117
  # Silhouette
118
118
  say("Silhouette")
119
119
  nn <- length(labels(ani.d))
@@ -130,13 +130,13 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
130
130
  ds <- s.avg.z - s.neg.z - 2/(1:length(k)) - (1:length(k))/50
131
131
  if(mean(s[1,]<0)<0.75) ds[s[1,]<0] <- mean(ds) # <- k's with negative average
132
132
  top.n <- k[which.max(ds)]
133
-
133
+
134
134
  # Classify genomes
135
135
  say("Classify => k :", top.n, "| n :", length(labels(ani.d)))
136
136
  ani.cl <- pam(ani.d, top.n, pamonce=1)
137
137
  ani.types <- ani.cl$clustering
138
138
  ani.medoids <- ani.cl$medoids
139
-
139
+
140
140
  # Generate graphic report
141
141
  say("Graphic report")
142
142
  pdf(paste(out_base, ".pdf", sep=""), 7, 12)
@@ -149,7 +149,7 @@ subclade_clustering <- function(out_base, thr, ani.d, dist_rdata) {
149
149
 
150
150
  # Save results
151
151
  write_text_report(out_base, ani.d, ani.medoids, ani.types)
152
-
152
+
153
153
  # Return data
154
154
  say("Cluster ready")
155
155
  return(list(
@@ -172,8 +172,8 @@ generate_empty_files <- function(out_base) {
172
172
  }
173
173
 
174
174
  write_text_report <- function(out_base, ani.d, ani.medoids, ani.types){
175
- say("Text report")
176
- write.table(ani.medoids, paste(out_base, "medoids", sep="."),
175
+ say('Text report')
176
+ write.table(ani.medoids, paste(out_base, 'medoids', sep='.'),
177
177
  quote=FALSE, col.names=FALSE, row.names=FALSE)
178
178
  classif <- cbind(names(ani.types), ani.types, ani.medoids[ ani.types ], NA)
179
179
  ani.d.m <- 100 - as.matrix(ani.d)*100
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.5.1
4
+ version: 0.5.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-04 00:00:00.000000000 Z
11
+ date: 2020-02-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -515,8 +515,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
515
515
  - !ruby/object:Gem::Version
516
516
  version: '0'
517
517
  requirements: []
518
- rubyforge_project:
519
- rubygems_version: 2.7.6
518
+ rubygems_version: 3.0.3
520
519
  signing_key:
521
520
  specification_version: 4
522
521
  summary: MiGA