miga-base 0.7.13.2 → 0.7.16.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 36fd2b4078044d448feccc3a1c08f4299663692c3f564fd0d390808a2d2f07e7
4
- data.tar.gz: b57924e0a40b316f186d5089455e1d2221f3e11f559d91c0d9f5d8e8e621b257
3
+ metadata.gz: c515f327c9d7178b1891283f849d422885b3640f8cded39cfccd048e354acf16
4
+ data.tar.gz: 889a399e0fa68b54c901ec513e2049ab046593bc433185511516dbf9b3cedb7d
5
5
  SHA512:
6
- metadata.gz: 47038d85b7680fff3d74b176e1fc46125b0b76f5c7044c6ae3153f2192584e7934c480c901ab538a926ab9f5b2af9ac9d54c16ce88231043cb2e5bb0c0ad40d8
7
- data.tar.gz: b1878fb728f2e3414e8c9ffdc8af436d50f66c64588ecbff4b991bb2df93528e7b4fd3177b47ad4a136e45a972ea07c2c48328234ec1b5579795311bcd20c2d6
6
+ metadata.gz: f4a9d5515ddf4b208b81612ceafcdae0eeb9cd37266df5f0175cef859adb9b24fd6e9ac5adf384ad9c91f8ecc08d6d5522eef8b9dd96e7d39740b14f83fdaf53
7
+ data.tar.gz: 117bc5b917e1008084e064cd4d87f290a83b6962e93560abfad254d23345a7685bf9ad3a345f2ade98ea5c13b4be52ccf608683ede8c320f3b8ee1250c2cec32
data/README.md CHANGED
@@ -11,11 +11,12 @@
11
11
 
12
12
  For additional information on MiGA, visit:
13
13
 
14
- * [MiGA Online][miga-online]: The Microbial Genomes Atlas Online.
14
+ * [MiGA Online][miga-online]: The Microbial Genomes Atlas Online
15
+ * [MiGA@XSEDE][miga-at-xsede]: The MiGA@XSEDE Gateway
15
16
  * [MiGA users list][mailing-list]:
16
- Forum to discuss with other users and developers.
17
- * [MiGA manual][manual]: The definitive guide to MiGA.
18
- * [MiGA API docs][rubydoc]: Inner-workings of the `miga-base` gem.
17
+ Forum to discuss with other users and developers
18
+ * [MiGA manual][manual]: The definitive guide to MiGA
19
+ * [MiGA API docs][rubydoc]: Inner-workings of the `miga-base` gem
19
20
  * [MiGA Web][miga-web]: MiGA on Rails!
20
21
 
21
22
  # For the impatient
@@ -39,7 +40,8 @@ You have two options:
39
40
 
40
41
  Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
41
42
  collaboration between [Kostas Lab][kostas] at the Georgia Institute of
42
- Technology and [RDP][rdp] at Michigan State University.
43
+ Technology, [Rodriguez-R Lab][lrr] at the University of Innsbruck,
44
+ and [RDP][rdp] at Michigan State University.
43
45
 
44
46
  See also the [complete list of contributors](manual/part1/contributors.md).
45
47
 
@@ -47,7 +49,7 @@ See also the [complete list of contributors](manual/part1/contributors.md).
47
49
 
48
50
  See [LICENSE](LICENSE).
49
51
 
50
- [lrr]: http://lmrodriguezr.github.io/
52
+ [lrr]: https://rodriguez-r.com/
51
53
  [mailing-list]: http://support.microbial-genomes.org/
52
54
  [manual]: https://manual.microbial-genomes.org/
53
55
  [rubydoc]: http://www.rubydoc.info/github/bio-miga/miga
@@ -55,5 +57,6 @@ See [LICENSE](LICENSE).
55
57
  [miga-web]: https://github.com/bio-miga/miga-web
56
58
  [miga-gui]: https://github.com/bio-miga/miga-gui
57
59
  [miga-online]: http://microbial-genomes.org/
60
+ [miga-at-xsede]: https://xsede.microbial-genomes.org/
58
61
  [kostas]: http://enve-omics.gatech.edu/
59
62
  [rdp]: http://rdp.cme.msu.edu/
@@ -110,30 +110,9 @@ class MiGA::Cli < MiGA::MiGA
110
110
  end
111
111
 
112
112
  ##
113
- # Reports the advance of a task at +step+ (String), the +n+ out of +total+.
114
- # The advance is reported in powers of 1,024 if +bin+ is true, or powers of
115
- # 1,000 otherwise.
116
- # The report goes to $stderr iff --verborse
117
- def advance(step, n = 0, total = nil, bin = true)
118
- return unless self[:verbose]
119
-
120
- adv = total.nil? ? (n == 0 ? '' : num_suffix(n, bin)) :
121
- ('%.1f%% (%s/%s)' % [100.0 * n / total,
122
- num_suffix(n, bin), num_suffix(total, bin)])
123
- $stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
124
- end
125
-
126
- def num_suffix(n, bin = false)
127
- p = ''
128
- { T: 4, G: 3, M: 2, K: 1 }.each do |k, x|
129
- v = (bin ? 1024 : 1e3)**x
130
- if n > v
131
- n = '%.1f' % (n / v)
132
- p = k
133
- break
134
- end
135
- end
136
- "#{n}#{p}"
113
+ # Same as MiGA::MiGA#advance, but checks if the CLI is verbose
114
+ def advance(*par)
115
+ super(*par) if self[:verbose]
137
116
  end
138
117
 
139
118
  ##
@@ -15,12 +15,12 @@ class MiGA::Cli::Action::Browse < MiGA::Cli::Action
15
15
  p = cli.load_project
16
16
  create_empty_page(p)
17
17
  generate_project_page(p)
18
- say 'Creating dataset pages'
18
+ cli.say 'Creating dataset pages'
19
19
  cli.load_project.each_dataset do |d|
20
20
  generate_dataset_page(p, d)
21
21
  end
22
22
  generate_datasets_index(p)
23
- say "Open in your browser: #{File.join(p.path, 'index.html')}"
23
+ cli.say "Open in your browser: #{File.join(p.path, 'index.html')}"
24
24
  end
25
25
 
26
26
  private
@@ -28,7 +28,7 @@ class MiGA::Cli::Action::Browse < MiGA::Cli::Action
28
28
  ##
29
29
  # Create an empty page with necessary assets for project +p+
30
30
  def create_empty_page(p)
31
- say 'Creating project page'
31
+ cli.say 'Creating project page'
32
32
  FileUtils.mkdir_p(browse_file(p, '.'))
33
33
  %w[favicon-32.png style.css].each do |i|
34
34
  FileUtils.cp(template_file(i), browse_file(p, i))
@@ -103,7 +103,7 @@ class MiGA::Cli::Action::Browse < MiGA::Cli::Action
103
103
  ##
104
104
  # Create pages for reference and query dataset indexes
105
105
  def generate_datasets_index(p)
106
- say 'Creating index pages'
106
+ cli.say 'Creating index pages'
107
107
  data = format_dataset_index(p)
108
108
  data.each do |k, v|
109
109
  write_file(p, "#{k}_datasets.html") do
@@ -42,9 +42,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
42
42
  '--no-summaries',
43
43
  'Do not generate intermediate step summaries'
44
44
  ) { |v| cli[:summaries] = v }
45
- opts_for_wf(
46
- opt, 'Input genome assemblies (nucleotides, FastA)', qual: false
47
- )
45
+ opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)')
48
46
  end
49
47
  end
50
48
 
@@ -6,7 +6,7 @@ require 'miga/daemon'
6
6
 
7
7
  class MiGA::Cli::Action::Daemon < MiGA::Cli::Action
8
8
  def parse_cli
9
- cli.defaults = { daemon_opts: [] }
9
+ cli.defaults = { daemon_opts: [], show_log: false }
10
10
  cli.expect_operation = true
11
11
  cli.parse do |opt|
12
12
  opt.separator 'Available operations:'
@@ -45,6 +45,10 @@ class MiGA::Cli::Action::Daemon < MiGA::Cli::Action
45
45
  '--json PATH',
46
46
  'Path to a custom daemon definition in json format'
47
47
  ) { |v| cli[:json] = v }
48
+ opt.on(
49
+ '--show-log',
50
+ 'Display log on advance instead of the progress summary'
51
+ ) { |v| cli[:show_log] = v }
48
52
  cli.opt_common(opt)
49
53
 
50
54
  opt.separator 'Daemon options:'
@@ -73,6 +77,7 @@ class MiGA::Cli::Action::Daemon < MiGA::Cli::Action
73
77
  d = MiGA::Daemon.new(p, cli[:json])
74
78
  dopts = %i[latency maxjobs nodelist ppn shutdown_when_done]
75
79
  dopts.each { |k| d.runopts(k, cli[k]) }
80
+ d.show_log! if cli[:show_log]
76
81
  d.daemon(cli.operation, cli[:daemon_opts])
77
82
  end
78
83
  end
@@ -37,6 +37,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
37
37
  @@OPERATIONS = {
38
38
  status: ['status', 'Update metadata status of all datasets'],
39
39
  db: ['databases', 'Check integrity of database files'],
40
+ bidir: ['bidirectional', 'Check distances are bidirectional'],
40
41
  dist: ['distances', 'Check distance summary tables'],
41
42
  files: ['files', 'Check for outdated files'],
42
43
  cds: ['cds', 'Check for gzipped genes and proteins'],
@@ -85,6 +86,24 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
85
86
  cli.say
86
87
  end
87
88
 
89
+ ##
90
+ # Perform bidirectional operation with MiGA::Cli +cli+
91
+ def check_bidir(cli)
92
+ cli.say 'Checking that reference distances are bidirectional'
93
+ ref_ds = cli.load_project.each_dataset.select(&:ref?)
94
+ ref_names = ref_ds.map(&:name)
95
+ n, k = ref_ds.size, 0
96
+ ref_ds.each do |d|
97
+ cli.advance('Datasets:', k += 1, n, false)
98
+ saved = saved_targets(d)
99
+ next if saved.nil?
100
+
101
+ to_save = ref_names - saved
102
+ to_save.each { |k| save_bidirectional(cli.load_project.dataset(k), d) }
103
+ end
104
+ cli.say
105
+ end
106
+
88
107
  ##
89
108
  # Perform distances operation with MiGA::Cli +cli+
90
109
  def check_dist(cli)
@@ -99,4 +99,46 @@ module MiGA::Cli::Action::Doctor::Base
99
99
  cli.say '- Removing tables, recompute'
100
100
  res.remove!
101
101
  end
102
+
103
+ ##
104
+ # Returns all targets identified by AAI
105
+ def saved_targets(dataset)
106
+ # Return nil if distance or database are not retrievable
107
+ dist = dataset.result(:distances) or return
108
+ path = dist.file_path(:aai_db) or return
109
+
110
+ o = []
111
+ SQLite3::Database.new(path) do |conn|
112
+ o = conn.execute('select seq2 from aai').map(&:first)
113
+ end
114
+ o
115
+ end
116
+
117
+ ##
118
+ # Saves all the distance estimates in +a+ -> +b+ into the +b+ databases
119
+ # (as +b+ -> +a+), where both +a+ and +b+ are MiGA::Dataset objects
120
+ def save_bidirectional(a, b)
121
+ each_database_file(a) do |db_file, metric, result|
122
+ data = nil
123
+ SQLite3::Database.new(db_file) do |conn|
124
+ data =
125
+ conn.execute(
126
+ "select seq1, seq2, #{metric}, sd, n, omega " +
127
+ "from #{metric} where seq2 = ? limit 1", b.name
128
+ ).first
129
+ end
130
+ next if data.nil? || data.empty?
131
+
132
+ db_file_b = File.join(File.dirname(db_file), "#{b.name}.db")
133
+ next unless File.exist?(db_file_b)
134
+
135
+ data[0], data[1] = data[1], data[0]
136
+ SQLite3::Database.new(db_file_b) do |conn|
137
+ conn.execute(
138
+ "insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
139
+ "values(?, ?, ?, ?, ?, ?)", data
140
+ )
141
+ end
142
+ end
143
+ end
102
144
  end
@@ -9,16 +9,13 @@ class MiGA::Cli::Action::QualityWf < MiGA::Cli::Action
9
9
 
10
10
  def parse_cli
11
11
  default_opts_for_wf
12
- cli.defaults = { mytaxa: false }
12
+ cli.defaults = { mytaxa: false, min_qual: 'no' }
13
13
  cli.parse do |opt|
14
14
  opt.on(
15
15
  '-m', '--mytaxa-scan',
16
16
  'Perform MyTaxa scan analysis'
17
17
  ) { |v| cli[:mytaxa] = v }
18
- opts_for_wf(
19
- opt, 'Input genome assemblies (nucleotides, FastA)',
20
- qual: false
21
- )
18
+ opts_for_wf(opt, 'Input genome assemblies (nucleotides, FastA)')
22
19
  end
23
20
  end
24
21
 
@@ -9,7 +9,7 @@ module MiGA::Cli::Action::Wf
9
9
  cli.defaults = {
10
10
  clean: false, regexp: MiGA::Cli.FILE_REGEXP,
11
11
  project_type: :genomes, dataset_type: :popgenome,
12
- ncbi_draft: true
12
+ ncbi_draft: true, min_qual: 25.0
13
13
  }
14
14
  end
15
15
 
@@ -42,10 +42,10 @@ module MiGA::Cli::Action::Wf
42
42
  end
43
43
  if params[:qual]
44
44
  opt.on(
45
- '--min-qual FLOAT', Float,
45
+ '--min-qual FLOAT',
46
46
  'Minimum genome quality to include in analysis',
47
- 'By default: 50.0'
48
- ) { |v| cli[:min_qual] = v }
47
+ "By default: #{cli[:min_qual]}"
48
+ ) { |v| cli[:min_qual] = v == 'no' ? v : v.to_f }
49
49
  end
50
50
  if params[:cleanup]
51
51
  opt.on(
@@ -6,6 +6,7 @@ require 'miga/json'
6
6
  require 'miga/common/base'
7
7
  require 'miga/common/path'
8
8
  require 'miga/common/format'
9
+ require 'stringio'
9
10
 
10
11
  ##
11
12
  # Generic class used to handle system-wide information and methods, and parent
@@ -38,10 +39,42 @@ class MiGA::MiGA
38
39
  ##
39
40
  # Print +par+ ensuring new line at the end.
40
41
  # Date/time-stamp each line.
41
- # If the first parameter is +IO+, the output is sent there,
42
+ # If the first parameter is +IO+ or +StringIO+ the output is sent there,
42
43
  # otherwise it's sent to +$stderr+
43
44
  def say(*par)
44
- io = par.first.is_a?(IO) ? par.shift : $stderr
45
+ io = like_io?(par.first) ? par.shift : $stderr
45
46
  io.puts(*par.map { |i| "[#{Time.now}] #{i}" })
46
47
  end
48
+
49
+ ##
50
+ # Reports the advance of a task at +step+ (String), the +n+ out of +total+.
51
+ # The advance is reported in powers of 1,024 if +bin+ is true, or powers of
52
+ # 1,000 otherwise.
53
+ # The report always goes to $stderr; MiGA::Cli#advance adds the --verbose check
54
+ def advance(step, n = 0, total = nil, bin = true)
55
+ adv = total.nil? ? (n == 0 ? '' : num_suffix(n, bin)) :
56
+ ('%.1f%% (%s/%s)' % [100.0 * n / total,
57
+ num_suffix(n, bin), num_suffix(total, bin)])
58
+ $stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
59
+ end
60
+
61
+ ##
62
+ # Return formatted number +n+ with the appropriate units as
63
+ # powers of 1,000 (if +bin+ if false) or 1,024 (otherwise)
64
+ def num_suffix(n, bin = false)
65
+ p = ''
66
+ { T: 4, G: 3, M: 2, K: 1 }.each do |k, x|
67
+ v = (bin ? 1024 : 1e3)**x
68
+ if n > v
69
+ n = '%.1f' % (n / v)
70
+ p = k
71
+ break
72
+ end
73
+ end
74
+ "#{n}#{p}"
75
+ end
76
+
77
+ def like_io?(obj)
78
+ obj.is_a?(IO) || obj.is_a?(StringIO)
79
+ end
47
80
  end
@@ -72,6 +72,7 @@ class MiGA::Daemon < MiGA::MiGA
72
72
  say '-----------------------------------'
73
73
  say 'MiGA:%s launched' % project.name
74
74
  say '-----------------------------------'
75
+ miga_say "Saving log to: #{output_file}" unless show_log?
75
76
  recalculate_status!
76
77
  load_status
77
78
  say 'Configuration options:'
@@ -111,10 +112,12 @@ class MiGA::Daemon < MiGA::MiGA
111
112
  say(*msg) if verbosity >= level
112
113
  end
113
114
 
115
+ alias miga_say say
116
+
114
117
  ##
115
118
  # Same as +l_say+ with +level = 1+
116
119
  def say(*msg)
117
- super(*msg) if verbosity >= 1
120
+ super(logfh, *msg) if verbosity >= 1
118
121
  end
119
122
 
120
123
  ##
@@ -173,6 +176,12 @@ class MiGA::Daemon < MiGA::MiGA
173
176
  o = true if ds.ref?
174
177
  queue_job(:d, ds)
175
178
  end
179
+ unless show_log?
180
+ n = project.dataset_names.count
181
+ k = jobs_to_run.size + jobs_running.size
182
+ advance('Datasets:', n - k, n, false)
183
+ miga_say if k == 0
184
+ end
176
185
  o
177
186
  end
178
187
 
@@ -332,8 +341,9 @@ class MiGA::Daemon < MiGA::MiGA
332
341
  kill: %w[pid]
333
342
  }.each do |k, v|
334
343
  if !runopts(k).nil? && runopts(k) =~ /%(\d+\$)?[ds]/
335
- runopts(k,
336
- runopts(k).gsub(/%(\d+\$)?d/, '%\\1s') % v.map { |i| "{{#{i}}}" })
344
+ runopts(
345
+ k, runopts(k).gsub(/%(\d+\$)?d/, '%\\1s') % v.map { |i| "{{#{i}}}" }
346
+ )
337
347
  end
338
348
  end
339
349
  runopts(:format_version, 1)
@@ -17,7 +17,7 @@ module MiGA::Daemon::Base
17
17
  if !force && v == 0 && k != :verbosity
18
18
  raise "Daemon's #{k} cannot be set to zero"
19
19
  end
20
- when :shutdown_when_done
20
+ when :shutdown_when_done, :show_log
21
21
  v = !!v
22
22
  when :nodelist
23
23
  if v =~ /^\$/
@@ -73,4 +73,32 @@ module MiGA::Daemon::Base
73
73
  def verbosity
74
74
  runopts(:verbosity) || 1
75
75
  end
76
+
77
+ ##
78
+ # Writing file handler (IO) to the log file
79
+ def logfh
80
+ @logfh ||= nil
81
+ return $stderr if show_log?
82
+ return @logfh if @logfh && !@logfh.closed?
83
+
84
+ @logfh = File.open(output_file, 'w')
85
+ end
86
+
87
+ ##
88
+ # Display log instead of the progress summary
89
+ def show_log!
90
+ @show_log = true
91
+ end
92
+
93
+ ##
94
+ # Display progress summary instead of the log
95
+ def show_summary!
96
+ @runopts[:show_log] = false
97
+ end
98
+
99
+ ##
100
+ # Display log instead of the progress summary?
101
+ def show_log?
102
+ @runopts[:show_log] ||= false
103
+ end
76
104
  end
@@ -34,17 +34,29 @@ class MiGA::Json < MiGA::MiGA
34
34
  # +opts+.
35
35
  def parse(path, opts = {})
36
36
  opts = default_opts(opts)
37
- cont = opts[:contents] ? path : File.read(path)
38
- raise "Empty descriptor: #{opts[:contents] ? "''" : path}." if cont.empty?
39
37
 
40
- y = JSON.parse(cont,
41
- symbolize_names: opts[:symbolize],
42
- create_additions: opts[:additions])
38
+ # Read JSON
39
+ cont = path
40
+ 12.times do
41
+ cont = File.read(path)
42
+ break unless cont.empty?
43
+ sleep 1 # Wait up to 12 seconds for racing processes (iff empty file)
44
+ end unless opts[:contents]
45
+ raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
46
+
47
+ # Parse JSON
48
+ params = { symbolize_names: opts[:symbolize],
49
+ create_additions: opts[:additions] }
50
+ y = JSON.parse(cont, params)
51
+
52
+ # Add defaults
43
53
  unless opts[:default].nil?
44
54
  opts[:default] = parse(opts[:default]) if opts[:default].is_a? String
45
55
  y.each { |k, v| opts[:default][k] = v }
46
56
  y = opts[:default]
47
57
  end
58
+
59
+ # Return
48
60
  y
49
61
  end
50
62
 
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 13, 2]
11
+ VERSION = [0.7, 16, 1]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 8, 4)
19
+ VERSION_DATE = Date.new(2020, 10, 19)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
@@ -9,7 +9,7 @@ class DaemonTest < Test::Unit::TestCase
9
9
  def setup
10
10
  initialize_miga_home(
11
11
  <<~DAEMON
12
- { "maxjobs": 1, "ppn": 1, "latency": 2, "varsep": " ",
12
+ { "maxjobs": 1, "ppn": 1, "latency": 2, "varsep": " ", "show_log": true,
13
13
  "var": "{{key}}={{value}}", "cmd": "echo {{task_name}} >/dev/null",
14
14
  "alive": "echo 1 # {{pid}}", "type": "bash", "format_version": 1 }
15
15
  DAEMON
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.13.2
4
+ version: 0.7.16.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-04 00:00:00.000000000 Z
11
+ date: 2020-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons