miga-base 0.7.15.0 → 0.7.16.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0160b454e26886637f28dea78ab57e66b1c53ee3fca610c64339f3265ce86afb
4
- data.tar.gz: 6df1a7bfc7ebf0493265cd6be8f8600a575b3c20f07ec43f7c25b8de3035b5c8
3
+ metadata.gz: 51173aea1659d14d35f485b22b4371bdf8cf9d16ae81c673d09c25cf3e36d1ac
4
+ data.tar.gz: 4c6cbaed4bc9f0ea2f71d75ccfd22e3b0ac38e62c138166b768feb91c9a804b3
5
5
  SHA512:
6
- metadata.gz: 00245e9a4d698ed335f1777bd1f9156ecc2ba325c4ef66f42f01f9e59c70e4bfb42a588a7be62271dd55df73e2fc737a29e24253487ba55e9d8645a43508d91c
7
- data.tar.gz: 12c8beb33f81fc7114d957a59982d4bfab1deceb21db27dde1a93f474d4ab5918308ac04d351c86b9a71f8a6804e0311b3230ccbad16de320523246f1e7e3f9b
6
+ metadata.gz: 6e00443678373cb3125e1d73d111ff909cfaa092c92ffd00089e2d122bad0f298133e4c4b2da00370d4ff56e3d1281b8e3ebe316e23fa675b73b217b6045e4e8
7
+ data.tar.gz: f5d5f2981f86e0a79a1c96b81cdb76853eeacd11a14c23d630bcf067d390074d40273159b2b95c3ea33d88d1ca1c66bce19338809af410e48a0db2138d4974d3
data/README.md CHANGED
@@ -11,11 +11,12 @@
11
11
 
12
12
  For additional information on MiGA, visit:
13
13
 
14
- * [MiGA Online][miga-online]: The Microbial Genomes Atlas Online.
14
+ * [MiGA Online][miga-online]: The Microbial Genomes Atlas Online
15
+ * [MiGA@XSEDE][miga-at-xsede]: The MiGA@XSEDE Gateway
15
16
  * [MiGA users list][mailing-list]:
16
- Forum to discuss with other users and developers.
17
- * [MiGA manual][manual]: The definitive guide to MiGA.
18
- * [MiGA API docs][rubydoc]: Inner-workings of the `miga-base` gem.
17
+ Forum to discuss with other users and developers
18
+ * [MiGA manual][manual]: The definitive guide to MiGA
19
+ * [MiGA API docs][rubydoc]: Inner-workings of the `miga-base` gem
19
20
  * [MiGA Web][miga-web]: MiGA on Rails!
20
21
 
21
22
  # For the impatient
@@ -39,7 +40,8 @@ You have two options:
39
40
 
40
41
  Developed and maintained by [Luis M. Rodriguez-R][lrr]. MiGA is the result of a
41
42
  collaboration between [Kostas Lab][kostas] at the Georgia Institute of
42
- Technology and [RDP][rdp] at Michigan State University.
43
+ Technology, [Rodriguez-R Lab][lrr] at the University of Innsbruck,
44
+ and [RDP][rdp] at Michigan State University.
43
45
 
44
46
  See also the [complete list of contributors](manual/part1/contributors.md).
45
47
 
@@ -47,7 +49,7 @@ See also the [complete list of contributors](manual/part1/contributors.md).
47
49
 
48
50
  See [LICENSE](LICENSE).
49
51
 
50
- [lrr]: http://lmrodriguezr.github.io/
52
+ [lrr]: https://rodriguez-r.com/
51
53
  [mailing-list]: http://support.microbial-genomes.org/
52
54
  [manual]: https://manual.microbial-genomes.org/
53
55
  [rubydoc]: http://www.rubydoc.info/github/bio-miga/miga
@@ -55,5 +57,6 @@ See [LICENSE](LICENSE).
55
57
  [miga-web]: https://github.com/bio-miga/miga-web
56
58
  [miga-gui]: https://github.com/bio-miga/miga-gui
57
59
  [miga-online]: http://microbial-genomes.org/
60
+ [miga-at-xsede]: https://xsede.microbial-genomes.org/
58
61
  [kostas]: http://enve-omics.gatech.edu/
59
62
  [rdp]: http://rdp.cme.msu.edu/
@@ -7,6 +7,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
7
7
  include MiGA::Cli::Action::Doctor::Base
8
8
 
9
9
  def parse_cli
10
+ cli.defaults = { threads: 1 }
10
11
  cli.defaults = Hash[@@OPERATIONS.keys.map { |i| [i, true] }]
11
12
  cli.parse do |opt|
12
13
  operation_n = Hash[@@OPERATIONS.map { |k, v| [v[0], k] }]
@@ -24,6 +25,10 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
24
25
  @@OPERATIONS.each_key { |i| cli[i] = false }
25
26
  cli[op_k] = true
26
27
  end
28
+ opt.on(
29
+ '-t', '--threads INT', Integer,
30
+ "Concurrent threads to use. By default: #{cli[:threads]}"
31
+ ) { |v| cli[:threads] = v }
27
32
  end
28
33
  end
29
34
 
@@ -37,6 +42,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
37
42
  @@OPERATIONS = {
38
43
  status: ['status', 'Update metadata status of all datasets'],
39
44
  db: ['databases', 'Check integrity of database files'],
45
+ bidir: ['bidirectional', 'Check distances are bidirectional'],
40
46
  dist: ['distances', 'Check distance summary tables'],
41
47
  files: ['files', 'Check for outdated files'],
42
48
  cds: ['cds', 'Check for gzipped genes and proteins'],
@@ -58,11 +64,19 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
58
64
  # Perform status operation with MiGA::Cli +cli+
59
65
  def check_status(cli)
60
66
  cli.say 'Updating metadata status'
61
- n, k = cli.load_project.dataset_names.size, 0
62
- cli.load_project.each_dataset do |d|
63
- cli.advance('Datasets:', k += 1, n, false)
64
- d.recalculate_status
67
+ p = cli.load_project
68
+ n = p.dataset_names.size
69
+ (0 .. cli[:threads] - 1).map do |i|
70
+ Process.fork do
71
+ k = 0
72
+ cli.load_project.each_dataset do |d|
73
+ k += 1
74
+ cli.advance('Datasets:', k, n, false) if i == 0
75
+ d.recalculate_status if k % cli[:threads] == i
76
+ end
77
+ end
65
78
  end
79
+ Process.waitall
66
80
  cli.say
67
81
  end
68
82
 
@@ -70,18 +84,59 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
70
84
  # Perform databases operation with MiGA::Cli +cli+
71
85
  def check_db(cli)
72
86
  cli.say 'Checking integrity of databases'
73
- n, k = cli.load_project.dataset_names.size, 0
74
- cli.load_project.each_dataset do |d|
75
- cli.advance('Datasets:', k += 1, n, false)
76
- each_database_file(d) do |db_file, metric, result|
77
- check_sqlite3_database(db_file, metric) do
78
- cli.say(" > Removing malformed database from #{d.name}:#{result} ")
79
- File.unlink(db_file)
80
- r = d.result(result) or next
81
- [r.path(:done), r.path].each { |f| File.unlink(f) if File.exist?(f) }
87
+ p = cli.load_project
88
+ n = p.dataset_names.size
89
+ (0 .. cli[:threads] - 1).map do |i|
90
+ Process.fork do
91
+ k = 0
92
+ p.each_dataset do |d|
93
+ k += 1
94
+ cli.advance('Datasets:', k, n, false) if i == 0
95
+ next unless k % cli[:threads] == i
96
+ each_database_file(d) do |db_file, metric, result|
97
+ check_sqlite3_database(db_file, metric) do
98
+ cli.say(
99
+ " > Removing malformed database from #{d.name}:#{result} "
100
+ )
101
+ File.unlink(db_file)
102
+ r = d.result(result) or next
103
+ [r.path(:done), r.path].each do |f|
104
+ File.unlink(f) if File.exist?(f)
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
110
+ end
111
+ Process.waitall
112
+ cli.say
113
+ end
114
+
115
+ ##
116
+ # Perform bidirectional operation with MiGA::Cli +cli+
117
+ def check_bidir(cli)
118
+ cli.say 'Checking if reference distances are bidirectional'
119
+ ref_ds = cli.load_project.each_dataset.select(&:ref?)
120
+ ref_names = ref_ds.map(&:name)
121
+ n = ref_ds.size
122
+ (0 .. cli[:threads] - 1).map do |i|
123
+ Process.fork do
124
+ k = 0
125
+ ref_ds.each do |d|
126
+ k += 1
127
+ cli.advance('Datasets:', k, n, false) if i == 0
128
+ next unless k % cli[:threads] == i
129
+
130
+ saved = saved_targets(d)
131
+ next if saved.nil?
132
+
133
+ (ref_names - saved).each do |k|
134
+ save_bidirectional(cli.load_project.dataset(k), d)
135
+ end
82
136
  end
83
137
  end
84
138
  end
139
+ Process.waitall
85
140
  cli.say
86
141
  end
87
142
 
@@ -99,4 +99,55 @@ module MiGA::Cli::Action::Doctor::Base
99
99
  cli.say '- Removing tables, recompute'
100
100
  res.remove!
101
101
  end
102
+
103
+ ##
104
+ # Returns all targets identified by AAI
105
+ def saved_targets(dataset)
106
+ # Return nil if distance or database are not retrievable
107
+ dist = dataset.result(:distances) or return
108
+ path = dist.file_path(:aai_db) or return
109
+
110
+ o = []
111
+ SQLite3::Database.new(path) do |conn|
112
+ o = conn.execute('select seq2 from aai').map(&:first)
113
+ end
114
+ o
115
+ end
116
+
117
+ ##
118
+ # Saves all the distance estimates in +a+ -> +b+ into the +b+ databases
119
+ # (as +b+ -> +a+), where both +a+ and +b+ are MiGA::Dataset objects
120
+ def save_bidirectional(a, b)
121
+ each_database_file(a) do |db_file, metric, result|
122
+ data = nil
123
+ SQLite3::Database.new(db_file) do |conn|
124
+ data =
125
+ conn.execute(
126
+ "select seq1, seq2, #{metric}, sd, n, omega " +
127
+ "from #{metric} where seq2 = ? limit 1", b.name
128
+ ).first
129
+ end
130
+ next if data.nil? || data.empty?
131
+
132
+ db_file_b = File.join(File.dirname(db_file), "#{b.name}.db")
133
+ next unless File.exist?(db_file_b)
134
+
135
+ data[0], data[1] = data[1], data[0]
136
+ SQLite3::Database.new(db_file_b) do |conn|
137
+ attempts = 0
138
+ begin
139
+ attempts += 1
140
+ conn.execute(
141
+ "insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
142
+ "values(?, ?, ?, ?, ?, ?)", data
143
+ )
144
+ rescue SQLite3::BusyException => e
145
+ raise "Cannot populate #{db_file_b}: #{e.message}" if attempts > 3
146
+
147
+ sleep(1)
148
+ retry
149
+ end
150
+ end
151
+ end
152
+ end
102
153
  end
@@ -70,7 +70,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
70
70
  glob = get_sub_cli
71
71
  p = cli.load_project
72
72
  glob.each do |sub_cli|
73
- rd = create_remote_dataset(sub_cli)
73
+ rd = create_remote_dataset(sub_cli, p)
74
74
  next if rd.nil?
75
75
 
76
76
  if sub_cli[:get_md]
@@ -115,7 +115,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
115
115
  glob
116
116
  end
117
117
 
118
- def create_remote_dataset(sub_cli)
118
+ def create_remote_dataset(sub_cli, p)
119
119
  sub_cli.ensure_par(dataset: '-D', ids: '-I')
120
120
  unless sub_cli[:api_key].nil?
121
121
  ENV["#{sub_cli[:universe].to_s.upcase}_API_KEY"] = sub_cli[:api_key]
@@ -52,10 +52,39 @@ class MiGA::MiGA
52
52
  # 1,000 otherwise.
53
53
  # The report goes to $stderr iff --verbose
54
54
  def advance(step, n = 0, total = nil, bin = true)
55
- adv = total.nil? ? (n == 0 ? '' : num_suffix(n, bin)) :
56
- ('%.1f%% (%s/%s)' % [100.0 * n / total,
57
- num_suffix(n, bin), num_suffix(total, bin)])
58
- $stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
55
+ # Initialize advance timing
56
+ @_advance_time ||= { last: nil, n: 0, avg: nil }
57
+ if n <= 1 || @_advance_time[:n] > n
58
+ @_advance_time[:last] = nil
59
+ @_advance_time[:n] = 0
60
+ @_advance_time[:avg] = nil
61
+ end
62
+
63
+ # Estimate timing
64
+ unless total.nil? || @_advance_time[:last].nil? || @_advance_time[:n] == n
65
+ this_time = Time.now - @_advance_time[:last]
66
+ @_advance_time[:avg] ||= this_time / (n - @_advance_time[:n])
67
+ @_advance_time[:avg] = 0.99 * @_advance_time[:avg] + 0.01 * this_time
68
+ end
69
+ @_advance_time[:last] = Time.now
70
+ @_advance_time[:n] = n
71
+
72
+ # Report
73
+ adv_vals = [100.0 * n / total, num_suffix(n, bin), num_suffix(total, bin)]
74
+ adv =
75
+ total.nil? ? (n == 0 ? '' : num_suffix(n, bin)) :
76
+ ('%.1f%% (%s/%s)' % adv_vals)
77
+ left =
78
+ if @_advance_time[:avg].nil?
79
+ ''
80
+ else
81
+ left_time = @_advance_time[:avg] * (total - n) / 60
82
+ left_time < 0.01 ? ' ' :
83
+ left_time < 1 ? ('%.0fs left' % (left_time * 60)) :
84
+ left_time > 60 ? ('%.1fh left' % (left_time / 60)) :
85
+ ('%.1fm left' % left_time)
86
+ end
87
+ $stderr.print("[%s] %s %s %s \r" % [Time.now, step, adv, left])
59
88
  end
60
89
 
61
90
  ##
@@ -77,7 +77,11 @@ module MiGA::Daemon::Base
77
77
  ##
78
78
  # Writing file handler (IO) to the log file
79
79
  def logfh
80
- show_log? ? $stderr : (@logfh ||= File.open(output_file, 'w'))
80
+ @logfh ||= nil
81
+ return $stderr if show_log?
82
+ return @logfh if @logfh && !@logfh.closed?
83
+
84
+ @logfh = File.open(output_file, 'w')
81
85
  end
82
86
 
83
87
  ##
@@ -34,17 +34,29 @@ class MiGA::Json < MiGA::MiGA
34
34
  # +opts+.
35
35
  def parse(path, opts = {})
36
36
  opts = default_opts(opts)
37
- cont = opts[:contents] ? path : File.read(path)
38
- raise "Empty descriptor: #{opts[:contents] ? "''" : path}." if cont.empty?
39
37
 
40
- y = JSON.parse(cont,
41
- symbolize_names: opts[:symbolize],
42
- create_additions: opts[:additions])
38
+ # Read JSON
39
+ cont = path
40
+ 12.times do
41
+ cont = File.read(path)
42
+ break unless cont.empty?
43
+ sleep 1 # Wait up to 12 seconds for racing processes (iff empty file)
44
+ end unless opts[:contents]
45
+ raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
46
+
47
+ # Parse JSON
48
+ params = { symbolize_names: opts[:symbolize],
49
+ create_additions: opts[:additions] }
50
+ y = JSON.parse(cont, params)
51
+
52
+ # Add defaults
43
53
  unless opts[:default].nil?
44
54
  opts[:default] = parse(opts[:default]) if opts[:default].is_a? String
45
55
  y.each { |k, v| opts[:default][k] = v }
46
56
  y = opts[:default]
47
57
  end
58
+
59
+ # Return
48
60
  y
49
61
  end
50
62
 
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 15, 0]
11
+ VERSION = [0.7, 16, 3]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 8, 12)
19
+ VERSION_DATE = Date.new(2020, 10, 27)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
@@ -12,7 +12,7 @@ cd "$DIR"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  # Execute doctor
15
- miga doctor -P "$PROJECT" -v
15
+ miga doctor -P "$PROJECT" -t "$CORES" -v
16
16
 
17
17
  # Index taxonomy
18
18
  miga tax_index -P "$PROJECT" -i "miga-project.taxonomy.json" --ref --active
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.15.0
4
+ version: 0.7.16.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-12 00:00:00.000000000 Z
11
+ date: 2020-10-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons