miga-base 0.7.15.1 → 0.7.16.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 23ef43a6cf94c31d470263746a242d8b065899d11e694e4c281c0ec16fc4a335
4
- data.tar.gz: 7b086c3b966827f7e49559409f89a4ec39af6bc2aec0164be8e06466bd765029
3
+ metadata.gz: f30e69f1e7031e63d0aeda691fffb2fa0e1bab46a8c859dfbc0212f6389b602d
4
+ data.tar.gz: 82a0acb779a387fc0f3722168526e200644db132bf906cbb749398412e83b152
5
5
  SHA512:
6
- metadata.gz: 41e99ff68112b6f5d882b43a40e5ee3797e031f55d55b681ab5a9d56a83d70fe650813b3cd9127e35ac60cb88a56399b4165dd116395fcbb709c6a4748e794ef
7
- data.tar.gz: '0870d670b3a90be77a39d027b00fdc83c01c91b9ab2e0e27ece54b1e90b64f6deb97da206bc93a1c4b1e9c9878d263b62028f63a9dac72349ebe3213f3551cfc'
6
+ metadata.gz: 3aa15d5a2a4340274b1c6f29fcfd7718ee825bdb200b06d4f7edbebf300e76f839d7a2c72a457ae3032ccd5fb3db1e9b0a9045e084e371c87f4a6bc86fdcc3b3
7
+ data.tar.gz: 37ba5fee0eef4c98e255f38a0afddf2ba7aee7f7ade45ba63550c7226967718b6aec781654adfd8fed7138f8723eba5b1d3ef20ffcb9c4b58c93a6df41b55f07
@@ -7,6 +7,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
7
7
  include MiGA::Cli::Action::Doctor::Base
8
8
 
9
9
  def parse_cli
10
+ cli.defaults = { threads: 1 }
10
11
  cli.defaults = Hash[@@OPERATIONS.keys.map { |i| [i, true] }]
11
12
  cli.parse do |opt|
12
13
  operation_n = Hash[@@OPERATIONS.map { |k, v| [v[0], k] }]
@@ -24,6 +25,10 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
24
25
  @@OPERATIONS.each_key { |i| cli[i] = false }
25
26
  cli[op_k] = true
26
27
  end
28
+ opt.on(
29
+ '-t', '--threads INT', Integer,
30
+ "Concurrent threads to use. By default: #{cli[:threads]}"
31
+ ) { |v| cli[:threads] = v }
27
32
  end
28
33
  end
29
34
 
@@ -37,6 +42,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
37
42
  @@OPERATIONS = {
38
43
  status: ['status', 'Update metadata status of all datasets'],
39
44
  db: ['databases', 'Check integrity of database files'],
45
+ bidir: ['bidirectional', 'Check distances are bidirectional'],
40
46
  dist: ['distances', 'Check distance summary tables'],
41
47
  files: ['files', 'Check for outdated files'],
42
48
  cds: ['cds', 'Check for gzipped genes and proteins'],
@@ -58,11 +64,19 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
58
64
  # Perform status operation with MiGA::Cli +cli+
59
65
  def check_status(cli)
60
66
  cli.say 'Updating metadata status'
61
- n, k = cli.load_project.dataset_names.size, 0
62
- cli.load_project.each_dataset do |d|
63
- cli.advance('Datasets:', k += 1, n, false)
64
- d.recalculate_status
67
+ p = cli.load_project
68
+ n = p.dataset_names.size
69
+ (0 .. cli[:threads] - 1).map do |i|
70
+ Process.fork do
71
+ k = 0
72
+ cli.load_project.each_dataset do |d|
73
+ k += 1
74
+ cli.advance('Datasets:', k, n, false) if i == 0
75
+ d.recalculate_status if k % cli[:threads] == i
76
+ end
77
+ end
65
78
  end
79
+ Process.waitall
66
80
  cli.say
67
81
  end
68
82
 
@@ -70,18 +84,59 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
70
84
  # Perform databases operation with MiGA::Cli +cli+
71
85
  def check_db(cli)
72
86
  cli.say 'Checking integrity of databases'
73
- n, k = cli.load_project.dataset_names.size, 0
74
- cli.load_project.each_dataset do |d|
75
- cli.advance('Datasets:', k += 1, n, false)
76
- each_database_file(d) do |db_file, metric, result|
77
- check_sqlite3_database(db_file, metric) do
78
- cli.say(" > Removing malformed database from #{d.name}:#{result} ")
79
- File.unlink(db_file)
80
- r = d.result(result) or next
81
- [r.path(:done), r.path].each { |f| File.unlink(f) if File.exist?(f) }
87
+ p = cli.load_project
88
+ n = p.dataset_names.size
89
+ (0 .. cli[:threads] - 1).map do |i|
90
+ Process.fork do
91
+ k = 0
92
+ p.each_dataset do |d|
93
+ k += 1
94
+ cli.advance('Datasets:', k, n, false) if i == 0
95
+ next unless k % cli[:threads] == i
96
+ each_database_file(d) do |db_file, metric, result|
97
+ check_sqlite3_database(db_file, metric) do
98
+ cli.say(
99
+ " > Removing malformed database from #{d.name}:#{result} "
100
+ )
101
+ File.unlink(db_file)
102
+ r = d.result(result) or next
103
+ [r.path(:done), r.path].each do |f|
104
+ File.unlink(f) if File.exist?(f)
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
110
+ end
111
+ Process.waitall
112
+ cli.say
113
+ end
114
+
115
+ ##
116
+ # Perform bidirectional operation with MiGA::Cli +cli+
117
+ def check_bidir(cli)
118
+ cli.say 'Checking if reference distances are bidirectional'
119
+ ref_ds = cli.load_project.each_dataset.select(&:ref?)
120
+ ref_names = ref_ds.map(&:name)
121
+ n = ref_ds.size
122
+ (0 .. cli[:threads] - 1).map do |i|
123
+ Process.fork do
124
+ k = 0
125
+ ref_ds.each do |d|
126
+ k += 1
127
+ cli.advance('Datasets:', k, n, false) if i == 0
128
+ next unless k % cli[:threads] == i
129
+
130
+ saved = saved_targets(d)
131
+ next if saved.nil?
132
+
133
+ (ref_names - saved).each do |k|
134
+ save_bidirectional(cli.load_project.dataset(k), d)
135
+ end
82
136
  end
83
137
  end
84
138
  end
139
+ Process.waitall
85
140
  cli.say
86
141
  end
87
142
 
@@ -99,4 +99,55 @@ module MiGA::Cli::Action::Doctor::Base
99
99
  cli.say '- Removing tables, recompute'
100
100
  res.remove!
101
101
  end
102
+
103
+ ##
104
+ # Returns all targets identified by AAI
105
+ def saved_targets(dataset)
106
+ # Return nil if distance or database are not retrievable
107
+ dist = dataset.result(:distances) or return
108
+ path = dist.file_path(:aai_db) or return
109
+
110
+ o = []
111
+ SQLite3::Database.new(path) do |conn|
112
+ o = conn.execute('select seq2 from aai').map(&:first)
113
+ end
114
+ o
115
+ end
116
+
117
+ ##
118
+ # Saves all the distance estimates in +a+ -> +b+ into the +b+ databases
119
+ # (as +b+ -> +a+), where both +a+ and +b+ are MiGA::Dataset objects
120
+ def save_bidirectional(a, b)
121
+ each_database_file(a) do |db_file, metric, result|
122
+ data = nil
123
+ SQLite3::Database.new(db_file) do |conn|
124
+ data =
125
+ conn.execute(
126
+ "select seq1, seq2, #{metric}, sd, n, omega " +
127
+ "from #{metric} where seq2 = ? limit 1", b.name
128
+ ).first
129
+ end
130
+ next if data.nil? || data.empty?
131
+
132
+ db_file_b = File.join(File.dirname(db_file), "#{b.name}.db")
133
+ next unless File.exist?(db_file_b)
134
+
135
+ data[0], data[1] = data[1], data[0]
136
+ SQLite3::Database.new(db_file_b) do |conn|
137
+ attempts = 0
138
+ begin
139
+ attempts += 1
140
+ conn.execute(
141
+ "insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
142
+ "values(?, ?, ?, ?, ?, ?)", data
143
+ )
144
+ rescue SQLite3::BusyException => e
145
+ raise "Cannot populate #{db_file_b}: #{e.message}" if attempts > 3
146
+
147
+ sleep(1)
148
+ retry
149
+ end
150
+ end
151
+ end
152
+ end
102
153
  end
@@ -70,7 +70,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
70
70
  glob = get_sub_cli
71
71
  p = cli.load_project
72
72
  glob.each do |sub_cli|
73
- rd = create_remote_dataset(sub_cli)
73
+ rd = create_remote_dataset(sub_cli, p)
74
74
  next if rd.nil?
75
75
 
76
76
  if sub_cli[:get_md]
@@ -115,7 +115,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
115
115
  glob
116
116
  end
117
117
 
118
- def create_remote_dataset(sub_cli)
118
+ def create_remote_dataset(sub_cli, p)
119
119
  sub_cli.ensure_par(dataset: '-D', ids: '-I')
120
120
  unless sub_cli[:api_key].nil?
121
121
  ENV["#{sub_cli[:universe].to_s.upcase}_API_KEY"] = sub_cli[:api_key]
@@ -52,10 +52,44 @@ class MiGA::MiGA
52
52
  # 1,000 otherwise.
53
53
  # The report goes to $stderr iff --verbose
54
54
  def advance(step, n = 0, total = nil, bin = true)
55
- adv = total.nil? ? (n == 0 ? '' : num_suffix(n, bin)) :
56
- ('%.1f%% (%s/%s)' % [100.0 * n / total,
57
- num_suffix(n, bin), num_suffix(total, bin)])
58
- $stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
55
+ # Initialize advance timing
56
+ @_advance_time ||= { last: nil, n: 0, avg: nil }
57
+ if n <= 1 || @_advance_time[:n] > n
58
+ @_advance_time[:last] = nil
59
+ @_advance_time[:n] = 0
60
+ @_advance_time[:avg] = nil
61
+ end
62
+
63
+ # Estimate timing
64
+ adv_n = n - @_advance_time[:n]
65
+ unless total.nil? || @_advance_time[:last].nil? || adv_n <= 0
66
+ if adv_n.to_f/n > 0.001
67
+ this_time = Time.now - @_advance_time[:last]
68
+ this_avg = this_time / adv_n
69
+ @_advance_time[:avg] ||= this_avg
70
+ @_advance_time[:avg] = 0.9 * @_advance_time[:avg] + 0.1 * this_avg
71
+ end
72
+ end
73
+ @_advance_time[:last] = Time.now
74
+ @_advance_time[:n] = n
75
+
76
+ # Report
77
+ adv_vals = [100.0 * n / total, num_suffix(n, bin), num_suffix(total, bin)]
78
+ adv =
79
+ total.nil? ? (n == 0 ? '' : num_suffix(n, bin)) :
80
+ ('%.1f%% (%s/%s)' % adv_vals)
81
+ left =
82
+ if @_advance_time[:avg].nil?
83
+ ''
84
+ else
85
+ left_time = @_advance_time[:avg] * (total - n) / 60 # <- in minutes
86
+ left_time < 0.01 ? ' ' :
87
+ left_time < 1 ? ('%.0fs left' % (left_time * 60)) :
88
+ left_time > 1440 ? ('%.1fd left' % (left_time / 1440)) :
89
+ left_time > 60 ? ('%.1fh left' % (left_time / 60)) :
90
+ ('%.1fm left' % left_time)
91
+ end
92
+ $stderr.print("[%s] %s %s %s \r" % [Time.now, step, adv, left])
59
93
  end
60
94
 
61
95
  ##
@@ -77,8 +77,9 @@ module MiGA::Daemon::Base
77
77
  ##
78
78
  # Writing file handler (IO) to the log file
79
79
  def logfh
80
+ @logfh ||= nil
80
81
  return $stderr if show_log?
81
- return @logfh if @logfh && @logfh.closed?
82
+ return @logfh if @logfh && !@logfh.closed?
82
83
 
83
84
  @logfh = File.open(output_file, 'w')
84
85
  end
@@ -34,17 +34,29 @@ class MiGA::Json < MiGA::MiGA
34
34
  # +opts+.
35
35
  def parse(path, opts = {})
36
36
  opts = default_opts(opts)
37
- cont = opts[:contents] ? path : File.read(path)
38
- raise "Empty descriptor: #{opts[:contents] ? "''" : path}." if cont.empty?
39
37
 
40
- y = JSON.parse(cont,
41
- symbolize_names: opts[:symbolize],
42
- create_additions: opts[:additions])
38
+ # Read JSON
39
+ cont = path
40
+ 12.times do
41
+ cont = File.read(path)
42
+ break unless cont.empty?
43
+ sleep 1 # Wait up to 12 seconds for racing processes (iff empty file)
44
+ end unless opts[:contents]
45
+ raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
46
+
47
+ # Parse JSON
48
+ params = { symbolize_names: opts[:symbolize],
49
+ create_additions: opts[:additions] }
50
+ y = JSON.parse(cont, params)
51
+
52
+ # Add defaults
43
53
  unless opts[:default].nil?
44
54
  opts[:default] = parse(opts[:default]) if opts[:default].is_a? String
45
55
  y.each { |k, v| opts[:default][k] = v }
46
56
  y = opts[:default]
47
57
  end
58
+
59
+ # Return
48
60
  y
49
61
  end
50
62
 
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 15, 1]
11
+ VERSION = [0.7, 16, 4]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 8, 12)
19
+ VERSION_DATE = Date.new(2020, 10, 28)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
@@ -12,7 +12,7 @@ cd "$DIR"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  # Execute doctor
15
- miga doctor -P "$PROJECT" -v
15
+ miga doctor -P "$PROJECT" -t "$CORES" -v
16
16
 
17
17
  # Index taxonomy
18
18
  miga tax_index -P "$PROJECT" -i "miga-project.taxonomy.json" --ref --active
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.15.1
4
+ version: 0.7.16.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-12 00:00:00.000000000 Z
11
+ date: 2020-10-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons