miga-base 0.7.15.1 → 0.7.16.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 23ef43a6cf94c31d470263746a242d8b065899d11e694e4c281c0ec16fc4a335
4
- data.tar.gz: 7b086c3b966827f7e49559409f89a4ec39af6bc2aec0164be8e06466bd765029
3
+ metadata.gz: f30e69f1e7031e63d0aeda691fffb2fa0e1bab46a8c859dfbc0212f6389b602d
4
+ data.tar.gz: 82a0acb779a387fc0f3722168526e200644db132bf906cbb749398412e83b152
5
5
  SHA512:
6
- metadata.gz: 41e99ff68112b6f5d882b43a40e5ee3797e031f55d55b681ab5a9d56a83d70fe650813b3cd9127e35ac60cb88a56399b4165dd116395fcbb709c6a4748e794ef
7
- data.tar.gz: '0870d670b3a90be77a39d027b00fdc83c01c91b9ab2e0e27ece54b1e90b64f6deb97da206bc93a1c4b1e9c9878d263b62028f63a9dac72349ebe3213f3551cfc'
6
+ metadata.gz: 3aa15d5a2a4340274b1c6f29fcfd7718ee825bdb200b06d4f7edbebf300e76f839d7a2c72a457ae3032ccd5fb3db1e9b0a9045e084e371c87f4a6bc86fdcc3b3
7
+ data.tar.gz: 37ba5fee0eef4c98e255f38a0afddf2ba7aee7f7ade45ba63550c7226967718b6aec781654adfd8fed7138f8723eba5b1d3ef20ffcb9c4b58c93a6df41b55f07
@@ -7,6 +7,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
7
7
  include MiGA::Cli::Action::Doctor::Base
8
8
 
9
9
  def parse_cli
10
+ cli.defaults = { threads: 1 }
10
11
  cli.defaults = Hash[@@OPERATIONS.keys.map { |i| [i, true] }]
11
12
  cli.parse do |opt|
12
13
  operation_n = Hash[@@OPERATIONS.map { |k, v| [v[0], k] }]
@@ -24,6 +25,10 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
24
25
  @@OPERATIONS.each_key { |i| cli[i] = false }
25
26
  cli[op_k] = true
26
27
  end
28
+ opt.on(
29
+ '-t', '--threads INT', Integer,
30
+ "Concurrent threads to use. By default: #{cli[:threads]}"
31
+ ) { |v| cli[:threads] = v }
27
32
  end
28
33
  end
29
34
 
@@ -37,6 +42,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
37
42
  @@OPERATIONS = {
38
43
  status: ['status', 'Update metadata status of all datasets'],
39
44
  db: ['databases', 'Check integrity of database files'],
45
+ bidir: ['bidirectional', 'Check distances are bidirectional'],
40
46
  dist: ['distances', 'Check distance summary tables'],
41
47
  files: ['files', 'Check for outdated files'],
42
48
  cds: ['cds', 'Check for gzipped genes and proteins'],
@@ -58,11 +64,19 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
58
64
  # Perform status operation with MiGA::Cli +cli+
59
65
  def check_status(cli)
60
66
  cli.say 'Updating metadata status'
61
- n, k = cli.load_project.dataset_names.size, 0
62
- cli.load_project.each_dataset do |d|
63
- cli.advance('Datasets:', k += 1, n, false)
64
- d.recalculate_status
67
+ p = cli.load_project
68
+ n = p.dataset_names.size
69
+ (0 .. cli[:threads] - 1).map do |i|
70
+ Process.fork do
71
+ k = 0
72
+ cli.load_project.each_dataset do |d|
73
+ k += 1
74
+ cli.advance('Datasets:', k, n, false) if i == 0
75
+ d.recalculate_status if k % cli[:threads] == i
76
+ end
77
+ end
65
78
  end
79
+ Process.waitall
66
80
  cli.say
67
81
  end
68
82
 
@@ -70,18 +84,59 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
70
84
  # Perform databases operation with MiGA::Cli +cli+
71
85
  def check_db(cli)
72
86
  cli.say 'Checking integrity of databases'
73
- n, k = cli.load_project.dataset_names.size, 0
74
- cli.load_project.each_dataset do |d|
75
- cli.advance('Datasets:', k += 1, n, false)
76
- each_database_file(d) do |db_file, metric, result|
77
- check_sqlite3_database(db_file, metric) do
78
- cli.say(" > Removing malformed database from #{d.name}:#{result} ")
79
- File.unlink(db_file)
80
- r = d.result(result) or next
81
- [r.path(:done), r.path].each { |f| File.unlink(f) if File.exist?(f) }
87
+ p = cli.load_project
88
+ n = p.dataset_names.size
89
+ (0 .. cli[:threads] - 1).map do |i|
90
+ Process.fork do
91
+ k = 0
92
+ p.each_dataset do |d|
93
+ k += 1
94
+ cli.advance('Datasets:', k, n, false) if i == 0
95
+ next unless k % cli[:threads] == i
96
+ each_database_file(d) do |db_file, metric, result|
97
+ check_sqlite3_database(db_file, metric) do
98
+ cli.say(
99
+ " > Removing malformed database from #{d.name}:#{result} "
100
+ )
101
+ File.unlink(db_file)
102
+ r = d.result(result) or next
103
+ [r.path(:done), r.path].each do |f|
104
+ File.unlink(f) if File.exist?(f)
105
+ end
106
+ end
107
+ end
108
+ end
109
+ end
110
+ end
111
+ Process.waitall
112
+ cli.say
113
+ end
114
+
115
+ ##
116
+ # Perform bidirectional operation with MiGA::Cli +cli+
117
+ def check_bidir(cli)
118
+ cli.say 'Checking if reference distances are bidirectional'
119
+ ref_ds = cli.load_project.each_dataset.select(&:ref?)
120
+ ref_names = ref_ds.map(&:name)
121
+ n = ref_ds.size
122
+ (0 .. cli[:threads] - 1).map do |i|
123
+ Process.fork do
124
+ k = 0
125
+ ref_ds.each do |d|
126
+ k += 1
127
+ cli.advance('Datasets:', k, n, false) if i == 0
128
+ next unless k % cli[:threads] == i
129
+
130
+ saved = saved_targets(d)
131
+ next if saved.nil?
132
+
133
+ (ref_names - saved).each do |k|
134
+ save_bidirectional(cli.load_project.dataset(k), d)
135
+ end
82
136
  end
83
137
  end
84
138
  end
139
+ Process.waitall
85
140
  cli.say
86
141
  end
87
142
 
@@ -99,4 +99,55 @@ module MiGA::Cli::Action::Doctor::Base
99
99
  cli.say '- Removing tables, recompute'
100
100
  res.remove!
101
101
  end
102
+
103
+ ##
104
+ # Returns all targets identified by AAI
105
+ def saved_targets(dataset)
106
+ # Return nil if distance or database are not retrievable
107
+ dist = dataset.result(:distances) or return
108
+ path = dist.file_path(:aai_db) or return
109
+
110
+ o = []
111
+ SQLite3::Database.new(path) do |conn|
112
+ o = conn.execute('select seq2 from aai').map(&:first)
113
+ end
114
+ o
115
+ end
116
+
117
+ ##
118
+ # Saves all the distance estimates in +a+ -> +b+ into the +b+ databases
119
+ # (as +b+ -> +a+), where both +a+ and +b+ are MiGA::Dataset objects
120
+ def save_bidirectional(a, b)
121
+ each_database_file(a) do |db_file, metric, result|
122
+ data = nil
123
+ SQLite3::Database.new(db_file) do |conn|
124
+ data =
125
+ conn.execute(
126
+ "select seq1, seq2, #{metric}, sd, n, omega " +
127
+ "from #{metric} where seq2 = ? limit 1", b.name
128
+ ).first
129
+ end
130
+ next if data.nil? || data.empty?
131
+
132
+ db_file_b = File.join(File.dirname(db_file), "#{b.name}.db")
133
+ next unless File.exist?(db_file_b)
134
+
135
+ data[0], data[1] = data[1], data[0]
136
+ SQLite3::Database.new(db_file_b) do |conn|
137
+ attempts = 0
138
+ begin
139
+ attempts += 1
140
+ conn.execute(
141
+ "insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
142
+ "values(?, ?, ?, ?, ?, ?)", data
143
+ )
144
+ rescue SQLite3::BusyException => e
145
+ raise "Cannot populate #{db_file_b}: #{e.message}" if attempts > 3
146
+
147
+ sleep(1)
148
+ retry
149
+ end
150
+ end
151
+ end
152
+ end
102
153
  end
@@ -70,7 +70,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
70
70
  glob = get_sub_cli
71
71
  p = cli.load_project
72
72
  glob.each do |sub_cli|
73
- rd = create_remote_dataset(sub_cli)
73
+ rd = create_remote_dataset(sub_cli, p)
74
74
  next if rd.nil?
75
75
 
76
76
  if sub_cli[:get_md]
@@ -115,7 +115,7 @@ class MiGA::Cli::Action::Get < MiGA::Cli::Action
115
115
  glob
116
116
  end
117
117
 
118
- def create_remote_dataset(sub_cli)
118
+ def create_remote_dataset(sub_cli, p)
119
119
  sub_cli.ensure_par(dataset: '-D', ids: '-I')
120
120
  unless sub_cli[:api_key].nil?
121
121
  ENV["#{sub_cli[:universe].to_s.upcase}_API_KEY"] = sub_cli[:api_key]
@@ -52,10 +52,44 @@ class MiGA::MiGA
52
52
  # 1,000 otherwise.
53
53
  # The report goes to $stderr iff --verbose
54
54
  def advance(step, n = 0, total = nil, bin = true)
55
- adv = total.nil? ? (n == 0 ? '' : num_suffix(n, bin)) :
56
- ('%.1f%% (%s/%s)' % [100.0 * n / total,
57
- num_suffix(n, bin), num_suffix(total, bin)])
58
- $stderr.print("[%s] %s %s \r" % [Time.now, step, adv])
55
+ # Initialize advance timing
56
+ @_advance_time ||= { last: nil, n: 0, avg: nil }
57
+ if n <= 1 || @_advance_time[:n] > n
58
+ @_advance_time[:last] = nil
59
+ @_advance_time[:n] = 0
60
+ @_advance_time[:avg] = nil
61
+ end
62
+
63
+ # Estimate timing
64
+ adv_n = n - @_advance_time[:n]
65
+ unless total.nil? || @_advance_time[:last].nil? || adv_n <= 0
66
+ if adv_n.to_f/n > 0.001
67
+ this_time = Time.now - @_advance_time[:last]
68
+ this_avg = this_time / adv_n
69
+ @_advance_time[:avg] ||= this_avg
70
+ @_advance_time[:avg] = 0.9 * @_advance_time[:avg] + 0.1 * this_avg
71
+ end
72
+ end
73
+ @_advance_time[:last] = Time.now
74
+ @_advance_time[:n] = n
75
+
76
+ # Report
77
+ adv_vals = [100.0 * n / total, num_suffix(n, bin), num_suffix(total, bin)]
78
+ adv =
79
+ total.nil? ? (n == 0 ? '' : num_suffix(n, bin)) :
80
+ ('%.1f%% (%s/%s)' % adv_vals)
81
+ left =
82
+ if @_advance_time[:avg].nil?
83
+ ''
84
+ else
85
+ left_time = @_advance_time[:avg] * (total - n) / 60 # <- in minutes
86
+ left_time < 0.01 ? ' ' :
87
+ left_time < 1 ? ('%.0fs left' % (left_time * 60)) :
88
+ left_time > 1440 ? ('%.1fd left' % (left_time / 1440)) :
89
+ left_time > 60 ? ('%.1fh left' % (left_time / 60)) :
90
+ ('%.1fm left' % left_time)
91
+ end
92
+ $stderr.print("[%s] %s %s %s \r" % [Time.now, step, adv, left])
59
93
  end
60
94
 
61
95
  ##
@@ -77,8 +77,9 @@ module MiGA::Daemon::Base
77
77
  ##
78
78
  # Writing file handler (IO) to the log file
79
79
  def logfh
80
+ @logfh ||= nil
80
81
  return $stderr if show_log?
81
- return @logfh if @logfh && @logfh.closed?
82
+ return @logfh if @logfh && !@logfh.closed?
82
83
 
83
84
  @logfh = File.open(output_file, 'w')
84
85
  end
@@ -34,17 +34,29 @@ class MiGA::Json < MiGA::MiGA
34
34
  # +opts+.
35
35
  def parse(path, opts = {})
36
36
  opts = default_opts(opts)
37
- cont = opts[:contents] ? path : File.read(path)
38
- raise "Empty descriptor: #{opts[:contents] ? "''" : path}." if cont.empty?
39
37
 
40
- y = JSON.parse(cont,
41
- symbolize_names: opts[:symbolize],
42
- create_additions: opts[:additions])
38
+ # Read JSON
39
+ cont = path
40
+ 12.times do
41
+ cont = File.read(path)
42
+ break unless cont.empty?
43
+ sleep 1 # Wait up to 12 seconds for racing processes (iff empty file)
44
+ end unless opts[:contents]
45
+ raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
46
+
47
+ # Parse JSON
48
+ params = { symbolize_names: opts[:symbolize],
49
+ create_additions: opts[:additions] }
50
+ y = JSON.parse(cont, params)
51
+
52
+ # Add defaults
43
53
  unless opts[:default].nil?
44
54
  opts[:default] = parse(opts[:default]) if opts[:default].is_a? String
45
55
  y.each { |k, v| opts[:default][k] = v }
46
56
  y = opts[:default]
47
57
  end
58
+
59
+ # Return
48
60
  y
49
61
  end
50
62
 
@@ -8,7 +8,7 @@ module MiGA
8
8
  # - Float representing the major.minor version.
9
9
  # - Integer representing gem releases of the current version.
10
10
  # - Integer representing minor changes that require new version number.
11
- VERSION = [0.7, 15, 1]
11
+ VERSION = [0.7, 16, 4]
12
12
 
13
13
  ##
14
14
  # Nickname for the current major.minor version.
@@ -16,7 +16,7 @@ module MiGA
16
16
 
17
17
  ##
18
18
  # Date of the current gem release.
19
- VERSION_DATE = Date.new(2020, 8, 12)
19
+ VERSION_DATE = Date.new(2020, 10, 28)
20
20
 
21
21
  ##
22
22
  # Reference of MiGA.
@@ -12,7 +12,7 @@ cd "$DIR"
12
12
  miga date > "miga-project.start"
13
13
 
14
14
  # Execute doctor
15
- miga doctor -P "$PROJECT" -v
15
+ miga doctor -P "$PROJECT" -t "$CORES" -v
16
16
 
17
17
  # Index taxonomy
18
18
  miga tax_index -P "$PROJECT" -i "miga-project.taxonomy.json" --ref --active
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.15.1
4
+ version: 0.7.16.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-08-12 00:00:00.000000000 Z
11
+ date: 2020-10-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons