miga-base 0.7.26.2 → 0.7.26.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8a4edaaac424eb59fe5956b98c74b6926bfd57b90fd80e0dff9556c5ec75aae5
4
- data.tar.gz: 0d2bba4f71074ef9efbeb03482a2d860b3c56ecd6734e7e8aeb82a928c2eec7d
3
+ metadata.gz: 0f2c9f2013b77f3a3b1dff0fbb73e74dad0b836d17d4a2129110a6095ffc7d8a
4
+ data.tar.gz: 0d7a1c879970dc9ffae5bae8cbbb51f6ef7ef13e41373fcc7cd5cc93db841104
5
5
  SHA512:
6
- metadata.gz: '0052068c9d5a055d2c13694f1bacf755a37aa294d6db76ea8ed10a1464ddd58f6a536513f8d3f8817cf2491b7f2b01825fcf891bea75e72d699b0d2b9b82804e'
7
- data.tar.gz: bbef9fa36b8b8900a3c9de38b3e69daf23fddf2fea59d0899d2781edecd96d32c6f612945ed8f32c373ea669ee7ee02c9c0d9db5db8b13cd92c95c94601494ca
6
+ metadata.gz: 4da1c02b538370a585efc788a71cbb9fafb5b8383c2882f0ab01d1bdec22a9dac2878bb1fa3a028f78ff19d22ecf70e50c67e20e71dc8a0308437ca19a6368db
7
+ data.tar.gz: 4eb38dafd4eb0dbaf69eed6fa734fec5c0d104d97d1f035cafdcd77365f93ecc723ac01eda643986f0676c4b4156a2504f92c1d98a02d0bd3955306e1a4924f5
@@ -50,7 +50,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
50
50
  # Input data
51
51
  ref_db = reference_db
52
52
  norun = %w[
53
- project_stats haai_distances aai_distances ani_distances clade_finding
53
+ haai_distances aai_distances ani_distances clade_finding
54
54
  ]
55
55
  p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
56
56
  p = create_project(
@@ -59,7 +59,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
59
59
  run_ssu: false, run_mytaxa_scan: false, run_distances: false
60
60
  )
61
61
  p.set_option(:ref_project, ref_db.path)
62
- p.set_option(:tax_pvalue, cli[:pvalue], true)
62
+ p.set_option(:tax_pvalue, cli[:pvalue])
63
63
  # Run
64
64
  run_daemon
65
65
  summarize(%w[cds assembly essential_genes]) if cli[:summaries]
@@ -52,7 +52,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
52
52
  # Input data
53
53
  p = create_project(
54
54
  :assembly,
55
- { run_project_stats: false, run_clades: false },
55
+ { run_clades: false },
56
56
  { run_mytaxa_scan: false, run_ssu: false }
57
57
  )
58
58
  p.set_option(:gsp_metric, cli[:metric].to_s)
@@ -93,7 +93,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
93
93
  k += 1
94
94
  cli.advance('Datasets:', k, n, false) if i == 0
95
95
  next unless k % cli[:threads] == i
96
- each_database_file(d) do |db_file, metric, result|
96
+ each_database_file(d) do |db_file, metric, result, _rank|
97
97
  check_sqlite3_database(db_file, metric) do
98
98
  cli.say(
99
99
  " > Removing malformed database from #{d.name}:#{result} "
@@ -119,24 +119,36 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
119
119
  ref_ds = cli.load_project.each_dataset.select(&:ref?)
120
120
  ref_names = ref_ds.map(&:name)
121
121
  n = ref_ds.size
122
- (0 .. cli[:threads] - 1).map do |i|
123
- Process.fork do
124
- k = 0
125
- ref_ds.each do |d|
126
- k += 1
127
- cli.advance('Datasets:', k, n, false) if i == 0
128
- next unless k % cli[:threads] == i
129
-
130
- saved = saved_targets(d)
131
- next if saved.nil?
132
122
 
133
- (ref_names - saved).each do |k|
134
- save_bidirectional(cli.load_project.dataset(k), d)
135
- end
123
+ # Read data first (threaded)
124
+ @distances = { aai: {}, ani: {} }
125
+ Dir.mktmpdir do |tmp|
126
+ MiGA::Parallel.process(cli[:threads]) do |thr|
127
+ idx = 0
128
+ ref_ds.each do |ds|
129
+ cli.advance('Reading:', idx + 1, n, false) if thr == 0
130
+ read_bidirectional(ds) if idx % cli[:threads] == thr
131
+ idx += 1
132
+ end
133
+ File.open("#{tmp}/#{thr}.json", 'w') do |fh|
134
+ fh.print JSON.fast_generate(@distances)
136
135
  end
137
136
  end
137
+ cli.say
138
+
139
+ cli[:threads].times do |i|
140
+ cli.advance('Merging:', i + 1, cli[:threads], false)
141
+ o = MiGA::Json.parse("#{tmp}/#{i}.json", symbolize: false)
142
+ o.each { |k, v| @distances[k.to_sym].merge!(v) }
143
+ end
144
+ cli.say
145
+ end
146
+
147
+ # Write missing values (threaded)
148
+ MiGA::Parallel.distribute(ref_ds, cli[:threads]) do |ds, idx, thr|
149
+ cli.advance('Datasets:', idx + 1, n, false) if thr == 0
150
+ save_bidirectional(ds)
138
151
  end
139
- Process.waitall
140
152
  cli.say
141
153
  end
142
154
 
@@ -16,22 +16,28 @@ module MiGA::Cli::Action::Doctor::Base
16
16
  end
17
17
 
18
18
  def each_database_file(dataset, &blk)
19
- ref_db = { '01.haai' => :aai, '02.aai' => :aai, '03.ani' => :ani }
20
- qry_db = { '.haai.db' => :aai, '.aai.db' => :aai, '.ani.db' => :ani }
19
+ ref_db = {
20
+ haai: ['01.haai', :aai], aai: ['02.aai', :aai], ani: ['03.ani', :ani]
21
+ }
22
+ qry_db = {
23
+ haai: ['.haai.db', :aai], aai: ['.aai.db', :aai], ani: ['.ani.db', :ani]
24
+ }
21
25
  base = File.join(dataset.project.path, 'data', '09.distances')
22
26
  result = :distances
23
27
  if dataset.ref?
24
28
  file_db = "#{dataset.name}.db"
25
- ref_db.each do |dir, metric|
29
+ ref_db.each do |rank, v|
30
+ dir, metric = *v
26
31
  file = File.join(base, dir, file_db)
27
- blk[file, metric, result] if File.exist? file
32
+ blk[file, metric, result, rank] if File.exist? file
28
33
  end
29
34
  base = File.join(base, '05.taxonomy')
30
35
  result = :taxonomy
31
36
  end
32
- qry_db.each do |ext, metric|
37
+ qry_db.each do |rank, v|
38
+ ext, metric = *v
33
39
  file = File.join(base, "#{dataset.name}#{ext}")
34
- blk[file, metric, result] if File.exist? file
40
+ blk[file, metric, result, rank] if File.exist? file
35
41
  end
36
42
  end
37
43
 
@@ -109,25 +115,41 @@ module MiGA::Cli::Action::Doctor::Base
109
115
  end
110
116
 
111
117
  ##
112
- # Saves all the distance estimates in +a+ -> +b+ into the +b+ databases
113
- # (as +b+ -> +a+), where both +a+ and +b+ are MiGA::Dataset objects
114
- def save_bidirectional(a, b)
115
- each_database_file(a) do |db_file, metric, result|
116
- data = nil
117
- data = MiGA::SQLite.new(db_file).run(
118
- "select seq1, seq2, #{metric}, sd, n, omega " +
119
- "from #{metric} where seq2 = ? limit 1", b.name
120
- ).first
118
+ # Reads all the distance estimates in +a+ -> *, and saves them in memory
119
+ # in the +@distances+ variable.
120
+ def read_bidirectional(a)
121
+ each_database_file(a) do |db_file, metric, result, rank|
122
+ next if rank == :haai # No need for hAAI to be bidirectional
123
+
124
+ sql = "select seq2, #{metric}, sd, n, omega from #{metric}"
125
+ data = MiGA::SQLite.new(db_file).run(sql)
121
126
  next if data.nil? || data.empty?
122
127
 
123
- db_file_b = File.join(File.dirname(db_file), "#{b.name}.db")
124
- next unless File.exist?(db_file_b)
128
+ @distances[rank][a.name] ||= {}
129
+ data.each { |row| @distances[rank][a.name][row.shift] = row }
130
+ end
131
+ end
125
132
 
126
- data[0], data[1] = data[1], data[0]
127
- MiGA::SQLite.new(db_file_b).run(
128
- "insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
129
- "values(?, ?, ?, ?, ?, ?)", data
130
- )
133
+ ##
134
+ # Saves all the distance estimates in * -> +a+ into the +a+ databases
135
+ # (as +a+ -> *), where +a+ is a MiGA::Dataset object
136
+ def save_bidirectional(a)
137
+ each_database_file(a) do |db_file, metric, result, rank|
138
+ next if rank == :haai # No need for hAAI to be bidirectional
139
+
140
+ b2a = @distances[rank].map { |b_name, v| b_name if v[a.name] }.compact
141
+ a2b = @distances[rank][a.name].keys
142
+ SQLite3::Database.new(db_file) do |db|
143
+ sql = <<~SQL
144
+ insert into #{metric}(seq1, seq2, #{metric}, sd, n, omega) \
145
+ values(?, ?, ?, ?, ?, ?);
146
+ SQL
147
+ db.execute('BEGIN TRANSACTION;')
148
+ (b2a - a2b).each do |b_name|
149
+ db.execute(sql, [a.name, b_name] + @distances[rank][b_name][a.name])
150
+ end
151
+ db.execute('COMMIT;')
152
+ end
131
153
  end
132
154
  end
133
155
  end
data/lib/miga/common.rb CHANGED
@@ -3,6 +3,7 @@
3
3
 
4
4
  require 'miga/version'
5
5
  require 'miga/json'
6
+ require 'miga/parallel'
6
7
  require 'miga/common/base'
7
8
  require 'miga/common/path'
8
9
  require 'miga/common/format'
data/lib/miga/json.rb CHANGED
@@ -45,8 +45,10 @@ class MiGA::Json < MiGA::MiGA
45
45
  raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
46
46
 
47
47
  # Parse JSON
48
- params = { symbolize_names: opts[:symbolize],
49
- create_additions: opts[:additions] }
48
+ params = {
49
+ symbolize_names: opts[:symbolize],
50
+ create_additions: opts[:additions]
51
+ }
50
52
  y = JSON.parse(cont, params)
51
53
 
52
54
  # Add defaults
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Parallel execution in MiGA.
5
+ class MiGA::Parallel < MiGA::MiGA
6
+ class << self
7
+ ##
8
+ # Executes the passed block with the thread number as argument (0-numbered)
9
+ # in +threads+ processes
10
+ def process(threads)
11
+ threads.times do |i|
12
+ Process.fork { yield(i) }
13
+ end
14
+ Process.waitall
15
+ end
16
+
17
+ ##
18
+ # Distributes +enum+ across +threads+ and calls the passed block with args:
19
+ # 1. Unitary object from +enum+
20
+ # 2. Index of the unitary object
21
+ # 3. Index of the acting thread
22
+ def distribute(enum, threads)
23
+ process(threads) do |thr|
24
+ enum.each_with_index do |obj, idx|
25
+ yield(obj, idx, thr) if idx % threads == thr
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
31
+
data/lib/miga/sqlite.rb CHANGED
@@ -37,8 +37,9 @@ class MiGA::SQLite < MiGA::MiGA
37
37
  # Executes +cmd+ and returns the result
38
38
  def run(*cmd)
39
39
  busy_attempts ||= 0
40
- conn = SQLite3::Database.new(path)
41
- conn.execute(*cmd)
40
+ y = nil
41
+ SQLite3::Database.new(path) { |conn| y = conn.execute(*cmd) }
42
+ y
42
43
  rescue SQLite3::BusyException => e
43
44
  busy_attempts += 1
44
45
  raise "Database busy #{path}: #{e.message}" if busy_attempts >= 3
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.7, 26, 2].freeze
13
+ VERSION = [0.7, 26, 3].freeze
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2021, 3, 1)
21
+ VERSION_DATE = Date.new(2021, 3, 11)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -47,7 +47,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
47
47
  end
48
48
 
49
49
  def test_net_ftp
50
- cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/ga/GAPJ01.fasta.gz'
50
+ cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/gap/GAPJ01.fasta.gz'
51
51
  n = 'Cjac_L14'
52
52
  rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
53
53
  assert_equal([cjac], rd.ids)
@@ -126,7 +126,6 @@ module MiGA::DistanceRunner::Database
126
126
  def batch_data_to_db(metric, data)
127
127
  db = tmp_dbs[metric]
128
128
  table = metric == :haai ? :aai : metric
129
- `cp #{db} ~/here.db`
130
129
  SQLite3::Database.new(db) do |conn|
131
130
  data.each do |k, v|
132
131
  sql = <<~SQL
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.26.2
4
+ version: 0.7.26.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-01 00:00:00.000000000 Z
11
+ date: 2021-03-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -187,6 +187,7 @@ files:
187
187
  - lib/miga/json.rb
188
188
  - lib/miga/lair.rb
189
189
  - lib/miga/metadata.rb
190
+ - lib/miga/parallel.rb
190
191
  - lib/miga/project.rb
191
192
  - lib/miga/project/base.rb
192
193
  - lib/miga/project/dataset.rb