miga-base 0.7.26.2 → 0.7.26.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8a4edaaac424eb59fe5956b98c74b6926bfd57b90fd80e0dff9556c5ec75aae5
4
- data.tar.gz: 0d2bba4f71074ef9efbeb03482a2d860b3c56ecd6734e7e8aeb82a928c2eec7d
3
+ metadata.gz: 0f2c9f2013b77f3a3b1dff0fbb73e74dad0b836d17d4a2129110a6095ffc7d8a
4
+ data.tar.gz: 0d7a1c879970dc9ffae5bae8cbbb51f6ef7ef13e41373fcc7cd5cc93db841104
5
5
  SHA512:
6
- metadata.gz: '0052068c9d5a055d2c13694f1bacf755a37aa294d6db76ea8ed10a1464ddd58f6a536513f8d3f8817cf2491b7f2b01825fcf891bea75e72d699b0d2b9b82804e'
7
- data.tar.gz: bbef9fa36b8b8900a3c9de38b3e69daf23fddf2fea59d0899d2781edecd96d32c6f612945ed8f32c373ea669ee7ee02c9c0d9db5db8b13cd92c95c94601494ca
6
+ metadata.gz: 4da1c02b538370a585efc788a71cbb9fafb5b8383c2882f0ab01d1bdec22a9dac2878bb1fa3a028f78ff19d22ecf70e50c67e20e71dc8a0308437ca19a6368db
7
+ data.tar.gz: 4eb38dafd4eb0dbaf69eed6fa734fec5c0d104d97d1f035cafdcd77365f93ecc723ac01eda643986f0676c4b4156a2504f92c1d98a02d0bd3955306e1a4924f5
@@ -50,7 +50,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
50
50
  # Input data
51
51
  ref_db = reference_db
52
52
  norun = %w[
53
- project_stats haai_distances aai_distances ani_distances clade_finding
53
+ haai_distances aai_distances ani_distances clade_finding
54
54
  ]
55
55
  p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
56
56
  p = create_project(
@@ -59,7 +59,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
59
59
  run_ssu: false, run_mytaxa_scan: false, run_distances: false
60
60
  )
61
61
  p.set_option(:ref_project, ref_db.path)
62
- p.set_option(:tax_pvalue, cli[:pvalue], true)
62
+ p.set_option(:tax_pvalue, cli[:pvalue])
63
63
  # Run
64
64
  run_daemon
65
65
  summarize(%w[cds assembly essential_genes]) if cli[:summaries]
@@ -52,7 +52,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
52
52
  # Input data
53
53
  p = create_project(
54
54
  :assembly,
55
- { run_project_stats: false, run_clades: false },
55
+ { run_clades: false },
56
56
  { run_mytaxa_scan: false, run_ssu: false }
57
57
  )
58
58
  p.set_option(:gsp_metric, cli[:metric].to_s)
@@ -93,7 +93,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
93
93
  k += 1
94
94
  cli.advance('Datasets:', k, n, false) if i == 0
95
95
  next unless k % cli[:threads] == i
96
- each_database_file(d) do |db_file, metric, result|
96
+ each_database_file(d) do |db_file, metric, result, _rank|
97
97
  check_sqlite3_database(db_file, metric) do
98
98
  cli.say(
99
99
  " > Removing malformed database from #{d.name}:#{result} "
@@ -119,24 +119,36 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
119
119
  ref_ds = cli.load_project.each_dataset.select(&:ref?)
120
120
  ref_names = ref_ds.map(&:name)
121
121
  n = ref_ds.size
122
- (0 .. cli[:threads] - 1).map do |i|
123
- Process.fork do
124
- k = 0
125
- ref_ds.each do |d|
126
- k += 1
127
- cli.advance('Datasets:', k, n, false) if i == 0
128
- next unless k % cli[:threads] == i
129
-
130
- saved = saved_targets(d)
131
- next if saved.nil?
132
122
 
133
- (ref_names - saved).each do |k|
134
- save_bidirectional(cli.load_project.dataset(k), d)
135
- end
123
+ # Read data first (threaded)
124
+ @distances = { aai: {}, ani: {} }
125
+ Dir.mktmpdir do |tmp|
126
+ MiGA::Parallel.process(cli[:threads]) do |thr|
127
+ idx = 0
128
+ ref_ds.each do |ds|
129
+ cli.advance('Reading:', idx + 1, n, false) if thr == 0
130
+ read_bidirectional(ds) if idx % cli[:threads] == thr
131
+ idx += 1
132
+ end
133
+ File.open("#{tmp}/#{thr}.json", 'w') do |fh|
134
+ fh.print JSON.fast_generate(@distances)
136
135
  end
137
136
  end
137
+ cli.say
138
+
139
+ cli[:threads].times do |i|
140
+ cli.advance('Merging:', i + 1, cli[:threads], false)
141
+ o = MiGA::Json.parse("#{tmp}/#{i}.json", symbolize: false)
142
+ o.each { |k, v| @distances[k.to_sym].merge!(v) }
143
+ end
144
+ cli.say
145
+ end
146
+
147
+ # Write missing values (threaded)
148
+ MiGA::Parallel.distribute(ref_ds, cli[:threads]) do |ds, idx, thr|
149
+ cli.advance('Datasets:', idx + 1, n, false) if thr == 0
150
+ save_bidirectional(ds)
138
151
  end
139
- Process.waitall
140
152
  cli.say
141
153
  end
142
154
 
@@ -16,22 +16,28 @@ module MiGA::Cli::Action::Doctor::Base
16
16
  end
17
17
 
18
18
  def each_database_file(dataset, &blk)
19
- ref_db = { '01.haai' => :aai, '02.aai' => :aai, '03.ani' => :ani }
20
- qry_db = { '.haai.db' => :aai, '.aai.db' => :aai, '.ani.db' => :ani }
19
+ ref_db = {
20
+ haai: ['01.haai', :aai], aai: ['02.aai', :aai], ani: ['03.ani', :ani]
21
+ }
22
+ qry_db = {
23
+ haai: ['.haai.db', :aai], aai: ['.aai.db', :aai], ani: ['.ani.db', :ani]
24
+ }
21
25
  base = File.join(dataset.project.path, 'data', '09.distances')
22
26
  result = :distances
23
27
  if dataset.ref?
24
28
  file_db = "#{dataset.name}.db"
25
- ref_db.each do |dir, metric|
29
+ ref_db.each do |rank, v|
30
+ dir, metric = *v
26
31
  file = File.join(base, dir, file_db)
27
- blk[file, metric, result] if File.exist? file
32
+ blk[file, metric, result, rank] if File.exist? file
28
33
  end
29
34
  base = File.join(base, '05.taxonomy')
30
35
  result = :taxonomy
31
36
  end
32
- qry_db.each do |ext, metric|
37
+ qry_db.each do |rank, v|
38
+ ext, metric = *v
33
39
  file = File.join(base, "#{dataset.name}#{ext}")
34
- blk[file, metric, result] if File.exist? file
40
+ blk[file, metric, result, rank] if File.exist? file
35
41
  end
36
42
  end
37
43
 
@@ -109,25 +115,41 @@ module MiGA::Cli::Action::Doctor::Base
109
115
  end
110
116
 
111
117
  ##
112
- # Saves all the distance estimates in +a+ -> +b+ into the +b+ databases
113
- # (as +b+ -> +a+), where both +a+ and +b+ are MiGA::Dataset objects
114
- def save_bidirectional(a, b)
115
- each_database_file(a) do |db_file, metric, result|
116
- data = nil
117
- data = MiGA::SQLite.new(db_file).run(
118
- "select seq1, seq2, #{metric}, sd, n, omega " +
119
- "from #{metric} where seq2 = ? limit 1", b.name
120
- ).first
118
# Loads every distance estimate stored as +a+ -> * in the databases of +a+
# and keeps them in memory in +@distances+, indexed as
# <tt>@distances[rank][a.name][seq2] = [value, sd, n, omega]</tt>.
# Databases without rows leave no entry for +a+.
def read_bidirectional(a)
  each_database_file(a) do |db_file, metric, _result, rank|
    next if rank == :haai # No need for hAAI to be bidirectional

    # The metric names both the table and the value column
    query = "select seq2, #{metric}, sd, n, omega from #{metric}"
    rows = MiGA::SQLite.new(db_file).run(query)
    next if rows.nil? || rows.empty?

    by_target = (@distances[rank][a.name] ||= {})
    rows.each do |row|
      # The first column (seq2) becomes the key, the remainder is the value
      target = row.shift
      by_target[target] = row
    end
  end
end
125
132
 
126
- data[0], data[1] = data[1], data[0]
127
- MiGA::SQLite.new(db_file_b).run(
128
- "insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
129
- "values(?, ?, ?, ?, ?, ?)", data
130
- )
133
##
# Saves all the distance estimates in * -> +a+ into the +a+ databases
# (as +a+ -> *), where +a+ is a MiGA::Dataset object.
#
# Estimates are taken from the in-memory +@distances+ variable, indexed as
# <tt>@distances[rank][seq1][seq2] = [value, sd, n, omega]</tt>. Only pairs
# stored as b -> +a+ but not yet as +a+ -> b are inserted. hAAI databases
# are never modified.
def save_bidirectional(a)
  each_database_file(a) do |db_file, metric, _result, rank|
    next if rank == :haai # No need for hAAI to be bidirectional

    by_source = @distances[rank]
    # Datasets +b+ holding an estimate b -> a
    b2a = by_source.map { |b_name, v| b_name if v[a.name] }.compact
    # Datasets +b+ already stored as a -> b. +a+ has no entry at all when
    # none of its databases returned rows, so fall back to an empty list
    # instead of calling +keys+ on nil.
    a2b = by_source[a.name]&.keys || []
    missing = b2a - a2b
    next if missing.empty? # Nothing to write, don't even open the database

    SQLite3::Database.new(db_file) do |db|
      sql = <<~SQL
        insert into #{metric}(seq1, seq2, #{metric}, sd, n, omega) \
        values(?, ?, ?, ?, ?, ?);
      SQL
      # Single transaction so all the inserts are committed together
      db.execute('BEGIN TRANSACTION;')
      missing.each do |b_name|
        db.execute(sql, [a.name, b_name] + by_source[b_name][a.name])
      end
      db.execute('COMMIT;')
    end
  end
end
133
155
  end
data/lib/miga/common.rb CHANGED
@@ -3,6 +3,7 @@
3
3
 
4
4
  require 'miga/version'
5
5
  require 'miga/json'
6
+ require 'miga/parallel'
6
7
  require 'miga/common/base'
7
8
  require 'miga/common/path'
8
9
  require 'miga/common/format'
data/lib/miga/json.rb CHANGED
@@ -45,8 +45,10 @@ class MiGA::Json < MiGA::MiGA
45
45
  raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
46
46
 
47
47
  # Parse JSON
48
- params = { symbolize_names: opts[:symbolize],
49
- create_additions: opts[:additions] }
48
+ params = {
49
+ symbolize_names: opts[:symbolize],
50
+ create_additions: opts[:additions]
51
+ }
50
52
  y = JSON.parse(cont, params)
51
53
 
52
54
  # Add defaults
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
##
# Helpers to run blocks of code in parallel in MiGA, using forked processes.
class MiGA::Parallel < MiGA::MiGA
  class << self
    ##
    # Forks +threads+ child processes and runs the passed block in each of
    # them, passing the 0-based number of the process as only argument.
    # Returns the value of +Process.waitall+, called once all the processes
    # have been forked.
    def process(threads)
      threads.times do |child_index|
        Process.fork do
          yield(child_index)
        end
      end
      Process.waitall
    end

    ##
    # Splits the items of +enum+ across +threads+ processes (see +process+),
    # assigning each item to the process whose number equals the item index
    # modulo +threads+. The passed block is called with three arguments:
    # 1. The item from +enum+
    # 2. The index of the item in +enum+
    # 3. The number of the process handling the item
    def distribute(enum, threads)
      process(threads) do |worker|
        enum.each_with_index do |item, position|
          next unless position % threads == worker

          yield(item, position, worker)
        end
      end
    end
  end
end
31
+
data/lib/miga/sqlite.rb CHANGED
@@ -37,8 +37,9 @@ class MiGA::SQLite < MiGA::MiGA
37
37
  # Executes +cmd+ and returns the result
38
38
  def run(*cmd)
39
39
  busy_attempts ||= 0
40
- conn = SQLite3::Database.new(path)
41
- conn.execute(*cmd)
40
+ y = nil
41
+ SQLite3::Database.new(path) { |conn| y = conn.execute(*cmd) }
42
+ y
42
43
  rescue SQLite3::BusyException => e
43
44
  busy_attempts += 1
44
45
  raise "Database busy #{path}: #{e.message}" if busy_attempts >= 3
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.7, 26, 2].freeze
13
+ VERSION = [0.7, 26, 3].freeze
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2021, 3, 1)
21
+ VERSION_DATE = Date.new(2021, 3, 11)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -47,7 +47,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
47
47
  end
48
48
 
49
49
  def test_net_ftp
50
- cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/ga/GAPJ01.fasta.gz'
50
+ cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/gap/GAPJ01.fasta.gz'
51
51
  n = 'Cjac_L14'
52
52
  rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
53
53
  assert_equal([cjac], rd.ids)
@@ -126,7 +126,6 @@ module MiGA::DistanceRunner::Database
126
126
  def batch_data_to_db(metric, data)
127
127
  db = tmp_dbs[metric]
128
128
  table = metric == :haai ? :aai : metric
129
- `cp #{db} ~/here.db`
130
129
  SQLite3::Database.new(db) do |conn|
131
130
  data.each do |k, v|
132
131
  sql = <<~SQL
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.26.2
4
+ version: 0.7.26.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-01 00:00:00.000000000 Z
11
+ date: 2021-03-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -187,6 +187,7 @@ files:
187
187
  - lib/miga/json.rb
188
188
  - lib/miga/lair.rb
189
189
  - lib/miga/metadata.rb
190
+ - lib/miga/parallel.rb
190
191
  - lib/miga/project.rb
191
192
  - lib/miga/project/base.rb
192
193
  - lib/miga/project/dataset.rb