miga-base 0.7.25.2 → 0.7.26.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 45e7273aec3b6dae9f942561ee39149fa2c50930d1f3e515666e3c2a4434e7c1
4
- data.tar.gz: a2eaa3b270cd2f171b47a799841957ef2b53b3055138ca6e175615a32e969cd1
3
+ metadata.gz: 0f2c9f2013b77f3a3b1dff0fbb73e74dad0b836d17d4a2129110a6095ffc7d8a
4
+ data.tar.gz: 0d7a1c879970dc9ffae5bae8cbbb51f6ef7ef13e41373fcc7cd5cc93db841104
5
5
  SHA512:
6
- metadata.gz: ec0538b54ad490ddb57deb7d4cb1b4dcbcdedc7c5cfd6fb5afcf40ac1c31154f9b750c72ee0c8aab08991d77d4e275bbeccc8f49c8c65043ae304f99d0211fe6
7
- data.tar.gz: d992f9dc1c52925b88d9132838d9ec27906815a656362346b294fdef2edec669a9f0a161c635cbdf2a1c6cf8ac2f8918179db0b1a0d431e445180eb2033c1650
6
+ metadata.gz: 4da1c02b538370a585efc788a71cbb9fafb5b8383c2882f0ab01d1bdec22a9dac2878bb1fa3a028f78ff19d22ecf70e50c67e20e71dc8a0308437ca19a6368db
7
+ data.tar.gz: 4eb38dafd4eb0dbaf69eed6fa734fec5c0d104d97d1f035cafdcd77365f93ecc723ac01eda643986f0676c4b4156a2504f92c1d98a02d0bd3955306e1a4924f5
@@ -50,7 +50,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
50
50
  # Input data
51
51
  ref_db = reference_db
52
52
  norun = %w[
53
- project_stats haai_distances aai_distances ani_distances clade_finding
53
+ haai_distances aai_distances ani_distances clade_finding
54
54
  ]
55
55
  p_metadata = Hash[norun.map { |i| ["run_#{i}", false] }]
56
56
  p = create_project(
@@ -59,7 +59,7 @@ class MiGA::Cli::Action::ClassifyWf < MiGA::Cli::Action
59
59
  run_ssu: false, run_mytaxa_scan: false, run_distances: false
60
60
  )
61
61
  p.set_option(:ref_project, ref_db.path)
62
- p.set_option(:tax_pvalue, cli[:pvalue], true)
62
+ p.set_option(:tax_pvalue, cli[:pvalue])
63
63
  # Run
64
64
  run_daemon
65
65
  summarize(%w[cds assembly essential_genes]) if cli[:summaries]
@@ -52,7 +52,7 @@ class MiGA::Cli::Action::DerepWf < MiGA::Cli::Action
52
52
  # Input data
53
53
  p = create_project(
54
54
  :assembly,
55
- { run_project_stats: false, run_clades: false },
55
+ { run_clades: false },
56
56
  { run_mytaxa_scan: false, run_ssu: false }
57
57
  )
58
58
  p.set_option(:gsp_metric, cli[:metric].to_s)
@@ -93,7 +93,7 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
93
93
  k += 1
94
94
  cli.advance('Datasets:', k, n, false) if i == 0
95
95
  next unless k % cli[:threads] == i
96
- each_database_file(d) do |db_file, metric, result|
96
+ each_database_file(d) do |db_file, metric, result, _rank|
97
97
  check_sqlite3_database(db_file, metric) do
98
98
  cli.say(
99
99
  " > Removing malformed database from #{d.name}:#{result} "
@@ -119,24 +119,36 @@ class MiGA::Cli::Action::Doctor < MiGA::Cli::Action
119
119
  ref_ds = cli.load_project.each_dataset.select(&:ref?)
120
120
  ref_names = ref_ds.map(&:name)
121
121
  n = ref_ds.size
122
- (0 .. cli[:threads] - 1).map do |i|
123
- Process.fork do
124
- k = 0
125
- ref_ds.each do |d|
126
- k += 1
127
- cli.advance('Datasets:', k, n, false) if i == 0
128
- next unless k % cli[:threads] == i
129
-
130
- saved = saved_targets(d)
131
- next if saved.nil?
132
122
 
133
- (ref_names - saved).each do |k|
134
- save_bidirectional(cli.load_project.dataset(k), d)
135
- end
123
+ # Read data first (threaded)
124
+ @distances = { aai: {}, ani: {} }
125
+ Dir.mktmpdir do |tmp|
126
+ MiGA::Parallel.process(cli[:threads]) do |thr|
127
+ idx = 0
128
+ ref_ds.each do |ds|
129
+ cli.advance('Reading:', idx + 1, n, false) if thr == 0
130
+ read_bidirectional(ds) if idx % cli[:threads] == thr
131
+ idx += 1
132
+ end
133
+ File.open("#{tmp}/#{thr}.json", 'w') do |fh|
134
+ fh.print JSON.fast_generate(@distances)
136
135
  end
137
136
  end
137
+ cli.say
138
+
139
+ cli[:threads].times do |i|
140
+ cli.advance('Merging:', i + 1, cli[:threads], false)
141
+ o = MiGA::Json.parse("#{tmp}/#{i}.json", symbolize: false)
142
+ o.each { |k, v| @distances[k.to_sym].merge!(v) }
143
+ end
144
+ cli.say
145
+ end
146
+
147
+ # Write missing values (threaded)
148
+ MiGA::Parallel.distribute(ref_ds, cli[:threads]) do |ds, idx, thr|
149
+ cli.advance('Datasets:', idx + 1, n, false) if thr == 0
150
+ save_bidirectional(ds)
138
151
  end
139
- Process.waitall
140
152
  cli.say
141
153
  end
142
154
 
@@ -16,22 +16,28 @@ module MiGA::Cli::Action::Doctor::Base
16
16
  end
17
17
 
18
18
  def each_database_file(dataset, &blk)
19
- ref_db = { '01.haai' => :aai, '02.aai' => :aai, '03.ani' => :ani }
20
- qry_db = { '.haai.db' => :aai, '.aai.db' => :aai, '.ani.db' => :ani }
19
+ ref_db = {
20
+ haai: ['01.haai', :aai], aai: ['02.aai', :aai], ani: ['03.ani', :ani]
21
+ }
22
+ qry_db = {
23
+ haai: ['.haai.db', :aai], aai: ['.aai.db', :aai], ani: ['.ani.db', :ani]
24
+ }
21
25
  base = File.join(dataset.project.path, 'data', '09.distances')
22
26
  result = :distances
23
27
  if dataset.ref?
24
28
  file_db = "#{dataset.name}.db"
25
- ref_db.each do |dir, metric|
29
+ ref_db.each do |rank, v|
30
+ dir, metric = *v
26
31
  file = File.join(base, dir, file_db)
27
- blk[file, metric, result] if File.exist? file
32
+ blk[file, metric, result, rank] if File.exist? file
28
33
  end
29
34
  base = File.join(base, '05.taxonomy')
30
35
  result = :taxonomy
31
36
  end
32
- qry_db.each do |ext, metric|
37
+ qry_db.each do |rank, v|
38
+ ext, metric = *v
33
39
  file = File.join(base, "#{dataset.name}#{ext}")
34
- blk[file, metric, result] if File.exist? file
40
+ blk[file, metric, result, rank] if File.exist? file
35
41
  end
36
42
  end
37
43
 
@@ -109,25 +115,41 @@ module MiGA::Cli::Action::Doctor::Base
109
115
  end
110
116
 
111
117
  ##
112
- # Saves all the distance estimates in +a+ -> +b+ into the +b+ databases
113
- # (as +b+ -> +a+), where both +a+ and +b+ are MiGA::Dataset objects
114
- def save_bidirectional(a, b)
115
- each_database_file(a) do |db_file, metric, result|
116
- data = nil
117
- data = MiGA::SQLite.new(db_file).run(
118
- "select seq1, seq2, #{metric}, sd, n, omega " +
119
- "from #{metric} where seq2 = ? limit 1", b.name
120
- ).first
118
+ # Reads all the distance estimates in +a+ -> *, and saves them in memory
119
+ # in the +@distances+ variable.
120
+ def read_bidirectional(a)
121
+ each_database_file(a) do |db_file, metric, result, rank|
122
+ next if rank == :haai # No need for hAAI to be bidirectional
123
+
124
+ sql = "select seq2, #{metric}, sd, n, omega from #{metric}"
125
+ data = MiGA::SQLite.new(db_file).run(sql)
121
126
  next if data.nil? || data.empty?
122
127
 
123
- db_file_b = File.join(File.dirname(db_file), "#{b.name}.db")
124
- next unless File.exist?(db_file_b)
128
+ @distances[rank][a.name] ||= {}
129
+ data.each { |row| @distances[rank][a.name][row.shift] = row }
130
+ end
131
+ end
125
132
 
126
- data[0], data[1] = data[1], data[0]
127
- MiGA::SQLite.new(db_file_b).run(
128
- "insert into #{metric} (seq1, seq2, #{metric}, sd, n, omega) " +
129
- "values(?, ?, ?, ?, ?, ?)", data
130
- )
133
+ ##
134
+ # Saves all the distance estimates in * -> +a+ into the +a+ databases
135
+ # (as +a+ -> *), where +a+ is a MiGA::Dataset object
136
+ def save_bidirectional(a)
137
+ each_database_file(a) do |db_file, metric, result, rank|
138
+ next if rank == :haai # No need for hAAI to be bidirectional
139
+
140
+ b2a = @distances[rank].map { |b_name, v| b_name if v[a.name] }.compact
141
+ a2b = @distances[rank][a.name].keys
142
+ SQLite3::Database.new(db_file) do |db|
143
+ sql = <<~SQL
144
+ insert into #{metric}(seq1, seq2, #{metric}, sd, n, omega) \
145
+ values(?, ?, ?, ?, ?, ?);
146
+ SQL
147
+ db.execute('BEGIN TRANSACTION;')
148
+ (b2a - a2b).each do |b_name|
149
+ db.execute(sql, [a.name, b_name] + @distances[rank][b_name][a.name])
150
+ end
151
+ db.execute('COMMIT;')
152
+ end
131
153
  end
132
154
  end
133
155
  end
@@ -14,6 +14,8 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
14
14
  cli.defaults = {
15
15
  mytaxa: nil,
16
16
  rdp: nil,
17
+ reads: nil,
18
+ optional: nil,
17
19
  config: File.join(ENV['MIGA_HOME'], '.miga_modules'),
18
20
  ask: false,
19
21
  auto: false,
@@ -35,6 +37,16 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
35
37
  'Should I try setting up the RDP classifier?',
36
38
  'By default: interactive (true if --auto)'
37
39
  ) { |v| cli[:rdp] = v }
40
+ opt.on(
41
+ '--[no-]read-processing',
42
+ 'Should I try setting up read processing software?',
43
+ 'By default: interactive (true if --auto)'
44
+ ) { |v| cli[:reads] = v }
45
+ opt.on(
46
+ '--[no-]optional',
47
+ 'Should I try setting up the optional software?',
48
+ 'Automatically sets answers for mytaxa, rdp, and reads'
49
+ ) { |v| cli[:optional] = v }
38
50
  opt.on(
39
51
  '--daemon-type STRING',
40
52
  'Type of daemon launcher, one of: bash, ssh, qsub, msub, slurm',
@@ -104,6 +116,8 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
104
116
  rc_fh.puts "export MIGA_MYTAXA='#{cli[:mytaxa] ? 'yes' : 'no'}'"
105
117
  ask_for_optional(:rdp, 'RDP classifier')
106
118
  rc_fh.puts "export MIGA_RDP='#{cli[:rdp] ? 'yes' : 'no'}'"
119
+ ask_for_optional(:reads, 'read processing')
120
+ rc_fh.puts "export MIGA_READS='#{cli[:reads] ? 'yes' : 'no'}'"
107
121
  paths = {}
108
122
  rc_fh.puts 'MIGA_PATH=""'
109
123
  req_path = File.expand_path('utils/requirements.txt', MiGA.root_path)
@@ -123,20 +137,20 @@ class MiGA::Cli::Action::Init < MiGA::Cli::Action
123
137
  def define_software(ln)
124
138
  r = ln.chomp.split(/\t+/)
125
139
  return if %w[Software --------].include?(r[0])
126
- return if r[0] =~ /\(mytaxa\)$/ && !cli[:mytaxa]
127
- return if r[0] =~ /\(rdp\)$/ && !cli[:rdp]
140
+ %i[mytaxa rdp reads].each { |i| return if r[0] =~ /\(#{i}\)$/ && !cli[i] }
128
141
 
129
142
  r
130
143
  end
131
144
 
132
145
  def ask_for_optional(symbol, name)
133
- if cli[symbol].nil?
134
- cli[symbol] =
135
- cli.ask_user(
136
- "Should I include #{name} modules?",
137
- 'yes', %w(yes no)
138
- ) == 'yes'
139
- end
146
+ cli[symbol] = cli[:optional] if !cli[:optional].nil? && cli[symbol].nil?
147
+ return cli[symbol] unless cli[symbol].nil?
148
+
149
+ cli[symbol] =
150
+ cli.ask_user(
151
+ "Should I include #{name} modules?",
152
+ 'yes', %w(yes no)
153
+ ) == 'yes'
140
154
  end
141
155
 
142
156
  def find_software(exec)
data/lib/miga/common.rb CHANGED
@@ -3,6 +3,7 @@
3
3
 
4
4
  require 'miga/version'
5
5
  require 'miga/json'
6
+ require 'miga/parallel'
6
7
  require 'miga/common/base'
7
8
  require 'miga/common/path'
8
9
  require 'miga/common/format'
data/lib/miga/json.rb CHANGED
@@ -45,8 +45,10 @@ class MiGA::Json < MiGA::MiGA
45
45
  raise "Empty descriptor: #{opts[:contents] ? "''" : path}" if cont.empty?
46
46
 
47
47
  # Parse JSON
48
- params = { symbolize_names: opts[:symbolize],
49
- create_additions: opts[:additions] }
48
+ params = {
49
+ symbolize_names: opts[:symbolize],
50
+ create_additions: opts[:additions]
51
+ }
50
52
  y = JSON.parse(cont, params)
51
53
 
52
54
  # Add defaults
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ ##
4
+ # Parallel execution in MiGA.
5
+ class MiGA::Parallel < MiGA::MiGA
6
+ class << self
7
+ ##
8
+ # Executes the passed block with the thread number as argument (0-numbered)
9
+ # in +threads+ processes
10
+ def process(threads)
11
+ threads.times do |i|
12
+ Process.fork { yield(i) }
13
+ end
14
+ Process.waitall
15
+ end
16
+
17
+ ##
18
+ # Distributes +enum+ across +threads+ and calls the passed block with args:
19
+ # 1. Unitary object from +enum+
20
+ # 2. Index of the unitary object
21
+ # 3. Index of the acting thread
22
+ def distribute(enum, threads)
23
+ process(threads) do |thr|
24
+ enum.each_with_index do |obj, idx|
25
+ yield(obj, idx, thr) if idx % threads == thr
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
31
+
data/lib/miga/sqlite.rb CHANGED
@@ -37,8 +37,9 @@ class MiGA::SQLite < MiGA::MiGA
37
37
  # Executes +cmd+ and returns the result
38
38
  def run(*cmd)
39
39
  busy_attempts ||= 0
40
- conn = SQLite3::Database.new(path)
41
- conn.execute(*cmd)
40
+ y = nil
41
+ SQLite3::Database.new(path) { |conn| y = conn.execute(*cmd) }
42
+ y
42
43
  rescue SQLite3::BusyException => e
43
44
  busy_attempts += 1
44
45
  raise "Database busy #{path}: #{e.message}" if busy_attempts >= 3
data/lib/miga/version.rb CHANGED
@@ -10,7 +10,7 @@ module MiGA
10
10
  # - Float representing the major.minor version.
11
11
  # - Integer representing gem releases of the current version.
12
12
  # - Integer representing minor changes that require new version number.
13
- VERSION = [0.7, 25, 2].freeze
13
+ VERSION = [0.7, 26, 3].freeze
14
14
 
15
15
  ##
16
16
  # Nickname for the current major.minor version.
@@ -18,7 +18,7 @@ module MiGA
18
18
 
19
19
  ##
20
20
  # Date of the current gem release.
21
- VERSION_DATE = Date.new(2021, 2, 25)
21
+ VERSION_DATE = Date.new(2021, 3, 11)
22
22
 
23
23
  ##
24
24
  # Reference of MiGA.
@@ -47,7 +47,7 @@ class RemoteDatasetTest < Test::Unit::TestCase
47
47
  end
48
48
 
49
49
  def test_net_ftp
50
- cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/ga/GAPJ01.fasta.gz'
50
+ cjac = 'ftp://ftp.ebi.ac.uk/pub/databases/ena/tsa/public/gap/GAPJ01.fasta.gz'
51
51
  n = 'Cjac_L14'
52
52
  rd = MiGA::RemoteDataset.new(cjac, :assembly_gz, :web)
53
53
  assert_equal([cjac], rd.ids)
@@ -126,7 +126,6 @@ module MiGA::DistanceRunner::Database
126
126
  def batch_data_to_db(metric, data)
127
127
  db = tmp_dbs[metric]
128
128
  table = metric == :haai ? :aai : metric
129
- `cp #{db} ~/here.db`
130
129
  SQLite3::Database.new(db) do |conn|
131
130
  data.each do |k, v|
132
131
  sql = <<~SQL
@@ -102,7 +102,8 @@ class MiGA::DistanceRunner
102
102
  cl_path = res.file_path :clades_ani95
103
103
  if !cl_path.nil? && File.size?(cl_path) && tsk[0] == :clade_finding
104
104
  clades = File.foreach(cl_path).map { |i| i.chomp.split(',') }
105
- sbj_datasets = clades.find { |i| i.include?(closest[:ds]) }
105
+ sbj_dataset_names = clades.find { |i| i.include?(closest[:ds]) }
106
+ sbj_datasets = sbj_dataset_names&.map { |i| ref_project.dataset(i) }
106
107
  ani_after_aai(sbj_datasets, 80.0) if sbj_datasets
107
108
  end
108
109
 
@@ -10,12 +10,12 @@ FastANI fastANI https://github.com/ParBLiSS/FastANI Required version: 1.1+
10
10
  HMMer 3.0+ hmmsearch http://hmmer.janelia.org/software
11
11
  Bedtools bedtools http://bedtools.readthedocs.org/en/latest/
12
12
  Prodigal prodigal http://prodigal.ornl.gov
13
- IDBA idba_ud http://i.cs.hku.hk/~alse/hkubrg/projects/idba
14
13
  MCL mcl http://micans.org/mcl/
15
14
  Barrnap barrnap http://www.vicbioinformatics.com/software.barrnap.shtml
16
- Scythe scythe https://github.com/vsbuffalo/scythe Required version: 0.991+
17
- FastQC fastqc http://www.bioinformatics.babraham.ac.uk/projects/fastqc
18
- SolexaQA++ SolexaQA++ http://solexaqa.sourceforge.net Required version: v3.1.3+
15
+ IDBA (reads) idba_ud http://i.cs.hku.hk/~alse/hkubrg/projects/idba
16
+ Scythe (reads) scythe https://github.com/vsbuffalo/scythe Required version: 0.991+
17
+ FastQC (reads) fastqc http://www.bioinformatics.babraham.ac.uk/projects/fastqc
18
+ SolexaQA++ (reads) SolexaQA++ http://solexaqa.sourceforge.net Required version: v3.1.3+
19
19
  OpenJDK (rdp) java https://adoptopenjdk.net/ Any Java VM would work
20
20
  MyTaxa (mytaxa) MyTaxa http://enve-omics.ce.gatech.edu/mytaxa
21
21
  Krona (mytaxa) ktImportText https://github.com/marbl/Krona/wiki
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.25.2
4
+ version: 0.7.26.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-25 00:00:00.000000000 Z
11
+ date: 2021-03-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons
@@ -187,6 +187,7 @@ files:
187
187
  - lib/miga/json.rb
188
188
  - lib/miga/lair.rb
189
189
  - lib/miga/metadata.rb
190
+ - lib/miga/parallel.rb
190
191
  - lib/miga/project.rb
191
192
  - lib/miga/project/base.rb
192
193
  - lib/miga/project/dataset.rb
@@ -554,7 +555,7 @@ homepage: http://enve-omics.ce.gatech.edu/miga
554
555
  licenses:
555
556
  - Artistic-2.0
556
557
  metadata: {}
557
- post_install_message:
558
+ post_install_message:
558
559
  rdoc_options:
559
560
  - lib
560
561
  - README.md
@@ -576,7 +577,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
576
577
  version: '0'
577
578
  requirements: []
578
579
  rubygems_version: 3.1.4
579
- signing_key:
580
+ signing_key:
580
581
  specification_version: 4
581
582
  summary: MiGA
582
583
  test_files: []