miga-base 1.1.3.0 → 1.1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 73de682930481bd837b829588081e2c9e70a87054e9e1d91b7d40bf319030349
4
- data.tar.gz: b0ed9f7f1acf8fb2530fde84803938e8f3d7fac3400a629c3db88779dd9a679f
3
+ metadata.gz: 648be93307cb7996121ae4bac560078d6e9647f104efc4f0ab4d3b43751cd97d
4
+ data.tar.gz: a84201cfcce7903702a62f6c8e73c2b0d02c364f680f93ef5ac27470c7ad3e1a
5
5
  SHA512:
6
- metadata.gz: 4477253800d6a04f3b8e612ed8f5af8ccdb47e5f6aff6efdf2539d252daa7cbc70d009f28ec75fcada925420590614589effce76e465a93d9cb2a7ce093d79ff
7
- data.tar.gz: bbb998c715274dc6b000fa3fc4b9f1f550867b78707b4b5eaa43c24b692939769c2958c3beef0bc10f6dbfd9f38a63b143b2617e44208550b19aa837c45711aa
6
+ metadata.gz: 93c58af91739aea6202085c0f4690cbd344fe873fb212e01780ac5fcdd240cdbd6a0b52047c4ea2582f56a0205976e3a46994646c2dd9b28d230fa655380e4e6
7
+ data.tar.gz: 0f5d9082d800fb54aff6b47413da9135b170d683b400a5641612936a541f238dd85e7ab768877ee67247be7c5ef5916f5b794fad9fb372a10184ca083ce98eb6
@@ -59,13 +59,14 @@ module MiGA::Cli::Action::Doctor::Base
59
59
  next if (lineno += 1) == 1
60
60
 
61
61
  r = ln.split("\t")
62
- next unless [1, 2].map { |i| p.dataset(r[i]).nil? }.any?
62
+ names = [r[0], r[1]]
63
+ next unless names.any? { |i| p.dataset(i).nil? }
63
64
 
64
- [1, 2].each do |i|
65
- if p.dataset(r[i]).nil? || !p.dataset(r[i]).active?
66
- notok[r[i]] = true
65
+ names.each do |i|
66
+ if p.dataset(i).nil? || !p.dataset(i).active?
67
+ notok[i] = true
67
68
  else
68
- fix[r[i]] = true
69
+ fix[i] = true
69
70
  end
70
71
  end
71
72
  end
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.1, 3, 0].freeze
15
+ VERSION = [1.1, 3, 4].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2021, 11, 21)
23
+ VERSION_DATE = Date.new(2021, 11, 30)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -2702,6 +2702,8 @@ def merge_db_opts():
2702
2702
 
2703
2703
  parser.add_argument('-d', '--donors', dest = 'donors', default = None, help = 'Comma-separated string of paths to one or more donor databases. The genomes FROM the donors will be added TO the recipient and the donors will be unaltered')
2704
2704
 
2705
+ parser.add_argument('--donor_file', dest = 'donor_file', default = None, help = 'Alternative way to supply donors. A file containing paths to the donor databases, 1 per line')
2706
+
2705
2707
  parser.add_argument('-r', '--recipient', dest = 'recipient', default = None, help = 'Path to the recipient database. Any genomes FROM the donor database not already in the recipient will be added to this database.')
2706
2708
 
2707
2709
  parser.add_argument('--verbose', dest = 'verbose', action='store_true', help = 'Print minor updates to console. Major updates are printed regardless.')
@@ -2720,16 +2722,23 @@ def merge_db_thread_starter(rev_index, per_db_accs):
2720
2722
 
2721
2723
 
2722
2724
 
2723
- def merge_db(recipient, donors, verbose, threads):
2725
+ def merge_db(recipient, donors, donor_file, verbose, threads):
2724
2726
  #Prettier on the CLI
2725
-
2727
+
2728
+ if donor_file is not None:
2729
+ fh = agnostic_reader(donor_file)
2730
+ donors = [line.strip() for line in fh]
2731
+ fh.close()
2732
+
2726
2733
  if donors is None or recipient is None:
2727
2734
  print("Either donor or target not given. FastAAI is exiting.")
2728
2735
  return None
2729
2736
 
2730
2737
  print("")
2731
2738
 
2732
- donors = donors.split(",")
2739
+ if donor_file is None:
2740
+ donors = donors.split(",")
2741
+
2733
2742
  valid_donors = []
2734
2743
  for d in donors:
2735
2744
  if os.path.exists(d):
@@ -2945,10 +2954,12 @@ def merge_db(recipient, donors, verbose, threads):
2945
2954
  pass
2946
2955
  except:
2947
2956
  #Error
2948
- shutil.rmtree(temp_dir)
2957
+ if os.path.exists(temp_dir):
2958
+ shutil.rmtree(temp_dir)
2949
2959
  finally:
2950
2960
  #Success
2951
- shutil.rmtree(temp_dir)
2961
+ if os.path.exists(temp_dir):
2962
+ shutil.rmtree(temp_dir)
2952
2963
 
2953
2964
  print("\nDatabases merged!")
2954
2965
 
@@ -3454,10 +3465,11 @@ def main():
3454
3465
 
3455
3466
  recipient = opts.recipient
3456
3467
  donors = opts.donors
3468
+ donor_file = opts.donor_file
3457
3469
  verbose = opts.verbose
3458
3470
  threads = opts.threads
3459
3471
 
3460
- merge_db(recipient, donors, verbose, threads)
3472
+ merge_db(recipient, donors, donor_file, verbose, threads)
3461
3473
 
3462
3474
  #################### Query files vs DB ########################
3463
3475
 
@@ -151,22 +151,36 @@ module MiGA::DistanceRunner::Commands
151
151
  donors << tgt_idx if tgt_idx
152
152
  end
153
153
  return nil if donors.empty?
154
- run_cmd <<~CMD
155
- FastAAI merge_db --donors "#{donors.join(',')}" \
156
- --recipient "#{f1 = tmp_file}" --threads #{opts[:thr]}
157
- CMD
154
+
155
+ # Build target database
156
+ f1 = tmp_file
157
+ if donors.size == 1
158
+ File.copy(donors.first, f1)
159
+ else
160
+ File.open(f0 = tmp_file, 'w') { |fh| donors.each { |i| fh.puts i } }
161
+ run_cmd(
162
+ <<~CMD
163
+ FastAAI merge_db --threads #{opts[:thr]} \
164
+ --donor_file "#{f0}" --recipient "#{f1}"
165
+ CMD
166
+ )
167
+ raise "Cannot merge databases into: #{f1}" unless File.size?(f1)
168
+ end
158
169
 
159
170
  # Run FastAAI
160
- run_cmd <<~CMD
161
- FastAAI db_query --query "#{qry_idx}" --target "#{f1}" \
162
- --output "#{f2 = tmp_file}" --threads #{opts[:thr]} \
163
- --do_stdev
164
- CMD
171
+ run_cmd(
172
+ <<~CMD
173
+ FastAAI db_query --query "#{qry_idx}" --target "#{f1}" \
174
+ --output "#{f2 = tmp_file}" --threads #{opts[:thr]} \
175
+ --do_stdev
176
+ CMD
177
+ )
178
+ raise "Cannot find FastAAI output directory: #{f2}" unless Dir.exist?(f2)
165
179
 
166
180
  # Save values in the databases
167
181
  haai_data = {}
168
182
  aai_data = {}
169
- # Ugly workaround to the insistence of FastAAI to not provide the files
183
+ # Ugly workaround to the insistence of FastAAI not to provide the files
170
184
  # I ask for ;-)
171
185
  qry_results = File.basename(qry_idx, '.faix') + '_results.txt'
172
186
  out_file = File.join(f2, 'results', qry_results)
@@ -214,6 +228,6 @@ module MiGA::DistanceRunner::Commands
214
228
 
215
229
  def run_cmd(cmd)
216
230
  puts "CMD: #{cmd}"
217
- `#{cmd}`
231
+ puts `#{cmd} 2>&1`
218
232
  end
219
233
  end
@@ -127,6 +127,7 @@ module MiGA::DistanceRunner::Database
127
127
  db = tmp_dbs[metric]
128
128
  table = metric == :haai ? :aai : metric
129
129
  SQLite3::Database.new(db) do |conn|
130
+ conn.execute('BEGIN TRANSACTION')
130
131
  data.each do |k, v|
131
132
  sql = <<~SQL
132
133
  insert into #{table} (
@@ -135,6 +136,7 @@ module MiGA::DistanceRunner::Database
135
136
  SQL
136
137
  conn.execute(sql, [dataset.name, k] + v)
137
138
  end
139
+ conn.execute('COMMIT')
138
140
  end
139
141
  checkpoint(metric)
140
142
  end
@@ -7,9 +7,12 @@ module MiGA::SubcladeRunner::Pipeline
7
7
  aai90: [:aai_distances, opts[:gsp_aai], :aai]
8
8
  }
9
9
  tasks.each do |k, par|
10
+ # Run only the requested metric
11
+ next unless par[2].to_s == opts[:gsp_metric]
12
+
10
13
  # Final output
11
14
  ogs_file = "miga-project.#{k}-clades"
12
- next if File.size? ogs_file
15
+ next if File.size?(ogs_file)
13
16
 
14
17
  # Build ABC files
15
18
  abc_path = tmp_file("#{k}.abc")
@@ -20,7 +23,7 @@ module MiGA::SubcladeRunner::Pipeline
20
23
  next if ln =~ /^a\tb\tvalue\t/
21
24
 
22
25
  r = ln.chomp.split("\t")
23
- ofh.puts "G>#{r[0]}\tG>#{r[1]}\t#{r[2]}" if r[2].to_f >= par[1]
26
+ ofh.puts("G>#{r[0]}\tG>#{r[1]}\t#{r[2]}") if r[2].to_f >= par[1]
24
27
  end
25
28
  end
26
29
  ofh.close
@@ -29,16 +32,14 @@ module MiGA::SubcladeRunner::Pipeline
29
32
  `ogs.mcl.rb -o '#{ogs_file}.tmp' --abc '#{abc_path}' -t '#{opts[:thr]}'`
30
33
  File.open(ogs_file, 'w') do |fh|
31
34
  File.foreach("#{ogs_file}.tmp").with_index do |ln, lno|
32
- fh.puts ln if lno > 0
35
+ fh.puts(ln) if lno > 0
33
36
  end
34
37
  end
35
38
  File.unlink "#{ogs_file}.tmp"
36
39
  else
37
- FileUtils.touch ogs_file
38
- end
39
- if par[2].to_s == opts[:gsp_metric]
40
- FileUtils.cp(ogs_file, "miga-project.gsp-clades")
40
+ FileUtils.touch(ogs_file)
41
41
  end
42
+ FileUtils.cp(ogs_file, 'miga-project.gsp-clades')
42
43
  end
43
44
 
44
45
  # Find genomospecies medoids
@@ -67,7 +68,7 @@ module MiGA::SubcladeRunner::Pipeline
67
68
  metric_res = project.result(step) or raise "Incomplete step #{step}"
68
69
  matrix = metric_res.file_path(:matrix)
69
70
  `Rscript '#{src}' '#{matrix}' miga-project '#{opts[:thr]}' \
70
- miga-project.ani95-medoids '#{opts[:run_clades] ? 'cluster' : 'empty'}'`
71
+ miga-project.gsp-medoids '#{opts[:run_clades] ? 'cluster' : 'empty'}'`
71
72
  if File.exist? 'miga-project.nwk'
72
73
  File.rename('miga-project.nwk', "miga-project.#{metric}.nwk")
73
74
  end
data/utils/subclades.R CHANGED
@@ -26,14 +26,18 @@ subclades <- function(ani_file, out_base, thr = 1, ani.d = dist(0), sel = NA) {
26
26
  # Normalize input matrix
27
27
  dist_rds <- paste(out_base, "dist.rds", sep = ".")
28
28
  if (!missing(ani_file)) {
29
- if(length(ani.d) == 0 && !file.exists(dist_rds)){
30
- # Read from ani_file
31
- ani.d <- ani_distance(ani_file, sel)
32
- if (is.null(ani.d)) {
33
- generate_empty_files(out_base)
34
- return(NULL)
29
+ if (length(ani.d) == 0) {
30
+ if (file.exists(dist_rds)) {
31
+ ani.d <- readRDS(dist_rds)
35
32
  } else {
36
- saveRDS(ani.d, dist_rds)
33
+ # Read from ani_file
34
+ ani.d <- ani_distance(ani_file, sel)
35
+ if (is.null(ani.d)) {
36
+ generate_empty_files(out_base)
37
+ return(NULL)
38
+ } else {
39
+ saveRDS(ani.d, dist_rds)
40
+ }
37
41
  }
38
42
  }
39
43
  }
@@ -104,17 +108,6 @@ subclade_clustering <- function (out_base, thr, ani.d, dist_rds) {
104
108
  }
105
109
  if (length(labels(ani.d)) <= 8L) return(list())
106
110
 
107
- # Build tree
108
- say("Tree")
109
- ani.ph <- bionj(ani.d)
110
- say("- Write")
111
- express.ori <- options("expressions")$expressions
112
- if(express.ori < ani.ph$Nnode * 4){
113
- options(expressions=min(c(5e7, ani.ph$Nnode * 4)))
114
- }
115
- write.tree(ani.ph, paste(out_base, ".nwk", sep = ""))
116
- options(expressions=express.ori)
117
-
118
111
  # Silhouette
119
112
  say("Silhouette")
120
113
  nn <- length(labels(ani.d))
@@ -146,6 +139,17 @@ subclade_clustering <- function (out_base, thr, ani.d, dist_rds) {
146
139
  ani.types <- ani.cl$clustering
147
140
  ani.medoids <- ani.cl$medoids
148
141
 
142
+ # Build tree
143
+ say("Tree")
144
+ ani.ph <- bionj(ani.d)
145
+ say("- Write")
146
+ express.ori <- options("expressions")$expressions
147
+ if(express.ori < ani.ph$Nnode * 4){
148
+ options(expressions=min(c(5e7, ani.ph$Nnode * 4)))
149
+ }
150
+ write.tree(ani.ph, paste(out_base, ".nwk", sep = ""))
151
+ options(expressions=express.ori)
152
+
149
153
  # Generate graphic report
150
154
  say("Graphic report")
151
155
  pdf(paste(out_base, ".pdf", sep = ""), 7, 12)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.3.0
4
+ version: 1.1.3.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-21 00:00:00.000000000 Z
11
+ date: 2021-11-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons