miga-base 1.1.0.0 → 1.1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4076b3b3a4a4143ac9100ce4d58fada7615f68ad3e6174445510655f62904867
4
- data.tar.gz: '0975a5feb4c9eb71a474be87dd14b58297ef1aa7bd8612c20f1ce65febbdf980'
3
+ metadata.gz: 70955c93dc93a73a0ac28d3aaa8f75b4d0f2e0e5dc8797d1fc2bf57e969a8fbe
4
+ data.tar.gz: 89ba5a42a7a12a104a12f9116f46b0e107bc82b22731622403d09c2e2b7459c7
5
5
  SHA512:
6
- metadata.gz: ebcb7fe28d415ca9709433975585518eb1ecd8e8270c584b6579da222e4d3733cc20d810787c3f764f6a6136e1a6f09b7cb6b1c00114c3ea9c0885370654f3a7
7
- data.tar.gz: '082bd856ed21487e5de709e2067f1d3453f824e0ece7a77716c6fbe70d88a16c4d295196d5c6133e5667142e25f55f7e48e4a785afd160a14e8195a9b7efa6c2'
6
+ metadata.gz: d57d59d5cb439119dda8f4c8ff90cd3d2d253ff631117c3005b60f321014e61d584c04655f6861de29807cfa2a9c569f2e0e35fde6931cc7140982512a35f13c
7
+ data.tar.gz: a9c1090876f87c32376b477363f95410786a2d2da73f72be3d81a2fcfe28c6f1dc3829b741b1ad4986ded59bdc06999ad775739d8ecbd7694b4c6e40077905ec
@@ -12,9 +12,10 @@ module MiGA::Daemon::Base
12
12
  k = k.to_sym
13
13
  unless v.nil?
14
14
  case k
15
- when :latency, :maxjobs, :ppn, :format_version, :verbosity
15
+ when :latency, :maxjobs, :ppn, :ppn_project, :format_version, :verbosity,
16
+ :skip_maintenance
16
17
  v = v.to_i
17
- if !force && v == 0 && k != :verbosity
18
+ if !force && v == 0 && !%i[verbosity skip_maintenance].include?(k)
18
19
  raise "Daemon's #{k} cannot be set to zero"
19
20
  end
20
21
  when :shutdown_when_done, :show_log, :bypass_maintenance
@@ -51,9 +52,23 @@ module MiGA::Daemon::Base
51
52
  end
52
53
 
53
54
  ##
54
- # Returns Integer indicating the number of CPUs per job
55
- def ppn
56
- runopts(:ppn)
55
+ # Returns the number of times maintenance should be skipped before running
56
+ def skip_maintenance
57
+ runopts(:skip_maintenance) || 0
58
+ end
59
+
60
+ ##
61
+ # Returns the running option +opt+ in jobs for +what+. +what+ can be
62
+ # +:dataset+ or +:project+
63
+ def runopts_for(opt, what)
64
+ runopts(:"#{opt}_#{what}") || runopts(opt)
65
+ end
66
+
67
+ ##
68
+ # Returns Integer indicating the number of CPUs per job, in jobs for +what+.
69
+ # See also #runopts_for
70
+ def ppn(what = :dataset)
71
+ runopts_for(:ppn, what)
57
72
  end
58
73
 
59
74
  ##
data/lib/miga/daemon.rb CHANGED
@@ -93,7 +93,7 @@ class MiGA::Daemon < MiGA::MiGA
93
93
  flush!
94
94
  if (loop_i % 12).zero?
95
95
  purge!
96
- queue_maintenance
96
+ queue_maintenance if (loop_i % (12 * (skip_maintenance + 1))).zero?
97
97
  end
98
98
  save_status
99
99
  sleep(latency)
@@ -231,10 +231,11 @@ class MiGA::Daemon < MiGA::MiGA
231
231
  # Construct the command for the given job definition with current
232
232
  # daemon settings
233
233
  def job_cmd(to_run)
234
+ what = to_run[:ds].nil? ? :project : :dataset
234
235
  vars = {
235
236
  'PROJECT' => project.path,
236
- 'RUNTYPE' => runopts(:type),
237
- 'CORES' => ppn,
237
+ 'RUNTYPE' => runopts_for(:type, what),
238
+ 'CORES' => ppn(what),
238
239
  'MIGA' => MiGA::MiGA.root_path
239
240
  }
240
241
  vars['DATASET'] = to_run[:ds].name unless to_run[:ds].nil?
@@ -246,13 +247,13 @@ class MiGA::Daemon < MiGA::MiGA
246
247
  ),
247
248
  vars: vars.map do |k, v|
248
249
  runopts(:var).miga_variables(key: k, value: v)
249
- end.join(runopts(:varsep)),
250
- cpus: ppn,
250
+ end.join(runopts_for(:varsep, what)),
251
+ cpus: ppn(what),
251
252
  log: File.join(log_dir, "#{to_run[:ds_name]}.log"),
252
253
  task_name: to_run[:task_name],
253
254
  miga: File.join(MiGA::MiGA.root_path, 'bin/miga').shellescape
254
255
  }
255
- runopts(:cmd).miga_variables(var_hsh)
256
+ runopts_for(:cmd, what).miga_variables(var_hsh)
256
257
  end
257
258
 
258
259
  ##
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.1, 0, 0].freeze
15
+ VERSION = [1.1, 2, 1].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2021, 10, 28)
23
+ VERSION_DATE = Date.new(2021, 11, 7)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -583,6 +583,24 @@ class input_file:
583
583
  self.initial_state = "protein+HMM"
584
584
 
585
585
  self.verbose = verbosity
586
+
587
+ #r_scripts_loc = os.path.dirname(sys.modules['metapop'].__file__) + "/metapop_r/"
588
+ #"00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
589
+ self.hmm_path = None
590
+ try:
591
+ #Try to locate the data bundled as it would be with a pip/conda install.
592
+ script_path = os.path.dirname(sys.modules['fastAAI_HMM_models'].__file__)
593
+ hmm_complete_model = script_path + '/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm'
594
+ self.hmm_path = str(hmm_complete_model)
595
+ #Check that the file exists or fail to the except.
596
+ fh = open(self.hmm_path)
597
+ fh.close()
598
+ except:
599
+ #Look in the same dir as the script; old method/MiGA friendly
600
+ script_path = Path(__file__)
601
+ script_dir = script_path.parent
602
+ hmm_complete_model = script_dir / "00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
603
+ self.hmm_path = str(hmm_complete_model)
586
604
 
587
605
  #Functions for externally setting status and file paths of particular types
588
606
  def set_genome(self, path):
@@ -2701,6 +2719,7 @@ def merge_db_thread_starter(rev_index, per_db_accs):
2701
2719
  accs_per_db = per_db_accs
2702
2720
 
2703
2721
 
2722
+
2704
2723
  def merge_db(recipient, donors, verbose, threads):
2705
2724
  #Prettier on the CLI
2706
2725
 
@@ -2828,111 +2847,109 @@ def merge_db(recipient, donors, verbose, threads):
2828
2847
 
2829
2848
  all_accessions = list(all_accessions)
2830
2849
 
2831
- acc_args = [(acc, donor_dbs, recipient) for acc in all_accessions]
2832
-
2833
- if not os.path.exists("FastAAI_temp"):
2834
- os.mkdir("FastAAI_temp")
2835
2850
 
2836
2851
  print("")
2837
2852
  print("Formatting data to add to database. Started at", curtime())
2838
2853
 
2839
- if verbose:
2840
- print("")
2841
- count = 0
2842
- total_counts = len(acc_args)
2843
- try:
2844
- percentage = (count/total_counts)*100
2845
- sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2846
- sys.stdout.flush()
2847
- except:
2848
- #It's not really a big deal if the progress bar cannot be printed.
2849
- pass
2850
-
2851
- last_pct = 0
2852
-
2853
- pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
2854
+ temp_dir = tempfile.mkdtemp()
2855
+ try:
2856
+ acc_args = [(acc, donor_dbs, recipient, temp_dir) for acc in all_accessions]
2854
2857
 
2855
- quiverfull = []
2856
- for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
2857
- acc = result[0]
2858
- child = result[1]
2859
- #sub_gak = result[2]
2860
-
2861
- quiverfull.append([acc, child])
2862
- #gaks.extend(sub_gak)
2863
-
2864
2858
  if verbose:
2865
- count += 1
2859
+ print("")
2860
+ count = 0
2861
+ total_counts = len(acc_args)
2866
2862
  try:
2867
2863
  percentage = (count/total_counts)*100
2868
- log_time = curtime()
2869
- sys.stdout.write('\033[A')
2870
- sys.stdout.flush()
2871
2864
  sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2872
2865
  sys.stdout.flush()
2873
2866
  except:
2874
2867
  #It's not really a big deal if the progress bar cannot be printed.
2875
2868
  pass
2876
-
2877
- pool.close()
2878
- pool.join()
2879
-
2880
- print("")
2881
- print("Adding data to final database. Started at", curtime())
2882
-
2883
- if verbose:
2884
- print("")
2885
2869
 
2886
- count = 0
2887
- total_counts = len(acc_args)
2888
- try:
2889
- percentage = (count/total_counts)*100
2890
- sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2891
- sys.stdout.flush()
2892
- except:
2893
- #It's not really a big deal if the progress bar cannot be printed.
2894
- pass
2895
-
2896
- last_pct = 0
2897
-
2898
- recipient.activate_connection()
2899
- genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
2900
- genome_reindex = []
2901
- for g in joint_genome_index:
2902
- genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
2870
+ last_pct = 0
2903
2871
 
2904
- recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
2905
- recipient.connection.commit()
2906
-
2907
- del genome_reindex
2908
-
2909
- for result in quiverfull:
2910
- acc = result[0]
2911
- child = result[1]
2872
+ pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
2912
2873
 
2913
- recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
2874
+ quiverfull = []
2875
+ for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
2876
+ acc = result[0]
2877
+ child = result[1]
2878
+ #sub_gak = result[2]
2879
+
2880
+ quiverfull.append([acc, child])
2881
+ #gaks.extend(sub_gak)
2882
+
2883
+ if verbose:
2884
+ count += 1
2885
+ try:
2886
+ percentage = (count/total_counts)*100
2887
+ log_time = curtime()
2888
+ sys.stdout.write('\033[A')
2889
+ sys.stdout.flush()
2890
+ sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2891
+ sys.stdout.flush()
2892
+ except:
2893
+ #It's not really a big deal if the progress bar cannot be printed.
2894
+ pass
2895
+
2896
+ pool.close()
2897
+ pool.join()
2898
+
2899
+ print("")
2900
+ print("Adding data to final database. Started at", curtime())
2914
2901
 
2915
2902
  if verbose:
2916
- count += 1
2903
+ print("")
2904
+
2905
+ count = 0
2906
+ total_counts = len(acc_args)
2917
2907
  try:
2918
2908
  percentage = (count/total_counts)*100
2919
- log_time = curtime()
2920
- sys.stdout.write('\033[A')
2921
- sys.stdout.flush()
2922
2909
  sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2923
2910
  sys.stdout.flush()
2924
2911
  except:
2925
2912
  #It's not really a big deal if the progress bar cannot be printed.
2926
2913
  pass
2927
-
2928
- os.rmdir("FastAAI_temp")
2929
- ''' We're only ever increasing the DB size, so we don't actually need to vacuum it.
2930
- if recip_check != "created":
2931
- print("")
2932
- print("Cleaning up the database after the update. This may take a while.")
2933
- recipient.connection.execute("VACUUM")
2934
- recipient.connection.close()
2935
- '''
2914
+
2915
+ last_pct = 0
2916
+
2917
+ recipient.activate_connection()
2918
+ genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
2919
+ genome_reindex = []
2920
+ for g in joint_genome_index:
2921
+ genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
2922
+
2923
+ recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
2924
+ recipient.connection.commit()
2925
+
2926
+ del genome_reindex
2927
+
2928
+ for result in quiverfull:
2929
+ acc = result[0]
2930
+ child = result[1]
2931
+
2932
+ recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
2933
+
2934
+ if verbose:
2935
+ count += 1
2936
+ try:
2937
+ percentage = (count/total_counts)*100
2938
+ log_time = curtime()
2939
+ sys.stdout.write('\033[A')
2940
+ sys.stdout.flush()
2941
+ sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2942
+ sys.stdout.flush()
2943
+ except:
2944
+ #It's not really a big deal if the progress bar cannot be printed.
2945
+ pass
2946
+ except:
2947
+ #Error
2948
+ shutil.rmtree(temp_dir)
2949
+ finally:
2950
+ #Success
2951
+ shutil.rmtree(temp_dir)
2952
+
2936
2953
  print("\nDatabases merged!")
2937
2954
 
2938
2955
  return None
@@ -2947,14 +2964,14 @@ def pull_and_merge_accession(args):
2947
2964
  accession_inverter[accession_index[acc]] = sql_friendly_accession
2948
2965
 
2949
2966
  #joint_genome_index, accession_index, accession_inverter, accs_per_db are global already.
2950
- acc, donor_dbs, recipient = args[0], args[1], args[2]
2967
+ acc, donor_dbs, recipient, temp = args[0], args[1], args[2], args[3]
2951
2968
 
2952
2969
  acc_name = accession_inverter[acc]
2953
2970
  acc_name_gens = acc_name + "_genomes"
2954
2971
 
2955
2972
  query_sql = "SELECT * FROM " + acc_name
2956
2973
 
2957
- temp_db = fastaai_database("FastAAI_temp/"+acc_name+".db")
2974
+ temp_db = fastaai_database(os.path.normpath(temp+"/"+acc_name+".db"))
2958
2975
  temp_db.activate_connection()
2959
2976
 
2960
2977
  create_command = "CREATE TABLE IF NOT EXISTS " + acc_name + " (kmer INTEGER PRIMARY KEY, genomes array)"
@@ -3627,4 +3644,4 @@ def main():
3627
3644
  if __name__ == "__main__":
3628
3645
  main()
3629
3646
 
3630
-
3647
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0.0
4
+ version: 1.1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-28 00:00:00.000000000 Z
11
+ date: 2021-11-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons