miga-base 1.1.0.0 → 1.1.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4076b3b3a4a4143ac9100ce4d58fada7615f68ad3e6174445510655f62904867
4
- data.tar.gz: '0975a5feb4c9eb71a474be87dd14b58297ef1aa7bd8612c20f1ce65febbdf980'
3
+ metadata.gz: 70955c93dc93a73a0ac28d3aaa8f75b4d0f2e0e5dc8797d1fc2bf57e969a8fbe
4
+ data.tar.gz: 89ba5a42a7a12a104a12f9116f46b0e107bc82b22731622403d09c2e2b7459c7
5
5
  SHA512:
6
- metadata.gz: ebcb7fe28d415ca9709433975585518eb1ecd8e8270c584b6579da222e4d3733cc20d810787c3f764f6a6136e1a6f09b7cb6b1c00114c3ea9c0885370654f3a7
7
- data.tar.gz: '082bd856ed21487e5de709e2067f1d3453f824e0ece7a77716c6fbe70d88a16c4d295196d5c6133e5667142e25f55f7e48e4a785afd160a14e8195a9b7efa6c2'
6
+ metadata.gz: d57d59d5cb439119dda8f4c8ff90cd3d2d253ff631117c3005b60f321014e61d584c04655f6861de29807cfa2a9c569f2e0e35fde6931cc7140982512a35f13c
7
+ data.tar.gz: a9c1090876f87c32376b477363f95410786a2d2da73f72be3d81a2fcfe28c6f1dc3829b741b1ad4986ded59bdc06999ad775739d8ecbd7694b4c6e40077905ec
@@ -12,9 +12,10 @@ module MiGA::Daemon::Base
12
12
  k = k.to_sym
13
13
  unless v.nil?
14
14
  case k
15
- when :latency, :maxjobs, :ppn, :format_version, :verbosity
15
+ when :latency, :maxjobs, :ppn, :ppn_project, :format_version, :verbosity,
16
+ :skip_maintenance
16
17
  v = v.to_i
17
- if !force && v == 0 && k != :verbosity
18
+ if !force && v == 0 && !%i[verbosity skip_maintenance].include?(k)
18
19
  raise "Daemon's #{k} cannot be set to zero"
19
20
  end
20
21
  when :shutdown_when_done, :show_log, :bypass_maintenance
@@ -51,9 +52,23 @@ module MiGA::Daemon::Base
51
52
  end
52
53
 
53
54
  ##
54
- # Returns Integer indicating the number of CPUs per job
55
- def ppn
56
- runopts(:ppn)
55
+ # Returns the number of times maintenance should be skipped before running
56
+ def skip_maintenance
57
+ runopts(:skip_maintenance) || 0
58
+ end
59
+
60
+ ##
61
+ # Returns the running option +opt+ in jobs for +what+. +what+ can be
62
+ # +:dataset+ or +:projects+
63
+ def runopts_for(opt, what)
64
+ runopts(:"#{opt}_#{what}") || runopts(opt)
65
+ end
66
+
67
+ ##
68
+ # Returns Integer indicating the number of CPUs per job, in jobs for +what+.
69
+ # See also #runopts_for
70
+ def ppn(what = :dataset)
71
+ runopts_for(:ppn, what)
57
72
  end
58
73
 
59
74
  ##
data/lib/miga/daemon.rb CHANGED
@@ -93,7 +93,7 @@ class MiGA::Daemon < MiGA::MiGA
93
93
  flush!
94
94
  if (loop_i % 12).zero?
95
95
  purge!
96
- queue_maintenance
96
+ queue_maintenance if (loop_i % (12 * (skip_maintenance + 1))).zero?
97
97
  end
98
98
  save_status
99
99
  sleep(latency)
@@ -231,10 +231,11 @@ class MiGA::Daemon < MiGA::MiGA
231
231
  # Construct the command for the given job definition with current
232
232
  # daemon settings
233
233
  def job_cmd(to_run)
234
+ what = to_run[:ds].nil? ? :project : :dataset
234
235
  vars = {
235
236
  'PROJECT' => project.path,
236
- 'RUNTYPE' => runopts(:type),
237
- 'CORES' => ppn,
237
+ 'RUNTYPE' => runopts_for(:type, what),
238
+ 'CORES' => ppn(what),
238
239
  'MIGA' => MiGA::MiGA.root_path
239
240
  }
240
241
  vars['DATASET'] = to_run[:ds].name unless to_run[:ds].nil?
@@ -246,13 +247,13 @@ class MiGA::Daemon < MiGA::MiGA
246
247
  ),
247
248
  vars: vars.map do |k, v|
248
249
  runopts(:var).miga_variables(key: k, value: v)
249
- end.join(runopts(:varsep)),
250
- cpus: ppn,
250
+ end.join(runopts_for(:varsep, what)),
251
+ cpus: ppn(what),
251
252
  log: File.join(log_dir, "#{to_run[:ds_name]}.log"),
252
253
  task_name: to_run[:task_name],
253
254
  miga: File.join(MiGA::MiGA.root_path, 'bin/miga').shellescape
254
255
  }
255
- runopts(:cmd).miga_variables(var_hsh)
256
+ runopts_for(:cmd, what).miga_variables(var_hsh)
256
257
  end
257
258
 
258
259
  ##
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.1, 0, 0].freeze
15
+ VERSION = [1.1, 2, 1].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem relese.
23
- VERSION_DATE = Date.new(2021, 10, 28)
23
+ VERSION_DATE = Date.new(2021, 11, 7)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -583,6 +583,24 @@ class input_file:
583
583
  self.initial_state = "protein+HMM"
584
584
 
585
585
  self.verbose = verbosity
586
+
587
+ #r_scripts_loc = os.path.dirname(sys.modules['metapop'].__file__) + "/metapop_r/"
588
+ #"00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
589
+ self.hmm_path = None
590
+ try:
591
+ #Try to locate the data bundled as it would be with a pip/conda install.
592
+ script_path = os.path.dirname(sys.modules['fastAAI_HMM_models'].__file__)
593
+ hmm_complete_model = script_path + '/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm'
594
+ self.hmm_path = str(hmm_complete_model)
595
+ #Check that the file exists or fail to the except.
596
+ fh = open(self.hmm_path)
597
+ fh.close()
598
+ except:
599
+ #Look in the same dir as the script; old method/MiGA friendly
600
+ script_path = Path(__file__)
601
+ script_dir = script_path.parent
602
+ hmm_complete_model = script_dir / "00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
603
+ self.hmm_path = str(hmm_complete_model)
586
604
 
587
605
  #Functions for externally setting status and file paths of particular types
588
606
  def set_genome(self, path):
@@ -2701,6 +2719,7 @@ def merge_db_thread_starter(rev_index, per_db_accs):
2701
2719
  accs_per_db = per_db_accs
2702
2720
 
2703
2721
 
2722
+
2704
2723
  def merge_db(recipient, donors, verbose, threads):
2705
2724
  #Prettier on the CLI
2706
2725
 
@@ -2828,111 +2847,109 @@ def merge_db(recipient, donors, verbose, threads):
2828
2847
 
2829
2848
  all_accessions = list(all_accessions)
2830
2849
 
2831
- acc_args = [(acc, donor_dbs, recipient) for acc in all_accessions]
2832
-
2833
- if not os.path.exists("FastAAI_temp"):
2834
- os.mkdir("FastAAI_temp")
2835
2850
 
2836
2851
  print("")
2837
2852
  print("Formatting data to add to database. Started at", curtime())
2838
2853
 
2839
- if verbose:
2840
- print("")
2841
- count = 0
2842
- total_counts = len(acc_args)
2843
- try:
2844
- percentage = (count/total_counts)*100
2845
- sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2846
- sys.stdout.flush()
2847
- except:
2848
- #It's not really a big deal if the progress bar cannot be printed.
2849
- pass
2850
-
2851
- last_pct = 0
2852
-
2853
- pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
2854
+ temp_dir = tempfile.mkdtemp()
2855
+ try:
2856
+ acc_args = [(acc, donor_dbs, recipient, temp_dir) for acc in all_accessions]
2854
2857
 
2855
- quiverfull = []
2856
- for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
2857
- acc = result[0]
2858
- child = result[1]
2859
- #sub_gak = result[2]
2860
-
2861
- quiverfull.append([acc, child])
2862
- #gaks.extend(sub_gak)
2863
-
2864
2858
  if verbose:
2865
- count += 1
2859
+ print("")
2860
+ count = 0
2861
+ total_counts = len(acc_args)
2866
2862
  try:
2867
2863
  percentage = (count/total_counts)*100
2868
- log_time = curtime()
2869
- sys.stdout.write('\033[A')
2870
- sys.stdout.flush()
2871
2864
  sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2872
2865
  sys.stdout.flush()
2873
2866
  except:
2874
2867
  #It's not really a big deal if the progress bar cannot be printed.
2875
2868
  pass
2876
-
2877
- pool.close()
2878
- pool.join()
2879
-
2880
- print("")
2881
- print("Adding data to final database. Started at", curtime())
2882
-
2883
- if verbose:
2884
- print("")
2885
2869
 
2886
- count = 0
2887
- total_counts = len(acc_args)
2888
- try:
2889
- percentage = (count/total_counts)*100
2890
- sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2891
- sys.stdout.flush()
2892
- except:
2893
- #It's not really a big deal if the progress bar cannot be printed.
2894
- pass
2895
-
2896
- last_pct = 0
2897
-
2898
- recipient.activate_connection()
2899
- genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
2900
- genome_reindex = []
2901
- for g in joint_genome_index:
2902
- genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
2870
+ last_pct = 0
2903
2871
 
2904
- recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
2905
- recipient.connection.commit()
2906
-
2907
- del genome_reindex
2908
-
2909
- for result in quiverfull:
2910
- acc = result[0]
2911
- child = result[1]
2872
+ pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
2912
2873
 
2913
- recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
2874
+ quiverfull = []
2875
+ for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
2876
+ acc = result[0]
2877
+ child = result[1]
2878
+ #sub_gak = result[2]
2879
+
2880
+ quiverfull.append([acc, child])
2881
+ #gaks.extend(sub_gak)
2882
+
2883
+ if verbose:
2884
+ count += 1
2885
+ try:
2886
+ percentage = (count/total_counts)*100
2887
+ log_time = curtime()
2888
+ sys.stdout.write('\033[A')
2889
+ sys.stdout.flush()
2890
+ sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2891
+ sys.stdout.flush()
2892
+ except:
2893
+ #It's not really a big deal if the progress bar cannot be printed.
2894
+ pass
2895
+
2896
+ pool.close()
2897
+ pool.join()
2898
+
2899
+ print("")
2900
+ print("Adding data to final database. Started at", curtime())
2914
2901
 
2915
2902
  if verbose:
2916
- count += 1
2903
+ print("")
2904
+
2905
+ count = 0
2906
+ total_counts = len(acc_args)
2917
2907
  try:
2918
2908
  percentage = (count/total_counts)*100
2919
- log_time = curtime()
2920
- sys.stdout.write('\033[A')
2921
- sys.stdout.flush()
2922
2909
  sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2923
2910
  sys.stdout.flush()
2924
2911
  except:
2925
2912
  #It's not really a big deal if the progress bar cannot be printed.
2926
2913
  pass
2927
-
2928
- os.rmdir("FastAAI_temp")
2929
- ''' We're only ever increasing the DB size, so we don't actually need to vacuum it.
2930
- if recip_check != "created":
2931
- print("")
2932
- print("Cleaning up the database after the update. This may take a while.")
2933
- recipient.connection.execute("VACUUM")
2934
- recipient.connection.close()
2935
- '''
2914
+
2915
+ last_pct = 0
2916
+
2917
+ recipient.activate_connection()
2918
+ genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
2919
+ genome_reindex = []
2920
+ for g in joint_genome_index:
2921
+ genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
2922
+
2923
+ recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
2924
+ recipient.connection.commit()
2925
+
2926
+ del genome_reindex
2927
+
2928
+ for result in quiverfull:
2929
+ acc = result[0]
2930
+ child = result[1]
2931
+
2932
+ recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
2933
+
2934
+ if verbose:
2935
+ count += 1
2936
+ try:
2937
+ percentage = (count/total_counts)*100
2938
+ log_time = curtime()
2939
+ sys.stdout.write('\033[A')
2940
+ sys.stdout.flush()
2941
+ sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2942
+ sys.stdout.flush()
2943
+ except:
2944
+ #It's not really a big deal if the progress bar cannot be printed.
2945
+ pass
2946
+ except:
2947
+ #Error
2948
+ shutil.rmtree(temp_dir)
2949
+ finally:
2950
+ #Success
2951
+ shutil.rmtree(temp_dir)
2952
+
2936
2953
  print("\nDatabases merged!")
2937
2954
 
2938
2955
  return None
@@ -2947,14 +2964,14 @@ def pull_and_merge_accession(args):
2947
2964
  accession_inverter[accession_index[acc]] = sql_friendly_accession
2948
2965
 
2949
2966
  #joint_genome_index, accession_index, accession_inverter, accs_per_db are global already.
2950
- acc, donor_dbs, recipient = args[0], args[1], args[2]
2967
+ acc, donor_dbs, recipient, temp = args[0], args[1], args[2], args[3]
2951
2968
 
2952
2969
  acc_name = accession_inverter[acc]
2953
2970
  acc_name_gens = acc_name + "_genomes"
2954
2971
 
2955
2972
  query_sql = "SELECT * FROM " + acc_name
2956
2973
 
2957
- temp_db = fastaai_database("FastAAI_temp/"+acc_name+".db")
2974
+ temp_db = fastaai_database(os.path.normpath(temp+"/"+acc_name+".db"))
2958
2975
  temp_db.activate_connection()
2959
2976
 
2960
2977
  create_command = "CREATE TABLE IF NOT EXISTS " + acc_name + " (kmer INTEGER PRIMARY KEY, genomes array)"
@@ -3627,4 +3644,4 @@ def main():
3627
3644
  if __name__ == "__main__":
3628
3645
  main()
3629
3646
 
3630
-
3647
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0.0
4
+ version: 1.1.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-28 00:00:00.000000000 Z
11
+ date: 2021-11-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons