miga-base 1.1.1.0 → 1.1.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a666ed2219c2e1573dff12caca0b841d4c649bad62f4f5c0d274fc2c5407b178
4
- data.tar.gz: 9e6a46b35a83b8f2349bb2a676f6c2fba7e375b52156734b636084d68c4626af
3
+ metadata.gz: 4603bf75db8f82a1a30c1d6da1ab045139b13bf1eec5fd2d5be5c79fb2ac0442
4
+ data.tar.gz: 24324b35a885453c01fada6fb07527616b3a952ab4ccb9b9c818c5d06e580acd
5
5
  SHA512:
6
- metadata.gz: 61abc23642093894f068e1edc6252fb7a4e95035284f5469a0c955aebdadbdd2d62a76f395c933c7e819498e9a3b7685dd7257afe6263bc0b51111b141d00a13
7
- data.tar.gz: 72f7024d6d5594a794dd5d7ca68d7b5896d28109a2800189981c9ba1fed85a3ad2e51303a547991fc998360426a5e85fe03dc9e7a61d38520744b6ed2dece8b9
6
+ metadata.gz: d9e555d736e3987d4afc45707307b7f95d2164cb8db822ad639e0f71e919c67972bff76aefdc7b0a1a1e7120e3100b863a8dde14470066ac7d4ab0b5be5c20f7
7
+ data.tar.gz: 54d1823817cb2d4220d885e84e941cf5e4df88f850824cf3837ed716879aec4733f2d2df98bedc03aa29878a1e5bd7641b015c76738eed841d08046c9f50d1bb
@@ -12,9 +12,10 @@ module MiGA::Daemon::Base
12
12
  k = k.to_sym
13
13
  unless v.nil?
14
14
  case k
15
- when :latency, :maxjobs, :ppn, :ppn_project, :format_version, :verbosity
15
+ when :latency, :maxjobs, :ppn, :ppn_project, :format_version, :verbosity,
16
+ :skip_maintenance
16
17
  v = v.to_i
17
- if !force && v == 0 && k != :verbosity
18
+ if !force && v == 0 && !%i[verbosity skip_maintenance].include?(k)
18
19
  raise "Daemon's #{k} cannot be set to zero"
19
20
  end
20
21
  when :shutdown_when_done, :show_log, :bypass_maintenance
@@ -50,6 +51,12 @@ module MiGA::Daemon::Base
50
51
  runopts(:nodelist)
51
52
  end
52
53
 
54
+ ##
55
+ # Returns the number of times maintenance should be skipped before running
56
+ def skip_maintenance
57
+ runopts(:skip_maintenance) || 0
58
+ end
59
+
53
60
  ##
54
61
  # Returns the running option +opt+ in jobs for +what+. +what+ can be
55
62
  # +:dataset+ or +:projects+
data/lib/miga/daemon.rb CHANGED
@@ -93,7 +93,7 @@ class MiGA::Daemon < MiGA::MiGA
93
93
  flush!
94
94
  if (loop_i % 12).zero?
95
95
  purge!
96
- queue_maintenance
96
+ queue_maintenance if (loop_i % (12 * (skip_maintenance + 1))).zero?
97
97
  end
98
98
  save_status
99
99
  sleep(latency)
data/lib/miga/dataset.rb CHANGED
@@ -107,7 +107,7 @@ class MiGA::Dataset < MiGA::MiGA
107
107
  metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
108
108
  metadata[:inactive] = true
109
109
  metadata.save
110
- project.recalculate_tasks('Reference dataset inactivated') if ref?
110
+ project.recalculate_tasks("Reference dataset inactivated: #{name}") if ref?
111
111
  pull_hook :on_inactivate
112
112
  end
113
113
 
@@ -117,7 +117,7 @@ class MiGA::Dataset < MiGA::MiGA
117
117
  metadata[:inactive] = nil
118
118
  metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
119
119
  metadata.save
120
- project.recalculate_tasks('Reference dataset activated') if ref?
120
+ project.recalculate_tasks("Reference dataset activated: #{name}") if ref?
121
121
  pull_hook :on_activate
122
122
  end
123
123
 
@@ -52,7 +52,9 @@ module MiGA::Project::Dataset
52
52
  @metadata[:datasets] << name
53
53
  @dataset_names_hash = nil # Ensure loading even if +do_not_save+ is true
54
54
  save
55
- recalculate_tasks('New reference dataset added') if d.ref? && d.active?
55
+ if d.ref? && d.active?
56
+ recalculate_tasks("Reference dataset added: #{d.name}")
57
+ end
56
58
  pull_hook(:on_add_dataset, name)
57
59
  end
58
60
  dataset(name)
@@ -66,7 +68,9 @@ module MiGA::Project::Dataset
66
68
 
67
69
  self.metadata[:datasets].delete(name)
68
70
  save
69
- recalculate_tasks('Reference dataset unlinked') if d.ref? && d.active?
71
+ if d.ref? && d.active?
72
+ recalculate_tasks("Reference dataset unlinked: #{d.name}")
73
+ end
70
74
  pull_hook(:on_unlink_dataset, name)
71
75
  d
72
76
  end
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.1, 1, 0].freeze
15
+ VERSION = [1.1, 2, 2].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2021, 10, 29)
23
+ VERSION_DATE = Date.new(2021, 11, 9)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -583,6 +583,24 @@ class input_file:
583
583
  self.initial_state = "protein+HMM"
584
584
 
585
585
  self.verbose = verbosity
586
+
587
+ #r_scripts_loc = os.path.dirname(sys.modules['metapop'].__file__) + "/metapop_r/"
588
+ #"00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
589
+ self.hmm_path = None
590
+ try:
591
+ #Try to locate the data bundled as it would be with a pip/conda install.
592
+ script_path = os.path.dirname(sys.modules['fastAAI_HMM_models'].__file__)
593
+ hmm_complete_model = script_path + '/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm'
594
+ self.hmm_path = str(hmm_complete_model)
595
+ #Check that the file exists or fail to the except.
596
+ fh = open(self.hmm_path)
597
+ fh.close()
598
+ except:
599
+ #Look in the same dir as the script; old method/MiGA friendly
600
+ script_path = Path(__file__)
601
+ script_dir = script_path.parent
602
+ hmm_complete_model = script_dir / "00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
603
+ self.hmm_path = str(hmm_complete_model)
586
604
 
587
605
  #Functions for externally setting status and file paths of particular types
588
606
  def set_genome(self, path):
@@ -2701,6 +2719,7 @@ def merge_db_thread_starter(rev_index, per_db_accs):
2701
2719
  accs_per_db = per_db_accs
2702
2720
 
2703
2721
 
2722
+
2704
2723
  def merge_db(recipient, donors, verbose, threads):
2705
2724
  #Prettier on the CLI
2706
2725
 
@@ -2828,111 +2847,109 @@ def merge_db(recipient, donors, verbose, threads):
2828
2847
 
2829
2848
  all_accessions = list(all_accessions)
2830
2849
 
2831
- acc_args = [(acc, donor_dbs, recipient) for acc in all_accessions]
2832
-
2833
- if not os.path.exists("FastAAI_temp"):
2834
- os.mkdir("FastAAI_temp")
2835
2850
 
2836
2851
  print("")
2837
2852
  print("Formatting data to add to database. Started at", curtime())
2838
2853
 
2839
- if verbose:
2840
- print("")
2841
- count = 0
2842
- total_counts = len(acc_args)
2843
- try:
2844
- percentage = (count/total_counts)*100
2845
- sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2846
- sys.stdout.flush()
2847
- except:
2848
- #It's not really a big deal if the progress bar cannot be printed.
2849
- pass
2850
-
2851
- last_pct = 0
2852
-
2853
- pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
2854
+ temp_dir = tempfile.mkdtemp()
2855
+ try:
2856
+ acc_args = [(acc, donor_dbs, recipient, temp_dir) for acc in all_accessions]
2854
2857
 
2855
- quiverfull = []
2856
- for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
2857
- acc = result[0]
2858
- child = result[1]
2859
- #sub_gak = result[2]
2860
-
2861
- quiverfull.append([acc, child])
2862
- #gaks.extend(sub_gak)
2863
-
2864
2858
  if verbose:
2865
- count += 1
2859
+ print("")
2860
+ count = 0
2861
+ total_counts = len(acc_args)
2866
2862
  try:
2867
2863
  percentage = (count/total_counts)*100
2868
- log_time = curtime()
2869
- sys.stdout.write('\033[A')
2870
- sys.stdout.flush()
2871
2864
  sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2872
2865
  sys.stdout.flush()
2873
2866
  except:
2874
2867
  #It's not really a big deal if the progress bar cannot be printed.
2875
2868
  pass
2876
-
2877
- pool.close()
2878
- pool.join()
2879
-
2880
- print("")
2881
- print("Adding data to final database. Started at", curtime())
2882
-
2883
- if verbose:
2884
- print("")
2885
2869
 
2886
- count = 0
2887
- total_counts = len(acc_args)
2888
- try:
2889
- percentage = (count/total_counts)*100
2890
- sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2891
- sys.stdout.flush()
2892
- except:
2893
- #It's not really a big deal if the progress bar cannot be printed.
2894
- pass
2895
-
2896
- last_pct = 0
2897
-
2898
- recipient.activate_connection()
2899
- genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
2900
- genome_reindex = []
2901
- for g in joint_genome_index:
2902
- genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
2870
+ last_pct = 0
2903
2871
 
2904
- recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
2905
- recipient.connection.commit()
2906
-
2907
- del genome_reindex
2908
-
2909
- for result in quiverfull:
2910
- acc = result[0]
2911
- child = result[1]
2872
+ pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
2912
2873
 
2913
- recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
2874
+ quiverfull = []
2875
+ for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
2876
+ acc = result[0]
2877
+ child = result[1]
2878
+ #sub_gak = result[2]
2879
+
2880
+ quiverfull.append([acc, child])
2881
+ #gaks.extend(sub_gak)
2882
+
2883
+ if verbose:
2884
+ count += 1
2885
+ try:
2886
+ percentage = (count/total_counts)*100
2887
+ log_time = curtime()
2888
+ sys.stdout.write('\033[A')
2889
+ sys.stdout.flush()
2890
+ sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2891
+ sys.stdout.flush()
2892
+ except:
2893
+ #It's not really a big deal if the progress bar cannot be printed.
2894
+ pass
2895
+
2896
+ pool.close()
2897
+ pool.join()
2898
+
2899
+ print("")
2900
+ print("Adding data to final database. Started at", curtime())
2914
2901
 
2915
2902
  if verbose:
2916
- count += 1
2903
+ print("")
2904
+
2905
+ count = 0
2906
+ total_counts = len(acc_args)
2917
2907
  try:
2918
2908
  percentage = (count/total_counts)*100
2919
- log_time = curtime()
2920
- sys.stdout.write('\033[A')
2921
- sys.stdout.flush()
2922
2909
  sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2923
2910
  sys.stdout.flush()
2924
2911
  except:
2925
2912
  #It's not really a big deal if the progress bar cannot be printed.
2926
2913
  pass
2927
-
2928
- os.rmdir("FastAAI_temp")
2929
- ''' We're only ever increasing the DB size, so we don't actually need to vacuum it.
2930
- if recip_check != "created":
2931
- print("")
2932
- print("Cleaning up the database after the update. This may take a while.")
2933
- recipient.connection.execute("VACUUM")
2934
- recipient.connection.close()
2935
- '''
2914
+
2915
+ last_pct = 0
2916
+
2917
+ recipient.activate_connection()
2918
+ genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
2919
+ genome_reindex = []
2920
+ for g in joint_genome_index:
2921
+ genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
2922
+
2923
+ recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
2924
+ recipient.connection.commit()
2925
+
2926
+ del genome_reindex
2927
+
2928
+ for result in quiverfull:
2929
+ acc = result[0]
2930
+ child = result[1]
2931
+
2932
+ recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
2933
+
2934
+ if verbose:
2935
+ count += 1
2936
+ try:
2937
+ percentage = (count/total_counts)*100
2938
+ log_time = curtime()
2939
+ sys.stdout.write('\033[A')
2940
+ sys.stdout.flush()
2941
+ sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2942
+ sys.stdout.flush()
2943
+ except:
2944
+ #It's not really a big deal if the progress bar cannot be printed.
2945
+ pass
2946
+ except:
2947
+ #Error
2948
+ shutil.rmtree(temp_dir)
2949
+ finally:
2950
+ #Success
2951
+ shutil.rmtree(temp_dir)
2952
+
2936
2953
  print("\nDatabases merged!")
2937
2954
 
2938
2955
  return None
@@ -2947,14 +2964,14 @@ def pull_and_merge_accession(args):
2947
2964
  accession_inverter[accession_index[acc]] = sql_friendly_accession
2948
2965
 
2949
2966
  #joint_genome_index, accession_index, accession_inverter, accs_per_db are global already.
2950
- acc, donor_dbs, recipient = args[0], args[1], args[2]
2967
+ acc, donor_dbs, recipient, temp = args[0], args[1], args[2], args[3]
2951
2968
 
2952
2969
  acc_name = accession_inverter[acc]
2953
2970
  acc_name_gens = acc_name + "_genomes"
2954
2971
 
2955
2972
  query_sql = "SELECT * FROM " + acc_name
2956
2973
 
2957
- temp_db = fastaai_database("FastAAI_temp/"+acc_name+".db")
2974
+ temp_db = fastaai_database(os.path.normpath(temp+"/"+acc_name+".db"))
2958
2975
  temp_db.activate_connection()
2959
2976
 
2960
2977
  create_command = "CREATE TABLE IF NOT EXISTS " + acc_name + " (kmer INTEGER PRIMARY KEY, genomes array)"
@@ -3627,4 +3644,4 @@ def main():
3627
3644
  if __name__ == "__main__":
3628
3645
  main()
3629
3646
 
3630
-
3647
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1.0
4
+ version: 1.1.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-29 00:00:00.000000000 Z
11
+ date: 2021-11-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons