miga-base 1.1.1.0 → 1.1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a666ed2219c2e1573dff12caca0b841d4c649bad62f4f5c0d274fc2c5407b178
4
- data.tar.gz: 9e6a46b35a83b8f2349bb2a676f6c2fba7e375b52156734b636084d68c4626af
3
+ metadata.gz: 4603bf75db8f82a1a30c1d6da1ab045139b13bf1eec5fd2d5be5c79fb2ac0442
4
+ data.tar.gz: 24324b35a885453c01fada6fb07527616b3a952ab4ccb9b9c818c5d06e580acd
5
5
  SHA512:
6
- metadata.gz: 61abc23642093894f068e1edc6252fb7a4e95035284f5469a0c955aebdadbdd2d62a76f395c933c7e819498e9a3b7685dd7257afe6263bc0b51111b141d00a13
7
- data.tar.gz: 72f7024d6d5594a794dd5d7ca68d7b5896d28109a2800189981c9ba1fed85a3ad2e51303a547991fc998360426a5e85fe03dc9e7a61d38520744b6ed2dece8b9
6
+ metadata.gz: d9e555d736e3987d4afc45707307b7f95d2164cb8db822ad639e0f71e919c67972bff76aefdc7b0a1a1e7120e3100b863a8dde14470066ac7d4ab0b5be5c20f7
7
+ data.tar.gz: 54d1823817cb2d4220d885e84e941cf5e4df88f850824cf3837ed716879aec4733f2d2df98bedc03aa29878a1e5bd7641b015c76738eed841d08046c9f50d1bb
@@ -12,9 +12,10 @@ module MiGA::Daemon::Base
12
12
  k = k.to_sym
13
13
  unless v.nil?
14
14
  case k
15
- when :latency, :maxjobs, :ppn, :ppn_project, :format_version, :verbosity
15
+ when :latency, :maxjobs, :ppn, :ppn_project, :format_version, :verbosity,
16
+ :skip_maintenance
16
17
  v = v.to_i
17
- if !force && v == 0 && k != :verbosity
18
+ if !force && v == 0 && !%i[verbosity skip_maintenance].include?(k)
18
19
  raise "Daemon's #{k} cannot be set to zero"
19
20
  end
20
21
  when :shutdown_when_done, :show_log, :bypass_maintenance
@@ -50,6 +51,12 @@ module MiGA::Daemon::Base
50
51
  runopts(:nodelist)
51
52
  end
52
53
 
54
+ ##
55
+ # Returns the number of times maintenance should be skipped before running
56
+ def skip_maintenance
57
+ runopts(:skip_maintenance) || 0
58
+ end
59
+
53
60
  ##
54
61
  # Returns the running option +opt+ in jobs for +what+. +what+ can be
55
62
  # +:dataset+ or +:projects+
data/lib/miga/daemon.rb CHANGED
@@ -93,7 +93,7 @@ class MiGA::Daemon < MiGA::MiGA
93
93
  flush!
94
94
  if (loop_i % 12).zero?
95
95
  purge!
96
- queue_maintenance
96
+ queue_maintenance if (loop_i % (12 * (skip_maintenance + 1))).zero?
97
97
  end
98
98
  save_status
99
99
  sleep(latency)
data/lib/miga/dataset.rb CHANGED
@@ -107,7 +107,7 @@ class MiGA::Dataset < MiGA::MiGA
107
107
  metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
108
108
  metadata[:inactive] = true
109
109
  metadata.save
110
- project.recalculate_tasks('Reference dataset inactivated') if ref?
110
+ project.recalculate_tasks("Reference dataset inactivated: #{name}") if ref?
111
111
  pull_hook :on_inactivate
112
112
  end
113
113
 
@@ -117,7 +117,7 @@ class MiGA::Dataset < MiGA::MiGA
117
117
  metadata[:inactive] = nil
118
118
  metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
119
119
  metadata.save
120
- project.recalculate_tasks('Reference dataset activated') if ref?
120
+ project.recalculate_tasks("Reference dataset activated: #{name}") if ref?
121
121
  pull_hook :on_activate
122
122
  end
123
123
 
@@ -52,7 +52,9 @@ module MiGA::Project::Dataset
52
52
  @metadata[:datasets] << name
53
53
  @dataset_names_hash = nil # Ensure loading even if +do_not_save+ is true
54
54
  save
55
- recalculate_tasks('New reference dataset added') if d.ref? && d.active?
55
+ if d.ref? && d.active?
56
+ recalculate_tasks("Reference dataset added: #{d.name}")
57
+ end
56
58
  pull_hook(:on_add_dataset, name)
57
59
  end
58
60
  dataset(name)
@@ -66,7 +68,9 @@ module MiGA::Project::Dataset
66
68
 
67
69
  self.metadata[:datasets].delete(name)
68
70
  save
69
- recalculate_tasks('Reference dataset unlinked') if d.ref? && d.active?
71
+ if d.ref? && d.active?
72
+ recalculate_tasks("Reference dataset unlinked: #{d.name}")
73
+ end
70
74
  pull_hook(:on_unlink_dataset, name)
71
75
  d
72
76
  end
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.1, 1, 0].freeze
15
+ VERSION = [1.1, 2, 2].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -20,7 +20,7 @@ module MiGA
20
20
 
21
21
  ##
22
22
  # Date of the current gem release.
23
- VERSION_DATE = Date.new(2021, 10, 29)
23
+ VERSION_DATE = Date.new(2021, 11, 9)
24
24
 
25
25
  ##
26
26
  # References of MiGA
@@ -583,6 +583,24 @@ class input_file:
583
583
  self.initial_state = "protein+HMM"
584
584
 
585
585
  self.verbose = verbosity
586
+
587
+ #r_scripts_loc = os.path.dirname(sys.modules['metapop'].__file__) + "/metapop_r/"
588
+ #"00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
589
+ self.hmm_path = None
590
+ try:
591
+ #Try to locate the data bundled as it would be with a pip/conda install.
592
+ script_path = os.path.dirname(sys.modules['fastAAI_HMM_models'].__file__)
593
+ hmm_complete_model = script_path + '/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm'
594
+ self.hmm_path = str(hmm_complete_model)
595
+ #Check that the file exists or fail to the except.
596
+ fh = open(self.hmm_path)
597
+ fh.close()
598
+ except:
599
+ #Look in the same dir as the script; old method/MiGA friendly
600
+ script_path = Path(__file__)
601
+ script_dir = script_path.parent
602
+ hmm_complete_model = script_dir / "00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
603
+ self.hmm_path = str(hmm_complete_model)
586
604
 
587
605
  #Functions for externally setting status and file paths of particular types
588
606
  def set_genome(self, path):
@@ -2701,6 +2719,7 @@ def merge_db_thread_starter(rev_index, per_db_accs):
2701
2719
  accs_per_db = per_db_accs
2702
2720
 
2703
2721
 
2722
+
2704
2723
  def merge_db(recipient, donors, verbose, threads):
2705
2724
  #Prettier on the CLI
2706
2725
 
@@ -2828,111 +2847,109 @@ def merge_db(recipient, donors, verbose, threads):
2828
2847
 
2829
2848
  all_accessions = list(all_accessions)
2830
2849
 
2831
- acc_args = [(acc, donor_dbs, recipient) for acc in all_accessions]
2832
-
2833
- if not os.path.exists("FastAAI_temp"):
2834
- os.mkdir("FastAAI_temp")
2835
2850
 
2836
2851
  print("")
2837
2852
  print("Formatting data to add to database. Started at", curtime())
2838
2853
 
2839
- if verbose:
2840
- print("")
2841
- count = 0
2842
- total_counts = len(acc_args)
2843
- try:
2844
- percentage = (count/total_counts)*100
2845
- sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2846
- sys.stdout.flush()
2847
- except:
2848
- #It's not really a big deal if the progress bar cannot be printed.
2849
- pass
2850
-
2851
- last_pct = 0
2852
-
2853
- pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
2854
+ temp_dir = tempfile.mkdtemp()
2855
+ try:
2856
+ acc_args = [(acc, donor_dbs, recipient, temp_dir) for acc in all_accessions]
2854
2857
 
2855
- quiverfull = []
2856
- for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
2857
- acc = result[0]
2858
- child = result[1]
2859
- #sub_gak = result[2]
2860
-
2861
- quiverfull.append([acc, child])
2862
- #gaks.extend(sub_gak)
2863
-
2864
2858
  if verbose:
2865
- count += 1
2859
+ print("")
2860
+ count = 0
2861
+ total_counts = len(acc_args)
2866
2862
  try:
2867
2863
  percentage = (count/total_counts)*100
2868
- log_time = curtime()
2869
- sys.stdout.write('\033[A')
2870
- sys.stdout.flush()
2871
2864
  sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2872
2865
  sys.stdout.flush()
2873
2866
  except:
2874
2867
  #It's not really a big deal if the progress bar cannot be printed.
2875
2868
  pass
2876
-
2877
- pool.close()
2878
- pool.join()
2879
-
2880
- print("")
2881
- print("Adding data to final database. Started at", curtime())
2882
-
2883
- if verbose:
2884
- print("")
2885
2869
 
2886
- count = 0
2887
- total_counts = len(acc_args)
2888
- try:
2889
- percentage = (count/total_counts)*100
2890
- sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2891
- sys.stdout.flush()
2892
- except:
2893
- #It's not really a big deal if the progress bar cannot be printed.
2894
- pass
2895
-
2896
- last_pct = 0
2897
-
2898
- recipient.activate_connection()
2899
- genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
2900
- genome_reindex = []
2901
- for g in joint_genome_index:
2902
- genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
2870
+ last_pct = 0
2903
2871
 
2904
- recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
2905
- recipient.connection.commit()
2906
-
2907
- del genome_reindex
2908
-
2909
- for result in quiverfull:
2910
- acc = result[0]
2911
- child = result[1]
2872
+ pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
2912
2873
 
2913
- recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
2874
+ quiverfull = []
2875
+ for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
2876
+ acc = result[0]
2877
+ child = result[1]
2878
+ #sub_gak = result[2]
2879
+
2880
+ quiverfull.append([acc, child])
2881
+ #gaks.extend(sub_gak)
2882
+
2883
+ if verbose:
2884
+ count += 1
2885
+ try:
2886
+ percentage = (count/total_counts)*100
2887
+ log_time = curtime()
2888
+ sys.stdout.write('\033[A')
2889
+ sys.stdout.flush()
2890
+ sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2891
+ sys.stdout.flush()
2892
+ except:
2893
+ #It's not really a big deal if the progress bar cannot be printed.
2894
+ pass
2895
+
2896
+ pool.close()
2897
+ pool.join()
2898
+
2899
+ print("")
2900
+ print("Adding data to final database. Started at", curtime())
2914
2901
 
2915
2902
  if verbose:
2916
- count += 1
2903
+ print("")
2904
+
2905
+ count = 0
2906
+ total_counts = len(acc_args)
2917
2907
  try:
2918
2908
  percentage = (count/total_counts)*100
2919
- log_time = curtime()
2920
- sys.stdout.write('\033[A')
2921
- sys.stdout.flush()
2922
2909
  sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2923
2910
  sys.stdout.flush()
2924
2911
  except:
2925
2912
  #It's not really a big deal if the progress bar cannot be printed.
2926
2913
  pass
2927
-
2928
- os.rmdir("FastAAI_temp")
2929
- ''' We're only ever increasing the DB size, so we don't actually need to vacuum it.
2930
- if recip_check != "created":
2931
- print("")
2932
- print("Cleaning up the database after the update. This may take a while.")
2933
- recipient.connection.execute("VACUUM")
2934
- recipient.connection.close()
2935
- '''
2914
+
2915
+ last_pct = 0
2916
+
2917
+ recipient.activate_connection()
2918
+ genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
2919
+ genome_reindex = []
2920
+ for g in joint_genome_index:
2921
+ genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
2922
+
2923
+ recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
2924
+ recipient.connection.commit()
2925
+
2926
+ del genome_reindex
2927
+
2928
+ for result in quiverfull:
2929
+ acc = result[0]
2930
+ child = result[1]
2931
+
2932
+ recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
2933
+
2934
+ if verbose:
2935
+ count += 1
2936
+ try:
2937
+ percentage = (count/total_counts)*100
2938
+ log_time = curtime()
2939
+ sys.stdout.write('\033[A')
2940
+ sys.stdout.flush()
2941
+ sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2942
+ sys.stdout.flush()
2943
+ except:
2944
+ #It's not really a big deal if the progress bar cannot be printed.
2945
+ pass
2946
+ except:
2947
+ #Error
2948
+ shutil.rmtree(temp_dir)
2949
+ finally:
2950
+ #Success
2951
+ shutil.rmtree(temp_dir)
2952
+
2936
2953
  print("\nDatabases merged!")
2937
2954
 
2938
2955
  return None
@@ -2947,14 +2964,14 @@ def pull_and_merge_accession(args):
2947
2964
  accession_inverter[accession_index[acc]] = sql_friendly_accession
2948
2965
 
2949
2966
  #joint_genome_index, accession_index, accession_inverter, accs_per_db are global already.
2950
- acc, donor_dbs, recipient = args[0], args[1], args[2]
2967
+ acc, donor_dbs, recipient, temp = args[0], args[1], args[2], args[3]
2951
2968
 
2952
2969
  acc_name = accession_inverter[acc]
2953
2970
  acc_name_gens = acc_name + "_genomes"
2954
2971
 
2955
2972
  query_sql = "SELECT * FROM " + acc_name
2956
2973
 
2957
- temp_db = fastaai_database("FastAAI_temp/"+acc_name+".db")
2974
+ temp_db = fastaai_database(os.path.normpath(temp+"/"+acc_name+".db"))
2958
2975
  temp_db.activate_connection()
2959
2976
 
2960
2977
  create_command = "CREATE TABLE IF NOT EXISTS " + acc_name + " (kmer INTEGER PRIMARY KEY, genomes array)"
@@ -3627,4 +3644,4 @@ def main():
3627
3644
  if __name__ == "__main__":
3628
3645
  main()
3629
3646
 
3630
-
3647
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1.0
4
+ version: 1.1.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-10-29 00:00:00.000000000 Z
11
+ date: 2021-11-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: daemons