miga-base 1.1.1.0 → 1.1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a666ed2219c2e1573dff12caca0b841d4c649bad62f4f5c0d274fc2c5407b178
4
- data.tar.gz: 9e6a46b35a83b8f2349bb2a676f6c2fba7e375b52156734b636084d68c4626af
3
+ metadata.gz: 8bbb058ee0094bdbf36ddd5e93d8206871de2a774876ad1229d8e681e5a8aa38
4
+ data.tar.gz: 5091f3d1053ae79d02056dddc3eda5cd48c061f5bbd6c78ef185e5f9e24b0d92
5
5
  SHA512:
6
- metadata.gz: 61abc23642093894f068e1edc6252fb7a4e95035284f5469a0c955aebdadbdd2d62a76f395c933c7e819498e9a3b7685dd7257afe6263bc0b51111b141d00a13
7
- data.tar.gz: 72f7024d6d5594a794dd5d7ca68d7b5896d28109a2800189981c9ba1fed85a3ad2e51303a547991fc998360426a5e85fe03dc9e7a61d38520744b6ed2dece8b9
6
+ metadata.gz: 80383eefa8cba0b17b3f21ad6b5d9880acc47e349687dc76c531b01111e665fa2ff144507f5925ca81c70eb642286724b7c2be51531c4b31b1710e5430341f6f
7
+ data.tar.gz: dba9c4d31acab4e50e40d802b10a9268be1a291e99ada00de11fefcacf183aa2a381842980030832fe77db94169239731c9b8a7b264d37d72c3fd8ece4774848
data/lib/miga/version.rb CHANGED
@@ -12,7 +12,7 @@ module MiGA
12
12
  # - String indicating release status:
13
13
  # - rc* release candidate, not released as gem
14
14
  # - [0-9]+ stable release, released as gem
15
- VERSION = [1.1, 1, 0].freeze
15
+ VERSION = [1.1, 1, 1].freeze
16
16
 
17
17
  ##
18
18
  # Nickname for the current major.minor version.
@@ -583,6 +583,24 @@ class input_file:
583
583
  self.initial_state = "protein+HMM"
584
584
 
585
585
  self.verbose = verbosity
586
+
587
+ #r_scripts_loc = os.path.dirname(sys.modules['metapop'].__file__) + "/metapop_r/"
588
+ #"00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
589
+ self.hmm_path = None
590
+ try:
591
+ #Try to locate the data bundled as it would be with a pip/conda install.
592
+ script_path = os.path.dirname(sys.modules['fastAAI_HMM_models'].__file__)
593
+ hmm_complete_model = script_path + '/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm'
594
+ self.hmm_path = str(hmm_complete_model)
595
+ #Check that the file exists or fail to the except.
596
+ fh = open(self.hmm_path)
597
+ fh.close()
598
+ except:
599
+ #Look in the same dir as the script; old method/MiGA friendly
600
+ script_path = Path(__file__)
601
+ script_dir = script_path.parent
602
+ hmm_complete_model = script_dir / "00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
603
+ self.hmm_path = str(hmm_complete_model)
586
604
 
587
605
  #Functions for externally setting status and file paths of particular types
588
606
  def set_genome(self, path):
@@ -2701,6 +2719,7 @@ def merge_db_thread_starter(rev_index, per_db_accs):
2701
2719
  accs_per_db = per_db_accs
2702
2720
 
2703
2721
 
2722
+
2704
2723
  def merge_db(recipient, donors, verbose, threads):
2705
2724
  #Prettier on the CLI
2706
2725
 
@@ -2828,111 +2847,109 @@ def merge_db(recipient, donors, verbose, threads):
2828
2847
 
2829
2848
  all_accessions = list(all_accessions)
2830
2849
 
2831
- acc_args = [(acc, donor_dbs, recipient) for acc in all_accessions]
2832
-
2833
- if not os.path.exists("FastAAI_temp"):
2834
- os.mkdir("FastAAI_temp")
2835
2850
 
2836
2851
  print("")
2837
2852
  print("Formatting data to add to database. Started at", curtime())
2838
2853
 
2839
- if verbose:
2840
- print("")
2841
- count = 0
2842
- total_counts = len(acc_args)
2843
- try:
2844
- percentage = (count/total_counts)*100
2845
- sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2846
- sys.stdout.flush()
2847
- except:
2848
- #It's not really a big deal if the progress bar cannot be printed.
2849
- pass
2850
-
2851
- last_pct = 0
2852
-
2853
- pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
2854
+ temp_dir = tempfile.mkdtemp()
2855
+ try:
2856
+ acc_args = [(acc, donor_dbs, recipient, temp_dir) for acc in all_accessions]
2854
2857
 
2855
- quiverfull = []
2856
- for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
2857
- acc = result[0]
2858
- child = result[1]
2859
- #sub_gak = result[2]
2860
-
2861
- quiverfull.append([acc, child])
2862
- #gaks.extend(sub_gak)
2863
-
2864
2858
  if verbose:
2865
- count += 1
2859
+ print("")
2860
+ count = 0
2861
+ total_counts = len(acc_args)
2866
2862
  try:
2867
2863
  percentage = (count/total_counts)*100
2868
- log_time = curtime()
2869
- sys.stdout.write('\033[A')
2870
- sys.stdout.flush()
2871
2864
  sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2872
2865
  sys.stdout.flush()
2873
2866
  except:
2874
2867
  #It's not really a big deal if the progress bar cannot be printed.
2875
2868
  pass
2876
-
2877
- pool.close()
2878
- pool.join()
2879
-
2880
- print("")
2881
- print("Adding data to final database. Started at", curtime())
2882
-
2883
- if verbose:
2884
- print("")
2885
2869
 
2886
- count = 0
2887
- total_counts = len(acc_args)
2888
- try:
2889
- percentage = (count/total_counts)*100
2890
- sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2891
- sys.stdout.flush()
2892
- except:
2893
- #It's not really a big deal if the progress bar cannot be printed.
2894
- pass
2895
-
2896
- last_pct = 0
2897
-
2898
- recipient.activate_connection()
2899
- genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
2900
- genome_reindex = []
2901
- for g in joint_genome_index:
2902
- genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
2870
+ last_pct = 0
2903
2871
 
2904
- recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
2905
- recipient.connection.commit()
2906
-
2907
- del genome_reindex
2908
-
2909
- for result in quiverfull:
2910
- acc = result[0]
2911
- child = result[1]
2872
+ pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
2912
2873
 
2913
- recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
2874
+ quiverfull = []
2875
+ for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
2876
+ acc = result[0]
2877
+ child = result[1]
2878
+ #sub_gak = result[2]
2879
+
2880
+ quiverfull.append([acc, child])
2881
+ #gaks.extend(sub_gak)
2882
+
2883
+ if verbose:
2884
+ count += 1
2885
+ try:
2886
+ percentage = (count/total_counts)*100
2887
+ log_time = curtime()
2888
+ sys.stdout.write('\033[A')
2889
+ sys.stdout.flush()
2890
+ sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2891
+ sys.stdout.flush()
2892
+ except:
2893
+ #It's not really a big deal if the progress bar cannot be printed.
2894
+ pass
2895
+
2896
+ pool.close()
2897
+ pool.join()
2898
+
2899
+ print("")
2900
+ print("Adding data to final database. Started at", curtime())
2914
2901
 
2915
2902
  if verbose:
2916
- count += 1
2903
+ print("")
2904
+
2905
+ count = 0
2906
+ total_counts = len(acc_args)
2917
2907
  try:
2918
2908
  percentage = (count/total_counts)*100
2919
- log_time = curtime()
2920
- sys.stdout.write('\033[A')
2921
- sys.stdout.flush()
2922
2909
  sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2923
2910
  sys.stdout.flush()
2924
2911
  except:
2925
2912
  #It's not really a big deal if the progress bar cannot be printed.
2926
2913
  pass
2927
-
2928
- os.rmdir("FastAAI_temp")
2929
- ''' We're only ever increasing the DB size, so we don't actually need to vacuum it.
2930
- if recip_check != "created":
2931
- print("")
2932
- print("Cleaning up the database after the update. This may take a while.")
2933
- recipient.connection.execute("VACUUM")
2934
- recipient.connection.close()
2935
- '''
2914
+
2915
+ last_pct = 0
2916
+
2917
+ recipient.activate_connection()
2918
+ genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
2919
+ genome_reindex = []
2920
+ for g in joint_genome_index:
2921
+ genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
2922
+
2923
+ recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
2924
+ recipient.connection.commit()
2925
+
2926
+ del genome_reindex
2927
+
2928
+ for result in quiverfull:
2929
+ acc = result[0]
2930
+ child = result[1]
2931
+
2932
+ recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
2933
+
2934
+ if verbose:
2935
+ count += 1
2936
+ try:
2937
+ percentage = (count/total_counts)*100
2938
+ log_time = curtime()
2939
+ sys.stdout.write('\033[A')
2940
+ sys.stdout.flush()
2941
+ sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
2942
+ sys.stdout.flush()
2943
+ except:
2944
+ #It's not really a big deal if the progress bar cannot be printed.
2945
+ pass
2946
+ except:
2947
+ #Error
2948
+ shutil.rmtree(temp_dir)
2949
+ finally:
2950
+ #Success
2951
+ shutil.rmtree(temp_dir)
2952
+
2936
2953
  print("\nDatabases merged!")
2937
2954
 
2938
2955
  return None
@@ -2947,14 +2964,14 @@ def pull_and_merge_accession(args):
2947
2964
  accession_inverter[accession_index[acc]] = sql_friendly_accession
2948
2965
 
2949
2966
  #joint_genome_index, accession_index, accession_inverter, accs_per_db are global already.
2950
- acc, donor_dbs, recipient = args[0], args[1], args[2]
2967
+ acc, donor_dbs, recipient, temp = args[0], args[1], args[2], args[3]
2951
2968
 
2952
2969
  acc_name = accession_inverter[acc]
2953
2970
  acc_name_gens = acc_name + "_genomes"
2954
2971
 
2955
2972
  query_sql = "SELECT * FROM " + acc_name
2956
2973
 
2957
- temp_db = fastaai_database("FastAAI_temp/"+acc_name+".db")
2974
+ temp_db = fastaai_database(os.path.normpath(temp+"/"+acc_name+".db"))
2958
2975
  temp_db.activate_connection()
2959
2976
 
2960
2977
  create_command = "CREATE TABLE IF NOT EXISTS " + acc_name + " (kmer INTEGER PRIMARY KEY, genomes array)"
@@ -3627,4 +3644,4 @@ def main():
3627
3644
  if __name__ == "__main__":
3628
3645
  main()
3629
3646
 
3630
-
3647
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: miga-base
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1.0
4
+ version: 1.1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Luis M. Rodriguez-R