miga-base 1.1.1.0 → 1.1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/miga/version.rb +1 -1
- data/utils/FastAAI/FastAAI +100 -83
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8bbb058ee0094bdbf36ddd5e93d8206871de2a774876ad1229d8e681e5a8aa38
|
4
|
+
data.tar.gz: 5091f3d1053ae79d02056dddc3eda5cd48c061f5bbd6c78ef185e5f9e24b0d92
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80383eefa8cba0b17b3f21ad6b5d9880acc47e349687dc76c531b01111e665fa2ff144507f5925ca81c70eb642286724b7c2be51531c4b31b1710e5430341f6f
|
7
|
+
data.tar.gz: dba9c4d31acab4e50e40d802b10a9268be1a291e99ada00de11fefcacf183aa2a381842980030832fe77db94169239731c9b8a7b264d37d72c3fd8ece4774848
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.1, 1, 0].freeze
|
15
|
+
VERSION = [1.1, 1, 1].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
data/utils/FastAAI/FastAAI
CHANGED
@@ -583,6 +583,24 @@ class input_file:
|
|
583
583
|
self.initial_state = "protein+HMM"
|
584
584
|
|
585
585
|
self.verbose = verbosity
|
586
|
+
|
587
|
+
#r_scripts_loc = os.path.dirname(sys.modules['metapop'].__file__) + "/metapop_r/"
|
588
|
+
#"00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
|
589
|
+
self.hmm_path = None
|
590
|
+
try:
|
591
|
+
#Try to locate the data bundled as it would be with a pip/conda install.
|
592
|
+
script_path = os.path.dirname(sys.modules['fastAAI_HMM_models'].__file__)
|
593
|
+
hmm_complete_model = script_path + '/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm'
|
594
|
+
self.hmm_path = str(hmm_complete_model)
|
595
|
+
#Check that the file exists or fail to the except.
|
596
|
+
fh = open(self.hmm_path)
|
597
|
+
fh.close()
|
598
|
+
except:
|
599
|
+
#Look in the same dir as the script; old method/MiGA friendly
|
600
|
+
script_path = Path(__file__)
|
601
|
+
script_dir = script_path.parent
|
602
|
+
hmm_complete_model = script_dir / "00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
|
603
|
+
self.hmm_path = str(hmm_complete_model)
|
586
604
|
|
587
605
|
#Functions for externally setting status and file paths of particular types
|
588
606
|
def set_genome(self, path):
|
@@ -2701,6 +2719,7 @@ def merge_db_thread_starter(rev_index, per_db_accs):
|
|
2701
2719
|
accs_per_db = per_db_accs
|
2702
2720
|
|
2703
2721
|
|
2722
|
+
|
2704
2723
|
def merge_db(recipient, donors, verbose, threads):
|
2705
2724
|
#Prettier on the CLI
|
2706
2725
|
|
@@ -2828,111 +2847,109 @@ def merge_db(recipient, donors, verbose, threads):
|
|
2828
2847
|
|
2829
2848
|
all_accessions = list(all_accessions)
|
2830
2849
|
|
2831
|
-
acc_args = [(acc, donor_dbs, recipient) for acc in all_accessions]
|
2832
|
-
|
2833
|
-
if not os.path.exists("FastAAI_temp"):
|
2834
|
-
os.mkdir("FastAAI_temp")
|
2835
2850
|
|
2836
2851
|
print("")
|
2837
2852
|
print("Formatting data to add to database. Started at", curtime())
|
2838
2853
|
|
2839
|
-
|
2840
|
-
|
2841
|
-
|
2842
|
-
total_counts = len(acc_args)
|
2843
|
-
try:
|
2844
|
-
percentage = (count/total_counts)*100
|
2845
|
-
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2846
|
-
sys.stdout.flush()
|
2847
|
-
except:
|
2848
|
-
#It's not really a big deal if the progress bar cannot be printed.
|
2849
|
-
pass
|
2850
|
-
|
2851
|
-
last_pct = 0
|
2852
|
-
|
2853
|
-
pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
|
2854
|
+
temp_dir = tempfile.mkdtemp()
|
2855
|
+
try:
|
2856
|
+
acc_args = [(acc, donor_dbs, recipient, temp_dir) for acc in all_accessions]
|
2854
2857
|
|
2855
|
-
quiverfull = []
|
2856
|
-
for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
|
2857
|
-
acc = result[0]
|
2858
|
-
child = result[1]
|
2859
|
-
#sub_gak = result[2]
|
2860
|
-
|
2861
|
-
quiverfull.append([acc, child])
|
2862
|
-
#gaks.extend(sub_gak)
|
2863
|
-
|
2864
2858
|
if verbose:
|
2865
|
-
|
2859
|
+
print("")
|
2860
|
+
count = 0
|
2861
|
+
total_counts = len(acc_args)
|
2866
2862
|
try:
|
2867
2863
|
percentage = (count/total_counts)*100
|
2868
|
-
log_time = curtime()
|
2869
|
-
sys.stdout.write('\033[A')
|
2870
|
-
sys.stdout.flush()
|
2871
2864
|
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2872
2865
|
sys.stdout.flush()
|
2873
2866
|
except:
|
2874
2867
|
#It's not really a big deal if the progress bar cannot be printed.
|
2875
2868
|
pass
|
2876
|
-
|
2877
|
-
pool.close()
|
2878
|
-
pool.join()
|
2879
|
-
|
2880
|
-
print("")
|
2881
|
-
print("Adding data to final database. Started at", curtime())
|
2882
|
-
|
2883
|
-
if verbose:
|
2884
|
-
print("")
|
2885
2869
|
|
2886
|
-
|
2887
|
-
total_counts = len(acc_args)
|
2888
|
-
try:
|
2889
|
-
percentage = (count/total_counts)*100
|
2890
|
-
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2891
|
-
sys.stdout.flush()
|
2892
|
-
except:
|
2893
|
-
#It's not really a big deal if the progress bar cannot be printed.
|
2894
|
-
pass
|
2895
|
-
|
2896
|
-
last_pct = 0
|
2897
|
-
|
2898
|
-
recipient.activate_connection()
|
2899
|
-
genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
|
2900
|
-
genome_reindex = []
|
2901
|
-
for g in joint_genome_index:
|
2902
|
-
genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
|
2870
|
+
last_pct = 0
|
2903
2871
|
|
2904
|
-
|
2905
|
-
recipient.connection.commit()
|
2906
|
-
|
2907
|
-
del genome_reindex
|
2908
|
-
|
2909
|
-
for result in quiverfull:
|
2910
|
-
acc = result[0]
|
2911
|
-
child = result[1]
|
2872
|
+
pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
|
2912
2873
|
|
2913
|
-
|
2874
|
+
quiverfull = []
|
2875
|
+
for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
|
2876
|
+
acc = result[0]
|
2877
|
+
child = result[1]
|
2878
|
+
#sub_gak = result[2]
|
2879
|
+
|
2880
|
+
quiverfull.append([acc, child])
|
2881
|
+
#gaks.extend(sub_gak)
|
2882
|
+
|
2883
|
+
if verbose:
|
2884
|
+
count += 1
|
2885
|
+
try:
|
2886
|
+
percentage = (count/total_counts)*100
|
2887
|
+
log_time = curtime()
|
2888
|
+
sys.stdout.write('\033[A')
|
2889
|
+
sys.stdout.flush()
|
2890
|
+
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2891
|
+
sys.stdout.flush()
|
2892
|
+
except:
|
2893
|
+
#It's not really a big deal if the progress bar cannot be printed.
|
2894
|
+
pass
|
2895
|
+
|
2896
|
+
pool.close()
|
2897
|
+
pool.join()
|
2898
|
+
|
2899
|
+
print("")
|
2900
|
+
print("Adding data to final database. Started at", curtime())
|
2914
2901
|
|
2915
2902
|
if verbose:
|
2916
|
-
|
2903
|
+
print("")
|
2904
|
+
|
2905
|
+
count = 0
|
2906
|
+
total_counts = len(acc_args)
|
2917
2907
|
try:
|
2918
2908
|
percentage = (count/total_counts)*100
|
2919
|
-
log_time = curtime()
|
2920
|
-
sys.stdout.write('\033[A')
|
2921
|
-
sys.stdout.flush()
|
2922
2909
|
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2923
2910
|
sys.stdout.flush()
|
2924
2911
|
except:
|
2925
2912
|
#It's not really a big deal if the progress bar cannot be printed.
|
2926
2913
|
pass
|
2927
|
-
|
2928
|
-
|
2929
|
-
|
2930
|
-
|
2931
|
-
|
2932
|
-
|
2933
|
-
|
2934
|
-
|
2935
|
-
|
2914
|
+
|
2915
|
+
last_pct = 0
|
2916
|
+
|
2917
|
+
recipient.activate_connection()
|
2918
|
+
genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
|
2919
|
+
genome_reindex = []
|
2920
|
+
for g in joint_genome_index:
|
2921
|
+
genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
|
2922
|
+
|
2923
|
+
recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
|
2924
|
+
recipient.connection.commit()
|
2925
|
+
|
2926
|
+
del genome_reindex
|
2927
|
+
|
2928
|
+
for result in quiverfull:
|
2929
|
+
acc = result[0]
|
2930
|
+
child = result[1]
|
2931
|
+
|
2932
|
+
recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
|
2933
|
+
|
2934
|
+
if verbose:
|
2935
|
+
count += 1
|
2936
|
+
try:
|
2937
|
+
percentage = (count/total_counts)*100
|
2938
|
+
log_time = curtime()
|
2939
|
+
sys.stdout.write('\033[A')
|
2940
|
+
sys.stdout.flush()
|
2941
|
+
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2942
|
+
sys.stdout.flush()
|
2943
|
+
except:
|
2944
|
+
#It's not really a big deal if the progress bar cannot be printed.
|
2945
|
+
pass
|
2946
|
+
except:
|
2947
|
+
#Error
|
2948
|
+
shutil.rmtree(temp_dir)
|
2949
|
+
finally:
|
2950
|
+
#Success
|
2951
|
+
shutil.rmtree(temp_dir)
|
2952
|
+
|
2936
2953
|
print("\nDatabases merged!")
|
2937
2954
|
|
2938
2955
|
return None
|
@@ -2947,14 +2964,14 @@ def pull_and_merge_accession(args):
|
|
2947
2964
|
accession_inverter[accession_index[acc]] = sql_friendly_accession
|
2948
2965
|
|
2949
2966
|
#joint_genome_index, accession_index, accession_inverter, accs_per_db are global already.
|
2950
|
-
acc, donor_dbs, recipient = args[0], args[1], args[2]
|
2967
|
+
acc, donor_dbs, recipient, temp = args[0], args[1], args[2], args[3]
|
2951
2968
|
|
2952
2969
|
acc_name = accession_inverter[acc]
|
2953
2970
|
acc_name_gens = acc_name + "_genomes"
|
2954
2971
|
|
2955
2972
|
query_sql = "SELECT * FROM " + acc_name
|
2956
2973
|
|
2957
|
-
temp_db = fastaai_database("
|
2974
|
+
temp_db = fastaai_database(os.path.normpath(temp+"/"+acc_name+".db"))
|
2958
2975
|
temp_db.activate_connection()
|
2959
2976
|
|
2960
2977
|
create_command = "CREATE TABLE IF NOT EXISTS " + acc_name + " (kmer INTEGER PRIMARY KEY, genomes array)"
|
@@ -3627,4 +3644,4 @@ def main():
|
|
3627
3644
|
if __name__ == "__main__":
|
3628
3645
|
main()
|
3629
3646
|
|
3630
|
-
|
3647
|
+
|