miga-base 1.1.1.0 → 1.1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/version.rb +1 -1
- data/utils/FastAAI/FastAAI +100 -83
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8bbb058ee0094bdbf36ddd5e93d8206871de2a774876ad1229d8e681e5a8aa38
|
4
|
+
data.tar.gz: 5091f3d1053ae79d02056dddc3eda5cd48c061f5bbd6c78ef185e5f9e24b0d92
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80383eefa8cba0b17b3f21ad6b5d9880acc47e349687dc76c531b01111e665fa2ff144507f5925ca81c70eb642286724b7c2be51531c4b31b1710e5430341f6f
|
7
|
+
data.tar.gz: dba9c4d31acab4e50e40d802b10a9268be1a291e99ada00de11fefcacf183aa2a381842980030832fe77db94169239731c9b8a7b264d37d72c3fd8ece4774848
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.1, 1, 0].freeze
|
15
|
+
VERSION = [1.1, 1, 1].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
data/utils/FastAAI/FastAAI
CHANGED
@@ -583,6 +583,24 @@ class input_file:
|
|
583
583
|
self.initial_state = "protein+HMM"
|
584
584
|
|
585
585
|
self.verbose = verbosity
|
586
|
+
|
587
|
+
#r_scripts_loc = os.path.dirname(sys.modules['metapop'].__file__) + "/metapop_r/"
|
588
|
+
#"00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
|
589
|
+
self.hmm_path = None
|
590
|
+
try:
|
591
|
+
#Try to locate the data bundled as it would be with a pip/conda install.
|
592
|
+
script_path = os.path.dirname(sys.modules['fastAAI_HMM_models'].__file__)
|
593
|
+
hmm_complete_model = script_path + '/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm'
|
594
|
+
self.hmm_path = str(hmm_complete_model)
|
595
|
+
#Check that the file exists or fail to the except.
|
596
|
+
fh = open(self.hmm_path)
|
597
|
+
fh.close()
|
598
|
+
except:
|
599
|
+
#Look in the same dir as the script; old method/MiGA friendly
|
600
|
+
script_path = Path(__file__)
|
601
|
+
script_dir = script_path.parent
|
602
|
+
hmm_complete_model = script_dir / "00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
|
603
|
+
self.hmm_path = str(hmm_complete_model)
|
586
604
|
|
587
605
|
#Functions for externally setting status and file paths of particular types
|
588
606
|
def set_genome(self, path):
|
@@ -2701,6 +2719,7 @@ def merge_db_thread_starter(rev_index, per_db_accs):
|
|
2701
2719
|
accs_per_db = per_db_accs
|
2702
2720
|
|
2703
2721
|
|
2722
|
+
|
2704
2723
|
def merge_db(recipient, donors, verbose, threads):
|
2705
2724
|
#Prettier on the CLI
|
2706
2725
|
|
@@ -2828,111 +2847,109 @@ def merge_db(recipient, donors, verbose, threads):
|
|
2828
2847
|
|
2829
2848
|
all_accessions = list(all_accessions)
|
2830
2849
|
|
2831
|
-
acc_args = [(acc, donor_dbs, recipient) for acc in all_accessions]
|
2832
|
-
|
2833
|
-
if not os.path.exists("FastAAI_temp"):
|
2834
|
-
os.mkdir("FastAAI_temp")
|
2835
2850
|
|
2836
2851
|
print("")
|
2837
2852
|
print("Formatting data to add to database. Started at", curtime())
|
2838
2853
|
|
2839
|
-
|
2840
|
-
|
2841
|
-
|
2842
|
-
total_counts = len(acc_args)
|
2843
|
-
try:
|
2844
|
-
percentage = (count/total_counts)*100
|
2845
|
-
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2846
|
-
sys.stdout.flush()
|
2847
|
-
except:
|
2848
|
-
#It's not really a big deal if the progress bar cannot be printed.
|
2849
|
-
pass
|
2850
|
-
|
2851
|
-
last_pct = 0
|
2852
|
-
|
2853
|
-
pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
|
2854
|
+
temp_dir = tempfile.mkdtemp()
|
2855
|
+
try:
|
2856
|
+
acc_args = [(acc, donor_dbs, recipient, temp_dir) for acc in all_accessions]
|
2854
2857
|
|
2855
|
-
quiverfull = []
|
2856
|
-
for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
|
2857
|
-
acc = result[0]
|
2858
|
-
child = result[1]
|
2859
|
-
#sub_gak = result[2]
|
2860
|
-
|
2861
|
-
quiverfull.append([acc, child])
|
2862
|
-
#gaks.extend(sub_gak)
|
2863
|
-
|
2864
2858
|
if verbose:
|
2865
|
-
|
2859
|
+
print("")
|
2860
|
+
count = 0
|
2861
|
+
total_counts = len(acc_args)
|
2866
2862
|
try:
|
2867
2863
|
percentage = (count/total_counts)*100
|
2868
|
-
log_time = curtime()
|
2869
|
-
sys.stdout.write('\033[A')
|
2870
|
-
sys.stdout.flush()
|
2871
2864
|
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2872
2865
|
sys.stdout.flush()
|
2873
2866
|
except:
|
2874
2867
|
#It's not really a big deal if the progress bar cannot be printed.
|
2875
2868
|
pass
|
2876
|
-
|
2877
|
-
pool.close()
|
2878
|
-
pool.join()
|
2879
|
-
|
2880
|
-
print("")
|
2881
|
-
print("Adding data to final database. Started at", curtime())
|
2882
|
-
|
2883
|
-
if verbose:
|
2884
|
-
print("")
|
2885
2869
|
|
2886
|
-
|
2887
|
-
total_counts = len(acc_args)
|
2888
|
-
try:
|
2889
|
-
percentage = (count/total_counts)*100
|
2890
|
-
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2891
|
-
sys.stdout.flush()
|
2892
|
-
except:
|
2893
|
-
#It's not really a big deal if the progress bar cannot be printed.
|
2894
|
-
pass
|
2895
|
-
|
2896
|
-
last_pct = 0
|
2897
|
-
|
2898
|
-
recipient.activate_connection()
|
2899
|
-
genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
|
2900
|
-
genome_reindex = []
|
2901
|
-
for g in joint_genome_index:
|
2902
|
-
genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
|
2870
|
+
last_pct = 0
|
2903
2871
|
|
2904
|
-
|
2905
|
-
recipient.connection.commit()
|
2906
|
-
|
2907
|
-
del genome_reindex
|
2908
|
-
|
2909
|
-
for result in quiverfull:
|
2910
|
-
acc = result[0]
|
2911
|
-
child = result[1]
|
2872
|
+
pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
|
2912
2873
|
|
2913
|
-
|
2874
|
+
quiverfull = []
|
2875
|
+
for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
|
2876
|
+
acc = result[0]
|
2877
|
+
child = result[1]
|
2878
|
+
#sub_gak = result[2]
|
2879
|
+
|
2880
|
+
quiverfull.append([acc, child])
|
2881
|
+
#gaks.extend(sub_gak)
|
2882
|
+
|
2883
|
+
if verbose:
|
2884
|
+
count += 1
|
2885
|
+
try:
|
2886
|
+
percentage = (count/total_counts)*100
|
2887
|
+
log_time = curtime()
|
2888
|
+
sys.stdout.write('\033[A')
|
2889
|
+
sys.stdout.flush()
|
2890
|
+
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2891
|
+
sys.stdout.flush()
|
2892
|
+
except:
|
2893
|
+
#It's not really a big deal if the progress bar cannot be printed.
|
2894
|
+
pass
|
2895
|
+
|
2896
|
+
pool.close()
|
2897
|
+
pool.join()
|
2898
|
+
|
2899
|
+
print("")
|
2900
|
+
print("Adding data to final database. Started at", curtime())
|
2914
2901
|
|
2915
2902
|
if verbose:
|
2916
|
-
|
2903
|
+
print("")
|
2904
|
+
|
2905
|
+
count = 0
|
2906
|
+
total_counts = len(acc_args)
|
2917
2907
|
try:
|
2918
2908
|
percentage = (count/total_counts)*100
|
2919
|
-
log_time = curtime()
|
2920
|
-
sys.stdout.write('\033[A')
|
2921
|
-
sys.stdout.flush()
|
2922
2909
|
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2923
2910
|
sys.stdout.flush()
|
2924
2911
|
except:
|
2925
2912
|
#It's not really a big deal if the progress bar cannot be printed.
|
2926
2913
|
pass
|
2927
|
-
|
2928
|
-
|
2929
|
-
|
2930
|
-
|
2931
|
-
|
2932
|
-
|
2933
|
-
|
2934
|
-
|
2935
|
-
|
2914
|
+
|
2915
|
+
last_pct = 0
|
2916
|
+
|
2917
|
+
recipient.activate_connection()
|
2918
|
+
genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
|
2919
|
+
genome_reindex = []
|
2920
|
+
for g in joint_genome_index:
|
2921
|
+
genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
|
2922
|
+
|
2923
|
+
recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
|
2924
|
+
recipient.connection.commit()
|
2925
|
+
|
2926
|
+
del genome_reindex
|
2927
|
+
|
2928
|
+
for result in quiverfull:
|
2929
|
+
acc = result[0]
|
2930
|
+
child = result[1]
|
2931
|
+
|
2932
|
+
recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
|
2933
|
+
|
2934
|
+
if verbose:
|
2935
|
+
count += 1
|
2936
|
+
try:
|
2937
|
+
percentage = (count/total_counts)*100
|
2938
|
+
log_time = curtime()
|
2939
|
+
sys.stdout.write('\033[A')
|
2940
|
+
sys.stdout.flush()
|
2941
|
+
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2942
|
+
sys.stdout.flush()
|
2943
|
+
except:
|
2944
|
+
#It's not really a big deal if the progress bar cannot be printed.
|
2945
|
+
pass
|
2946
|
+
except:
|
2947
|
+
#Error
|
2948
|
+
shutil.rmtree(temp_dir)
|
2949
|
+
finally:
|
2950
|
+
#Success
|
2951
|
+
shutil.rmtree(temp_dir)
|
2952
|
+
|
2936
2953
|
print("\nDatabases merged!")
|
2937
2954
|
|
2938
2955
|
return None
|
@@ -2947,14 +2964,14 @@ def pull_and_merge_accession(args):
|
|
2947
2964
|
accession_inverter[accession_index[acc]] = sql_friendly_accession
|
2948
2965
|
|
2949
2966
|
#joint_genome_index, accession_index, accession_inverter, accs_per_db are global already.
|
2950
|
-
acc, donor_dbs, recipient = args[0], args[1], args[2]
|
2967
|
+
acc, donor_dbs, recipient, temp = args[0], args[1], args[2], args[3]
|
2951
2968
|
|
2952
2969
|
acc_name = accession_inverter[acc]
|
2953
2970
|
acc_name_gens = acc_name + "_genomes"
|
2954
2971
|
|
2955
2972
|
query_sql = "SELECT * FROM " + acc_name
|
2956
2973
|
|
2957
|
-
temp_db = fastaai_database("
|
2974
|
+
temp_db = fastaai_database(os.path.normpath(temp+"/"+acc_name+".db"))
|
2958
2975
|
temp_db.activate_connection()
|
2959
2976
|
|
2960
2977
|
create_command = "CREATE TABLE IF NOT EXISTS " + acc_name + " (kmer INTEGER PRIMARY KEY, genomes array)"
|
@@ -3627,4 +3644,4 @@ def main():
|
|
3627
3644
|
if __name__ == "__main__":
|
3628
3645
|
main()
|
3629
3646
|
|
3630
|
-
|
3647
|
+
|