miga-base 1.1.1.0 → 1.1.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/daemon/base.rb +9 -2
- data/lib/miga/daemon.rb +1 -1
- data/lib/miga/dataset.rb +2 -2
- data/lib/miga/project/dataset.rb +6 -2
- data/lib/miga/version.rb +2 -2
- data/utils/FastAAI/FastAAI +100 -83
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4603bf75db8f82a1a30c1d6da1ab045139b13bf1eec5fd2d5be5c79fb2ac0442
|
4
|
+
data.tar.gz: 24324b35a885453c01fada6fb07527616b3a952ab4ccb9b9c818c5d06e580acd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d9e555d736e3987d4afc45707307b7f95d2164cb8db822ad639e0f71e919c67972bff76aefdc7b0a1a1e7120e3100b863a8dde14470066ac7d4ab0b5be5c20f7
|
7
|
+
data.tar.gz: 54d1823817cb2d4220d885e84e941cf5e4df88f850824cf3837ed716879aec4733f2d2df98bedc03aa29878a1e5bd7641b015c76738eed841d08046c9f50d1bb
|
data/lib/miga/daemon/base.rb
CHANGED
@@ -12,9 +12,10 @@ module MiGA::Daemon::Base
|
|
12
12
|
k = k.to_sym
|
13
13
|
unless v.nil?
|
14
14
|
case k
|
15
|
-
when :latency, :maxjobs, :ppn, :ppn_project, :format_version, :verbosity
|
15
|
+
when :latency, :maxjobs, :ppn, :ppn_project, :format_version, :verbosity,
|
16
|
+
:skip_maintenance
|
16
17
|
v = v.to_i
|
17
|
-
if !force && v == 0 && k != :verbosity
|
18
|
+
if !force && v == 0 && !%i[verbosity skip_maintenance].include?(k)
|
18
19
|
raise "Daemon's #{k} cannot be set to zero"
|
19
20
|
end
|
20
21
|
when :shutdown_when_done, :show_log, :bypass_maintenance
|
@@ -50,6 +51,12 @@ module MiGA::Daemon::Base
|
|
50
51
|
runopts(:nodelist)
|
51
52
|
end
|
52
53
|
|
54
|
+
##
|
55
|
+
# Returns the number of times maintenance should be skipped before running
|
56
|
+
def skip_maintenance
|
57
|
+
runopts(:skip_maintenance) || 0
|
58
|
+
end
|
59
|
+
|
53
60
|
##
|
54
61
|
# Returns the running option +opt+ in jobs for +what+. +what+ can be
|
55
62
|
# +:dataset+ or +:projects+
|
data/lib/miga/daemon.rb
CHANGED
data/lib/miga/dataset.rb
CHANGED
@@ -107,7 +107,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
107
107
|
metadata[:warn] = "Inactive: #{reason}" unless reason.nil?
|
108
108
|
metadata[:inactive] = true
|
109
109
|
metadata.save
|
110
|
-
project.recalculate_tasks(
|
110
|
+
project.recalculate_tasks("Reference dataset inactivated: #{name}") if ref?
|
111
111
|
pull_hook :on_inactivate
|
112
112
|
end
|
113
113
|
|
@@ -117,7 +117,7 @@ class MiGA::Dataset < MiGA::MiGA
|
|
117
117
|
metadata[:inactive] = nil
|
118
118
|
metadata[:warn] = nil if metadata[:warn] && metadata[:warn] =~ /^Inactive: /
|
119
119
|
metadata.save
|
120
|
-
project.recalculate_tasks(
|
120
|
+
project.recalculate_tasks("Reference dataset activated: #{name}") if ref?
|
121
121
|
pull_hook :on_activate
|
122
122
|
end
|
123
123
|
|
data/lib/miga/project/dataset.rb
CHANGED
@@ -52,7 +52,9 @@ module MiGA::Project::Dataset
|
|
52
52
|
@metadata[:datasets] << name
|
53
53
|
@dataset_names_hash = nil # Ensure loading even if +do_not_save+ is true
|
54
54
|
save
|
55
|
-
|
55
|
+
if d.ref? && d.active?
|
56
|
+
recalculate_tasks("Reference dataset added: #{d.name}")
|
57
|
+
end
|
56
58
|
pull_hook(:on_add_dataset, name)
|
57
59
|
end
|
58
60
|
dataset(name)
|
@@ -66,7 +68,9 @@ module MiGA::Project::Dataset
|
|
66
68
|
|
67
69
|
self.metadata[:datasets].delete(name)
|
68
70
|
save
|
69
|
-
|
71
|
+
if d.ref? && d.active?
|
72
|
+
recalculate_tasks("Reference dataset unlinked: #{d.name}")
|
73
|
+
end
|
70
74
|
pull_hook(:on_unlink_dataset, name)
|
71
75
|
d
|
72
76
|
end
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.1, 1, 0].freeze
|
15
|
+
VERSION = [1.1, 2, 2].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2021,
|
23
|
+
VERSION_DATE = Date.new(2021, 11, 9)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/utils/FastAAI/FastAAI
CHANGED
@@ -583,6 +583,24 @@ class input_file:
|
|
583
583
|
self.initial_state = "protein+HMM"
|
584
584
|
|
585
585
|
self.verbose = verbosity
|
586
|
+
|
587
|
+
#r_scripts_loc = os.path.dirname(sys.modules['metapop'].__file__) + "/metapop_r/"
|
588
|
+
#"00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
|
589
|
+
self.hmm_path = None
|
590
|
+
try:
|
591
|
+
#Try to locate the data bundled as it would be with a pip/conda install.
|
592
|
+
script_path = os.path.dirname(sys.modules['fastAAI_HMM_models'].__file__)
|
593
|
+
hmm_complete_model = script_path + '/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm'
|
594
|
+
self.hmm_path = str(hmm_complete_model)
|
595
|
+
#Check that the file exists or fail to the except.
|
596
|
+
fh = open(self.hmm_path)
|
597
|
+
fh.close()
|
598
|
+
except:
|
599
|
+
#Look in the same dir as the script; old method/MiGA friendly
|
600
|
+
script_path = Path(__file__)
|
601
|
+
script_dir = script_path.parent
|
602
|
+
hmm_complete_model = script_dir / "00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
|
603
|
+
self.hmm_path = str(hmm_complete_model)
|
586
604
|
|
587
605
|
#Functions for externally setting status and file paths of particular types
|
588
606
|
def set_genome(self, path):
|
@@ -2701,6 +2719,7 @@ def merge_db_thread_starter(rev_index, per_db_accs):
|
|
2701
2719
|
accs_per_db = per_db_accs
|
2702
2720
|
|
2703
2721
|
|
2722
|
+
|
2704
2723
|
def merge_db(recipient, donors, verbose, threads):
|
2705
2724
|
#Prettier on the CLI
|
2706
2725
|
|
@@ -2828,111 +2847,109 @@ def merge_db(recipient, donors, verbose, threads):
|
|
2828
2847
|
|
2829
2848
|
all_accessions = list(all_accessions)
|
2830
2849
|
|
2831
|
-
acc_args = [(acc, donor_dbs, recipient) for acc in all_accessions]
|
2832
|
-
|
2833
|
-
if not os.path.exists("FastAAI_temp"):
|
2834
|
-
os.mkdir("FastAAI_temp")
|
2835
2850
|
|
2836
2851
|
print("")
|
2837
2852
|
print("Formatting data to add to database. Started at", curtime())
|
2838
2853
|
|
2839
|
-
|
2840
|
-
|
2841
|
-
|
2842
|
-
total_counts = len(acc_args)
|
2843
|
-
try:
|
2844
|
-
percentage = (count/total_counts)*100
|
2845
|
-
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2846
|
-
sys.stdout.flush()
|
2847
|
-
except:
|
2848
|
-
#It's not really a big deal if the progress bar cannot be printed.
|
2849
|
-
pass
|
2850
|
-
|
2851
|
-
last_pct = 0
|
2852
|
-
|
2853
|
-
pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
|
2854
|
+
temp_dir = tempfile.mkdtemp()
|
2855
|
+
try:
|
2856
|
+
acc_args = [(acc, donor_dbs, recipient, temp_dir) for acc in all_accessions]
|
2854
2857
|
|
2855
|
-
quiverfull = []
|
2856
|
-
for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
|
2857
|
-
acc = result[0]
|
2858
|
-
child = result[1]
|
2859
|
-
#sub_gak = result[2]
|
2860
|
-
|
2861
|
-
quiverfull.append([acc, child])
|
2862
|
-
#gaks.extend(sub_gak)
|
2863
|
-
|
2864
2858
|
if verbose:
|
2865
|
-
|
2859
|
+
print("")
|
2860
|
+
count = 0
|
2861
|
+
total_counts = len(acc_args)
|
2866
2862
|
try:
|
2867
2863
|
percentage = (count/total_counts)*100
|
2868
|
-
log_time = curtime()
|
2869
|
-
sys.stdout.write('\033[A')
|
2870
|
-
sys.stdout.flush()
|
2871
2864
|
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2872
2865
|
sys.stdout.flush()
|
2873
2866
|
except:
|
2874
2867
|
#It's not really a big deal if the progress bar cannot be printed.
|
2875
2868
|
pass
|
2876
|
-
|
2877
|
-
pool.close()
|
2878
|
-
pool.join()
|
2879
|
-
|
2880
|
-
print("")
|
2881
|
-
print("Adding data to final database. Started at", curtime())
|
2882
|
-
|
2883
|
-
if verbose:
|
2884
|
-
print("")
|
2885
2869
|
|
2886
|
-
|
2887
|
-
total_counts = len(acc_args)
|
2888
|
-
try:
|
2889
|
-
percentage = (count/total_counts)*100
|
2890
|
-
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2891
|
-
sys.stdout.flush()
|
2892
|
-
except:
|
2893
|
-
#It's not really a big deal if the progress bar cannot be printed.
|
2894
|
-
pass
|
2895
|
-
|
2896
|
-
last_pct = 0
|
2897
|
-
|
2898
|
-
recipient.activate_connection()
|
2899
|
-
genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
|
2900
|
-
genome_reindex = []
|
2901
|
-
for g in joint_genome_index:
|
2902
|
-
genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
|
2870
|
+
last_pct = 0
|
2903
2871
|
|
2904
|
-
|
2905
|
-
recipient.connection.commit()
|
2906
|
-
|
2907
|
-
del genome_reindex
|
2908
|
-
|
2909
|
-
for result in quiverfull:
|
2910
|
-
acc = result[0]
|
2911
|
-
child = result[1]
|
2872
|
+
pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
|
2912
2873
|
|
2913
|
-
|
2874
|
+
quiverfull = []
|
2875
|
+
for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
|
2876
|
+
acc = result[0]
|
2877
|
+
child = result[1]
|
2878
|
+
#sub_gak = result[2]
|
2879
|
+
|
2880
|
+
quiverfull.append([acc, child])
|
2881
|
+
#gaks.extend(sub_gak)
|
2882
|
+
|
2883
|
+
if verbose:
|
2884
|
+
count += 1
|
2885
|
+
try:
|
2886
|
+
percentage = (count/total_counts)*100
|
2887
|
+
log_time = curtime()
|
2888
|
+
sys.stdout.write('\033[A')
|
2889
|
+
sys.stdout.flush()
|
2890
|
+
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2891
|
+
sys.stdout.flush()
|
2892
|
+
except:
|
2893
|
+
#It's not really a big deal if the progress bar cannot be printed.
|
2894
|
+
pass
|
2895
|
+
|
2896
|
+
pool.close()
|
2897
|
+
pool.join()
|
2898
|
+
|
2899
|
+
print("")
|
2900
|
+
print("Adding data to final database. Started at", curtime())
|
2914
2901
|
|
2915
2902
|
if verbose:
|
2916
|
-
|
2903
|
+
print("")
|
2904
|
+
|
2905
|
+
count = 0
|
2906
|
+
total_counts = len(acc_args)
|
2917
2907
|
try:
|
2918
2908
|
percentage = (count/total_counts)*100
|
2919
|
-
log_time = curtime()
|
2920
|
-
sys.stdout.write('\033[A')
|
2921
|
-
sys.stdout.flush()
|
2922
2909
|
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2923
2910
|
sys.stdout.flush()
|
2924
2911
|
except:
|
2925
2912
|
#It's not really a big deal if the progress bar cannot be printed.
|
2926
2913
|
pass
|
2927
|
-
|
2928
|
-
|
2929
|
-
|
2930
|
-
|
2931
|
-
|
2932
|
-
|
2933
|
-
|
2934
|
-
|
2935
|
-
|
2914
|
+
|
2915
|
+
last_pct = 0
|
2916
|
+
|
2917
|
+
recipient.activate_connection()
|
2918
|
+
genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
|
2919
|
+
genome_reindex = []
|
2920
|
+
for g in joint_genome_index:
|
2921
|
+
genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
|
2922
|
+
|
2923
|
+
recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
|
2924
|
+
recipient.connection.commit()
|
2925
|
+
|
2926
|
+
del genome_reindex
|
2927
|
+
|
2928
|
+
for result in quiverfull:
|
2929
|
+
acc = result[0]
|
2930
|
+
child = result[1]
|
2931
|
+
|
2932
|
+
recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
|
2933
|
+
|
2934
|
+
if verbose:
|
2935
|
+
count += 1
|
2936
|
+
try:
|
2937
|
+
percentage = (count/total_counts)*100
|
2938
|
+
log_time = curtime()
|
2939
|
+
sys.stdout.write('\033[A')
|
2940
|
+
sys.stdout.flush()
|
2941
|
+
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2942
|
+
sys.stdout.flush()
|
2943
|
+
except:
|
2944
|
+
#It's not really a big deal if the progress bar cannot be printed.
|
2945
|
+
pass
|
2946
|
+
except:
|
2947
|
+
#Error
|
2948
|
+
shutil.rmtree(temp_dir)
|
2949
|
+
finally:
|
2950
|
+
#Success
|
2951
|
+
shutil.rmtree(temp_dir)
|
2952
|
+
|
2936
2953
|
print("\nDatabases merged!")
|
2937
2954
|
|
2938
2955
|
return None
|
@@ -2947,14 +2964,14 @@ def pull_and_merge_accession(args):
|
|
2947
2964
|
accession_inverter[accession_index[acc]] = sql_friendly_accession
|
2948
2965
|
|
2949
2966
|
#joint_genome_index, accession_index, accession_inverter, accs_per_db are global already.
|
2950
|
-
acc, donor_dbs, recipient = args[0], args[1], args[2]
|
2967
|
+
acc, donor_dbs, recipient, temp = args[0], args[1], args[2], args[3]
|
2951
2968
|
|
2952
2969
|
acc_name = accession_inverter[acc]
|
2953
2970
|
acc_name_gens = acc_name + "_genomes"
|
2954
2971
|
|
2955
2972
|
query_sql = "SELECT * FROM " + acc_name
|
2956
2973
|
|
2957
|
-
temp_db = fastaai_database("
|
2974
|
+
temp_db = fastaai_database(os.path.normpath(temp+"/"+acc_name+".db"))
|
2958
2975
|
temp_db.activate_connection()
|
2959
2976
|
|
2960
2977
|
create_command = "CREATE TABLE IF NOT EXISTS " + acc_name + " (kmer INTEGER PRIMARY KEY, genomes array)"
|
@@ -3627,4 +3644,4 @@ def main():
|
|
3627
3644
|
if __name__ == "__main__":
|
3628
3645
|
main()
|
3629
3646
|
|
3630
|
-
|
3647
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.1.0
|
4
|
+
version: 1.1.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|