miga-base 1.1.0.0 → 1.1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/miga/daemon/base.rb +20 -5
- data/lib/miga/daemon.rb +7 -6
- data/lib/miga/version.rb +2 -2
- data/utils/FastAAI/FastAAI +100 -83
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70955c93dc93a73a0ac28d3aaa8f75b4d0f2e0e5dc8797d1fc2bf57e969a8fbe
|
4
|
+
data.tar.gz: 89ba5a42a7a12a104a12f9116f46b0e107bc82b22731622403d09c2e2b7459c7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d57d59d5cb439119dda8f4c8ff90cd3d2d253ff631117c3005b60f321014e61d584c04655f6861de29807cfa2a9c569f2e0e35fde6931cc7140982512a35f13c
|
7
|
+
data.tar.gz: a9c1090876f87c32376b477363f95410786a2d2da73f72be3d81a2fcfe28c6f1dc3829b741b1ad4986ded59bdc06999ad775739d8ecbd7694b4c6e40077905ec
|
data/lib/miga/daemon/base.rb
CHANGED
@@ -12,9 +12,10 @@ module MiGA::Daemon::Base
|
|
12
12
|
k = k.to_sym
|
13
13
|
unless v.nil?
|
14
14
|
case k
|
15
|
-
when :latency, :maxjobs, :ppn, :format_version, :verbosity
|
15
|
+
when :latency, :maxjobs, :ppn, :ppn_project, :format_version, :verbosity,
|
16
|
+
:skip_maintenance
|
16
17
|
v = v.to_i
|
17
|
-
if !force && v == 0 && k
|
18
|
+
if !force && v == 0 && !%i[verbosity skip_maintenance].include?(k)
|
18
19
|
raise "Daemon's #{k} cannot be set to zero"
|
19
20
|
end
|
20
21
|
when :shutdown_when_done, :show_log, :bypass_maintenance
|
@@ -51,9 +52,23 @@ module MiGA::Daemon::Base
|
|
51
52
|
end
|
52
53
|
|
53
54
|
##
|
54
|
-
# Returns
|
55
|
-
def ppn
|
56
|
-
runopts(:ppn)
|
55
|
+
# Returns the number of times maintenance should be skipped before running
|
56
|
+
def skip_maintenance
|
57
|
+
runopts(:skip_maintenance) || 0
|
58
|
+
end
|
59
|
+
|
60
|
+
##
|
61
|
+
# Returns the running option +opt+ in jobs for +what+. +what+ can be
|
62
|
+
# +:dataset+ or +:project+
|
63
|
+
def runopts_for(opt, what)
|
64
|
+
runopts(:"#{opt}_#{what}") || runopts(opt)
|
65
|
+
end
|
66
|
+
|
67
|
+
##
|
68
|
+
# Returns Integer indicating the number of CPUs per job, in jobs for +what+.
|
69
|
+
# See also #runopts_for
|
70
|
+
def ppn(what = :dataset)
|
71
|
+
runopts_for(:ppn, what)
|
57
72
|
end
|
58
73
|
|
59
74
|
##
|
data/lib/miga/daemon.rb
CHANGED
@@ -93,7 +93,7 @@ class MiGA::Daemon < MiGA::MiGA
|
|
93
93
|
flush!
|
94
94
|
if (loop_i % 12).zero?
|
95
95
|
purge!
|
96
|
-
queue_maintenance
|
96
|
+
queue_maintenance if (loop_i % (12 * (skip_maintenance + 1))).zero?
|
97
97
|
end
|
98
98
|
save_status
|
99
99
|
sleep(latency)
|
@@ -231,10 +231,11 @@ class MiGA::Daemon < MiGA::MiGA
|
|
231
231
|
# Construct the command for the given job definition with current
|
232
232
|
# daemon settings
|
233
233
|
def job_cmd(to_run)
|
234
|
+
what = to_run[:ds].nil? ? :project : :dataset
|
234
235
|
vars = {
|
235
236
|
'PROJECT' => project.path,
|
236
|
-
'RUNTYPE' =>
|
237
|
-
'CORES' => ppn,
|
237
|
+
'RUNTYPE' => runopts_for(:type, what),
|
238
|
+
'CORES' => ppn(what),
|
238
239
|
'MIGA' => MiGA::MiGA.root_path
|
239
240
|
}
|
240
241
|
vars['DATASET'] = to_run[:ds].name unless to_run[:ds].nil?
|
@@ -246,13 +247,13 @@ class MiGA::Daemon < MiGA::MiGA
|
|
246
247
|
),
|
247
248
|
vars: vars.map do |k, v|
|
248
249
|
runopts(:var).miga_variables(key: k, value: v)
|
249
|
-
end.join(
|
250
|
-
cpus: ppn,
|
250
|
+
end.join(runopts_for(:varsep, what)),
|
251
|
+
cpus: ppn(what),
|
251
252
|
log: File.join(log_dir, "#{to_run[:ds_name]}.log"),
|
252
253
|
task_name: to_run[:task_name],
|
253
254
|
miga: File.join(MiGA::MiGA.root_path, 'bin/miga').shellescape
|
254
255
|
}
|
255
|
-
|
256
|
+
runopts_for(:cmd, what).miga_variables(var_hsh)
|
256
257
|
end
|
257
258
|
|
258
259
|
##
|
data/lib/miga/version.rb
CHANGED
@@ -12,7 +12,7 @@ module MiGA
|
|
12
12
|
# - String indicating release status:
|
13
13
|
# - rc* release candidate, not released as gem
|
14
14
|
# - [0-9]+ stable release, released as gem
|
15
|
-
VERSION = [1.1, 0, 0].freeze
|
15
|
+
VERSION = [1.1, 2, 1].freeze
|
16
16
|
|
17
17
|
##
|
18
18
|
# Nickname for the current major.minor version.
|
@@ -20,7 +20,7 @@ module MiGA
|
|
20
20
|
|
21
21
|
##
|
22
22
|
# Date of the current gem release.
|
23
|
-
VERSION_DATE = Date.new(2021,
|
23
|
+
VERSION_DATE = Date.new(2021, 11, 7)
|
24
24
|
|
25
25
|
##
|
26
26
|
# References of MiGA
|
data/utils/FastAAI/FastAAI
CHANGED
@@ -583,6 +583,24 @@ class input_file:
|
|
583
583
|
self.initial_state = "protein+HMM"
|
584
584
|
|
585
585
|
self.verbose = verbosity
|
586
|
+
|
587
|
+
#r_scripts_loc = os.path.dirname(sys.modules['metapop'].__file__) + "/metapop_r/"
|
588
|
+
#"00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
|
589
|
+
self.hmm_path = None
|
590
|
+
try:
|
591
|
+
#Try to locate the data bundled as it would be with a pip/conda install.
|
592
|
+
script_path = os.path.dirname(sys.modules['fastAAI_HMM_models'].__file__)
|
593
|
+
hmm_complete_model = script_path + '/00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm'
|
594
|
+
self.hmm_path = str(hmm_complete_model)
|
595
|
+
#Check that the file exists or fail to the except.
|
596
|
+
fh = open(self.hmm_path)
|
597
|
+
fh.close()
|
598
|
+
except:
|
599
|
+
#Look in the same dir as the script; old method/MiGA friendly
|
600
|
+
script_path = Path(__file__)
|
601
|
+
script_dir = script_path.parent
|
602
|
+
hmm_complete_model = script_dir / "00.Libraries/01.SCG_HMMs/Complete_SCG_DB.hmm"
|
603
|
+
self.hmm_path = str(hmm_complete_model)
|
586
604
|
|
587
605
|
#Functions for externally setting status and file paths of particular types
|
588
606
|
def set_genome(self, path):
|
@@ -2701,6 +2719,7 @@ def merge_db_thread_starter(rev_index, per_db_accs):
|
|
2701
2719
|
accs_per_db = per_db_accs
|
2702
2720
|
|
2703
2721
|
|
2722
|
+
|
2704
2723
|
def merge_db(recipient, donors, verbose, threads):
|
2705
2724
|
#Prettier on the CLI
|
2706
2725
|
|
@@ -2828,111 +2847,109 @@ def merge_db(recipient, donors, verbose, threads):
|
|
2828
2847
|
|
2829
2848
|
all_accessions = list(all_accessions)
|
2830
2849
|
|
2831
|
-
acc_args = [(acc, donor_dbs, recipient) for acc in all_accessions]
|
2832
|
-
|
2833
|
-
if not os.path.exists("FastAAI_temp"):
|
2834
|
-
os.mkdir("FastAAI_temp")
|
2835
2850
|
|
2836
2851
|
print("")
|
2837
2852
|
print("Formatting data to add to database. Started at", curtime())
|
2838
2853
|
|
2839
|
-
|
2840
|
-
|
2841
|
-
|
2842
|
-
total_counts = len(acc_args)
|
2843
|
-
try:
|
2844
|
-
percentage = (count/total_counts)*100
|
2845
|
-
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2846
|
-
sys.stdout.flush()
|
2847
|
-
except:
|
2848
|
-
#It's not really a big deal if the progress bar cannot be printed.
|
2849
|
-
pass
|
2850
|
-
|
2851
|
-
last_pct = 0
|
2852
|
-
|
2853
|
-
pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
|
2854
|
+
temp_dir = tempfile.mkdtemp()
|
2855
|
+
try:
|
2856
|
+
acc_args = [(acc, donor_dbs, recipient, temp_dir) for acc in all_accessions]
|
2854
2857
|
|
2855
|
-
quiverfull = []
|
2856
|
-
for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
|
2857
|
-
acc = result[0]
|
2858
|
-
child = result[1]
|
2859
|
-
#sub_gak = result[2]
|
2860
|
-
|
2861
|
-
quiverfull.append([acc, child])
|
2862
|
-
#gaks.extend(sub_gak)
|
2863
|
-
|
2864
2858
|
if verbose:
|
2865
|
-
|
2859
|
+
print("")
|
2860
|
+
count = 0
|
2861
|
+
total_counts = len(acc_args)
|
2866
2862
|
try:
|
2867
2863
|
percentage = (count/total_counts)*100
|
2868
|
-
log_time = curtime()
|
2869
|
-
sys.stdout.write('\033[A')
|
2870
|
-
sys.stdout.flush()
|
2871
2864
|
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2872
2865
|
sys.stdout.flush()
|
2873
2866
|
except:
|
2874
2867
|
#It's not really a big deal if the progress bar cannot be printed.
|
2875
2868
|
pass
|
2876
|
-
|
2877
|
-
pool.close()
|
2878
|
-
pool.join()
|
2879
|
-
|
2880
|
-
print("")
|
2881
|
-
print("Adding data to final database. Started at", curtime())
|
2882
|
-
|
2883
|
-
if verbose:
|
2884
|
-
print("")
|
2885
2869
|
|
2886
|
-
|
2887
|
-
total_counts = len(acc_args)
|
2888
|
-
try:
|
2889
|
-
percentage = (count/total_counts)*100
|
2890
|
-
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2891
|
-
sys.stdout.flush()
|
2892
|
-
except:
|
2893
|
-
#It's not really a big deal if the progress bar cannot be printed.
|
2894
|
-
pass
|
2895
|
-
|
2896
|
-
last_pct = 0
|
2897
|
-
|
2898
|
-
recipient.activate_connection()
|
2899
|
-
genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
|
2900
|
-
genome_reindex = []
|
2901
|
-
for g in joint_genome_index:
|
2902
|
-
genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
|
2870
|
+
last_pct = 0
|
2903
2871
|
|
2904
|
-
|
2905
|
-
recipient.connection.commit()
|
2906
|
-
|
2907
|
-
del genome_reindex
|
2908
|
-
|
2909
|
-
for result in quiverfull:
|
2910
|
-
acc = result[0]
|
2911
|
-
child = result[1]
|
2872
|
+
pool = multiprocessing.Pool(threads, initializer=merge_db_thread_starter, initargs = (reverse_genome_indices, accs_per_db,))
|
2912
2873
|
|
2913
|
-
|
2874
|
+
quiverfull = []
|
2875
|
+
for result in pool.imap_unordered(pull_and_merge_accession, acc_args):
|
2876
|
+
acc = result[0]
|
2877
|
+
child = result[1]
|
2878
|
+
#sub_gak = result[2]
|
2879
|
+
|
2880
|
+
quiverfull.append([acc, child])
|
2881
|
+
#gaks.extend(sub_gak)
|
2882
|
+
|
2883
|
+
if verbose:
|
2884
|
+
count += 1
|
2885
|
+
try:
|
2886
|
+
percentage = (count/total_counts)*100
|
2887
|
+
log_time = curtime()
|
2888
|
+
sys.stdout.write('\033[A')
|
2889
|
+
sys.stdout.flush()
|
2890
|
+
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2891
|
+
sys.stdout.flush()
|
2892
|
+
except:
|
2893
|
+
#It's not really a big deal if the progress bar cannot be printed.
|
2894
|
+
pass
|
2895
|
+
|
2896
|
+
pool.close()
|
2897
|
+
pool.join()
|
2898
|
+
|
2899
|
+
print("")
|
2900
|
+
print("Adding data to final database. Started at", curtime())
|
2914
2901
|
|
2915
2902
|
if verbose:
|
2916
|
-
|
2903
|
+
print("")
|
2904
|
+
|
2905
|
+
count = 0
|
2906
|
+
total_counts = len(acc_args)
|
2917
2907
|
try:
|
2918
2908
|
percentage = (count/total_counts)*100
|
2919
|
-
log_time = curtime()
|
2920
|
-
sys.stdout.write('\033[A')
|
2921
|
-
sys.stdout.flush()
|
2922
2909
|
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2923
2910
|
sys.stdout.flush()
|
2924
2911
|
except:
|
2925
2912
|
#It's not really a big deal if the progress bar cannot be printed.
|
2926
2913
|
pass
|
2927
|
-
|
2928
|
-
|
2929
|
-
|
2930
|
-
|
2931
|
-
|
2932
|
-
|
2933
|
-
|
2934
|
-
|
2935
|
-
|
2914
|
+
|
2915
|
+
last_pct = 0
|
2916
|
+
|
2917
|
+
recipient.activate_connection()
|
2918
|
+
genome_list_update_sql = "INSERT OR REPLACE INTO genome_index VALUES (?, ?, ?)"
|
2919
|
+
genome_reindex = []
|
2920
|
+
for g in joint_genome_index:
|
2921
|
+
genome_reindex.append((g, joint_genome_index[g], joint_genome_counts[joint_genome_index[g]]))
|
2922
|
+
|
2923
|
+
recipient.cursor.executemany(genome_list_update_sql, genome_reindex)
|
2924
|
+
recipient.connection.commit()
|
2925
|
+
|
2926
|
+
del genome_reindex
|
2927
|
+
|
2928
|
+
for result in quiverfull:
|
2929
|
+
acc = result[0]
|
2930
|
+
child = result[1]
|
2931
|
+
|
2932
|
+
recipient.add_child_to_parent(acc, child, genomes_too = True, update_gak = True)
|
2933
|
+
|
2934
|
+
if verbose:
|
2935
|
+
count += 1
|
2936
|
+
try:
|
2937
|
+
percentage = (count/total_counts)*100
|
2938
|
+
log_time = curtime()
|
2939
|
+
sys.stdout.write('\033[A')
|
2940
|
+
sys.stdout.flush()
|
2941
|
+
sys.stdout.write("Completion".rjust(3)+ ' |'+('#'*int(percentage/2)).ljust(50)+'| ' + ('%.2f'%percentage).rjust(7)+'% ( ' + str(count) + " of " + str(total_counts) + ' done at ' + curtime() + " )\n")
|
2942
|
+
sys.stdout.flush()
|
2943
|
+
except:
|
2944
|
+
#It's not really a big deal if the progress bar cannot be printed.
|
2945
|
+
pass
|
2946
|
+
except:
|
2947
|
+
#Error
|
2948
|
+
shutil.rmtree(temp_dir)
|
2949
|
+
finally:
|
2950
|
+
#Success
|
2951
|
+
shutil.rmtree(temp_dir)
|
2952
|
+
|
2936
2953
|
print("\nDatabases merged!")
|
2937
2954
|
|
2938
2955
|
return None
|
@@ -2947,14 +2964,14 @@ def pull_and_merge_accession(args):
|
|
2947
2964
|
accession_inverter[accession_index[acc]] = sql_friendly_accession
|
2948
2965
|
|
2949
2966
|
#joint_genome_index, accession_index, accession_inverter, accs_per_db are global already.
|
2950
|
-
acc, donor_dbs, recipient = args[0], args[1], args[2]
|
2967
|
+
acc, donor_dbs, recipient, temp = args[0], args[1], args[2], args[3]
|
2951
2968
|
|
2952
2969
|
acc_name = accession_inverter[acc]
|
2953
2970
|
acc_name_gens = acc_name + "_genomes"
|
2954
2971
|
|
2955
2972
|
query_sql = "SELECT * FROM " + acc_name
|
2956
2973
|
|
2957
|
-
temp_db = fastaai_database("
|
2974
|
+
temp_db = fastaai_database(os.path.normpath(temp+"/"+acc_name+".db"))
|
2958
2975
|
temp_db.activate_connection()
|
2959
2976
|
|
2960
2977
|
create_command = "CREATE TABLE IF NOT EXISTS " + acc_name + " (kmer INTEGER PRIMARY KEY, genomes array)"
|
@@ -3627,4 +3644,4 @@ def main():
|
|
3627
3644
|
if __name__ == "__main__":
|
3628
3645
|
main()
|
3629
3646
|
|
3630
|
-
|
3647
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: miga-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0.0
|
4
|
+
version: 1.1.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Luis M. Rodriguez-R
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-11-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: daemons
|