bio-gemma-wrapper 0.99.6 → 0.99.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/Rakefile +87 -0
- data/VERSION +1 -1
- data/bin/gemma-wrapper +240 -63
- data/gemma-wrapper.gemspec +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8516f4e6692ceed95f95d5d55f530a2bfbf2f6f8fd6daf8e5918752c6be6cae7
|
4
|
+
data.tar.gz: b6942b33acc903f423a9c6bcf53920df326a18f58eb307fc713d220c1d3d88ed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15fdc14eafe7a33aa330a1e156f23cfd6a1d7bc43e48aacb126aa8157b8c0e1a8e649d02bc06eda4bf5704e1c0e04520ab07be492096ab92eb198372140442e5
|
7
|
+
data.tar.gz: c2a70aedae7743e63276285129665860d58bbdf7b701289b05cdbfc82d35207e44240b92b533bed55c6ced28f893cd08af228281dcb7de0c8c54d272a0e1474e
|
data/README.md
CHANGED
data/Rakefile
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
# Run tests with, for example
|
4
|
+
#
|
5
|
+
# env GEMMA_COMMAND=../gemma/bin/gemma rake test
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'rake'
|
9
|
+
|
10
|
+
task default: %w[test]
|
11
|
+
|
12
|
+
# Integration test: runs bin/gemma-wrapper six times against the BXD test
# data and checks the JSON each run writes (result hash, error number,
# cache hits). Output files (K0.json, GWA0.json, ...) are written to the
# current directory and reused as --input for later runs.
task :test do
  # GEMMA arguments. Keep the order as is: the expected hashes below are
  # fixed values and may depend on the argument order -- TODO confirm.
  geno  = '-g test/data/input/BXD_geno.txt.gz'
  pheno = '-p test/data/input/BXD_pheno.txt'
  covar = '-c test/data/input/BXD_covariates2.txt'
  snps  = '-a test/data/input/BXD_snps.txt'
  kinship_args = "#{geno} #{pheno} #{snps} -gk -debug"
  gwa_args     = "#{geno} #{pheno} #{covar} #{snps} -lmm 2 -maf 0.1 -debug"

  # [failure message, pattern the JSON output has to match]
  kinship_hash = ['Wrong Hash', /1b700de28f242d561fc6769a07d88403764a996f/]
  gwa_hash     = ['Wrong Hash', /9e411810ad341de6456ce0c6efd4f973356d0bad/]
  no_error     = ['Expected error is 0', /errno\":0/]
  cache_hit    = ['Expected cache hit', /cache_hit\":true/]

  # Run the wrapper, redirect its stdout to +outfn+, check the exit status
  # first (so a crashed run is not reported as a wrong hash) and then every
  # expected pattern.
  run_wrapper = lambda do |wrapper_opts, gemma_args, outfn, checks|
    ruby "bin/gemma-wrapper #{wrapper_opts} -- #{gemma_args} > #{outfn}"
    fail "Test failed" if $? != 0
    output = File.read(outfn)
    checks.each do |message, pattern|
      fail "#{message} in #{output}" if output !~ pattern
    end
  end

  run_wrapper.call('--json --force', kinship_args, 'K0.json',
                   [kinship_hash, no_error])
  run_wrapper.call('--json --input K0.json', gwa_args, 'GWA0.json',
                   [gwa_hash, cache_hit])
  run_wrapper.call('--debug --json --force --loco --chromosomes 1,2,3,4',
                   kinship_args, 'KLOCO1.json', [kinship_hash, no_error])
  # run again for cache hits
  run_wrapper.call('--json --loco --chromosomes 1,2,3,4',
                   kinship_args, 'KLOCO2.json', [kinship_hash, cache_hit])
  run_wrapper.call('--json --force --loco --input KLOCO1.json',
                   gwa_args, 'GWA1.json', [gwa_hash])
  # and run again
  run_wrapper.call('--json --loco --input KLOCO2.json',
                   gwa_args, 'GWA2.json', [gwa_hash, cache_hit])
end
|
78
|
+
|
79
|
+
require 'rdoc/task'

# Defines the rdoc tasks: documentation is generated into ./rdoc from the
# README and everything under lib/.
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in
  # the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').chomp : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-gemma-wrapper #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.99.6
|
1
|
+
0.99.7
|
data/bin/gemma-wrapper
CHANGED
@@ -4,35 +4,35 @@
|
|
4
4
|
# Author:: Pjotr Prins
|
5
5
|
# License:: GPL3
|
6
6
|
#
|
7
|
-
# Copyright (C) 2017-
|
7
|
+
# Copyright (C) 2017-2024 Pjotr Prins <pjotr.prins@thebird.nl>
|
8
8
|
|
9
9
|
USAGE = "
|
10
10
|
GEMMA wrapper example:
|
11
11
|
|
12
12
|
Simple caching of K computation with
|
13
13
|
|
14
|
-
gemma-wrapper --
|
15
|
-
-g test/data/input/BXD_geno.txt.gz
|
16
|
-
-p test/data/input/BXD_pheno.txt
|
14
|
+
gemma-wrapper -- \
|
15
|
+
-g test/data/input/BXD_geno.txt.gz \
|
16
|
+
-p test/data/input/BXD_pheno.txt \
|
17
17
|
-a test/data/input/BXD_snps.txt \
|
18
|
-
-gk
|
18
|
+
-gk > K.json
|
19
19
|
|
20
20
|
LOCO K computation with caching and JSON output
|
21
21
|
|
22
|
-
gemma-wrapper --json --loco --
|
23
|
-
-g test/data/input/BXD_geno.txt.gz
|
24
|
-
-p test/data/input/BXD_pheno.txt
|
25
|
-
-a test/data/input/BXD_snps.txt
|
22
|
+
gemma-wrapper --json --loco -- \
|
23
|
+
-g test/data/input/BXD_geno.txt.gz \
|
24
|
+
-p test/data/input/BXD_pheno.txt \
|
25
|
+
-a test/data/input/BXD_snps.txt \
|
26
26
|
-gk -debug > K.json
|
27
27
|
|
28
28
|
LMM's using the K's captured in K.json using the --input switch
|
29
29
|
|
30
|
-
gemma-wrapper --json --loco --input K.json --
|
31
|
-
-g test/data/input/BXD_geno.txt.gz
|
32
|
-
-p test/data/input/BXD_pheno.txt
|
33
|
-
-c test/data/input/BXD_covariates2.txt
|
34
|
-
-a test/data/input/BXD_snps.txt
|
35
|
-
-lmm
|
30
|
+
gemma-wrapper --json --loco --input K.json -- \
|
31
|
+
-g test/data/input/BXD_geno.txt.gz \
|
32
|
+
-p test/data/input/BXD_pheno.txt \
|
33
|
+
-c test/data/input/BXD_covariates2.txt \
|
34
|
+
-a test/data/input/BXD_snps.txt \
|
35
|
+
-lmm 9 -maf 0.1 \
|
36
36
|
-debug > GWA.json
|
37
37
|
|
38
38
|
Gemma gets used from the path. You can override by setting
|
@@ -45,6 +45,7 @@ GEMMA_V_MINOR = 4
|
|
45
45
|
|
46
46
|
basepath = File.dirname(File.dirname(__FILE__))
|
47
47
|
$: << File.join(basepath,'lib')
|
48
|
+
BIN = File.join(basepath,'bin')
|
48
49
|
|
49
50
|
VERSION_FILENAME=File.join(basepath,'VERSION')
|
50
51
|
version = File.new(VERSION_FILENAME).read.chomp
|
@@ -69,7 +70,10 @@ hashme = nil
|
|
69
70
|
require 'digest/sha1'
|
70
71
|
require 'fileutils'
|
71
72
|
require 'optparse'
|
73
|
+
require 'open3'
|
74
|
+
require 'socket' # for hostname
|
72
75
|
require 'tempfile'
|
76
|
+
require 'time'
|
73
77
|
require 'tmpdir'
|
74
78
|
|
75
79
|
require 'lock'
|
@@ -80,7 +84,7 @@ if split_at
|
|
80
84
|
gemma_args = ARGV[split_at+1..-1]
|
81
85
|
end
|
82
86
|
|
83
|
-
options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir(), quiet: false, permute_phenotypes: false, parallel: nil }
|
87
|
+
options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir(), quiet: false, permute_phenotypes: false, lmdb: nil, parallel: nil }
|
84
88
|
|
85
89
|
opts = OptionParser.new do |o|
|
86
90
|
o.banner = "\nUsage: #{File.basename($0)} [options] -- [gemma-options]"
|
@@ -99,6 +103,22 @@ opts = OptionParser.new do |o|
|
|
99
103
|
options[:loco] = b
|
100
104
|
end
|
101
105
|
|
106
|
+
o.on('--population NAME', 'Add population identifier to metadata') do |n|
|
107
|
+
options[:population] = n
|
108
|
+
end
|
109
|
+
|
110
|
+
o.on('--name NAME', 'Add dataset identifier to metadata') do |n|
|
111
|
+
options[:name] = n
|
112
|
+
end
|
113
|
+
|
114
|
+
o.on('--id ID', 'Add identifier to metadata') do |n|
|
115
|
+
options[:id] = n
|
116
|
+
end
|
117
|
+
|
118
|
+
o.on('--trait TRAIT', 'Add trait identifier to metadata') do |n|
|
119
|
+
options[:trait] = n
|
120
|
+
end
|
121
|
+
|
102
122
|
o.on('--chromosomes [1,2,3]',Array,'Run specific chromosomes') do |lst|
|
103
123
|
options[:chromosomes] = lst
|
104
124
|
end
|
@@ -120,6 +140,10 @@ opts = OptionParser.new do |o|
|
|
120
140
|
options[:force] = true
|
121
141
|
end
|
122
142
|
|
143
|
+
o.on("--keep", "Keep intermediate files in output") do |q|
|
144
|
+
options[:keep] = true
|
145
|
+
end
|
146
|
+
|
123
147
|
o.on("--parallel", "Run jobs in parallel") do |b|
|
124
148
|
options[:parallel] = true
|
125
149
|
end
|
@@ -128,6 +152,10 @@ opts = OptionParser.new do |o|
|
|
128
152
|
options[:parallel] = false
|
129
153
|
end
|
130
154
|
|
155
|
+
o.on("--lmdb", "Generate lmdb output") do |b|
|
156
|
+
options[:lmdb] = true
|
157
|
+
end
|
158
|
+
|
131
159
|
o.on("--slurm[=opts]",String,"Use slurm PBS for submitting jobs") do |slurm|
|
132
160
|
options[:slurm_opts] = ""
|
133
161
|
options[:slurm] = true
|
@@ -169,11 +197,18 @@ opts.parse!(ARGV)
|
|
169
197
|
OUTPUT = (options[:json] ? $stderr : $stdout )
|
170
198
|
|
171
199
|
record = { warnings: [], errno: 0, debug: [] }
|
200
|
+
record[:name] = options[:name] if options[:name]
|
201
|
+
record[:id] = options[:id] if options[:id]
|
202
|
+
record[:trait] = options[:trait] if options[:trait]
|
203
|
+
d = DateTime.now
|
204
|
+
record[:time] = d.strftime("%Y/%m/%d %H:%M")
|
205
|
+
record[:user] = ENV["USER"]
|
206
|
+
record[:hostname] = Socket.gethostname
|
172
207
|
|
173
208
|
require 'json'
|
174
209
|
|
175
210
|
json_out = lambda do
|
176
|
-
|
211
|
+
record.to_json if options[:json]
|
177
212
|
end
|
178
213
|
|
179
214
|
# ---- Some error handlers
|
@@ -215,17 +250,18 @@ end
|
|
215
250
|
# ---- Start banner
|
216
251
|
|
217
252
|
GEMMA_K_VERSION=version
|
218
|
-
GEMMA_K_BANNER = "gemma-wrapper #{version} (Ruby #{RUBY_VERSION}) by Pjotr Prins 2017-
|
253
|
+
GEMMA_K_BANNER = "gemma-wrapper #{version} (Ruby #{RUBY_VERSION}) by Pjotr Prins 2017-2024\n"
|
219
254
|
info.call GEMMA_K_BANNER
|
220
255
|
|
221
256
|
# Check gemma version
|
222
257
|
begin
|
223
258
|
gemma_command2 = options[:gemma_command]
|
224
|
-
info.call "NOTE: gemma-wrapper is soon to be replaced"
|
259
|
+
# info.call "NOTE: gemma-wrapper is soon to be replaced"
|
225
260
|
|
261
|
+
debug.call("Invoke #{gemma_command2}")
|
226
262
|
GEMMA_INFO = `#{gemma_command2}`
|
227
263
|
rescue Errno::ENOENT
|
228
|
-
gemma_command2 = "gemma"
|
264
|
+
gemma_command2 = "gemma" if not gemma_command2
|
229
265
|
error.call "<#{gemma_command2}> command not found"
|
230
266
|
end
|
231
267
|
|
@@ -249,7 +285,7 @@ if options[:show_help] or gemma_args == nil
|
|
249
285
|
end
|
250
286
|
|
251
287
|
if RUBY_VERSION =~ /^1/
|
252
|
-
warning "
|
288
|
+
warning "does not run on Ruby 1.x\n"
|
253
289
|
end
|
254
290
|
|
255
291
|
# ---- LOCO defaults to parallel
|
@@ -272,6 +308,9 @@ if options[:parallel]
|
|
272
308
|
error.call "<parallel> command not found"
|
273
309
|
end
|
274
310
|
parallel_cmds = []
|
311
|
+
if not options[:json]
|
312
|
+
error.call "<parallel> needs --json switch"
|
313
|
+
end
|
275
314
|
end
|
276
315
|
|
277
316
|
# ---- Fetch chromosomes from SNP annotation file
|
@@ -288,23 +327,69 @@ if DO_COMPUTE_GWA and options[:permute_phenotypes]
|
|
288
327
|
raise "Did not expect GEMMA -p phenotype whith permutations (only use --permutate-phenotypes)" if pheno_idx
|
289
328
|
end
|
290
329
|
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
330
|
+
# Patterns for pulling resource statistics out of `time -v` style output.
# Each entry maps a stats key to [conversion, regexp with one capture group];
# the conversion decides how the captured text is stored (see parse_stats).
matches = {
  chr:          [:string, /-loco (\S+) /],
  user_time:    [:float,  /User time \(seconds\): ([\d\.]+)/],
  system_time:  [:float,  /System time \(seconds\): ([\d\.]+)/],
  perc_cpu:     [:int,    /Percent of CPU this job got: (\d+)%/],
  wall_clock:   [:string, /Elapsed \(wall clock\) time \(h:mm:ss or m:ss\): (\S+)/],
  ram_usage_gb: [:gb,     /Maximum resident set size \(kbytes\): (\d+)/],
  command:      [:string, /Command being timed: (.+)/]
}

# Parse the captured stderr text +buf+ and return a Hash of the statistics
# found (keys as in +matches+). Only lines that start with a TAB are looked
# at, so ordinary program output that happens to contain one of the phrases
# is ignored. When a key shows up on several lines the last one wins.
# Returns an empty Hash when nothing matches.
parse_stats = lambda { |buf|
  stats = {}
  # Walk real lines: splitting on the two-character string "\\n" left the
  # whole buffer as a single "line".
  buf.each_line do |line|
    next unless line.start_with?("\t")
    matches.each do |key, (type, pattern)|
      found = pattern.match(line)
      next unless found
      value = found[1]
      stats[key] =
        case type
        when :float
          value.to_f
        when :int
          value.to_i
        when :gb
          # kbytes -> GB
          (value.to_f / 1048576.0).round(3)
        else
          value
        end
    end
  end
  stats
}
|
365
|
+
|
366
|
+
run_stat = {}
|
367
|
+
|
368
|
+
# Run +cmd+ through the shell, prefixed with `time -v`, and capture its
# stdout and stderr.
#
# Returns [status, stats] where status is the Process::Status of the run and
# stats is the Hash parse_stats extracts from the captured stderr.
# On failure the captured output and the call stack are printed to $stderr
# and the wrapper exits with status 1, so callers only ever get a result for
# a successful run.
#
# NOTE(review): invoke_gemma already builds its command with a `time -v`
# prefix, so gemma runs that come through here appear to be timed twice --
# confirm whether that is intended.
execute = lambda { |cmd|
  info.call("Executing: #{cmd}")
  # capture3 drains stdout and stderr at the same time. Reading stderr to EOF
  # before touching stdout can hang forever once the child has filled the
  # stdout pipe (e.g. a verbose tar).
  stdout_buf, stderr_buf, err = Open3.capture3("time -v #{cmd}")
  stats = parse_stats.call(stderr_buf)
  $stderr.print(stderr_buf) if options[:debug]
  unless err.success?
    $stderr.print(stdout_buf)
    $stderr.print(stderr_buf) if not options[:debug] # already shown in debug mode
    $stderr.print "FATAL ERROR: gemma-wrapper bailed out with #{err}\n"
    $stderr.print Kernel.caller().join("\n")
    exit 1
  end
  return err,stats
}
|
309
394
|
|
310
395
|
compute_hash = lambda do | phenofn = nil |
|
@@ -319,6 +404,7 @@ compute_hash = lambda do | phenofn = nil |
|
|
319
404
|
end
|
320
405
|
debug.call("Hashing on ",hm)
|
321
406
|
hm.each do | item |
|
407
|
+
# if entry is a file use the hash of its content, otherwise just the entry itself
|
322
408
|
if File.file?(item)
|
323
409
|
hashes << Digest::SHA1.hexdigest(File.read(item))
|
324
410
|
debug.call [item,hashes.last]
|
@@ -343,6 +429,7 @@ hashme =
|
|
343
429
|
end
|
344
430
|
|
345
431
|
HASH = compute_hash.call()
|
432
|
+
options[:compute_hash_on] = hashme
|
346
433
|
options[:hash] = HASH
|
347
434
|
|
348
435
|
at_exit do
|
@@ -351,7 +438,7 @@ end
|
|
351
438
|
|
352
439
|
Lock.create(HASH) # this will wait for a lock to expire
|
353
440
|
|
354
|
-
|
441
|
+
JOBLOG = HASH+"-parallel.log"
|
355
442
|
|
356
443
|
# Create cache dir
|
357
444
|
FileUtils::mkdir_p options[:cache_dir]
|
@@ -365,7 +452,7 @@ GEMMA_ARGS = gemma_args
|
|
365
452
|
debug.call "Options: ",options,"\n" if !options[:quiet]
|
366
453
|
|
367
454
|
invoke_gemma = lambda do |extra_args, cache_hit = false, chr = "full", permutation = 1|
|
368
|
-
cmd = "#{gemma_command2} #{
|
455
|
+
cmd = "time -v #{gemma_command2} #{extra_args.join(' ')} #{GEMMA_ARGS.join(' ')}"
|
369
456
|
record[:gemma_command] = cmd
|
370
457
|
return if cache_hit
|
371
458
|
if options[:slurm]
|
@@ -395,7 +482,7 @@ srun #{cmd}
|
|
395
482
|
info.call("Add parallel job: ",cmd)
|
396
483
|
parallel_cmds << cmd
|
397
484
|
else
|
398
|
-
err = execute.call(cmd)
|
485
|
+
err,stats = execute.call(cmd)
|
399
486
|
end
|
400
487
|
err
|
401
488
|
else
|
@@ -416,6 +503,8 @@ srun #{cmd}
|
|
416
503
|
end
|
417
504
|
end
|
418
505
|
|
506
|
+
create_archive = false
|
507
|
+
|
419
508
|
# Takes the hash value and checks whether the (output) file exists
|
420
509
|
# returns datafn, logfn, cache_hit
|
421
510
|
cache = lambda do | chr, ext, h=HASH, permutation=0 |
|
@@ -427,10 +516,15 @@ cache = lambda do | chr, ext, h=HASH, permutation=0 |
|
|
427
516
|
logfn = prefix+".log.txt"
|
428
517
|
datafn = prefix+ext
|
429
518
|
record[:files] ||= []
|
430
|
-
|
519
|
+
log_basefn = File.basename(logfn)
|
520
|
+
data_basefn = File.basename(datafn)
|
521
|
+
log_tmpfn = tmpdir+"/"+log_basefn
|
522
|
+
data_tmpfn = tmpdir+"/"+data_basefn
|
523
|
+
record[:files].push [chr,log_basefn,data_basefn]
|
431
524
|
if !options[:force]
|
432
|
-
|
433
|
-
|
525
|
+
info.call "Checking for #{data_tmpfn}"
|
526
|
+
if File.exist? log_tmpfn and File.exist? data_tmpfn
|
527
|
+
if File.read(log_tmpfn).include? "total computation time"
|
434
528
|
record[:cache_hit] = true
|
435
529
|
info.call "#{logfn} CACHE HIT!\n"
|
436
530
|
return hashi, true
|
@@ -448,8 +542,10 @@ kinship = lambda do | chr = nil |
|
|
448
542
|
when 2 then '.sXX.txt'
|
449
543
|
else error.call "Unknown kinship type"
|
450
544
|
end
|
545
|
+
# ---- check cache:
|
451
546
|
hashi, cache_hit = cache.call chr,ext
|
452
547
|
if not cache_hit
|
548
|
+
create_archive = true
|
453
549
|
if chr != nil
|
454
550
|
invoke_gemma.call [ '-loco', chr, '-o', hashi ], cache_hit
|
455
551
|
else
|
@@ -466,8 +562,10 @@ gwas = lambda do | chr, kfn, pfn, permutation=0 |
|
|
466
562
|
hash = compute_hash.call(pfn)
|
467
563
|
hashi, cache_hit = cache.call(chr,".assoc.txt",hash,permutation)
|
468
564
|
if not cache_hit
|
469
|
-
|
565
|
+
create_archive = true
|
566
|
+
args = []
|
470
567
|
args << [ '-loco', chr ] if chr != nil
|
568
|
+
args << [ '-k', kfn, '-o', hashi ]
|
471
569
|
args << [ '-p', pfn ] if pfn
|
472
570
|
invoke_gemma.call args,false,chr,permutation
|
473
571
|
end
|
@@ -480,12 +578,20 @@ if LOCO
|
|
480
578
|
end
|
481
579
|
end
|
482
580
|
|
581
|
+
json_in = nil
|
582
|
+
|
483
583
|
if DO_COMPUTE_KINSHIP
|
484
584
|
# compute K
|
585
|
+
ARCHIVE = options[:cache_dir]+"/"+HASH+"-gemma-cXX.tar.xz"
|
586
|
+
|
587
|
+
if File.exist? ARCHIVE and not options[:force]
|
588
|
+
info.call "Unpack archive #{ARCHIVE}!"
|
589
|
+
execute.call "tar xJf #{ARCHIVE} -C #{tmpdir}"
|
590
|
+
end
|
485
591
|
info.call CHROMOSOMES
|
486
592
|
if LOCO
|
487
593
|
CHROMOSOMES.each do |chr|
|
488
|
-
info.call "LOCO for ",chr
|
594
|
+
info.call "Compute kinship LOCO for chr ",chr
|
489
595
|
kinship.call(chr)
|
490
596
|
end
|
491
597
|
else
|
@@ -493,6 +599,11 @@ if DO_COMPUTE_KINSHIP
|
|
493
599
|
end
|
494
600
|
else
|
495
601
|
# DO_COMPUTE_GWA
|
602
|
+
ARCHIVE = options[:cache_dir]+"/"+HASH+"-gemma-GWA.tar.xz"
|
603
|
+
if File.exist? ARCHIVE and not options[:force]
|
604
|
+
info.call "Unpack archive #{ARCHIVE}!"
|
605
|
+
execute.call "env XZ_OPT='-T0' tar xJf #{ARCHIVE} -C #{tmpdir}"
|
606
|
+
end
|
496
607
|
begin
|
497
608
|
json_in = JSON.parse(File.read(options[:input]))
|
498
609
|
rescue TypeError
|
@@ -504,12 +615,20 @@ else
|
|
504
615
|
if LOCO
|
505
616
|
k_files = json_in["files"].map { |rec| [rec[0],rec[2]] }
|
506
617
|
k_files.each do | chr, kfn | # call a GWA for each chromosome
|
507
|
-
|
618
|
+
|
619
|
+
kfn2 = options[:cache_dir]+"/"+kfn
|
620
|
+
if not File.exist?(kfn2) and json_in["archive"]
|
621
|
+
# we aim to unpack the archive once on reuse
|
622
|
+
archive_grm = options[:cache_dir]+"/"+json_in["archive"]
|
623
|
+
execute.call "env XZ_OPT='-T0' tar xJf #{archive_grm} -C #{options[:cache_dir]}"
|
624
|
+
end
|
625
|
+
|
626
|
+
gwas.call(chr,kfn2,pfn)
|
508
627
|
end
|
509
628
|
else
|
510
629
|
kfn = json_in["files"][0][2]
|
511
630
|
CHROMOSOMES.each do | chr |
|
512
|
-
gwas.call(chr,kfn,pfn)
|
631
|
+
gwas.call(chr,tmpdir+"/"+kfn,pfn)
|
513
632
|
end
|
514
633
|
end
|
515
634
|
# Permute
|
@@ -562,6 +681,7 @@ end
|
|
562
681
|
# ---- Invoke parallel
|
563
682
|
if options[:parallel]
|
564
683
|
# parallel_cmds = ["echo 1","sleep 1 && echo 2", "false", "echo 3"]
|
684
|
+
joblog = tmpdir+"/"+JOBLOG
|
565
685
|
|
566
686
|
Tempfile.open("commands.txt") do |f|
|
567
687
|
cmdfn = f.path
|
@@ -571,38 +691,95 @@ if options[:parallel]
|
|
571
691
|
end
|
572
692
|
end
|
573
693
|
cmd = "cat \"#{cmdfn}\""
|
574
|
-
|
694
|
+
debug.call("tmpdir=#{tmpdir}")
|
695
|
+
err,stats = execute.call(cmd+"|parallel --results #{tmpdir} --joblog #{joblog}") # first try optimistically to run all jobs in parallel
|
575
696
|
if err != 0
|
576
|
-
[
|
697
|
+
[4,1].each do |jobs|
|
577
698
|
info.call("Failed to complete parallel run -- retrying with smaller RAM footprint!")
|
578
|
-
err = execute.call(cmd+"|parallel
|
699
|
+
err,stats = execute.call(cmd+"|parallel -j #{jobs} --results #{tmpdir} --resume --joblog #{joblog}")
|
579
700
|
break if err == 0
|
580
701
|
end
|
581
702
|
if err != 0
|
582
703
|
info.call("Parallel run failed!")
|
583
704
|
debug.call("Job log is: ",File.read(joblog))
|
584
|
-
# Remove remaining files
|
585
|
-
FileUtils.mv joblog, joblog+".bak", verbose: false, force: true
|
586
|
-
FileUtils.rm_rf("#{tmpdir}/*", secure: true)
|
587
705
|
exit err
|
588
706
|
end
|
589
707
|
end
|
590
708
|
end
|
591
709
|
info.call("Run successful!")
|
592
|
-
FileUtils.mv joblog, joblog+".bak", verbose: false, force: true
|
593
710
|
end
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
711
|
+
|
712
|
+
# Collect stats from parallel run: every <tmpdir>/*/*/stderr file is parsed
# with parse_stats and stored under the chromosome found in it.
run_stats = {}
$stderr.print "STATS"
Dir.glob(tmpdir+'/*/*' ).each do | dir |
  stderr_fn = "#{dir}/stderr"
  next unless File.file?(stderr_fn) # entry is not a job result directory
  run_stat = parse_stats.call(File.read(stderr_fn))
  run_stats[run_stat[:chr]] = run_stat
end

# Convert "h:mm:ss", "m:ss.ss" (or nil) to seconds so that elapsed times are
# compared by value; as plain strings "9:59.00" would sort after "1:00:00".
wall_clock_seconds = lambda do |clock|
  clock.to_s.split(":").reduce(0.0) { |secs, part| secs * 60 + part.to_f }
end

# Now add up the stats: CPU times and RAM are summed over all jobs, the wall
# clock is that of the longest job. Stats missing from a job count as zero.
user_time = 0.0
system_time = 0.0
wall_clock = "0"
ram_usage_gb = 0.0
run_stats.each_value do | v |
  if wall_clock_seconds.call(v[:wall_clock]) > wall_clock_seconds.call(wall_clock)
    wall_clock = v[:wall_clock]
  end
  ram_usage_gb += v[:ram_usage_gb].to_f
  user_time += v[:user_time].to_f
  system_time += v[:system_time].to_f
end

record[:user_time] = user_time
record[:system_time] = system_time
record[:wall_clock] = wall_clock
record[:ram_usage_gb] = ram_usage_gb.round(2)
record[:run_stats] = run_stats
|
740
|
+
|
741
|
+
# Something was (re)computed in this run: write the metadata, optionally
# build the lmdb database, and pack the contents of tmpdir into ARCHIVE.
if create_archive
  if DO_COMPUTE_GWA
    LMDB = tmpdir+"/"+HASH+'.mdb'
    # create lmdb database - we call out into a python script for that.
    # first create a JSON record

    meta = {
      type: "gemma-wrapper",
      version: version,
      population: options[:population],
      name: options[:name],
      trait: options[:trait],
      # Interpolate rather than concatenate: --trait and --name are optional
      # and String + nil raises a TypeError.
      url: "https://genenetwork.org/show_trait?trait_id=#{options[:trait]}&dataset=#{options[:name]}",
      archive_GRM: json_in["archive"],
      archive_GWA: File.basename(ARCHIVE),
    }
    if options[:id] and options[:id] =~ /,/ # this is GN specific
      dataid,probesetid,probesetfreezeid = options[:id].split(",")
      meta[:dataid] = dataid.to_i
      meta[:probesetid] = probesetid.to_i
      meta[:probesetfreezeid] = probesetfreezeid.to_i
    end
    record[:meta] = meta
    metafn = tmpdir+"/"+HASH+"-meta.json"
    File.write(metafn,record.to_json)
    if options[:lmdb]
      File.unlink(LMDB) if File.exist?(LMDB) # remove any cached lmdb
      execute.call "python3 #{BIN}/gemma2lmdb.py --db=#{LMDB} --meta=#{metafn} #{tmpdir}/*assoc.txt"
    end
    if not options[:keep]
      execute.call "rm -f #{tmpdir}/1/*/* #{tmpdir}/*.txt #{tmpdir}/*.log #{tmpdir}/*.mdb-lock" # remove GEMMA output files
      FileUtils.rm_rf("#{tmpdir}/1", secure: true)
    end
  end
  # The wrapper's own JSON record goes into the archive as well
  File.write(tmpdir+"/"+HASH+"-gemma-wrapper-output.json",json_out.call)
  info.call "Creating archive #{ARCHIVE}..."
  execute.call "env XZ_OPT='-T0' tar -cvJf #{ARCHIVE} -C #{tmpdir} ."
end
|
607
780
|
|
608
781
|
end # tmpdir
|
782
|
+
|
783
|
+
record[:archive] = File.basename(ARCHIVE)
|
784
|
+
|
785
|
+
print json_out.call
|
data/gemma-wrapper.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-gemma-wrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.99.6
|
4
|
+
version: 0.99.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pjotr Prins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-07-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: GEMMA wrapper adds LOCO and permutation support. Also runs in parallel
|
14
14
|
and caches K between runs with LOCO support
|
@@ -21,6 +21,7 @@ files:
|
|
21
21
|
- Gemfile
|
22
22
|
- LICENSE.txt
|
23
23
|
- README.md
|
24
|
+
- Rakefile
|
24
25
|
- VERSION
|
25
26
|
- bin/gemma-wrapper
|
26
27
|
- gemma-wrapper.gemspec
|
@@ -44,7 +45,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
44
45
|
- !ruby/object:Gem::Version
|
45
46
|
version: '0'
|
46
47
|
requirements: []
|
47
|
-
rubygems_version: 3.
|
48
|
+
rubygems_version: 3.4.19
|
48
49
|
signing_key:
|
49
50
|
specification_version: 4
|
50
51
|
summary: GEMMA with LOCO and permutations
|