bio-gemma-wrapper 0.99.6 → 0.99.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +1 -1
- data/Rakefile +87 -0
- data/VERSION +1 -1
- data/bin/gemma-wrapper +240 -63
- data/gemma-wrapper.gemspec +1 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8516f4e6692ceed95f95d5d55f530a2bfbf2f6f8fd6daf8e5918752c6be6cae7
|
4
|
+
data.tar.gz: b6942b33acc903f423a9c6bcf53920df326a18f58eb307fc713d220c1d3d88ed
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15fdc14eafe7a33aa330a1e156f23cfd6a1d7bc43e48aacb126aa8157b8c0e1a8e649d02bc06eda4bf5704e1c0e04520ab07be492096ab92eb198372140442e5
|
7
|
+
data.tar.gz: c2a70aedae7743e63276285129665860d58bbdf7b701289b05cdbfc82d35207e44240b92b533bed55c6ced28f893cd08af228281dcb7de0c8c54d272a0e1474e
|
data/README.md
CHANGED
data/Rakefile
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
# Run tests with, for example
|
4
|
+
#
|
5
|
+
# env GEMMA_COMMAND=../gemma/bin/gemma rake test
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'rake'
|
9
|
+
|
10
|
+
task default: %w[test]
|
11
|
+
|
12
|
+
task :test do
|
13
|
+
ruby "bin/gemma-wrapper --json --force -- \
|
14
|
+
-g test/data/input/BXD_geno.txt.gz \
|
15
|
+
-p test/data/input/BXD_pheno.txt \
|
16
|
+
-a test/data/input/BXD_snps.txt \
|
17
|
+
-gk \
|
18
|
+
-debug > K0.json"
|
19
|
+
K0 = File.read("K0.json")
|
20
|
+
fail "Wrong Hash in #{K0}" if K0 !~ /1b700de28f242d561fc6769a07d88403764a996f/
|
21
|
+
fail "Expected error is 0 in #{K0}" if K0 !~ /errno\":0/
|
22
|
+
fail "Test failed" if $? != 0
|
23
|
+
ruby "bin/gemma-wrapper --json --input K0.json -- \
|
24
|
+
-g test/data/input/BXD_geno.txt.gz \
|
25
|
+
-p test/data/input/BXD_pheno.txt \
|
26
|
+
-c test/data/input/BXD_covariates2.txt \
|
27
|
+
-a test/data/input/BXD_snps.txt \
|
28
|
+
-lmm 2 -maf 0.1 \
|
29
|
+
-debug > GWA0.json"
|
30
|
+
gwa0 = File.read("GWA0.json")
|
31
|
+
fail "Wrong Hash in #{gwa0}" if gwa0 !~ /9e411810ad341de6456ce0c6efd4f973356d0bad/
|
32
|
+
fail "Expected cache hit in #{gwa0}" if gwa0 !~ /cache_hit\":true/
|
33
|
+
fail "Test failed" if $? != 0
|
34
|
+
ruby "bin/gemma-wrapper --debug --json --force \
|
35
|
+
--loco --chromosomes 1,2,3,4 -- \
|
36
|
+
-g test/data/input/BXD_geno.txt.gz \
|
37
|
+
-p test/data/input/BXD_pheno.txt \
|
38
|
+
-a test/data/input/BXD_snps.txt \
|
39
|
+
-gk -debug > KLOCO1.json"
|
40
|
+
kloco1 = File.read("KLOCO1.json")
|
41
|
+
fail "Wrong Hash in #{kloco1}" if kloco1 !~ /1b700de28f242d561fc6769a07d88403764a996f/
|
42
|
+
fail "Expected error is 0 in #{kloco1}" if kloco1 !~ /errno\":0/
|
43
|
+
fail "Test failed" if $? != 0
|
44
|
+
# run again for cache hits
|
45
|
+
ruby "bin/gemma-wrapper --json \
|
46
|
+
--loco --chromosomes 1,2,3,4 -- \
|
47
|
+
-g test/data/input/BXD_geno.txt.gz \
|
48
|
+
-p test/data/input/BXD_pheno.txt \
|
49
|
+
-a test/data/input/BXD_snps.txt \
|
50
|
+
-gk -debug > KLOCO2.json"
|
51
|
+
kloco2 = File.read("KLOCO2.json")
|
52
|
+
fail "Wrong Hash in #{kloco2}" if kloco2 !~ /1b700de28f242d561fc6769a07d88403764a996f/
|
53
|
+
fail "Expected cache hit in #{kloco2}" if kloco2 !~ /cache_hit\":true/
|
54
|
+
fail "Test failed" if $? != 0
|
55
|
+
ruby "bin/gemma-wrapper --json --force --loco --input KLOCO1.json -- \
|
56
|
+
-g test/data/input/BXD_geno.txt.gz \
|
57
|
+
-p test/data/input/BXD_pheno.txt \
|
58
|
+
-c test/data/input/BXD_covariates2.txt \
|
59
|
+
-a test/data/input/BXD_snps.txt \
|
60
|
+
-lmm 2 -maf 0.1 \
|
61
|
+
-debug > GWA1.json"
|
62
|
+
gwa1 = File.read("GWA1.json")
|
63
|
+
fail "Wrong Hash in #{gwa1}" if gwa1 !~ /9e411810ad341de6456ce0c6efd4f973356d0bad/
|
64
|
+
fail "Test failed" if $? != 0
|
65
|
+
# and run again
|
66
|
+
ruby "bin/gemma-wrapper --json --loco --input KLOCO2.json -- \
|
67
|
+
-g test/data/input/BXD_geno.txt.gz \
|
68
|
+
-p test/data/input/BXD_pheno.txt \
|
69
|
+
-c test/data/input/BXD_covariates2.txt \
|
70
|
+
-a test/data/input/BXD_snps.txt \
|
71
|
+
-lmm 2 -maf 0.1 \
|
72
|
+
-debug > GWA2.json"
|
73
|
+
fail "Test failed" if $? != 0
|
74
|
+
gwa2 = File.read("GWA2.json")
|
75
|
+
fail "Wrong Hash in #{gwa2}" if gwa2 !~ /9e411810ad341de6456ce0c6efd4f973356d0bad/
|
76
|
+
fail "Expected cache hit in #{gwa2}" if gwa2 !~ /cache_hit\":true/
|
77
|
+
end
|
78
|
+
|
79
|
+
require 'rdoc/task'
|
80
|
+
Rake::RDocTask.new do |rdoc|
|
81
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
82
|
+
|
83
|
+
rdoc.rdoc_dir = 'rdoc'
|
84
|
+
rdoc.title = "bio-gemma-wrapper #{version}"
|
85
|
+
rdoc.rdoc_files.include('README*')
|
86
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
87
|
+
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.99.6
|
1
|
+
0.99.7
|
data/bin/gemma-wrapper
CHANGED
@@ -4,35 +4,35 @@
|
|
4
4
|
# Author:: Pjotr Prins
|
5
5
|
# License:: GPL3
|
6
6
|
#
|
7
|
-
# Copyright (C) 2017-
|
7
|
+
# Copyright (C) 2017-2024 Pjotr Prins <pjotr.prins@thebird.nl>
|
8
8
|
|
9
9
|
USAGE = "
|
10
10
|
GEMMA wrapper example:
|
11
11
|
|
12
12
|
Simple caching of K computation with
|
13
13
|
|
14
|
-
gemma-wrapper --
|
15
|
-
-g test/data/input/BXD_geno.txt.gz
|
16
|
-
-p test/data/input/BXD_pheno.txt
|
14
|
+
gemma-wrapper -- \
|
15
|
+
-g test/data/input/BXD_geno.txt.gz \
|
16
|
+
-p test/data/input/BXD_pheno.txt \
|
17
17
|
-a test/data/input/BXD_snps.txt \
|
18
|
-
-gk
|
18
|
+
-gk > K.json
|
19
19
|
|
20
20
|
LOCO K computation with caching and JSON output
|
21
21
|
|
22
|
-
gemma-wrapper --json --loco --
|
23
|
-
-g test/data/input/BXD_geno.txt.gz
|
24
|
-
-p test/data/input/BXD_pheno.txt
|
25
|
-
-a test/data/input/BXD_snps.txt
|
22
|
+
gemma-wrapper --json --loco -- \
|
23
|
+
-g test/data/input/BXD_geno.txt.gz \
|
24
|
+
-p test/data/input/BXD_pheno.txt \
|
25
|
+
-a test/data/input/BXD_snps.txt \
|
26
26
|
-gk -debug > K.json
|
27
27
|
|
28
28
|
LMM's using the K's captured in K.json using the --input switch
|
29
29
|
|
30
|
-
gemma-wrapper --json --loco --input K.json --
|
31
|
-
-g test/data/input/BXD_geno.txt.gz
|
32
|
-
-p test/data/input/BXD_pheno.txt
|
33
|
-
-c test/data/input/BXD_covariates2.txt
|
34
|
-
-a test/data/input/BXD_snps.txt
|
35
|
-
-lmm
|
30
|
+
gemma-wrapper --json --loco --input K.json -- \
|
31
|
+
-g test/data/input/BXD_geno.txt.gz \
|
32
|
+
-p test/data/input/BXD_pheno.txt \
|
33
|
+
-c test/data/input/BXD_covariates2.txt \
|
34
|
+
-a test/data/input/BXD_snps.txt \
|
35
|
+
-lmm 9 -maf 0.1 \
|
36
36
|
-debug > GWA.json
|
37
37
|
|
38
38
|
Gemma gets used from the path. You can override by setting
|
@@ -45,6 +45,7 @@ GEMMA_V_MINOR = 4
|
|
45
45
|
|
46
46
|
basepath = File.dirname(File.dirname(__FILE__))
|
47
47
|
$: << File.join(basepath,'lib')
|
48
|
+
BIN = File.join(basepath,'bin')
|
48
49
|
|
49
50
|
VERSION_FILENAME=File.join(basepath,'VERSION')
|
50
51
|
version = File.new(VERSION_FILENAME).read.chomp
|
@@ -69,7 +70,10 @@ hashme = nil
|
|
69
70
|
require 'digest/sha1'
|
70
71
|
require 'fileutils'
|
71
72
|
require 'optparse'
|
73
|
+
require 'open3'
|
74
|
+
require 'socket' # for hostname
|
72
75
|
require 'tempfile'
|
76
|
+
require 'time'
|
73
77
|
require 'tmpdir'
|
74
78
|
|
75
79
|
require 'lock'
|
@@ -80,7 +84,7 @@ if split_at
|
|
80
84
|
gemma_args = ARGV[split_at+1..-1]
|
81
85
|
end
|
82
86
|
|
83
|
-
options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir(), quiet: false, permute_phenotypes: false, parallel: nil }
|
87
|
+
options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir(), quiet: false, permute_phenotypes: false, lmdb: nil, parallel: nil }
|
84
88
|
|
85
89
|
opts = OptionParser.new do |o|
|
86
90
|
o.banner = "\nUsage: #{File.basename($0)} [options] -- [gemma-options]"
|
@@ -99,6 +103,22 @@ opts = OptionParser.new do |o|
|
|
99
103
|
options[:loco] = b
|
100
104
|
end
|
101
105
|
|
106
|
+
o.on('--population NAME', 'Add population identifier to metadata') do |n|
|
107
|
+
options[:population] = n
|
108
|
+
end
|
109
|
+
|
110
|
+
o.on('--name NAME', 'Add dataset identifier to metadata') do |n|
|
111
|
+
options[:name] = n
|
112
|
+
end
|
113
|
+
|
114
|
+
o.on('--id ID', 'Add identifier to metadata') do |n|
|
115
|
+
options[:id] = n
|
116
|
+
end
|
117
|
+
|
118
|
+
o.on('--trait TRAIT', 'Add trait identifier to metadata') do |n|
|
119
|
+
options[:trait] = n
|
120
|
+
end
|
121
|
+
|
102
122
|
o.on('--chromosomes [1,2,3]',Array,'Run specific chromosomes') do |lst|
|
103
123
|
options[:chromosomes] = lst
|
104
124
|
end
|
@@ -120,6 +140,10 @@ opts = OptionParser.new do |o|
|
|
120
140
|
options[:force] = true
|
121
141
|
end
|
122
142
|
|
143
|
+
o.on("--keep", "Keep intermediate files in output") do |q|
|
144
|
+
options[:keep] = true
|
145
|
+
end
|
146
|
+
|
123
147
|
o.on("--parallel", "Run jobs in parallel") do |b|
|
124
148
|
options[:parallel] = true
|
125
149
|
end
|
@@ -128,6 +152,10 @@ opts = OptionParser.new do |o|
|
|
128
152
|
options[:parallel] = false
|
129
153
|
end
|
130
154
|
|
155
|
+
o.on("--lmdb", "Generate lmdb output") do |b|
|
156
|
+
options[:lmdb] = true
|
157
|
+
end
|
158
|
+
|
131
159
|
o.on("--slurm[=opts]",String,"Use slurm PBS for submitting jobs") do |slurm|
|
132
160
|
options[:slurm_opts] = ""
|
133
161
|
options[:slurm] = true
|
@@ -169,11 +197,18 @@ opts.parse!(ARGV)
|
|
169
197
|
OUTPUT = (options[:json] ? $stderr : $stdout )
|
170
198
|
|
171
199
|
record = { warnings: [], errno: 0, debug: [] }
|
200
|
+
record[:name] = options[:name] if options[:name]
|
201
|
+
record[:id] = options[:id] if options[:id]
|
202
|
+
record[:trait] = options[:trait] if options[:trait]
|
203
|
+
d = DateTime.now
|
204
|
+
record[:time] = d.strftime("%Y/%m/%d %H:%M")
|
205
|
+
record[:user] = ENV["USER"]
|
206
|
+
record[:hostname] = Socket.gethostname
|
172
207
|
|
173
208
|
require 'json'
|
174
209
|
|
175
210
|
json_out = lambda do
|
176
|
-
|
211
|
+
record.to_json if options[:json]
|
177
212
|
end
|
178
213
|
|
179
214
|
# ---- Some error handlers
|
@@ -215,17 +250,18 @@ end
|
|
215
250
|
# ---- Start banner
|
216
251
|
|
217
252
|
GEMMA_K_VERSION=version
|
218
|
-
GEMMA_K_BANNER = "gemma-wrapper #{version} (Ruby #{RUBY_VERSION}) by Pjotr Prins 2017-
|
253
|
+
GEMMA_K_BANNER = "gemma-wrapper #{version} (Ruby #{RUBY_VERSION}) by Pjotr Prins 2017-2024\n"
|
219
254
|
info.call GEMMA_K_BANNER
|
220
255
|
|
221
256
|
# Check gemma version
|
222
257
|
begin
|
223
258
|
gemma_command2 = options[:gemma_command]
|
224
|
-
info.call "NOTE: gemma-wrapper is soon to be replaced"
|
259
|
+
# info.call "NOTE: gemma-wrapper is soon to be replaced"
|
225
260
|
|
261
|
+
debug.call("Invoke #{gemma_command2}")
|
226
262
|
GEMMA_INFO = `#{gemma_command2}`
|
227
263
|
rescue Errno::ENOENT
|
228
|
-
gemma_command2 = "gemma"
|
264
|
+
gemma_command2 = "gemma" if not gemma_command2
|
229
265
|
error.call "<#{gemma_command2}> command not found"
|
230
266
|
end
|
231
267
|
|
@@ -249,7 +285,7 @@ if options[:show_help] or gemma_args == nil
|
|
249
285
|
end
|
250
286
|
|
251
287
|
if RUBY_VERSION =~ /^1/
|
252
|
-
warning "
|
288
|
+
warning "does not run on Ruby 1.x\n"
|
253
289
|
end
|
254
290
|
|
255
291
|
# ---- LOCO defaults to parallel
|
@@ -272,6 +308,9 @@ if options[:parallel]
|
|
272
308
|
error.call "<parallel> command not found"
|
273
309
|
end
|
274
310
|
parallel_cmds = []
|
311
|
+
if not options[:json]
|
312
|
+
error.call "<parallel> needs --json switch"
|
313
|
+
end
|
275
314
|
end
|
276
315
|
|
277
316
|
# ---- Fetch chromosomes from SNP annotation file
|
@@ -288,23 +327,69 @@ if DO_COMPUTE_GWA and options[:permute_phenotypes]
|
|
288
327
|
raise "Did not expect GEMMA -p phenotype whith permutations (only use --permutate-phenotypes)" if pheno_idx
|
289
328
|
end
|
290
329
|
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
330
|
+
matches = {
|
331
|
+
chr: [:string, /-loco (\S+) /],
|
332
|
+
user_time: [:float, /User time \(seconds\): ([\d\.]+)/],
|
333
|
+
system_time: [:float, /System time \(seconds\): ([\d\.]+)/],
|
334
|
+
perc_cpu: [:int, /Percent of CPU this job got: (\d+)%/],
|
335
|
+
wall_clock: [:string, /Elapsed \(wall clock\) time \(h:mm:ss or m:ss\): (\S+)/],
|
336
|
+
ram_usage_gb: [:gb, /Maximum resident set size \(kbytes\): (\d+)/],
|
337
|
+
command: [:string, /Command being timed: (.+)/]
|
338
|
+
}
|
339
|
+
|
340
|
+
parse_stats = lambda { |buf|
|
341
|
+
stats = {}
|
342
|
+
buf.split("\\n").each do |s|
|
343
|
+
if s =~ /^\t/
|
344
|
+
matches.each do |k,v|
|
345
|
+
type,m = v
|
346
|
+
if s =~ m
|
347
|
+
# $stderr.print $1,s
|
348
|
+
stats[k] =
|
349
|
+
case type
|
350
|
+
when :float
|
351
|
+
$1.to_f
|
352
|
+
when :int
|
353
|
+
$1.to_i
|
354
|
+
when :gb
|
355
|
+
(($1.to_f)/1048576.0).round(3)
|
356
|
+
else
|
357
|
+
$1
|
358
|
+
end
|
359
|
+
end
|
299
360
|
end
|
300
|
-
io.close
|
301
|
-
err = $?.to_i
|
302
361
|
end
|
303
|
-
else
|
304
|
-
$stderr.print `#{cmd}`
|
305
|
-
err = $?.to_i
|
306
362
|
end
|
307
|
-
|
363
|
+
stats
|
364
|
+
}
|
365
|
+
|
366
|
+
run_stat = {}
|
367
|
+
|
368
|
+
execute = lambda { |cmd|
|
369
|
+
info.call("Executing: #{cmd}")
|
370
|
+
err = 0
|
371
|
+
stdout_buf = ""
|
372
|
+
stderr_buf = ""
|
373
|
+
stats = {}
|
374
|
+
Open3.popen3("time -v #{cmd}") do |stdin,stdout,stderr,wait_thr|
|
375
|
+
stderr_buf = stderr.read
|
376
|
+
stdout_buf = stdout.read
|
377
|
+
stats = parse_stats.call(stderr_buf)
|
378
|
+
stdin.close
|
379
|
+
stdout.close
|
380
|
+
stderr.close
|
381
|
+
err = wait_thr.value
|
382
|
+
end
|
383
|
+
$stderr.print(stderr_buf) if options[:debug]
|
384
|
+
if err and err != 0
|
385
|
+
$stderr.print(stdout_buf)
|
386
|
+
$stderr.print(stderr_buf) if not options[:debug]
|
387
|
+
$stderr.print "FATAL ERROR: gemma-wrapper bailed out with #{err}\n"
|
388
|
+
# sleep 10_000
|
389
|
+
$stderr.print Kernel.caller().join("\n")
|
390
|
+
exit 1
|
391
|
+
end
|
392
|
+
return err,stats
|
308
393
|
}
|
309
394
|
|
310
395
|
compute_hash = lambda do | phenofn = nil |
|
@@ -319,6 +404,7 @@ compute_hash = lambda do | phenofn = nil |
|
|
319
404
|
end
|
320
405
|
debug.call("Hashing on ",hm)
|
321
406
|
hm.each do | item |
|
407
|
+
# if entry is a file use the hash of its content, otherwise just the entry itself
|
322
408
|
if File.file?(item)
|
323
409
|
hashes << Digest::SHA1.hexdigest(File.read(item))
|
324
410
|
debug.call [item,hashes.last]
|
@@ -343,6 +429,7 @@ hashme =
|
|
343
429
|
end
|
344
430
|
|
345
431
|
HASH = compute_hash.call()
|
432
|
+
options[:compute_hash_on] = hashme
|
346
433
|
options[:hash] = HASH
|
347
434
|
|
348
435
|
at_exit do
|
@@ -351,7 +438,7 @@ end
|
|
351
438
|
|
352
439
|
Lock.create(HASH) # this will wait for a lock to expire
|
353
440
|
|
354
|
-
|
441
|
+
JOBLOG = HASH+"-parallel.log"
|
355
442
|
|
356
443
|
# Create cache dir
|
357
444
|
FileUtils::mkdir_p options[:cache_dir]
|
@@ -365,7 +452,7 @@ GEMMA_ARGS = gemma_args
|
|
365
452
|
debug.call "Options: ",options,"\n" if !options[:quiet]
|
366
453
|
|
367
454
|
invoke_gemma = lambda do |extra_args, cache_hit = false, chr = "full", permutation = 1|
|
368
|
-
cmd = "#{gemma_command2} #{
|
455
|
+
cmd = "time -v #{gemma_command2} #{extra_args.join(' ')} #{GEMMA_ARGS.join(' ')}"
|
369
456
|
record[:gemma_command] = cmd
|
370
457
|
return if cache_hit
|
371
458
|
if options[:slurm]
|
@@ -395,7 +482,7 @@ srun #{cmd}
|
|
395
482
|
info.call("Add parallel job: ",cmd)
|
396
483
|
parallel_cmds << cmd
|
397
484
|
else
|
398
|
-
err = execute.call(cmd)
|
485
|
+
err,stats = execute.call(cmd)
|
399
486
|
end
|
400
487
|
err
|
401
488
|
else
|
@@ -416,6 +503,8 @@ srun #{cmd}
|
|
416
503
|
end
|
417
504
|
end
|
418
505
|
|
506
|
+
create_archive = false
|
507
|
+
|
419
508
|
# Takes the hash value and checks whether the (output) file exists
|
420
509
|
# returns datafn, logfn, cache_hit
|
421
510
|
cache = lambda do | chr, ext, h=HASH, permutation=0 |
|
@@ -427,10 +516,15 @@ cache = lambda do | chr, ext, h=HASH, permutation=0 |
|
|
427
516
|
logfn = prefix+".log.txt"
|
428
517
|
datafn = prefix+ext
|
429
518
|
record[:files] ||= []
|
430
|
-
|
519
|
+
log_basefn = File.basename(logfn)
|
520
|
+
data_basefn = File.basename(datafn)
|
521
|
+
log_tmpfn = tmpdir+"/"+log_basefn
|
522
|
+
data_tmpfn = tmpdir+"/"+data_basefn
|
523
|
+
record[:files].push [chr,log_basefn,data_basefn]
|
431
524
|
if !options[:force]
|
432
|
-
|
433
|
-
|
525
|
+
info.call "Checking for #{data_tmpfn}"
|
526
|
+
if File.exist? log_tmpfn and File.exist? data_tmpfn
|
527
|
+
if File.read(log_tmpfn).include? "total computation time"
|
434
528
|
record[:cache_hit] = true
|
435
529
|
info.call "#{logfn} CACHE HIT!\n"
|
436
530
|
return hashi, true
|
@@ -448,8 +542,10 @@ kinship = lambda do | chr = nil |
|
|
448
542
|
when 2 then '.sXX.txt'
|
449
543
|
else error.call "Unknown kinship type"
|
450
544
|
end
|
545
|
+
# ---- check cache:
|
451
546
|
hashi, cache_hit = cache.call chr,ext
|
452
547
|
if not cache_hit
|
548
|
+
create_archive = true
|
453
549
|
if chr != nil
|
454
550
|
invoke_gemma.call [ '-loco', chr, '-o', hashi ], cache_hit
|
455
551
|
else
|
@@ -466,8 +562,10 @@ gwas = lambda do | chr, kfn, pfn, permutation=0 |
|
|
466
562
|
hash = compute_hash.call(pfn)
|
467
563
|
hashi, cache_hit = cache.call(chr,".assoc.txt",hash,permutation)
|
468
564
|
if not cache_hit
|
469
|
-
|
565
|
+
create_archive = true
|
566
|
+
args = []
|
470
567
|
args << [ '-loco', chr ] if chr != nil
|
568
|
+
args << [ '-k', kfn, '-o', hashi ]
|
471
569
|
args << [ '-p', pfn ] if pfn
|
472
570
|
invoke_gemma.call args,false,chr,permutation
|
473
571
|
end
|
@@ -480,12 +578,20 @@ if LOCO
|
|
480
578
|
end
|
481
579
|
end
|
482
580
|
|
581
|
+
json_in = nil
|
582
|
+
|
483
583
|
if DO_COMPUTE_KINSHIP
|
484
584
|
# compute K
|
585
|
+
ARCHIVE = options[:cache_dir]+"/"+HASH+"-gemma-cXX.tar.xz"
|
586
|
+
|
587
|
+
if File.exist? ARCHIVE and not options[:force]
|
588
|
+
info.call "Unpack archive #{ARCHIVE}!"
|
589
|
+
execute.call "tar xJf #{ARCHIVE} -C #{tmpdir}"
|
590
|
+
end
|
485
591
|
info.call CHROMOSOMES
|
486
592
|
if LOCO
|
487
593
|
CHROMOSOMES.each do |chr|
|
488
|
-
info.call "LOCO for ",chr
|
594
|
+
info.call "Compute kinship LOCO for chr ",chr
|
489
595
|
kinship.call(chr)
|
490
596
|
end
|
491
597
|
else
|
@@ -493,6 +599,11 @@ if DO_COMPUTE_KINSHIP
|
|
493
599
|
end
|
494
600
|
else
|
495
601
|
# DO_COMPUTE_GWA
|
602
|
+
ARCHIVE = options[:cache_dir]+"/"+HASH+"-gemma-GWA.tar.xz"
|
603
|
+
if File.exist? ARCHIVE and not options[:force]
|
604
|
+
info.call "Unpack archive #{ARCHIVE}!"
|
605
|
+
execute.call "env XZ_OPT='-T0' tar xJf #{ARCHIVE} -C #{tmpdir}"
|
606
|
+
end
|
496
607
|
begin
|
497
608
|
json_in = JSON.parse(File.read(options[:input]))
|
498
609
|
rescue TypeError
|
@@ -504,12 +615,20 @@ else
|
|
504
615
|
if LOCO
|
505
616
|
k_files = json_in["files"].map { |rec| [rec[0],rec[2]] }
|
506
617
|
k_files.each do | chr, kfn | # call a GWA for each chromosome
|
507
|
-
|
618
|
+
|
619
|
+
kfn2 = options[:cache_dir]+"/"+kfn
|
620
|
+
if not File.exist?(kfn2) and json_in["archive"]
|
621
|
+
# we aim to unpack the archive once on reuse
|
622
|
+
archive_grm = options[:cache_dir]+"/"+json_in["archive"]
|
623
|
+
execute.call "env XZ_OPT='-T0' tar xJf #{archive_grm} -C #{options[:cache_dir]}"
|
624
|
+
end
|
625
|
+
|
626
|
+
gwas.call(chr,kfn2,pfn)
|
508
627
|
end
|
509
628
|
else
|
510
629
|
kfn = json_in["files"][0][2]
|
511
630
|
CHROMOSOMES.each do | chr |
|
512
|
-
gwas.call(chr,kfn,pfn)
|
631
|
+
gwas.call(chr,tmpdir+"/"+kfn,pfn)
|
513
632
|
end
|
514
633
|
end
|
515
634
|
# Permute
|
@@ -562,6 +681,7 @@ end
|
|
562
681
|
# ---- Invoke parallel
|
563
682
|
if options[:parallel]
|
564
683
|
# parallel_cmds = ["echo 1","sleep 1 && echo 2", "false", "echo 3"]
|
684
|
+
joblog = tmpdir+"/"+JOBLOG
|
565
685
|
|
566
686
|
Tempfile.open("commands.txt") do |f|
|
567
687
|
cmdfn = f.path
|
@@ -571,38 +691,95 @@ if options[:parallel]
|
|
571
691
|
end
|
572
692
|
end
|
573
693
|
cmd = "cat \"#{cmdfn}\""
|
574
|
-
|
694
|
+
debug.call("tmpdir=#{tmpdir}")
|
695
|
+
err,stats = execute.call(cmd+"|parallel --results #{tmpdir} --joblog #{joblog}") # first try optimistically to run all jobs in parallel
|
575
696
|
if err != 0
|
576
|
-
[
|
697
|
+
[4,1].each do |jobs|
|
577
698
|
info.call("Failed to complete parallel run -- retrying with smaller RAM footprint!")
|
578
|
-
err = execute.call(cmd+"|parallel
|
699
|
+
err,stats = execute.call(cmd+"|parallel -j #{jobs} --results #{tmpdir} --resume --joblog #{joblog}")
|
579
700
|
break if err == 0
|
580
701
|
end
|
581
702
|
if err != 0
|
582
703
|
info.call("Parallel run failed!")
|
583
704
|
debug.call("Job log is: ",File.read(joblog))
|
584
|
-
# Remove remaining files
|
585
|
-
FileUtils.mv joblog, joblog+".bak", verbose: false, force: true
|
586
|
-
FileUtils.rm_rf("#{tmpdir}/*", secure: true)
|
587
705
|
exit err
|
588
706
|
end
|
589
707
|
end
|
590
708
|
end
|
591
709
|
info.call("Run successful!")
|
592
|
-
FileUtils.mv joblog, joblog+".bak", verbose: false, force: true
|
593
710
|
end
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
711
|
+
|
712
|
+
# Collect stats from parallel run
|
713
|
+
|
714
|
+
run_stats = {}
|
715
|
+
$stderr.print "STATS"
|
716
|
+
Dir.glob(tmpdir+'/*/*' ).each do | dir |
|
717
|
+
File.open("#{dir}/stderr") { |f|
|
718
|
+
run_stat = parse_stats.call(f.read)
|
719
|
+
chr = run_stat[:chr]
|
720
|
+
run_stats[chr] = run_stat
|
721
|
+
}
|
722
|
+
end
|
723
|
+
# Now add up the stats
|
724
|
+
user_time = 0.0
|
725
|
+
system_time = 0.0
|
726
|
+
wall_clock = "0"
|
727
|
+
ram_usage_gb = 0.0
|
728
|
+
run_stats.each do | k, v |
|
729
|
+
wall_clock=v[:wall_clock] if v[:wall_clock]>wall_clock
|
730
|
+
ram_usage_gb += v[:ram_usage_gb]
|
731
|
+
user_time += v[:user_time]
|
732
|
+
system_time += v[:system_time]
|
733
|
+
end
|
734
|
+
|
735
|
+
record[:user_time] = user_time
|
736
|
+
record[:system_time] = system_time
|
737
|
+
record[:wall_clock] = wall_clock
|
738
|
+
record[:ram_usage_gb] = ram_usage_gb.round(2)
|
739
|
+
record[:run_stats] = run_stats
|
740
|
+
|
741
|
+
if create_archive
|
742
|
+
if DO_COMPUTE_GWA
|
743
|
+
LMDB = tmpdir+"/"+HASH+'.mdb'
|
744
|
+
# create lmdb database - we call out into a python script for that.
|
745
|
+
# first create a JSON record
|
746
|
+
|
747
|
+
meta = {
|
748
|
+
type: "gemma-wrapper",
|
749
|
+
version: version,
|
750
|
+
population: options[:population],
|
751
|
+
name: options[:name],
|
752
|
+
trait: options[:trait],
|
753
|
+
url: "https://genenetwork.org/show_trait?trait_id="+options[:trait]+"&dataset="+options[:name],
|
754
|
+
archive_GRM: json_in["archive"],
|
755
|
+
archive_GWA: File.basename(ARCHIVE),
|
756
|
+
}
|
757
|
+
if options[:id] and options[:id] =~ /,/ # this is GN specific
|
758
|
+
dataid,probesetid,probesetfreezeid = options[:id].split(",")
|
759
|
+
meta[:dataid] = dataid.to_i
|
760
|
+
meta[:probesetid] = probesetid.to_i
|
761
|
+
meta[:probesetfreezeid] = probesetfreezeid.to_i
|
762
|
+
end
|
763
|
+
record[:meta] = meta
|
764
|
+
metafn = tmpdir+"/"+HASH+"-meta.json"
|
765
|
+
File.write(metafn,record.to_json)
|
766
|
+
# sleep 10_000
|
767
|
+
if options[:lmdb]
|
768
|
+
File.unlink(LMDB) if File.exist?(LMDB) # removed any cached lmdb
|
769
|
+
execute.call "python3 #{BIN}/gemma2lmdb.py --db=#{LMDB} --meta=#{metafn} #{tmpdir}/*assoc.txt"
|
770
|
+
end
|
771
|
+
if not options[:keep]
|
772
|
+
execute.call "rm -f #{tmpdir}/1/*/* #{tmpdir}/*.txt #{tmpdir}/*.log #{tmpdir}/*.mdb-lock" # remove GEMMA output files
|
773
|
+
FileUtils.rm_rf("#{tmpdir}/1", secure: true)
|
774
|
+
end
|
605
775
|
end
|
776
|
+
File.write(tmpdir+"/"+HASH+"-gemma-wrapper-output.json",json_out.call)
|
777
|
+
info.call "Creating archive #{ARCHIVE}..."
|
778
|
+
execute.call "env XZ_OPT='-T0' tar -cvJf #{ARCHIVE} -C #{tmpdir} ."
|
606
779
|
end
|
607
780
|
|
608
781
|
end # tmpdir
|
782
|
+
|
783
|
+
record[:archive] = File.basename(ARCHIVE)
|
784
|
+
|
785
|
+
print json_out.call
|
data/gemma-wrapper.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-gemma-wrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.99.6
|
4
|
+
version: 0.99.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pjotr Prins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-07-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: GEMMA wrapper adds LOCO and permutation support. Also runs in parallel
|
14
14
|
and caches K between runs with LOCO support
|
@@ -21,6 +21,7 @@ files:
|
|
21
21
|
- Gemfile
|
22
22
|
- LICENSE.txt
|
23
23
|
- README.md
|
24
|
+
- Rakefile
|
24
25
|
- VERSION
|
25
26
|
- bin/gemma-wrapper
|
26
27
|
- gemma-wrapper.gemspec
|
@@ -44,7 +45,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
44
45
|
- !ruby/object:Gem::Version
|
45
46
|
version: '0'
|
46
47
|
requirements: []
|
47
|
-
rubygems_version: 3.
|
48
|
+
rubygems_version: 3.4.19
|
48
49
|
signing_key:
|
49
50
|
specification_version: 4
|
50
51
|
summary: GEMMA with LOCO and permutations
|