bio-gemma-wrapper 0.99.6 → 0.99.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 69d74ac5f1a705132d7ddc86bd1182c11fc37cf32062dbb28909f16684a827cb
4
- data.tar.gz: 6f912b3c03474c1334a105d6c9471c02d39c205e94b0d07e5281718beec65ee7
3
+ metadata.gz: 8516f4e6692ceed95f95d5d55f530a2bfbf2f6f8fd6daf8e5918752c6be6cae7
4
+ data.tar.gz: b6942b33acc903f423a9c6bcf53920df326a18f58eb307fc713d220c1d3d88ed
5
5
  SHA512:
6
- metadata.gz: 35fbdf4ccfc482f6898e35b1e46e840db87f34efb35a096206ffef4a131ac8b21a4e6b7893be6ee053a59837eadba6547258d2360d796794c71a60670458943f
7
- data.tar.gz: 43f7a7438f475583930e6cf1f50d34206fba7674b3df836791cf7064bd7ca55dddc2cc101f7c5a89d8042645b6dd06245b84d3b38501bc1eab7d18774703a01a
6
+ metadata.gz: 15fdc14eafe7a33aa330a1e156f23cfd6a1d7bc43e48aacb126aa8157b8c0e1a8e649d02bc06eda4bf5704e1c0e04520ab07be492096ab92eb198372140442e5
7
+ data.tar.gz: c2a70aedae7743e63276285129665860d58bbdf7b701289b05cdbfc82d35207e44240b92b533bed55c6ced28f893cd08af228281dcb7de0c8c54d272a0e1474e
data/README.md CHANGED
@@ -247,4 +247,4 @@ ruby bin/gemma-wrapper --help
247
247
 
248
248
  ## Copyright
249
249
 
250
- Copyright (c) 2017-2021 Pjotr Prins. See [LICENSE.txt](LICENSE.txt) for further details.
250
+ Copyright (c) 2017-2023 Pjotr Prins. See [LICENSE.txt](LICENSE.txt) for further details.
data/Rakefile ADDED
@@ -0,0 +1,87 @@
1
+ # encoding: utf-8
2
+ #
3
+ # Run tests with, for example
4
+ #
5
+ # env GEMMA_COMMAND=../gemma/bin/gemma rake test
6
+
7
+ require 'rubygems'
8
+ require 'rake'
9
+
10
+ task default: %w[test]
11
+
12
+ task :test do
13
+ ruby "bin/gemma-wrapper --json --force -- \
14
+ -g test/data/input/BXD_geno.txt.gz \
15
+ -p test/data/input/BXD_pheno.txt \
16
+ -a test/data/input/BXD_snps.txt \
17
+ -gk \
18
+ -debug > K0.json"
19
+ K0 = File.read("K0.json")
20
+ fail "Wrong Hash in #{K0}" if K0 !~ /1b700de28f242d561fc6769a07d88403764a996f/
21
+ fail "Expected error is 0 in #{K0}" if K0 !~ /errno\":0/
22
+ fail "Test failed" if $? != 0
23
+ ruby "bin/gemma-wrapper --json --input K0.json -- \
24
+ -g test/data/input/BXD_geno.txt.gz \
25
+ -p test/data/input/BXD_pheno.txt \
26
+ -c test/data/input/BXD_covariates2.txt \
27
+ -a test/data/input/BXD_snps.txt \
28
+ -lmm 2 -maf 0.1 \
29
+ -debug > GWA0.json"
30
+ gwa0 = File.read("GWA0.json")
31
+ fail "Wrong Hash in #{gwa0}" if gwa0 !~ /9e411810ad341de6456ce0c6efd4f973356d0bad/
32
+ fail "Expected cache hit in #{gwa0}" if gwa0 !~ /cache_hit\":true/
33
+ fail "Test failed" if $? != 0
34
+ ruby "bin/gemma-wrapper --debug --json --force \
35
+ --loco --chromosomes 1,2,3,4 -- \
36
+ -g test/data/input/BXD_geno.txt.gz \
37
+ -p test/data/input/BXD_pheno.txt \
38
+ -a test/data/input/BXD_snps.txt \
39
+ -gk -debug > KLOCO1.json"
40
+ kloco1 = File.read("KLOCO1.json")
41
+ fail "Wrong Hash in #{kloco1}" if kloco1 !~ /1b700de28f242d561fc6769a07d88403764a996f/
42
+ fail "Expected error is 0 in #{kloco1}" if kloco1 !~ /errno\":0/
43
+ fail "Test failed" if $? != 0
44
+ # run again for cache hits
45
+ ruby "bin/gemma-wrapper --json \
46
+ --loco --chromosomes 1,2,3,4 -- \
47
+ -g test/data/input/BXD_geno.txt.gz \
48
+ -p test/data/input/BXD_pheno.txt \
49
+ -a test/data/input/BXD_snps.txt \
50
+ -gk -debug > KLOCO2.json"
51
+ kloco2 = File.read("KLOCO2.json")
52
+ fail "Wrong Hash in #{kloco2}" if kloco2 !~ /1b700de28f242d561fc6769a07d88403764a996f/
53
+ fail "Expected cache hit in #{kloco2}" if kloco2 !~ /cache_hit\":true/
54
+ fail "Test failed" if $? != 0
55
+ ruby "bin/gemma-wrapper --json --force --loco --input KLOCO1.json -- \
56
+ -g test/data/input/BXD_geno.txt.gz \
57
+ -p test/data/input/BXD_pheno.txt \
58
+ -c test/data/input/BXD_covariates2.txt \
59
+ -a test/data/input/BXD_snps.txt \
60
+ -lmm 2 -maf 0.1 \
61
+ -debug > GWA1.json"
62
+ gwa1 = File.read("GWA1.json")
63
+ fail "Wrong Hash in #{gwa1}" if gwa1 !~ /9e411810ad341de6456ce0c6efd4f973356d0bad/
64
+ fail "Test failed" if $? != 0
65
+ # and run again
66
+ ruby "bin/gemma-wrapper --json --loco --input KLOCO2.json -- \
67
+ -g test/data/input/BXD_geno.txt.gz \
68
+ -p test/data/input/BXD_pheno.txt \
69
+ -c test/data/input/BXD_covariates2.txt \
70
+ -a test/data/input/BXD_snps.txt \
71
+ -lmm 2 -maf 0.1 \
72
+ -debug > GWA2.json"
73
+ fail "Test failed" if $? != 0
74
+ gwa2 = File.read("GWA2.json")
75
+ fail "Wrong Hash in #{gwa2}" if gwa2 !~ /9e411810ad341de6456ce0c6efd4f973356d0bad/
76
+ fail "Expected cache hit in #{gwa2}" if gwa2 !~ /cache_hit\":true/
77
+ end
78
+
79
+ require 'rdoc/task'
80
+ Rake::RDocTask.new do |rdoc|
81
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
82
+
83
+ rdoc.rdoc_dir = 'rdoc'
84
+ rdoc.title = "bio-gemma-wrapper #{version}"
85
+ rdoc.rdoc_files.include('README*')
86
+ rdoc.rdoc_files.include('lib/**/*.rb')
87
+ end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.99.6
1
+ 0.99.7
data/bin/gemma-wrapper CHANGED
@@ -4,35 +4,35 @@
4
4
  # Author:: Pjotr Prins
5
5
  # License:: GPL3
6
6
  #
7
- # Copyright (C) 2017-2022 Pjotr Prins <pjotr.prins@thebird.nl>
7
+ # Copyright (C) 2017-2024 Pjotr Prins <pjotr.prins@thebird.nl>
8
8
 
9
9
  USAGE = "
10
10
  GEMMA wrapper example:
11
11
 
12
12
  Simple caching of K computation with
13
13
 
14
- gemma-wrapper -- \\
15
- -g test/data/input/BXD_geno.txt.gz \\
16
- -p test/data/input/BXD_pheno.txt \\
14
+ gemma-wrapper -- \
15
+ -g test/data/input/BXD_geno.txt.gz \
16
+ -p test/data/input/BXD_pheno.txt \
17
17
  -a test/data/input/BXD_snps.txt \
18
- -gk
18
+ -gk > K.json
19
19
 
20
20
  LOCO K computation with caching and JSON output
21
21
 
22
- gemma-wrapper --json --loco -- \\
23
- -g test/data/input/BXD_geno.txt.gz \\
24
- -p test/data/input/BXD_pheno.txt \\
25
- -a test/data/input/BXD_snps.txt \\
22
+ gemma-wrapper --json --loco -- \
23
+ -g test/data/input/BXD_geno.txt.gz \
24
+ -p test/data/input/BXD_pheno.txt \
25
+ -a test/data/input/BXD_snps.txt \
26
26
  -gk -debug > K.json
27
27
 
28
28
  LMM's using the K's captured in K.json using the --input switch
29
29
 
30
- gemma-wrapper --json --loco --input K.json -- \\
31
- -g test/data/input/BXD_geno.txt.gz \\
32
- -p test/data/input/BXD_pheno.txt \\
33
- -c test/data/input/BXD_covariates2.txt \\
34
- -a test/data/input/BXD_snps.txt \\
35
- -lmm 2 -maf 0.1 \\
30
+ gemma-wrapper --json --loco --input K.json -- \
31
+ -g test/data/input/BXD_geno.txt.gz \
32
+ -p test/data/input/BXD_pheno.txt \
33
+ -c test/data/input/BXD_covariates2.txt \
34
+ -a test/data/input/BXD_snps.txt \
35
+ -lmm 9 -maf 0.1 \
36
36
  -debug > GWA.json
37
37
 
38
38
  Gemma gets used from the path. You can override by setting
@@ -45,6 +45,7 @@ GEMMA_V_MINOR = 4
45
45
 
46
46
  basepath = File.dirname(File.dirname(__FILE__))
47
47
  $: << File.join(basepath,'lib')
48
+ BIN = File.join(basepath,'bin')
48
49
 
49
50
  VERSION_FILENAME=File.join(basepath,'VERSION')
50
51
  version = File.new(VERSION_FILENAME).read.chomp
@@ -69,7 +70,10 @@ hashme = nil
69
70
  require 'digest/sha1'
70
71
  require 'fileutils'
71
72
  require 'optparse'
73
+ require 'open3'
74
+ require 'socket' # for hostname
72
75
  require 'tempfile'
76
+ require 'time'
73
77
  require 'tmpdir'
74
78
 
75
79
  require 'lock'
@@ -80,7 +84,7 @@ if split_at
80
84
  gemma_args = ARGV[split_at+1..-1]
81
85
  end
82
86
 
83
- options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir(), quiet: false, permute_phenotypes: false, parallel: nil }
87
+ options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir(), quiet: false, permute_phenotypes: false, lmdb: nil, parallel: nil }
84
88
 
85
89
  opts = OptionParser.new do |o|
86
90
  o.banner = "\nUsage: #{File.basename($0)} [options] -- [gemma-options]"
@@ -99,6 +103,22 @@ opts = OptionParser.new do |o|
99
103
  options[:loco] = b
100
104
  end
101
105
 
106
+ o.on('--population NAME', 'Add population identifier to metadata') do |n|
107
+ options[:population] = n
108
+ end
109
+
110
+ o.on('--name NAME', 'Add dataset identifier to metadata') do |n|
111
+ options[:name] = n
112
+ end
113
+
114
+ o.on('--id ID', 'Add identifier to metadata') do |n|
115
+ options[:id] = n
116
+ end
117
+
118
+ o.on('--trait TRAIT', 'Add trait identifier to metadata') do |n|
119
+ options[:trait] = n
120
+ end
121
+
102
122
  o.on('--chromosomes [1,2,3]',Array,'Run specific chromosomes') do |lst|
103
123
  options[:chromosomes] = lst
104
124
  end
@@ -120,6 +140,10 @@ opts = OptionParser.new do |o|
120
140
  options[:force] = true
121
141
  end
122
142
 
143
+ o.on("--keep", "Keep intermediate files in output") do |q|
144
+ options[:keep] = true
145
+ end
146
+
123
147
  o.on("--parallel", "Run jobs in parallel") do |b|
124
148
  options[:parallel] = true
125
149
  end
@@ -128,6 +152,10 @@ opts = OptionParser.new do |o|
128
152
  options[:parallel] = false
129
153
  end
130
154
 
155
+ o.on("--lmdb", "Generate lmdb output") do |b|
156
+ options[:lmdb] = true
157
+ end
158
+
131
159
  o.on("--slurm[=opts]",String,"Use slurm PBS for submitting jobs") do |slurm|
132
160
  options[:slurm_opts] = ""
133
161
  options[:slurm] = true
@@ -169,11 +197,18 @@ opts.parse!(ARGV)
169
197
  OUTPUT = (options[:json] ? $stderr : $stdout )
170
198
 
171
199
  record = { warnings: [], errno: 0, debug: [] }
200
+ record[:name] = options[:name] if options[:name]
201
+ record[:id] = options[:id] if options[:id]
202
+ record[:trait] = options[:trait] if options[:trait]
203
+ d = DateTime.now
204
+ record[:time] = d.strftime("%Y/%m/%d %H:%M")
205
+ record[:user] = ENV["USER"]
206
+ record[:hostname] = Socket.gethostname
172
207
 
173
208
  require 'json'
174
209
 
175
210
  json_out = lambda do
176
- print record.to_json if options[:json]
211
+ record.to_json if options[:json]
177
212
  end
178
213
 
179
214
  # ---- Some error handlers
@@ -215,17 +250,18 @@ end
215
250
  # ---- Start banner
216
251
 
217
252
  GEMMA_K_VERSION=version
218
- GEMMA_K_BANNER = "gemma-wrapper #{version} (Ruby #{RUBY_VERSION}) by Pjotr Prins 2017-2022\n"
253
+ GEMMA_K_BANNER = "gemma-wrapper #{version} (Ruby #{RUBY_VERSION}) by Pjotr Prins 2017-2024\n"
219
254
  info.call GEMMA_K_BANNER
220
255
 
221
256
  # Check gemma version
222
257
  begin
223
258
  gemma_command2 = options[:gemma_command]
224
- info.call "NOTE: gemma-wrapper is soon to be replaced"
259
+ # info.call "NOTE: gemma-wrapper is soon to be replaced"
225
260
 
261
+ debug.call("Invoke #{gemma_command2}")
226
262
  GEMMA_INFO = `#{gemma_command2}`
227
263
  rescue Errno::ENOENT
228
- gemma_command2 = "gemma"
264
+ gemma_command2 = "gemma" if not gemma_command2
229
265
  error.call "<#{gemma_command2}> command not found"
230
266
  end
231
267
 
@@ -249,7 +285,7 @@ if options[:show_help] or gemma_args == nil
249
285
  end
250
286
 
251
287
  if RUBY_VERSION =~ /^1/
252
- warning "runs on Ruby 2.x only\n"
288
+ warning "does not run on Ruby 1.x\n"
253
289
  end
254
290
 
255
291
  # ---- LOCO defaults to parallel
@@ -272,6 +308,9 @@ if options[:parallel]
272
308
  error.call "<parallel> command not found"
273
309
  end
274
310
  parallel_cmds = []
311
+ if not options[:json]
312
+ error.call "<parallel> needs --json switch"
313
+ end
275
314
  end
276
315
 
277
316
  # ---- Fetch chromosomes from SNP annotation file
@@ -288,23 +327,69 @@ if DO_COMPUTE_GWA and options[:permute_phenotypes]
288
327
  raise "Did not expect GEMMA -p phenotype whith permutations (only use --permutate-phenotypes)" if pheno_idx
289
328
  end
290
329
 
291
- execute = lambda { |cmd|
292
- info.call("Executing: #{cmd}")
293
- err = 0
294
- if not options[:debug]
295
- # send output to stderr line by line
296
- IO.popen("#{cmd}") do |io|
297
- while s = io.gets
298
- $stderr.print s
330
+ matches = {
331
+ chr: [:string, /-loco (\S+) /],
332
+ user_time: [:float, /User time \(seconds\): ([\d\.]+)/],
333
+ system_time: [:float, /System time \(seconds\): ([\d\.]+)/],
334
+ perc_cpu: [:int, /Percent of CPU this job got: (\d+)%/],
335
+ wall_clock: [:string, /Elapsed \(wall clock\) time \(h:mm:ss or m:ss\): (\S+)/],
336
+ ram_usage_gb: [:gb, /Maximum resident set size \(kbytes\): (\d+)/],
337
+ command: [:string, /Command being timed: (.+)/]
338
+ }
339
+
340
+ parse_stats = lambda { |buf|
341
+ stats = {}
342
+ buf.split("\\n").each do |s|
343
+ if s =~ /^\t/
344
+ matches.each do |k,v|
345
+ type,m = v
346
+ if s =~ m
347
+ # $stderr.print $1,s
348
+ stats[k] =
349
+ case type
350
+ when :float
351
+ $1.to_f
352
+ when :int
353
+ $1.to_i
354
+ when :gb
355
+ (($1.to_f)/1048576.0).round(3)
356
+ else
357
+ $1
358
+ end
359
+ end
299
360
  end
300
- io.close
301
- err = $?.to_i
302
361
  end
303
- else
304
- $stderr.print `#{cmd}`
305
- err = $?.to_i
306
362
  end
307
- err
363
+ stats
364
+ }
365
+
366
+ run_stat = {}
367
+
368
+ execute = lambda { |cmd|
369
+ info.call("Executing: #{cmd}")
370
+ err = 0
371
+ stdout_buf = ""
372
+ stderr_buf = ""
373
+ stats = {}
374
+ Open3.popen3("time -v #{cmd}") do |stdin,stdout,stderr,wait_thr|
375
+ stderr_buf = stderr.read
376
+ stdout_buf = stdout.read
377
+ stats = parse_stats.call(stderr_buf)
378
+ stdin.close
379
+ stdout.close
380
+ stderr.close
381
+ err = wait_thr.value
382
+ end
383
+ $stderr.print(stderr_buf) if options[:debug]
384
+ if err and err != 0
385
+ $stderr.print(stdout_buf)
386
+ $stderr.print(stderr_buf) if not options[:debug]
387
+ $stderr.print "FATAL ERROR: gemma-wrapper bailed out with #{err}\n"
388
+ # sleep 10_000
389
+ $stderr.print Kernel.caller().join("\n")
390
+ exit 1
391
+ end
392
+ return err,stats
308
393
  }
309
394
 
310
395
  compute_hash = lambda do | phenofn = nil |
@@ -319,6 +404,7 @@ compute_hash = lambda do | phenofn = nil |
319
404
  end
320
405
  debug.call("Hashing on ",hm)
321
406
  hm.each do | item |
407
+ # if the entry is a file, use the hash of its content; otherwise use just the entry itself
322
408
  if File.file?(item)
323
409
  hashes << Digest::SHA1.hexdigest(File.read(item))
324
410
  debug.call [item,hashes.last]
@@ -343,6 +429,7 @@ hashme =
343
429
  end
344
430
 
345
431
  HASH = compute_hash.call()
432
+ options[:compute_hash_on] = hashme
346
433
  options[:hash] = HASH
347
434
 
348
435
  at_exit do
@@ -351,7 +438,7 @@ end
351
438
 
352
439
  Lock.create(HASH) # this will wait for a lock to expire
353
440
 
354
- joblog = options[:cache_dir]+"/"+HASH+"-parallel.log"
441
+ JOBLOG = HASH+"-parallel.log"
355
442
 
356
443
  # Create cache dir
357
444
  FileUtils::mkdir_p options[:cache_dir]
@@ -365,7 +452,7 @@ GEMMA_ARGS = gemma_args
365
452
  debug.call "Options: ",options,"\n" if !options[:quiet]
366
453
 
367
454
  invoke_gemma = lambda do |extra_args, cache_hit = false, chr = "full", permutation = 1|
368
- cmd = "#{gemma_command2} #{GEMMA_ARGS.join(' ')} #{extra_args.join(' ')}"
455
+ cmd = "time -v #{gemma_command2} #{extra_args.join(' ')} #{GEMMA_ARGS.join(' ')}"
369
456
  record[:gemma_command] = cmd
370
457
  return if cache_hit
371
458
  if options[:slurm]
@@ -395,7 +482,7 @@ srun #{cmd}
395
482
  info.call("Add parallel job: ",cmd)
396
483
  parallel_cmds << cmd
397
484
  else
398
- err = execute.call(cmd)
485
+ err,stats = execute.call(cmd)
399
486
  end
400
487
  err
401
488
  else
@@ -416,6 +503,8 @@ srun #{cmd}
416
503
  end
417
504
  end
418
505
 
506
+ create_archive = false
507
+
419
508
  # Takes the hash value and checks whether the (output) file exists
420
509
  # returns datafn, logfn, cache_hit
421
510
  cache = lambda do | chr, ext, h=HASH, permutation=0 |
@@ -427,10 +516,15 @@ cache = lambda do | chr, ext, h=HASH, permutation=0 |
427
516
  logfn = prefix+".log.txt"
428
517
  datafn = prefix+ext
429
518
  record[:files] ||= []
430
- record[:files].push [chr,logfn,datafn]
519
+ log_basefn = File.basename(logfn)
520
+ data_basefn = File.basename(datafn)
521
+ log_tmpfn = tmpdir+"/"+log_basefn
522
+ data_tmpfn = tmpdir+"/"+data_basefn
523
+ record[:files].push [chr,log_basefn,data_basefn]
431
524
  if !options[:force]
432
- if File.exist? logfn and File.exist? datafn
433
- if File.read(logfn).include? "total computation time"
525
+ info.call "Checking for #{data_tmpfn}"
526
+ if File.exist? log_tmpfn and File.exist? data_tmpfn
527
+ if File.read(log_tmpfn).include? "total computation time"
434
528
  record[:cache_hit] = true
435
529
  info.call "#{logfn} CACHE HIT!\n"
436
530
  return hashi, true
@@ -448,8 +542,10 @@ kinship = lambda do | chr = nil |
448
542
  when 2 then '.sXX.txt'
449
543
  else error.call "Unknown kinship type"
450
544
  end
545
+ # ---- check cache:
451
546
  hashi, cache_hit = cache.call chr,ext
452
547
  if not cache_hit
548
+ create_archive = true
453
549
  if chr != nil
454
550
  invoke_gemma.call [ '-loco', chr, '-o', hashi ], cache_hit
455
551
  else
@@ -466,8 +562,10 @@ gwas = lambda do | chr, kfn, pfn, permutation=0 |
466
562
  hash = compute_hash.call(pfn)
467
563
  hashi, cache_hit = cache.call(chr,".assoc.txt",hash,permutation)
468
564
  if not cache_hit
469
- args = [ '-k', kfn, '-o', hashi ]
565
+ create_archive = true
566
+ args = []
470
567
  args << [ '-loco', chr ] if chr != nil
568
+ args << [ '-k', kfn, '-o', hashi ]
471
569
  args << [ '-p', pfn ] if pfn
472
570
  invoke_gemma.call args,false,chr,permutation
473
571
  end
@@ -480,12 +578,20 @@ if LOCO
480
578
  end
481
579
  end
482
580
 
581
+ json_in = nil
582
+
483
583
  if DO_COMPUTE_KINSHIP
484
584
  # compute K
585
+ ARCHIVE = options[:cache_dir]+"/"+HASH+"-gemma-cXX.tar.xz"
586
+
587
+ if File.exist? ARCHIVE and not options[:force]
588
+ info.call "Unpack archive #{ARCHIVE}!"
589
+ execute.call "tar xJf #{ARCHIVE} -C #{tmpdir}"
590
+ end
485
591
  info.call CHROMOSOMES
486
592
  if LOCO
487
593
  CHROMOSOMES.each do |chr|
488
- info.call "LOCO for ",chr
594
+ info.call "Compute kinship LOCO for chr ",chr
489
595
  kinship.call(chr)
490
596
  end
491
597
  else
@@ -493,6 +599,11 @@ if DO_COMPUTE_KINSHIP
493
599
  end
494
600
  else
495
601
  # DO_COMPUTE_GWA
602
+ ARCHIVE = options[:cache_dir]+"/"+HASH+"-gemma-GWA.tar.xz"
603
+ if File.exist? ARCHIVE and not options[:force]
604
+ info.call "Unpack archive #{ARCHIVE}!"
605
+ execute.call "env XZ_OPT='-T0' tar xJf #{ARCHIVE} -C #{tmpdir}"
606
+ end
496
607
  begin
497
608
  json_in = JSON.parse(File.read(options[:input]))
498
609
  rescue TypeError
@@ -504,12 +615,20 @@ else
504
615
  if LOCO
505
616
  k_files = json_in["files"].map { |rec| [rec[0],rec[2]] }
506
617
  k_files.each do | chr, kfn | # call a GWA for each chromosome
507
- gwas.call(chr,kfn,pfn)
618
+
619
+ kfn2 = options[:cache_dir]+"/"+kfn
620
+ if not File.exist?(kfn2) and json_in["archive"]
621
+ # we aim to unpack the archive once on reuse
622
+ archive_grm = options[:cache_dir]+"/"+json_in["archive"]
623
+ execute.call "env XZ_OPT='-T0' tar xJf #{archive_grm} -C #{options[:cache_dir]}"
624
+ end
625
+
626
+ gwas.call(chr,kfn2,pfn)
508
627
  end
509
628
  else
510
629
  kfn = json_in["files"][0][2]
511
630
  CHROMOSOMES.each do | chr |
512
- gwas.call(chr,kfn,pfn)
631
+ gwas.call(chr,tmpdir+"/"+kfn,pfn)
513
632
  end
514
633
  end
515
634
  # Permute
@@ -562,6 +681,7 @@ end
562
681
  # ---- Invoke parallel
563
682
  if options[:parallel]
564
683
  # parallel_cmds = ["echo 1","sleep 1 && echo 2", "false", "echo 3"]
684
+ joblog = tmpdir+"/"+JOBLOG
565
685
 
566
686
  Tempfile.open("commands.txt") do |f|
567
687
  cmdfn = f.path
@@ -571,38 +691,95 @@ if options[:parallel]
571
691
  end
572
692
  end
573
693
  cmd = "cat \"#{cmdfn}\""
574
- err = execute.call(cmd+"|parallel --joblog #{joblog}") # first try optimistically to run all jobs in parallel
694
+ debug.call("tmpdir=#{tmpdir}")
695
+ err,stats = execute.call(cmd+"|parallel --results #{tmpdir} --joblog #{joblog}") # first try optimistically to run all jobs in parallel
575
696
  if err != 0
576
- [16,8,4,1].each do |jobs|
697
+ [4,1].each do |jobs|
577
698
  info.call("Failed to complete parallel run -- retrying with smaller RAM footprint!")
578
- err = execute.call(cmd+"|parallel -j #{jobs} --resume --joblog #{joblog}")
699
+ err,stats = execute.call(cmd+"|parallel -j #{jobs} --results #{tmpdir} --resume --joblog #{joblog}")
579
700
  break if err == 0
580
701
  end
581
702
  if err != 0
582
703
  info.call("Parallel run failed!")
583
704
  debug.call("Job log is: ",File.read(joblog))
584
- # Remove remaining files
585
- FileUtils.mv joblog, joblog+".bak", verbose: false, force: true
586
- FileUtils.rm_rf("#{tmpdir}/*", secure: true)
587
705
  exit err
588
706
  end
589
707
  end
590
708
  end
591
709
  info.call("Run successful!")
592
- FileUtils.mv joblog, joblog+".bak", verbose: false, force: true
593
710
  end
594
- json_out.call
595
-
596
- # copy all output files to the cache_dir. If a file exists only emit a warning
597
- Dir.glob("*.txt", base: tmpdir) do | fn |
598
- source = tmpdir + "/" + fn
599
- dest = options[:cache_dir] + "/" + fn
600
- if not File.exist?(dest) or options[:force]
601
- info.call "Move #{source} to #{dest}"
602
- FileUtils.mv source, dest, verbose: false
603
- else
604
- warning.call "File #{dest} already exists. Not overwriting"
711
+
712
+ # Collect stats from parallel run
713
+
714
+ run_stats = {}
715
+ $stderr.print "STATS"
716
+ Dir.glob(tmpdir+'/*/*' ).each do | dir |
717
+ File.open("#{dir}/stderr") { |f|
718
+ run_stat = parse_stats.call(f.read)
719
+ chr = run_stat[:chr]
720
+ run_stats[chr] = run_stat
721
+ }
722
+ end
723
+ # Now add up the stats
724
+ user_time = 0.0
725
+ system_time = 0.0
726
+ wall_clock = "0"
727
+ ram_usage_gb = 0.0
728
+ run_stats.each do | k, v |
729
+ wall_clock=v[:wall_clock] if v[:wall_clock]>wall_clock
730
+ ram_usage_gb += v[:ram_usage_gb]
731
+ user_time += v[:user_time]
732
+ system_time += v[:system_time]
733
+ end
734
+
735
+ record[:user_time] = user_time
736
+ record[:system_time] = system_time
737
+ record[:wall_clock] = wall_clock
738
+ record[:ram_usage_gb] = ram_usage_gb.round(2)
739
+ record[:run_stats] = run_stats
740
+
741
+ if create_archive
742
+ if DO_COMPUTE_GWA
743
+ LMDB = tmpdir+"/"+HASH+'.mdb'
744
+ # create lmdb database - we call out into a python script for that.
745
+ # first create a JSON record
746
+
747
+ meta = {
748
+ type: "gemma-wrapper",
749
+ version: version,
750
+ population: options[:population],
751
+ name: options[:name],
752
+ trait: options[:trait],
753
+ url: "https://genenetwork.org/show_trait?trait_id="+options[:trait]+"&dataset="+options[:name],
754
+ archive_GRM: json_in["archive"],
755
+ archive_GWA: File.basename(ARCHIVE),
756
+ }
757
+ if options[:id] and options[:id] =~ /,/ # this is GN specific
758
+ dataid,probesetid,probesetfreezeid = options[:id].split(",")
759
+ meta[:dataid] = dataid.to_i
760
+ meta[:probesetid] = probesetid.to_i
761
+ meta[:probesetfreezeid] = probesetfreezeid.to_i
762
+ end
763
+ record[:meta] = meta
764
+ metafn = tmpdir+"/"+HASH+"-meta.json"
765
+ File.write(metafn,record.to_json)
766
+ # sleep 10_000
767
+ if options[:lmdb]
768
+ File.unlink(LMDB) if File.exist?(LMDB) # remove any cached lmdb
769
+ execute.call "python3 #{BIN}/gemma2lmdb.py --db=#{LMDB} --meta=#{metafn} #{tmpdir}/*assoc.txt"
770
+ end
771
+ if not options[:keep]
772
+ execute.call "rm -f #{tmpdir}/1/*/* #{tmpdir}/*.txt #{tmpdir}/*.log #{tmpdir}/*.mdb-lock" # remove GEMMA output files
773
+ FileUtils.rm_rf("#{tmpdir}/1", secure: true)
774
+ end
605
775
  end
776
+ File.write(tmpdir+"/"+HASH+"-gemma-wrapper-output.json",json_out.call)
777
+ info.call "Creating archive #{ARCHIVE}..."
778
+ execute.call "env XZ_OPT='-T0' tar -cvJf #{ARCHIVE} -C #{tmpdir} ."
606
779
  end
607
780
 
608
781
  end # tmpdir
782
+
783
+ record[:archive] = File.basename(ARCHIVE)
784
+
785
+ print json_out.call
@@ -7,6 +7,7 @@ Gem::Specification.new do |s|
7
7
  s.email = 'pjotr.public01@thebird.nl'
8
8
  s.files = ["bin/gemma-wrapper",
9
9
  "lib/lock.rb",
10
+ "Rakefile",
10
11
  "Gemfile",
11
12
  "LICENSE.txt",
12
13
  "README.md",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-gemma-wrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.99.6
4
+ version: 0.99.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pjotr Prins
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-22 00:00:00.000000000 Z
11
+ date: 2024-07-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: GEMMA wrapper adds LOCO and permutation support. Also runs in parallel
14
14
  and caches K between runs with LOCO support
@@ -21,6 +21,7 @@ files:
21
21
  - Gemfile
22
22
  - LICENSE.txt
23
23
  - README.md
24
+ - Rakefile
24
25
  - VERSION
25
26
  - bin/gemma-wrapper
26
27
  - gemma-wrapper.gemspec
@@ -44,7 +45,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
44
45
  - !ruby/object:Gem::Version
45
46
  version: '0'
46
47
  requirements: []
47
- rubygems_version: 3.2.22
48
+ rubygems_version: 3.4.19
48
49
  signing_key:
49
50
  specification_version: 4
50
51
  summary: GEMMA with LOCO and permutations