bio-gemma-wrapper 0.99.6 → 0.99.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 69d74ac5f1a705132d7ddc86bd1182c11fc37cf32062dbb28909f16684a827cb
4
- data.tar.gz: 6f912b3c03474c1334a105d6c9471c02d39c205e94b0d07e5281718beec65ee7
3
+ metadata.gz: 8516f4e6692ceed95f95d5d55f530a2bfbf2f6f8fd6daf8e5918752c6be6cae7
4
+ data.tar.gz: b6942b33acc903f423a9c6bcf53920df326a18f58eb307fc713d220c1d3d88ed
5
5
  SHA512:
6
- metadata.gz: 35fbdf4ccfc482f6898e35b1e46e840db87f34efb35a096206ffef4a131ac8b21a4e6b7893be6ee053a59837eadba6547258d2360d796794c71a60670458943f
7
- data.tar.gz: 43f7a7438f475583930e6cf1f50d34206fba7674b3df836791cf7064bd7ca55dddc2cc101f7c5a89d8042645b6dd06245b84d3b38501bc1eab7d18774703a01a
6
+ metadata.gz: 15fdc14eafe7a33aa330a1e156f23cfd6a1d7bc43e48aacb126aa8157b8c0e1a8e649d02bc06eda4bf5704e1c0e04520ab07be492096ab92eb198372140442e5
7
+ data.tar.gz: c2a70aedae7743e63276285129665860d58bbdf7b701289b05cdbfc82d35207e44240b92b533bed55c6ced28f893cd08af228281dcb7de0c8c54d272a0e1474e
data/README.md CHANGED
@@ -247,4 +247,4 @@ ruby bin/gemma-wrapper --help
247
247
 
248
248
  ## Copyright
249
249
 
250
- Copyright (c) 2017-2021 Pjotr Prins. See [LICENSE.txt](LICENSE.txt) for further details.
250
+ Copyright (c) 2017-2023 Pjotr Prins. See [LICENSE.txt](LICENSE.txt) for further details.
data/Rakefile ADDED
@@ -0,0 +1,87 @@
1
+ # encoding: utf-8
2
+ #
3
+ # Run tests with, for example
4
+ #
5
+ # env GEMMA_COMMAND=../gemma/bin/gemma rake test
6
+
7
+ require 'rubygems'
8
+ require 'rake'
9
+
10
+ task default: %w[test]
11
+
12
+ task :test do
13
+ ruby "bin/gemma-wrapper --json --force -- \
14
+ -g test/data/input/BXD_geno.txt.gz \
15
+ -p test/data/input/BXD_pheno.txt \
16
+ -a test/data/input/BXD_snps.txt \
17
+ -gk \
18
+ -debug > K0.json"
19
+ K0 = File.read("K0.json")
20
+ fail "Wrong Hash in #{K0}" if K0 !~ /1b700de28f242d561fc6769a07d88403764a996f/
21
+ fail "Expected error is 0 in #{K0}" if K0 !~ /errno\":0/
22
+ fail "Test failed" if $? != 0
23
+ ruby "bin/gemma-wrapper --json --input K0.json -- \
24
+ -g test/data/input/BXD_geno.txt.gz \
25
+ -p test/data/input/BXD_pheno.txt \
26
+ -c test/data/input/BXD_covariates2.txt \
27
+ -a test/data/input/BXD_snps.txt \
28
+ -lmm 2 -maf 0.1 \
29
+ -debug > GWA0.json"
30
+ gwa0 = File.read("GWA0.json")
31
+ fail "Wrong Hash in #{gwa0}" if gwa0 !~ /9e411810ad341de6456ce0c6efd4f973356d0bad/
32
+ fail "Expected cache hit in #{gwa0}" if gwa0 !~ /cache_hit\":true/
33
+ fail "Test failed" if $? != 0
34
+ ruby "bin/gemma-wrapper --debug --json --force \
35
+ --loco --chromosomes 1,2,3,4 -- \
36
+ -g test/data/input/BXD_geno.txt.gz \
37
+ -p test/data/input/BXD_pheno.txt \
38
+ -a test/data/input/BXD_snps.txt \
39
+ -gk -debug > KLOCO1.json"
40
+ kloco1 = File.read("KLOCO1.json")
41
+ fail "Wrong Hash in #{kloco1}" if kloco1 !~ /1b700de28f242d561fc6769a07d88403764a996f/
42
+ fail "Expected error is 0 in #{kloco1}" if kloco1 !~ /errno\":0/
43
+ fail "Test failed" if $? != 0
44
+ # run again for cache hits
45
+ ruby "bin/gemma-wrapper --json \
46
+ --loco --chromosomes 1,2,3,4 -- \
47
+ -g test/data/input/BXD_geno.txt.gz \
48
+ -p test/data/input/BXD_pheno.txt \
49
+ -a test/data/input/BXD_snps.txt \
50
+ -gk -debug > KLOCO2.json"
51
+ kloco2 = File.read("KLOCO2.json")
52
+ fail "Wrong Hash in #{kloco2}" if kloco2 !~ /1b700de28f242d561fc6769a07d88403764a996f/
53
+ fail "Expected cache hit in #{kloco2}" if kloco2 !~ /cache_hit\":true/
54
+ fail "Test failed" if $? != 0
55
+ ruby "bin/gemma-wrapper --json --force --loco --input KLOCO1.json -- \
56
+ -g test/data/input/BXD_geno.txt.gz \
57
+ -p test/data/input/BXD_pheno.txt \
58
+ -c test/data/input/BXD_covariates2.txt \
59
+ -a test/data/input/BXD_snps.txt \
60
+ -lmm 2 -maf 0.1 \
61
+ -debug > GWA1.json"
62
+ gwa1 = File.read("GWA1.json")
63
+ fail "Wrong Hash in #{gwa1}" if gwa1 !~ /9e411810ad341de6456ce0c6efd4f973356d0bad/
64
+ fail "Test failed" if $? != 0
65
+ # and run again
66
+ ruby "bin/gemma-wrapper --json --loco --input KLOCO2.json -- \
67
+ -g test/data/input/BXD_geno.txt.gz \
68
+ -p test/data/input/BXD_pheno.txt \
69
+ -c test/data/input/BXD_covariates2.txt \
70
+ -a test/data/input/BXD_snps.txt \
71
+ -lmm 2 -maf 0.1 \
72
+ -debug > GWA2.json"
73
+ fail "Test failed" if $? != 0
74
+ gwa2 = File.read("GWA2.json")
75
+ fail "Wrong Hash in #{gwa2}" if gwa2 !~ /9e411810ad341de6456ce0c6efd4f973356d0bad/
76
+ fail "Expected cache hit in #{gwa2}" if gwa2 !~ /cache_hit\":true/
77
+ end
78
+
79
+ require 'rdoc/task'
80
+ Rake::RDocTask.new do |rdoc|
81
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
82
+
83
+ rdoc.rdoc_dir = 'rdoc'
84
+ rdoc.title = "bio-gemma-wrapper #{version}"
85
+ rdoc.rdoc_files.include('README*')
86
+ rdoc.rdoc_files.include('lib/**/*.rb')
87
+ end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.99.6
1
+ 0.99.7
data/bin/gemma-wrapper CHANGED
@@ -4,35 +4,35 @@
4
4
  # Author:: Pjotr Prins
5
5
  # License:: GPL3
6
6
  #
7
- # Copyright (C) 2017-2022 Pjotr Prins <pjotr.prins@thebird.nl>
7
+ # Copyright (C) 2017-2024 Pjotr Prins <pjotr.prins@thebird.nl>
8
8
 
9
9
  USAGE = "
10
10
  GEMMA wrapper example:
11
11
 
12
12
  Simple caching of K computation with
13
13
 
14
- gemma-wrapper -- \\
15
- -g test/data/input/BXD_geno.txt.gz \\
16
- -p test/data/input/BXD_pheno.txt \\
14
+ gemma-wrapper -- \
15
+ -g test/data/input/BXD_geno.txt.gz \
16
+ -p test/data/input/BXD_pheno.txt \
17
17
  -a test/data/input/BXD_snps.txt \
18
- -gk
18
+ -gk > K.json
19
19
 
20
20
  LOCO K computation with caching and JSON output
21
21
 
22
- gemma-wrapper --json --loco -- \\
23
- -g test/data/input/BXD_geno.txt.gz \\
24
- -p test/data/input/BXD_pheno.txt \\
25
- -a test/data/input/BXD_snps.txt \\
22
+ gemma-wrapper --json --loco -- \
23
+ -g test/data/input/BXD_geno.txt.gz \
24
+ -p test/data/input/BXD_pheno.txt \
25
+ -a test/data/input/BXD_snps.txt \
26
26
  -gk -debug > K.json
27
27
 
28
28
  LMM's using the K's captured in K.json using the --input switch
29
29
 
30
- gemma-wrapper --json --loco --input K.json -- \\
31
- -g test/data/input/BXD_geno.txt.gz \\
32
- -p test/data/input/BXD_pheno.txt \\
33
- -c test/data/input/BXD_covariates2.txt \\
34
- -a test/data/input/BXD_snps.txt \\
35
- -lmm 2 -maf 0.1 \\
30
+ gemma-wrapper --json --loco --input K.json -- \
31
+ -g test/data/input/BXD_geno.txt.gz \
32
+ -p test/data/input/BXD_pheno.txt \
33
+ -c test/data/input/BXD_covariates2.txt \
34
+ -a test/data/input/BXD_snps.txt \
35
+ -lmm 9 -maf 0.1 \
36
36
  -debug > GWA.json
37
37
 
38
38
  Gemma gets used from the path. You can override by setting
@@ -45,6 +45,7 @@ GEMMA_V_MINOR = 4
45
45
 
46
46
  basepath = File.dirname(File.dirname(__FILE__))
47
47
  $: << File.join(basepath,'lib')
48
+ BIN = File.join(basepath,'bin')
48
49
 
49
50
  VERSION_FILENAME=File.join(basepath,'VERSION')
50
51
  version = File.new(VERSION_FILENAME).read.chomp
@@ -69,7 +70,10 @@ hashme = nil
69
70
  require 'digest/sha1'
70
71
  require 'fileutils'
71
72
  require 'optparse'
73
+ require 'open3'
74
+ require 'socket' # for hostname
72
75
  require 'tempfile'
76
+ require 'time'
73
77
  require 'tmpdir'
74
78
 
75
79
  require 'lock'
@@ -80,7 +84,7 @@ if split_at
80
84
  gemma_args = ARGV[split_at+1..-1]
81
85
  end
82
86
 
83
- options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir(), quiet: false, permute_phenotypes: false, parallel: nil }
87
+ options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir(), quiet: false, permute_phenotypes: false, lmdb: nil, parallel: nil }
84
88
 
85
89
  opts = OptionParser.new do |o|
86
90
  o.banner = "\nUsage: #{File.basename($0)} [options] -- [gemma-options]"
@@ -99,6 +103,22 @@ opts = OptionParser.new do |o|
99
103
  options[:loco] = b
100
104
  end
101
105
 
106
+ o.on('--population NAME', 'Add population identifier to metadata') do |n|
107
+ options[:population] = n
108
+ end
109
+
110
+ o.on('--name NAME', 'Add dataset identifier to metadata') do |n|
111
+ options[:name] = n
112
+ end
113
+
114
+ o.on('--id ID', 'Add identifier to metadata') do |n|
115
+ options[:id] = n
116
+ end
117
+
118
+ o.on('--trait TRAIT', 'Add trait identifier to metadata') do |n|
119
+ options[:trait] = n
120
+ end
121
+
102
122
  o.on('--chromosomes [1,2,3]',Array,'Run specific chromosomes') do |lst|
103
123
  options[:chromosomes] = lst
104
124
  end
@@ -120,6 +140,10 @@ opts = OptionParser.new do |o|
120
140
  options[:force] = true
121
141
  end
122
142
 
143
+ o.on("--keep", "Keep intermediate files in output") do |q|
144
+ options[:keep] = true
145
+ end
146
+
123
147
  o.on("--parallel", "Run jobs in parallel") do |b|
124
148
  options[:parallel] = true
125
149
  end
@@ -128,6 +152,10 @@ opts = OptionParser.new do |o|
128
152
  options[:parallel] = false
129
153
  end
130
154
 
155
+ o.on("--lmdb", "Generate lmdb output") do |b|
156
+ options[:lmdb] = true
157
+ end
158
+
131
159
  o.on("--slurm[=opts]",String,"Use slurm PBS for submitting jobs") do |slurm|
132
160
  options[:slurm_opts] = ""
133
161
  options[:slurm] = true
@@ -169,11 +197,18 @@ opts.parse!(ARGV)
169
197
  OUTPUT = (options[:json] ? $stderr : $stdout )
170
198
 
171
199
  record = { warnings: [], errno: 0, debug: [] }
200
+ record[:name] = options[:name] if options[:name]
201
+ record[:id] = options[:id] if options[:id]
202
+ record[:trait] = options[:trait] if options[:trait]
203
+ d = DateTime.now
204
+ record[:time] = d.strftime("%Y/%m/%d %H:%M")
205
+ record[:user] = ENV["USER"]
206
+ record[:hostname] = Socket.gethostname
172
207
 
173
208
  require 'json'
174
209
 
175
210
  json_out = lambda do
176
- print record.to_json if options[:json]
211
+ record.to_json if options[:json]
177
212
  end
178
213
 
179
214
  # ---- Some error handlers
@@ -215,17 +250,18 @@ end
215
250
  # ---- Start banner
216
251
 
217
252
  GEMMA_K_VERSION=version
218
- GEMMA_K_BANNER = "gemma-wrapper #{version} (Ruby #{RUBY_VERSION}) by Pjotr Prins 2017-2022\n"
253
+ GEMMA_K_BANNER = "gemma-wrapper #{version} (Ruby #{RUBY_VERSION}) by Pjotr Prins 2017-2024\n"
219
254
  info.call GEMMA_K_BANNER
220
255
 
221
256
  # Check gemma version
222
257
  begin
223
258
  gemma_command2 = options[:gemma_command]
224
- info.call "NOTE: gemma-wrapper is soon to be replaced"
259
+ # info.call "NOTE: gemma-wrapper is soon to be replaced"
225
260
 
261
+ debug.call("Invoke #{gemma_command2}")
226
262
  GEMMA_INFO = `#{gemma_command2}`
227
263
  rescue Errno::ENOENT
228
- gemma_command2 = "gemma"
264
+ gemma_command2 = "gemma" if not gemma_command2
229
265
  error.call "<#{gemma_command2}> command not found"
230
266
  end
231
267
 
@@ -249,7 +285,7 @@ if options[:show_help] or gemma_args == nil
249
285
  end
250
286
 
251
287
  if RUBY_VERSION =~ /^1/
252
- warning "runs on Ruby 2.x only\n"
288
+ warning "does not run on Ruby 1.x\n"
253
289
  end
254
290
 
255
291
  # ---- LOCO defaults to parallel
@@ -272,6 +308,9 @@ if options[:parallel]
272
308
  error.call "<parallel> command not found"
273
309
  end
274
310
  parallel_cmds = []
311
+ if not options[:json]
312
+ error.call "<parallel> needs --json switch"
313
+ end
275
314
  end
276
315
 
277
316
  # ---- Fetch chromosomes from SNP annotation file
@@ -288,23 +327,69 @@ if DO_COMPUTE_GWA and options[:permute_phenotypes]
288
327
  raise "Did not expect GEMMA -p phenotype whith permutations (only use --permutate-phenotypes)" if pheno_idx
289
328
  end
290
329
 
291
- execute = lambda { |cmd|
292
- info.call("Executing: #{cmd}")
293
- err = 0
294
- if not options[:debug]
295
- # send output to stderr line by line
296
- IO.popen("#{cmd}") do |io|
297
- while s = io.gets
298
- $stderr.print s
330
+ matches = {
331
+ chr: [:string, /-loco (\S+) /],
332
+ user_time: [:float, /User time \(seconds\): ([\d\.]+)/],
333
+ system_time: [:float, /System time \(seconds\): ([\d\.]+)/],
334
+ perc_cpu: [:int, /Percent of CPU this job got: (\d+)%/],
335
+ wall_clock: [:string, /Elapsed \(wall clock\) time \(h:mm:ss or m:ss\): (\S+)/],
336
+ ram_usage_gb: [:gb, /Maximum resident set size \(kbytes\): (\d+)/],
337
+ command: [:string, /Command being timed: (.+)/]
338
+ }
339
+
340
+ parse_stats = lambda { |buf|
341
+ stats = {}
342
+ buf.split("\\n").each do |s|
343
+ if s =~ /^\t/
344
+ matches.each do |k,v|
345
+ type,m = v
346
+ if s =~ m
347
+ # $stderr.print $1,s
348
+ stats[k] =
349
+ case type
350
+ when :float
351
+ $1.to_f
352
+ when :int
353
+ $1.to_i
354
+ when :gb
355
+ (($1.to_f)/1048576.0).round(3)
356
+ else
357
+ $1
358
+ end
359
+ end
299
360
  end
300
- io.close
301
- err = $?.to_i
302
361
  end
303
- else
304
- $stderr.print `#{cmd}`
305
- err = $?.to_i
306
362
  end
307
- err
363
+ stats
364
+ }
365
+
366
+ run_stat = {}
367
+
368
+ execute = lambda { |cmd|
369
+ info.call("Executing: #{cmd}")
370
+ err = 0
371
+ stdout_buf = ""
372
+ stderr_buf = ""
373
+ stats = {}
374
+ Open3.popen3("time -v #{cmd}") do |stdin,stdout,stderr,wait_thr|
375
+ stderr_buf = stderr.read
376
+ stdout_buf = stdout.read
377
+ stats = parse_stats.call(stderr_buf)
378
+ stdin.close
379
+ stdout.close
380
+ stderr.close
381
+ err = wait_thr.value
382
+ end
383
+ $stderr.print(stderr_buf) if options[:debug]
384
+ if err and err != 0
385
+ $stderr.print(stdout_buf)
386
+ $stderr.print(stderr_buf) if not options[:debug]
387
+ $stderr.print "FATAL ERROR: gemma-wrapper bailed out with #{err}\n"
388
+ # sleep 10_000
389
+ $stderr.print Kernel.caller().join("\n")
390
+ exit 1
391
+ end
392
+ return err,stats
308
393
  }
309
394
 
310
395
  compute_hash = lambda do | phenofn = nil |
@@ -319,6 +404,7 @@ compute_hash = lambda do | phenofn = nil |
319
404
  end
320
405
  debug.call("Hashing on ",hm)
321
406
  hm.each do | item |
407
+ # if entry is a file use the hash of its content, otherwise just the entry itself
322
408
  if File.file?(item)
323
409
  hashes << Digest::SHA1.hexdigest(File.read(item))
324
410
  debug.call [item,hashes.last]
@@ -343,6 +429,7 @@ hashme =
343
429
  end
344
430
 
345
431
  HASH = compute_hash.call()
432
+ options[:compute_hash_on] = hashme
346
433
  options[:hash] = HASH
347
434
 
348
435
  at_exit do
@@ -351,7 +438,7 @@ end
351
438
 
352
439
  Lock.create(HASH) # this will wait for a lock to expire
353
440
 
354
- joblog = options[:cache_dir]+"/"+HASH+"-parallel.log"
441
+ JOBLOG = HASH+"-parallel.log"
355
442
 
356
443
  # Create cache dir
357
444
  FileUtils::mkdir_p options[:cache_dir]
@@ -365,7 +452,7 @@ GEMMA_ARGS = gemma_args
365
452
  debug.call "Options: ",options,"\n" if !options[:quiet]
366
453
 
367
454
  invoke_gemma = lambda do |extra_args, cache_hit = false, chr = "full", permutation = 1|
368
- cmd = "#{gemma_command2} #{GEMMA_ARGS.join(' ')} #{extra_args.join(' ')}"
455
+ cmd = "time -v #{gemma_command2} #{extra_args.join(' ')} #{GEMMA_ARGS.join(' ')}"
369
456
  record[:gemma_command] = cmd
370
457
  return if cache_hit
371
458
  if options[:slurm]
@@ -395,7 +482,7 @@ srun #{cmd}
395
482
  info.call("Add parallel job: ",cmd)
396
483
  parallel_cmds << cmd
397
484
  else
398
- err = execute.call(cmd)
485
+ err,stats = execute.call(cmd)
399
486
  end
400
487
  err
401
488
  else
@@ -416,6 +503,8 @@ srun #{cmd}
416
503
  end
417
504
  end
418
505
 
506
+ create_archive = false
507
+
419
508
  # Takes the hash value and checks whether the (output) file exists
420
509
  # returns datafn, logfn, cache_hit
421
510
  cache = lambda do | chr, ext, h=HASH, permutation=0 |
@@ -427,10 +516,15 @@ cache = lambda do | chr, ext, h=HASH, permutation=0 |
427
516
  logfn = prefix+".log.txt"
428
517
  datafn = prefix+ext
429
518
  record[:files] ||= []
430
- record[:files].push [chr,logfn,datafn]
519
+ log_basefn = File.basename(logfn)
520
+ data_basefn = File.basename(datafn)
521
+ log_tmpfn = tmpdir+"/"+log_basefn
522
+ data_tmpfn = tmpdir+"/"+data_basefn
523
+ record[:files].push [chr,log_basefn,data_basefn]
431
524
  if !options[:force]
432
- if File.exist? logfn and File.exist? datafn
433
- if File.read(logfn).include? "total computation time"
525
+ info.call "Checking for #{data_tmpfn}"
526
+ if File.exist? log_tmpfn and File.exist? data_tmpfn
527
+ if File.read(log_tmpfn).include? "total computation time"
434
528
  record[:cache_hit] = true
435
529
  info.call "#{logfn} CACHE HIT!\n"
436
530
  return hashi, true
@@ -448,8 +542,10 @@ kinship = lambda do | chr = nil |
448
542
  when 2 then '.sXX.txt'
449
543
  else error.call "Unknown kinship type"
450
544
  end
545
+ # ---- check cache:
451
546
  hashi, cache_hit = cache.call chr,ext
452
547
  if not cache_hit
548
+ create_archive = true
453
549
  if chr != nil
454
550
  invoke_gemma.call [ '-loco', chr, '-o', hashi ], cache_hit
455
551
  else
@@ -466,8 +562,10 @@ gwas = lambda do | chr, kfn, pfn, permutation=0 |
466
562
  hash = compute_hash.call(pfn)
467
563
  hashi, cache_hit = cache.call(chr,".assoc.txt",hash,permutation)
468
564
  if not cache_hit
469
- args = [ '-k', kfn, '-o', hashi ]
565
+ create_archive = true
566
+ args = []
470
567
  args << [ '-loco', chr ] if chr != nil
568
+ args << [ '-k', kfn, '-o', hashi ]
471
569
  args << [ '-p', pfn ] if pfn
472
570
  invoke_gemma.call args,false,chr,permutation
473
571
  end
@@ -480,12 +578,20 @@ if LOCO
480
578
  end
481
579
  end
482
580
 
581
+ json_in = nil
582
+
483
583
  if DO_COMPUTE_KINSHIP
484
584
  # compute K
585
+ ARCHIVE = options[:cache_dir]+"/"+HASH+"-gemma-cXX.tar.xz"
586
+
587
+ if File.exist? ARCHIVE and not options[:force]
588
+ info.call "Unpack archive #{ARCHIVE}!"
589
+ execute.call "tar xJf #{ARCHIVE} -C #{tmpdir}"
590
+ end
485
591
  info.call CHROMOSOMES
486
592
  if LOCO
487
593
  CHROMOSOMES.each do |chr|
488
- info.call "LOCO for ",chr
594
+ info.call "Compute kinship LOCO for chr ",chr
489
595
  kinship.call(chr)
490
596
  end
491
597
  else
@@ -493,6 +599,11 @@ if DO_COMPUTE_KINSHIP
493
599
  end
494
600
  else
495
601
  # DO_COMPUTE_GWA
602
+ ARCHIVE = options[:cache_dir]+"/"+HASH+"-gemma-GWA.tar.xz"
603
+ if File.exist? ARCHIVE and not options[:force]
604
+ info.call "Unpack archive #{ARCHIVE}!"
605
+ execute.call "env XZ_OPT='-T0' tar xJf #{ARCHIVE} -C #{tmpdir}"
606
+ end
496
607
  begin
497
608
  json_in = JSON.parse(File.read(options[:input]))
498
609
  rescue TypeError
@@ -504,12 +615,20 @@ else
504
615
  if LOCO
505
616
  k_files = json_in["files"].map { |rec| [rec[0],rec[2]] }
506
617
  k_files.each do | chr, kfn | # call a GWA for each chromosome
507
- gwas.call(chr,kfn,pfn)
618
+
619
+ kfn2 = options[:cache_dir]+"/"+kfn
620
+ if not File.exist?(kfn2) and json_in["archive"]
621
+ # we aim to unpack the archive once on reuse
622
+ archive_grm = options[:cache_dir]+"/"+json_in["archive"]
623
+ execute.call "env XZ_OPT='-T0' tar xJf #{archive_grm} -C #{options[:cache_dir]}"
624
+ end
625
+
626
+ gwas.call(chr,kfn2,pfn)
508
627
  end
509
628
  else
510
629
  kfn = json_in["files"][0][2]
511
630
  CHROMOSOMES.each do | chr |
512
- gwas.call(chr,kfn,pfn)
631
+ gwas.call(chr,tmpdir+"/"+kfn,pfn)
513
632
  end
514
633
  end
515
634
  # Permute
@@ -562,6 +681,7 @@ end
562
681
  # ---- Invoke parallel
563
682
  if options[:parallel]
564
683
  # parallel_cmds = ["echo 1","sleep 1 && echo 2", "false", "echo 3"]
684
+ joblog = tmpdir+"/"+JOBLOG
565
685
 
566
686
  Tempfile.open("commands.txt") do |f|
567
687
  cmdfn = f.path
@@ -571,38 +691,95 @@ if options[:parallel]
571
691
  end
572
692
  end
573
693
  cmd = "cat \"#{cmdfn}\""
574
- err = execute.call(cmd+"|parallel --joblog #{joblog}") # first try optimistically to run all jobs in parallel
694
+ debug.call("tmpdir=#{tmpdir}")
695
+ err,stats = execute.call(cmd+"|parallel --results #{tmpdir} --joblog #{joblog}") # first try optimistically to run all jobs in parallel
575
696
  if err != 0
576
- [16,8,4,1].each do |jobs|
697
+ [4,1].each do |jobs|
577
698
  info.call("Failed to complete parallel run -- retrying with smaller RAM footprint!")
578
- err = execute.call(cmd+"|parallel -j #{jobs} --resume --joblog #{joblog}")
699
+ err,stats = execute.call(cmd+"|parallel -j #{jobs} --results #{tmpdir} --resume --joblog #{joblog}")
579
700
  break if err == 0
580
701
  end
581
702
  if err != 0
582
703
  info.call("Parallel run failed!")
583
704
  debug.call("Job log is: ",File.read(joblog))
584
- # Remove remaining files
585
- FileUtils.mv joblog, joblog+".bak", verbose: false, force: true
586
- FileUtils.rm_rf("#{tmpdir}/*", secure: true)
587
705
  exit err
588
706
  end
589
707
  end
590
708
  end
591
709
  info.call("Run successful!")
592
- FileUtils.mv joblog, joblog+".bak", verbose: false, force: true
593
710
  end
594
- json_out.call
595
-
596
- # copy all output files to the cache_dir. If a file exists only emit a warning
597
- Dir.glob("*.txt", base: tmpdir) do | fn |
598
- source = tmpdir + "/" + fn
599
- dest = options[:cache_dir] + "/" + fn
600
- if not File.exist?(dest) or options[:force]
601
- info.call "Move #{source} to #{dest}"
602
- FileUtils.mv source, dest, verbose: false
603
- else
604
- warning.call "File #{dest} already exists. Not overwriting"
711
+
712
+ # Collect stats from parallel run
713
+
714
+ run_stats = {}
715
+ $stderr.print "STATS"
716
+ Dir.glob(tmpdir+'/*/*' ).each do | dir |
717
+ File.open("#{dir}/stderr") { |f|
718
+ run_stat = parse_stats.call(f.read)
719
+ chr = run_stat[:chr]
720
+ run_stats[chr] = run_stat
721
+ }
722
+ end
723
+ # Now add up the stats
724
+ user_time = 0.0
725
+ system_time = 0.0
726
+ wall_clock = "0"
727
+ ram_usage_gb = 0.0
728
+ run_stats.each do | k, v |
729
+ wall_clock=v[:wall_clock] if v[:wall_clock]>wall_clock
730
+ ram_usage_gb += v[:ram_usage_gb]
731
+ user_time += v[:user_time]
732
+ system_time += v[:system_time]
733
+ end
734
+
735
+ record[:user_time] = user_time
736
+ record[:system_time] = system_time
737
+ record[:wall_clock] = wall_clock
738
+ record[:ram_usage_gb] = ram_usage_gb.round(2)
739
+ record[:run_stats] = run_stats
740
+
741
+ if create_archive
742
+ if DO_COMPUTE_GWA
743
+ LMDB = tmpdir+"/"+HASH+'.mdb'
744
+ # create lmdb database - we call out into a python script for that.
745
+ # first create a JSON record
746
+
747
+ meta = {
748
+ type: "gemma-wrapper",
749
+ version: version,
750
+ population: options[:population],
751
+ name: options[:name],
752
+ trait: options[:trait],
753
+ url: "https://genenetwork.org/show_trait?trait_id="+options[:trait]+"&dataset="+options[:name],
754
+ archive_GRM: json_in["archive"],
755
+ archive_GWA: File.basename(ARCHIVE),
756
+ }
757
+ if options[:id] and options[:id] =~ /,/ # this is GN specific
758
+ dataid,probesetid,probesetfreezeid = options[:id].split(",")
759
+ meta[:dataid] = dataid.to_i
760
+ meta[:probesetid] = probesetid.to_i
761
+ meta[:probesetfreezeid] = probesetfreezeid.to_i
762
+ end
763
+ record[:meta] = meta
764
+ metafn = tmpdir+"/"+HASH+"-meta.json"
765
+ File.write(metafn,record.to_json)
766
+ # sleep 10_000
767
+ if options[:lmdb]
768
+ File.unlink(LMDB) if File.exist?(LMDB) # removed any cached lmdb
769
+ execute.call "python3 #{BIN}/gemma2lmdb.py --db=#{LMDB} --meta=#{metafn} #{tmpdir}/*assoc.txt"
770
+ end
771
+ if not options[:keep]
772
+ execute.call "rm -f #{tmpdir}/1/*/* #{tmpdir}/*.txt #{tmpdir}/*.log #{tmpdir}/*.mdb-lock" # remove GEMMA output files
773
+ FileUtils.rm_rf("#{tmpdir}/1", secure: true)
774
+ end
605
775
  end
776
+ File.write(tmpdir+"/"+HASH+"-gemma-wrapper-output.json",json_out.call)
777
+ info.call "Creating archive #{ARCHIVE}..."
778
+ execute.call "env XZ_OPT='-T0' tar -cvJf #{ARCHIVE} -C #{tmpdir} ."
606
779
  end
607
780
 
608
781
  end # tmpdir
782
+
783
+ record[:archive] = File.basename(ARCHIVE)
784
+
785
+ print json_out.call
@@ -7,6 +7,7 @@ Gem::Specification.new do |s|
7
7
  s.email = 'pjotr.public01@thebird.nl'
8
8
  s.files = ["bin/gemma-wrapper",
9
9
  "lib/lock.rb",
10
+ "Rakefile",
10
11
  "Gemfile",
11
12
  "LICENSE.txt",
12
13
  "README.md",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bio-gemma-wrapper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.99.6
4
+ version: 0.99.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Pjotr Prins
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-01-22 00:00:00.000000000 Z
11
+ date: 2024-07-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: GEMMA wrapper adds LOCO and permutation support. Also runs in parallel
14
14
  and caches K between runs with LOCO support
@@ -21,6 +21,7 @@ files:
21
21
  - Gemfile
22
22
  - LICENSE.txt
23
23
  - README.md
24
+ - Rakefile
24
25
  - VERSION
25
26
  - bin/gemma-wrapper
26
27
  - gemma-wrapper.gemspec
@@ -44,7 +45,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
44
45
  - !ruby/object:Gem::Version
45
46
  version: '0'
46
47
  requirements: []
47
- rubygems_version: 3.2.22
48
+ rubygems_version: 3.4.19
48
49
  signing_key:
49
50
  specification_version: 4
50
51
  summary: GEMMA with LOCO and permutations