bio-gemma-wrapper 0.99.2 → 0.99.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +26 -10
- data/VERSION +1 -1
- data/bin/gemma-wrapper +57 -13
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0bd37b153e121de9c1758af736cd6904744da2de3540f2a7c547cc423382d8d1
|
4
|
+
data.tar.gz: 84298a943e7cfe6126653895d9714babc83a7be2bf903c7b61ff9072f1d4e4a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f32d48ec2f194a513e0cf8f15463b05662e1b269fdd22a110e4cdfb9f6bc541238bac7e766cbc31a8b782379891636e4f79fb3e3667d567ab3c610298d4f11c2
|
7
|
+
data.tar.gz: abc9b3faf8ef2f63d566caa14312bde2b2f438c722a83e90f7da775c55e2415171efbfab385273c82e45398c31e5306fb99437c38e287570652a3b04a5432883
|
data/README.md
CHANGED
@@ -8,11 +8,12 @@ Nat. Genet., 2016)](cfw.gif)
|
|
8
8
|
## Introduction
|
9
9
|
|
10
10
|
Gemma-wrapper allows running GEMMA with LOCO, GEMMA with caching,
|
11
|
-
GEMMA in parallel (now the default), and GEMMA on
|
12
|
-
is used to run GEMMA as part of the
|
13
|
-
environment.
|
11
|
+
GEMMA in parallel (now the default with LOCO), and GEMMA on
|
12
|
+
PBS. Gemma-wrapper is used to run GEMMA as part of the
|
13
|
+
https://genenetwork.org/ environment.
|
14
14
|
|
15
|
-
Note that gemma-wrapper is projected to be integrated
|
15
|
+
Note that a version of gemma-wrapper is projected to be integrated
|
16
|
+
into gemma itself.
|
16
17
|
|
17
18
|
GEMMA is a software toolkit for fast application of linear mixed
|
18
19
|
models (LMMs) and related models to genome-wide association studies
|
@@ -29,6 +30,14 @@ does a pass-through of all standard GEMMA invocation switches. On
|
|
29
30
|
return gemma-wrapper can return a JSON object (--json) which is
|
30
31
|
useful for web-services.
|
31
32
|
|
33
|
+
## Performance
|
34
|
+
|
35
|
+
LOCO runs in parallel by default which is at least a 5x performance
|
36
|
+
improvement on a machine with enough cores. GEMMA without LOCO,
|
37
|
+
however, does not run in parallel by default. Performance
|
38
|
+
improvements with the parallel implementation for LOCO and non-LOCO
|
39
|
+
can be viewed [here](./test/performance/releases.gmi).
|
40
|
+
|
32
41
|
## Installation
|
33
42
|
|
34
43
|
Prerequisites are
|
@@ -53,15 +62,19 @@ and it will render something like
|
|
53
62
|
Usage: gemma-wrapper [options] -- [gemma-options]
|
54
63
|
--permutate n Permutate # times by shuffling phenotypes
|
55
64
|
--permute-phenotypes filen Phenotypes to be shuffled in permutations
|
56
|
-
--loco
|
65
|
+
--loco Run full leave-one-chromosome-out (LOCO)
|
66
|
+
--chromosomes [1,2,3] Run specific chromosomes
|
57
67
|
--input filen JSON input variables (used for LOCO)
|
58
68
|
--cache-dir path Use a cache directory
|
59
69
|
--json Create output file in JSON format
|
60
|
-
--force Force computation
|
61
|
-
--
|
70
|
+
--force Force computation (override cache)
|
71
|
+
--parallel Run jobs in parallel
|
72
|
+
--no-parallel Do not run jobs in parallel
|
73
|
+
--slurm[=opts] Use slurm PBS for submitting jobs
|
62
74
|
--q, --quiet Run quietly
|
63
75
|
-v, --verbose Run verbosely
|
64
|
-
|
76
|
+
-d, --debug Show debug messages and keep intermediate output
|
77
|
+
--dry-run Show commands, but don't execute
|
65
78
|
-- Anything after gets passed to GEMMA
|
66
79
|
|
67
80
|
-h, --help display this help and exit
|
@@ -99,6 +112,7 @@ the data files are found):
|
|
99
112
|
gemma-wrapper -- \
|
100
113
|
-g test/data/input/BXD_geno.txt.gz \
|
101
114
|
-p test/data/input/BXD_pheno.txt \
|
115
|
+
-a test/data/input/BXD_snps.txt \
|
102
116
|
-gk \
|
103
117
|
-debug
|
104
118
|
|
@@ -116,6 +130,7 @@ You can also get JSON output on STDOUT by providing the --json switch
|
|
116
130
|
gemma-wrapper --json -- \
|
117
131
|
-g test/data/input/BXD_geno.txt.gz \
|
118
132
|
-p test/data/input/BXD_pheno.txt \
|
133
|
+
-a test/data/input/BXD_snps.txt \
|
119
134
|
-gk \
|
120
135
|
-debug > K.json
|
121
136
|
|
@@ -133,6 +148,7 @@ default. If you want something else provide a --cache-dir, e.g.
|
|
133
148
|
gemma-wrapper --cache-dir ~/.gemma-cache -- \
|
134
149
|
-g test/data/input/BXD_geno.txt.gz \
|
135
150
|
-p test/data/input/BXD_pheno.txt \
|
151
|
+
-a test/data/input/BXD_snps.txt \
|
136
152
|
-gk \
|
137
153
|
-debug
|
138
154
|
|
@@ -143,7 +159,7 @@ will store K in ~/.gemma-cache.
|
|
143
159
|
Run the LMM using the K's captured earlier in K.json using the --input
|
144
160
|
switch
|
145
161
|
|
146
|
-
gemma-wrapper --json --
|
162
|
+
gemma-wrapper --json --input K.json -- \
|
147
163
|
-g test/data/input/BXD_geno.txt.gz \
|
148
164
|
-p test/data/input/BXD_pheno.txt \
|
149
165
|
-c test/data/input/BXD_covariates2.txt \
|
@@ -163,7 +179,7 @@ https://github.com/genetics-statistics/GEMMA/issues/46). To loop all
|
|
163
179
|
chromosomes first create all K's with
|
164
180
|
|
165
181
|
gemma-wrapper --json \
|
166
|
-
--loco
|
182
|
+
--loco -- \
|
167
183
|
-g test/data/input/BXD_geno.txt.gz \
|
168
184
|
-p test/data/input/BXD_pheno.txt \
|
169
185
|
-a test/data/input/BXD_snps.txt \
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.99.2
|
1
|
+
0.99.3
|
data/bin/gemma-wrapper
CHANGED
@@ -14,12 +14,12 @@ GEMMA wrapper example:
|
|
14
14
|
gemma-wrapper -- \\
|
15
15
|
-g test/data/input/BXD_geno.txt.gz \\
|
16
16
|
-p test/data/input/BXD_pheno.txt \\
|
17
|
+
-a test/data/input/BXD_snps.txt \
|
17
18
|
-gk
|
18
19
|
|
19
20
|
LOCO K computation with caching and JSON output
|
20
21
|
|
21
|
-
gemma-wrapper --json \\
|
22
|
-
--loco 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,X -- \\
|
22
|
+
gemma-wrapper --json --loco -- \\
|
23
23
|
-g test/data/input/BXD_geno.txt.gz \\
|
24
24
|
-p test/data/input/BXD_pheno.txt \\
|
25
25
|
-a test/data/input/BXD_snps.txt \\
|
@@ -72,11 +72,12 @@ require 'tempfile'
|
|
72
72
|
require 'tmpdir'
|
73
73
|
|
74
74
|
split_at = ARGV.index('--')
|
75
|
+
|
75
76
|
if split_at
|
76
77
|
gemma_args = ARGV[split_at+1..-1]
|
77
78
|
end
|
78
79
|
|
79
|
-
options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir(), quiet: false, parallel:
|
80
|
+
options = { show_help: false, source: 'https://github.com/genetics-statistics/gemma-wrapper', version: version+' (Pjotr Prins)', date: Time.now.to_s, gemma_command: gemma_command, cache_dir: Dir.tmpdir(), quiet: false, permute_phenotypes: false, parallel: nil }
|
80
81
|
|
81
82
|
opts = OptionParser.new do |o|
|
82
83
|
o.banner = "\nUsage: #{File.basename($0)} [options] -- [gemma-options]"
|
@@ -91,8 +92,12 @@ opts = OptionParser.new do |o|
|
|
91
92
|
raise "Phenotype input file #{phenotypes} does not exist" if !File.exist?(phenotypes)
|
92
93
|
end
|
93
94
|
|
94
|
-
o.on('--loco
|
95
|
-
options[:loco] =
|
95
|
+
o.on('--loco', 'Run full leave-one-chromosome-out (LOCO)') do |b|
|
96
|
+
options[:loco] = b
|
97
|
+
end
|
98
|
+
|
99
|
+
o.on('--chromosomes [1,2,3]',Array,'Run specific chromosomes') do |lst|
|
100
|
+
options[:chromosomes] = lst
|
96
101
|
end
|
97
102
|
|
98
103
|
o.on('--input filen',String, 'JSON input variables (used for LOCO)') do |filen|
|
@@ -112,6 +117,10 @@ opts = OptionParser.new do |o|
|
|
112
117
|
options[:force] = true
|
113
118
|
end
|
114
119
|
|
120
|
+
o.on("--parallel", "Run jobs in parallel") do |b|
|
121
|
+
options[:parallel] = true
|
122
|
+
end
|
123
|
+
|
115
124
|
o.on("--no-parallel", "Do not run jobs in parallel") do |b|
|
116
125
|
options[:parallel] = false
|
117
126
|
end
|
@@ -190,6 +199,15 @@ info = lambda do |*msg|
|
|
190
199
|
OUTPUT.print *msg,"\n" if !options[:quiet]
|
191
200
|
end
|
192
201
|
|
202
|
+
# Fetch chromosomes
|
203
|
+
def get_chromosomes annofn
|
204
|
+
h = {}
|
205
|
+
File.open(annofn,"r").each_line do | line |
|
206
|
+
chr = line.split(/\s+/)[2]
|
207
|
+
h[chr] = true
|
208
|
+
end
|
209
|
+
h.map { |k,v| k }
|
210
|
+
end
|
193
211
|
# ---- Start banner
|
194
212
|
|
195
213
|
GEMMA_K_VERSION=version
|
@@ -230,13 +248,17 @@ if RUBY_VERSION =~ /^1/
|
|
230
248
|
warning "runs on Ruby 2.x only\n"
|
231
249
|
end
|
232
250
|
|
251
|
+
# ---- LOCO defaults to parallel
|
252
|
+
if options[:parallel] == nil
|
253
|
+
options[:parallel] = true if options[:loco]
|
254
|
+
end
|
255
|
+
|
233
256
|
debug.call(options) # some debug output
|
234
257
|
debug.call(record)
|
235
258
|
|
236
259
|
DO_COMPUTE_KINSHIP = gemma_args.include?("-gk")
|
237
260
|
DO_COMPUTE_GWA = !DO_COMPUTE_KINSHIP
|
238
261
|
|
239
|
-
# ---- Set up parallel
|
240
262
|
if options[:parallel]
|
241
263
|
begin
|
242
264
|
skip_cite = `echo "will cite" |parallel --citation`
|
@@ -248,6 +270,11 @@ if options[:parallel]
|
|
248
270
|
parallel_cmds = []
|
249
271
|
end
|
250
272
|
|
273
|
+
# ---- Fetch chromosomes from SNP annotation file
|
274
|
+
anno_idx = gemma_args.index '-a'
|
275
|
+
raise "Expected GEMMA -a genotype file switch" if anno_idx == nil
|
276
|
+
CHROMOSOMES = get_chromosomes(gemma_args[anno_idx+1])
|
277
|
+
|
251
278
|
# ---- Compute HASH on inputs
|
252
279
|
hashme = []
|
253
280
|
geno_idx = gemma_args.index '-g'
|
@@ -434,11 +461,17 @@ gwas = lambda do | chr, kfn, pfn, permutation=0 |
|
|
434
461
|
end
|
435
462
|
|
436
463
|
LOCO = options[:loco]
|
464
|
+
if LOCO
|
465
|
+
if options[:chromosomes]
|
466
|
+
CHROMOSOMES = options[:chromosomes]
|
467
|
+
end
|
468
|
+
end
|
469
|
+
|
437
470
|
if DO_COMPUTE_KINSHIP
|
438
471
|
# compute K
|
439
|
-
info.call
|
440
|
-
if LOCO
|
441
|
-
|
472
|
+
info.call CHROMOSOMES
|
473
|
+
if LOCO
|
474
|
+
CHROMOSOMES.each do |chr|
|
442
475
|
info.call "LOCO for ",chr
|
443
476
|
kinship.call(chr)
|
444
477
|
end
|
@@ -447,13 +480,24 @@ if DO_COMPUTE_KINSHIP
|
|
447
480
|
end
|
448
481
|
else
|
449
482
|
# DO_COMPUTE_GWA
|
450
|
-
|
483
|
+
begin
|
484
|
+
json_in = JSON.parse(File.read(options[:input]))
|
485
|
+
rescue TypeError
|
486
|
+
raise "Missing JSON input file?"
|
487
|
+
end
|
451
488
|
raise "JSON problem, file #{options[:input]} is not -gk derived" if json_in["type"] != "K"
|
452
489
|
|
453
490
|
pfn = options[:permute_phenotypes] # can be nil
|
454
|
-
|
455
|
-
|
456
|
-
|
491
|
+
if LOCO
|
492
|
+
k_files = json_in["files"].map { |rec| [rec[0],rec[2]] }
|
493
|
+
k_files.each do | chr, kfn | # call a GWA for each chromosome
|
494
|
+
gwas.call(chr,kfn,pfn)
|
495
|
+
end
|
496
|
+
else
|
497
|
+
kfn = json_in["files"][0][2]
|
498
|
+
CHROMOSOMES.each do | chr |
|
499
|
+
gwas.call(chr,kfn,pfn)
|
500
|
+
end
|
457
501
|
end
|
458
502
|
# Permute
|
459
503
|
if options[:permutate]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-gemma-wrapper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.99.2
|
4
|
+
version: 0.99.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Pjotr Prins
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-08-
|
11
|
+
date: 2021-08-22 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: GEMMA wrapper adds LOCO and permutation support. Also runs in parallel
|
14
14
|
and caches K between runs with LOCO support
|