bio-bwa 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +28 -0
- data/LICENSE.txt +35 -0
- data/README.rdoc +33 -0
- data/Rakefile +56 -0
- data/VERSION +1 -0
- data/bio-bwa.gemspec +152 -0
- data/doc/Bio.html +93 -0
- data/doc/Bio/BWA.html +2884 -0
- data/doc/Bio/BWA/Library.html +229 -0
- data/doc/_index.html +119 -0
- data/doc/class_list.html +36 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +53 -0
- data/doc/css/style.css +310 -0
- data/doc/file.LICENSE.html +88 -0
- data/doc/file.README.html +119 -0
- data/doc/file_list.html +41 -0
- data/doc/frames.html +13 -0
- data/doc/index.html +119 -0
- data/doc/js/app.js +203 -0
- data/doc/js/full_list.js +149 -0
- data/doc/js/jquery.js +154 -0
- data/doc/method_list.html +171 -0
- data/doc/top-level-namespace.html +88 -0
- data/ext/COPYING +674 -0
- data/ext/ChangeLog +3864 -0
- data/ext/NEWS +555 -0
- data/ext/README +29 -0
- data/ext/bamlite.c +155 -0
- data/ext/bamlite.h +94 -0
- data/ext/bntseq.c +303 -0
- data/ext/bntseq.h +80 -0
- data/ext/bwa.1 +562 -0
- data/ext/bwape.c +807 -0
- data/ext/bwase.c +686 -0
- data/ext/bwase.h +27 -0
- data/ext/bwaseqio.c +222 -0
- data/ext/bwt.c +250 -0
- data/ext/bwt.h +105 -0
- data/ext/bwt_gen/Makefile +23 -0
- data/ext/bwt_gen/QSufSort.c +496 -0
- data/ext/bwt_gen/QSufSort.h +40 -0
- data/ext/bwt_gen/bwt_gen.c +1547 -0
- data/ext/bwt_gen/bwt_gen.h +105 -0
- data/ext/bwt_lite.c +94 -0
- data/ext/bwt_lite.h +29 -0
- data/ext/bwtaln.c +345 -0
- data/ext/bwtaln.h +150 -0
- data/ext/bwtgap.c +264 -0
- data/ext/bwtgap.h +38 -0
- data/ext/bwtindex.c +186 -0
- data/ext/bwtio.c +77 -0
- data/ext/bwtmisc.c +269 -0
- data/ext/bwtsw2.h +51 -0
- data/ext/bwtsw2_aux.c +650 -0
- data/ext/bwtsw2_chain.c +107 -0
- data/ext/bwtsw2_core.c +594 -0
- data/ext/bwtsw2_main.c +100 -0
- data/ext/cs2nt.c +191 -0
- data/ext/is.c +218 -0
- data/ext/khash.h +506 -0
- data/ext/kseq.h +208 -0
- data/ext/ksort.h +269 -0
- data/ext/kstring.c +35 -0
- data/ext/kstring.h +46 -0
- data/ext/kvec.h +90 -0
- data/ext/main.c +63 -0
- data/ext/main.h +29 -0
- data/ext/mkrf_conf.rb +49 -0
- data/ext/qualfa2fq.pl +27 -0
- data/ext/simple_dp.c +162 -0
- data/ext/simpletest.c +23 -0
- data/ext/solid2fastq.pl +111 -0
- data/ext/stdaln.c +1072 -0
- data/ext/stdaln.h +162 -0
- data/ext/utils.c +82 -0
- data/ext/utils.h +54 -0
- data/lib/bio-bwa.rb +7 -0
- data/lib/bio/bwa.rb +312 -0
- data/lib/bio/bwa/library.rb +42 -0
- data/test/data/testdata.fa +602 -0
- data/test/data/testdata.long.fa +175 -0
- data/test/data/testdata.short.fa +2 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-bwa_basic.rb +62 -0
- data/test/test_bio-bwa_make_index.rb +42 -0
- data/test/test_bio-bwa_run_aln.rb +49 -0
- data/test/test_bio-bwa_sam_conversion.rb +49 -0
- metadata +218 -0
data/ext/stdaln.h
ADDED
@@ -0,0 +1,162 @@
|
|
1
|
+
/* The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2003-2006, 2008, by Heng Li <lh3lh3@gmail.com>
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
20
|
+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
21
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
22
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
23
|
+
SOFTWARE.
|
24
|
+
*/
|
25
|
+
|
26
|
+
/*
|
27
|
+
2009-07-23, 0.10.0
|
28
|
+
|
29
|
+
- Use 32-bit to store CIGAR
|
30
|
+
|
31
|
+
- Report suboptimal alignments
|
32
|
+
|
33
|
+
- Implemented half-fixed-half-open DP
|
34
|
+
|
35
|
+
2009-04-26, 0.9.10
|
36
|
+
|
37
|
+
- Allow to set a threshold for local alignment
|
38
|
+
|
39
|
+
2009-02-18, 0.9.9
|
40
|
+
|
41
|
+
- Fixed a bug when no residue matches
|
42
|
+
|
43
|
+
2008-08-04, 0.9.8
|
44
|
+
|
45
|
+
- Fixed the wrong declaration of aln_stdaln_aux()
|
46
|
+
|
47
|
+
- Avoid 0 coordinate for global alignment
|
48
|
+
|
49
|
+
2008-08-01, 0.9.7
|
50
|
+
|
51
|
+
- Change gap_end penalty to 5 in aln_param_bwa
|
52
|
+
|
53
|
+
- Add function to convert path_t to the CIGAR format
|
54
|
+
|
55
|
+
2008-08-01, 0.9.6
|
56
|
+
|
57
|
+
- The first gap now costs (gap_open+gap_ext), instead of
|
58
|
+
gap_open. Scoring systems are modified accordingly.
|
59
|
+
|
60
|
+
- Gap end is now correctly handled. Previously it is not correct.
|
61
|
+
|
62
|
+
- Change license to MIT.
|
63
|
+
|
64
|
+
*/
|
65
|
+
|
66
|
+
#ifndef LH3_STDALN_H_
|
67
|
+
#define LH3_STDALN_H_
|
68
|
+
|
69
|
+
|
70
|
+
#define STDALN_VERSION 0.11.0
|
71
|
+
|
72
|
+
#include <stdint.h>
|
73
|
+
|
74
|
+
#define FROM_M 0
|
75
|
+
#define FROM_I 1
|
76
|
+
#define FROM_D 2
|
77
|
+
#define FROM_S 3
|
78
|
+
|
79
|
+
#define ALN_TYPE_LOCAL 0
|
80
|
+
#define ALN_TYPE_GLOBAL 1
|
81
|
+
#define ALN_TYPE_EXTEND 2
|
82
|
+
|
83
|
+
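/* Sentinel used as "minus infinity". NOTE(review): -1073741823 is -(2^30 - 1), not the smallest int; presumably chosen to leave headroom before 32-bit overflow -- confirm against stdaln.c. */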
|
84
|
+
#define MINOR_INF -1073741823
|
85
|
+
|
86
|
+
typedef struct /* Scoring parameters handed to the aln_* routines declared below. */
|
87
|
+
{
|
88
|
+
int gap_open; /* gap open penalty; per the changelog above, a first gap costs gap_open + gap_ext */
|
89
|
+
int gap_ext; /* gap extension penalty */
|
90
|
+
int gap_end; /* gap end penalty */
|
91
|
+
|
92
|
+
int *matrix; /* score matrix; the presets documented below use the aln_sm_* arrays here */
|
93
|
+
int row; /* presumably the dimension of `matrix` (5, 16 or 22 in the documented presets) -- TODO confirm */
|
94
|
+
int band_width; /* band width; NOTE(review): presumably bounds the banded DP -- confirm in stdaln.c */
|
95
|
+
} AlnParam;
|
96
|
+
|
97
|
+
typedef struct /* One element of an alignment path (see the path/path_len arguments below). */
|
98
|
+
{
|
99
|
+
int i, j; /* presumably positions in the two sequences -- TODO confirm which is which */
|
100
|
+
unsigned char ctype; /* presumably one of FROM_M / FROM_I / FROM_D / FROM_S -- TODO confirm */
|
101
|
+
} path_t;
|
102
|
+
|
103
|
+
typedef struct /* Alignment result returned by aln_stdaln()/aln_stdaln_aux(); release with aln_free_AlnAln(). */
|
104
|
+
{
|
105
|
+
path_t *path; /* alignment path; for advanced users... :-) */
|
106
|
+
int path_len; /* length of `path`; for advanced users... :-) */
|
107
|
+
int start1, end1; /* start and end on the first sequence; coordinates are 1-based */
|
108
|
+
int start2, end2; /* start and end on the second sequence; coordinates are 1-based */
|
109
|
+
int score, subo; /* alignment score; `subo` is presumably the suboptimal score (see "Report suboptimal alignments" in the changelog) */
|
110
|
+
|
111
|
+
char *out1, *out2; /* print them, and then you will know */
|
112
|
+
char *outm; /* NOTE(review): presumably the match line printed between out1 and out2 -- confirm */
|
113
|
+
|
114
|
+
int n_cigar; /* number of entries in cigar32 */
|
115
|
+
uint32_t *cigar32; /* CIGAR stored in 32-bit entries (see the 0.10.0 changelog note) */
|
116
|
+
} AlnAln;
|
117
|
+
|
118
|
+
#ifdef __cplusplus
|
119
|
+
extern "C" {
|
120
|
+
#endif
|
121
|
+
|
122
|
+
AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,
|
123
|
+
int type, int do_align, int len1, int len2);
|
124
|
+
AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int type, int do_align);
|
125
|
+
void aln_free_AlnAln(AlnAln *aa);
|
126
|
+
|
127
|
+
int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
|
128
|
+
path_t *path, int *path_len);
|
129
|
+
int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
|
130
|
+
path_t *path, int *path_len, int _thres, int *_subo);
|
131
|
+
int aln_extend_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
|
132
|
+
path_t *path, int *path_len, int G0, uint8_t *_mem);
|
133
|
+
uint16_t *aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
|
134
|
+
uint32_t *aln_path2cigar32(const path_t *path, int path_len, int *n_cigar);
|
135
|
+
|
136
|
+
#ifdef __cplusplus
|
137
|
+
}
|
138
|
+
#endif
|
139
|
+
|
140
|
+
/********************
|
141
|
+
* global variables *
|
142
|
+
********************/
|
143
|
+
|
144
|
+
extern AlnParam aln_param_bwa; /* = { 37, 9, 0, aln_sm_maq, 5, 50 }; */
|
145
|
+
extern AlnParam aln_param_blast; /* = { 5, 2, 2, aln_sm_blast, 5, 50 }; */
|
146
|
+
extern AlnParam aln_param_nt2nt; /* = { 10, 2, 2, aln_sm_nt, 16, 75 }; */
|
147
|
+
extern AlnParam aln_param_aa2aa; /* = { 20, 19, 19, aln_sm_read, 16, 75 }; */
|
148
|
+
extern AlnParam aln_param_rd2rd; /* = { 12, 2, 2, aln_sm_blosum62, 22, 50 }; */
|
149
|
+
|
150
|
+
/* common nucleotide score matrix for 16 bases */
|
151
|
+
extern int aln_sm_nt[], aln_sm_bwa[];
|
152
|
+
|
153
|
+
/* BLOSUM62 and BLOSUM45 */
|
154
|
+
extern int aln_sm_blosum62[], aln_sm_blosum45[];
|
155
|
+
|
156
|
+
/* common read for 16 bases. note that read alignment is quite different from common nucleotide alignment */
|
157
|
+
extern int aln_sm_read[];
|
158
|
+
|
159
|
+
/* human-mouse score matrix for 4 bases */
|
160
|
+
extern int aln_sm_hs[];
|
161
|
+
|
162
|
+
#endif
|
data/ext/utils.c
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
/* The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2008 Genome Research Ltd (GRL).
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
20
|
+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
21
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
22
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
23
|
+
SOFTWARE.
|
24
|
+
*/
|
25
|
+
|
26
|
+
/* Contact: Heng Li <lh3@sanger.ac.uk> */
|
27
|
+
|
28
|
+
#include <stdio.h>
|
29
|
+
#include <stdarg.h>
|
30
|
+
#include <stdlib.h>
|
31
|
+
#include <string.h>
|
32
|
+
#include <zlib.h>
|
33
|
+
#include "utils.h"
|
34
|
+
|
35
|
+
/* Open `fn` with fopen() semantics, aborting the process on failure.
 * The special name "-" selects stdin when `mode` contains 'r' and stdout
 * otherwise. `func` only tags the error message. */
FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
{
	FILE *stream;

	if (strcmp(fn, "-") == 0) {
		if (strstr(mode, "r") != NULL)
			return stdin;
		return stdout;
	}

	stream = fopen(fn, mode);
	if (stream == NULL) {
		fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
		abort();
	}
	return stream;
}
|
46
|
+
/* Re-associate the stream `fp` with file `fn` via freopen(), aborting the
 * process with a diagnostic on failure. Returns `fp` on success. `func`
 * only tags the error message. */
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp)
{
	FILE *reopened = freopen(fn, mode, fp);

	if (reopened == NULL) {
		fprintf(stderr, "[%s] fail to open file '%s': ", func, fn);
		/* NOTE(review): the fprintf above may modify errno before perror reads it. */
		perror(NULL);
		fprintf(stderr, "Abort!\n");
		abort();
	}
	return fp;
}
|
56
|
+
/* Open `fn` as a gzFile, aborting the process on failure.
 * The special name "-" wraps stdin (when `mode` contains 'r') or stdout
 * with gzdopen(). Both the gzopen() and the gzdopen() results are checked,
 * so a NULL handle is never returned to the caller. `func` only tags the
 * error message. */
gzFile err_xzopen_core(const char *func, const char *fn, const char *mode)
{
	gzFile fp;

	if (strcmp(fn, "-") == 0)
		fp = gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode);
	else
		fp = gzopen(fn, mode);

	if (fp == 0) {
		fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
		abort();
	}
	return fp;
}
|
67
|
+
/* Print "[header] <formatted message> Abort!" to stderr and abort().
 * `fmt` and the variadic arguments follow printf conventions. */
void err_fatal(const char *header, const char *fmt, ...)
{
	va_list ap;

	fprintf(stderr, "[%s] ", header);
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fprintf(stderr, " Abort!\n");
	abort();
}
|
77
|
+
|
78
|
+
/* Report a fixed message on stderr, tagged with the reporting function's
 * name, then abort(). Does not return. */
void err_fatal_simple_core(const char *func, const char *msg)
{
	const char *tag = func;
	const char *text = msg;

	fprintf(stderr, "[%s] %s Abort!\n", tag, text);
	abort();
}
|
data/ext/utils.h
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
/* The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2008 Genome Research Ltd (GRL).
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
20
|
+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
21
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
22
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
23
|
+
SOFTWARE.
|
24
|
+
*/
|
25
|
+
|
26
|
+
/* Contact: Heng Li <lh3@sanger.ac.uk> */
|
27
|
+
|
28
|
+
#ifndef LH3_UTILS_H
|
29
|
+
#define LH3_UTILS_H
|
30
|
+
|
31
|
+
#include <stdio.h>
|
32
|
+
#include <zlib.h>
|
33
|
+
|
34
|
+
#define err_fatal_simple(msg) err_fatal_simple_core(__func__, msg)
|
35
|
+
#define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
|
36
|
+
#define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp)
|
37
|
+
#define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode)
|
38
|
+
/* Abort via err_fatal_simple_core() when `cond` evaluates to 0.
 * Wrapped in do { } while (0) so the macro is a single statement: a bare
 * `if` would capture a following `else` at the call site. */
#define xassert(cond, msg) do { if ((cond) == 0) err_fatal_simple_core(__func__, msg); } while (0)
|
39
|
+
|
40
|
+
#ifdef __cplusplus
|
41
|
+
extern "C" {
|
42
|
+
#endif
|
43
|
+
|
44
|
+
void err_fatal(const char *header, const char *fmt, ...);
|
45
|
+
void err_fatal_simple_core(const char *func, const char *msg);
|
46
|
+
FILE *err_xopen_core(const char *func, const char *fn, const char *mode);
|
47
|
+
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp);
|
48
|
+
gzFile err_xzopen_core(const char *func, const char *fn, const char *mode);
|
49
|
+
|
50
|
+
#ifdef __cplusplus
|
51
|
+
}
|
52
|
+
#endif
|
53
|
+
|
54
|
+
#endif
|
data/lib/bio-bwa.rb
ADDED
data/lib/bio/bwa.rb
ADDED
@@ -0,0 +1,312 @@
|
|
1
|
+
module Bio
|
2
|
+
# @author Francesco Strozzi https://github.com/fstrozzi
|
3
|
+
class BWA
|
4
|
+
extend FFI::Library
|
5
|
+
ffi_lib Bio::BWA::Library.load
|
6
|
+
|
7
|
+
# Convert a Fasta to Packed format
|
8
|
+
# @param [Hash] params Options.
|
9
|
+
# @option params [String] :file_in the Fasta or FastQ file (REQUIRED)
|
10
|
+
# @option params [String] :prefix the prefix name for the PAC file
|
11
|
+
def self.fa2pac(params = {})
  # %w builds an Array of option names. The former %q literal produced the
  # single String "file_in prefix", which turned the validity check in
  # build_parameters into a substring match.
  valid_params = %w(file_in prefix)
  # Positional arguments, in the order they are appended to the argument list.
  last_params = [:file_in, :prefix]
  mandatory_params = [:file_in]
  check_mandatory(mandatory_params, params)
  args = build_parameters("fa2pac", valid_params, params, last_params)
  call_BWA_function(args)
end
|
19
|
+
|
20
|
+
# Convert a Packed file format to Burrows-Wheeler Transform format
|
21
|
+
# @param [Hash] params Options.
|
22
|
+
# @option params [String] :file_in the PAC file (REQUIRED)
|
23
|
+
# @option params [String] :file_out the name of the BWT file (REQUIRED)
|
24
|
+
def self.pac2bwt(params = {})
  # %w builds an Array of option names. The former %q literal produced one
  # String, so option names were validated by substring match.
  valid_params = %w(file_in file_out)
  # Both positional arguments are mandatory.
  last_params = [:file_in, :file_out]
  check_mandatory(last_params, params)
  args = build_parameters("pac2bwt", valid_params, params, last_params)
  call_BWA_function(args)
end
|
31
|
+
|
32
|
+
# Convert a BWT file to the new BWT format
|
33
|
+
# @param [Hash] params Options.
|
34
|
+
# @option params [String] :file_in the BWT file (REQUIRED)
|
35
|
+
# @note this method overwrites the existing BWT file
|
36
|
+
def self.bwtupdate(params = {})
  # The input file is the only argument; it is both mandatory and positional.
  positional = [:file_in]
  check_mandatory(positional, params)
  call_BWA_function(build_parameters("bwtupdate", %w(file_in), params, positional))
end
|
43
|
+
|
44
|
+
# Generate reverse Packed format
|
45
|
+
# @param [Hash] params Options.
|
46
|
+
# @option params [String] :file_in the PAC file (REQUIRED)
|
47
|
+
# @option params [String] :file_out the name of the REV PAC (REQUIRED)
|
48
|
+
def self.pac_rev(params = {})
  # Input and output files are both mandatory and passed positionally.
  positional = [:file_in, :file_out]
  check_mandatory(positional, params)
  call_BWA_function(build_parameters("pac_rev", %w(file_in file_out), params, positional))
end
|
55
|
+
|
56
|
+
# Generate SA file from BWT and Occ files
|
57
|
+
# @param [Hash] params Options.
|
58
|
+
# @option params [String] :file_in the BWT file (REQUIRED)
|
59
|
+
# @option params [String] :file_out the name of the SA file (REQUIRED)
|
60
|
+
def self.bwt2sa(params = {})
  # %w builds an Array of option names. With the former %q literal the list
  # was one String, so any key whose name is a substring of it passed
  # validation.
  valid_params = %w(file_in file_out i)
  # Both positional arguments are mandatory.
  last_params = [:file_in, :file_out]
  check_mandatory(last_params, params)
  args = build_parameters("bwt2sa", valid_params, params, last_params)
  call_BWA_function(args)
end
|
67
|
+
|
68
|
+
# Generate the BWT index for a Fasta database
|
69
|
+
# @param [Hash] params Options.
|
70
|
+
# @option params [String] :file_in the Fasta file (REQUIRED)
|
71
|
+
# @option params [String] :p the prefix for the database files that will be generated [default is Fasta name]
|
72
|
+
# @option params [String] :a the algorithm to be used for indexing: 'is' (short database)[default] or 'bwtsw' (long database)
|
73
|
+
# @option params [Boolean] :c colorspace database index
|
74
|
+
# @note Boolean values must be set to 'true'
|
75
|
+
def self.make_index(params = {})
  required = [:file_in]
  check_mandatory(required, params)
  # The Ruby-side :prefix option is passed to the index command as -p.
  params = change_arg_name(params, :prefix, :p) if params[:prefix]
  argv = build_parameters("index", %w(file_in p a c), params, [:file_in])
  call_BWA_function(argv)
end
|
84
|
+
|
85
|
+
# Run the alignment for short query sequences
|
86
|
+
# @param [Hash] params Options
|
87
|
+
# @option params [String] :file_in the FastQ file (REQUIRED)
|
88
|
+
# @option params [String] :prefix the prefix of the database index files (REQUIRED)
|
89
|
+
# @option params [String] :file_out the output of the alignment in SAI format (REQUIRED)
|
90
|
+
# @option params [Integer] :n max #diff (int) or missing prob under 0.02 err rate (float) [0.04]
|
91
|
+
# @option params [Integer] :o maximum number or fraction of gap opens [1]
|
92
|
+
# @option params [Integer] :e maximum number of gap extensions, -1 for disabling long gaps [-1]
|
93
|
+
# @option params [Integer] :m maximum entries in the queue [2000000]
|
94
|
+
# @option params [Integer] :t number of threads [1]
|
95
|
+
# @option params [Integer] :M mismatch penalty [3]
|
96
|
+
# @option params [Integer] :O gap open penalty [11]
|
97
|
+
# @option params [Integer] :R stop searching when there are >INT equally best hits [30]
|
98
|
+
# @option params [Integer] :q quality threshold for read trimming down to 35bp [0]
|
99
|
+
# @option params [Integer] :B length of barcode
|
100
|
+
# @option params [Boolean] :c input sequences are in the color space
|
101
|
+
# @option params [Boolean] :L log-scaled gap penalty for long deletions
|
102
|
+
# @option params [Boolean] :N non-iterative mode: search for all n-difference hits (slow)
|
103
|
+
# @option params [Boolean] :I the input is in the Illumina 1.3+ FASTQ-like format
|
104
|
+
# @option params [Boolean] :b the input read file is in the BAM format
|
105
|
+
# @option params [Boolean] :single use single-end reads only (effective with -b)
|
106
|
+
# @option params [Boolean] :first use the 1st read in a pair (effective with -b)
|
107
|
+
# @option params [Boolean] :second use the 2nd read in a pair (effective with -b)
|
108
|
+
# @option params [Integer] :i do not put an indel within INT bp towards the ends [5]
|
109
|
+
# @option params [Integer] :d maximum occurrences for extending a long deletion [10]
|
110
|
+
# @option params [Integer] :l seed length [32]
|
111
|
+
# @option params [Integer] :k maximum differences in the seed [2]
|
112
|
+
# @option params [Integer] :E gap extension penalty [4]
|
113
|
+
# @note Boolean values must be set to 'true'
|
114
|
+
def self.short_read_alignment(params = {})
  # "0", "1" and "2" are the flag names that :single, :first and :second are
  # renamed to below. They must be listed here, otherwise build_parameters
  # rejects them with "Unknown parameter".
  valid_params = %w(n o e i d l k c L R m t N M O E q f b single first second 0 1 2 I B prefix file_in)
  mandatory_params = [:prefix, :file_in, :file_out]
  # Positional arguments, appended after all flags in this order.
  last_params = [:prefix, :file_in]
  check_mandatory(mandatory_params, params)
  # Translate the descriptive Ruby option names into the flag names that are
  # emitted as "-<name>".
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  params = change_arg_name(params, :single, "0") if params[:single]
  params = change_arg_name(params, :first, "1") if params[:first]
  params = change_arg_name(params, :second, "2") if params[:second]
  args = build_parameters("aln", valid_params, params, last_params)
  call_BWA_function(args)
end
|
127
|
+
|
128
|
+
# Convert the SAI alignment output into SAM format (single end)
|
129
|
+
# @param [Hash] params Options
|
130
|
+
# @option params [String] :fastq the FastQ file (REQUIRED)
|
131
|
+
# @option params [String] :prefix the prefix of the database index files (REQUIRED)
|
132
|
+
# @option params [String] :sai the alignment file in SAI format (REQUIRED)
|
133
|
+
# @option params [String] :file_out the file name of the SAM output
|
134
|
+
# @option params [Integer] :n max_occ
|
135
|
+
# @option params [String] :r RG_line
|
136
|
+
def self.sai_to_sam_single(params = {})
  # Prefix, SAI file and FastQ file are mandatory and are passed
  # positionally in exactly this order.
  positional = [:prefix, :sai, :fastq]
  check_mandatory(positional, params)
  # The output file is passed as the -f flag.
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  argv = build_parameters("sai2sam_se", %w(n r fastq sai prefix f), params, positional)
  call_BWA_function(argv)
end
|
145
|
+
|
146
|
+
|
147
|
+
# Convert the SAI alignment output into SAM format (paired ends)
|
148
|
+
# @param [Hash] params Options
|
149
|
+
# @option params [String] :prefix the prefix of the database index files (REQUIRED)
|
150
|
+
# @option params [Array] :sai the two alignment files in SAI format (REQUIRED)
|
151
|
+
# @option params [Array] :fastq the two fastq files (REQUIRED)
|
152
|
+
# @option params [Integer] :a maximum insert size [500]
|
153
|
+
# @option params [Integer] :o maximum occurrences for one end [100000]
|
154
|
+
# @option params [Integer] :n maximum hits to output for paired reads [3]
|
155
|
+
# @option params [Integer] :N maximum hits to output for discordant pairs [10]
|
156
|
+
# @option params [Float] :c prior of chimeric rate (lower bound) [1.0e-05]
|
157
|
+
# @option params [String] :r read group header line such as '@RG\tID:foo\tSM:bar'
|
158
|
+
# @option params [Boolean] :P preload index into memory (for base-space reads only)
|
159
|
+
# @option params [Boolean] :s disable Smith-Waterman for the unmapped mate
|
160
|
+
# @option params [Boolean] :A disable insert size estimate (force :s)
|
161
|
+
# @note Boolean values must be set to 'true'
|
162
|
+
def self.sai_to_sam_paired(params = {})
  valid_params = %w(a o s P n N c f A r prefix first_sai second_sai first_fastq second_fastq)
  mandatory_params = [:prefix, :sai, :fastq]
  # Positional arguments, appended after all flags in this order.
  last_params = [:prefix, :first_sai, :second_sai, :first_fastq, :second_fastq]
  check_mandatory(mandatory_params, params)

  # Work on a copy: the :sai and :fastq pairs are split into separate keys
  # below, and the caller's hash must stay reusable for another call.
  params = params.dup
  params = change_arg_name(params, :file_out, :f) if params[:file_out]

  # Always validate the pairs. A nil value would otherwise slip through and
  # reach the C function as empty positional arguments.
  sai = params.delete(:sai)
  unless sai.is_a?(Array) && sai.size == 2
    raise ArgumentError, "you must provide an array with two SAI files!"
  end
  params[:first_sai], params[:second_sai] = sai

  fastq = params.delete(:fastq)
  unless fastq.is_a?(Array) && fastq.size == 2
    raise ArgumentError, "you must provide an array with two FastQ files!"
  end
  params[:first_fastq], params[:second_fastq] = fastq

  args = build_parameters("sai2sam_pe", valid_params, params, last_params)
  call_BWA_function(args)
end
|
183
|
+
|
184
|
+
# Run the alignment for long query sequences
|
185
|
+
# @param [Hash] params Options
|
186
|
+
# @option params [String] :file_in the FastQ file (REQUIRED)
|
187
|
+
# @option params [String] :prefix the prefix of the database index files (REQUIRED)
|
188
|
+
# @option params [String] :file_out the output of the alignment in SAM format (REQUIRED)
|
189
|
+
# @option params [Integer] :a score for a match [1]
|
190
|
+
# @option params [Integer] :b mismatch penalty [3]
|
191
|
+
# @option params [Integer] :q gap open penalty [5]
|
192
|
+
# @option params [Integer] :r gap extension penalty [2]
|
193
|
+
# @option params [Integer] :t number of threads [1]
|
194
|
+
# @option params [Integer] :w band width [50]
|
195
|
+
# @option params [Float] :m mask level [0.50]
|
196
|
+
# @option params [Integer] :T score threshold divided by a [30]
|
197
|
+
# @option params [Integer] :s maximum seeding interval size [3]
|
198
|
+
# @option params [Integer] :z Z-best [1]
|
199
|
+
# @option params [Integer] :N number of seeds to trigger reverse alignment [5]
|
200
|
+
# @option params [Float] :c coefficient of length-threshold adjustment [5.5]
|
201
|
+
# @option params [Boolean] :H in SAM output, use hard clipping rather than soft
|
202
|
+
# @note Boolean arguments must be set to 'true'
|
203
|
+
def self.long_read_alignment(params = {})
  check_mandatory([:prefix, :file_in, :file_out], params)
  # The output file is passed as the -f flag.
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  accepted = %w(q r a b t T w d z m y s c N H f prefix file_in)
  # Prefix and input file are positional and come after all flags.
  argv = build_parameters("bwtsw2", accepted, params, [:prefix, :file_in])
  call_BWA_function(argv)
end
|
212
|
+
|
213
|
+
# Run the alignment between multiple short sequences and ONE long sequence
|
214
|
+
# @param [Hash] params Options
|
215
|
+
# @option params [String] :short_seq the short query sequence (REQUIRED)
|
216
|
+
# @option params [String] :long_seq the long database sequence (REQUIRED)
|
217
|
+
# @option params [String] :file_out the alignment output
|
218
|
+
# @option params [Integer] :T minimum score [1]
|
219
|
+
# @option params [Boolean] :p protein alignment (suppressing :r)
|
220
|
+
# @option params [Boolean] :f forward strand only
|
221
|
+
# @option params [Boolean] :r reverse strand only
|
222
|
+
# @option params [Boolean] :g global alignment
|
223
|
+
# @note Boolean values must be set to 'true'
|
224
|
+
def self.simple_SW(params = {})
  valid_params = %w(g T f r p file_out long_seq short_seq)
  mandatory_params = [:long_seq, :short_seq]
  # Both sequences are positional and come after all flags.
  last_params = mandatory_params
  check_mandatory(mandatory_params, params)

  # :file_out is handled here and not forwarded as a flag. Remove it from a
  # copy so the caller's hash is left untouched.
  params = params.dup
  file_out = params.delete(:file_out)
  args = build_parameters("stdsw", valid_params, params, last_params)

  unless file_out
    call_BWA_function(args)
    return nil
  end

  # Redirect standard output to the requested file for the duration of the
  # call, then restore the stream that was active before. Reopening
  # "/dev/tty" would discard a redirected stdout and fails when the process
  # has no controlling terminal.
  $stdout.flush
  saved_stdout = $stdout.dup
  begin
    $stdout.reopen(file_out, "w")
    call_BWA_function(args)
  ensure
    $stdout.reopen(saved_stdout)
    saved_stdout.close
  end
  $stdout
end
|
237
|
+
|
238
|
+
|
239
|
+
|
240
|
+
######## Methods to handle C functions and arguments ########
|
241
|
+
|
242
|
+
# Every BWA entry point shares the C signature int f(int argc, char *argv[]),
# so all of them are bound with the same FFI signature.
%w(fa2pac pac2bwt bwtupdate pac_rev bwt2sa index aln sai2sam_se sai2sam_pe bwtsw2 stdsw).each do |command|
  attach_function "bwa_#{command}".to_sym, [:int, :pointer], :int
end
|
253
|
+
|
254
|
+
# Internal method to call the BWA C functions
|
255
|
+
# @note this method should not be called directly
|
256
|
+
def self.call_BWA_function(args)
  # args[0] is the command name; the attached C function is bwa_<command>.
  function_name = "bwa_#{args[0]}".to_sym
  argv = build_args_for_BWA(args)
  # Same calling convention as int argc, char *argv[].
  self.send(function_name, args.size, argv)
end
|
260
|
+
|
261
|
+
# Internal method to build argument list for BWA C functions
|
262
|
+
# @note this method should not be called directly
|
263
|
+
def self.build_args_for_BWA(args)
  # Convert every argument into a string and then into a native C string.
  cmd_args = args.map { |arg| FFI::MemoryPointer.from_string(arg.to_s) }

  # Array of char* with one extra slot. MemoryPointer memory is zero-filled
  # by default, so the extra slot is the conventional argv[argc] == NULL
  # terminator.
  exec_args = FFI::MemoryPointer.new(:pointer, cmd_args.length + 1)
  cmd_args.each_with_index do |arg, i|
    exec_args[i].put_pointer(0, arg) # filling in the array of pointers
  end

  # exec_args only stores raw addresses. Keep the string pointers referenced
  # from the returned object so they cannot be garbage-collected (and their
  # memory released) while exec_args is still in use.
  exec_args.instance_variable_set(:@bwa_arg_strings, cmd_args)
  exec_args
end
|
273
|
+
|
274
|
+
# Internal method to produce a correct parameter list for BWA functions
|
275
|
+
# @note this method should not be called directly
|
276
|
+
def self.build_parameters(function_name, valid_params, params, last_params)
  # Builds the argument list [function_name, flags..., positional values...].
  #
  # function_name - command name, used as the first element
  # valid_params  - accepted option names (Array, or a whitespace-separated String)
  # params        - Hash of option name => value
  # last_params   - option names whose values are appended positionally, in order
  #
  # Raises ArgumentError when params contains a name not in valid_params.

  # Normalise to an Array of Strings so names are compared exactly. A plain
  # String would otherwise make include? a substring test.
  valid_names = valid_params.is_a?(String) ? valid_params.split : valid_params.map { |name| name.to_s }

  args = [function_name]
  params.each_key do |k|
    raise ArgumentError, "Unknown parameter '#{k}'" unless valid_names.include?(k.to_s)
    # Positional parameters are appended at the end; unset values are skipped.
    next if !params[k] || last_params.include?(k)
    args << "-#{k}"
    # Boolean switches carry no value: only the flag name is emitted.
    args << params[k] unless params[k] == true
  end

  # Append positional values in the required order. Absent optional ones are
  # skipped so they do not turn into empty-string arguments.
  last_params.each { |p| args << params[p] unless params[p].nil? }
  args
end
|
288
|
+
|
289
|
+
# Internal method to check if mandatory params have been set
|
290
|
+
# @note this method should not be called directly
|
291
|
+
def self.check_mandatory(mandatory_params, params)
  # Raises ArgumentError for the first mandatory name missing from params.
  # Only key presence is checked, not the value.
  mandatory_params.each do |name|
    next if params.include?(name)
    raise ArgumentError, "You must provide parameter '#{name}'"
  end
end
|
294
|
+
|
295
|
+
# Internal method used to change parameters name from Ruby to BWA functions
|
296
|
+
# @note this method should not be called directly
|
297
|
+
def self.change_arg_name(hash, key, new_key)
  # Returns a copy of hash in which the value stored under key is stored
  # under new_key instead. The hash passed in is left untouched, so callers
  # can reuse the same options hash for several calls.
  renamed = hash.dup
  renamed[new_key] = renamed.delete(key)
  renamed
end
|
302
|
+
|
303
|
+
# Hide the internal helpers from the public API.
private_class_method :call_BWA_function, :build_args_for_BWA, :build_parameters, :check_mandatory
|
307
|
+
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
|
312
|
+
|