bio-bwa 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
- data/Gemfile +15 -0
- data/Gemfile.lock +28 -0
- data/LICENSE.txt +35 -0
- data/README.rdoc +33 -0
- data/Rakefile +56 -0
- data/VERSION +1 -0
- data/bio-bwa.gemspec +152 -0
- data/doc/Bio.html +93 -0
- data/doc/Bio/BWA.html +2884 -0
- data/doc/Bio/BWA/Library.html +229 -0
- data/doc/_index.html +119 -0
- data/doc/class_list.html +36 -0
- data/doc/css/common.css +1 -0
- data/doc/css/full_list.css +53 -0
- data/doc/css/style.css +310 -0
- data/doc/file.LICENSE.html +88 -0
- data/doc/file.README.html +119 -0
- data/doc/file_list.html +41 -0
- data/doc/frames.html +13 -0
- data/doc/index.html +119 -0
- data/doc/js/app.js +203 -0
- data/doc/js/full_list.js +149 -0
- data/doc/js/jquery.js +154 -0
- data/doc/method_list.html +171 -0
- data/doc/top-level-namespace.html +88 -0
- data/ext/COPYING +674 -0
- data/ext/ChangeLog +3864 -0
- data/ext/NEWS +555 -0
- data/ext/README +29 -0
- data/ext/bamlite.c +155 -0
- data/ext/bamlite.h +94 -0
- data/ext/bntseq.c +303 -0
- data/ext/bntseq.h +80 -0
- data/ext/bwa.1 +562 -0
- data/ext/bwape.c +807 -0
- data/ext/bwase.c +686 -0
- data/ext/bwase.h +27 -0
- data/ext/bwaseqio.c +222 -0
- data/ext/bwt.c +250 -0
- data/ext/bwt.h +105 -0
- data/ext/bwt_gen/Makefile +23 -0
- data/ext/bwt_gen/QSufSort.c +496 -0
- data/ext/bwt_gen/QSufSort.h +40 -0
- data/ext/bwt_gen/bwt_gen.c +1547 -0
- data/ext/bwt_gen/bwt_gen.h +105 -0
- data/ext/bwt_lite.c +94 -0
- data/ext/bwt_lite.h +29 -0
- data/ext/bwtaln.c +345 -0
- data/ext/bwtaln.h +150 -0
- data/ext/bwtgap.c +264 -0
- data/ext/bwtgap.h +38 -0
- data/ext/bwtindex.c +186 -0
- data/ext/bwtio.c +77 -0
- data/ext/bwtmisc.c +269 -0
- data/ext/bwtsw2.h +51 -0
- data/ext/bwtsw2_aux.c +650 -0
- data/ext/bwtsw2_chain.c +107 -0
- data/ext/bwtsw2_core.c +594 -0
- data/ext/bwtsw2_main.c +100 -0
- data/ext/cs2nt.c +191 -0
- data/ext/is.c +218 -0
- data/ext/khash.h +506 -0
- data/ext/kseq.h +208 -0
- data/ext/ksort.h +269 -0
- data/ext/kstring.c +35 -0
- data/ext/kstring.h +46 -0
- data/ext/kvec.h +90 -0
- data/ext/main.c +63 -0
- data/ext/main.h +29 -0
- data/ext/mkrf_conf.rb +49 -0
- data/ext/qualfa2fq.pl +27 -0
- data/ext/simple_dp.c +162 -0
- data/ext/simpletest.c +23 -0
- data/ext/solid2fastq.pl +111 -0
- data/ext/stdaln.c +1072 -0
- data/ext/stdaln.h +162 -0
- data/ext/utils.c +82 -0
- data/ext/utils.h +54 -0
- data/lib/bio-bwa.rb +7 -0
- data/lib/bio/bwa.rb +312 -0
- data/lib/bio/bwa/library.rb +42 -0
- data/test/data/testdata.fa +602 -0
- data/test/data/testdata.long.fa +175 -0
- data/test/data/testdata.short.fa +2 -0
- data/test/helper.rb +18 -0
- data/test/test_bio-bwa_basic.rb +62 -0
- data/test/test_bio-bwa_make_index.rb +42 -0
- data/test/test_bio-bwa_run_aln.rb +49 -0
- data/test/test_bio-bwa_sam_conversion.rb +49 -0
- metadata +218 -0
data/ext/stdaln.h
ADDED
@@ -0,0 +1,162 @@
|
|
1
|
+
/* The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2003-2006, 2008, by Heng Li <lh3lh3@gmail.com>
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
20
|
+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
21
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
22
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
23
|
+
SOFTWARE.
|
24
|
+
*/
|
25
|
+
|
26
|
+
/*
|
27
|
+
2009-07-23, 0.10.0
|
28
|
+
|
29
|
+
- Use 32-bit to store CIGAR
|
30
|
+
|
31
|
+
- Report suboptimal alignments
|
32
|
+
|
33
|
+
- Implemented half-fixed-half-open DP
|
34
|
+
|
35
|
+
2009-04-26, 0.9.10
|
36
|
+
|
37
|
+
- Allow to set a threshold for local alignment
|
38
|
+
|
39
|
+
2009-02-18, 0.9.9
|
40
|
+
|
41
|
+
- Fixed a bug when no residue matches
|
42
|
+
|
43
|
+
2008-08-04, 0.9.8
|
44
|
+
|
45
|
+
- Fixed the wrong declaration of aln_stdaln_aux()
|
46
|
+
|
47
|
+
- Avoid 0 coordinate for global alignment
|
48
|
+
|
49
|
+
2008-08-01, 0.9.7
|
50
|
+
|
51
|
+
- Change gap_end penalty to 5 in aln_param_bwa
|
52
|
+
|
53
|
+
- Add function to convert path_t to the CIGAR format
|
54
|
+
|
55
|
+
2008-08-01, 0.9.6
|
56
|
+
|
57
|
+
- The first gap now costs (gap_open+gap_ext), instead of
|
58
|
+
gap_open. Scoring systems are modified accordingly.
|
59
|
+
|
60
|
+
- Gap end is now correctly handled. Previously it was handled incorrectly.
|
61
|
+
|
62
|
+
- Change license to MIT.
|
63
|
+
|
64
|
+
*/
|
65
|
+
|
66
|
+
#ifndef LH3_STDALN_H_
|
67
|
+
#define LH3_STDALN_H_
|
68
|
+
|
69
|
+
|
70
|
+
#define STDALN_VERSION 0.11.0
|
71
|
+
|
72
|
+
#include <stdint.h>
|
73
|
+
|
74
|
+
#define FROM_M 0
|
75
|
+
#define FROM_I 1
|
76
|
+
#define FROM_D 2
|
77
|
+
#define FROM_S 3
|
78
|
+
|
79
|
+
#define ALN_TYPE_LOCAL 0
|
80
|
+
#define ALN_TYPE_GLOBAL 1
|
81
|
+
#define ALN_TYPE_EXTEND 2
|
82
|
+
|
83
|
+
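/* Stand-in for "minus infinity" in score computations. Note that the value is -(2^30 - 1), not the smallest representable int; presumably this leaves headroom so that adding scores to it does not overflow. It might be CPU-dependent in very RARE cases. */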
|
84
|
+
#define MINOR_INF -1073741823
|
85
|
+
|
86
|
+
typedef struct
|
87
|
+
{
|
88
|
+
int gap_open;
|
89
|
+
int gap_ext;
|
90
|
+
int gap_end;
|
91
|
+
|
92
|
+
int *matrix;
|
93
|
+
int row;
|
94
|
+
int band_width;
|
95
|
+
} AlnParam;
|
96
|
+
|
97
|
+
typedef struct
|
98
|
+
{
|
99
|
+
int i, j;
|
100
|
+
unsigned char ctype;
|
101
|
+
} path_t;
|
102
|
+
|
103
|
+
typedef struct
|
104
|
+
{
|
105
|
+
path_t *path; /* for advanced users... :-) */
|
106
|
+
int path_len; /* for advanced users... :-) */
|
107
|
+
int start1, end1; /* start and end of the first sequence, coordinates are 1-based */
|
108
|
+
int start2, end2; /* start and end of the second sequence, coordinates are 1-based */
|
109
|
+
int score, subo; /* score */
|
110
|
+
|
111
|
+
char *out1, *out2; /* print them, and then you will know */
|
112
|
+
char *outm;
|
113
|
+
|
114
|
+
int n_cigar;
|
115
|
+
uint32_t *cigar32;
|
116
|
+
} AlnAln;
|
117
|
+
|
118
|
+
#ifdef __cplusplus
|
119
|
+
extern "C" {
|
120
|
+
#endif
|
121
|
+
|
122
|
+
AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,
|
123
|
+
int type, int do_align, int len1, int len2);
|
124
|
+
AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int type, int do_align);
|
125
|
+
void aln_free_AlnAln(AlnAln *aa);
|
126
|
+
|
127
|
+
int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
|
128
|
+
path_t *path, int *path_len);
|
129
|
+
int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
|
130
|
+
path_t *path, int *path_len, int _thres, int *_subo);
|
131
|
+
int aln_extend_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
|
132
|
+
path_t *path, int *path_len, int G0, uint8_t *_mem);
|
133
|
+
uint16_t *aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
|
134
|
+
uint32_t *aln_path2cigar32(const path_t *path, int path_len, int *n_cigar);
|
135
|
+
|
136
|
+
#ifdef __cplusplus
|
137
|
+
}
|
138
|
+
#endif
|
139
|
+
|
140
|
+
/********************
|
141
|
+
* global variables *
|
142
|
+
********************/
|
143
|
+
|
144
|
+
extern AlnParam aln_param_bwa; /* = { 37, 9, 0, aln_sm_maq, 5, 50 }; */
|
145
|
+
extern AlnParam aln_param_blast; /* = { 5, 2, 2, aln_sm_blast, 5, 50 }; */
|
146
|
+
extern AlnParam aln_param_nt2nt; /* = { 10, 2, 2, aln_sm_nt, 16, 75 }; */
|
147
|
+
extern AlnParam aln_param_aa2aa; /* = { 20, 19, 19, aln_sm_read, 16, 75 }; */
|
148
|
+
extern AlnParam aln_param_rd2rd; /* = { 12, 2, 2, aln_sm_blosum62, 22, 50 }; */
|
149
|
+
|
150
|
+
/* common nucleotide score matrix for 16 bases */
|
151
|
+
extern int aln_sm_nt[], aln_sm_bwa[];
|
152
|
+
|
153
|
+
/* BLOSUM62 and BLOSUM45 */
|
154
|
+
extern int aln_sm_blosum62[], aln_sm_blosum45[];
|
155
|
+
|
156
|
+
/* common read for 16 bases. note that read alignment is quite different from common nucleotide alignment */
|
157
|
+
extern int aln_sm_read[];
|
158
|
+
|
159
|
+
/* human-mouse score matrix for 4 bases */
|
160
|
+
extern int aln_sm_hs[];
|
161
|
+
|
162
|
+
#endif
|
data/ext/utils.c
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
/* The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2008 Genome Research Ltd (GRL).
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
20
|
+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
21
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
22
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
23
|
+
SOFTWARE.
|
24
|
+
*/
|
25
|
+
|
26
|
+
/* Contact: Heng Li <lh3@sanger.ac.uk> */
|
27
|
+
|
28
|
+
#include <stdio.h>
|
29
|
+
#include <stdarg.h>
|
30
|
+
#include <stdlib.h>
|
31
|
+
#include <string.h>
|
32
|
+
#include <zlib.h>
|
33
|
+
#include "utils.h"
|
34
|
+
|
35
|
+
/*
 * Open `fn` with fopen() semantics, aborting the process on failure.
 *
 * func: name of the calling function, used as the prefix of the error message.
 * fn:   path to open; the special name "-" selects a standard stream instead.
 * mode: fopen() mode string.
 *
 * Returns stdin (mode contains 'r') or stdout (otherwise) when fn is "-",
 * else the newly opened stream. Never returns NULL: a failed fopen() prints a
 * message to stderr and calls abort().
 */
FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
{
	FILE *stream;

	if (strcmp(fn, "-") == 0) {
		if (strstr(mode, "r") != NULL)
			return stdin;
		return stdout;
	}

	stream = fopen(fn, mode);
	if (stream == NULL) {
		fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
		abort();
	}
	return stream;
}
|
46
|
+
/*
 * freopen() wrapper that aborts the process on failure.
 *
 * func: name of the calling function, used as the prefix of the error message.
 * fn, mode, fp: passed straight through to freopen().
 *
 * Returns fp on success. On failure it prints the message followed by the
 * perror() text to stderr and calls abort().
 */
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp)
{
	FILE *reopened = freopen(fn, mode, fp);

	if (reopened == NULL) {
		fprintf(stderr, "[%s] fail to open file '%s': ", func, fn);
		perror(NULL);
		fprintf(stderr, "Abort!\n");
		abort();
	}
	return fp;
}
|
56
|
+
/*
 * Open `fn` as a zlib stream, aborting the process on failure.
 *
 * func: name of the calling function, used as the prefix of the error message.
 * fn:   path to open; "-" wraps a standard stream (stdin when the mode
 *       contains 'r', stdout otherwise) with gzdopen().
 * mode: gzopen()/gzdopen() mode string.
 *
 * Never returns a null handle: both the gzopen() and the gzdopen() path are
 * checked, and a failure prints a message to stderr and calls abort().
 * (Previously a failed gzdopen() on "-" was returned to the caller unchecked.)
 */
gzFile err_xzopen_core(const char *func, const char *fn, const char *mode)
{
	gzFile fp;

	if (strcmp(fn, "-") == 0)
		fp = gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode);
	else
		fp = gzopen(fn, mode);

	if (fp == 0) {
		fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
		abort();
	}
	return fp;
}
|
67
|
+
/*
 * Print "[header] <formatted message> Abort!" to stderr and abort the process.
 *
 * header: text shown between square brackets at the start of the line.
 * fmt:    printf-style format string, followed by its arguments.
 *
 * Does not return.
 */
void err_fatal(const char *header, const char *fmt, ...)
{
	va_list ap;

	fprintf(stderr, "[%s] ", header);
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fputs(" Abort!\n", stderr);
	abort();
}
|
77
|
+
|
78
|
+
/*
 * Print "[func] msg Abort!" to stderr and abort the process.
 *
 * func: name of the reporting function (shown in square brackets).
 * msg:  plain message text; it is printed verbatim, not used as a format.
 *
 * Does not return.
 */
void err_fatal_simple_core(const char *func, const char *msg)
{
	FILE *out = stderr;

	fprintf(out, "[%s] %s Abort!\n", func, msg);
	abort();
}
|
data/ext/utils.h
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
/* The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2008 Genome Research Ltd (GRL).
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
20
|
+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
21
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
22
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
23
|
+
SOFTWARE.
|
24
|
+
*/
|
25
|
+
|
26
|
+
/* Contact: Heng Li <lh3@sanger.ac.uk> */
|
27
|
+
|
28
|
+
#ifndef LH3_UTILS_H
|
29
|
+
#define LH3_UTILS_H
|
30
|
+
|
31
|
+
#include <stdio.h>
|
32
|
+
#include <zlib.h>
|
33
|
+
|
34
|
+
#define err_fatal_simple(msg) err_fatal_simple_core(__func__, msg)
|
35
|
+
#define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
|
36
|
+
#define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp)
|
37
|
+
#define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode)
|
38
|
+
/* Abort with `msg` when `cond` evaluates to zero. Written as a single
 * expression (instead of a bare `if`) so that it cannot capture a following
 * `else` when used inside an unbraced if/else. */
#define xassert(cond, msg) (((cond) == 0) ? err_fatal_simple_core(__func__, msg) : (void)0)
|
39
|
+
|
40
|
+
#ifdef __cplusplus
|
41
|
+
extern "C" {
|
42
|
+
#endif
|
43
|
+
|
44
|
+
void err_fatal(const char *header, const char *fmt, ...);
|
45
|
+
void err_fatal_simple_core(const char *func, const char *msg);
|
46
|
+
FILE *err_xopen_core(const char *func, const char *fn, const char *mode);
|
47
|
+
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp);
|
48
|
+
gzFile err_xzopen_core(const char *func, const char *fn, const char *mode);
|
49
|
+
|
50
|
+
#ifdef __cplusplus
|
51
|
+
}
|
52
|
+
#endif
|
53
|
+
|
54
|
+
#endif
|
data/lib/bio-bwa.rb
ADDED
data/lib/bio/bwa.rb
ADDED
@@ -0,0 +1,312 @@
|
|
1
|
+
module Bio
|
2
|
+
# @author Francesco Strozzi https://github.com/fstrozzi
|
3
|
+
class BWA
|
4
|
+
extend FFI::Library
|
5
|
+
ffi_lib Bio::BWA::Library.load
|
6
|
+
|
7
|
+
# Convert a Fasta to Packed format
# @param [Hash] params Options
# @option params [String] :file_in the Fasta or FastQ file (REQUIRED)
# @option params [String] :prefix the prefix name for the PAC file
# @raise [ArgumentError] if :file_in is missing or an unknown option is given
def self.fa2pac(params={})
  # %w (word array), not %q (plain string): with a String the validity check
  # degrades to substring matching and accepts keys such as :file or :in.
  valid_params = %w(file_in prefix)
  last_params = [:file_in, :prefix]
  mandatory_params = [:file_in]
  check_mandatory(mandatory_params, params)
  args = build_parameters("fa2pac", valid_params, params, last_params)
  call_BWA_function(args)
end
|
19
|
+
|
20
|
+
# Convert a Packed file format to Burrows-Wheeler Transform format
# @param [Hash] params Options
# @option params [String] :file_in the PAC file (REQUIRED)
# @option params [String] :file_out the name of the BWT file (REQUIRED)
# @raise [ArgumentError] if a required option is missing or an unknown option is given
def self.pac2bwt(params={})
  # %w (word array), not %q (plain string): with a String the validity check
  # degrades to substring matching and accepts keys such as :file or :out.
  valid_params = %w(file_in file_out)
  last_params = [:file_in, :file_out]
  check_mandatory(last_params, params)
  args = build_parameters("pac2bwt", valid_params, params, last_params)
  call_BWA_function(args)
end
|
31
|
+
|
32
|
+
# Convert a BWT file to the new BWT format
# @param [Hash] params Options
# @option params [String] :file_in the BWT file (REQUIRED)
# @note this method overwrites the existing BWT file
def self.bwtupdate(params={})
  positional = [:file_in]
  check_mandatory(positional, params)
  call_BWA_function(build_parameters("bwtupdate", %w(file_in), params, positional))
end
|
43
|
+
|
44
|
+
# Generate reverse Packed format
# @param [Hash] params Options
# @option params [String] :file_in the PAC file (REQUIRED)
# @option params [String] :file_out the name of the REV PAC (REQUIRED)
def self.pac_rev(params={})
  positional = [:file_in, :file_out]
  check_mandatory(positional, params)
  call_BWA_function(build_parameters("pac_rev", %w(file_in file_out), params, positional))
end
|
55
|
+
|
56
|
+
# Generate SA file from BWT and Occ files
# @param [Hash] params Options
# @option params [String] :file_in the BWT file (REQUIRED)
# @option params [String] :file_out the name of the SA file (REQUIRED)
# @option params [Integer] :i passed to BWA as -i (presumably the suffix array
#   sampling interval; confirm against the bwa manual)
# @raise [ArgumentError] if a required option is missing or an unknown option is given
def self.bwt2sa(params={})
  # %w (word array), not %q (plain string): with a String the validity check
  # degrades to substring matching and accepts keys such as :file or :out.
  valid_params = %w(file_in file_out i)
  last_params = [:file_in, :file_out]
  check_mandatory(last_params, params)
  args = build_parameters("bwt2sa", valid_params, params, last_params)
  call_BWA_function(args)
end
|
67
|
+
|
68
|
+
# Generate the BWT index for a Fasta database
# @param [Hash] params Options
# @option params [String] :file_in the Fasta file (REQUIRED)
# @option params [String] :p the prefix for the database files that will be generated [default is Fasta name]
# @option params [String] :prefix alias for :p (takes precedence when both are given)
# @option params [String] :a the algorithm to be used for indexing: 'is' (short database)[default] or 'bwtsw' (long database)
# @option params [Boolean] :c colorspace database index
# @note Boolean values must be set to 'true'
# @raise [ArgumentError] if :file_in is missing or an unknown option is given
def self.make_index(params = {})
  valid_params = %w(file_in p a c)
  mandatory_params = [:file_in]
  last_params = [:file_in]
  check_mandatory(mandatory_params, params)
  # Work on a copy: renaming :prefix must not alter the caller's hash.
  params = params.dup
  params = change_arg_name(params, :prefix, :p) if params[:prefix]
  args = build_parameters("index", valid_params, params, last_params)
  call_BWA_function(args)
end
|
84
|
+
|
85
|
+
# Run the alignment for short query sequences
# @param [Hash] params Options
# @option params [String] :file_in the FastQ file (REQUIRED)
# @option params [String] :prefix the prefix of the database index files (REQUIRED)
# @option params [String] :file_out the output of the alignment in SAI format (REQUIRED)
# @option params [Integer] :n max #diff (int) or missing prob under 0.02 err rate (float) [0.04]
# @option params [Integer] :o maximum number or fraction of gap opens [1]
# @option params [Integer] :e maximum number of gap extensions, -1 for disabling long gaps [-1]
# @option params [Integer] :m maximum entries in the queue [2000000]
# @option params [Integer] :t number of threads [1]
# @option params [Integer] :M mismatch penalty [3]
# @option params [Integer] :O gap open penalty [11]
# @option params [Integer] :R stop searching when there are >INT equally best hits [30]
# @option params [Integer] :q quality threshold for read trimming down to 35bp [0]
# @option params [Integer] :B length of barcode
# @option params [Boolean] :c input sequences are in the color space
# @option params [Boolean] :L log-scaled gap penalty for long deletions
# @option params [Boolean] :N non-iterative mode: search for all n-difference hits (slow)
# @option params [Boolean] :I the input is in the Illumina 1.3+ FASTQ-like format
# @option params [Boolean] :b the input read file is in the BAM format
# @option params [Boolean] :single use single-end reads only (effective with -b)
# @option params [Boolean] :first use the 1st read in a pair (effective with -b)
# @option params [Boolean] :second use the 2nd read in a pair (effective with -b)
# @option params [Integer] :i do not put an indel within INT bp towards the ends [5]
# @option params [Integer] :d maximum occurrences for extending a long deletion [10]
# @option params [Integer] :l seed length [32]
# @option params [Integer] :k maximum differences in the seed [2]
# @option params [Integer] :E gap extension penalty [4]
# @note Boolean values must be set to 'true'
# @raise [ArgumentError] if a required option is missing or an unknown option is given
def self.short_read_alignment(params={})
  # "0", "1" and "2" are the option names :single, :first and :second are
  # renamed to below; they must be listed here, otherwise the renamed keys
  # are rejected as unknown parameters.
  valid_params = %w(n o e i d l k c L R m t N M O E q f b 0 1 2 single first second I B prefix file_in)
  mandatory_params = [:prefix, :file_in, :file_out]
  last_params = [:prefix, :file_in]
  check_mandatory(mandatory_params, params)
  # Work on a copy: the renames below must not alter the caller's hash.
  params = params.dup
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  params = change_arg_name(params, :single, "0") if params[:single]
  params = change_arg_name(params, :first, "1") if params[:first]
  params = change_arg_name(params, :second, "2") if params[:second]
  args = build_parameters("aln", valid_params, params, last_params)
  call_BWA_function(args)
end
|
127
|
+
|
128
|
+
# Convert the SAI alignment output into SAM format (single end)
# @param [Hash] params Options
# @option params [String] :fastq the FastQ file (REQUIRED)
# @option params [String] :prefix the prefix of the database index files (REQUIRED)
# @option params [String] :sai the alignment file in SAI format (REQUIRED)
# @option params [String] :file_out the file name of the SAM output (passed to BWA as -f)
# @option params [Integer] :n max_occ
# @option params [String] :r RG_line
# @raise [ArgumentError] if a required option is missing or an unknown option is given
def self.sai_to_sam_single(params = {})
  valid_params = %w(n r fastq sai prefix f)
  mandatory_params = [:prefix, :sai, :fastq]
  last_params = [:prefix, :sai, :fastq]
  check_mandatory(mandatory_params, params)
  # Work on a copy: renaming :file_out must not alter the caller's hash.
  params = params.dup
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  args = build_parameters("sai2sam_se", valid_params, params, last_params)
  call_BWA_function(args)
end
|
145
|
+
|
146
|
+
|
147
|
+
# Convert the SAI alignment output into SAM format (paired ends)
# @param [Hash] params Options
# @option params [String] :prefix the prefix of the database index files (REQUIRED)
# @option params [Array] :sai the two alignment files in SAI format (REQUIRED)
# @option params [Array] :fastq the two fastq files (REQUIRED)
# @option params [String] :file_out the file name of the SAM output (passed to BWA as -f)
# @option params [Integer] :a maximum insert size [500]
# @option params [Integer] :o maximum occurrences for one end [100000]
# @option params [Integer] :n maximum hits to output for paired reads [3]
# @option params [Integer] :N maximum hits to output for discordant pairs [10]
# @option params [Float] :c prior of chimeric rate (lower bound) [1.0e-05]
# @option params [String] :r read group header line such as '@RG\tID:foo\tSM:bar'
# @option params [Boolean] :P preload index into memory (for base-space reads only)
# @option params [Boolean] :s disable Smith-Waterman for the unmapped mate
# @option params [Boolean] :A disable insert size estimate (force :s)
# @note Boolean values must be set to 'true'
# @raise [ArgumentError] if a required option is missing, an unknown option is
#   given, or :sai / :fastq is not an Array of exactly two entries
def self.sai_to_sam_paired(params = {})
  valid_params = %w(a o s P n N c f A r prefix first_sai second_sai first_fastq second_fastq)
  mandatory_params = [:prefix, :sai, :fastq]
  last_params = [:prefix, :first_sai, :second_sai, :first_fastq, :second_fastq]
  check_mandatory(mandatory_params, params)
  # Work on a copy: :sai, :fastq and :file_out are removed/renamed below and
  # the caller's hash must stay intact (so it can be reused for another call).
  params = params.dup
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  if params[:sai]
    unless params[:sai].is_a?(Array) && params[:sai].size == 2
      raise ArgumentError, "you must provide an array with two SAI files!"
    end
    # split the pair into the two positional arguments BWA expects
    params[:first_sai], params[:second_sai] = params.delete(:sai)
  end
  if params[:fastq]
    unless params[:fastq].is_a?(Array) && params[:fastq].size == 2
      raise ArgumentError, "you must provide an array with two FastQ files!"
    end
    params[:first_fastq], params[:second_fastq] = params.delete(:fastq)
  end
  args = build_parameters("sai2sam_pe", valid_params, params, last_params)
  call_BWA_function(args)
end
|
183
|
+
|
184
|
+
# Run the alignment for long query sequences
# @param [Hash] params Options
# @option params [String] :file_in the FastQ file (REQUIRED)
# @option params [String] :prefix the prefix of the database index files (REQUIRED)
# @option params [String] :file_out the output of the alignment in SAM format (REQUIRED)
# @option params [Integer] :a score for a match [1]
# @option params [Integer] :b mismatch penalty [3]
# @option params [Integer] :q gap open penalty [5]
# @option params [Integer] :r gap extension penalty [2]
# @option params [Integer] :t number of threads [1]
# @option params [Integer] :w band width [50]
# @option params [Float] :m mask level [0.50]
# @option params [Integer] :T score threshold divided by a [30]
# @option params [Integer] :s maximum seeding interval size [3]
# @option params [Integer] :z Z-best [1]
# @option params [Integer] :N number of seeds to trigger reverse alignment [5]
# @option params [Float] :c coefficient of length-threshold adjustment [5.5]
# @option params [Boolean] :H in SAM output, use hard clipping rather than soft
# @note Boolean arguments must be set to 'true'
# @raise [ArgumentError] if a required option is missing or an unknown option is given
def self.long_read_alignment(params = {})
  valid_params = %w(q r a b t T w d z m y s c N H f prefix file_in)
  mandatory_params = [:prefix, :file_in, :file_out]
  last_params = [:prefix, :file_in]
  check_mandatory(mandatory_params, params)
  # Work on a copy: renaming :file_out must not alter the caller's hash.
  params = params.dup
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  args = build_parameters("bwtsw2", valid_params, params, last_params)
  call_BWA_function(args)
end
|
212
|
+
|
213
|
+
# Run the alignment between multiple short sequences and ONE long sequence
# @param [Hash] params Options
# @option params [String] :short_seq the short query sequence (REQUIRED)
# @option params [String] :long_seq the long database sequence (REQUIRED)
# @option params [String] :file_out the alignment output; when given, standard
#   output is redirected to this file for the duration of the call
# @option params [Integer] :T minimum score [1]
# @option params [Boolean] :p protein alignment (suppressing :r)
# @option params [Boolean] :f forward strand only
# @option params [Boolean] :r reverse strand only
# @option params [Boolean] :g global alignment
# @note Boolean values must be set to 'true'
# @raise [ArgumentError] if a required option is missing or an unknown option is given
# @return the value returned by the BWA C function
def self.simple_SW(params = {})
  valid_params = %w(g T f r p file_out long_seq short_seq)
  mandatory_params = [:long_seq, :short_seq]
  last_params = mandatory_params
  check_mandatory(mandatory_params, params)
  # Work on a copy: :file_out is removed below and the caller's hash must stay intact.
  params = params.dup
  file_out = params.delete(:file_out)
  args = build_parameters("stdsw", valid_params, params, last_params)
  return call_BWA_function(args) unless file_out

  # Remember where stdout really points (terminal, pipe or file) so it can be
  # restored afterwards; reopening "/dev/tty" is wrong when stdout was already
  # redirected and fails when the process has no controlling terminal.
  saved_stdout = $stdout.dup
  begin
    $stdout.reopen(file_out, "w")
    # NOTE(review): output still sitting in the C library's stdio buffer when
    # the descriptor is restored may end up on the original stdout - confirm
    # whether an explicit fflush is needed on the C side.
    call_BWA_function(args)
  ensure
    $stdout.reopen(saved_stdout)
    saved_stdout.close
  end
end
|
237
|
+
|
238
|
+
|
239
|
+
|
240
|
+
######## Methods to handle C functions and arguments ########
|
241
|
+
|
242
|
+
attach_function :bwa_fa2pac, [:int,:pointer], :int
|
243
|
+
attach_function :bwa_pac2bwt, [:int,:pointer], :int
|
244
|
+
attach_function :bwa_bwtupdate, [:int,:pointer], :int
|
245
|
+
attach_function :bwa_pac_rev, [:int,:pointer], :int
|
246
|
+
attach_function :bwa_bwt2sa, [:int,:pointer], :int
|
247
|
+
attach_function :bwa_index, [:int,:pointer], :int
|
248
|
+
attach_function :bwa_aln, [:int,:pointer], :int
|
249
|
+
attach_function :bwa_sai2sam_se, [:int, :pointer], :int
|
250
|
+
attach_function :bwa_sai2sam_pe, [:int,:pointer], :int
|
251
|
+
attach_function :bwa_bwtsw2, [:int, :pointer], :int
|
252
|
+
attach_function :bwa_stdsw, [:int, :pointer], :int
|
253
|
+
|
254
|
+
# Internal method to call the BWA C functions.
# The first element of +args+ selects the attached function ("bwa_<name>");
# the function receives the number of arguments and the argument vector,
# i.e. the same as (int argc, char *argv[]).
# @note this method should not be called directly
def self.call_BWA_function(args)
  argv = build_args_for_BWA(args)
  c_function = "bwa_#{args[0]}".to_sym
  self.send(c_function, args.size, argv)
end
|
260
|
+
|
261
|
+
# Internal method to build argument list for BWA C functions.
# Every element of +args+ is converted with #to_s, copied into native memory
# and its address stored in a native array of pointers (a C "char *argv[]").
# @param [Array] args the argument list, function name first
# @return [FFI::MemoryPointer] pointer to the native argv array
# @note this method should not be called directly
def self.build_args_for_BWA(args)
  string_ptrs = args.map { |arg| FFI::MemoryPointer.from_string(arg.to_s) }
  # One extra slot: MemoryPointer memory is zero-filled by default, so the
  # vector ends with a NULL entry (argv[argc] == NULL) as C code may expect.
  argv = FFI::MemoryPointer.new(:pointer, string_ptrs.length + 1)
  string_ptrs.each_with_index do |ptr, i|
    argv[i].put_pointer(0, ptr)
  end
  # argv only stores raw addresses. Keep the string buffers reachable for as
  # long as argv itself is alive, otherwise the garbage collector may free
  # them before (or while) the C function reads them.
  argv.instance_variable_set(:@bwa_argv_strings, string_ptrs)
  argv
end
|
273
|
+
|
274
|
+
# Internal method to produce a correct parameter list for BWA functions.
# Options are emitted as "-name" followed by their value (flags set to +true+
# are emitted without a value); the positional parameters named in
# +last_params+ are appended at the end, in the given order.
# @param [String] function_name name of the BWA function, first element of the result
# @param [Array<String>, String] valid_params accepted parameter names
#   (a whitespace-separated String is also accepted)
# @param [Hash] params the user supplied parameters
# @param [Array] last_params keys of the positional parameters
# @return [Array] the argument list
# @raise [ArgumentError] if +params+ contains a key not listed in +valid_params+
# @note this method should not be called directly
def self.build_parameters(function_name,valid_params,params,last_params)
  # Compare whole names only: String#include? would do substring matching and
  # let through keys such as :file when "file_in" is valid.
  allowed = valid_params.is_a?(String) ? valid_params.split : valid_params.map { |name| name.to_s }
  args = [function_name]
  params.each do |key, value|
    raise ArgumentError, "Unknown parameter '#{key}'" unless allowed.include?(key.to_s)
    # skip unset values and the positional parameters (added at the end)
    next if !value || last_params.include?(key)
    args << "-#{key}"
    args << value unless value == true # boolean flags carry no value
  end
  # Positional parameters go last. A missing optional one is left out instead
  # of being passed on as nil (which would reach the C code as an empty string).
  last_params.each { |key| args << params[key] unless params[key].nil? }
  args
end
|
288
|
+
|
289
|
+
# Internal method to check if mandatory params have been set.
# A parameter counts as set only when it has a non-nil value; a key that is
# present but nil is rejected as well, since it would otherwise be handed to
# the C code as an empty argument.
# @param [Array] mandatory_params keys that must be present in +params+
# @param [Hash] params the user supplied parameters
# @raise [ArgumentError] naming the first missing parameter
# @note this method should not be called directly
def self.check_mandatory(mandatory_params, params)
  mandatory_params.each do |name|
    raise ArgumentError, "You must provide parameter '#{name}'" if params[name].nil?
  end
end
|
294
|
+
|
295
|
+
# Internal method used to change parameters name from Ruby to BWA functions.
# The value stored under +key+ is copied to +new_key+ and +key+ is removed.
# The hash is modified in place and returned.
# @note this method should not be called directly
def self.change_arg_name(hash,key,new_key)
  hash.store(new_key, hash[key])
  hash.delete(key)
  hash
end
|
302
|
+
|
303
|
+
# Helper methods are implementation details of the public BWA wrappers.
# change_arg_name is documented as "should not be called directly" like the
# others, so it is hidden together with them.
private_class_method :call_BWA_function,
                     :build_args_for_BWA,
                     :build_parameters,
                     :check_mandatory,
                     :change_arg_name
|
307
|
+
|
308
|
+
end
|
309
|
+
end
|
310
|
+
|
311
|
+
|
312
|
+
|