bio-bwa 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. data/.document +5 -0
  2. data/Gemfile +15 -0
  3. data/Gemfile.lock +28 -0
  4. data/LICENSE.txt +35 -0
  5. data/README.rdoc +33 -0
  6. data/Rakefile +56 -0
  7. data/VERSION +1 -0
  8. data/bio-bwa.gemspec +152 -0
  9. data/doc/Bio.html +93 -0
  10. data/doc/Bio/BWA.html +2884 -0
  11. data/doc/Bio/BWA/Library.html +229 -0
  12. data/doc/_index.html +119 -0
  13. data/doc/class_list.html +36 -0
  14. data/doc/css/common.css +1 -0
  15. data/doc/css/full_list.css +53 -0
  16. data/doc/css/style.css +310 -0
  17. data/doc/file.LICENSE.html +88 -0
  18. data/doc/file.README.html +119 -0
  19. data/doc/file_list.html +41 -0
  20. data/doc/frames.html +13 -0
  21. data/doc/index.html +119 -0
  22. data/doc/js/app.js +203 -0
  23. data/doc/js/full_list.js +149 -0
  24. data/doc/js/jquery.js +154 -0
  25. data/doc/method_list.html +171 -0
  26. data/doc/top-level-namespace.html +88 -0
  27. data/ext/COPYING +674 -0
  28. data/ext/ChangeLog +3864 -0
  29. data/ext/NEWS +555 -0
  30. data/ext/README +29 -0
  31. data/ext/bamlite.c +155 -0
  32. data/ext/bamlite.h +94 -0
  33. data/ext/bntseq.c +303 -0
  34. data/ext/bntseq.h +80 -0
  35. data/ext/bwa.1 +562 -0
  36. data/ext/bwape.c +807 -0
  37. data/ext/bwase.c +686 -0
  38. data/ext/bwase.h +27 -0
  39. data/ext/bwaseqio.c +222 -0
  40. data/ext/bwt.c +250 -0
  41. data/ext/bwt.h +105 -0
  42. data/ext/bwt_gen/Makefile +23 -0
  43. data/ext/bwt_gen/QSufSort.c +496 -0
  44. data/ext/bwt_gen/QSufSort.h +40 -0
  45. data/ext/bwt_gen/bwt_gen.c +1547 -0
  46. data/ext/bwt_gen/bwt_gen.h +105 -0
  47. data/ext/bwt_lite.c +94 -0
  48. data/ext/bwt_lite.h +29 -0
  49. data/ext/bwtaln.c +345 -0
  50. data/ext/bwtaln.h +150 -0
  51. data/ext/bwtgap.c +264 -0
  52. data/ext/bwtgap.h +38 -0
  53. data/ext/bwtindex.c +186 -0
  54. data/ext/bwtio.c +77 -0
  55. data/ext/bwtmisc.c +269 -0
  56. data/ext/bwtsw2.h +51 -0
  57. data/ext/bwtsw2_aux.c +650 -0
  58. data/ext/bwtsw2_chain.c +107 -0
  59. data/ext/bwtsw2_core.c +594 -0
  60. data/ext/bwtsw2_main.c +100 -0
  61. data/ext/cs2nt.c +191 -0
  62. data/ext/is.c +218 -0
  63. data/ext/khash.h +506 -0
  64. data/ext/kseq.h +208 -0
  65. data/ext/ksort.h +269 -0
  66. data/ext/kstring.c +35 -0
  67. data/ext/kstring.h +46 -0
  68. data/ext/kvec.h +90 -0
  69. data/ext/main.c +63 -0
  70. data/ext/main.h +29 -0
  71. data/ext/mkrf_conf.rb +49 -0
  72. data/ext/qualfa2fq.pl +27 -0
  73. data/ext/simple_dp.c +162 -0
  74. data/ext/simpletest.c +23 -0
  75. data/ext/solid2fastq.pl +111 -0
  76. data/ext/stdaln.c +1072 -0
  77. data/ext/stdaln.h +162 -0
  78. data/ext/utils.c +82 -0
  79. data/ext/utils.h +54 -0
  80. data/lib/bio-bwa.rb +7 -0
  81. data/lib/bio/bwa.rb +312 -0
  82. data/lib/bio/bwa/library.rb +42 -0
  83. data/test/data/testdata.fa +602 -0
  84. data/test/data/testdata.long.fa +175 -0
  85. data/test/data/testdata.short.fa +2 -0
  86. data/test/helper.rb +18 -0
  87. data/test/test_bio-bwa_basic.rb +62 -0
  88. data/test/test_bio-bwa_make_index.rb +42 -0
  89. data/test/test_bio-bwa_run_aln.rb +49 -0
  90. data/test/test_bio-bwa_sam_conversion.rb +49 -0
  91. metadata +218 -0
data/ext/stdaln.h ADDED
@@ -0,0 +1,162 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2003-2006, 2008, by Heng Li <lh3lh3@gmail.com>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /*
27
+ 2009-07-23, 0.10.0
28
+
29
+ - Use 32-bit to store CIGAR
30
+
31
+ - Report suboptimal alignments
32
+
33
+ - Implemented half-fixed-half-open DP
34
+
35
+ 2009-04-26, 0.9.10
36
+
37
+ - Allow to set a threshold for local alignment
38
+
39
+ 2009-02-18, 0.9.9
40
+
41
+ - Fixed a bug when no residue matches
42
+
43
+ 2008-08-04, 0.9.8
44
+
45
+ - Fixed the wrong declaration of aln_stdaln_aux()
46
+
47
+ - Avoid 0 coordinate for global alignment
48
+
49
+ 2008-08-01, 0.9.7
50
+
51
+ - Change gap_end penalty to 5 in aln_param_bwa
52
+
53
+ - Add function to convert path_t to the CIGAR format
54
+
55
+ 2008-08-01, 0.9.6
56
+
57
+ - The first gap now costs (gap_open+gap_ext), instead of
58
+ gap_open. Scoring systems are modified accordingly.
59
+
60
+ - Gap end is now correctly handled. Previously it is not correct.
61
+
62
+ - Change license to MIT.
63
+
64
+ */
65
+
66
+ #ifndef LH3_STDALN_H_
67
+ #define LH3_STDALN_H_
68
+
69
+
70
+ #define STDALN_VERSION 0.11.0
71
+
72
+ #include <stdint.h>
73
+
74
+ #define FROM_M 0
75
+ #define FROM_I 1
76
+ #define FROM_D 2
77
+ #define FROM_S 3
78
+
79
+ #define ALN_TYPE_LOCAL 0
80
+ #define ALN_TYPE_GLOBAL 1
81
+ #define ALN_TYPE_EXTEND 2
82
+
83
+ /* This is the smallest integer. It might be CPU-dependent in very RARE cases. */
84
+ #define MINOR_INF -1073741823
85
+
86
+ typedef struct
87
+ {
88
+ int gap_open;
89
+ int gap_ext;
90
+ int gap_end;
91
+
92
+ int *matrix;
93
+ int row;
94
+ int band_width;
95
+ } AlnParam;
96
+
97
+ typedef struct
98
+ {
99
+ int i, j;
100
+ unsigned char ctype;
101
+ } path_t;
102
+
103
+ typedef struct
104
+ {
105
+ path_t *path; /* for advanced users... :-) */
106
+ int path_len; /* for advanced users... :-) */
107
+ int start1, end1; /* start and end of the first sequence, coordinations are 1-based */
108
+ int start2, end2; /* start and end of the second sequence, coordinations are 1-based */
109
+ int score, subo; /* score */
110
+
111
+ char *out1, *out2; /* print them, and then you will know */
112
+ char *outm;
113
+
114
+ int n_cigar;
115
+ uint32_t *cigar32;
116
+ } AlnAln;
117
+
118
+ #ifdef __cplusplus
119
+ extern "C" {
120
+ #endif
121
+
122
+ AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,
123
+ int type, int do_align, int len1, int len2);
124
+ AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int type, int do_align);
125
+ void aln_free_AlnAln(AlnAln *aa);
126
+
127
+ int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
128
+ path_t *path, int *path_len);
129
+ int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
130
+ path_t *path, int *path_len, int _thres, int *_subo);
131
+ int aln_extend_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
132
+ path_t *path, int *path_len, int G0, uint8_t *_mem);
133
+ uint16_t *aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
134
+ uint32_t *aln_path2cigar32(const path_t *path, int path_len, int *n_cigar);
135
+
136
+ #ifdef __cplusplus
137
+ }
138
+ #endif
139
+
140
+ /********************
141
+ * global variables *
142
+ ********************/
143
+
144
+ extern AlnParam aln_param_bwa; /* = { 37, 9, 0, aln_sm_maq, 5, 50 }; */
145
+ extern AlnParam aln_param_blast; /* = { 5, 2, 2, aln_sm_blast, 5, 50 }; */
146
+ extern AlnParam aln_param_nt2nt; /* = { 10, 2, 2, aln_sm_nt, 16, 75 }; */
147
+ extern AlnParam aln_param_aa2aa; /* = { 20, 19, 19, aln_sm_read, 16, 75 }; */
148
+ extern AlnParam aln_param_rd2rd; /* = { 12, 2, 2, aln_sm_blosum62, 22, 50 }; */
149
+
150
+ /* common nucleotide score matrix for 16 bases */
151
+ extern int aln_sm_nt[], aln_sm_bwa[];
152
+
153
+ /* BLOSUM62 and BLOSUM45 */
154
+ extern int aln_sm_blosum62[], aln_sm_blosum45[];
155
+
156
+ /* common read for 16 bases. note that read alignment is quite different from common nucleotide alignment */
157
+ extern int aln_sm_read[];
158
+
159
+ /* human-mouse score matrix for 4 bases */
160
+ extern int aln_sm_hs[];
161
+
162
+ #endif
data/ext/utils.c ADDED
@@ -0,0 +1,82 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008 Genome Research Ltd (GRL).
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
+
28
+ #include <stdio.h>
29
+ #include <stdarg.h>
30
+ #include <stdlib.h>
31
+ #include <string.h>
32
+ #include <zlib.h>
33
+ #include "utils.h"
34
+
35
/*
 * fopen() wrapper that aborts instead of returning NULL.
 *
 * func: name printed in brackets at the start of the error message.
 * fn:   path to open; "-" selects a standard stream instead of a file
 *       (stdin when mode contains 'r', stdout otherwise).
 * mode: fopen() mode string.
 */
FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
{
	FILE *stream;

	if (!strcmp(fn, "-"))
		return strchr(mode, 'r') ? stdin : stdout;

	stream = fopen(fn, mode);
	if (stream == NULL) {
		fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
		abort();
	}
	return stream;
}
46
/*
 * freopen() wrapper that aborts on failure.
 *
 * Re-associates fp with file fn using mode. On failure it prints the caller
 * name (func), the file name and the perror() text to stderr, then aborts.
 * Returns fp on success.
 */
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp)
{
	FILE *reopened = freopen(fn, mode, fp);

	if (reopened == NULL) {
		fprintf(stderr, "[%s] fail to open file '%s': ", func, fn);
		perror(NULL);
		fprintf(stderr, "Abort!\n");
		abort();
	}
	return fp;
}
56
+ gzFile err_xzopen_core(const char *func, const char *fn, const char *mode)
57
+ {
58
+ gzFile fp;
59
+ if (strcmp(fn, "-") == 0)
60
+ return gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode);
61
+ if ((fp = gzopen(fn, mode)) == 0) {
62
+ fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
63
+ abort();
64
+ }
65
+ return fp;
66
+ }
67
/*
 * Print "[header] <formatted message> Abort!" to stderr and abort().
 *
 * header: text placed in brackets before the message.
 * fmt:    printf-style format; the remaining arguments are its values.
 */
void err_fatal(const char *header, const char *fmt, ...)
{
	va_list ap;

	fprintf(stderr, "[%s] ", header);
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fprintf(stderr, " Abort!\n");
	abort();
}
77
+
78
+ void err_fatal_simple_core(const char *func, const char *msg)
79
+ {
80
+ fprintf(stderr, "[%s] %s Abort!\n", func, msg);
81
+ abort();
82
+ }
data/ext/utils.h ADDED
@@ -0,0 +1,54 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008 Genome Research Ltd (GRL).
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
+
28
+ #ifndef LH3_UTILS_H
29
+ #define LH3_UTILS_H
30
+
31
+ #include <stdio.h>
32
+ #include <zlib.h>
33
+
34
+ #define err_fatal_simple(msg) err_fatal_simple_core(__func__, msg)
35
+ #define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
36
+ #define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp)
37
+ #define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode)
38
/* Abort through err_fatal_simple_core() when cond evaluates to 0. The
 * do { } while (0) wrapper makes the macro a single statement, so an `else`
 * following `if (x) xassert(...);` binds to the caller's `if`, not to the
 * one hidden inside the macro. */
#define xassert(cond, msg) do { if ((cond) == 0) err_fatal_simple_core(__func__, msg); } while (0)
39
+
40
+ #ifdef __cplusplus
41
+ extern "C" {
42
+ #endif
43
+
44
+ void err_fatal(const char *header, const char *fmt, ...);
45
+ void err_fatal_simple_core(const char *func, const char *msg);
46
+ FILE *err_xopen_core(const char *func, const char *fn, const char *mode);
47
+ FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp);
48
+ gzFile err_xzopen_core(const char *func, const char *fn, const char *mode);
49
+
50
+ #ifdef __cplusplus
51
+ }
52
+ #endif
53
+
54
+ #endif
data/lib/bio-bwa.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'ffi'
3
+ require 'bio/bwa/library'
4
+ require 'bio/bwa'
5
+
6
+
7
+
data/lib/bio/bwa.rb ADDED
@@ -0,0 +1,312 @@
1
+ module Bio
2
+ # @author Francesco Strozzi https://github.com/fstrozzi
3
+ class BWA
4
+ extend FFI::Library
5
+ ffi_lib Bio::BWA::Library.load
6
+
7
# Convert a Fasta to Packed format
# @param [Hash] params Options.
# @option params [String] :file_in the Fasta or FastQ file (REQUIRED)
# @option params [String] :prefix the prefix name for the PAC file
# @raise [ArgumentError] if :file_in is missing or an unknown option is given
def self.fa2pac(params={})
  # %w yields an Array of option names. The former %q yielded one String, so
  # the validity check became a substring match and accepted keys like :file.
  valid_params = %w(file_in prefix)
  last_params = [:file_in, :prefix]
  mandatory_params = [:file_in]
  check_mandatory(mandatory_params, params)
  args = build_parameters("fa2pac", valid_params, params, last_params)
  call_BWA_function(args)
end
19
+
20
# Convert a Packed file format to Burrows-Wheeler Transform format
# @param [Hash] params Options.
# @option params [String] :file_in the PAC file (REQUIRED)
# @option params [String] :file_out the name of the BWT file (REQUIRED)
# @raise [ArgumentError] if a required option is missing or an unknown option is given
def self.pac2bwt(params={})
  # %w yields an Array of option names. The former %q yielded one String, so
  # the validity check became a substring match.
  valid_params = %w(file_in file_out)
  last_params = [:file_in, :file_out]
  check_mandatory(last_params, params)
  args = build_parameters("pac2bwt", valid_params, params, last_params)
  call_BWA_function(args)
end
31
+
32
# Convert a BWT file to the new BWT format
# @param [Hash] params Options.
# @option params [String] :file_in the BWT file (REQUIRED)
# @note this method overwrites the existing BWT file
def self.bwtupdate(params={})
  trailing = [:file_in]
  check_mandatory(trailing, params)
  call_BWA_function(build_parameters("bwtupdate", %w(file_in), params, trailing))
end
43
+
44
# Generate reverse Packed format
# @param [Hash] params Options.
# @option params [String] :file_in the PAC file (REQUIRED)
# @option params [String] :file_out the name of the REV PAC (REQUIRED)
def self.pac_rev(params={})
  trailing = [:file_in, :file_out]
  check_mandatory(trailing, params)
  call_BWA_function(build_parameters("pac_rev", %w(file_in file_out), params, trailing))
end
55
+
56
# Generate SA file from BWT and Occ files
# @param [Hash] params Options.
# @option params [String] :file_in the input file (REQUIRED)
# @option params [String] :file_out the name of the output file (REQUIRED)
# @option params :i forwarded to BWA as "-i" when given
# @raise [ArgumentError] if a required option is missing or an unknown option is given
# @note NOTE(review): the option descriptions used to read "the PAC file" and
#   "the name of the REV PAC", identical to pac_rev. Presumably :file_in is the
#   BWT file and :file_out the SA file -- confirm against the bwa bwt2sa usage.
def self.bwt2sa(params={})
  # %w yields an Array of option names. The former %q yielded one String, so
  # the validity check became a substring match.
  valid_params = %w(file_in file_out i)
  last_params = [:file_in, :file_out]
  check_mandatory(last_params, params)
  args = build_parameters("bwt2sa", valid_params, params, last_params)
  call_BWA_function(args)
end
67
+
68
# Generate the BWT index for a Fasta database
# @param [Hash] params Options.
# @option params [String] :file_in the Fasta file (REQUIRED)
# @option params [String] :p the prefix for the database files that will be generated [default is Fasta name]
# @option params [String] :prefix accepted as an alias; it is renamed to :p before the call
# @option params [String] :a the algorithm to be used for indexing: 'is' (short database)[default] or 'bwtsw' (long database)
# @option params [Boolean] :c colorspace database index
# @raise [ArgumentError] if :file_in is missing or an unknown option is given
# @note Boolean values must be set to 'true'
def self.make_index(params = {})
  valid_params = %w(file_in p a c)
  mandatory_params = [:file_in]
  last_params = [:file_in]
  check_mandatory(mandatory_params, params)
  # Work on a copy: the :prefix -> :p rename must not alter the caller's Hash.
  params = params.dup
  params = change_arg_name(params, :prefix, :p) if params[:prefix]
  args = build_parameters("index", valid_params, params, last_params)
  call_BWA_function(args)
end
84
+
85
# Run the alignment for short query sequences
# @param [Hash] params Options
# @option params [String] :file_in the FastQ file (REQUIRED)
# @option params [String] :prefix the prefix of the database index files (REQUIRED)
# @option params [String] :file_out the output of the alignment in SAI format (REQUIRED)
# @option params [Integer] :n max #diff (int) or missing prob under 0.02 err rate (float) [0.04]
# @option params [Integer] :o maximum number or fraction of gap opens [1]
# @option params [Integer] :e maximum number of gap extensions, -1 for disabling long gaps [-1]
# @option params [Integer] :m maximum entries in the queue [2000000]
# @option params [Integer] :t number of threads [1]
# @option params [Integer] :M mismatch penalty [3]
# @option params [Integer] :O gap open penalty [11]
# @option params [Integer] :R stop searching when there are >INT equally best hits [30]
# @option params [Integer] :q quality threshold for read trimming down to 35bp [0]
# @option params [Integer] :B length of barcode
# @option params [Boolean] :c input sequences are in the color space
# @option params [Boolean] :L log-scaled gap penalty for long deletions
# @option params [Boolean] :N non-iterative mode: search for all n-difference hits (slow)
# @option params [Boolean] :I the input is in the Illumina 1.3+ FASTQ-like format
# @option params [Boolean] :b the input read file is in the BAM format
# @option params [Boolean] :single use single-end reads only (effective with -b)
# @option params [Boolean] :first use the 1st read in a pair (effective with -b)
# @option params [Boolean] :second use the 2nd read in a pair (effective with -b)
# @option params [Integer] :i do not put an indel within INT bp towards the ends [5]
# @option params [Integer] :d maximum occurrences for extending a long deletion [10]
# @option params [Integer] :l seed length [32]
# @option params [Integer] :k maximum differences in the seed [2]
# @option params [Integer] :E gap extension penalty [4]
# @raise [ArgumentError] if a required option is missing or an unknown option is given
# @note Boolean values must be set to 'true'
def self.short_read_alignment(params={})
  # "0", "1" and "2" are the names :single, :first and :second are renamed to
  # below. They were missing from this list, so those options always raised
  # "Unknown parameter".
  valid_params = %w(n o e i d l k c L R m t N M O E q f b single first second 0 1 2 I B prefix file_in)
  mandatory_params = [:prefix, :file_in, :file_out]
  last_params = [:prefix, :file_in]
  check_mandatory(mandatory_params, params)
  # Work on a copy so the renames do not alter the caller's Hash; otherwise a
  # second call with the same Hash fails the :file_out check.
  params = params.dup
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  params = change_arg_name(params, :single, "0") if params[:single]
  params = change_arg_name(params, :first, "1") if params[:first]
  params = change_arg_name(params, :second, "2") if params[:second]
  args = build_parameters("aln", valid_params, params, last_params)
  call_BWA_function(args)
end
127
+
128
# Convert the SAI alignment output into SAM format (single end)
# @param [Hash] params Options
# @option params [String] :fastq the FastQ file (REQUIRED)
# @option params [String] :prefix the prefix of the database index files (REQUIRED)
# @option params [String] :sai the alignment file in SAI format (REQUIRED)
# @option params [String] :file_out the file name of the SAM output
# @option params [Integer] :n max_occ
# @option params [String] :r RG_line
# @raise [ArgumentError] if a required option is missing or an unknown option is given
def self.sai_to_sam_single(params = {})
  valid_params = %w(n r fastq sai prefix f)
  mandatory_params = [:prefix, :sai, :fastq]
  last_params = [:prefix, :sai, :fastq]
  check_mandatory(mandatory_params, params)
  # Work on a copy: the :file_out -> :f rename must not alter the caller's Hash.
  params = params.dup
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  args = build_parameters("sai2sam_se", valid_params, params, last_params)
  call_BWA_function(args)
end
145
+
146
+
147
# Convert the SAI alignment output into SAM format (paired ends)
# @param [Hash] params Options
# @option params [String] :prefix the prefix of the database index files (REQUIRED)
# @option params [Array] :sai the two alignment files in SAI format (REQUIRED)
# @option params [Array] :fastq the two fastq files (REQUIRED)
# @option params [String] :file_out renamed to :f before the call
# @option params [Integer] :a maximum insert size [500]
# @option params [Integer] :o maximum occurrences for one end [100000]
# @option params [Integer] :n maximum hits to output for paired reads [3]
# @option params [Integer] :N maximum hits to output for discordant pairs [10]
# @option params [Float] :c prior of chimeric rate (lower bound) [1.0e-05]
# @option params [String] :r read group header line such as '@RG\tID:foo\tSM:bar'
# @option params [Boolean] :P preload index into memory (for base-space reads only)
# @option params [Boolean] :s disable Smith-Waterman for the unmapped mate
# @option params [Boolean] :A disable insert size estimate (force :s)
# @raise [ArgumentError] if a required option is missing, :sai or :fastq is not
#   a two-element Array, or an unknown option is given
# @note Boolean values must be set to 'true'
def self.sai_to_sam_paired(params = {})
  valid_params = %w(a o s P n N c f A r prefix first_sai second_sai first_fastq second_fastq)
  mandatory_params = [:prefix, :sai, :fastq]
  last_params = [:prefix, :first_sai, :second_sai, :first_fastq, :second_fastq]
  check_mandatory(mandatory_params, params)
  # Work on a copy: :sai and :fastq are removed below, and doing that on the
  # caller's Hash made a second call with the same Hash fail the check above.
  params = params.dup
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  [[:sai, "SAI"], [:fastq, "FastQ"]].each do |key, label|
    files = params[key]
    next unless files
    unless files.is_a?(Array) && files.size == 2
      raise ArgumentError, "you must provide an array with two #{label} files!"
    end
    # Split the pair into the positional names listed in last_params.
    params["first_#{key}".to_sym] = files[0]
    params["second_#{key}".to_sym] = files[1]
    params.delete(key)
  end
  args = build_parameters("sai2sam_pe", valid_params, params, last_params)
  call_BWA_function(args)
end
183
+
184
# Run the alignment for long query sequences
# @param [Hash] params Options
# @option params [String] :file_in the FastQ file (REQUIRED)
# @option params [String] :prefix the prefix of the database index files (REQUIRED)
# @option params [String] :file_out the output of the alignment in SAM format (REQUIRED)
# @option params [Integer] :a score for a match [1]
# @option params [Integer] :b mismatch penalty [3]
# @option params [Integer] :q gap open penalty [5]
# @option params [Integer] :r gap extension penalty [2]
# @option params [Integer] :t number of threads [1]
# @option params [Integer] :w band width [50]
# @option params [Float] :m mask level [0.50]
# @option params [Integer] :T score threshold divided by a [30]
# @option params [Integer] :s maximum seeding interval size [3]
# @option params [Integer] :z Z-best [1]
# @option params [Integer] :N number of seeds to trigger reverse alignment [5]
# @option params [Float] :c coefficient of length-threshold adjustment [5.5]
# @option params [Boolean] :H in SAM output, use hard clipping rather than soft
# @raise [ArgumentError] if a required option is missing or an unknown option is given
# @note Boolean arguments must be set to 'true'
def self.long_read_alignment(params = {})
  valid_params = %w(q r a b t T w d z m y s c N H f prefix file_in)
  mandatory_params = [:prefix, :file_in, :file_out]
  last_params = [:prefix, :file_in]
  check_mandatory(mandatory_params, params)
  # Work on a copy: renaming :file_out in the caller's Hash made a second call
  # with the same Hash fail the mandatory check.
  params = params.dup
  params = change_arg_name(params, :file_out, :f) if params[:file_out]
  args = build_parameters("bwtsw2", valid_params, params, last_params)
  call_BWA_function(args)
end
212
+
213
# Run the alignment between multiple short sequences and ONE long sequence
# @param [Hash] params Options
# @option params [String] :short_seq the short query sequence (REQUIRED)
# @option params [String] :long_seq the long database sequence (REQUIRED)
# @option params [String] :file_out the alignment output; when given, standard
#   output is redirected to this file for the duration of the call
# @option params [Integer] :T minimum score [1]
# @option params [Boolean] :p protein alignment (suppressing :r)
# @option params [Boolean] :f forward strand only
# @option params [Boolean] :r reverse strand only
# @option params [Boolean] :g global alignment
# @return the value returned by the bwa_stdsw call, as the other wrappers do
# @raise [ArgumentError] if a required option is missing or an unknown option is given
# @note Boolean values must be set to 'true'
def self.simple_SW(params = {})
  valid_params = %w(g T f r p file_out long_seq short_seq)
  mandatory_params = [:long_seq, :short_seq]
  last_params = mandatory_params
  check_mandatory(mandatory_params, params)
  # Work on a copy so :file_out is not removed from the caller's Hash.
  params = params.dup
  file_out = params.delete(:file_out)
  args = build_parameters("stdsw", valid_params, params, last_params)
  return call_BWA_function(args) unless file_out

  # Keep a duplicate of the current stdout so it can be put back afterwards.
  # Reopening "/dev/tty" instead was wrong whenever stdout was a file or pipe,
  # and was skipped entirely if the call raised.
  saved_stdout = $stdout.dup
  begin
    $stdout.reopen(file_out, "w")
    call_BWA_function(args)
  ensure
    $stdout.flush
    $stdout.reopen(saved_stdout)
    saved_stdout.close
  end
end
237
+
238
+
239
+
240
+ ######## Methods to handle C functions and arguments ########
241
+
242
# Bind every BWA entry point used by this class. Each one is attached with the
# same signature: an int and a pointer as arguments, an int as result.
%w(fa2pac pac2bwt bwtupdate pac_rev bwt2sa index aln sai2sam_se sai2sam_pe bwtsw2 stdsw).each do |command|
  attach_function "bwa_#{command}".to_sym, [:int, :pointer], :int
end
253
+
254
# Internal method to call the BWA C functions
# @param [Array] args the function name (without the "bwa_" prefix) followed by its arguments
# @return the value returned by the attached C function
# @note this method should not be called directly
def self.call_BWA_function(args)
  argv = build_args_for_BWA(args)
  function = "bwa_#{args[0]}".to_sym
  # Same calling convention as int argc, char *argv[].
  send(function, args.size, argv)
end
260
+
261
# Internal method to build argument list for BWA C functions
# @param [Array] args the values to pass; each one is converted with to_s
# @return [FFI::MemoryPointer] a pointer to an array of string pointers with
#   one extra trailing slot that is left unset
# @note this method should not be called directly
def self.build_args_for_BWA(args)
  # Convert every parameter into a string and then into a memory pointer.
  cmd_args = args.map { |arg| FFI::MemoryPointer.from_string(arg.to_s) }
  # One slot more than there are arguments, so the array ends with an unused
  # entry. NOTE(review): FFI::MemoryPointer.new is expected to zero-fill by
  # default, which makes that entry a NULL terminator -- confirm.
  exec_args = FFI::MemoryPointer.new(:pointer, cmd_args.length + 1)
  cmd_args.each_with_index do |arg, i|
    exec_args[i].put_pointer(0, arg) # filling in the array of pointers
  end
  # put_pointer stores only the address. Keep the string buffers referenced
  # from the returned object so they stay alive as long as it does; before,
  # they were reachable only from a local and could be collected before the C
  # call read them.
  exec_args.instance_variable_set(:@bwa_string_pointers, cmd_args)
  exec_args
end
273
+
274
# Internal method to produce a correct parameter list for BWA functions
# @param [String] function_name first element of the returned list
# @param [Array<String>, String] valid_params accepted option names; a String
#   is split on whitespace into names
# @param [Hash] params the options given by the caller
# @param [Array] last_params keys whose values are appended, in this order,
#   after all "-name value" options
# @return [Array] function name, then options, then the trailing values
# @raise [ArgumentError] if params contains a key that is not in valid_params
# @note this method should not be called directly
def self.build_parameters(function_name,valid_params,params,last_params)
  # Compare whole names only. A String passed here used to be searched by
  # substring, which let keys such as :file slip through.
  valid_names = valid_params.respond_to?(:split) ? valid_params.split : valid_params.map { |name| name.to_s }
  args = [function_name]
  params.each_key do |k|
    raise ArgumentError, "Unknown parameter '#{k}'" unless valid_names.include?(k.to_s)
    next if last_params.include?(k) # trailing arguments are appended below
    value = params[k]
    next unless value
    args << "-#{k}"
    args << value unless value == true # boolean flags carry no value
  end
  # Add the trailing arguments in the order BWA expects. An optional one that
  # was not given is skipped instead of being appended as nil.
  last_params.each do |name|
    value = params[name]
    args << value unless value.nil?
  end
  args
end
288
+
289
# Internal method to check if mandatory params have been set
# @param [Array] mandatory_params keys that must be present in params
# @param [Hash] params the options given by the caller
# @return [Array] mandatory_params
# @raise [ArgumentError] if a mandatory key is absent or its value is nil
# @note this method should not be called directly
def self.check_mandatory(mandatory_params, params)
  mandatory_params.each do |name|
    # A key that is present but nil is as unusable as a missing one; it used
    # to pass this check.
    raise ArgumentError, "You must provide parameter '#{name}'" if params[name].nil?
  end
end
294
+
295
# Internal method used to change parameters name from Ruby to BWA functions
# @param [Hash] hash the options; modified in place
# @param key the current name
# @param new_key the name to store the value under
# @return [Hash] the same hash object
# @note this method should not be called directly
def self.change_arg_name(hash,key,new_key)
  hash.store(new_key, hash[key])
  hash.delete(key)
  hash
end
302
+
303
+ private_class_method :call_BWA_function
304
+ private_class_method :build_args_for_BWA
305
+ private_class_method :build_parameters
306
+ private_class_method :check_mandatory
307
+
308
+ end
309
+ end
310
+
311
+
312
+