bio-bwa 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. data/.document +5 -0
  2. data/Gemfile +15 -0
  3. data/Gemfile.lock +28 -0
  4. data/LICENSE.txt +35 -0
  5. data/README.rdoc +33 -0
  6. data/Rakefile +56 -0
  7. data/VERSION +1 -0
  8. data/bio-bwa.gemspec +152 -0
  9. data/doc/Bio.html +93 -0
  10. data/doc/Bio/BWA.html +2884 -0
  11. data/doc/Bio/BWA/Library.html +229 -0
  12. data/doc/_index.html +119 -0
  13. data/doc/class_list.html +36 -0
  14. data/doc/css/common.css +1 -0
  15. data/doc/css/full_list.css +53 -0
  16. data/doc/css/style.css +310 -0
  17. data/doc/file.LICENSE.html +88 -0
  18. data/doc/file.README.html +119 -0
  19. data/doc/file_list.html +41 -0
  20. data/doc/frames.html +13 -0
  21. data/doc/index.html +119 -0
  22. data/doc/js/app.js +203 -0
  23. data/doc/js/full_list.js +149 -0
  24. data/doc/js/jquery.js +154 -0
  25. data/doc/method_list.html +171 -0
  26. data/doc/top-level-namespace.html +88 -0
  27. data/ext/COPYING +674 -0
  28. data/ext/ChangeLog +3864 -0
  29. data/ext/NEWS +555 -0
  30. data/ext/README +29 -0
  31. data/ext/bamlite.c +155 -0
  32. data/ext/bamlite.h +94 -0
  33. data/ext/bntseq.c +303 -0
  34. data/ext/bntseq.h +80 -0
  35. data/ext/bwa.1 +562 -0
  36. data/ext/bwape.c +807 -0
  37. data/ext/bwase.c +686 -0
  38. data/ext/bwase.h +27 -0
  39. data/ext/bwaseqio.c +222 -0
  40. data/ext/bwt.c +250 -0
  41. data/ext/bwt.h +105 -0
  42. data/ext/bwt_gen/Makefile +23 -0
  43. data/ext/bwt_gen/QSufSort.c +496 -0
  44. data/ext/bwt_gen/QSufSort.h +40 -0
  45. data/ext/bwt_gen/bwt_gen.c +1547 -0
  46. data/ext/bwt_gen/bwt_gen.h +105 -0
  47. data/ext/bwt_lite.c +94 -0
  48. data/ext/bwt_lite.h +29 -0
  49. data/ext/bwtaln.c +345 -0
  50. data/ext/bwtaln.h +150 -0
  51. data/ext/bwtgap.c +264 -0
  52. data/ext/bwtgap.h +38 -0
  53. data/ext/bwtindex.c +186 -0
  54. data/ext/bwtio.c +77 -0
  55. data/ext/bwtmisc.c +269 -0
  56. data/ext/bwtsw2.h +51 -0
  57. data/ext/bwtsw2_aux.c +650 -0
  58. data/ext/bwtsw2_chain.c +107 -0
  59. data/ext/bwtsw2_core.c +594 -0
  60. data/ext/bwtsw2_main.c +100 -0
  61. data/ext/cs2nt.c +191 -0
  62. data/ext/is.c +218 -0
  63. data/ext/khash.h +506 -0
  64. data/ext/kseq.h +208 -0
  65. data/ext/ksort.h +269 -0
  66. data/ext/kstring.c +35 -0
  67. data/ext/kstring.h +46 -0
  68. data/ext/kvec.h +90 -0
  69. data/ext/main.c +63 -0
  70. data/ext/main.h +29 -0
  71. data/ext/mkrf_conf.rb +49 -0
  72. data/ext/qualfa2fq.pl +27 -0
  73. data/ext/simple_dp.c +162 -0
  74. data/ext/simpletest.c +23 -0
  75. data/ext/solid2fastq.pl +111 -0
  76. data/ext/stdaln.c +1072 -0
  77. data/ext/stdaln.h +162 -0
  78. data/ext/utils.c +82 -0
  79. data/ext/utils.h +54 -0
  80. data/lib/bio-bwa.rb +7 -0
  81. data/lib/bio/bwa.rb +312 -0
  82. data/lib/bio/bwa/library.rb +42 -0
  83. data/test/data/testdata.fa +602 -0
  84. data/test/data/testdata.long.fa +175 -0
  85. data/test/data/testdata.short.fa +2 -0
  86. data/test/helper.rb +18 -0
  87. data/test/test_bio-bwa_basic.rb +62 -0
  88. data/test/test_bio-bwa_make_index.rb +42 -0
  89. data/test/test_bio-bwa_run_aln.rb +49 -0
  90. data/test/test_bio-bwa_sam_conversion.rb +49 -0
  91. metadata +218 -0
data/ext/stdaln.h ADDED
@@ -0,0 +1,162 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2003-2006, 2008, by Heng Li <lh3lh3@gmail.com>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /*
27
+ 2009-07-23, 0.10.0
28
+
29
+ - Use 32-bit to store CIGAR
30
+
31
+ - Report suboptimal alignments
32
+
33
+ - Implemented half-fixed-half-open DP
34
+
35
+ 2009-04-26, 0.9.10
36
+
37
+ - Allow to set a threshold for local alignment
38
+
39
+ 2009-02-18, 0.9.9
40
+
41
+ - Fixed a bug when no residue matches
42
+
43
+ 2008-08-04, 0.9.8
44
+
45
+ - Fixed the wrong declaration of aln_stdaln_aux()
46
+
47
+ - Avoid 0 coordinate for global alignment
48
+
49
+ 2008-08-01, 0.9.7
50
+
51
+ - Change gap_end penalty to 5 in aln_param_bwa
52
+
53
+ - Add function to convert path_t to the CIGAR format
54
+
55
+ 2008-08-01, 0.9.6
56
+
57
+ - The first gap now costs (gap_open+gap_ext), instead of
58
+ gap_open. Scoring systems are modified accordingly.
59
+
60
+ - Gap end is now correctly handled. Previously it was not correct.
61
+
62
+ - Change license to MIT.
63
+
64
+ */
65
+
66
+ #ifndef LH3_STDALN_H_
67
+ #define LH3_STDALN_H_
68
+
69
+
70
+ #define STDALN_VERSION 0.11.0
71
+
72
+ #include <stdint.h>
73
+
74
+ #define FROM_M 0
75
+ #define FROM_I 1
76
+ #define FROM_D 2
77
+ #define FROM_S 3
78
+
79
+ #define ALN_TYPE_LOCAL 0
80
+ #define ALN_TYPE_GLOBAL 1
81
+ #define ALN_TYPE_EXTEND 2
82
+
83
+ /* This is the smallest integer. It might be CPU-dependent in very RARE cases. */
84
+ #define MINOR_INF -1073741823
85
+
86
+ typedef struct /* Scoring parameters passed (as `const AlnParam *ap`) to the aln_* routines declared in this header. */
87
+ {
88
+ int gap_open; /* gap open penalty; per the changelog above, the first gap costs gap_open + gap_ext */
89
+ int gap_ext; /* gap extension penalty */
90
+ int gap_end; /* gap end penalty */
91
+
92
+ int *matrix; /* presumably the substitution score matrix (one of the aln_sm_* tables) -- TODO confirm */
93
+ int row; /* presumably the dimension of `matrix` -- TODO confirm against stdaln.c */
94
+ int band_width; /* presumably the width of the DP band -- TODO confirm against stdaln.c */
95
+ } AlnParam;
96
+
97
+ typedef struct /* One step of an alignment path; arrays of these are filled by the aln_*_core routines. */
98
+ {
99
+ int i, j; /* presumably positions in the first and second sequence -- TODO confirm */
100
+ unsigned char ctype; /* presumably one of FROM_M / FROM_I / FROM_D / FROM_S -- TODO confirm */
101
+ } path_t;
102
+
103
+ typedef struct /* Alignment result returned by aln_stdaln() / aln_stdaln_aux(); release it with aln_free_AlnAln(). */
104
+ {
105
+ path_t *path; /* for advanced users... :-) */
106
+ int path_len; /* for advanced users... :-) */
107
+ int start1, end1; /* start and end of the first sequence, coordinates are 1-based */
108
+ int start2, end2; /* start and end of the second sequence, coordinates are 1-based */
109
+ int score, subo; /* score; subo is presumably the suboptimal alignment score -- TODO confirm */
110
+
111
+ char *out1, *out2; /* print them, and then you will know */
112
+ char *outm; /* NOTE(review): presumably the match line printed between out1 and out2 -- confirm */
113
+
114
+ int n_cigar; /* presumably the number of entries in cigar32 -- TODO confirm */
115
+ uint32_t *cigar32; /* CIGAR stored as 32-bit values (see the 0.10.0 changelog entry above) */
116
+ } AlnAln;
117
+
118
+ #ifdef __cplusplus
119
+ extern "C" {
120
+ #endif
121
+
122
+ AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,
123
+ int type, int do_align, int len1, int len2);
124
+ AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int type, int do_align);
125
+ void aln_free_AlnAln(AlnAln *aa);
126
+
127
+ int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
128
+ path_t *path, int *path_len);
129
+ int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
130
+ path_t *path, int *path_len, int _thres, int *_subo);
131
+ int aln_extend_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
132
+ path_t *path, int *path_len, int G0, uint8_t *_mem);
133
+ uint16_t *aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
134
+ uint32_t *aln_path2cigar32(const path_t *path, int path_len, int *n_cigar);
135
+
136
+ #ifdef __cplusplus
137
+ }
138
+ #endif
139
+
140
+ /********************
141
+ * global variables *
142
+ ********************/
143
+
144
+ extern AlnParam aln_param_bwa; /* = { 37, 9, 0, aln_sm_maq, 5, 50 }; */
145
+ extern AlnParam aln_param_blast; /* = { 5, 2, 2, aln_sm_blast, 5, 50 }; */
146
+ extern AlnParam aln_param_nt2nt; /* = { 10, 2, 2, aln_sm_nt, 16, 75 }; */
147
+ extern AlnParam aln_param_aa2aa; /* = { 20, 19, 19, aln_sm_read, 16, 75 }; */
148
+ extern AlnParam aln_param_rd2rd; /* = { 12, 2, 2, aln_sm_blosum62, 22, 50 }; */
149
+
150
+ /* common nucleotide score matrix for 16 bases */
151
+ extern int aln_sm_nt[], aln_sm_bwa[];
152
+
153
+ /* BLOSUM62 and BLOSUM45 */
154
+ extern int aln_sm_blosum62[], aln_sm_blosum45[];
155
+
156
+ /* common read for 16 bases. note that read alignment is quite different from common nucleotide alignment */
157
+ extern int aln_sm_read[];
158
+
159
+ /* human-mouse score matrix for 4 bases */
160
+ extern int aln_sm_hs[];
161
+
162
+ #endif
data/ext/utils.c ADDED
@@ -0,0 +1,82 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008 Genome Research Ltd (GRL).
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
+
28
+ #include <stdio.h>
29
+ #include <stdarg.h>
30
+ #include <stdlib.h>
31
+ #include <string.h>
32
+ #include <zlib.h>
33
+ #include "utils.h"
34
+
35
/*
 * Open `fn` with fopen() semantics, aborting the process on failure.
 *
 * func: name of the calling function, used as the "[func]" prefix of the
 *       error message (the xopen() macro passes __func__).
 * fn:   path to open; the special name "-" selects a standard stream.
 * mode: fopen() mode string.
 *
 * Returns stdin (mode contains 'r') or stdout (otherwise) when fn is "-",
 * else the newly opened stream. Never returns NULL.
 */
FILE *err_xopen_core(const char *func, const char *fn, const char *mode)
{
	FILE *stream;

	if (!strcmp(fn, "-")) {
		/* "-" means "use the standard stream that matches the mode". */
		if (strchr(mode, 'r') != NULL)
			return stdin;
		return stdout;
	}

	stream = fopen(fn, mode);
	if (stream == NULL) {
		fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
		abort();
	}
	return stream;
}
46
/*
 * Re-associate the stream `fp` with `fn` via freopen(), aborting on failure.
 *
 * func: name of the calling function, used as the "[func]" message prefix.
 * fn, mode, fp: forwarded unchanged to freopen().
 *
 * Returns `fp` on success. On failure the message, the perror() text and
 * "Abort!" are written to stderr and the process is aborted.
 */
FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp)
{
	if (freopen(fn, mode, fp) != NULL)
		return fp;

	fprintf(stderr, "[%s] fail to open file '%s': ", func, fn);
	perror(NULL);
	fprintf(stderr, "Abort!\n");
	abort();
	return NULL; /* not reached: abort() does not return */
}
56
+ gzFile err_xzopen_core(const char *func, const char *fn, const char *mode)
57
+ {
58
+ gzFile fp;
59
+ if (strcmp(fn, "-") == 0)
60
+ return gzdopen(fileno((strstr(mode, "r"))? stdin : stdout), mode);
61
+ if ((fp = gzopen(fn, mode)) == 0) {
62
+ fprintf(stderr, "[%s] fail to open file '%s'. Abort!\n", func, fn);
63
+ abort();
64
+ }
65
+ return fp;
66
+ }
67
/*
 * Print "[header] <formatted message> Abort!" to stderr and abort().
 *
 * header: text placed inside the leading square brackets.
 * fmt, ...: printf-style format string and its arguments.
 *
 * Does not return.
 */
void err_fatal(const char *header, const char *fmt, ...)
{
	va_list ap;

	fprintf(stderr, "[%s] ", header);

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);

	fputs(" Abort!\n", stderr);
	abort();
}
77
+
78
/*
 * Print "[func] msg Abort!" to stderr and abort().
 *
 * func: name of the calling function (the err_fatal_simple() and xassert()
 *       macros pass __func__).
 * msg:  plain message text; it is printed verbatim, not used as a format.
 *
 * Does not return.
 */
void err_fatal_simple_core(const char *func, const char *msg)
{
	fprintf(stderr, "[%s] ", func);
	fprintf(stderr, "%s Abort!\n", msg);
	abort();
}
data/ext/utils.h ADDED
@@ -0,0 +1,54 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008 Genome Research Ltd (GRL).
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
+
28
+ #ifndef LH3_UTILS_H
29
+ #define LH3_UTILS_H
30
+
31
+ #include <stdio.h>
32
+ #include <zlib.h>
33
+
34
/*
 * Convenience wrappers that pass the calling function's name (__func__)
 * to the err_*_core() helpers declared below.
 */
#define err_fatal_simple(msg) err_fatal_simple_core(__func__, msg)
#define xopen(fn, mode) err_xopen_core(__func__, fn, mode)
#define xreopen(fn, mode, fp) err_xreopen_core(__func__, fn, mode, fp)
#define xzopen(fn, mode) err_xzopen_core(__func__, fn, mode)
/*
 * Abort with `msg` when `cond` is zero/NULL.
 * Written as a single void expression rather than a bare `if`, so that
 *     if (x) xassert(c, "m"); else other();
 * binds the `else` to the outer `if` instead of the one hidden in the macro.
 */
#define xassert(cond, msg) ((cond) ? (void)0 : err_fatal_simple_core(__func__, (msg)))
39
+
40
+ #ifdef __cplusplus
41
+ extern "C" {
42
+ #endif
43
+
44
+ void err_fatal(const char *header, const char *fmt, ...);
45
+ void err_fatal_simple_core(const char *func, const char *msg);
46
+ FILE *err_xopen_core(const char *func, const char *fn, const char *mode);
47
+ FILE *err_xreopen_core(const char *func, const char *fn, const char *mode, FILE *fp);
48
+ gzFile err_xzopen_core(const char *func, const char *fn, const char *mode);
49
+
50
+ #ifdef __cplusplus
51
+ }
52
+ #endif
53
+
54
+ #endif
data/lib/bio-bwa.rb ADDED
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'ffi'
3
+ require 'bio/bwa/library'
4
+ require 'bio/bwa'
5
+
6
+
7
+
data/lib/bio/bwa.rb ADDED
@@ -0,0 +1,312 @@
1
module Bio
  # Ruby interface to the bundled BWA C library, called through FFI.
  #
  # Every public method takes an options Hash, turns it into a command-line
  # style argument list ("-flag value ... positional arguments") and hands it
  # to the matching +bwa_*+ C function as (argc, argv). The Hash passed by
  # the caller is never modified.
  #
  # @author Francesco Strozzi https://github.com/fstrozzi
  class BWA
    extend FFI::Library
    ffi_lib Bio::BWA::Library.load

    # Convert a Fasta to Packed format
    # @param [Hash] params Options.
    # @option params [String] :file_in the Fasta or FastQ file (REQUIRED)
    # @option params [String] :prefix the prefix name for the PAC file
    # @return [Integer] the value returned by the C function
    # @raise [ArgumentError] if a mandatory option is missing or an option is unknown
    def self.fa2pac(params={})
      valid_params = %w(file_in prefix)
      last_params = [:file_in, :prefix]
      mandatory_params = [:file_in]
      check_mandatory(mandatory_params, params)
      args = build_parameters("fa2pac",valid_params,params,last_params)
      call_BWA_function(args)
    end

    # Convert a Packed file format to Burrows-Wheeler Transform format
    # @param [Hash] params Options.
    # @option params [String] :file_in the PAC file (REQUIRED)
    # @option params [String] :file_out the name of the BWT file (REQUIRED)
    # @return [Integer] the value returned by the C function
    # @raise [ArgumentError] if a mandatory option is missing or an option is unknown
    def self.pac2bwt(params={})
      valid_params = %w(file_in file_out)
      last_params = [:file_in,:file_out]
      check_mandatory(last_params, params)
      args = build_parameters("pac2bwt",valid_params,params,last_params)
      call_BWA_function(args)
    end

    # Convert a BWT file to the new BWT format
    # @param [Hash] params Options.
    # @option params [String] :file_in the BWT file (REQUIRED)
    # @note this method overwrites the existing BWT file
    # @return [Integer] the value returned by the C function
    # @raise [ArgumentError] if a mandatory option is missing or an option is unknown
    def self.bwtupdate(params={})
      valid_params = %w(file_in)
      last_params = [:file_in]
      check_mandatory(last_params, params)
      args = build_parameters("bwtupdate",valid_params,params,last_params)
      call_BWA_function(args)
    end

    # Generate reverse Packed format
    # @param [Hash] params Options.
    # @option params [String] :file_in the PAC file (REQUIRED)
    # @option params [String] :file_out the name of the REV PAC (REQUIRED)
    # @return [Integer] the value returned by the C function
    # @raise [ArgumentError] if a mandatory option is missing or an option is unknown
    def self.pac_rev(params={})
      valid_params = %w(file_in file_out)
      last_params = [:file_in,:file_out]
      check_mandatory(last_params, params)
      args = build_parameters("pac_rev",valid_params,params,last_params)
      call_BWA_function(args)
    end

    # Generate SA file from BWT and Occ files
    # @param [Hash] params Options.
    # @option params [String] :file_in the BWT file (REQUIRED)
    # @option params [String] :file_out the name of the SA file (REQUIRED)
    # @option params [Integer] :i passed to BWA as -i (presumably the suffix
    #   array interval; confirm against the BWA manual)
    # @return [Integer] the value returned by the C function
    # @raise [ArgumentError] if a mandatory option is missing or an option is unknown
    def self.bwt2sa(params={})
      valid_params = %w(file_in file_out i)
      last_params = [:file_in,:file_out]
      check_mandatory(last_params, params)
      args = build_parameters("bwt2sa",valid_params,params,last_params)
      call_BWA_function(args)
    end

    # Generate the BWT index for a Fasta database
    # @param [Hash] params Options.
    # @option params [String] :file_in the Fasta file (REQUIRED)
    # @option params [String] :p the prefix for the database files that will be generated [default is Fasta name]
    # @option params [String] :prefix alias of :p (takes precedence when both are given)
    # @option params [String] :a the algorithm to be used for indexing: 'is' (short database)[default] or 'bwtsw' (long database)
    # @option params [Boolean] :c colorspace database index
    # @note Boolean values must be set to 'true'
    # @return [Integer] the value returned by the C function
    # @raise [ArgumentError] if a mandatory option is missing or an option is unknown
    def self.make_index(params = {})
      valid_params = %w(file_in p a c)
      mandatory_params = [:file_in]
      last_params = [:file_in]
      check_mandatory(mandatory_params, params)
      params = params.dup # work on a copy: option renaming must not leak to the caller
      change_arg_name(params,:prefix,:p) if params[:prefix]
      args = build_parameters("index",valid_params,params,last_params)
      call_BWA_function(args)
    end

    # Run the alignment for short query sequences
    # @param [Hash] params Options
    # @option params [String] :file_in the FastQ file (REQUIRED)
    # @option params [String] :prefix the prefix of the database index files (REQUIRED)
    # @option params [String] :file_out the output of the alignment in SAI format (REQUIRED)
    # @option params [Integer] :n max #diff (int) or missing prob under 0.02 err rate (float) [0.04]
    # @option params [Integer] :o maximum number or fraction of gap opens [1]
    # @option params [Integer] :e maximum number of gap extensions, -1 for disabling long gaps [-1]
    # @option params [Integer] :m maximum entries in the queue [2000000]
    # @option params [Integer] :t number of threads [1]
    # @option params [Integer] :M mismatch penalty [3]
    # @option params [Integer] :O gap open penalty [11]
    # @option params [Integer] :R stop searching when there are >INT equally best hits [30]
    # @option params [Integer] :q quality threshold for read trimming down to 35bp [0]
    # @option params [Integer] :B length of barcode
    # @option params [Boolean] :c input sequences are in the color space
    # @option params [Boolean] :L log-scaled gap penalty for long deletions
    # @option params [Boolean] :N non-iterative mode: search for all n-difference hits (slow)
    # @option params [Boolean] :I the input is in the Illumina 1.3+ FASTQ-like format
    # @option params [Boolean] :b the input read file is in the BAM format
    # @option params [Boolean] :single use single-end reads only (effective with -b)
    # @option params [Boolean] :first use the 1st read in a pair (effective with -b)
    # @option params [Boolean] :second use the 2nd read in a pair (effective with -b)
    # @option params [Integer] :i do not put an indel within INT bp towards the ends [5]
    # @option params [Integer] :d maximum occurrences for extending a long deletion [10]
    # @option params [Integer] :l seed length [32]
    # @option params [Integer] :k maximum differences in the seed [2]
    # @option params [Integer] :E gap extension penalty [4]
    # @note Boolean values must be set to 'true'
    # @return [Integer] the value returned by the C function
    # @raise [ArgumentError] if a mandatory option is missing or an option is unknown
    def self.short_read_alignment(params={})
      # "0", "1" and "2" are the BWA flags that :single, :first and :second are
      # renamed to below; they must be listed or build_parameters rejects them.
      valid_params = %w(n o e i d l k c L R m t N M O E q f b single first second 0 1 2 I B prefix file_in)
      mandatory_params = [:prefix,:file_in,:file_out]
      last_params = [:prefix,:file_in]
      check_mandatory(mandatory_params, params)
      params = params.dup # work on a copy: option renaming must not leak to the caller
      change_arg_name(params,:file_out,:f) if params[:file_out]
      change_arg_name(params,:single,"0") if params[:single]
      change_arg_name(params,:first,"1") if params[:first]
      change_arg_name(params,:second,"2") if params[:second]
      args = build_parameters("aln",valid_params,params,last_params)
      call_BWA_function(args)
    end

    # Convert the SAI alignment output into SAM format (single end)
    # @param [Hash] params Options
    # @option params [String] :fastq the FastQ file (REQUIRED)
    # @option params [String] :prefix the prefix of the database index files (REQUIRED)
    # @option params [String] :sai the alignment file in SAI format (REQUIRED)
    # @option params [String] :file_out the file name of the SAM output
    # @option params [Integer] :n max_occ
    # @option params [String] :r RG_line
    # @return [Integer] the value returned by the C function
    # @raise [ArgumentError] if a mandatory option is missing or an option is unknown
    def self.sai_to_sam_single(params = {})
      valid_params = %w(n r fastq sai prefix f)
      mandatory_params = [:prefix,:sai,:fastq]
      last_params = [:prefix,:sai,:fastq]
      check_mandatory(mandatory_params, params)
      params = params.dup # work on a copy: option renaming must not leak to the caller
      change_arg_name(params,:file_out,:f) if params[:file_out]
      args = build_parameters("sai2sam_se",valid_params,params,last_params)
      call_BWA_function(args)
    end


    # Convert the SAI alignment output into SAM format (paired ends)
    # @param [Hash] params Options
    # @option params [String] :prefix the prefix of the database index files (REQUIRED)
    # @option params [Array] :sai the two alignment files in SAI format (REQUIRED)
    # @option params [Array] :fastq the two fastq files (REQUIRED)
    # @option params [String] :file_out the file name of the SAM output
    # @option params [Integer] :a maximum insert size [500]
    # @option params [Integer] :o maximum occurrences for one end [100000]
    # @option params [Integer] :n maximum hits to output for paired reads [3]
    # @option params [Integer] :N maximum hits to output for discordant pairs [10]
    # @option params [Float] :c prior of chimeric rate (lower bound) [1.0e-05]
    # @option params [String] :r read group header line such as '@RG\tID:foo\tSM:bar'
    # @option params [Boolean] :P preload index into memory (for base-space reads only)
    # @option params [Boolean] :s disable Smith-Waterman for the unmapped mate
    # @option params [Boolean] :A disable insert size estimate (force :s)
    # @note Boolean values must be set to 'true'
    # @return [Integer] the value returned by the C function
    # @raise [ArgumentError] if a mandatory option is missing, an option is
    #   unknown, or :sai / :fastq is not a two-element Array
    def self.sai_to_sam_paired(params = {})
      valid_params = %w(a o s P n N c f A r prefix first_sai second_sai first_fastq second_fastq)
      mandatory_params = [:prefix, :sai, :fastq]
      last_params = [:prefix, :first_sai, :second_sai, :first_fastq, :second_fastq]
      check_mandatory(mandatory_params, params)
      params = params.dup # work on a copy: the options are rewritten below
      change_arg_name(params,:file_out,:f) if params[:file_out]
      # Split each two-element Array into the positional arguments BWA expects.
      if params[:sai]
        raise ArgumentError,"you must provide an array with two SAI files!" unless params[:sai].is_a?(Array) && params[:sai].size == 2
        params[:first_sai] = params[:sai][0]
        params[:second_sai] = params[:sai][1]
        params.delete(:sai)
      end
      if params[:fastq]
        raise ArgumentError,"you must provide an array with two FastQ files!" unless params[:fastq].is_a?(Array) && params[:fastq].size == 2
        params[:first_fastq] = params[:fastq][0]
        params[:second_fastq] = params[:fastq][1]
        params.delete(:fastq)
      end
      args = build_parameters("sai2sam_pe",valid_params,params,last_params)
      call_BWA_function(args)
    end

    # Run the alignment for long query sequences
    # @param [Hash] params Options
    # @option params [String] :file_in the FastQ file (REQUIRED)
    # @option params [String] :prefix the prefix of the database index files (REQUIRED)
    # @option params [String] :file_out the output of the alignment in SAM format (REQUIRED)
    # @option params [Integer] :a score for a match [1]
    # @option params [Integer] :b mismatch penalty [3]
    # @option params [Integer] :q gap open penalty [5]
    # @option params [Integer] :r gap extension penalty [2]
    # @option params [Integer] :t number of threads [1]
    # @option params [Integer] :w band width [50]
    # @option params [Float] :m mask level [0.50]
    # @option params [Integer] :T score threshold divided by a [30]
    # @option params [Integer] :s maximum seeding interval size [3]
    # @option params [Integer] :z Z-best [1]
    # @option params [Integer] :N number of seeds to trigger reverse alignment [5]
    # @option params [Float] :c coefficient of length-threshold adjustment [5.5]
    # @option params [Boolean] :H in SAM output, use hard clipping rather than soft
    # @note Boolean arguments must be set to 'true'
    # @return [Integer] the value returned by the C function
    # @raise [ArgumentError] if a mandatory option is missing or an option is unknown
    def self.long_read_alignment(params = {})
      valid_params = %w(q r a b t T w d z m y s c N H f prefix file_in)
      mandatory_params = [:prefix, :file_in, :file_out]
      last_params = [:prefix,:file_in]
      check_mandatory(mandatory_params, params)
      params = params.dup # work on a copy: option renaming must not leak to the caller
      change_arg_name(params,:file_out,:f) if params[:file_out]
      args = build_parameters("bwtsw2",valid_params,params,last_params)
      call_BWA_function(args)
    end

    # Run the alignment between multiple short sequences and ONE long sequence
    # @param [Hash] params Options
    # @option params [String] :short_seq the short query sequence (REQUIRED)
    # @option params [String] :long_seq the long database sequence (REQUIRED)
    # @option params [String] :file_out the alignment output; when given, standard
    #   output is redirected to this file for the duration of the call
    # @option params [Integer] :T minimum score [1]
    # @option params [Boolean] :p protein alignment (suppressing :r)
    # @option params [Boolean] :f forward strand only
    # @option params [Boolean] :r reverse strand only
    # @option params [Boolean] :g global alignment
    # @note Boolean values must be set to 'true'
    # @return [Integer] the value returned by the C function
    # @raise [ArgumentError] if a mandatory option is missing or an option is unknown
    def self.simple_SW(params = {})
      valid_params = %w(g T f r p file_out long_seq short_seq)
      mandatory_params = [:long_seq,:short_seq]
      last_params = mandatory_params
      check_mandatory(mandatory_params, params)
      params = params.dup # work on a copy: :file_out is removed below
      file_out = params.delete(:file_out)
      args = build_parameters("stdsw",valid_params,params,last_params)
      return call_BWA_function(args) unless file_out

      # Remember where stdout currently points so it can be restored exactly,
      # whether it is a terminal, a pipe or a file, and even if the call raises.
      # NOTE(review): output still buffered by the C library's stdio when the
      # call returns may be written after the restore - confirm the C side flushes.
      $stdout.flush
      saved_stdout = $stdout.dup
      $stdout.reopen(file_out,"w")
      begin
        call_BWA_function(args)
      ensure
        $stdout.reopen(saved_stdout)
        saved_stdout.close
      end
    end



    ######## Methods to handle C functions and arguments ########

    attach_function :bwa_fa2pac, [:int,:pointer], :int
    attach_function :bwa_pac2bwt, [:int,:pointer], :int
    attach_function :bwa_bwtupdate, [:int,:pointer], :int
    attach_function :bwa_pac_rev, [:int,:pointer], :int
    attach_function :bwa_bwt2sa, [:int,:pointer], :int
    attach_function :bwa_index, [:int,:pointer], :int
    attach_function :bwa_aln, [:int,:pointer], :int
    attach_function :bwa_sai2sam_se, [:int, :pointer], :int
    attach_function :bwa_sai2sam_pe, [:int,:pointer], :int
    attach_function :bwa_bwtsw2, [:int, :pointer], :int
    attach_function :bwa_stdsw, [:int, :pointer], :int

    # Internal method to call the BWA C functions
    # @param [Array] args argument list whose first element is the BWA
    #   function name without the "bwa_" prefix
    # @return [Integer] the value returned by the C function
    # @note this method should not be called directly
    def self.call_BWA_function(args)
      c_args = build_args_for_BWA(args)
      # call the C function and pass the arguments size and parameters list (same as int argc, char *argv[])
      send("bwa_#{args[0]}".to_sym,args.size,c_args)
    end

    # Internal method to build argument list for BWA C functions
    # @param [Array] args the arguments; every element is converted with to_s
    # @return [FFI::MemoryPointer] pointer to an array of C strings (argv)
    #   followed by a NULL entry
    # @note this method should not be called directly
    def self.build_args_for_BWA(args)
      cmd_args = args.map do |arg|
        FFI::MemoryPointer.from_string(arg.to_s) # convert every parameter into a string and then into a memory pointer
      end
      # one extra slot so the array is NULL terminated, as C code expects of argv
      exec_args = FFI::MemoryPointer.new(:pointer, cmd_args.length + 1)
      cmd_args.each_with_index do |arg, i|
        exec_args[i].put_pointer(0, arg) # filling in the array of pointers
      end
      exec_args[cmd_args.length].put_pointer(0, FFI::Pointer::NULL)
      # exec_args stores raw addresses only. Keep the string buffers referenced
      # from it so they cannot be garbage collected (and freed) while the C
      # function is still reading them.
      exec_args.instance_variable_set(:@bwa_string_pointers, cmd_args)
      exec_args
    end

    # Internal method to produce a correct parameter list for BWA functions
    # @param [String] function_name first element of the resulting list
    # @param [Array<String>] valid_params names of the accepted options
    # @param [Hash] params the options given by the user
    # @param [Array<Symbol>] last_params options passed as positional arguments,
    #   in this order, after all the "-flag" options
    # @return [Array] function name, "-flag"/value pairs, positional arguments
    # @raise [ArgumentError] if params contains a key not listed in valid_params
    # @note this method should not be called directly
    def self.build_parameters(function_name,valid_params,params,last_params)
      args = [function_name]
      params.each_key do |k|
        raise ArgumentError, "Unknown parameter '#{k}'" unless valid_params.include?(k.to_s)
        next unless params[k]           # nil / false: the option is switched off
        next if last_params.include?(k) # positional arguments are appended below
        args << "-#{k}"
        args << params[k] unless params[k] == true # skipping boolean values. just include the param name
      end
      # now adding the last_params so the parameter list is in the correct order
      # for BWA functions; optional ones that were not given are left out instead
      # of being passed as empty strings
      last_params.each {|p| args << params[p] unless params[p].nil?}
      args
    end

    # Internal method to check if mandatory params have been set
    # @param [Array<Symbol>] mandatory_params keys that must be present
    # @param [Hash] params the options given by the user
    # @raise [ArgumentError] if a mandatory key is missing or its value is nil
    # @note this method should not be called directly
    def self.check_mandatory(mandatory_params, params)
      mandatory_params.each do |mp|
        raise ArgumentError,"You must provide parameter '#{mp}'" if params[mp].nil?
      end
    end

    # Internal method used to change parameters name from Ruby to BWA functions
    # @param [Hash] hash the options; modified in place
    # @param [Object] key the key to rename
    # @param [Object] new_key the new name for the key
    # @return [Hash] the same hash
    # @note this method should not be called directly
    def self.change_arg_name(hash,key,new_key)
      hash[new_key] = hash.delete(key)
      hash
    end

    private_class_method :call_BWA_function
    private_class_method :build_args_for_BWA
    private_class_method :build_parameters
    private_class_method :check_mandatory

  end
end
310
+
311
+
312
+