bio-bwa 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
 - data/Gemfile +15 -0
 - data/Gemfile.lock +28 -0
 - data/LICENSE.txt +35 -0
 - data/README.rdoc +33 -0
 - data/Rakefile +56 -0
 - data/VERSION +1 -0
 - data/bio-bwa.gemspec +152 -0
 - data/doc/Bio.html +93 -0
 - data/doc/Bio/BWA.html +2884 -0
 - data/doc/Bio/BWA/Library.html +229 -0
 - data/doc/_index.html +119 -0
 - data/doc/class_list.html +36 -0
 - data/doc/css/common.css +1 -0
 - data/doc/css/full_list.css +53 -0
 - data/doc/css/style.css +310 -0
 - data/doc/file.LICENSE.html +88 -0
 - data/doc/file.README.html +119 -0
 - data/doc/file_list.html +41 -0
 - data/doc/frames.html +13 -0
 - data/doc/index.html +119 -0
 - data/doc/js/app.js +203 -0
 - data/doc/js/full_list.js +149 -0
 - data/doc/js/jquery.js +154 -0
 - data/doc/method_list.html +171 -0
 - data/doc/top-level-namespace.html +88 -0
 - data/ext/COPYING +674 -0
 - data/ext/ChangeLog +3864 -0
 - data/ext/NEWS +555 -0
 - data/ext/README +29 -0
 - data/ext/bamlite.c +155 -0
 - data/ext/bamlite.h +94 -0
 - data/ext/bntseq.c +303 -0
 - data/ext/bntseq.h +80 -0
 - data/ext/bwa.1 +562 -0
 - data/ext/bwape.c +807 -0
 - data/ext/bwase.c +686 -0
 - data/ext/bwase.h +27 -0
 - data/ext/bwaseqio.c +222 -0
 - data/ext/bwt.c +250 -0
 - data/ext/bwt.h +105 -0
 - data/ext/bwt_gen/Makefile +23 -0
 - data/ext/bwt_gen/QSufSort.c +496 -0
 - data/ext/bwt_gen/QSufSort.h +40 -0
 - data/ext/bwt_gen/bwt_gen.c +1547 -0
 - data/ext/bwt_gen/bwt_gen.h +105 -0
 - data/ext/bwt_lite.c +94 -0
 - data/ext/bwt_lite.h +29 -0
 - data/ext/bwtaln.c +345 -0
 - data/ext/bwtaln.h +150 -0
 - data/ext/bwtgap.c +264 -0
 - data/ext/bwtgap.h +38 -0
 - data/ext/bwtindex.c +186 -0
 - data/ext/bwtio.c +77 -0
 - data/ext/bwtmisc.c +269 -0
 - data/ext/bwtsw2.h +51 -0
 - data/ext/bwtsw2_aux.c +650 -0
 - data/ext/bwtsw2_chain.c +107 -0
 - data/ext/bwtsw2_core.c +594 -0
 - data/ext/bwtsw2_main.c +100 -0
 - data/ext/cs2nt.c +191 -0
 - data/ext/is.c +218 -0
 - data/ext/khash.h +506 -0
 - data/ext/kseq.h +208 -0
 - data/ext/ksort.h +269 -0
 - data/ext/kstring.c +35 -0
 - data/ext/kstring.h +46 -0
 - data/ext/kvec.h +90 -0
 - data/ext/main.c +63 -0
 - data/ext/main.h +29 -0
 - data/ext/mkrf_conf.rb +49 -0
 - data/ext/qualfa2fq.pl +27 -0
 - data/ext/simple_dp.c +162 -0
 - data/ext/simpletest.c +23 -0
 - data/ext/solid2fastq.pl +111 -0
 - data/ext/stdaln.c +1072 -0
 - data/ext/stdaln.h +162 -0
 - data/ext/utils.c +82 -0
 - data/ext/utils.h +54 -0
 - data/lib/bio-bwa.rb +7 -0
 - data/lib/bio/bwa.rb +312 -0
 - data/lib/bio/bwa/library.rb +42 -0
 - data/test/data/testdata.fa +602 -0
 - data/test/data/testdata.long.fa +175 -0
 - data/test/data/testdata.short.fa +2 -0
 - data/test/helper.rb +18 -0
 - data/test/test_bio-bwa_basic.rb +62 -0
 - data/test/test_bio-bwa_make_index.rb +42 -0
 - data/test/test_bio-bwa_run_aln.rb +49 -0
 - data/test/test_bio-bwa_sam_conversion.rb +49 -0
 - metadata +218 -0
 
    
        data/ext/kstring.c
    ADDED
    
    | 
         @@ -0,0 +1,35 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #include <stdarg.h>
         
     | 
| 
      
 2 
     | 
    
         
            +
            #include <stdio.h>
         
     | 
| 
      
 3 
     | 
    
         
            +
            #include "kstring.h"
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            /*
             * Append printf-style formatted text to the dynamic string `s`.
             *
             * The text is first formatted into whatever free space the buffer
             * already has; if it does not fit, the buffer is grown (capacity
             * rounded up with kroundup32) and the text is formatted a second time.
             *
             * Returns the number of characters appended (excluding the NUL), or a
             * negative value if formatting or allocation failed, in which case
             * s->l, s->m and s->s are left unchanged.
             */
            int ksprintf(kstring_t *s, const char *fmt, ...)
            {
            	va_list ap;
            	/* Free bytes after the current contents; zero while no buffer exists. */
            	size_t avail = s->s ? s->m - s->l : 0;
            	int l;

            	va_start(ap, fmt);
            	/* Avoid pointer arithmetic on NULL when the string has no buffer yet. */
            	l = vsnprintf(s->s ? s->s + s->l : NULL, avail, fmt, ap);
            	va_end(ap);
            	if (l < 0) return l; /* encoding/format error: do not touch s->l */

            	if ((size_t)l + 1 > avail) {
            		size_t cap = s->l + (size_t)l + 2;
            		char *tmp;

            		kroundup32(cap);
            		tmp = (char*)realloc(s->s, cap);
            		if (tmp == NULL) return -1; /* old buffer is still valid and owned by s */
            		s->s = tmp;
            		s->m = cap;

            		/* Each va_start is paired with exactly one va_end. */
            		va_start(ap, fmt);
            		l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap);
            		va_end(ap);
            		if (l < 0) return l;
            	}
            	s->l += (size_t)l;
            	return l;
            }
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
            #ifdef KSTRING_MAIN
         
     | 
| 
      
 25 
     | 
    
         
            +
            #include <stdio.h>
         
     | 
| 
      
 26 
     | 
    
         
            +
            int main()
         
     | 
| 
      
 27 
     | 
    
         
            +
            {
         
     | 
| 
      
 28 
     | 
    
         
            +
            	kstring_t *s;
         
     | 
| 
      
 29 
     | 
    
         
            +
            	s = (kstring_t*)calloc(1, sizeof(kstring_t));
         
     | 
| 
      
 30 
     | 
    
         
            +
            	ksprintf(s, "abcdefg: %d", 100);
         
     | 
| 
      
 31 
     | 
    
         
            +
            	printf("%s\n", s->s);
         
     | 
| 
      
 32 
     | 
    
         
            +
            	free(s);
         
     | 
| 
      
 33 
     | 
    
         
            +
            	return 0;
         
     | 
| 
      
 34 
     | 
    
         
            +
            }
         
     | 
| 
      
 35 
     | 
    
         
            +
            #endif
         
     | 
    
        data/ext/kstring.h
    ADDED
    
    | 
         @@ -0,0 +1,46 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #ifndef KSTRING_H
         
     | 
| 
      
 2 
     | 
    
         
            +
            #define KSTRING_H
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            #include <stdlib.h>
         
     | 
| 
      
 5 
     | 
    
         
            +
            #include <string.h>
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            #ifndef kroundup32
            /*
             * Round x up, in place, to the next power of two (values that already are
             * a power of two are left as they are). x must be a modifiable lvalue;
             * the bit smearing covers 32 bits. The argument is evaluated many times.
             */
            #define kroundup32(x)      \
            	(--(x),                \
            	 (x) |= (x) >> 1,      \
            	 (x) |= (x) >> 2,      \
            	 (x) |= (x) >> 4,      \
            	 (x) |= (x) >> 8,      \
            	 (x) |= (x) >> 16,     \
            	 ++(x))
            #endif
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            #ifndef KSTRING_T
            #define KSTRING_T kstring_t
            /*
             * Growable character buffer used by kputs, kputc and ksprintf.
             */
            typedef struct __kstring_t {
            	size_t l;  /* number of characters currently stored (NUL not counted) */
            	size_t m;  /* number of bytes allocated for s */
            	char *s;   /* heap buffer, resized with realloc by the k* helpers */
            } kstring_t;
            #endif
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
            /*
             * Append the NUL-terminated string `p` to `s`, growing the buffer when
             * needed (capacity rounded up with kroundup32).
             *
             * Returns the number of characters appended, or -1 if the buffer could
             * not be grown; on failure `s` is left unchanged.
             */
            static inline int kputs(const char *p, kstring_t *s)
            {
            	/* Keep the length in size_t so long inputs are not truncated. */
            	size_t len = strlen(p);

            	if (s->l + len + 1 >= s->m) {
            		size_t cap = s->l + len + 2;
            		char *tmp;

            		kroundup32(cap);
            		tmp = (char*)realloc(s->s, cap);
            		if (tmp == NULL) return -1; /* old buffer stays valid */
            		s->s = tmp;
            		s->m = cap;
            	}
            	/* Length is already known; copy the bytes plus the terminator. */
            	memcpy(s->s + s->l, p, len + 1);
            	s->l += len;
            	return (int)len;
            }
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            /*
             * Append the single character `c` to `s` and keep the buffer
             * NUL-terminated, growing it when needed.
             *
             * Returns `c`, or -1 if the buffer could not be grown; on failure `s`
             * is left unchanged.
             */
            static inline int kputc(int c, kstring_t *s)
            {
            	if (s->l + 1 >= s->m) {
            		size_t cap = s->l + 2;
            		char *tmp;

            		kroundup32(cap);
            		tmp = (char*)realloc(s->s, cap);
            		if (tmp == NULL) return -1; /* old buffer stays valid */
            		s->s = tmp;
            		s->m = cap;
            	}
            	s->s[s->l++] = (char)c;
            	s->s[s->l] = 0;
            	return c;
            }
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
            int ksprintf(kstring_t *s, const char *fmt, ...);
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
            #endif
         
     | 
    
        data/ext/kvec.h
    ADDED
    
    | 
         @@ -0,0 +1,90 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            /* The MIT License
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
               Copyright (c) 2008, by Attractive Chaos <attractivechaos@aol.co.uk>
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
               Permission is hereby granted, free of charge, to any person obtaining
         
     | 
| 
      
 6 
     | 
    
         
            +
               a copy of this software and associated documentation files (the
         
     | 
| 
      
 7 
     | 
    
         
            +
               "Software"), to deal in the Software without restriction, including
         
     | 
| 
      
 8 
     | 
    
         
            +
               without limitation the rights to use, copy, modify, merge, publish,
         
     | 
| 
      
 9 
     | 
    
         
            +
               distribute, sublicense, and/or sell copies of the Software, and to
         
     | 
| 
      
 10 
     | 
    
         
            +
               permit persons to whom the Software is furnished to do so, subject to
         
     | 
| 
      
 11 
     | 
    
         
            +
               the following conditions:
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
               The above copyright notice and this permission notice shall be
         
     | 
| 
      
 14 
     | 
    
         
            +
               included in all copies or substantial portions of the Software.
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
               THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
         
     | 
| 
      
 17 
     | 
    
         
            +
               EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
         
     | 
| 
      
 18 
     | 
    
         
            +
               MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
         
     | 
| 
      
 19 
     | 
    
         
            +
               NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
         
     | 
| 
      
 20 
     | 
    
         
            +
               BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
         
     | 
| 
      
 21 
     | 
    
         
            +
               ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
         
     | 
| 
      
 22 
     | 
    
         
            +
               CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         
     | 
| 
      
 23 
     | 
    
         
            +
               SOFTWARE.
         
     | 
| 
      
 24 
     | 
    
         
            +
            */
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
            /*
         
     | 
| 
      
 27 
     | 
    
         
            +
              An example:
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
            #include "kvec.h"
         
     | 
| 
      
 30 
     | 
    
         
            +
            int main() {
         
     | 
| 
      
 31 
     | 
    
         
            +
            	kvec_t(int) array;
         
     | 
| 
      
 32 
     | 
    
         
            +
            	kv_init(array);
         
     | 
| 
      
 33 
     | 
    
         
            +
            	kv_push(int, array, 10); // append
         
     | 
| 
      
 34 
     | 
    
         
            +
            	kv_a(int, array, 20) = 5; // dynamic
         
     | 
| 
      
 35 
     | 
    
         
            +
            	kv_A(array, 20) = 4; // static
         
     | 
| 
      
 36 
     | 
    
         
            +
            	kv_destroy(array);
         
     | 
| 
      
 37 
     | 
    
         
            +
            	return 0;
         
     | 
| 
      
 38 
     | 
    
         
            +
            }
         
     | 
| 
      
 39 
     | 
    
         
            +
            */
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
            /*
         
     | 
| 
      
 42 
     | 
    
         
            +
              2008-09-22 (0.1.0):
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
            	* The initial version.
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
            */
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
            #ifndef AC_KVEC_H
         
     | 
| 
      
 49 
     | 
    
         
            +
            #define AC_KVEC_H
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
      
 51 
     | 
    
         
            +
            #include <stdlib.h>
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
            /*
             * Round x up, in place, to the next power of two. x must be a modifiable
             * lvalue; the bit smearing covers 32 bits.
             */
            #define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))

            /*
             * Growable array of `type`: n = elements in use, m = allocated slots,
             * a = storage. All macros below evaluate `v` more than once and none of
             * them checks the result of realloc.
             */
            #define kvec_t(type) struct { size_t n, m; type *a; }
            #define kv_init(v) ((v).n = (v).m = 0, (v).a = 0)
            #define kv_destroy(v) free((v).a)
            #define kv_A(v, i) ((v).a[(i)])        /* unchecked element access */
            #define kv_pop(v) ((v).a[--(v).n])     /* remove and yield the last element */
            #define kv_size(v) ((v).n)
            #define kv_max(v) ((v).m)

            /* Set the capacity of v to s slots. */
            #define kv_resize(type, v, s)  ((v).m = (s), (v).a = (type*)realloc((v).a, sizeof(type) * (v).m))

            /*
             * Copy the contents of v0 into v1, growing v1 first when it is too small.
             * Uses memcpy, so <string.h> must be included where the macro is expanded.
             */
            #define kv_copy(type, v1, v0) do {							\
            		if ((v1).m < (v0).n) kv_resize(type, v1, (v0).n);	\
            		(v1).n = (v0).n;									\
            		memcpy((v1).a, (v0).a, sizeof(type) * (v0).n);		\
            	} while (0)

            /* Append x, doubling the capacity (starting at 2) when the array is full. */
            #define kv_push(type, v, x) do {									\
            		if ((v).n == (v).m) {										\
            			(v).m = (v).m? (v).m<<1 : 2;							\
            			(v).a = (type*)realloc((v).a, sizeof(type) * (v).m);	\
            		}															\
            		(v).a[(v).n++] = (x);										\
            	} while (0)

            /*
             * Reserve one more slot and yield a pointer to it. The whole expansion is
             * parenthesized so the pointer, not the 0 from the growth step, is the
             * value seen by an enclosing assignment.
             */
            #define kv_pushp(type, v) ((((v).n == (v).m)?							\
            						   ((v).m = ((v).m? (v).m<<1 : 2),				\
            							(v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0)	\
            						   : 0), ((v).a + ((v).n++)))

            /*
             * Element i with automatic growth: capacity is enlarged when i is beyond
             * it, and the size is raised to i + 1 when i is at or beyond the current
             * size, so the accessed element is always inside [0, n). The result is
             * produced by dereferencing a pointer so the macro stays usable on either
             * side of an assignment.
             */
            #define kv_a(type, v, i) (*(((v).m <= (size_t)(i)?						\
            						  ((v).m = (v).n = (i) + 1, kv_roundup32((v).m), \
            						   (v).a = (type*)realloc((v).a, sizeof(type) * (v).m), 0) \
            						  : (v).n <= (size_t)(i)? (v).n = (i) + 1		\
            						  : 0), &(v).a[(i)]))
         
     | 
| 
      
 89 
     | 
    
         
            +
             
     | 
| 
      
 90 
     | 
    
         
            +
            #endif
         
     | 
    
        data/ext/main.c
    ADDED
    
    | 
         @@ -0,0 +1,63 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #include <stdio.h>
         
     | 
| 
      
 2 
     | 
    
         
            +
            #include <string.h>
         
     | 
| 
      
 3 
     | 
    
         
            +
            #include "main.h"
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            #ifndef PACKAGE_VERSION
         
     | 
| 
      
 6 
     | 
    
         
            +
            #define PACKAGE_VERSION "0.5.9-r16"
         
     | 
| 
      
 7 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            static int usage()
         
     | 
| 
      
 10 
     | 
    
         
            +
            {
         
     | 
| 
      
 11 
     | 
    
         
            +
            	fprintf(stderr, "\n");
         
     | 
| 
      
 12 
     | 
    
         
            +
            	fprintf(stderr, "Program: bwa (alignment via Burrows-Wheeler transformation)\n");
         
     | 
| 
      
 13 
     | 
    
         
            +
            	fprintf(stderr, "Version: %s\n", PACKAGE_VERSION);
         
     | 
| 
      
 14 
     | 
    
         
            +
            	fprintf(stderr, "Contact: Heng Li <lh3@sanger.ac.uk>\n\n");
         
     | 
| 
      
 15 
     | 
    
         
            +
            	fprintf(stderr, "Usage:   bwa <command> [options]\n\n");
         
     | 
| 
      
 16 
     | 
    
         
            +
            	fprintf(stderr, "Command: index         index sequences in the FASTA format\n");
         
     | 
| 
      
 17 
     | 
    
         
            +
            	fprintf(stderr, "         aln           gapped/ungapped alignment\n");
         
     | 
| 
      
 18 
     | 
    
         
            +
            	fprintf(stderr, "         samse         generate alignment (single ended)\n");
         
     | 
| 
      
 19 
     | 
    
         
            +
            	fprintf(stderr, "         sampe         generate alignment (paired ended)\n");
         
     | 
| 
      
 20 
     | 
    
         
            +
            	fprintf(stderr, "         bwasw         BWA-SW for long queries\n");
         
     | 
| 
      
 21 
     | 
    
         
            +
            	fprintf(stderr, "\n");
         
     | 
| 
      
 22 
     | 
    
         
            +
            	fprintf(stderr, "         fa2pac        convert FASTA to PAC format\n");
         
     | 
| 
      
 23 
     | 
    
         
            +
            	fprintf(stderr, "         pac2bwt       generate BWT from PAC\n");
         
     | 
| 
      
 24 
     | 
    
         
            +
            	fprintf(stderr, "         pac2bwtgen    alternative algorithm for generating BWT\n");
         
     | 
| 
      
 25 
     | 
    
         
            +
            	fprintf(stderr, "         bwtupdate     update .bwt to the new format\n");
         
     | 
| 
      
 26 
     | 
    
         
            +
            	fprintf(stderr, "         pac_rev       generate reverse PAC\n");
         
     | 
| 
      
 27 
     | 
    
         
            +
            	fprintf(stderr, "         bwt2sa        generate SA from BWT and Occ\n");
         
     | 
| 
      
 28 
     | 
    
         
            +
            	fprintf(stderr, "         pac2cspac     convert PAC to color-space PAC\n");
         
     | 
| 
      
 29 
     | 
    
         
            +
            	fprintf(stderr, "         stdsw         standard SW/NW alignment\n");
         
     | 
| 
      
 30 
     | 
    
         
            +
            	fprintf(stderr, "\n");
         
     | 
| 
      
 31 
     | 
    
         
            +
            	return 1;
         
     | 
| 
      
 32 
     | 
    
         
            +
            }
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
            void bwa_print_sam_PG()
         
     | 
| 
      
 35 
     | 
    
         
            +
            {
         
     | 
| 
      
 36 
     | 
    
         
            +
            	printf("@PG\tID:bwa\tPN:bwa\tVN:%s\n", PACKAGE_VERSION);
         
     | 
| 
      
 37 
     | 
    
         
            +
            }
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
            /*
             * Entry point: dispatch on the sub-command name in argv[1].
             *
             * The selected handler receives the argument vector shifted by one, so the
             * sub-command name becomes its argv[0], and its return value becomes the
             * exit status. With no sub-command the usage text is printed; an unknown
             * sub-command is reported on stderr. Both of those cases return 1.
             */
            int main(int argc, char *argv[])
            {
            	const char *cmd;
            	int sub_argc;
            	char **sub_argv;

            	if (argc < 2) return usage();

            	cmd = argv[1];
            	sub_argc = argc - 1;
            	sub_argv = argv + 1;

            	if (!strcmp(cmd, "fa2pac")) return bwa_fa2pac(sub_argc, sub_argv);
            	if (!strcmp(cmd, "pac2bwt")) return bwa_pac2bwt(sub_argc, sub_argv);
            	if (!strcmp(cmd, "pac2bwtgen")) return bwt_bwtgen_main(sub_argc, sub_argv);
            	if (!strcmp(cmd, "bwtupdate")) return bwa_bwtupdate(sub_argc, sub_argv);
            	if (!strcmp(cmd, "pac_rev")) return bwa_pac_rev(sub_argc, sub_argv);
            	if (!strcmp(cmd, "bwt2sa")) return bwa_bwt2sa(sub_argc, sub_argv);
            	if (!strcmp(cmd, "index")) return bwa_index(sub_argc, sub_argv);
            	if (!strcmp(cmd, "aln")) return bwa_aln(sub_argc, sub_argv);
            	/* "sw" is an alias of "stdsw". */
            	if (!strcmp(cmd, "sw")) return bwa_stdsw(sub_argc, sub_argv);
            	if (!strcmp(cmd, "samse")) return bwa_sai2sam_se(sub_argc, sub_argv);
            	if (!strcmp(cmd, "sampe")) return bwa_sai2sam_pe(sub_argc, sub_argv);
            	if (!strcmp(cmd, "pac2cspac")) return bwa_pac2cspac(sub_argc, sub_argv);
            	if (!strcmp(cmd, "stdsw")) return bwa_stdsw(sub_argc, sub_argv);
            	/* Three names select the same BWA-SW handler. */
            	if (!strcmp(cmd, "bwtsw2")) return bwa_bwtsw2(sub_argc, sub_argv);
            	if (!strcmp(cmd, "dbwtsw")) return bwa_bwtsw2(sub_argc, sub_argv);
            	if (!strcmp(cmd, "bwasw")) return bwa_bwtsw2(sub_argc, sub_argv);

            	fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
            	return 1;
            }
         
     | 
    
        data/ext/main.h
    ADDED
    
    | 
         @@ -0,0 +1,29 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #ifndef BWA_MAIN_H
         
     | 
| 
      
 2 
     | 
    
         
            +
            #define BWA_MAIN_H
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            #ifdef __cplusplus
         
     | 
| 
      
 5 
     | 
    
         
            +
            extern "C" {
         
     | 
| 
      
 6 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            	/*
            	 * Sub-command entry points called by main() in main.c. Each one takes
            	 * an argc/argv pair whose argv[0] is the sub-command name and returns
            	 * the value main() passes back as the exit status.
            	 */

            	/* index construction and conversion commands */
            	int bwa_index(int argc, char *argv[]);       /* "index" */
            	int bwa_fa2pac(int argc, char *argv[]);      /* "fa2pac" */
            	int bwa_pac2bwt(int argc, char *argv[]);     /* "pac2bwt" */
            	int bwt_bwtgen_main(int argc, char *argv[]); /* "pac2bwtgen" */
            	int bwa_bwtupdate(int argc, char *argv[]);   /* "bwtupdate" */
            	int bwa_pac_rev(int argc, char *argv[]);     /* "pac_rev" */
            	int bwa_bwt2sa(int argc, char *argv[]);      /* "bwt2sa" */
            	int bwa_pac2cspac(int argc, char *argv[]);   /* "pac2cspac" */

            	/* alignment and SAM output commands */
            	int bwa_aln(int argc, char *argv[]);         /* "aln" */
            	int bwa_sai2sam_se(int argc, char *argv[]);  /* "samse" */
            	int bwa_sai2sam_pe(int argc, char *argv[]);  /* "sampe" */

            	int bwa_stdsw(int argc, char *argv[]);       /* "stdsw" and "sw" */

            	int bwa_bwtsw2(int argc, char *argv[]);      /* "bwasw", "bwtsw2" and "dbwtsw" */
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            #ifdef __cplusplus
         
     | 
| 
      
 26 
     | 
    
         
            +
            }
         
     | 
| 
      
 27 
     | 
    
         
            +
            #endif
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
            #endif
         
     | 
    
        data/ext/mkrf_conf.rb
    ADDED
    
    | 
         @@ -0,0 +1,49 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Create the Rakefile used to compile the BWA sources into a shared library.
            #
            # The Rakefile is written next to this script. The platform-specific library
            # extension comes from Bio::BWA::Library.lib_extension and selects the linker
            # flags that are baked into the generated file.

            # Resolve the library relative to this script's own directory so that the
            # require works regardless of the current working directory.
            require File.expand_path(File.join("..", "lib", "bio", "bwa", "library"), File.dirname(__FILE__))

            path = File.expand_path(File.dirname(__FILE__))

            ext = Bio::BWA::Library.lib_extension

            # Linker flags per library extension; any other extension links with no extra flags.
            flags = ""
            case ext
              when "so" then flags = "-shared -Wl,-soname,libbwa.so"
              when "dylib" then flags = "-bundle -undefined dynamic_lookup -flat_namespace"
            end

            # NOTE: flags and ext are interpolated now, while the heredoc is being written;
            # everything else in the heredoc is evaluated later by rake.
            File.open(File.join(path,"Rakefile"),"w") do |rakefile|
            rakefile.write <<-RAKE
            require 'rake/clean'

            source = %w(utils.c bwt.c bwtio.c bwtaln.c bwtgap.c is.c bntseq.c bwtmisc.c bwtindex.c stdaln.c simple_dp.c bwaseqio.c bwase.c bwape.c kstring.c cs2nt.c bwtsw2_core.c bwtsw2_main.c bwtsw2_aux.c bwt_lite.c bwtsw2_chain.c bamlite.c main.c)

            CLEAN.include('*.o')
            CLEAN.include('bwt_gen/*.o')

            GEN = FileList['bwt_gen/*.c']
            OBJ_GEN = GEN.ext('o')
            SRC = FileList.new(source)
            OBJ_SRC = SRC.ext('o')

            rule '.o' => '.c' do |t|
              sh "gcc -c -g -Wall -O2 -DHAVE_PTHREAD "+t.source+" -o "+t.name
            end

            task :compile_gen => OBJ_GEN do
              sh "ar -cru bwt_gen/libbwtgen.a "+OBJ_GEN.join(" ")
            end

            task :compile_lib => OBJ_SRC do
              sh "gcc #{flags} "+OBJ_SRC.join(" ")+" -o libbwa.#{ext} -lm -lz -lpthread -Lbwt_gen -lbwtgen"
            end

            task :default => [:compile_gen, :compile_lib, :clean]

            RAKE

            end
         
     | 
| 
      
 47 
     | 
    
         
            +
              
         
     | 
| 
      
 48 
     | 
    
         
            +
              
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
    
        data/ext/qualfa2fq.pl
    ADDED
    
    | 
         @@ -0,0 +1,27 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/perl -w

            # Merge a FASTA file and its companion quality file into FASTQ on stdout.
            # Either input may be gzip-compressed (name ending in .gz).

            use strict;
            use warnings;

            die("Usage: qualfa2fq.pl <in.fasta> <in.qual>\n") if (@ARGV != 2);

            # Open $path for reading and return the handle.
            #  - names ending in .gz are decompressed through gzip; the list form of
            #    open bypasses the shell, so special characters in the name are safe
            #  - "-" means standard input
            # Dies with the system error message on failure.
            sub open_input {
              my ($path) = @_;
              my $fh;
              if ($path =~ /\.gz$/) {
                open($fh, "-|", "gzip", "-dc", "--", $path)
                  or die("qualfa2fq.pl: cannot run gzip on '$path': $!\n");
              } elsif ($path eq "-") {
                $fh = \*STDIN;
              } else {
                open($fh, "<", $path)
                  or die("qualfa2fq.pl: cannot open '$path': $!\n");
              }
              return $fh;
            }

            my ($fhs, $fhq, $q);
            $fhs = open_input($ARGV[0]);
            $fhq = open_input($ARGV[1]);

            # Skip everything up to and including the first '>' in both files.
            $/ = ">"; <$fhs>; <$fhq>; $/ = "\n";
            while (<$fhs>) {
              # $_ is the header line of the FASTA record; the matching header line
              # of the quality file is read and then discarded.
              $q = <$fhq>;
              print "\@$_";
              # Read the record body, i.e. everything up to the next '>'.
              $/ = ">";
              $_ = <$fhs>; $q = <$fhq>;
              chomp; chomp($q);
              # Turn each decimal quality value into one character (value + 33).
              $q =~ s/\s*(\d+)\s*/chr($1+33)/eg;
              print $_, "+\n";
              # Emit the quality string in lines of at most 60 characters.
              for (my $i = 0; $i < length($q); $i += 60) {
                print substr($q, $i, 60), "\n";
              }
              $/ = "\n";
            }

            close($fhs); close($fhq);
         
     | 
    
        data/ext/simple_dp.c
    ADDED
    
    | 
         @@ -0,0 +1,162 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #include <stdlib.h>
         
     | 
| 
      
 2 
     | 
    
         
            +
            #include <stdio.h>
         
     | 
| 
      
 3 
     | 
    
         
            +
            #include <unistd.h>
         
     | 
| 
      
 4 
     | 
    
         
            +
            #include <string.h>
         
     | 
| 
      
 5 
     | 
    
         
            +
            #include <zlib.h>
         
     | 
| 
      
 6 
     | 
    
         
            +
            #include <stdint.h>
         
     | 
| 
      
 7 
     | 
    
         
            +
            #include "stdaln.h"
         
     | 
| 
      
 8 
     | 
    
         
            +
            #include "utils.h"
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            #include "kseq.h"
         
     | 
| 
      
 11 
     | 
    
         
            +
            KSEQ_INIT(gzFile, gzread)
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            /* One sequence record kept in memory. */
            typedef struct {
            	int l;             /* number of bytes in s, not counting the trailing 0 */
            	unsigned char *s;  /* sequence bytes */
            	char *n;           /* sequence name */
            } seq1_t;

            /* Growable array of sequence records. */
            typedef struct {
            	int n_seqs;    /* records currently stored */
            	int m_seqs;    /* records allocated in seqs */
            	seq1_t *seqs;
            } seqs_t;
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
            /*
             * Complement lookup indexed by character code. Letters map to their
             * complementary nucleotide code with case preserved; every other
             * byte (including 'n') maps to 'N'.
             */
            #define ALN_REV_N16 'N','N','N','N','N','N','N','N','N','N','N','N','N','N','N','N'
            unsigned char aln_rev_table[256] = {
            	/* 0x00 - 0x3f */
            	ALN_REV_N16, ALN_REV_N16, ALN_REV_N16, ALN_REV_N16,
            	/* 0x40: '@' 'A' .. 'O' */
            	'N','T','V','G','H','N','N','C','D','N','N','M','N','K','N','N',
            	/* 0x50: 'P' .. 'Z' and punctuation */
            	'N','N','Y','S','A','N','B','W','X','R','N','N','N','N','N','N',
            	/* 0x60: '`' 'a' .. 'o' */
            	'N','t','v','g','h','N','N','c','d','N','N','m','N','k','N','N',
            	/* 0x70: 'p' .. 'z' and punctuation */
            	'N','N','y','s','a','N','b','w','x','r','N','N','N','N','N','N',
            	/* 0x80 - 0xff */
            	ALN_REV_N16, ALN_REV_N16, ALN_REV_N16, ALN_REV_N16,
            	ALN_REV_N16, ALN_REV_N16, ALN_REV_N16, ALN_REV_N16
            };
            #undef ALN_REV_N16
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
            static int g_is_global = 0, g_thres = 1, g_strand = 0, g_aa = 0;
         
     | 
| 
      
 44 
     | 
    
         
            +
            static AlnParam g_aln_param;
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
            static void revseq(int len, uint8_t *seq)
         
     | 
| 
      
 47 
     | 
    
         
            +
            {
         
     | 
| 
      
 48 
     | 
    
         
            +
            	int i;
         
     | 
| 
      
 49 
     | 
    
         
            +
            	for (i = 0; i < len>>1; ++i) {
         
     | 
| 
      
 50 
     | 
    
         
            +
            		uint8_t tmp = aln_rev_table[seq[len-1-i]];
         
     | 
| 
      
 51 
     | 
    
         
            +
            		seq[len-1-i] = aln_rev_table[seq[i]];
         
     | 
| 
      
 52 
     | 
    
         
            +
            		seq[i] = tmp;
         
     | 
| 
      
 53 
     | 
    
         
            +
            	}
         
     | 
| 
      
 54 
     | 
    
         
            +
            	if (len&1) seq[i] = aln_rev_table[seq[i]];
         
     | 
| 
      
 55 
     | 
    
         
            +
            }
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
            static seqs_t *load_seqs(const char *fn)
         
     | 
| 
      
 58 
     | 
    
         
            +
            {
         
     | 
| 
      
 59 
     | 
    
         
            +
            	seqs_t *s;
         
     | 
| 
      
 60 
     | 
    
         
            +
            	seq1_t *p;
         
     | 
| 
      
 61 
     | 
    
         
            +
            	gzFile fp;
         
     | 
| 
      
 62 
     | 
    
         
            +
            	int l;
         
     | 
| 
      
 63 
     | 
    
         
            +
            	kseq_t *seq;
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
            	fp = xzopen(fn, "r");
         
     | 
| 
      
 66 
     | 
    
         
            +
            	seq = kseq_init(fp);
         
     | 
| 
      
 67 
     | 
    
         
            +
            	s = (seqs_t*)calloc(1, sizeof(seqs_t));
         
     | 
| 
      
 68 
     | 
    
         
            +
            	s->m_seqs = 256;
         
     | 
| 
      
 69 
     | 
    
         
            +
            	s->seqs = (seq1_t*)calloc(s->m_seqs, sizeof(seq1_t));
         
     | 
| 
      
 70 
     | 
    
         
            +
            	while ((l = kseq_read(seq)) >= 0) {
         
     | 
| 
      
 71 
     | 
    
         
            +
            		if (s->n_seqs == s->m_seqs) {
         
     | 
| 
      
 72 
     | 
    
         
            +
            			s->m_seqs <<= 1;
         
     | 
| 
      
 73 
     | 
    
         
            +
            			s->seqs = (seq1_t*)realloc(s->seqs, s->m_seqs * sizeof(seq1_t));
         
     | 
| 
      
 74 
     | 
    
         
            +
            		}
         
     | 
| 
      
 75 
     | 
    
         
            +
            		p = s->seqs + (s->n_seqs++);
         
     | 
| 
      
 76 
     | 
    
         
            +
            		p->l = seq->seq.l;
         
     | 
| 
      
 77 
     | 
    
         
            +
            		p->s = (unsigned char*)malloc(p->l + 1);
         
     | 
| 
      
 78 
     | 
    
         
            +
            		memcpy(p->s, seq->seq.s, p->l);
         
     | 
| 
      
 79 
     | 
    
         
            +
            		p->s[p->l] = 0;
         
     | 
| 
      
 80 
     | 
    
         
            +
            		p->n = strdup((const char*)seq->name.s);
         
     | 
| 
      
 81 
     | 
    
         
            +
            	}
         
     | 
| 
      
 82 
     | 
    
         
            +
            	kseq_destroy(seq);
         
     | 
| 
      
 83 
     | 
    
         
            +
            	gzclose(fp);
         
     | 
| 
      
 84 
     | 
    
         
            +
            	fprintf(stderr, "[load_seqs] %d sequences are loaded.\n", s->n_seqs);
         
     | 
| 
      
 85 
     | 
    
         
            +
            	return s;
         
     | 
| 
      
 86 
     | 
    
         
            +
            }
         
     | 
| 
      
 87 
     | 
    
         
            +
             
     | 
| 
      
 88 
     | 
    
         
            +
            static void aln_1seq(const seqs_t *ss, const char *name, int l, const char *s, char strand)
         
     | 
| 
      
 89 
     | 
    
         
            +
            {
         
     | 
| 
      
 90 
     | 
    
         
            +
            	int i;
         
     | 
| 
      
 91 
     | 
    
         
            +
            	for (i = 0; i < ss->n_seqs; ++i) {
         
     | 
| 
      
 92 
     | 
    
         
            +
            		AlnAln *aa;
         
     | 
| 
      
 93 
     | 
    
         
            +
            		seq1_t *p = ss->seqs + i;
         
     | 
| 
      
 94 
     | 
    
         
            +
            		g_aln_param.band_width = l + p->l;
         
     | 
| 
      
 95 
     | 
    
         
            +
            		aa = aln_stdaln_aux(s, (const char*)p->s, &g_aln_param, g_is_global, g_thres, l, p->l);
         
     | 
| 
      
 96 
     | 
    
         
            +
            		if (aa->score >= g_thres || g_is_global) {
         
     | 
| 
      
 97 
     | 
    
         
            +
            			printf(">%s\t%d\t%d\t%s\t%c\t%d\t%d\t%d\t%d\t", p->n, aa->start1? aa->start1 : 1, aa->end1, name, strand,
         
     | 
| 
      
 98 
     | 
    
         
            +
            				   aa->start2? aa->start2 : 1, aa->end2, aa->score, aa->subo);
         
     | 
| 
      
 99 
     | 
    
         
            +
            			// NB: I put the short sequence as the first sequence in SW, an insertion to
         
     | 
| 
      
 100 
     | 
    
         
            +
            			// the reference becomes a deletion from the short sequence. Therefore, I use
         
     | 
| 
      
 101 
     | 
    
         
            +
            			// "MDI" here rather than "MID", and print ->out2 first rather than ->out1.
         
     | 
| 
      
 102 
     | 
    
         
            +
            			for (i = 0; i != aa->n_cigar; ++i)
         
     | 
| 
      
 103 
     | 
    
         
            +
            				printf("%d%c", aa->cigar32[i]>>4, "MDI"[aa->cigar32[i]&0xf]);
         
     | 
| 
      
 104 
     | 
    
         
            +
            			printf("\n%s\n%s\n%s\n", aa->out2, aa->outm, aa->out1);
         
     | 
| 
      
 105 
     | 
    
         
            +
            		}
         
     | 
| 
      
 106 
     | 
    
         
            +
            		aln_free_AlnAln(aa);
         
     | 
| 
      
 107 
     | 
    
         
            +
            	}
         
     | 
| 
      
 108 
     | 
    
         
            +
            }
         
     | 
| 
      
 109 
     | 
    
         
            +
             
     | 
| 
      
 110 
     | 
    
         
            +
            static void aln_seqs(const seqs_t *ss, const char *fn)
         
     | 
| 
      
 111 
     | 
    
         
            +
            {
         
     | 
| 
      
 112 
     | 
    
         
            +
            	gzFile fp;
         
     | 
| 
      
 113 
     | 
    
         
            +
            	kseq_t *seq;
         
     | 
| 
      
 114 
     | 
    
         
            +
            	int l;
         
     | 
| 
      
 115 
     | 
    
         
            +
             
     | 
| 
      
 116 
     | 
    
         
            +
            	fp = xzopen(fn, "r");
         
     | 
| 
      
 117 
     | 
    
         
            +
            	seq = kseq_init(fp);
         
     | 
| 
      
 118 
     | 
    
         
            +
            	while ((l = kseq_read(seq)) >= 0) {
         
     | 
| 
      
 119 
     | 
    
         
            +
            		if (g_strand&1) aln_1seq(ss, (char*)seq->name.s, l, seq->seq.s, '+');
         
     | 
| 
      
 120 
     | 
    
         
            +
            		if (g_strand&2) {
         
     | 
| 
      
 121 
     | 
    
         
            +
            			revseq(l, (uint8_t*)seq->seq.s);
         
     | 
| 
      
 122 
     | 
    
         
            +
            			aln_1seq(ss, (char*)seq->name.s, l, seq->seq.s, '-');
         
     | 
| 
      
 123 
     | 
    
         
            +
            		}
         
     | 
| 
      
 124 
     | 
    
         
            +
            	}
         
     | 
| 
      
 125 
     | 
    
         
            +
            	kseq_destroy(seq);
         
     | 
| 
      
 126 
     | 
    
         
            +
            	gzclose(fp);
         
     | 
| 
      
 127 
     | 
    
         
            +
            }
         
     | 
| 
      
 128 
     | 
    
         
            +
             
     | 
| 
      
 129 
     | 
    
         
            +
            int bwa_stdsw(int argc, char *argv[])
         
     | 
| 
      
 130 
     | 
    
         
            +
            {
         
     | 
| 
      
 131 
     | 
    
         
            +
            	int c;
         
     | 
| 
      
 132 
     | 
    
         
            +
            	seqs_t *ss;
         
     | 
| 
      
 133 
     | 
    
         
            +
            	optind = 1;
         
     | 
| 
      
 134 
     | 
    
         
            +
            	while ((c = getopt(argc, argv, "gT:frp")) >= 0) {
         
     | 
| 
      
 135 
     | 
    
         
            +
            		switch (c) {
         
     | 
| 
      
 136 
     | 
    
         
            +
            		case 'g': g_is_global = 1; break;
         
     | 
| 
      
 137 
     | 
    
         
            +
            		case 'T': g_thres = atoi(optarg); break;
         
     | 
| 
      
 138 
     | 
    
         
            +
            		case 'f': g_strand |= 1; break;
         
     | 
| 
      
 139 
     | 
    
         
            +
            		case 'r': g_strand |= 2; break;
         
     | 
| 
      
 140 
     | 
    
         
            +
            		case 'p': g_aa = 1; break;
         
     | 
| 
      
 141 
     | 
    
         
            +
            		}
         
     | 
| 
      
 142 
     | 
    
         
            +
            	}
         
     | 
| 
      
 143 
     | 
    
         
            +
            	if (g_strand == 0) g_strand = 3;
         
     | 
| 
      
 144 
     | 
    
         
            +
            	if (g_aa) g_strand = 1;
         
     | 
| 
      
 145 
     | 
    
         
            +
            	if (optind + 1 >= argc) {
         
     | 
| 
      
 146 
     | 
    
         
            +
            		fprintf(stderr, "\nUsage:   bwa stdsw [options] <seq1.long.fa> <seq2.short.fa>\n\n");
         
     | 
| 
      
 147 
     | 
    
         
            +
            		fprintf(stderr, "Options: -T INT    minimum score [%d]\n", g_thres);
         
     | 
| 
      
 148 
     | 
    
         
            +
            		fprintf(stderr, "         -p        protein alignment (suppressing -r)\n");
         
     | 
| 
      
 149 
     | 
    
         
            +
            		fprintf(stderr, "         -f        forward strand only\n");
         
     | 
| 
      
 150 
     | 
    
         
            +
            		fprintf(stderr, "         -r        reverse strand only\n");
         
     | 
| 
      
 151 
     | 
    
         
            +
            		fprintf(stderr, "         -g        global alignment\n\n");
         
     | 
| 
      
 152 
     | 
    
         
            +
            		fprintf(stderr, "Note: This program is specifically designed for alignment between multiple short\n");
         
     | 
| 
      
 153 
     | 
    
         
            +
            		fprintf(stderr, "      sequences and ONE long sequence. It outputs the suboptimal score on the long\n");
         
     | 
| 
      
 154 
     | 
    
         
            +
            		fprintf(stderr, "      sequence.\n\n");
         
     | 
| 
      
 155 
     | 
    
         
            +
            		return 1;
         
     | 
| 
      
 156 
     | 
    
         
            +
            	}
         
     | 
| 
      
 157 
     | 
    
         
            +
            	g_aln_param = g_aa? aln_param_aa2aa : aln_param_blast;
         
     | 
| 
      
 158 
     | 
    
         
            +
            	g_aln_param.gap_end = 0;
         
     | 
| 
      
 159 
     | 
    
         
            +
            	ss = load_seqs(argv[optind]);
         
     | 
| 
      
 160 
     | 
    
         
            +
            	aln_seqs(ss, argv[optind+1]);
         
     | 
| 
      
 161 
     | 
    
         
            +
            	return 0;
         
     | 
| 
      
 162 
     | 
    
         
            +
            }
         
     |