ngs_server 0.4 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/tabix/ChangeLog +593 -0
- data/ext/tabix/Makefile +65 -0
- data/ext/tabix/NEWS +126 -0
- data/ext/tabix/TabixReader.java +395 -0
- data/ext/tabix/bam_endian.h +42 -0
- data/ext/tabix/bedidx.c +156 -0
- data/ext/tabix/bgzf.c +714 -0
- data/ext/tabix/bgzf.h +157 -0
- data/ext/tabix/bgzip.c +206 -0
- data/ext/tabix/example.gtf.gz +0 -0
- data/ext/tabix/example.gtf.gz.tbi +0 -0
- data/ext/tabix/extconf.rb +1 -0
- data/ext/tabix/index.c +998 -0
- data/ext/tabix/khash.h +486 -0
- data/ext/tabix/knetfile.c +632 -0
- data/ext/tabix/knetfile.h +75 -0
- data/ext/tabix/kseq.h +227 -0
- data/ext/tabix/ksort.h +271 -0
- data/ext/tabix/kstring.c +165 -0
- data/ext/tabix/kstring.h +68 -0
- data/ext/tabix/main.c +290 -0
- data/ext/tabix/perl/MANIFEST +8 -0
- data/ext/tabix/perl/Makefile.PL +8 -0
- data/ext/tabix/perl/Tabix.pm +76 -0
- data/ext/tabix/perl/Tabix.xs +71 -0
- data/ext/tabix/perl/TabixIterator.pm +41 -0
- data/ext/tabix/perl/t/01local.t +28 -0
- data/ext/tabix/perl/t/02remote.t +28 -0
- data/ext/tabix/perl/typemap +3 -0
- data/ext/tabix/python/setup.py +55 -0
- data/ext/tabix/python/tabixmodule.c +408 -0
- data/ext/tabix/python/test.py +91 -0
- data/ext/tabix/tabix.1 +132 -0
- data/ext/tabix/tabix.h +145 -0
- data/ext/tabix/tabix.py +87 -0
- data/ext/tabix/tabix.tex +121 -0
- data/ext/vcftools/perl/Vcf.pm +5 -3
- data/ext/vcftools/perl/vcf-query +2 -0
- data/lib/ngs_server/version.rb +1 -1
- data/lib/ngs_server.rb +12 -11
- data/ngs_server.gemspec +1 -2
- metadata +39 -2
    
        data/ext/tabix/kstring.h
    ADDED
    
    | @@ -0,0 +1,68 @@ | |
| 1 | 
            +
            #ifndef KSTRING_H
         | 
| 2 | 
            +
            #define KSTRING_H
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            #include <stdlib.h>
         | 
| 5 | 
            +
            #include <string.h>
         | 
| 6 | 
            +
            #include <stdint.h>
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            #ifndef kroundup32
         | 
| 9 | 
            +
            #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
         | 
| 10 | 
            +
            #endif
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            #ifndef KSTRING_T
         | 
| 13 | 
            +
            #define KSTRING_T kstring_t
         | 
| 14 | 
            +
            typedef struct __kstring_t {
         | 
| 15 | 
            +
            	size_t l, m;
         | 
| 16 | 
            +
            	char *s;
         | 
| 17 | 
            +
            } kstring_t;
         | 
| 18 | 
            +
            #endif
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            int ksprintf(kstring_t *s, const char *fmt, ...);
         | 
| 21 | 
            +
            int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            // calculate the auxiliary array, allocated by calloc()
         | 
| 24 | 
            +
            int *ksBM_prep(const uint8_t *pat, int m);
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            /* Search pat in str and return the list of matches. The size of the
         | 
| 27 | 
            +
             * list is returned as n_matches. _prep is the array returned by
         | 
| 28 | 
            +
             * ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. */
         | 
| 29 | 
            +
            int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches);
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            /*
             * Append l bytes taken from p to the growable string s and keep the
             * result NUL-terminated.
             *
             * The buffer is enlarged with realloc() when the current content, the
             * l new bytes and the terminator would not fit; the new capacity is
             * rounded with kroundup32().
             *
             * Returns l on success.  Returns -1 when the buffer cannot be grown;
             * in that case s->s, s->l and s->m are left untouched, so the caller
             * still owns a valid string.
             */
            static inline int kputsn(const char *p, int l, kstring_t *s)
            {
                if (s->l + l + 1 >= s->m) {
                    size_t grown = s->l + l + 2;
                    char *tmp;

                    kroundup32(grown);
                    /* Grow through a temporary: assigning realloc() straight to
                     * s->s would lose the old buffer when the call fails. */
                    tmp = (char*)realloc(s->s, grown);
                    if (tmp == 0)
                        return -1;
                    s->s = tmp;
                    s->m = grown;
                }
                /* strncpy() stops reading p at its first NUL and zero-fills the
                 * remainder of the l bytes; the terminator is written below. */
                strncpy(s->s + s->l, p, l);
                s->l += l;
                s->s[s->l] = 0;
                return l;
            }
         | 
| 43 | 
            +
             | 
| 44 | 
            +
            /*
             * Append the NUL-terminated string p to s.
             * The length is measured with strlen() and the work is delegated to
             * kputsn(), whose return value is passed back unchanged.
             */
            static inline int kputs(const char *p, kstring_t *s)
            {
                size_t len = strlen(p);

                return kputsn(p, len, s);
            }
         | 
| 48 | 
            +
             | 
| 49 | 
            +
            /*
             * Append the single character c to the growable string s and keep the
             * result NUL-terminated.
             *
             * The buffer is enlarged with realloc() when the character plus the
             * terminator would not fit; the new capacity is rounded with
             * kroundup32().
             *
             * Returns c on success.  Returns -1 when the buffer cannot be grown;
             * in that case s is left untouched.
             */
            static inline int kputc(int c, kstring_t *s)
            {
                if (s->l + 1 >= s->m) {
                    size_t grown = s->l + 2;
                    char *tmp;

                    kroundup32(grown);
                    /* Grow through a temporary so the old buffer is not lost (and
                     * then written through as NULL) when realloc() fails. */
                    tmp = (char*)realloc(s->s, grown);
                    if (tmp == 0)
                        return -1;
                    s->s = tmp;
                    s->m = grown;
                }
                s->s[s->l++] = c;
                s->s[s->l] = 0;
                return c;
            }
         | 
| 60 | 
            +
             | 
| 61 | 
            +
            /*
             * Convenience wrapper around ksplit_core() for a kstring_t.
             *
             * Passes s->s and the delimiter to ksplit_core() together with a fresh
             * (zero capacity, null) offsets array, stores the value ksplit_core()
             * returns in *n, and hands back the offsets pointer it produced.
             * NOTE(review): presumably the caller must free() the returned array -
             * confirm against ksplit_core().
             */
            static inline int *ksplit(kstring_t *s, int delimiter, int *n)
            {
                int *fields = 0;
                int capacity = 0;

                *n = ksplit_core(s->s, delimiter, &capacity, &fields);
                return fields;
            }
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            #endif
         | 
    
        data/ext/tabix/main.c
    ADDED
    
    | @@ -0,0 +1,290 @@ | |
| 1 | 
            +
            #include <string.h>
         | 
| 2 | 
            +
            #include <unistd.h>
         | 
| 3 | 
            +
            #include <stdlib.h>
         | 
| 4 | 
            +
            #include <stdio.h>
         | 
| 5 | 
            +
            #include <sys/stat.h>
         | 
| 6 | 
            +
            #include <errno.h>
         | 
| 7 | 
            +
            #include "bgzf.h"
         | 
| 8 | 
            +
            #include "tabix.h"
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            #define PACKAGE_VERSION "0.2.5 (r964)"
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            #define error(...) { fprintf(stderr,__VA_ARGS__); return -1; }
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            int reheader_file(const char *header, const char *file, int meta)
         | 
| 15 | 
            +
            {
         | 
| 16 | 
            +
                BGZF *fp = bgzf_open(file,"r");
         | 
| 17 | 
            +
                if (bgzf_read_block(fp) != 0 || !fp->block_length)
         | 
| 18 | 
            +
                    return -1;
         | 
| 19 | 
            +
                
         | 
| 20 | 
            +
                char *buffer = fp->uncompressed_block;
         | 
| 21 | 
            +
                int skip_until = 0;
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                if ( buffer[0]==meta )
         | 
| 24 | 
            +
                {
         | 
| 25 | 
            +
                    skip_until = 1;
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                    // Skip the header
         | 
| 28 | 
            +
                    while (1)
         | 
| 29 | 
            +
                    {
         | 
| 30 | 
            +
                        if ( buffer[skip_until]=='\n' )
         | 
| 31 | 
            +
                        {
         | 
| 32 | 
            +
                            skip_until++;
         | 
| 33 | 
            +
                            if ( skip_until>=fp->block_length )
         | 
| 34 | 
            +
                            {
         | 
| 35 | 
            +
                                if (bgzf_read_block(fp) != 0 || !fp->block_length)
         | 
| 36 | 
            +
                                    error("no body?\n");
         | 
| 37 | 
            +
                                skip_until = 0;
         | 
| 38 | 
            +
                            }
         | 
| 39 | 
            +
                            // The header has finished
         | 
| 40 | 
            +
                            if ( buffer[skip_until]!=meta ) break;
         | 
| 41 | 
            +
                        }
         | 
| 42 | 
            +
                        skip_until++;
         | 
| 43 | 
            +
                        if ( skip_until>=fp->block_length )
         | 
| 44 | 
            +
                        {
         | 
| 45 | 
            +
                            if (bgzf_read_block(fp) != 0 || !fp->block_length)
         | 
| 46 | 
            +
                                error("no body?\n");
         | 
| 47 | 
            +
                            skip_until = 0;
         | 
| 48 | 
            +
                        }
         | 
| 49 | 
            +
                    }
         | 
| 50 | 
            +
                }
         | 
| 51 | 
            +
             | 
| 52 | 
            +
                FILE *fh = fopen(header,"r");
         | 
| 53 | 
            +
                if ( !fh )
         | 
| 54 | 
            +
                    error("%s: %s", header,strerror(errno));
         | 
| 55 | 
            +
                int page_size = getpagesize();
         | 
| 56 | 
            +
                char *buf = valloc(page_size);
         | 
| 57 | 
            +
                BGZF *bgzf_out = bgzf_fdopen(fileno(stdout), "w");
         | 
| 58 | 
            +
                ssize_t nread;
         | 
| 59 | 
            +
                while ( (nread=fread(buf,1,page_size-1,fh))>0 )
         | 
| 60 | 
            +
                {
         | 
| 61 | 
            +
                    if ( nread<page_size-1 && buf[nread-1]!='\n' )
         | 
| 62 | 
            +
                        buf[nread++] = '\n';
         | 
| 63 | 
            +
                    if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %s\n",bgzf_out->error);
         | 
| 64 | 
            +
                }
         | 
| 65 | 
            +
                fclose(fh);
         | 
| 66 | 
            +
             | 
| 67 | 
            +
                if ( fp->block_length - skip_until > 0 )
         | 
| 68 | 
            +
                {
         | 
| 69 | 
            +
                    if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0) 
         | 
| 70 | 
            +
                        error("Error: %s\n",fp->error);
         | 
| 71 | 
            +
                }
         | 
| 72 | 
            +
                if (bgzf_flush(bgzf_out) < 0) 
         | 
| 73 | 
            +
                    error("Error: %s\n",bgzf_out->error);
         | 
| 74 | 
            +
             | 
| 75 | 
            +
                while (1)
         | 
| 76 | 
            +
                {
         | 
| 77 | 
            +
            #ifdef _USE_KNETFILE
         | 
| 78 | 
            +
                    nread = knet_read(fp->x.fpr, buf, page_size);
         | 
| 79 | 
            +
            #else
         | 
| 80 | 
            +
                    nread = fread(buf, 1, page_size, fp->file);
         | 
| 81 | 
            +
            #endif
         | 
| 82 | 
            +
                    if ( nread<=0 ) 
         | 
| 83 | 
            +
                        break;
         | 
| 84 | 
            +
             | 
| 85 | 
            +
            #ifdef _USE_KNETFILE
         | 
| 86 | 
            +
                    int count = fwrite(buf, 1, nread, bgzf_out->x.fpw);
         | 
| 87 | 
            +
            #else
         | 
| 88 | 
            +
                    int count = fwrite(buf, 1, nread, bgzf_out->file);
         | 
| 89 | 
            +
            #endif
         | 
| 90 | 
            +
                    if (count != nread)
         | 
| 91 | 
            +
                        error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
         | 
| 92 | 
            +
                }
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                if (bgzf_close(bgzf_out) < 0) 
         | 
| 95 | 
            +
                    error("Error: %s\n",bgzf_out->error);
         | 
| 96 | 
            +
               
         | 
| 97 | 
            +
                return 0;
         | 
| 98 | 
            +
            }
         | 
| 99 | 
            +
             | 
| 100 | 
            +
             | 
| 101 | 
            +
            int main(int argc, char *argv[])
         | 
| 102 | 
            +
            {
         | 
| 103 | 
            +
            	int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, bed_reg = 0;
         | 
| 104 | 
            +
            	ti_conf_t conf = ti_conf_gff;
         | 
| 105 | 
            +
                const char *reheader = NULL;
         | 
| 106 | 
            +
            	while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhfBr:")) >= 0) {
         | 
| 107 | 
            +
            		switch (c) {
         | 
| 108 | 
            +
            		case 'B': bed_reg = 1; break;
         | 
| 109 | 
            +
            		case '0': conf.preset |= TI_FLAG_UCSC; break;
         | 
| 110 | 
            +
            		case 'S': skip = atoi(optarg); break;
         | 
| 111 | 
            +
            		case 'c': meta = optarg[0]; break;
         | 
| 112 | 
            +
            		case 'p':
         | 
| 113 | 
            +
            			if (strcmp(optarg, "gff") == 0) conf = ti_conf_gff;
         | 
| 114 | 
            +
            			else if (strcmp(optarg, "bed") == 0) conf = ti_conf_bed;
         | 
| 115 | 
            +
            			else if (strcmp(optarg, "sam") == 0) conf = ti_conf_sam;
         | 
| 116 | 
            +
            			else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf = ti_conf_vcf;
         | 
| 117 | 
            +
            			else if (strcmp(optarg, "psltbl") == 0) conf = ti_conf_psltbl;
         | 
| 118 | 
            +
            			else {
         | 
| 119 | 
            +
            				fprintf(stderr, "[main] unrecognized preset '%s'\n", optarg);
         | 
| 120 | 
            +
            				return 1;
         | 
| 121 | 
            +
            			}
         | 
| 122 | 
            +
            			break;
         | 
| 123 | 
            +
            		case 's': conf.sc = atoi(optarg); break;
         | 
| 124 | 
            +
            		case 'b': conf.bc = atoi(optarg); break;
         | 
| 125 | 
            +
            		case 'e': conf.ec = atoi(optarg); break;
         | 
| 126 | 
            +
                    case 'l': list_chrms = 1; break;
         | 
| 127 | 
            +
                    case 'h': print_header = 1; break;
         | 
| 128 | 
            +
            		case 'f': force = 1; break;
         | 
| 129 | 
            +
                    case 'r': reheader = optarg; break;
         | 
| 130 | 
            +
            		}
         | 
| 131 | 
            +
            	}
         | 
| 132 | 
            +
            	if (skip >= 0) conf.line_skip = skip;
         | 
| 133 | 
            +
            	if (meta >= 0) conf.meta_char = meta;
         | 
| 134 | 
            +
            	if (optind == argc) {
         | 
| 135 | 
            +
            		fprintf(stderr, "\n");
         | 
| 136 | 
            +
            		fprintf(stderr, "Program: tabix (TAB-delimited file InderXer)\n");
         | 
| 137 | 
            +
            		fprintf(stderr, "Version: %s\n\n", PACKAGE_VERSION);
         | 
| 138 | 
            +
            		fprintf(stderr, "Usage:   tabix <in.tab.bgz> [region1 [region2 [...]]]\n\n");
         | 
| 139 | 
            +
            		fprintf(stderr, "Options: -p STR     preset: gff, bed, sam, vcf, psltbl [gff]\n");
         | 
| 140 | 
            +
            		fprintf(stderr, "         -s INT     sequence name column [1]\n");
         | 
| 141 | 
            +
            		fprintf(stderr, "         -b INT     start column [4]\n");
         | 
| 142 | 
            +
            		fprintf(stderr, "         -e INT     end column; can be identical to '-b' [5]\n");
         | 
| 143 | 
            +
            		fprintf(stderr, "         -S INT     skip first INT lines [0]\n");
         | 
| 144 | 
            +
            		fprintf(stderr, "         -c CHAR    symbol for comment/meta lines [#]\n");
         | 
| 145 | 
            +
            	    fprintf(stderr, "         -r FILE    replace the header with the content of FILE [null]\n");
         | 
| 146 | 
            +
            		fprintf(stderr, "         -B         region1 is a BED file (entire file will be read)\n");
         | 
| 147 | 
            +
            		fprintf(stderr, "         -0         zero-based coordinate\n");
         | 
| 148 | 
            +
            		fprintf(stderr, "         -h         print the header lines\n");
         | 
| 149 | 
            +
            		fprintf(stderr, "         -l         list chromosome names\n");
         | 
| 150 | 
            +
            		fprintf(stderr, "         -f         force to overwrite the index\n");
         | 
| 151 | 
            +
            		fprintf(stderr, "\n");
         | 
| 152 | 
            +
            		return 1;
         | 
| 153 | 
            +
            	}
         | 
| 154 | 
            +
                if (list_chrms) {
         | 
| 155 | 
            +
            		ti_index_t *idx;
         | 
| 156 | 
            +
            		int i, n;
         | 
| 157 | 
            +
            		const char **names;
         | 
| 158 | 
            +
            		idx = ti_index_load(argv[optind]);
         | 
| 159 | 
            +
            		if (idx == 0) {
         | 
| 160 | 
            +
            			fprintf(stderr, "[main] fail to load the index file.\n");
         | 
| 161 | 
            +
            			return 1;
         | 
| 162 | 
            +
            		}
         | 
| 163 | 
            +
            		names = ti_seqname(idx, &n);
         | 
| 164 | 
            +
            		for (i = 0; i < n; ++i) printf("%s\n", names[i]);
         | 
| 165 | 
            +
            		free(names);
         | 
| 166 | 
            +
            		ti_index_destroy(idx);
         | 
| 167 | 
            +
            		return 0;
         | 
| 168 | 
            +
            	}
         | 
| 169 | 
            +
                if (reheader)
         | 
| 170 | 
            +
                    return reheader_file(reheader,argv[optind],conf.meta_char);
         | 
| 171 | 
            +
             | 
| 172 | 
            +
            	struct stat stat_tbi,stat_vcf;
         | 
| 173 | 
            +
                char *fnidx = calloc(strlen(argv[optind]) + 5, 1);
         | 
| 174 | 
            +
               	strcat(strcpy(fnidx, argv[optind]), ".tbi");
         | 
| 175 | 
            +
             | 
| 176 | 
            +
            	if (optind + 1 == argc) {
         | 
| 177 | 
            +
            		if (force == 0) {
         | 
| 178 | 
            +
            			if (stat(fnidx, &stat_tbi) == 0) 
         | 
| 179 | 
            +
                        {
         | 
| 180 | 
            +
                            // Before complaining, check if the VCF file isn't newer. This is a common source of errors,
         | 
| 181 | 
            +
                            //  people tend not to notice that tabix failed
         | 
| 182 | 
            +
                            stat(argv[optind], &stat_vcf);
         | 
| 183 | 
            +
                            if ( stat_vcf.st_mtime <= stat_tbi.st_mtime )
         | 
| 184 | 
            +
                            {
         | 
| 185 | 
            +
                                fprintf(stderr, "[tabix] the index file exists. Please use '-f' to overwrite.\n");
         | 
| 186 | 
            +
                                free(fnidx);
         | 
| 187 | 
            +
                                return 1;
         | 
| 188 | 
            +
                            }
         | 
| 189 | 
            +
            			}
         | 
| 190 | 
            +
            		}
         | 
| 191 | 
            +
                    if ( bgzf_check_bgzf(argv[optind])!=1 )
         | 
| 192 | 
            +
                    {
         | 
| 193 | 
            +
                        fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]);
         | 
| 194 | 
            +
                        free(fnidx);
         | 
| 195 | 
            +
                        return 1;
         | 
| 196 | 
            +
                    }
         | 
| 197 | 
            +
            		return ti_index_build(argv[optind], &conf);
         | 
| 198 | 
            +
            	}
         | 
| 199 | 
            +
            	{ // retrieve
         | 
| 200 | 
            +
            		tabix_t *t;
         | 
| 201 | 
            +
                    // Common source of errors: new VCF is used with an old index
         | 
| 202 | 
            +
                    stat(fnidx, &stat_tbi);
         | 
| 203 | 
            +
                    stat(argv[optind], &stat_vcf);
         | 
| 204 | 
            +
                    if ( force==0 && stat_vcf.st_mtime > stat_tbi.st_mtime )
         | 
| 205 | 
            +
                    {
         | 
| 206 | 
            +
                        fprintf(stderr, "[tabix] the index file is older than the vcf file. Please use '-f' to overwrite or reindex.\n");
         | 
| 207 | 
            +
                        free(fnidx);
         | 
| 208 | 
            +
                        return 1;
         | 
| 209 | 
            +
                    }
         | 
| 210 | 
            +
                    free(fnidx);
         | 
| 211 | 
            +
             | 
| 212 | 
            +
            		if ((t = ti_open(argv[optind], 0)) == 0) {
         | 
| 213 | 
            +
            			fprintf(stderr, "[main] fail to open the data file.\n");
         | 
| 214 | 
            +
            			return 1;
         | 
| 215 | 
            +
            		}
         | 
| 216 | 
            +
            		if (strcmp(argv[optind+1], ".") == 0) { // retrieve all
         | 
| 217 | 
            +
            			ti_iter_t iter;
         | 
| 218 | 
            +
            			const char *s;
         | 
| 219 | 
            +
            			int len;
         | 
| 220 | 
            +
            			iter = ti_query(t, 0, 0, 0);
         | 
| 221 | 
            +
            			while ((s = ti_read(t, iter, &len)) != 0) {
         | 
| 222 | 
            +
            				fputs(s, stdout); fputc('\n', stdout);
         | 
| 223 | 
            +
            			}
         | 
| 224 | 
            +
            			ti_iter_destroy(iter);
         | 
| 225 | 
            +
            		} else { // retrieve from specified regions
         | 
| 226 | 
            +
            			int i, len;
         | 
| 227 | 
            +
                        ti_iter_t iter;
         | 
| 228 | 
            +
                        const char *s;
         | 
| 229 | 
            +
            			const ti_conf_t *idxconf;
         | 
| 230 | 
            +
             | 
| 231 | 
            +
            			if (ti_lazy_index_load(t) < 0 && bed_reg == 0) {
         | 
| 232 | 
            +
                            fprintf(stderr,"[tabix] failed to load the index file.\n");
         | 
| 233 | 
            +
                            return 1;
         | 
| 234 | 
            +
                        }
         | 
| 235 | 
            +
            			idxconf = ti_get_conf(t->idx);
         | 
| 236 | 
            +
             | 
| 237 | 
            +
                        if ( print_header )
         | 
| 238 | 
            +
                        {
         | 
| 239 | 
            +
                            // If requested, print the header lines here
         | 
| 240 | 
            +
                            iter = ti_query(t, 0, 0, 0);
         | 
| 241 | 
            +
                            while ((s = ti_read(t, iter, &len)) != 0) {
         | 
| 242 | 
            +
                                if ((int)(*s) != idxconf->meta_char) break;
         | 
| 243 | 
            +
                                fputs(s, stdout); fputc('\n', stdout);
         | 
| 244 | 
            +
                            }
         | 
| 245 | 
            +
                            ti_iter_destroy(iter);
         | 
| 246 | 
            +
                        }
         | 
| 247 | 
            +
            			if (bed_reg) {
         | 
| 248 | 
            +
            				extern int bed_overlap(const void *_h, const char *chr, int beg, int end);
         | 
| 249 | 
            +
            				extern void *bed_read(const char *fn);
         | 
| 250 | 
            +
            				extern void bed_destroy(void *_h);
         | 
| 251 | 
            +
             | 
| 252 | 
            +
            				const ti_conf_t *conf_ = idxconf? idxconf : &conf; // use the index file if available
         | 
| 253 | 
            +
            				void *bed = bed_read(argv[optind+1]); // load the BED file
         | 
| 254 | 
            +
            				ti_interval_t intv;
         | 
| 255 | 
            +
             | 
| 256 | 
            +
            				if (bed == 0) {
         | 
| 257 | 
            +
            					fprintf(stderr, "[main] fail to read the BED file.\n");
         | 
| 258 | 
            +
            					return 1;
         | 
| 259 | 
            +
            				}
         | 
| 260 | 
            +
            				iter = ti_query(t, 0, 0, 0);
         | 
| 261 | 
            +
            				while ((s = ti_read(t, iter, &len)) != 0) {
         | 
| 262 | 
            +
            					int c;
         | 
| 263 | 
            +
            					ti_get_intv(conf_, len, (char*)s, &intv);
         | 
| 264 | 
            +
            					c = *intv.se; *intv.se = '\0';
         | 
| 265 | 
            +
            					if (bed_overlap(bed, intv.ss, intv.beg, intv.end)) {
         | 
| 266 | 
            +
            						*intv.se = c;
         | 
| 267 | 
            +
            						puts(s);
         | 
| 268 | 
            +
            					}
         | 
| 269 | 
            +
            					*intv.se = c;
         | 
| 270 | 
            +
            				}
         | 
| 271 | 
            +
                            ti_iter_destroy(iter);
         | 
| 272 | 
            +
            				bed_destroy(bed);
         | 
| 273 | 
            +
            			} else {
         | 
| 274 | 
            +
            				for (i = optind + 1; i < argc; ++i) {
         | 
| 275 | 
            +
            					int tid, beg, end;
         | 
| 276 | 
            +
            					if (ti_parse_region(t->idx, argv[i], &tid, &beg, &end) == 0) {
         | 
| 277 | 
            +
            						iter = ti_queryi(t, tid, beg, end);
         | 
| 278 | 
            +
            							while ((s = ti_read(t, iter, &len)) != 0) {
         | 
| 279 | 
            +
            							fputs(s, stdout); fputc('\n', stdout);
         | 
| 280 | 
            +
            						}
         | 
| 281 | 
            +
            						ti_iter_destroy(iter);
         | 
| 282 | 
            +
            					} 
         | 
| 283 | 
            +
                        	    // else fprintf(stderr, "[main] invalid region: unknown target name or minus interval.\n");
         | 
| 284 | 
            +
            				}
         | 
| 285 | 
            +
            			}
         | 
| 286 | 
            +
            		}
         | 
| 287 | 
            +
            		ti_close(t);
         | 
| 288 | 
            +
            	}
         | 
| 289 | 
            +
            	return 0;
         | 
| 290 | 
            +
            }
         | 
| @@ -0,0 +1,76 @@ | |
| 1 | 
            +
            package Tabix;
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            use strict;
         | 
| 4 | 
            +
            use warnings;
         | 
| 5 | 
            +
            use Carp qw/croak/;
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            use TabixIterator;
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            require Exporter;
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            our @ISA = qw/Exporter/;
         | 
| 12 | 
            +
            our @EXPORT = qw/tabix_open tabix_close tabix_read tabix_query tabix_getnames tabix_iter_free/;
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            our $VERSION = '0.2.0';
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            require XSLoader;
         | 
| 17 | 
            +
            XSLoader::load('Tabix', $VERSION);
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            # Constructor.  Takes named arguments: -data (required, the data file)
            # and -index (optional, the index file).  Croaks when -data is missing,
            # otherwise blesses an empty hash into the class and calls open() on it.
            sub new {
              my ($invocant, %args) = @_;
              croak("-data argument required") unless $args{-data};
              # Works both as a class method and when called on an existing object.
              my $class = ref($invocant) || $invocant;
              my $self = bless({}, $class);
              $self->open($args{-data}, $args{-index});
              return $self;
            }
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            # Closes whatever is currently open, remembers the two file names and
            # stores the handle returned by tabix_open() in $self->{_}.  The index
            # name is passed to tabix_open() only when it is true.
            sub open {
              my $self = shift;
              my ($fn, $fnidx) = @_;
              $self->close;
              @{$self}{qw/_fn _fnidx/} = ($fn, $fnidx);
              $self->{_} = $fnidx ? tabix_open($fn, $fnidx) : tabix_open($fn);
            }
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            # If a handle is stored in $self->{_}, passes it to tabix_close() and
            # removes the handle and both file names from the object.  Does nothing
            # when no handle is stored.
            sub close {
              my ($self) = @_;
              if ($self->{_}) {
                tabix_close($self->{_});
                delete $self->{_};
                delete $self->{_fn};
                delete $self->{_fnidx};
              }
            }
         | 
| 45 | 
            +
             | 
| 46 | 
            +
            sub DESTROY {
         | 
| 47 | 
            +
              my $self = shift;
         | 
| 48 | 
            +
              $self->close;
         | 
| 49 | 
            +
            }
         | 
| 50 | 
            +
             | 
| 51 | 
            +
            sub query {
         | 
| 52 | 
            +
              my $self = shift;
         | 
| 53 | 
            +
              my $iter;
         | 
| 54 | 
            +
              if (@_) {
         | 
| 55 | 
            +
            	$iter = tabix_query($self->{_}, @_);
         | 
| 56 | 
            +
              } else {
         | 
| 57 | 
            +
            	$iter = tabix_query($self->{_});
         | 
| 58 | 
            +
              }
         | 
| 59 | 
            +
              my $i = TabixIterator->new;
         | 
| 60 | 
            +
              $i->set($iter);
         | 
| 61 | 
            +
              return $i;
         | 
| 62 | 
            +
            }
         | 
| 63 | 
            +
             | 
| 64 | 
            +
            sub read {
         | 
| 65 | 
            +
              my $self = shift;
         | 
| 66 | 
            +
              my $iter = shift;
         | 
| 67 | 
            +
              return tabix_read($self->{_}, $iter->get);
         | 
| 68 | 
            +
            }
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            sub getnames {
         | 
| 71 | 
            +
              my $self = shift;
         | 
| 72 | 
            +
              return tabix_getnames($self->{_});
         | 
| 73 | 
            +
            }
         | 
| 74 | 
            +
             | 
| 75 | 
            +
            1;
         | 
| 76 | 
            +
            __END__
         | 
| @@ -0,0 +1,71 @@ | |
| 1 | 
            +
            #include "EXTERN.h"
         | 
| 2 | 
            +
            #include "perl.h"
         | 
| 3 | 
            +
            #include "XSUB.h"
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            #include <stdlib.h>
         | 
| 6 | 
            +
            #include "tabix.h"
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            MODULE = Tabix PACKAGE = Tabix
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            tabix_t*
         | 
| 11 | 
            +
            tabix_open(fn, fnidx=0)
         | 
| 12 | 
            +
            	char *fn
         | 
| 13 | 
            +
            	char *fnidx
         | 
| 14 | 
            +
              CODE:
         | 
| 15 | 
            +
            	RETVAL = ti_open(fn, fnidx);
         | 
| 16 | 
            +
              OUTPUT:
         | 
| 17 | 
            +
            	RETVAL
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            void
         | 
| 20 | 
            +
            tabix_close(t)
         | 
| 21 | 
            +
            	tabix_t *t
         | 
| 22 | 
            +
              CODE:
         | 
| 23 | 
            +
            	ti_close(t);
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            ti_iter_t
         | 
| 26 | 
            +
            tabix_query(t, seq=0, beg=0, end=0x7fffffff)
         | 
| 27 | 
            +
            	tabix_t *t
         | 
| 28 | 
            +
            	const char *seq
         | 
| 29 | 
            +
            	int beg
         | 
| 30 | 
            +
            	int end
         | 
| 31 | 
            +
              PREINIT:
         | 
| 32 | 
            +
              CODE:
         | 
| 33 | 
            +
            	RETVAL = ti_query(t, seq, beg, end);
         | 
| 34 | 
            +
              OUTPUT:
         | 
| 35 | 
            +
            	RETVAL
         | 
| 36 | 
            +
             | 
| 37 | 
            +
            SV*
         | 
| 38 | 
            +
            tabix_read(t, iter)
         | 
| 39 | 
            +
            	tabix_t *t
         | 
| 40 | 
            +
            	ti_iter_t iter
         | 
| 41 | 
            +
              PREINIT:
         | 
| 42 | 
            +
            	const char *s;
         | 
| 43 | 
            +
            	int len;
         | 
| 44 | 
            +
              CODE:
         | 
| 45 | 
            +
            	s = ti_read(t, iter, &len);
         | 
| 46 | 
            +
            	if (s == 0)
         | 
| 47 | 
            +
            	   XSRETURN_EMPTY; /* statement macro that already returns; "return XSRETURN_EMPTY" fails to compile when STMT_START is plain "do" */
         | 
| 48 | 
            +
            	RETVAL = newSVpv(s, len);
         | 
| 49 | 
            +
              OUTPUT:
         | 
| 50 | 
            +
            	RETVAL
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            void
         | 
| 53 | 
            +
            tabix_getnames(t)
         | 
| 54 | 
            +
            	tabix_t *t
         | 
| 55 | 
            +
              PREINIT:
         | 
| 56 | 
            +
            	const char **names;
         | 
| 57 | 
            +
            	int i, n;
         | 
| 58 | 
            +
              PPCODE:
         | 
| 59 | 
            +
            	ti_lazy_index_load(t);
         | 
| 60 | 
            +
            	names = ti_seqname(t->idx, &n);
         | 
| 61 | 
            +
            	for (i = 0; i < n; ++i)
         | 
| 62 | 
            +
            		XPUSHs(sv_2mortal(newSVpv(names[i], 0)));
         | 
| 63 | 
            +
            	free(names);
         | 
| 64 | 
            +
             | 
| 65 | 
            +
            MODULE = Tabix PACKAGE = TabixIterator
         | 
| 66 | 
            +
             | 
| 67 | 
            +
            void
         | 
| 68 | 
            +
            tabix_iter_free(iter)
         | 
| 69 | 
            +
            	ti_iter_t iter
         | 
| 70 | 
            +
              CODE:
         | 
| 71 | 
            +
            	ti_iter_destroy(iter);
         | 
| @@ -0,0 +1,41 @@ | |
| 1 | 
            +
            package TabixIterator;
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            use strict;
         | 
| 4 | 
            +
            use warnings;
         | 
| 5 | 
            +
            use Carp qw/croak/;
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            require Exporter;
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            our @ISA = qw/Exporter/;
         | 
| 10 | 
            +
            our @EXPORT = qw/tabix_iter_free/;
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            our $VERSION = '0.2.0';
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            require XSLoader;
         | 
| 15 | 
            +
            XSLoader::load('Tabix', $VERSION);
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            sub new {
         | 
| 18 | 
            +
              my $invocant = shift;
         | 
| 19 | 
            +
              my $class = ref($invocant) || $invocant;
         | 
| 20 | 
            +
              my $self = {};
         | 
| 21 | 
            +
              bless($self, $class);
         | 
| 22 | 
            +
              return $self;
         | 
| 23 | 
            +
            }
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            sub set {
         | 
| 26 | 
            +
              my ($self, $iter) = @_;
         | 
| 27 | 
            +
              $self->{_} = $iter;
         | 
| 28 | 
            +
            }
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            sub get {
         | 
| 31 | 
            +
              my $self = shift;
         | 
| 32 | 
            +
              return $self->{_};
         | 
| 33 | 
            +
            }
         | 
| 34 | 
            +
             | 
| 35 | 
            +
            sub DESTROY {
         | 
| 36 | 
            +
              my $self = shift;
         | 
| 37 | 
            +
              tabix_iter_free($self->{_}) if ($self->{_});
         | 
| 38 | 
            +
            }
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            1;
         | 
| 41 | 
            +
            __END__
         | 
| @@ -0,0 +1,28 @@ | |
| 1 | 
            +
            #-*-Perl-*-
         | 
| 2 | 
            +
            use Test::More tests => 9;
         | 
| 3 | 
            +
            BEGIN { use_ok('Tabix') };
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            { # C-like low-level interface
         | 
| 6 | 
            +
            	my $t = tabix_open("../example.gtf.gz");
         | 
| 7 | 
            +
            	ok($t);
         | 
| 8 | 
            +
            	my $iter = tabix_query($t, "chr1", 0, 2000);
         | 
| 9 | 
            +
            	ok($iter);
         | 
| 10 | 
            +
            	$_ = 0;
         | 
| 11 | 
            +
            	++$_ while (tabix_read($t, $iter));
         | 
| 12 | 
            +
            	is($_, 6);
         | 
| 13 | 
            +
            	tabix_iter_free($iter);
         | 
| 14 | 
            +
            	@_ = tabix_getnames($t);
         | 
| 15 | 
            +
            	is(scalar(@_), 2);
         | 
| 16 | 
            +
            }
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            { # OOP high-level interface
         | 
| 19 | 
            +
            	my $t = Tabix->new(-data=>"../example.gtf.gz");
         | 
| 20 | 
            +
            	ok($t);
         | 
| 21 | 
            +
            	my $iter = $t->query("chr1", 3000, 5000);
         | 
| 22 | 
            +
            	ok($iter);
         | 
| 23 | 
            +
            	$_ = 0;
         | 
| 24 | 
            +
            	++$_ while ($t->read($iter));
         | 
| 25 | 
            +
            	is($_, 27);
         | 
| 26 | 
            +
            	@_ = $t->getnames;
         | 
| 27 | 
            +
            	is($_[1], "chr2");
         | 
| 28 | 
            +
            }
         | 
| @@ -0,0 +1,28 @@ | |
| 1 | 
            +
            #-*-Perl-*-
         | 
| 2 | 
            +
            use Test::More tests => 9;
         | 
| 3 | 
            +
            BEGIN { use_ok('Tabix') };
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            { # FTP access
         | 
| 6 | 
            +
            	my $t = Tabix->new(-data=>"ftp://ftp.ncbi.nih.gov/1000genomes/ftp/pilot_data/release/2010_03/pilot1/CEU.SRP000031.2010_03.genotypes.vcf.gz");
         | 
| 7 | 
            +
            	ok($t);
         | 
| 8 | 
            +
            	my $iter = $t->query("1", 1000000, 1100000);
         | 
| 9 | 
            +
            	ok($iter);
         | 
| 10 | 
            +
            	$_ = 0;
         | 
| 11 | 
            +
            	++$_ while ($t->read($iter));
         | 
| 12 | 
            +
            	is($_, 306);
         | 
| 13 | 
            +
            	@_ = $t->getnames;
         | 
| 14 | 
            +
            	is(scalar(@_), 22);
         | 
| 15 | 
            +
            }
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            { # FTP access plus FTP index
         | 
| 18 | 
            +
            	my $t = Tabix->new(-data=>"ftp://ftp.ncbi.nih.gov/1000genomes/ftp/pilot_data/release/2010_03/pilot1/CEU.SRP000031.2010_03.genotypes.vcf.gz",
         | 
| 19 | 
            +
            					   -index=>"ftp://ftp.ncbi.nih.gov/1000genomes/ftp/pilot_data/release/2010_03/pilot1/CEU.SRP000031.2010_03.genotypes.vcf.gz.tbi");
         | 
| 20 | 
            +
            	ok($t);
         | 
| 21 | 
            +
            	my $iter = $t->query("19", 10000000, 10100000);
         | 
| 22 | 
            +
            	ok($iter);
         | 
| 23 | 
            +
            	$_ = 0;
         | 
| 24 | 
            +
            	++$_ while ($t->read($iter));
         | 
| 25 | 
            +
            	is($_, 268);
         | 
| 26 | 
            +
            	@_ = $t->getnames;
         | 
| 27 | 
            +
            	is(scalar(@_), 22);
         | 
| 28 | 
            +
            }
         | 
| @@ -0,0 +1,55 @@ | |
| 1 | 
            +
            #!/usr/bin/env python
         | 
| 2 | 
            +
            #
         | 
| 3 | 
            +
            # The MIT License
         | 
| 4 | 
            +
            #
         | 
| 5 | 
            +
            # Copyright (c) 2011 Seoul National University.
         | 
| 6 | 
            +
            #
         | 
| 7 | 
            +
            # Permission is hereby granted, free of charge, to any person obtaining
         | 
| 8 | 
            +
            # a copy of this software and associated documentation files (the
         | 
| 9 | 
            +
            # "Software"), to deal in the Software without restriction, including
         | 
| 10 | 
            +
            # without limitation the rights to use, copy, modify, merge, publish,
         | 
| 11 | 
            +
            # distribute, sublicense, and/or sell copies of the Software, and to
         | 
| 12 | 
            +
            # permit persons to whom the Software is furnished to do so, subject to
         | 
| 13 | 
            +
            # the following conditions:
         | 
| 14 | 
            +
            #
         | 
| 15 | 
            +
            # The above copyright notice and this permission notice shall be
         | 
| 16 | 
            +
            # included in all copies or substantial portions of the Software.
         | 
| 17 | 
            +
            #
         | 
| 18 | 
            +
            # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
         | 
| 19 | 
            +
            # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
         | 
| 20 | 
            +
            # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
         | 
| 21 | 
            +
            # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
         | 
| 22 | 
            +
            # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
         | 
| 23 | 
            +
            # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
         | 
| 24 | 
            +
            # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         | 
| 25 | 
            +
            # SOFTWARE.
         | 
| 26 | 
            +
            #
         | 
| 27 | 
            +
            # Contact: Hyeshik Chang <hyeshik@snu.ac.kr>
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            from distutils.core import setup, Extension
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            # Change this to True when you need the knetfile support.
         | 
| 32 | 
            +
            USE_KNETFILE = False
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            TABIX_SOURCE_FILES = [
         | 
| 35 | 
            +
                '../bgzf.c', '../bgzip.c', '../index.c', '../knetfile.c', '../kstring.c'
         | 
| 36 | 
            +
            ]
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            define_options = [('_FILE_OFFSET_BITS', 64)]
         | 
| 39 | 
            +
            if USE_KNETFILE:
         | 
| 40 | 
            +
                define_options.append(('_USE_KNETFILE', 1))
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            ext_modules = [Extension("tabix", ["tabixmodule.c"] + TABIX_SOURCE_FILES,
         | 
| 43 | 
            +
                                     include_dirs=['..'],
         | 
| 44 | 
            +
                                     libraries=['z'],
         | 
| 45 | 
            +
                                     define_macros=define_options)]
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            setup (name = 'tabix',
         | 
| 48 | 
            +
                   version = '1.0',
         | 
| 49 | 
            +
                   description = 'Python interface to tabix, a generic indexer '
         | 
| 50 | 
            +
                                 'for TAB-delimited genome position files',
         | 
| 51 | 
            +
                   author = 'Hyeshik Chang',
         | 
| 52 | 
            +
                   author_email = 'hyeshik@snu.ac.kr',
         | 
| 53 | 
            +
                   license = 'MIT',
         | 
| 54 | 
            +
                   ext_modules = ext_modules
         | 
| 55 | 
            +
            )
         |