ngs_server 0.4 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/tabix/ChangeLog +593 -0
- data/ext/tabix/Makefile +65 -0
- data/ext/tabix/NEWS +126 -0
- data/ext/tabix/TabixReader.java +395 -0
- data/ext/tabix/bam_endian.h +42 -0
- data/ext/tabix/bedidx.c +156 -0
- data/ext/tabix/bgzf.c +714 -0
- data/ext/tabix/bgzf.h +157 -0
- data/ext/tabix/bgzip.c +206 -0
- data/ext/tabix/example.gtf.gz +0 -0
- data/ext/tabix/example.gtf.gz.tbi +0 -0
- data/ext/tabix/extconf.rb +1 -0
- data/ext/tabix/index.c +998 -0
- data/ext/tabix/khash.h +486 -0
- data/ext/tabix/knetfile.c +632 -0
- data/ext/tabix/knetfile.h +75 -0
- data/ext/tabix/kseq.h +227 -0
- data/ext/tabix/ksort.h +271 -0
- data/ext/tabix/kstring.c +165 -0
- data/ext/tabix/kstring.h +68 -0
- data/ext/tabix/main.c +290 -0
- data/ext/tabix/perl/MANIFEST +8 -0
- data/ext/tabix/perl/Makefile.PL +8 -0
- data/ext/tabix/perl/Tabix.pm +76 -0
- data/ext/tabix/perl/Tabix.xs +71 -0
- data/ext/tabix/perl/TabixIterator.pm +41 -0
- data/ext/tabix/perl/t/01local.t +28 -0
- data/ext/tabix/perl/t/02remote.t +28 -0
- data/ext/tabix/perl/typemap +3 -0
- data/ext/tabix/python/setup.py +55 -0
- data/ext/tabix/python/tabixmodule.c +408 -0
- data/ext/tabix/python/test.py +91 -0
- data/ext/tabix/tabix.1 +132 -0
- data/ext/tabix/tabix.h +145 -0
- data/ext/tabix/tabix.py +87 -0
- data/ext/tabix/tabix.tex +121 -0
- data/ext/vcftools/perl/Vcf.pm +5 -3
- data/ext/vcftools/perl/vcf-query +2 -0
- data/lib/ngs_server/version.rb +1 -1
- data/lib/ngs_server.rb +12 -11
- data/ngs_server.gemspec +1 -2
- metadata +39 -2
data/ext/tabix/kstring.h
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
#ifndef KSTRING_H
#define KSTRING_H

#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/* Round the lvalue x up, in place, to the next power of two by smearing the
 * highest set bit downwards. Only shifts up to 16 are applied, so the result
 * is only a power of two for values that fit in 32 bits. */
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif

/* Growable, NUL-terminated byte string.
 *   l - number of bytes stored, not counting the terminating NUL
 *   m - number of bytes allocated for s
 *   s - heap buffer managed with realloc(); a zero-initialised kstring_t
 *       ({0, 0, NULL}) is a valid empty string */
#ifndef KSTRING_T
#define KSTRING_T kstring_t
typedef struct __kstring_t {
	size_t l, m;
	char *s;
} kstring_t;
#endif

int ksprintf(kstring_t *s, const char *fmt, ...);
int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);

// calculate the auxiliary array, allocated by calloc()
int *ksBM_prep(const uint8_t *pat, int m);

/* Search pat in str and return the list of matches. The size of the
 * list is returned as n_matches. _prep is the array returned by
 * ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. */
int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches);

/* Append exactly l bytes starting at p to s and re-terminate with NUL.
 *
 * The bytes are copied verbatim (memcpy), so embedded NUL bytes in p are
 * preserved; p must therefore reference at least l readable bytes.
 *
 * Returns l on success, or -1 if the buffer could not be grown, in which
 * case s is left unchanged and still owns its previous buffer. */
static inline int kputsn(const char *p, int l, kstring_t *s)
{
	if (s->l + l + 1 >= s->m) {
		size_t new_m = s->l + l + 2;
		char *tmp;
		kroundup32(new_m);
		/* Keep the old pointer until realloc() has succeeded so that a
		 * failure neither leaks the buffer nor leaves s->s NULL. */
		tmp = (char*)realloc(s->s, new_m);
		if (tmp == NULL) return -1;
		s->s = tmp;
		s->m = new_m;
	}
	memcpy(s->s + s->l, p, l);
	s->l += l;
	s->s[s->l] = 0;
	return l;
}

/* Append the NUL-terminated string p to s.
 * Returns the number of bytes appended, or -1 on allocation failure. */
static inline int kputs(const char *p, kstring_t *s)
{
	return kputsn(p, (int)strlen(p), s);
}

/* Append the single byte c to s and re-terminate with NUL.
 * Returns c on success, or -1 if the buffer could not be grown, in which
 * case s is left unchanged. */
static inline int kputc(int c, kstring_t *s)
{
	if (s->l + 1 >= s->m) {
		size_t new_m = s->l + 2;
		char *tmp;
		kroundup32(new_m);
		tmp = (char*)realloc(s->s, new_m);
		if (tmp == NULL) return -1;
		s->s = tmp;
		s->m = new_m;
	}
	s->s[s->l++] = c;
	s->s[s->l] = 0;
	return c;
}

/* Split s->s on delimiter via ksplit_core(), starting from an empty offsets
 * array. The value returned by ksplit_core() is stored in *n and the offsets
 * array it produced is returned to the caller. */
static inline int *ksplit(kstring_t *s, int delimiter, int *n)
{
	int max = 0, *offsets = 0;
	*n = ksplit_core(s->s, delimiter, &max, &offsets);
	return offsets;
}

#endif
|
data/ext/tabix/main.c
ADDED
@@ -0,0 +1,290 @@
|
|
1
|
+
#include <string.h>
|
2
|
+
#include <unistd.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <stdio.h>
|
5
|
+
#include <sys/stat.h>
|
6
|
+
#include <errno.h>
|
7
|
+
#include "bgzf.h"
|
8
|
+
#include "tabix.h"
|
9
|
+
|
10
|
+
#define PACKAGE_VERSION "0.2.5 (r964)"
|
11
|
+
|
12
|
+
/* Print a printf-style message to stderr and return -1 from the enclosing
 * function. Wrapped in do { } while (0) so that `error(...);` is a single
 * statement and stays correct inside unbraced if/else. */
#define error(...) do { fprintf(stderr,__VA_ARGS__); return -1; } while (0)
|
13
|
+
|
14
|
+
int reheader_file(const char *header, const char *file, int meta)
|
15
|
+
{
|
16
|
+
BGZF *fp = bgzf_open(file,"r");
|
17
|
+
if (bgzf_read_block(fp) != 0 || !fp->block_length)
|
18
|
+
return -1;
|
19
|
+
|
20
|
+
char *buffer = fp->uncompressed_block;
|
21
|
+
int skip_until = 0;
|
22
|
+
|
23
|
+
if ( buffer[0]==meta )
|
24
|
+
{
|
25
|
+
skip_until = 1;
|
26
|
+
|
27
|
+
// Skip the header
|
28
|
+
while (1)
|
29
|
+
{
|
30
|
+
if ( buffer[skip_until]=='\n' )
|
31
|
+
{
|
32
|
+
skip_until++;
|
33
|
+
if ( skip_until>=fp->block_length )
|
34
|
+
{
|
35
|
+
if (bgzf_read_block(fp) != 0 || !fp->block_length)
|
36
|
+
error("no body?\n");
|
37
|
+
skip_until = 0;
|
38
|
+
}
|
39
|
+
// The header has finished
|
40
|
+
if ( buffer[skip_until]!=meta ) break;
|
41
|
+
}
|
42
|
+
skip_until++;
|
43
|
+
if ( skip_until>=fp->block_length )
|
44
|
+
{
|
45
|
+
if (bgzf_read_block(fp) != 0 || !fp->block_length)
|
46
|
+
error("no body?\n");
|
47
|
+
skip_until = 0;
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
FILE *fh = fopen(header,"r");
|
53
|
+
if ( !fh )
|
54
|
+
error("%s: %s", header,strerror(errno));
|
55
|
+
int page_size = getpagesize();
|
56
|
+
char *buf = valloc(page_size);
|
57
|
+
BGZF *bgzf_out = bgzf_fdopen(fileno(stdout), "w");
|
58
|
+
ssize_t nread;
|
59
|
+
while ( (nread=fread(buf,1,page_size-1,fh))>0 )
|
60
|
+
{
|
61
|
+
if ( nread<page_size-1 && buf[nread-1]!='\n' )
|
62
|
+
buf[nread++] = '\n';
|
63
|
+
if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %s\n",bgzf_out->error);
|
64
|
+
}
|
65
|
+
fclose(fh);
|
66
|
+
|
67
|
+
if ( fp->block_length - skip_until > 0 )
|
68
|
+
{
|
69
|
+
if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0)
|
70
|
+
error("Error: %s\n",fp->error);
|
71
|
+
}
|
72
|
+
if (bgzf_flush(bgzf_out) < 0)
|
73
|
+
error("Error: %s\n",bgzf_out->error);
|
74
|
+
|
75
|
+
while (1)
|
76
|
+
{
|
77
|
+
#ifdef _USE_KNETFILE
|
78
|
+
nread = knet_read(fp->x.fpr, buf, page_size);
|
79
|
+
#else
|
80
|
+
nread = fread(buf, 1, page_size, fp->file);
|
81
|
+
#endif
|
82
|
+
if ( nread<=0 )
|
83
|
+
break;
|
84
|
+
|
85
|
+
#ifdef _USE_KNETFILE
|
86
|
+
int count = fwrite(buf, 1, nread, bgzf_out->x.fpw);
|
87
|
+
#else
|
88
|
+
int count = fwrite(buf, 1, nread, bgzf_out->file);
|
89
|
+
#endif
|
90
|
+
if (count != nread)
|
91
|
+
error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
|
92
|
+
}
|
93
|
+
|
94
|
+
if (bgzf_close(bgzf_out) < 0)
|
95
|
+
error("Error: %s\n",bgzf_out->error);
|
96
|
+
|
97
|
+
return 0;
|
98
|
+
}
|
99
|
+
|
100
|
+
|
101
|
+
int main(int argc, char *argv[])
|
102
|
+
{
|
103
|
+
int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, bed_reg = 0;
|
104
|
+
ti_conf_t conf = ti_conf_gff;
|
105
|
+
const char *reheader = NULL;
|
106
|
+
while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhfBr:")) >= 0) {
|
107
|
+
switch (c) {
|
108
|
+
case 'B': bed_reg = 1; break;
|
109
|
+
case '0': conf.preset |= TI_FLAG_UCSC; break;
|
110
|
+
case 'S': skip = atoi(optarg); break;
|
111
|
+
case 'c': meta = optarg[0]; break;
|
112
|
+
case 'p':
|
113
|
+
if (strcmp(optarg, "gff") == 0) conf = ti_conf_gff;
|
114
|
+
else if (strcmp(optarg, "bed") == 0) conf = ti_conf_bed;
|
115
|
+
else if (strcmp(optarg, "sam") == 0) conf = ti_conf_sam;
|
116
|
+
else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf = ti_conf_vcf;
|
117
|
+
else if (strcmp(optarg, "psltbl") == 0) conf = ti_conf_psltbl;
|
118
|
+
else {
|
119
|
+
fprintf(stderr, "[main] unrecognized preset '%s'\n", optarg);
|
120
|
+
return 1;
|
121
|
+
}
|
122
|
+
break;
|
123
|
+
case 's': conf.sc = atoi(optarg); break;
|
124
|
+
case 'b': conf.bc = atoi(optarg); break;
|
125
|
+
case 'e': conf.ec = atoi(optarg); break;
|
126
|
+
case 'l': list_chrms = 1; break;
|
127
|
+
case 'h': print_header = 1; break;
|
128
|
+
case 'f': force = 1; break;
|
129
|
+
case 'r': reheader = optarg; break;
|
130
|
+
}
|
131
|
+
}
|
132
|
+
if (skip >= 0) conf.line_skip = skip;
|
133
|
+
if (meta >= 0) conf.meta_char = meta;
|
134
|
+
if (optind == argc) {
|
135
|
+
fprintf(stderr, "\n");
|
136
|
+
fprintf(stderr, "Program: tabix (TAB-delimited file InderXer)\n");
|
137
|
+
fprintf(stderr, "Version: %s\n\n", PACKAGE_VERSION);
|
138
|
+
fprintf(stderr, "Usage: tabix <in.tab.bgz> [region1 [region2 [...]]]\n\n");
|
139
|
+
fprintf(stderr, "Options: -p STR preset: gff, bed, sam, vcf, psltbl [gff]\n");
|
140
|
+
fprintf(stderr, " -s INT sequence name column [1]\n");
|
141
|
+
fprintf(stderr, " -b INT start column [4]\n");
|
142
|
+
fprintf(stderr, " -e INT end column; can be identical to '-b' [5]\n");
|
143
|
+
fprintf(stderr, " -S INT skip first INT lines [0]\n");
|
144
|
+
fprintf(stderr, " -c CHAR symbol for comment/meta lines [#]\n");
|
145
|
+
fprintf(stderr, " -r FILE replace the header with the content of FILE [null]\n");
|
146
|
+
fprintf(stderr, " -B region1 is a BED file (entire file will be read)\n");
|
147
|
+
fprintf(stderr, " -0 zero-based coordinate\n");
|
148
|
+
fprintf(stderr, " -h print the header lines\n");
|
149
|
+
fprintf(stderr, " -l list chromosome names\n");
|
150
|
+
fprintf(stderr, " -f force to overwrite the index\n");
|
151
|
+
fprintf(stderr, "\n");
|
152
|
+
return 1;
|
153
|
+
}
|
154
|
+
if (list_chrms) {
|
155
|
+
ti_index_t *idx;
|
156
|
+
int i, n;
|
157
|
+
const char **names;
|
158
|
+
idx = ti_index_load(argv[optind]);
|
159
|
+
if (idx == 0) {
|
160
|
+
fprintf(stderr, "[main] fail to load the index file.\n");
|
161
|
+
return 1;
|
162
|
+
}
|
163
|
+
names = ti_seqname(idx, &n);
|
164
|
+
for (i = 0; i < n; ++i) printf("%s\n", names[i]);
|
165
|
+
free(names);
|
166
|
+
ti_index_destroy(idx);
|
167
|
+
return 0;
|
168
|
+
}
|
169
|
+
if (reheader)
|
170
|
+
return reheader_file(reheader,argv[optind],conf.meta_char);
|
171
|
+
|
172
|
+
struct stat stat_tbi,stat_vcf;
|
173
|
+
char *fnidx = calloc(strlen(argv[optind]) + 5, 1);
|
174
|
+
strcat(strcpy(fnidx, argv[optind]), ".tbi");
|
175
|
+
|
176
|
+
if (optind + 1 == argc) {
|
177
|
+
if (force == 0) {
|
178
|
+
if (stat(fnidx, &stat_tbi) == 0)
|
179
|
+
{
|
180
|
+
// Before complaining, check if the VCF file isn't newer. This is a common source of errors,
|
181
|
+
// people tend not to notice that tabix failed
|
182
|
+
stat(argv[optind], &stat_vcf);
|
183
|
+
if ( stat_vcf.st_mtime <= stat_tbi.st_mtime )
|
184
|
+
{
|
185
|
+
fprintf(stderr, "[tabix] the index file exists. Please use '-f' to overwrite.\n");
|
186
|
+
free(fnidx);
|
187
|
+
return 1;
|
188
|
+
}
|
189
|
+
}
|
190
|
+
}
|
191
|
+
if ( bgzf_check_bgzf(argv[optind])!=1 )
|
192
|
+
{
|
193
|
+
fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]);
|
194
|
+
free(fnidx);
|
195
|
+
return 1;
|
196
|
+
}
|
197
|
+
return ti_index_build(argv[optind], &conf);
|
198
|
+
}
|
199
|
+
{ // retrieve
|
200
|
+
tabix_t *t;
|
201
|
+
// Common source of errors: new VCF is used with an old index
|
202
|
+
stat(fnidx, &stat_tbi);
|
203
|
+
stat(argv[optind], &stat_vcf);
|
204
|
+
if ( force==0 && stat_vcf.st_mtime > stat_tbi.st_mtime )
|
205
|
+
{
|
206
|
+
fprintf(stderr, "[tabix] the index file is older than the vcf file. Please use '-f' to overwrite or reindex.\n");
|
207
|
+
free(fnidx);
|
208
|
+
return 1;
|
209
|
+
}
|
210
|
+
free(fnidx);
|
211
|
+
|
212
|
+
if ((t = ti_open(argv[optind], 0)) == 0) {
|
213
|
+
fprintf(stderr, "[main] fail to open the data file.\n");
|
214
|
+
return 1;
|
215
|
+
}
|
216
|
+
if (strcmp(argv[optind+1], ".") == 0) { // retrieve all
|
217
|
+
ti_iter_t iter;
|
218
|
+
const char *s;
|
219
|
+
int len;
|
220
|
+
iter = ti_query(t, 0, 0, 0);
|
221
|
+
while ((s = ti_read(t, iter, &len)) != 0) {
|
222
|
+
fputs(s, stdout); fputc('\n', stdout);
|
223
|
+
}
|
224
|
+
ti_iter_destroy(iter);
|
225
|
+
} else { // retrieve from specified regions
|
226
|
+
int i, len;
|
227
|
+
ti_iter_t iter;
|
228
|
+
const char *s;
|
229
|
+
const ti_conf_t *idxconf;
|
230
|
+
|
231
|
+
if (ti_lazy_index_load(t) < 0 && bed_reg == 0) {
|
232
|
+
fprintf(stderr,"[tabix] failed to load the index file.\n");
|
233
|
+
return 1;
|
234
|
+
}
|
235
|
+
idxconf = ti_get_conf(t->idx);
|
236
|
+
|
237
|
+
if ( print_header )
|
238
|
+
{
|
239
|
+
// If requested, print the header lines here
|
240
|
+
iter = ti_query(t, 0, 0, 0);
|
241
|
+
while ((s = ti_read(t, iter, &len)) != 0) {
|
242
|
+
if ((int)(*s) != idxconf->meta_char) break;
|
243
|
+
fputs(s, stdout); fputc('\n', stdout);
|
244
|
+
}
|
245
|
+
ti_iter_destroy(iter);
|
246
|
+
}
|
247
|
+
if (bed_reg) {
|
248
|
+
extern int bed_overlap(const void *_h, const char *chr, int beg, int end);
|
249
|
+
extern void *bed_read(const char *fn);
|
250
|
+
extern void bed_destroy(void *_h);
|
251
|
+
|
252
|
+
const ti_conf_t *conf_ = idxconf? idxconf : &conf; // use the index file if available
|
253
|
+
void *bed = bed_read(argv[optind+1]); // load the BED file
|
254
|
+
ti_interval_t intv;
|
255
|
+
|
256
|
+
if (bed == 0) {
|
257
|
+
fprintf(stderr, "[main] fail to read the BED file.\n");
|
258
|
+
return 1;
|
259
|
+
}
|
260
|
+
iter = ti_query(t, 0, 0, 0);
|
261
|
+
while ((s = ti_read(t, iter, &len)) != 0) {
|
262
|
+
int c;
|
263
|
+
ti_get_intv(conf_, len, (char*)s, &intv);
|
264
|
+
c = *intv.se; *intv.se = '\0';
|
265
|
+
if (bed_overlap(bed, intv.ss, intv.beg, intv.end)) {
|
266
|
+
*intv.se = c;
|
267
|
+
puts(s);
|
268
|
+
}
|
269
|
+
*intv.se = c;
|
270
|
+
}
|
271
|
+
ti_iter_destroy(iter);
|
272
|
+
bed_destroy(bed);
|
273
|
+
} else {
|
274
|
+
for (i = optind + 1; i < argc; ++i) {
|
275
|
+
int tid, beg, end;
|
276
|
+
if (ti_parse_region(t->idx, argv[i], &tid, &beg, &end) == 0) {
|
277
|
+
iter = ti_queryi(t, tid, beg, end);
|
278
|
+
while ((s = ti_read(t, iter, &len)) != 0) {
|
279
|
+
fputs(s, stdout); fputc('\n', stdout);
|
280
|
+
}
|
281
|
+
ti_iter_destroy(iter);
|
282
|
+
}
|
283
|
+
// else fprintf(stderr, "[main] invalid region: unknown target name or minus interval.\n");
|
284
|
+
}
|
285
|
+
}
|
286
|
+
}
|
287
|
+
ti_close(t);
|
288
|
+
}
|
289
|
+
return 0;
|
290
|
+
}
|
@@ -0,0 +1,76 @@
|
|
1
|
+
package Tabix;

# Object-oriented wrapper around the tabix XS functions. The low-level
# tabix_* functions are exported as well for C-style use.

use strict;
use warnings;
use Carp qw/croak/;

use TabixIterator;

require Exporter;

our @ISA = qw/Exporter/;
our @EXPORT = qw/tabix_open tabix_close tabix_read tabix_query tabix_getnames tabix_iter_free/;

our $VERSION = '0.2.0';

require XSLoader;
XSLoader::load('Tabix', $VERSION);

# Tabix->new(-data => $file [, -index => $index_file])
# Creates the object and opens the data file; croaks when -data is missing.
sub new {
    my ($invocant, %args) = @_;
    croak("-data argument required") unless $args{-data};
    my $self = bless({}, ref($invocant) || $invocant);
    $self->open($args{-data}, $args{-index});
    return $self;
}

# $t->open($fn [, $fnidx])
# Closes any handle held so far, records both file names and opens the data
# file, passing the index name to tabix_open() only when one was given.
sub open {
    my $self = shift;
    my ($fn, $fnidx) = @_;
    $self->close;
    $self->{_fn}    = $fn;
    $self->{_fnidx} = $fnidx;
    $self->{_} = $fnidx ? tabix_open($fn, $fnidx) : tabix_open($fn);
}

# $t->close
# Releases the underlying handle, if any, and forgets the stored file names.
sub close {
    my ($self) = @_;
    if ($self->{_}) {
        tabix_close($self->{_});
        delete($self->{_});
        delete($self->{_fn});
        delete($self->{_fnidx});
    }
}

# Close the handle when the object goes away.
sub DESTROY {
    my ($self) = @_;
    $self->close;
}

# $t->query([$seq [, $beg [, $end]]])
# Forwards the arguments to tabix_query() and returns the resulting iterator
# wrapped in a TabixIterator object.
sub query {
    my ($self, @region) = @_;
    my $handle  = tabix_query($self->{_}, @region);
    my $wrapped = TabixIterator->new;
    $wrapped->set($handle);
    return $wrapped;
}

# $t->read($iter)
# Returns whatever tabix_read() yields for the handle wrapped by $iter.
sub read {
    my ($self, $iter) = @_;
    return tabix_read($self->{_}, $iter->get);
}

# $t->getnames
# Returns the list produced by tabix_getnames() for this file.
sub getnames {
    my ($self) = @_;
    return tabix_getnames($self->{_});
}

1;
__END__
|
@@ -0,0 +1,71 @@
|
|
1
|
+
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include <stdlib.h>
#include "tabix.h"

/*
 * XS glue exposing the tabix C API to Perl.
 *
 *   tabix_open(fn, fnidx=0)        -> handle returned by ti_open()
 *   tabix_close(t)                 -> calls ti_close()
 *   tabix_query(t, seq=0, beg=0, end=0x7fffffff)
 *                                  -> iterator returned by ti_query()
 *   tabix_read(t, iter)            -> next line as a string, or an empty
 *                                     list once ti_read() returns NULL
 *   tabix_getnames(t)              -> list of sequence names, or an empty
 *                                     list if the index cannot be loaded
 *   tabix_iter_free(iter)          -> calls ti_iter_destroy()
 */

MODULE = Tabix PACKAGE = Tabix

tabix_t*
tabix_open(fn, fnidx=0)
	char *fn
	char *fnidx
  CODE:
	RETVAL = ti_open(fn, fnidx);
  OUTPUT:
	RETVAL

void
tabix_close(t)
	tabix_t *t
  CODE:
	ti_close(t);

ti_iter_t
tabix_query(t, seq=0, beg=0, end=0x7fffffff)
	tabix_t *t
	const char *seq
	int beg
	int end
  PREINIT:
  CODE:
	RETVAL = ti_query(t, seq, beg, end);
  OUTPUT:
	RETVAL

SV*
tabix_read(t, iter)
	tabix_t *t
	ti_iter_t iter
  PREINIT:
	const char *s;
	int len;
  CODE:
	s = ti_read(t, iter, &len);
	/* XSRETURN_EMPTY is a complete statement that returns from the XSUB by
	 * itself; it must not be used as the operand of `return`. */
	if (s == 0)
		XSRETURN_EMPTY;
	RETVAL = newSVpv(s, len);
  OUTPUT:
	RETVAL

void
tabix_getnames(t)
	tabix_t *t
  PREINIT:
	const char **names;
	int i, n;
  PPCODE:
	/* Without an index there are no names to report; return an empty list
	 * instead of handing a missing index to ti_seqname(). */
	if (ti_lazy_index_load(t) < 0)
		XSRETURN_EMPTY;
	names = ti_seqname(t->idx, &n);
	for (i = 0; i < n; ++i)
		XPUSHs(sv_2mortal(newSVpv(names[i], 0)));
	free(names);

MODULE = Tabix PACKAGE = TabixIterator

void
tabix_iter_free(iter)
	ti_iter_t iter
  CODE:
	ti_iter_destroy(iter);
|
@@ -0,0 +1,41 @@
|
|
1
|
+
package TabixIterator;

# Thin object that owns one low-level tabix iterator handle and frees it
# when the object is destroyed.

use strict;
use warnings;
use Carp qw/croak/;

require Exporter;

our @ISA = qw/Exporter/;
our @EXPORT = qw/tabix_iter_free/;

our $VERSION = '0.2.0';

require XSLoader;
XSLoader::load('Tabix', $VERSION);

# TabixIterator->new
# Creates an empty wrapper; the handle is attached later with set().
sub new {
    my ($invocant) = @_;
    my $self = bless({}, ref($invocant) || $invocant);
    return $self;
}

# $i->set($iter)
# Stores the low-level iterator handle in the object.
sub set {
    my $self = shift;
    my ($iter) = @_;
    $self->{_} = $iter;
}

# $i->get
# Returns the stored low-level iterator handle.
sub get {
    my ($self) = @_;
    return $self->{_};
}

# Free the stored handle, if there is one, when the wrapper is destroyed.
sub DESTROY {
    my ($self) = @_;
    tabix_iter_free($self->{_}) if ($self->{_});
}

1;
__END__
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#-*-Perl-*-
# Tests both interfaces of the Tabix module against the bundled example file.
use Test::More tests => 9;
BEGIN { use_ok('Tabix') };

{ # C-like low-level interface
    my $t = tabix_open("../example.gtf.gz");
    ok($t);
    my $iter = tabix_query($t, "chr1", 0, 2000);
    ok($iter);
    # count the records returned for the region
    my $records = 0;
    while (tabix_read($t, $iter)) {
        $records++;
    }
    is($records, 6);
    tabix_iter_free($iter);
    my @names = tabix_getnames($t);
    is(scalar(@names), 2);
}

{ # OOP high-level interface
    my $t = Tabix->new(-data=>"../example.gtf.gz");
    ok($t);
    my $iter = $t->query("chr1", 3000, 5000);
    ok($iter);
    # count the records returned for the region
    my $records = 0;
    while ($t->read($iter)) {
        $records++;
    }
    is($records, 27);
    my @names = $t->getnames;
    is($names[1], "chr2");
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#-*-Perl-*-
# Tests the OOP interface of the Tabix module against a remote FTP file,
# once with the default index location and once with an explicit index URL.
use Test::More tests => 9;
BEGIN { use_ok('Tabix') };

{ # FTP access
    my $t = Tabix->new(-data=>"ftp://ftp.ncbi.nih.gov/1000genomes/ftp/pilot_data/release/2010_03/pilot1/CEU.SRP000031.2010_03.genotypes.vcf.gz");
    ok($t);
    my $iter = $t->query("1", 1000000, 1100000);
    ok($iter);
    # count the records returned for the region
    my $records = 0;
    while ($t->read($iter)) {
        $records++;
    }
    is($records, 306);
    my @names = $t->getnames;
    is(scalar(@names), 22);
}

{ # FTP access plus FTP index
    my $t = Tabix->new(-data=>"ftp://ftp.ncbi.nih.gov/1000genomes/ftp/pilot_data/release/2010_03/pilot1/CEU.SRP000031.2010_03.genotypes.vcf.gz",
                       -index=>"ftp://ftp.ncbi.nih.gov/1000genomes/ftp/pilot_data/release/2010_03/pilot1/CEU.SRP000031.2010_03.genotypes.vcf.gz.tbi");
    ok($t);
    my $iter = $t->query("19", 10000000, 10100000);
    ok($iter);
    # count the records returned for the region
    my $records = 0;
    while ($t->read($iter)) {
        $records++;
    }
    is($records, 268);
    my @names = $t->getnames;
    is(scalar(@names), 22);
}
|
@@ -0,0 +1,55 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
#
|
3
|
+
# The MIT License
|
4
|
+
#
|
5
|
+
# Copyright (c) 2011 Seoul National University.
|
6
|
+
#
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
8
|
+
# a copy of this software and associated documentation files (the
|
9
|
+
# "Software"), to deal in the Software without restriction, including
|
10
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
11
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
12
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
13
|
+
# the following conditions:
|
14
|
+
#
|
15
|
+
# The above copyright notice and this permission notice shall be
|
16
|
+
# included in all copies or substantial portions of the Software.
|
17
|
+
#
|
18
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
19
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
20
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
21
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
22
|
+
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
23
|
+
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
24
|
+
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
25
|
+
# SOFTWARE.
|
26
|
+
#
|
27
|
+
# Contact: Hyeshik Chang <hyeshik@snu.ac.kr>
|
28
|
+
|
29
|
+
from distutils.core import setup, Extension

# Change this to True when you need the knetfile support.
USE_KNETFILE = False

# C sources of tabix that are compiled into the extension together with
# tabixmodule.c; paths are relative to this directory.
TABIX_SOURCE_FILES = [
    '../bgzf.c',
    '../bgzip.c',
    '../index.c',
    '../knetfile.c',
    '../kstring.c',
]

# Preprocessor definitions passed to the compiler; _USE_KNETFILE is added
# only when knetfile support was requested above.
define_options = [('_FILE_OFFSET_BITS', 64)]
if USE_KNETFILE:
    define_options += [('_USE_KNETFILE', 1)]

tabix_extension = Extension(
    "tabix",
    ["tabixmodule.c"] + TABIX_SOURCE_FILES,
    include_dirs=['..'],
    libraries=['z'],
    define_macros=define_options,
)
ext_modules = [tabix_extension]

setup(
    name='tabix',
    version='1.0',
    description='Python interface to tabix, a generic indexer '
                'for TAB-delimited genome position files',
    author='Hyeshik Chang',
    author_email='hyeshik@snu.ac.kr',
    license='MIT',
    ext_modules=ext_modules,
)
|