ngs_server 0.4 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/tabix/ChangeLog +593 -0
- data/ext/tabix/Makefile +65 -0
- data/ext/tabix/NEWS +126 -0
- data/ext/tabix/TabixReader.java +395 -0
- data/ext/tabix/bam_endian.h +42 -0
- data/ext/tabix/bedidx.c +156 -0
- data/ext/tabix/bgzf.c +714 -0
- data/ext/tabix/bgzf.h +157 -0
- data/ext/tabix/bgzip.c +206 -0
- data/ext/tabix/example.gtf.gz +0 -0
- data/ext/tabix/example.gtf.gz.tbi +0 -0
- data/ext/tabix/extconf.rb +1 -0
- data/ext/tabix/index.c +998 -0
- data/ext/tabix/khash.h +486 -0
- data/ext/tabix/knetfile.c +632 -0
- data/ext/tabix/knetfile.h +75 -0
- data/ext/tabix/kseq.h +227 -0
- data/ext/tabix/ksort.h +271 -0
- data/ext/tabix/kstring.c +165 -0
- data/ext/tabix/kstring.h +68 -0
- data/ext/tabix/main.c +290 -0
- data/ext/tabix/perl/MANIFEST +8 -0
- data/ext/tabix/perl/Makefile.PL +8 -0
- data/ext/tabix/perl/Tabix.pm +76 -0
- data/ext/tabix/perl/Tabix.xs +71 -0
- data/ext/tabix/perl/TabixIterator.pm +41 -0
- data/ext/tabix/perl/t/01local.t +28 -0
- data/ext/tabix/perl/t/02remote.t +28 -0
- data/ext/tabix/perl/typemap +3 -0
- data/ext/tabix/python/setup.py +55 -0
- data/ext/tabix/python/tabixmodule.c +408 -0
- data/ext/tabix/python/test.py +91 -0
- data/ext/tabix/tabix.1 +132 -0
- data/ext/tabix/tabix.h +145 -0
- data/ext/tabix/tabix.py +87 -0
- data/ext/tabix/tabix.tex +121 -0
- data/ext/vcftools/perl/Vcf.pm +5 -3
- data/ext/vcftools/perl/vcf-query +2 -0
- data/lib/ngs_server/version.rb +1 -1
- data/lib/ngs_server.rb +12 -11
- data/ngs_server.gemspec +1 -2
- metadata +39 -2
data/ext/tabix/kstring.h
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
#ifndef KSTRING_H
|
2
|
+
#define KSTRING_H
|
3
|
+
|
4
|
+
#include <stdlib.h>
|
5
|
+
#include <string.h>
|
6
|
+
#include <stdint.h>
|
7
|
+
|
8
|
+
#ifndef kroundup32
|
9
|
+
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
10
|
+
#endif
|
11
|
+
|
12
|
+
#ifndef KSTRING_T
|
13
|
+
#define KSTRING_T kstring_t
|
14
|
+
typedef struct __kstring_t {
|
15
|
+
size_t l, m;
|
16
|
+
char *s;
|
17
|
+
} kstring_t;
|
18
|
+
#endif
|
19
|
+
|
20
|
+
int ksprintf(kstring_t *s, const char *fmt, ...);
|
21
|
+
int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);
|
22
|
+
|
23
|
+
// calculate the auxiliary array, allocated by calloc()
|
24
|
+
int *ksBM_prep(const uint8_t *pat, int m);
|
25
|
+
|
26
|
+
/* Search pat in str and return the list of matches. The size of the
|
27
|
+
* list is returned as n_matches. _prep is the array returned by
|
28
|
+
* ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. */
|
29
|
+
int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches);
|
30
|
+
|
31
|
+
/*
 * Append the first l bytes of p to the string s and keep s NUL-terminated.
 *
 * The buffer is enlarged (capacity rounded up by kroundup32) whenever the
 * appended bytes plus the terminator would not fit.
 *
 * Returns l on success. Returns -1 if the buffer could not be enlarged; in
 * that case s is left unchanged and still owns its previous buffer.
 */
static inline int kputsn(const char *p, int l, kstring_t *s)
{
    if (s->l + l + 1 >= s->m) {
        size_t cap = s->l + l + 2;
        char *grown;
        kroundup32(cap);
        /* keep the old pointer until realloc is known to have succeeded */
        grown = (char*)realloc(s->s, cap);
        if (grown == NULL) return -1;
        s->s = grown;
        s->m = cap;
    }
    /* the length is explicit, so copy exactly l bytes; strncpy would stop at
     * an embedded NUL while s->l still advanced by l */
    memcpy(s->s + s->l, p, l);
    s->l += l;
    s->s[s->l] = 0;
    return l;
}
|
43
|
+
|
44
|
+
/*
 * Append the NUL-terminated string p to s.
 * The length is measured with strlen() and the work is delegated to
 * kputsn(), whose return value is passed straight back to the caller.
 */
static inline int kputs(const char *p, kstring_t *s)
{
    int len = strlen(p);

    return kputsn(p, len, s);
}
|
48
|
+
|
49
|
+
/*
 * Append the single character c to the string s and keep s NUL-terminated.
 *
 * The buffer is enlarged (capacity rounded up by kroundup32) when the new
 * character plus the terminator would not fit.
 *
 * Returns c on success. Returns -1 if the buffer could not be enlarged; in
 * that case s is left unchanged and still owns its previous buffer.
 */
static inline int kputc(int c, kstring_t *s)
{
    if (s->l + 1 >= s->m) {
        size_t cap = s->l + 2;
        char *grown;
        kroundup32(cap);
        /* keep the old pointer until realloc is known to have succeeded */
        grown = (char*)realloc(s->s, cap);
        if (grown == NULL) return -1;
        s->s = grown;
        s->m = cap;
    }
    s->s[s->l++] = c;
    s->s[s->l] = 0;
    return c;
}
|
60
|
+
|
61
|
+
/*
 * Convenience wrapper around ksplit_core() for a kstring_t.
 *
 * Hands s->s to ksplit_core() starting from an empty offsets array, stores
 * ksplit_core()'s return value in *n and returns the offsets array that
 * ksplit_core() filled in.
 * NOTE(review): the caller presumably releases the returned array - confirm
 * against ksplit_core().
 */
static inline int *ksplit(kstring_t *s, int delimiter, int *n)
{
    int *fields = 0;
    int capacity = 0;

    *n = ksplit_core(s->s, delimiter, &capacity, &fields);
    return fields;
}
|
67
|
+
|
68
|
+
#endif
|
data/ext/tabix/main.c
ADDED
@@ -0,0 +1,290 @@
|
|
1
|
+
#include <string.h>
|
2
|
+
#include <unistd.h>
|
3
|
+
#include <stdlib.h>
|
4
|
+
#include <stdio.h>
|
5
|
+
#include <sys/stat.h>
|
6
|
+
#include <errno.h>
|
7
|
+
#include "bgzf.h"
|
8
|
+
#include "tabix.h"
|
9
|
+
|
10
|
+
#define PACKAGE_VERSION "0.2.5 (r964)"
|
11
|
+
|
12
|
+
#define error(...) { fprintf(stderr,__VA_ARGS__); return -1; }
|
13
|
+
|
14
|
+
int reheader_file(const char *header, const char *file, int meta)
|
15
|
+
{
|
16
|
+
BGZF *fp = bgzf_open(file,"r");
|
17
|
+
if (bgzf_read_block(fp) != 0 || !fp->block_length)
|
18
|
+
return -1;
|
19
|
+
|
20
|
+
char *buffer = fp->uncompressed_block;
|
21
|
+
int skip_until = 0;
|
22
|
+
|
23
|
+
if ( buffer[0]==meta )
|
24
|
+
{
|
25
|
+
skip_until = 1;
|
26
|
+
|
27
|
+
// Skip the header
|
28
|
+
while (1)
|
29
|
+
{
|
30
|
+
if ( buffer[skip_until]=='\n' )
|
31
|
+
{
|
32
|
+
skip_until++;
|
33
|
+
if ( skip_until>=fp->block_length )
|
34
|
+
{
|
35
|
+
if (bgzf_read_block(fp) != 0 || !fp->block_length)
|
36
|
+
error("no body?\n");
|
37
|
+
skip_until = 0;
|
38
|
+
}
|
39
|
+
// The header has finished
|
40
|
+
if ( buffer[skip_until]!=meta ) break;
|
41
|
+
}
|
42
|
+
skip_until++;
|
43
|
+
if ( skip_until>=fp->block_length )
|
44
|
+
{
|
45
|
+
if (bgzf_read_block(fp) != 0 || !fp->block_length)
|
46
|
+
error("no body?\n");
|
47
|
+
skip_until = 0;
|
48
|
+
}
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
FILE *fh = fopen(header,"r");
|
53
|
+
if ( !fh )
|
54
|
+
error("%s: %s", header,strerror(errno));
|
55
|
+
int page_size = getpagesize();
|
56
|
+
char *buf = valloc(page_size);
|
57
|
+
BGZF *bgzf_out = bgzf_fdopen(fileno(stdout), "w");
|
58
|
+
ssize_t nread;
|
59
|
+
while ( (nread=fread(buf,1,page_size-1,fh))>0 )
|
60
|
+
{
|
61
|
+
if ( nread<page_size-1 && buf[nread-1]!='\n' )
|
62
|
+
buf[nread++] = '\n';
|
63
|
+
if (bgzf_write(bgzf_out, buf, nread) < 0) error("Error: %s\n",bgzf_out->error);
|
64
|
+
}
|
65
|
+
fclose(fh);
|
66
|
+
|
67
|
+
if ( fp->block_length - skip_until > 0 )
|
68
|
+
{
|
69
|
+
if (bgzf_write(bgzf_out, buffer+skip_until, fp->block_length-skip_until) < 0)
|
70
|
+
error("Error: %s\n",fp->error);
|
71
|
+
}
|
72
|
+
if (bgzf_flush(bgzf_out) < 0)
|
73
|
+
error("Error: %s\n",bgzf_out->error);
|
74
|
+
|
75
|
+
while (1)
|
76
|
+
{
|
77
|
+
#ifdef _USE_KNETFILE
|
78
|
+
nread = knet_read(fp->x.fpr, buf, page_size);
|
79
|
+
#else
|
80
|
+
nread = fread(buf, 1, page_size, fp->file);
|
81
|
+
#endif
|
82
|
+
if ( nread<=0 )
|
83
|
+
break;
|
84
|
+
|
85
|
+
#ifdef _USE_KNETFILE
|
86
|
+
int count = fwrite(buf, 1, nread, bgzf_out->x.fpw);
|
87
|
+
#else
|
88
|
+
int count = fwrite(buf, 1, nread, bgzf_out->file);
|
89
|
+
#endif
|
90
|
+
if (count != nread)
|
91
|
+
error("Write failed, wrote %d instead of %d bytes.\n", count,(int)nread);
|
92
|
+
}
|
93
|
+
|
94
|
+
if (bgzf_close(bgzf_out) < 0)
|
95
|
+
error("Error: %s\n",bgzf_out->error);
|
96
|
+
|
97
|
+
return 0;
|
98
|
+
}
|
99
|
+
|
100
|
+
|
101
|
+
int main(int argc, char *argv[])
|
102
|
+
{
|
103
|
+
int c, skip = -1, meta = -1, list_chrms = 0, force = 0, print_header = 0, bed_reg = 0;
|
104
|
+
ti_conf_t conf = ti_conf_gff;
|
105
|
+
const char *reheader = NULL;
|
106
|
+
while ((c = getopt(argc, argv, "p:s:b:e:0S:c:lhfBr:")) >= 0) {
|
107
|
+
switch (c) {
|
108
|
+
case 'B': bed_reg = 1; break;
|
109
|
+
case '0': conf.preset |= TI_FLAG_UCSC; break;
|
110
|
+
case 'S': skip = atoi(optarg); break;
|
111
|
+
case 'c': meta = optarg[0]; break;
|
112
|
+
case 'p':
|
113
|
+
if (strcmp(optarg, "gff") == 0) conf = ti_conf_gff;
|
114
|
+
else if (strcmp(optarg, "bed") == 0) conf = ti_conf_bed;
|
115
|
+
else if (strcmp(optarg, "sam") == 0) conf = ti_conf_sam;
|
116
|
+
else if (strcmp(optarg, "vcf") == 0 || strcmp(optarg, "vcf4") == 0) conf = ti_conf_vcf;
|
117
|
+
else if (strcmp(optarg, "psltbl") == 0) conf = ti_conf_psltbl;
|
118
|
+
else {
|
119
|
+
fprintf(stderr, "[main] unrecognized preset '%s'\n", optarg);
|
120
|
+
return 1;
|
121
|
+
}
|
122
|
+
break;
|
123
|
+
case 's': conf.sc = atoi(optarg); break;
|
124
|
+
case 'b': conf.bc = atoi(optarg); break;
|
125
|
+
case 'e': conf.ec = atoi(optarg); break;
|
126
|
+
case 'l': list_chrms = 1; break;
|
127
|
+
case 'h': print_header = 1; break;
|
128
|
+
case 'f': force = 1; break;
|
129
|
+
case 'r': reheader = optarg; break;
|
130
|
+
}
|
131
|
+
}
|
132
|
+
if (skip >= 0) conf.line_skip = skip;
|
133
|
+
if (meta >= 0) conf.meta_char = meta;
|
134
|
+
if (optind == argc) {
|
135
|
+
fprintf(stderr, "\n");
|
136
|
+
fprintf(stderr, "Program: tabix (TAB-delimited file InderXer)\n");
|
137
|
+
fprintf(stderr, "Version: %s\n\n", PACKAGE_VERSION);
|
138
|
+
fprintf(stderr, "Usage: tabix <in.tab.bgz> [region1 [region2 [...]]]\n\n");
|
139
|
+
fprintf(stderr, "Options: -p STR preset: gff, bed, sam, vcf, psltbl [gff]\n");
|
140
|
+
fprintf(stderr, " -s INT sequence name column [1]\n");
|
141
|
+
fprintf(stderr, " -b INT start column [4]\n");
|
142
|
+
fprintf(stderr, " -e INT end column; can be identical to '-b' [5]\n");
|
143
|
+
fprintf(stderr, " -S INT skip first INT lines [0]\n");
|
144
|
+
fprintf(stderr, " -c CHAR symbol for comment/meta lines [#]\n");
|
145
|
+
fprintf(stderr, " -r FILE replace the header with the content of FILE [null]\n");
|
146
|
+
fprintf(stderr, " -B region1 is a BED file (entire file will be read)\n");
|
147
|
+
fprintf(stderr, " -0 zero-based coordinate\n");
|
148
|
+
fprintf(stderr, " -h print the header lines\n");
|
149
|
+
fprintf(stderr, " -l list chromosome names\n");
|
150
|
+
fprintf(stderr, " -f force to overwrite the index\n");
|
151
|
+
fprintf(stderr, "\n");
|
152
|
+
return 1;
|
153
|
+
}
|
154
|
+
if (list_chrms) {
|
155
|
+
ti_index_t *idx;
|
156
|
+
int i, n;
|
157
|
+
const char **names;
|
158
|
+
idx = ti_index_load(argv[optind]);
|
159
|
+
if (idx == 0) {
|
160
|
+
fprintf(stderr, "[main] fail to load the index file.\n");
|
161
|
+
return 1;
|
162
|
+
}
|
163
|
+
names = ti_seqname(idx, &n);
|
164
|
+
for (i = 0; i < n; ++i) printf("%s\n", names[i]);
|
165
|
+
free(names);
|
166
|
+
ti_index_destroy(idx);
|
167
|
+
return 0;
|
168
|
+
}
|
169
|
+
if (reheader)
|
170
|
+
return reheader_file(reheader,argv[optind],conf.meta_char);
|
171
|
+
|
172
|
+
struct stat stat_tbi,stat_vcf;
|
173
|
+
char *fnidx = calloc(strlen(argv[optind]) + 5, 1);
|
174
|
+
strcat(strcpy(fnidx, argv[optind]), ".tbi");
|
175
|
+
|
176
|
+
if (optind + 1 == argc) {
|
177
|
+
if (force == 0) {
|
178
|
+
if (stat(fnidx, &stat_tbi) == 0)
|
179
|
+
{
|
180
|
+
// Before complaining, check if the VCF file isn't newer. This is a common source of errors,
|
181
|
+
// people tend not to notice that tabix failed
|
182
|
+
stat(argv[optind], &stat_vcf);
|
183
|
+
if ( stat_vcf.st_mtime <= stat_tbi.st_mtime )
|
184
|
+
{
|
185
|
+
fprintf(stderr, "[tabix] the index file exists. Please use '-f' to overwrite.\n");
|
186
|
+
free(fnidx);
|
187
|
+
return 1;
|
188
|
+
}
|
189
|
+
}
|
190
|
+
}
|
191
|
+
if ( bgzf_check_bgzf(argv[optind])!=1 )
|
192
|
+
{
|
193
|
+
fprintf(stderr,"[tabix] was bgzip used to compress this file? %s\n", argv[optind]);
|
194
|
+
free(fnidx);
|
195
|
+
return 1;
|
196
|
+
}
|
197
|
+
return ti_index_build(argv[optind], &conf);
|
198
|
+
}
|
199
|
+
{ // retrieve
|
200
|
+
tabix_t *t;
|
201
|
+
// Common source of errors: new VCF is used with an old index
|
202
|
+
stat(fnidx, &stat_tbi);
|
203
|
+
stat(argv[optind], &stat_vcf);
|
204
|
+
if ( force==0 && stat_vcf.st_mtime > stat_tbi.st_mtime )
|
205
|
+
{
|
206
|
+
fprintf(stderr, "[tabix] the index file is older than the vcf file. Please use '-f' to overwrite or reindex.\n");
|
207
|
+
free(fnidx);
|
208
|
+
return 1;
|
209
|
+
}
|
210
|
+
free(fnidx);
|
211
|
+
|
212
|
+
if ((t = ti_open(argv[optind], 0)) == 0) {
|
213
|
+
fprintf(stderr, "[main] fail to open the data file.\n");
|
214
|
+
return 1;
|
215
|
+
}
|
216
|
+
if (strcmp(argv[optind+1], ".") == 0) { // retrieve all
|
217
|
+
ti_iter_t iter;
|
218
|
+
const char *s;
|
219
|
+
int len;
|
220
|
+
iter = ti_query(t, 0, 0, 0);
|
221
|
+
while ((s = ti_read(t, iter, &len)) != 0) {
|
222
|
+
fputs(s, stdout); fputc('\n', stdout);
|
223
|
+
}
|
224
|
+
ti_iter_destroy(iter);
|
225
|
+
} else { // retrieve from specified regions
|
226
|
+
int i, len;
|
227
|
+
ti_iter_t iter;
|
228
|
+
const char *s;
|
229
|
+
const ti_conf_t *idxconf;
|
230
|
+
|
231
|
+
if (ti_lazy_index_load(t) < 0 && bed_reg == 0) {
|
232
|
+
fprintf(stderr,"[tabix] failed to load the index file.\n");
|
233
|
+
return 1;
|
234
|
+
}
|
235
|
+
idxconf = ti_get_conf(t->idx);
|
236
|
+
|
237
|
+
if ( print_header )
|
238
|
+
{
|
239
|
+
// If requested, print the header lines here
|
240
|
+
iter = ti_query(t, 0, 0, 0);
|
241
|
+
while ((s = ti_read(t, iter, &len)) != 0) {
|
242
|
+
if ((int)(*s) != idxconf->meta_char) break;
|
243
|
+
fputs(s, stdout); fputc('\n', stdout);
|
244
|
+
}
|
245
|
+
ti_iter_destroy(iter);
|
246
|
+
}
|
247
|
+
if (bed_reg) {
|
248
|
+
extern int bed_overlap(const void *_h, const char *chr, int beg, int end);
|
249
|
+
extern void *bed_read(const char *fn);
|
250
|
+
extern void bed_destroy(void *_h);
|
251
|
+
|
252
|
+
const ti_conf_t *conf_ = idxconf? idxconf : &conf; // use the index file if available
|
253
|
+
void *bed = bed_read(argv[optind+1]); // load the BED file
|
254
|
+
ti_interval_t intv;
|
255
|
+
|
256
|
+
if (bed == 0) {
|
257
|
+
fprintf(stderr, "[main] fail to read the BED file.\n");
|
258
|
+
return 1;
|
259
|
+
}
|
260
|
+
iter = ti_query(t, 0, 0, 0);
|
261
|
+
while ((s = ti_read(t, iter, &len)) != 0) {
|
262
|
+
int c;
|
263
|
+
ti_get_intv(conf_, len, (char*)s, &intv);
|
264
|
+
c = *intv.se; *intv.se = '\0';
|
265
|
+
if (bed_overlap(bed, intv.ss, intv.beg, intv.end)) {
|
266
|
+
*intv.se = c;
|
267
|
+
puts(s);
|
268
|
+
}
|
269
|
+
*intv.se = c;
|
270
|
+
}
|
271
|
+
ti_iter_destroy(iter);
|
272
|
+
bed_destroy(bed);
|
273
|
+
} else {
|
274
|
+
for (i = optind + 1; i < argc; ++i) {
|
275
|
+
int tid, beg, end;
|
276
|
+
if (ti_parse_region(t->idx, argv[i], &tid, &beg, &end) == 0) {
|
277
|
+
iter = ti_queryi(t, tid, beg, end);
|
278
|
+
while ((s = ti_read(t, iter, &len)) != 0) {
|
279
|
+
fputs(s, stdout); fputc('\n', stdout);
|
280
|
+
}
|
281
|
+
ti_iter_destroy(iter);
|
282
|
+
}
|
283
|
+
// else fprintf(stderr, "[main] invalid region: unknown target name or minus interval.\n");
|
284
|
+
}
|
285
|
+
}
|
286
|
+
}
|
287
|
+
ti_close(t);
|
288
|
+
}
|
289
|
+
return 0;
|
290
|
+
}
|
@@ -0,0 +1,76 @@
|
|
1
|
+
package Tabix;

use strict;
use warnings;
use Carp qw/croak/;

use TabixIterator;

require Exporter;

our @ISA = qw/Exporter/;
our @EXPORT = qw/tabix_open tabix_close tabix_read tabix_query tabix_getnames tabix_iter_free/;

our $VERSION = '0.2.0';

require XSLoader;
XSLoader::load('Tabix', $VERSION);

# Constructor. Takes named arguments: -data (required) and -index (optional),
# and opens the data file through open().
sub new {
    my ($invocant, %args) = @_;
    croak("-data argument required") unless $args{-data};
    my $class = ref($invocant) || $invocant;
    my $self = bless({}, $class);
    $self->open($args{-data}, $args{-index});
    return $self;
}

# Close whatever is currently open, remember the file names and store the
# handle returned by tabix_open() under the '_' key.
sub open {
    my ($self, $fn, $fnidx) = @_;
    $self->close;
    $self->{_fn} = $fn;
    $self->{_fnidx} = $fnidx;
    # the index name is only passed on when one was supplied
    my @open_args = $fnidx ? ($fn, $fnidx) : ($fn);
    $self->{_} = tabix_open(@open_args);
}

# Release the handle, if any, and forget the stored file names.
sub close {
    my ($self) = @_;
    if ($self->{_}) {
        tabix_close($self->{_});
        delete($self->{_});
        delete($self->{_fn});
        delete($self->{_fnidx});
    }
}

# The handle is released when the object goes away.
sub DESTROY {
    my ($self) = @_;
    $self->close;
}

# Forward the arguments (if any) to tabix_query() and wrap the resulting
# iterator in a TabixIterator object.
sub query {
    my ($self, @region) = @_;
    my $raw = tabix_query($self->{_}, @region);
    my $wrapped = TabixIterator->new;
    $wrapped->set($raw);
    return $wrapped;
}

# Return what tabix_read() yields for the given TabixIterator object.
sub read {
    my ($self, $iter) = @_;
    return tabix_read($self->{_}, $iter->get);
}

# Return what tabix_getnames() yields for the open handle.
sub getnames {
    my ($self) = @_;
    return tabix_getnames($self->{_});
}

1;
__END__
|
@@ -0,0 +1,71 @@
|
|
1
|
+
/*
 * Perl XS glue for tabix: thin wrappers that expose ti_open, ti_close,
 * ti_query, ti_read, ti_seqname and ti_iter_destroy to Perl.
 */
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"

#include <stdlib.h>
#include "tabix.h"

MODULE = Tabix PACKAGE = Tabix

tabix_t*
tabix_open(fn, fnidx=0)
	char *fn
	char *fnidx
  CODE:
	RETVAL = ti_open(fn, fnidx);
  OUTPUT:
	RETVAL

void
tabix_close(t)
	tabix_t *t
  CODE:
	ti_close(t);

ti_iter_t
tabix_query(t, seq=0, beg=0, end=0x7fffffff)
	tabix_t *t
	const char *seq
	int beg
	int end
  PREINIT:
  CODE:
	RETVAL = ti_query(t, seq, beg, end);
  OUTPUT:
	RETVAL

SV*
tabix_read(t, iter)
	tabix_t *t
	ti_iter_t iter
  PREINIT:
	const char *s;
	int len;
  CODE:
	s = ti_read(t, iter, &len);
	/* XSRETURN_EMPTY performs the return itself, so it must not be
	 * prefixed with the return keyword */
	if (s == 0)
	   XSRETURN_EMPTY;
	RETVAL = newSVpv(s, len);
  OUTPUT:
	RETVAL

void
tabix_getnames(t)
	tabix_t *t
  PREINIT:
	const char **names;
	int i, n;
  PPCODE:
	/* without an index there are no names to report; return an empty
	 * list instead of handing an unset index to ti_seqname() */
	if (ti_lazy_index_load(t) < 0)
	   XSRETURN_EMPTY;
	names = ti_seqname(t->idx, &n);
	for (i = 0; i < n; ++i)
		XPUSHs(sv_2mortal(newSVpv(names[i], 0)));
	free(names);

MODULE = Tabix PACKAGE = TabixIterator

void
tabix_iter_free(iter)
	ti_iter_t iter
  CODE:
	ti_iter_destroy(iter);
|
@@ -0,0 +1,41 @@
|
|
1
|
+
package TabixIterator;

use strict;
use warnings;
use Carp qw/croak/;

require Exporter;

our @ISA = qw/Exporter/;
our @EXPORT = qw/tabix_iter_free/;

our $VERSION = '0.2.0';

require XSLoader;
XSLoader::load('Tabix', $VERSION);

# Create an empty wrapper; the iterator itself is attached later with set().
sub new {
    my ($invocant) = @_;
    my $class = ref($invocant) || $invocant;
    return bless({}, $class);
}

# Store the low-level iterator under the '_' key.
sub set {
    my ($self, $iter) = @_;
    $self->{_} = $iter;
}

# Return the stored low-level iterator.
sub get {
    my ($self) = @_;
    return $self->{_};
}

# Free the stored iterator, if there is one, when the wrapper goes away.
sub DESTROY {
    my ($self) = @_;
    tabix_iter_free($self->{_}) if ($self->{_});
}

1;
__END__
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#-*-Perl-*-
# Tests both interfaces of the Tabix module against the local example file.
use Test::More tests => 9;
BEGIN { use_ok('Tabix') };

{ # C-like low-level interface
    my $tabix = tabix_open("../example.gtf.gz");
    ok($tabix);
    my $it = tabix_query($tabix, "chr1", 0, 2000);
    ok($it);
    # count the records returned for the region
    my $records = 0;
    $records++ while (tabix_read($tabix, $it));
    is($records, 6);
    tabix_iter_free($it);
    my @names = tabix_getnames($tabix);
    is(scalar(@names), 2);
}

{ # OOP high-level interface
    my $tabix = Tabix->new(-data=>"../example.gtf.gz");
    ok($tabix);
    my $it = $tabix->query("chr1", 3000, 5000);
    ok($it);
    # count the records returned for the region
    my $records = 0;
    $records++ while ($tabix->read($it));
    is($records, 27);
    my @names = $tabix->getnames;
    is($names[1], "chr2");
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#-*-Perl-*-
# Tests the Tabix module against a data file (and index) fetched over FTP.
use Test::More tests => 9;
BEGIN { use_ok('Tabix') };

{ # FTP access
    my $tabix = Tabix->new(-data=>"ftp://ftp.ncbi.nih.gov/1000genomes/ftp/pilot_data/release/2010_03/pilot1/CEU.SRP000031.2010_03.genotypes.vcf.gz");
    ok($tabix);
    my $it = $tabix->query("1", 1000000, 1100000);
    ok($it);
    # count the records returned for the region
    my $records = 0;
    $records++ while ($tabix->read($it));
    is($records, 306);
    my @names = $tabix->getnames;
    is(scalar(@names), 22);
}

{ # FTP access plus FTP index
    my $tabix = Tabix->new(-data=>"ftp://ftp.ncbi.nih.gov/1000genomes/ftp/pilot_data/release/2010_03/pilot1/CEU.SRP000031.2010_03.genotypes.vcf.gz",
                           -index=>"ftp://ftp.ncbi.nih.gov/1000genomes/ftp/pilot_data/release/2010_03/pilot1/CEU.SRP000031.2010_03.genotypes.vcf.gz.tbi");
    ok($tabix);
    my $it = $tabix->query("19", 10000000, 10100000);
    ok($it);
    # count the records returned for the region
    my $records = 0;
    $records++ while ($tabix->read($it));
    is($records, 268);
    my @names = $tabix->getnames;
    is(scalar(@names), 22);
}
|
@@ -0,0 +1,55 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
#
|
3
|
+
# The MIT License
|
4
|
+
#
|
5
|
+
# Copyright (c) 2011 Seoul National University.
|
6
|
+
#
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
8
|
+
# a copy of this software and associated documentation files (the
|
9
|
+
# "Software"), to deal in the Software without restriction, including
|
10
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
11
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
12
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
13
|
+
# the following conditions:
|
14
|
+
#
|
15
|
+
# The above copyright notice and this permission notice shall be
|
16
|
+
# included in all copies or substantial portions of the Software.
|
17
|
+
#
|
18
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
19
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
20
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
21
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
22
|
+
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
23
|
+
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
24
|
+
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
25
|
+
# SOFTWARE.
|
26
|
+
#
|
27
|
+
# Contact: Hyeshik Chang <hyeshik@snu.ac.kr>
|
28
|
+
|
29
|
+
from distutils.core import setup, Extension

# Change this to True when you need the knetfile support.
USE_KNETFILE = False

# C sources of tabix that are compiled into the extension next to the
# module source itself.
TABIX_SOURCE_FILES = [
    '../bgzf.c',
    '../bgzip.c',
    '../index.c',
    '../knetfile.c',
    '../kstring.c',
]

# Preprocessor macros handed to the compiler; _USE_KNETFILE is only defined
# when knetfile support is switched on above.
define_options = [('_FILE_OFFSET_BITS', 64)]
if USE_KNETFILE:
    define_options = define_options + [('_USE_KNETFILE', 1)]

tabix_extension = Extension(
    "tabix",
    ["tabixmodule.c"] + TABIX_SOURCE_FILES,
    include_dirs=['..'],
    libraries=['z'],
    define_macros=define_options,
)
ext_modules = [tabix_extension]

setup(
    name='tabix',
    version='1.0',
    description='Python interface to tabix, a generic indexer '
                'for TAB-delimited genome position files',
    author='Hyeshik Chang',
    author_email='hyeshik@snu.ac.kr',
    license='MIT',
    ext_modules=ext_modules,
)
|