ngs_server 0.4 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,408 @@
1
+ /*-
2
+ * The MIT License
3
+ *
4
+ * Copyright (c) 2011 Seoul National University.
5
+ *
6
+ * Permission is hereby granted, free of charge, to any person obtaining
7
+ * a copy of this software and associated documentation files (the
8
+ * "Software"), to deal in the Software without restriction, including
9
+ * without limitation the rights to use, copy, modify, merge, publish,
10
+ * distribute, sublicense, and/or sell copies of the Software, and to
11
+ * permit persons to whom the Software is furnished to do so, subject to
12
+ * the following conditions:
13
+ *
14
+ * The above copyright notice and this permission notice shall be
15
+ * included in all copies or substantial portions of the Software.
16
+ *
17
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ * SOFTWARE.
25
+ */
26
+
27
+ /*
28
+ * Contact: Hyeshik Chang <hyeshik@snu.ac.kr>
29
+ */
30
+
31
+ #define PY_SSIZE_T_CLEAN
32
+ #include "Python.h"
33
+ #include "tabix.h"
34
+
35
+ static PyObject *TabixError;
36
+
37
+ typedef struct {
38
+ PyObject_HEAD
39
+ tabix_t *tb;
40
+ char *fn;
41
+ } TabixObject;
42
+
43
+ typedef struct {
44
+ PyObject_HEAD
45
+ TabixObject *tbobj;
46
+ ti_iter_t iter;
47
+ } TabixIteratorObject;
48
+
49
+ static PyTypeObject Tabix_Type, TabixIterator_Type;
50
+
51
+ /* --- TabixIterator --------------------------------------------------- */
52
+
53
+ static PyObject *
54
+ tabixiter_create(TabixObject *parentidx, ti_iter_t iter)
55
+ {
56
+ TabixIteratorObject *self;
57
+
58
+ self = PyObject_New(TabixIteratorObject, &TabixIterator_Type);
59
+ if (self == NULL)
60
+ return NULL;
61
+
62
+ Py_INCREF(parentidx);
63
+ self->tbobj = parentidx;
64
+ self->iter = iter;
65
+
66
+ return (PyObject *)self;
67
+ }
68
+
69
+ static void
70
+ tabixiter_dealloc(TabixIteratorObject *self)
71
+ {
72
+ ti_iter_destroy(self->iter);
73
+ Py_DECREF(self->tbobj);
74
+ PyObject_Del(self);
75
+ }
76
+
77
+ static PyObject *
78
+ tabixiter_iter(PyObject *self)
79
+ {
80
+ Py_INCREF(self);
81
+ return self;
82
+ }
83
+
84
+ #if PY_MAJOR_VERSION < 3
85
+ # define PYOBJECT_FROM_STRING_AND_SIZE PyString_FromStringAndSize
86
+ #else
87
+ # define PYOBJECT_FROM_STRING_AND_SIZE PyUnicode_FromStringAndSize
88
+ #endif
89
+
90
+ static PyObject *
91
+ tabixiter_iternext(TabixIteratorObject *self)
92
+ {
93
+ const char *chunk;
94
+ int len, i;
95
+
96
+ chunk = ti_read(self->tbobj->tb, self->iter, &len);
97
+ if (chunk != NULL) {
98
+ PyObject *ret, *column;
99
+ Py_ssize_t colidx;
100
+ const char *ptr, *begin;
101
+
102
+ ret = PyList_New(0);
103
+ if (ret == NULL)
104
+ return NULL;
105
+
106
+ colidx = 0;
107
+ ptr = begin = chunk;
108
+ for (i = len; i > 0; i--, ptr++)
109
+ if (*ptr == '\t') {
110
+ column = PYOBJECT_FROM_STRING_AND_SIZE(begin,
111
+ (Py_ssize_t)(ptr - begin));
112
+ if (column == NULL || PyList_Append(ret, column) == -1) {
113
+ Py_DECREF(ret);
114
+ return NULL;
115
+ }
116
+
117
+ Py_DECREF(column);
118
+ begin = ptr + 1;
119
+ colidx++;
120
+ }
121
+
122
+ column = PYOBJECT_FROM_STRING_AND_SIZE(begin, (Py_ssize_t)(ptr - begin));
123
+ if (column == NULL || PyList_Append(ret, column) == -1) {
124
+ Py_DECREF(ret);
125
+ return NULL;
126
+ }
127
+ Py_DECREF(column);
128
+
129
+ return ret;
130
+ }
131
+ else
132
+ return NULL;
133
+ }
134
+
135
+ static PyMethodDef tabixiter_methods[] = {
136
+ {NULL, NULL} /* sentinel */
137
+ };
138
+
139
+ static PyTypeObject TabixIterator_Type = {
140
+ PyVarObject_HEAD_INIT(NULL, 0)
141
+ "tabix.TabixIterator", /*tp_name*/
142
+ sizeof(TabixIteratorObject), /*tp_basicsize*/
143
+ 0, /*tp_itemsize*/
144
+ /* methods */
145
+ (destructor)tabixiter_dealloc, /*tp_dealloc*/
146
+ 0, /*tp_print*/
147
+ 0, /*tp_getattr*/
148
+ 0, /*tp_setattr*/
149
+ 0, /*tp_compare*/
150
+ 0, /*tp_repr*/
151
+ 0, /*tp_as_number*/
152
+ 0, /*tp_as_sequence*/
153
+ 0, /*tp_as_mapping*/
154
+ 0, /*tp_hash*/
155
+ 0, /*tp_call*/
156
+ 0, /*tp_str*/
157
+ 0, /*tp_getattro*/
158
+ 0, /*tp_setattro*/
159
+ 0, /*tp_as_buffer*/
160
+ Py_TPFLAGS_DEFAULT, /*tp_flags*/
161
+ 0, /*tp_doc*/
162
+ 0, /*tp_traverse*/
163
+ 0, /*tp_clear*/
164
+ 0, /*tp_richcompare*/
165
+ 0, /*tp_weaklistoffset*/
166
+ tabixiter_iter, /*tp_iter*/
167
+ (iternextfunc)tabixiter_iternext, /*tp_iternext*/
168
+ tabixiter_methods, /*tp_methods*/
169
+ 0, /*tp_members*/
170
+ 0, /*tp_getset*/
171
+ 0, /*tp_base*/
172
+ 0, /*tp_dict*/
173
+ 0, /*tp_descr_get*/
174
+ 0, /*tp_descr_set*/
175
+ 0, /*tp_dictoffset*/
176
+ 0, /*tp_init*/
177
+ 0, /*tp_alloc*/
178
+ 0, /*tp_new*/
179
+ 0, /*tp_free*/
180
+ 0, /*tp_is_gc*/
181
+ };
182
+
183
+
184
+ /* --- Tabix ----------------------------------------------------------- */
185
+
186
+ static PyObject *
187
+ tabix_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
188
+ {
189
+ TabixObject *self;
190
+ const char *fn, *fnidx=NULL;
191
+ static char *kwnames[]={"fn", "fnidx", NULL};
192
+ tabix_t *tb;
193
+
194
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|z:Tabix",
195
+ kwnames, &fn, &fnidx))
196
+ return NULL;
197
+
198
+ tb = ti_open(fn, fnidx);
199
+ if (tb == NULL) {
200
+ PyErr_SetString(TabixError, "Can't open the index file.");
201
+ return NULL;
202
+ }
203
+
204
+ self = (TabixObject *)type->tp_alloc(type, 0);
205
+ if (self == NULL)
206
+ return NULL;
207
+
208
+ self->tb = tb;
209
+ self->fn = strdup(fn);
210
+
211
+ return (PyObject *)self;
212
+ }
213
+
214
+ static void
215
+ tabix_dealloc(TabixObject *self)
216
+ {
217
+ free(self->fn);
218
+ ti_close(self->tb);
219
+ PyObject_Del(self);
220
+ }
221
+
222
+ static PyObject *
223
+ tabix_query(TabixObject *self, PyObject *args)
224
+ {
225
+ char *name;
226
+ int begin, end;
227
+ ti_iter_t result;
228
+
229
+ if (!PyArg_ParseTuple(args, "sii:query", &name, &begin, &end))
230
+ return NULL;
231
+
232
+ result = ti_query(self->tb, name, begin, end);
233
+ if (result == NULL) {
234
+ PyErr_SetString(TabixError, "query failed");
235
+ return NULL;
236
+ }
237
+
238
+ return tabixiter_create(self, result);
239
+ }
240
+
241
+ static PyObject *
242
+ tabix_queryi(TabixObject *self, PyObject *args)
243
+ {
244
+ int tid, begin, end;
245
+ ti_iter_t result;
246
+
247
+ if (!PyArg_ParseTuple(args, "iii:queryi", &tid, &begin, &end))
248
+ return NULL;
249
+
250
+ result = ti_queryi(self->tb, tid, begin, end);
251
+ if (result == NULL) {
252
+ PyErr_SetString(TabixError, "query failed");
253
+ return NULL;
254
+ }
255
+
256
+ return tabixiter_create(self, result);
257
+ }
258
+
259
+ static PyObject *
260
+ tabix_querys(TabixObject *self, PyObject *args)
261
+ {
262
+ const char *reg;
263
+ ti_iter_t result;
264
+
265
+ if (!PyArg_ParseTuple(args, "s:querys", &reg))
266
+ return NULL;
267
+
268
+ result = ti_querys(self->tb, reg);
269
+ if (result == NULL) {
270
+ PyErr_SetString(TabixError, "query failed");
271
+ return NULL;
272
+ }
273
+
274
+ return tabixiter_create(self, result);
275
+ }
276
+
277
+ static PyObject *
278
+ tabix_repr(TabixObject *self)
279
+ {
280
+ #if PY_MAJOR_VERSION < 3
281
+ return PyString_FromFormat("<Tabix fn=\"%s\">", self->fn);
282
+ #else
283
+ return PyUnicode_FromFormat("<Tabix fn=\"%s\">", self->fn);
284
+ #endif
285
+ }
286
+
287
+ static PyMethodDef tabix_methods[] = {
288
+ {"query", (PyCFunction)tabix_query, METH_VARARGS,
289
+ PyDoc_STR("T.query(name, begin, end) -> iterator")},
290
+ {"queryi", (PyCFunction)tabix_queryi, METH_VARARGS,
291
+ PyDoc_STR("T.queryi(tid, begin, id) -> iterator")},
292
+ {"querys", (PyCFunction)tabix_querys, METH_VARARGS,
293
+ PyDoc_STR("T.querys(region) -> iterator")},
294
+ {NULL, NULL} /* sentinel */
295
+ };
296
+
297
+ static PyTypeObject Tabix_Type = {
298
+ /* The ob_type field must be initialized in the module init function
299
+ * to be portable to Windows without using C++. */
300
+ PyVarObject_HEAD_INIT(NULL, 0)
301
+ "tabix.Tabix", /*tp_name*/
302
+ sizeof(TabixObject), /*tp_basicsize*/
303
+ 0, /*tp_itemsize*/
304
+ /* methods */
305
+ (destructor)tabix_dealloc, /*tp_dealloc*/
306
+ 0, /*tp_print*/
307
+ 0, /*tp_getattr*/
308
+ 0, /*tp_setattr*/
309
+ 0, /*tp_compare*/
310
+ (reprfunc)tabix_repr, /*tp_repr*/
311
+ 0, /*tp_as_number*/
312
+ 0, /*tp_as_sequence*/
313
+ 0, /*tp_as_mapping*/
314
+ 0, /*tp_hash*/
315
+ 0, /*tp_call*/
316
+ 0, /*tp_str*/
317
+ 0, /*tp_getattro*/
318
+ 0, /*tp_setattro*/
319
+ 0, /*tp_as_buffer*/
320
+ Py_TPFLAGS_DEFAULT, /*tp_flags*/
321
+ 0, /*tp_doc*/
322
+ 0, /*tp_traverse*/
323
+ 0, /*tp_clear*/
324
+ 0, /*tp_richcompare*/
325
+ 0, /*tp_weaklistoffset*/
326
+ 0, /*tp_iter*/
327
+ 0, /*tp_iternext*/
328
+ tabix_methods, /*tp_methods*/
329
+ 0, /*tp_members*/
330
+ 0, /*tp_getset*/
331
+ 0, /*tp_base*/
332
+ 0, /*tp_dict*/
333
+ 0, /*tp_descr_get*/
334
+ 0, /*tp_descr_set*/
335
+ 0, /*tp_dictoffset*/
336
+ 0, /*tp_init*/
337
+ 0, /*tp_alloc*/
338
+ (newfunc)tabix_new, /*tp_new*/
339
+ 0, /*tp_free*/
340
+ 0, /*tp_is_gc*/
341
+ };
342
+ /* --------------------------------------------------------------------- */
343
+
344
+ static PyMethodDef tabix_functions[] = {
345
+ {NULL, NULL} /* sentinel */
346
+ };
347
+
348
+ PyDoc_STRVAR(module_doc,
349
+ "Python interface to tabix, Heng Li's generic indexer for TAB-delimited "
350
+ "genome position filesThis is a template module just for instruction.");
351
+
352
#if PY_MAJOR_VERSION >= 3
/* Module definition handed to PyModule_Create() on Python 3. */
static struct PyModuleDef tabixmodule = {
    PyModuleDef_HEAD_INIT,
    "tabix",            /* m_name */
    module_doc,         /* m_doc */
    -1,                 /* m_size */
    tabix_functions,    /* m_methods */
    NULL,               /* m_reload (m_slots on newer Pythons) */
    NULL,               /* m_traverse */
    NULL,               /* m_clear */
    NULL                /* m_free */
};
#endif
365
+
366
+ #if PY_MAJOR_VERSION < 3
367
+ PyMODINIT_FUNC inittabix(void)
368
+ #else
369
+ PyMODINIT_FUNC PyInit_tabix(void)
370
+ #endif
371
+ {
372
+ PyObject *m;
373
+
374
+ if (PyType_Ready(&Tabix_Type) < 0)
375
+ goto fail;
376
+ if (PyType_Ready(&TabixIterator_Type) < 0)
377
+ goto fail;
378
+
379
+ #if PY_MAJOR_VERSION < 3
380
+ m = Py_InitModule3("tabix", tabix_functions, module_doc);
381
+ #else
382
+ m = PyModule_Create(&tabixmodule);
383
+ #endif
384
+ if (m == NULL)
385
+ goto fail;
386
+
387
+ if (TabixError == NULL) {
388
+ TabixError = PyErr_NewException("tabix.error", NULL, NULL);
389
+ if (TabixError == NULL)
390
+ goto fail;
391
+ }
392
+ Py_INCREF(TabixError);
393
+ PyModule_AddObject(m, "error", TabixError);
394
+
395
+ PyModule_AddObject(m, "Tabix", (PyObject *)&Tabix_Type);
396
+ PyModule_AddObject(m, "TabixIterator", (PyObject *)&TabixIterator_Type);
397
+
398
+ #if PY_MAJOR_VERSION >= 3
399
+ return m;
400
+ #endif
401
+
402
+ fail:
403
+ #if PY_MAJOR_VERSION < 3
404
+ return;
405
+ #else
406
+ return NULL;
407
+ #endif
408
+ }
@@ -0,0 +1,91 @@
1
+ #
2
+ # The MIT License
3
+ #
4
+ # Copyright (c) 2011 Seoul National University.
5
+ #
6
+ # Permission is hereby granted, free of charge, to any person obtaining
7
+ # a copy of this software and associated documentation files (the
8
+ # "Software"), to deal in the Software without restriction, including
9
+ # without limitation the rights to use, copy, modify, merge, publish,
10
+ # distribute, sublicense, and/or sell copies of the Software, and to
11
+ # permit persons to whom the Software is furnished to do so, subject to
12
+ # the following conditions:
13
+ #
14
+ # The above copyright notice and this permission notice shall be
15
+ # included in all copies or substantial portions of the Software.
16
+ #
17
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21
+ # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22
+ # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23
+ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ # SOFTWARE.
25
+ #
26
+ # Contact: Hyeshik Chang <hyeshik@snu.ac.kr>
27
+
28
+ import unittest
29
+ import random
30
+ import gzip
31
+ import tabix
32
+
33
+ EXAMPLEFILE = '../example.gtf.gz'
34
+
35
def load_example_regions(path):
    """Read a gzip-compressed, TAB-delimited annotation file.

    path -- file to read.  The argument is now honoured; the function
            used to read the module-level EXAMPLEFILE whatever was passed.

    Returns a list of (seqid, begin, end, fields) tuples, one per data
    line: seqid is column 1, begin and end are columns 4 and 5 converted
    to int, and fields is the list of the first seven columns as strings.
    Blank lines and lines starting with '#' are skipped.
    """
    alldata = []
    # The with-block closes the file even if a line fails to parse.
    with gzip.open(path, 'rb') as gzfile:
        for line in gzfile:
            # rstrip instead of [:-1]: a final line without a newline
            # must not lose its last character.
            text = line.decode('ascii').rstrip('\n')
            if not text or text.startswith('#'):
                continue

            fields = text.split('\t')
            seqid = fields[0]
            begin = int(fields[3])
            end = int(fields[4])
            alldata.append((seqid, begin, end, fields[:7]))

    return alldata
45
+
46
def does_overlap(A, B, C, D):
    """Tell whether the closed intervals [A, B] and [C, D] overlap.

    True when D lies within [A, B], or when B lies within [C, D].
    """
    if A <= D <= B:
        return True
    return C <= B <= D
48
+
49
def sample_test_dataset(regions, ntests):
    """Generate `ntests` random queries together with their expected hits.

    regions -- list of (seqid, begin, end, info) tuples.
    ntests  -- number of queries to generate.

    Each query picks a sequence id at random from the regions (ids that
    occur more often are picked more often) and a random interval
    low..high inside the span of all regions widened by 1000 on both
    sides (never below 0).  Returns a list of (seqid, low, high, matches)
    where matches holds the info of every region on that sequence that
    overlaps the interval.
    """
    seqids, begins, ends = [], [], []
    for name, start, stop, _ in regions:
        seqids.append(name)
        begins.append(start)
        ends.append(stop)

    lowerbound = max(0, min(begins) - 1000)
    upperbound = max(ends) + 1000

    tests = []
    for _ in range(ntests):
        seqid = random.choice(seqids)
        low = random.randrange(lowerbound, upperbound)
        high = random.randrange(low, upperbound)

        # for 1-based both-end inclusive intervals
        matches = []
        for seq, begin, end, info in regions:
            if seqid == seq and does_overlap(begin, end, low, high):
                matches.append(info)

        tests.append((seqid, low, high, matches))

    return tests
67
+
68
def tbresult2excerpt(tbmatches):
    """Keep only the first seven columns of every row in `tbmatches`."""
    excerpt = []
    for fields in tbmatches:
        excerpt.append(fields[:7])
    return excerpt
70
+
71
class TabixTest(unittest.TestCase):
    """Compare tabix query results with a brute-force scan of the file.

    The example file is loaded and 500 random queries are sampled once,
    when the class body is executed; both tests reuse the same queries.
    """

    regions = load_example_regions(EXAMPLEFILE)
    testset = sample_test_dataset(regions, 500)

    def setUp(self):
        self.tb = tabix.Tabix(EXAMPLEFILE)

    def testQuery(self):
        # query() takes the sequence name and both coordinates separately.
        for seqid, low, high, matches in self.testset:
            found = self.tb.query(seqid, low, high)
            self.assertEqual(tbresult2excerpt(found), matches)

    def testQueryS(self):
        # querys() takes the same query as one "seqid:low-high" string.
        for seqid, low, high, matches in self.testset:
            region = '%s:%d-%d' % (seqid, low, high)
            found = self.tb.querys(region)
            self.assertEqual(tbresult2excerpt(found), matches)
88
+
89
+
90
+ if __name__ == '__main__':
91
+ unittest.main()
data/ext/tabix/tabix.1 ADDED
@@ -0,0 +1,132 @@
1
+ .TH tabix 1 "11 May 2010" "tabix-0.2.0" "Bioinformatics tools"
2
+ .SH NAME
3
+ .PP
4
+ bgzip - Block compression/decompression utility
5
+ .PP
6
+ tabix - Generic indexer for TAB-delimited genome position files
7
+ .SH SYNOPSIS
8
+ .PP
9
+ .B bgzip
10
+ .RB [ \-cdhB ]
11
+ .RB [ \-b
12
+ .IR virtualOffset ]
13
+ .RB [ \-s
14
+ .IR size ]
15
+ .RI [ file ]
16
+ .PP
17
+ .B tabix
18
+ .RB [ \-0lf ]
19
+ .RB [ \-p
20
+ .IR gff|bed|sam|vcf ]
21
+ .RB [ \-s
22
+ .IR seqCol ]
23
+ .RB [ \-b
24
+ .IR begCol ]
25
+ .RB [ \-e
26
+ .IR endCol ]
27
+ .RB [ \-S
28
+ .IR lineSkip ]
29
+ .RB [ \-c
30
+ .IR metaChar ]
31
+ .I in.tab.bgz
32
+ .RI [ "region1 " [ "region2 " [ ... "]]]"
33
+
34
+ .SH DESCRIPTION
35
+ .PP
36
+ Tabix indexes a TAB-delimited genome position file
37
+ .I in.tab.bgz
38
+ and creates an index file
39
+ .I in.tab.bgz.tbi
40
+ when
41
+ .I region
42
+ is absent from the command-line. The input data file must be position
43
+ sorted and compressed by
44
+ .B bgzip
45
+ which has a
46
+ .BR gzip (1)
47
+ like interface. After indexing, tabix is able to quickly retrieve data
48
+ lines overlapping
49
+ .I regions
50
+ specified in the format "chr:beginPos-endPos". Fast data retrieval also
51
+ works over network if URI is given as a file name and in this case the
52
+ index file will be downloaded if it is not present locally.
53
+
54
+ .SH OPTIONS OF TABIX
55
+ .TP 10
56
+ .BI "-p " STR
57
+ Input format for indexing. Valid values are: gff, bed, sam, vcf and
58
+ psltab. This option should not be applied together with any of
59
+ .BR \-s ", " \-b ", " \-e ", " \-c " and " \-0 ;
60
+ it is not used for data retrieval because this setting is stored in
61
+ the index file. [gff]
62
+ .TP
63
+ .BI "-s " INT
64
+ Column of sequence name. Option
65
+ .BR \-s ", " \-b ", " \-e ", " \-S ", " \-c " and " \-0
66
+ are all stored in the index file and thus not used in data retrieval. [1]
67
+ .TP
68
+ .BI "-b " INT
69
+ Column of start chromosomal position. [4]
70
+ .TP
71
+ .BI "-e " INT
72
+ Column of end chromosomal position. The end column can be the same as the
73
+ start column. [5]
74
+ .TP
75
+ .BI "-S " INT
76
+ Skip first INT lines in the data file. [0]
77
+ .TP
78
+ .BI "-c " CHAR
79
+ Skip lines started with character CHAR. [#]
80
+ .TP
81
+ .B -0
82
+ Specify that the position in the data file is 0-based (e.g. UCSC files)
83
+ rather than 1-based.
84
+ .TP
85
+ .B -h
86
+ Print the header/meta lines.
87
+ .TP
88
+ .B -B
89
+ The second argument is a BED file. When this option is in use, the input
90
+ file may not be sorted or indexed. The entire input will be read sequentially. Nonetheless,
91
+ with this option, the format of the input must be specified correctly on the command line.
92
+ .TP
93
+ .B -f
94
+ Force to overwrite the index file if it is present.
95
+ .TP
96
+ .B -l
97
+ List the sequence names stored in the index file.
98
+ .RE
99
+
100
+ .SH EXAMPLE
101
+ (grep ^"#" in.gff; grep -v ^"#" in.gff | sort -k1,1 -k4,4n) | bgzip > sorted.gff.gz;
102
+
103
+ tabix -p gff sorted.gff.gz;
104
+
105
+ tabix sorted.gff.gz chr1:10,000,000-20,000,000;
106
+
107
+ .SH NOTES
108
+ It is straightforward to achieve overlap queries using the standard
109
+ B-tree index (with or without binning) implemented in all SQL databases,
110
+ or the R-tree index in PostgreSQL and Oracle. But there are still many
111
+ reasons to use tabix. Firstly, tabix directly works with a lot of widely
112
+ used TAB-delimited formats such as GFF/GTF and BED. We do not need to
113
+ design database schema or specialized binary formats. Data do not need
114
+ to be duplicated in different formats, either. Secondly, tabix works on
115
+ compressed data files while most SQL databases do not. The GenCode
116
+ annotation GTF can be compressed down to 4%. Thirdly, tabix is
117
+ fast. The same indexing algorithm is known to work efficiently for an
118
+ alignment with a few billion short reads. SQL databases probably cannot
119
+ easily handle data at this scale. Last but not least, tabix supports
120
+ remote data retrieval. One can put the data file and the index at an FTP
121
+ or HTTP server, and other users or even web services will be able to get
122
+ a slice without downloading the entire file.
123
+
124
+ .SH AUTHOR
125
+ .PP
126
+ Tabix was written by Heng Li. The BGZF library was originally
127
+ implemented by Bob Handsaker and modified by Heng Li for remote file
128
+ access and in-memory caching.
129
+
130
+ .SH SEE ALSO
131
+ .PP
132
+ .BR samtools (1)