ngs_server 0.4 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/tabix/ChangeLog +593 -0
- data/ext/tabix/Makefile +65 -0
- data/ext/tabix/NEWS +126 -0
- data/ext/tabix/TabixReader.java +395 -0
- data/ext/tabix/bam_endian.h +42 -0
- data/ext/tabix/bedidx.c +156 -0
- data/ext/tabix/bgzf.c +714 -0
- data/ext/tabix/bgzf.h +157 -0
- data/ext/tabix/bgzip.c +206 -0
- data/ext/tabix/example.gtf.gz +0 -0
- data/ext/tabix/example.gtf.gz.tbi +0 -0
- data/ext/tabix/extconf.rb +1 -0
- data/ext/tabix/index.c +998 -0
- data/ext/tabix/khash.h +486 -0
- data/ext/tabix/knetfile.c +632 -0
- data/ext/tabix/knetfile.h +75 -0
- data/ext/tabix/kseq.h +227 -0
- data/ext/tabix/ksort.h +271 -0
- data/ext/tabix/kstring.c +165 -0
- data/ext/tabix/kstring.h +68 -0
- data/ext/tabix/main.c +290 -0
- data/ext/tabix/perl/MANIFEST +8 -0
- data/ext/tabix/perl/Makefile.PL +8 -0
- data/ext/tabix/perl/Tabix.pm +76 -0
- data/ext/tabix/perl/Tabix.xs +71 -0
- data/ext/tabix/perl/TabixIterator.pm +41 -0
- data/ext/tabix/perl/t/01local.t +28 -0
- data/ext/tabix/perl/t/02remote.t +28 -0
- data/ext/tabix/perl/typemap +3 -0
- data/ext/tabix/python/setup.py +55 -0
- data/ext/tabix/python/tabixmodule.c +408 -0
- data/ext/tabix/python/test.py +91 -0
- data/ext/tabix/tabix.1 +132 -0
- data/ext/tabix/tabix.h +145 -0
- data/ext/tabix/tabix.py +87 -0
- data/ext/tabix/tabix.tex +121 -0
- data/ext/vcftools/perl/Vcf.pm +5 -3
- data/ext/vcftools/perl/vcf-query +2 -0
- data/lib/ngs_server/version.rb +1 -1
- data/lib/ngs_server.rb +12 -11
- data/ngs_server.gemspec +1 -2
- metadata +39 -2
data/ext/tabix/tabix.h
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
/* The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2009 Genome Research Ltd (GRL), 2010 Broad Institute
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
20
|
+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
21
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
22
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
23
|
+
SOFTWARE.
|
24
|
+
*/
|
25
|
+
|
26
|
+
/* Contact: Heng Li <lh3@live.co.uk> */
|
27
|
+
|
28
|
+
#ifndef __TABIDX_H
|
29
|
+
#define __TABIDX_H
|
30
|
+
|
31
|
+
#include <stdint.h>
|
32
|
+
#include "kstring.h"
|
33
|
+
#include "bgzf.h"
|
34
|
+
|
35
|
+
#define TI_PRESET_GENERIC 0
|
36
|
+
#define TI_PRESET_SAM 1
|
37
|
+
#define TI_PRESET_VCF 2
|
38
|
+
|
39
|
+
#define TI_FLAG_UCSC 0x10000
|
40
|
+
|
41
|
+
typedef int (*ti_fetch_f)(int l, const char *s, void *data);
|
42
|
+
|
43
|
+
struct __ti_index_t;
|
44
|
+
typedef struct __ti_index_t ti_index_t;
|
45
|
+
|
46
|
+
struct __ti_iter_t;
|
47
|
+
typedef struct __ti_iter_t *ti_iter_t;
|
48
|
+
|
49
|
+
typedef struct {
|
50
|
+
BGZF *fp;
|
51
|
+
ti_index_t *idx;
|
52
|
+
char *fn, *fnidx;
|
53
|
+
} tabix_t;
|
54
|
+
|
55
|
+
typedef struct {
|
56
|
+
int32_t preset;
|
57
|
+
int32_t sc, bc, ec; // seq col., beg col. and end col.
|
58
|
+
int32_t meta_char, line_skip;
|
59
|
+
} ti_conf_t;
|
60
|
+
|
61
|
+
typedef struct {
|
62
|
+
int beg, end;
|
63
|
+
char *ss, *se;
|
64
|
+
} ti_interval_t;
|
65
|
+
|
66
|
+
extern ti_conf_t ti_conf_gff, ti_conf_bed, ti_conf_psltbl, ti_conf_vcf, ti_conf_sam; // preset
|
67
|
+
|
68
|
+
#ifdef __cplusplus
|
69
|
+
extern "C" {
|
70
|
+
#endif
|
71
|
+
|
72
|
+
/*******************
|
73
|
+
* High-level APIs *
|
74
|
+
*******************/
|
75
|
+
|
76
|
+
tabix_t *ti_open(const char *fn, const char *fnidx);
|
77
|
+
int ti_lazy_index_load(tabix_t *t);
|
78
|
+
void ti_close(tabix_t *t);
|
79
|
+
ti_iter_t ti_query(tabix_t *t, const char *name, int beg, int end);
|
80
|
+
ti_iter_t ti_queryi(tabix_t *t, int tid, int beg, int end);
|
81
|
+
ti_iter_t ti_querys(tabix_t *t, const char *reg);
|
82
|
+
const char *ti_read(tabix_t *t, ti_iter_t iter, int *len);
|
83
|
+
|
84
|
+
/* Destroy the iterator */
|
85
|
+
void ti_iter_destroy(ti_iter_t iter);
|
86
|
+
|
87
|
+
/* Get the list of sequence names. Each "char*" pointer points to an
|
88
|
+
* internal member of the index, so DO NOT modify the returned
|
89
|
+
* pointer; otherwise the index will be corrupted. The returned
|
90
|
+
* pointer should be freed by a single free() call by the routine
|
91
|
+
* calling this function. The number of sequences is returned at *n. */
|
92
|
+
const char **ti_seqname(const ti_index_t *idx, int *n);
|
93
|
+
|
94
|
+
/******************
|
95
|
+
* Low-level APIs *
|
96
|
+
******************/
|
97
|
+
|
98
|
+
/* Build the index for file <fn>. File <fn>.tbi will be generated
|
99
|
+
* and overwrite the file of the same name. Return -1 on failure. */
|
100
|
+
int ti_index_build(const char *fn, const ti_conf_t *conf);
|
101
|
+
|
102
|
+
/* Load the index from file <fn>.tbi. If <fn> is a URL and the index
|
103
|
+
* file is not in the working directory, <fn>.tbi will be
|
104
|
+
* downloaded. Return NULL on failure. */
|
105
|
+
ti_index_t *ti_index_load(const char *fn);
|
106
|
+
|
107
|
+
ti_index_t *ti_index_load_local(const char *fnidx);
|
108
|
+
|
109
|
+
/* Destroy the index */
|
110
|
+
void ti_index_destroy(ti_index_t *idx);
|
111
|
+
|
112
|
+
/* Parse a region like: chr2, chr2:100, chr2:100-200. Return -1 on failure. */
|
113
|
+
int ti_parse_region(const ti_index_t *idx, const char *str, int *tid, int *begin, int *end);
|
114
|
+
|
115
|
+
int ti_get_tid(const ti_index_t *idx, const char *name);
|
116
|
+
|
117
|
+
/* Get the iterator pointing to the first record at the current file
|
118
|
+
* position. If the file is just opened, the iterator points to the
|
119
|
+
* first record in the file. */
|
120
|
+
ti_iter_t ti_iter_first(void);
|
121
|
+
|
122
|
+
/* Get the iterator pointing to the first record in region tid:beg-end */
|
123
|
+
ti_iter_t ti_iter_query(const ti_index_t *idx, int tid, int beg, int end);
|
124
|
+
|
125
|
+
/* Get the data line pointed to by the iterator and iterate to the next record. */
|
126
|
+
const char *ti_iter_read(BGZF *fp, ti_iter_t iter, int *len);
|
127
|
+
|
128
|
+
const ti_conf_t *ti_get_conf(ti_index_t *idx);
|
129
|
+
int ti_get_intv(const ti_conf_t *conf, int len, char *line, ti_interval_t *intv);
|
130
|
+
|
131
|
+
/*******************
|
132
|
+
* Deprecated APIs *
|
133
|
+
*******************/
|
134
|
+
|
135
|
+
/* The callback version for random access */
|
136
|
+
int ti_fetch(BGZF *fp, const ti_index_t *idx, int tid, int beg, int end, void *data, ti_fetch_f func);
|
137
|
+
|
138
|
+
/* Read one line. */
|
139
|
+
int ti_readline(BGZF *fp, kstring_t *str);
|
140
|
+
|
141
|
+
#ifdef __cplusplus
|
142
|
+
}
|
143
|
+
#endif
|
144
|
+
|
145
|
+
#endif
|
data/ext/tabix/tabix.py
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
# Author: Heng Li and Aaron Quinlan
|
4
|
+
# License: MIT/X11
|
5
|
+
|
6
|
+
import sys
|
7
|
+
from ctypes import *
|
8
|
+
from ctypes.util import find_library
|
9
|
+
import glob, platform
|
10
|
+
|
11
|
+
def load_shared_library(lib, _path='.', ver='*'):
|
12
|
+
"""Search for and load the tabix library. The
|
13
|
+
expectation is that the library is located in
|
14
|
+
the current directory (ie. "./")
|
15
|
+
"""
|
16
|
+
# find from the system path
|
17
|
+
path = find_library(lib)
|
18
|
+
if (path == None): # if fail, search in the custom directory
|
19
|
+
s = platform.system()
|
20
|
+
if (s == 'Darwin'): suf = ver+'.dylib'
|
21
|
+
elif (s == 'Linux'): suf = '.so'+ver
|
22
|
+
candidates = glob.glob(_path+'/lib'+lib+suf);
|
23
|
+
if (len(candidates) == 1): path = candidates[0]
|
24
|
+
else: return None
|
25
|
+
cdll.LoadLibrary(path)
|
26
|
+
return CDLL(path)
|
27
|
+
|
28
|
+
def tabix_init():
|
29
|
+
"""Initialize and return a tabix reader object
|
30
|
+
for subsequent tabix_get() calls.
|
31
|
+
"""
|
32
|
+
tabix = load_shared_library('tabix')
|
33
|
+
if (tabix == None): return None
|
34
|
+
tabix.ti_read.restype = c_char_p
|
35
|
+
# on Mac OS X 10.6, the following declarations are required.
|
36
|
+
tabix.ti_open.restype = c_void_p
|
37
|
+
tabix.ti_querys.argtypes = [c_void_p, c_char_p]
|
38
|
+
tabix.ti_querys.restype = c_void_p
|
39
|
+
tabix.ti_query.argtypes = [c_void_p, c_char_p, c_int, c_int]
|
40
|
+
tabix.ti_query.restype = c_void_p
|
41
|
+
tabix.ti_read.argtypes = [c_void_p, c_void_p, c_void_p]
|
42
|
+
tabix.ti_iter_destroy.argtypes = [c_void_p]
|
43
|
+
tabix.ti_close.argtypes = [c_void_p]
|
44
|
+
# FIXME: explicit declarations for APIs not used in this script
|
45
|
+
return tabix
|
46
|
+
|
47
|
+
# OOP interface
|
48
|
+
class Tabix:
|
49
|
+
def __init__(self, fn, fnidx=0):
|
50
|
+
self.tabix = tabix_init();
|
51
|
+
if (self.tabix == None):
|
52
|
+
sys.stderr.write("[Tabix] Please make sure the shared library is compiled and available.\n")
|
53
|
+
return
|
54
|
+
self.fp = self.tabix.ti_open(fn, fnidx);
|
55
|
+
|
56
|
+
def __del__(self):
|
57
|
+
if (self.tabix): self.tabix.ti_close(self.fp)
|
58
|
+
|
59
|
+
def fetch(self, chr, start=-1, end=-1):
|
60
|
+
"""Generator function that will yield each interval
|
61
|
+
within the requested range from the requested file.
|
62
|
+
"""
|
63
|
+
if (self.tabix == None): return
|
64
|
+
if (start < 0): iter = self.tabix.ti_querys(self.fp, chr) # chr looks like: "chr2:1,000-2,000" or "chr2"
|
65
|
+
else: iter = self.tabix.ti_query(self.fp, chr, start, end) # chr must be a sequence name
|
66
|
+
if (iter == None):
|
67
|
+
sys.stderr.write("[Tabix] Malformatted query or wrong sequence name.\n")
|
68
|
+
return
|
69
|
+
while (1): # iterate
|
70
|
+
s = self.tabix.ti_read(self.fp, iter, 0)
|
71
|
+
if (s == None): break
|
72
|
+
yield s
|
73
|
+
self.tabix.ti_iter_destroy(iter)
|
74
|
+
|
75
|
+
# command-line interface
|
76
|
+
def main():
|
77
|
+
if (len(sys.argv) < 3):
|
78
|
+
sys.stderr.write("Usage: tabix.py <in.gz> <reg>\n")
|
79
|
+
sys.exit(1)
|
80
|
+
|
81
|
+
# report the features in the requested interval
|
82
|
+
tabix = Tabix(sys.argv[1])
|
83
|
+
for line in tabix.fetch(sys.argv[2]):
|
84
|
+
print line
|
85
|
+
|
86
|
+
if __name__ == '__main__':
|
87
|
+
main()
|
data/ext/tabix/tabix.tex
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
\documentclass[10pt]{article}
|
2
|
+
\usepackage{color}
|
3
|
+
\definecolor{gray}{rgb}{0.7,0.7,0.7}
|
4
|
+
|
5
|
+
\setlength{\topmargin}{0.0cm}
|
6
|
+
\setlength{\textheight}{21.5cm}
|
7
|
+
\setlength{\oddsidemargin}{0cm}
|
8
|
+
\setlength{\textwidth}{16.5cm}
|
9
|
+
\setlength{\columnsep}{0.6cm}
|
10
|
+
|
11
|
+
\begin{document}
|
12
|
+
|
13
|
+
\title{The Tabix index file format}
|
14
|
+
\author{Heng Li}
|
15
|
+
\date{}
|
16
|
+
|
17
|
+
\maketitle
|
18
|
+
|
19
|
+
\begin{center}
|
20
|
+
\begin{tabular}{|l|l|l|l|l|l|l|}
|
21
|
+
\hline
|
22
|
+
\multicolumn{4}{|c|}{\bf Field} & \multicolumn{1}{c|}{\bf Description} & \multicolumn{1}{c|}{\bf Type} & \multicolumn{1}{c|}{\bf Value} \\
|
23
|
+
\hline\hline
|
24
|
+
\multicolumn{4}{|l|}{\tt magic} & Magic string & {\tt char[4]} & TBI$\backslash$1 \\
|
25
|
+
\hline
|
26
|
+
\multicolumn{4}{|l|}{\tt n\_ref} & \# sequences & {\tt int32\_t} & \\
|
27
|
+
\hline
|
28
|
+
\multicolumn{4}{|l|}{\tt format} & Format (0: generic; 1: SAM; 2: VCF) & {\tt int32\_t} & \\
|
29
|
+
\hline
|
30
|
+
\multicolumn{4}{|l|}{\tt col\_seq} & Column for the sequence name & {\tt int32\_t} & \\
|
31
|
+
\hline
|
32
|
+
\multicolumn{4}{|l|}{\tt col\_beg} & Column for the start of a region & {\tt int32\_t} & \\
|
33
|
+
\hline
|
34
|
+
\multicolumn{4}{|l|}{\tt col\_end} & Column for the end of a region & {\tt int32\_t} & \\
|
35
|
+
\hline
|
36
|
+
\multicolumn{4}{|l|}{\tt meta} & Leading character for comment lines & {\tt int32\_t} & \\
|
37
|
+
\hline
|
38
|
+
\multicolumn{4}{|l|}{\tt skip} & \# lines to skip at the beginning & {\tt int32\_t} & \\
|
39
|
+
\hline
|
40
|
+
\multicolumn{4}{|l|}{\tt l\_nm} & Length of concatenated sequence names & {\tt int32\_t} & \\
|
41
|
+
\hline
|
42
|
+
\multicolumn{4}{|l|}{\tt names} & Concatenated names, each zero terminated & {\tt char[l\_nm]} & \\
|
43
|
+
\hline
|
44
|
+
\multicolumn{7}{|c|}{\textcolor{gray}{\it List of indices (n=n\_ref)}}\\
|
45
|
+
\cline{2-7}
|
46
|
+
\hspace{0.1cm} & \multicolumn{3}{l|}{\tt n\_bin} & \# distinct bins (for the binning index) & {\tt int32\_t} & \\
|
47
|
+
\cline{2-7}
|
48
|
+
& \multicolumn{6}{c|}{\textcolor{gray}{\it List of distinct bins (n=n\_bin)}} \\
|
49
|
+
\cline{3-7}
|
50
|
+
& \hspace{0.1cm} & \multicolumn{2}{l|}{\tt bin} & Distinct bin number & {\tt uint32\_t} & \\
|
51
|
+
\cline{3-7}
|
52
|
+
& & \multicolumn{2}{l|}{\tt n\_chunk} & \# chunks & {\tt int32\_t} & \\
|
53
|
+
\cline{3-7}
|
54
|
+
& & \multicolumn{5}{c|}{\textcolor{gray}{\it List of chunks (n=n\_chunk)}} \\
|
55
|
+
\cline{4-7}
|
56
|
+
& & \hspace{0.1cm} & {\tt cnk\_beg} & Virtual file offset of the start of the chunk & {\tt uint64\_t} & \\
|
57
|
+
\cline{4-7}
|
58
|
+
& & & {\tt cnk\_end} & Virtual file offset of the end of the chunk & {\tt uint64\_t} & \\
|
59
|
+
\cline{2-7}
|
60
|
+
& \multicolumn{3}{l|}{\tt n\_intv} & \# 16kb intervals (for the linear index) & {\tt int32\_t} & \\
|
61
|
+
\cline{2-7}
|
62
|
+
& \multicolumn{6}{c|}{\textcolor{gray}{\it List of distinct intervals (n=n\_intv)}} \\
|
63
|
+
\cline{3-7}
|
64
|
+
& & \multicolumn{2}{l|}{\tt ioff} & File offset of the first record in the interval & {\tt uint64\_t} & \\
|
65
|
+
\hline
|
66
|
+
\end{tabular}
|
67
|
+
\end{center}
|
68
|
+
|
69
|
+
{\bf Notes:}
|
70
|
+
|
71
|
+
\begin{itemize}
|
72
|
+
\item The index file is BGZF compressed.
|
73
|
+
\item All integers are little-endian.
|
74
|
+
\item When {\tt (format\&0x10000)} is true, the coordinate follows the
|
75
|
+
{\tt BED} rule (i.e. half-closed-half-open and zero based); otherwise,
|
76
|
+
the coordinate follows the {\tt GFF} rule (closed and one based).
|
77
|
+
\item For the SAM format, the end of a region equals {\tt POS} plus the
|
78
|
+
reference length in the alignment, inferred from {\tt CIGAR}. For the
|
79
|
+
VCF format, the end of a region equals {\tt POS} plus the size of the
|
80
|
+
deletion.
|
81
|
+
\item Field {\tt col\_beg} may equal {\tt col\_end}, and in this case,
|
82
|
+
the end of a region is {\tt end}={\tt beg+1}.
|
83
|
+
\item Example. For {\tt GFF}, {\tt format}=0, {\tt col\_seq}=1, {\tt
|
84
|
+
col\_beg}=4, {\tt col\_end}=5, {\tt meta}=`{\tt \#}' and {\tt
|
85
|
+
skip}=0. For {\tt BED}, {\tt format}=0x10000, {\tt col\_seq}=1, {\tt
|
86
|
+
col\_beg}=2, {\tt col\_end}=3, {\tt meta}=`{\tt \#}' and {\tt
|
87
|
+
skip}=0.
|
88
|
+
\item Given a zero-based, half-closed and half-open region {\tt
|
89
|
+
[beg,end)}, the {\tt bin} number is calculated with the following C
|
90
|
+
function:
|
91
|
+
\begin{verbatim}
|
92
|
+
int reg2bin(int beg, int end) {
|
93
|
+
--end;
|
94
|
+
if (beg>>14 == end>>14) return ((1<<15)-1)/7 + (beg>>14);
|
95
|
+
if (beg>>17 == end>>17) return ((1<<12)-1)/7 + (beg>>17);
|
96
|
+
if (beg>>20 == end>>20) return ((1<<9)-1)/7 + (beg>>20);
|
97
|
+
if (beg>>23 == end>>23) return ((1<<6)-1)/7 + (beg>>23);
|
98
|
+
if (beg>>26 == end>>26) return ((1<<3)-1)/7 + (beg>>26);
|
99
|
+
return 0;
|
100
|
+
}
|
101
|
+
\end{verbatim}
|
102
|
+
\item The list of bins that may overlap a region {\tt [beg,end)} can be
|
103
|
+
obtained with the following C function.
|
104
|
+
\begin{verbatim}
|
105
|
+
#define MAX_BIN (((1<<18)-1)/7)
|
106
|
+
int reg2bins(int rbeg, int rend, uint16_t list[MAX_BIN])
|
107
|
+
{
|
108
|
+
int i = 0, k;
|
109
|
+
--rend;
|
110
|
+
list[i++] = 0;
|
111
|
+
for (k = 1 + (rbeg>>26); k <= 1 + (rend>>26); ++k) list[i++] = k;
|
112
|
+
for (k = 9 + (rbeg>>23); k <= 9 + (rend>>23); ++k) list[i++] = k;
|
113
|
+
for (k = 73 + (rbeg>>20); k <= 73 + (rend>>20); ++k) list[i++] = k;
|
114
|
+
for (k = 585 + (rbeg>>17); k <= 585 + (rend>>17); ++k) list[i++] = k;
|
115
|
+
for (k = 4681 + (rbeg>>14); k <= 4681 + (rend>>14); ++k) list[i++] = k;
|
116
|
+
return i; // #elements in list[]
|
117
|
+
}
|
118
|
+
\end{verbatim}
|
119
|
+
\end{itemize}
|
120
|
+
|
121
|
+
\end{document}
|
data/ext/vcftools/perl/Vcf.pm
CHANGED
@@ -72,6 +72,7 @@ use Carp;
|
|
72
72
|
use Exporter;
|
73
73
|
use Data::Dumper;
|
74
74
|
use POSIX ":sys_wait_h";
|
75
|
+
use FindBin;
|
75
76
|
|
76
77
|
use vars qw/@ISA @EXPORT/;
|
77
78
|
@ISA = qw/Exporter/;
|
@@ -157,6 +158,7 @@ sub new
|
|
157
158
|
$$self{has_header} = 0;
|
158
159
|
$$self{default_version} = '4.1';
|
159
160
|
$$self{versions} = [ qw(Vcf3_2 Vcf3_3 Vcf4_0 Vcf4_1) ];
|
161
|
+
$$self{tabix} = "$FindBin::RealBin/../../tabix/tabix";
|
160
162
|
if ( !exists($$self{max_line_len}) && exists($ENV{MAX_VCF_LINE_LEN}) ) { $$self{max_line_len} = $ENV{MAX_VCF_LINE_LEN} }
|
161
163
|
$$self{fix_v40_AGtags} = $ENV{DONT_FIX_VCF40_AG_TAGS} ? 0 : 1;
|
162
164
|
my %open_args = ();
|
@@ -203,7 +205,7 @@ sub _open
|
|
203
205
|
{
|
204
206
|
if ( exists($args{region}) && defined($args{region}) )
|
205
207
|
{
|
206
|
-
$cmd = "tabix $tabix_args |";
|
208
|
+
$cmd = "$$self{tabix} $tabix_args |";
|
207
209
|
}
|
208
210
|
else { $cmd = "gunzip -c $$self{file} |"; }
|
209
211
|
$$self{check_exit_status} = 1;
|
@@ -211,7 +213,7 @@ sub _open
|
|
211
213
|
elsif ( $$self{file}=~m{^(?:http|ftp)://} )
|
212
214
|
{
|
213
215
|
if ( !exists($args{region}) ) { $tabix_args .= ' .'; }
|
214
|
-
$cmd = "tabix $tabix_args |";
|
216
|
+
$cmd = "$$self{tabix} $tabix_args |";
|
215
217
|
$$self{check_exit_status} = 1;
|
216
218
|
}
|
217
219
|
open($$self{fh},$cmd) or $self->throw("$cmd: $!");
|
@@ -2152,7 +2154,7 @@ sub get_chromosomes
|
|
2152
2154
|
{
|
2153
2155
|
my ($self) = @_;
|
2154
2156
|
if ( !$$self{file} ) { $self->throw(qq[The parameter "file" not set.\n]); }
|
2155
|
-
my (@out) = `tabix -l $$self{file}`;
|
2157
|
+
my (@out) = `$$self{tabix} -l $$self{file}`;
|
2156
2158
|
if ( $? )
|
2157
2159
|
{
|
2158
2160
|
$self->throw(qq[The command "tabix -l $$self{file}" exited with an error. Is the file tabix indexed?\n]);
|
data/ext/vcftools/perl/vcf-query
CHANGED
data/lib/ngs_server/version.rb
CHANGED
data/lib/ngs_server.rb
CHANGED
@@ -4,23 +4,22 @@ require 'sinatra/base'
|
|
4
4
|
require 'json'
|
5
5
|
|
6
6
|
|
7
|
+
|
7
8
|
class MyNgsServer < Sinatra::Base
|
9
|
+
|
8
10
|
set :server, 'thin'
|
9
11
|
|
10
12
|
gempath = File.join(File.dirname(__FILE__), "../")
|
11
13
|
datapath = File.join(gempath, 'data')
|
12
14
|
bamtools_path = "#{gempath}/ext/bamtools/bin/bamtools"
|
13
15
|
vcftools_path = "#{gempath}/ext/vcftools/bin/vcf-query"
|
14
|
-
get '/test' do
|
15
|
-
|
16
|
-
'hi'
|
17
|
-
end
|
18
|
-
|
19
16
|
|
20
17
|
get '/json/bam/*' do |path|
|
21
|
-
|
22
|
-
|
23
|
-
|
18
|
+
|
19
|
+
# turn off json content type so a preflight cors doesn't need to be done
|
20
|
+
#content_type :json
|
21
|
+
response['Access-Control-Allow-Origin'] = '*';
|
22
|
+
|
24
23
|
|
25
24
|
# invoke with eg: base_url/json/bam/subset22-sorted.bam?min=30000000&max=30010000&segment=22
|
26
25
|
json = `#{bamtools_path} convert -in #{datapath}/#{path} -format json -region #{params["segment"]}:#{params["min"]}..#{params["max"]}`
|
@@ -29,8 +28,9 @@ class MyNgsServer < Sinatra::Base
|
|
29
28
|
end
|
30
29
|
|
31
30
|
get '/json/vcf/*' do |path|
|
32
|
-
|
33
|
-
|
31
|
+
|
32
|
+
# turn off json content type so a preflight cors doesn't need to be done
|
33
|
+
#content_type :json
|
34
34
|
response['Access-Control-Allow-Origin'] = '*';
|
35
35
|
|
36
36
|
# invoke with eg: base_url/json/vcf/ALL.2of4intersection.20100804.sites.vcf.gz?min=6992179&max=6992190&segment=1
|
@@ -41,7 +41,8 @@ class MyNgsServer < Sinatra::Base
|
|
41
41
|
|
42
42
|
get '/json/sources/:extension' do
|
43
43
|
|
44
|
-
|
44
|
+
# turn off json content type so a preflight cors doesn't need to be done
|
45
|
+
# content_type :json
|
45
46
|
response['Access-Control-Allow-Origin'] = '*';
|
46
47
|
|
47
48
|
# invoke with eg: base_url/json/vcf/file=ALL.2of4intersection.20100804.sites.vcf.gz?min=6992179&max=6992190&segment=1
|
data/ngs_server.gemspec
CHANGED
@@ -20,7 +20,6 @@ Gem::Specification.new do |s|
|
|
20
20
|
s.files = `git ls-files`.split("\n")
|
21
21
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
22
22
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
23
|
-
|
24
|
-
s.extensions = ["ext/bamtools/extconf.rb", "ext/vcftools/extconf.rb"]
|
23
|
+
s.extensions = ["ext/bamtools/extconf.rb", "ext/vcftools/extconf.rb", "ext/tabix/extconf.rb"]
|
25
24
|
s.require_paths = ["lib"]
|
26
25
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: ngs_server
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: "0.4"
|
5
|
+
version: "0.5"
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Chase Miller
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-12-
|
13
|
+
date: 2011-12-28 00:00:00 -05:00
|
14
14
|
default_executable:
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -54,6 +54,7 @@ executables:
|
|
54
54
|
extensions:
|
55
55
|
- ext/bamtools/extconf.rb
|
56
56
|
- ext/vcftools/extconf.rb
|
57
|
+
- ext/tabix/extconf.rb
|
57
58
|
extra_rdoc_files: []
|
58
59
|
|
59
60
|
files:
|
@@ -338,6 +339,42 @@ files:
|
|
338
339
|
- ext/bamtools/src/utils/bamtools_variant.h
|
339
340
|
- ext/bamtools/src/utils/cmake_install.cmake
|
340
341
|
- ext/bamtools/src/utils/utils_global.h
|
342
|
+
- ext/tabix/ChangeLog
|
343
|
+
- ext/tabix/Makefile
|
344
|
+
- ext/tabix/NEWS
|
345
|
+
- ext/tabix/TabixReader.java
|
346
|
+
- ext/tabix/bam_endian.h
|
347
|
+
- ext/tabix/bedidx.c
|
348
|
+
- ext/tabix/bgzf.c
|
349
|
+
- ext/tabix/bgzf.h
|
350
|
+
- ext/tabix/bgzip.c
|
351
|
+
- ext/tabix/example.gtf.gz
|
352
|
+
- ext/tabix/example.gtf.gz.tbi
|
353
|
+
- ext/tabix/extconf.rb
|
354
|
+
- ext/tabix/index.c
|
355
|
+
- ext/tabix/khash.h
|
356
|
+
- ext/tabix/knetfile.c
|
357
|
+
- ext/tabix/knetfile.h
|
358
|
+
- ext/tabix/kseq.h
|
359
|
+
- ext/tabix/ksort.h
|
360
|
+
- ext/tabix/kstring.c
|
361
|
+
- ext/tabix/kstring.h
|
362
|
+
- ext/tabix/main.c
|
363
|
+
- ext/tabix/perl/MANIFEST
|
364
|
+
- ext/tabix/perl/Makefile.PL
|
365
|
+
- ext/tabix/perl/Tabix.pm
|
366
|
+
- ext/tabix/perl/Tabix.xs
|
367
|
+
- ext/tabix/perl/TabixIterator.pm
|
368
|
+
- ext/tabix/perl/t/01local.t
|
369
|
+
- ext/tabix/perl/t/02remote.t
|
370
|
+
- ext/tabix/perl/typemap
|
371
|
+
- ext/tabix/python/setup.py
|
372
|
+
- ext/tabix/python/tabixmodule.c
|
373
|
+
- ext/tabix/python/test.py
|
374
|
+
- ext/tabix/tabix.1
|
375
|
+
- ext/tabix/tabix.h
|
376
|
+
- ext/tabix/tabix.py
|
377
|
+
- ext/tabix/tabix.tex
|
341
378
|
- ext/vcftools/Makefile
|
342
379
|
- ext/vcftools/README.txt
|
343
380
|
- ext/vcftools/cpp/.svn/all-wcprops
|