ngs_server 0.4 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/tabix/ChangeLog +593 -0
- data/ext/tabix/Makefile +65 -0
- data/ext/tabix/NEWS +126 -0
- data/ext/tabix/TabixReader.java +395 -0
- data/ext/tabix/bam_endian.h +42 -0
- data/ext/tabix/bedidx.c +156 -0
- data/ext/tabix/bgzf.c +714 -0
- data/ext/tabix/bgzf.h +157 -0
- data/ext/tabix/bgzip.c +206 -0
- data/ext/tabix/example.gtf.gz +0 -0
- data/ext/tabix/example.gtf.gz.tbi +0 -0
- data/ext/tabix/extconf.rb +1 -0
- data/ext/tabix/index.c +998 -0
- data/ext/tabix/khash.h +486 -0
- data/ext/tabix/knetfile.c +632 -0
- data/ext/tabix/knetfile.h +75 -0
- data/ext/tabix/kseq.h +227 -0
- data/ext/tabix/ksort.h +271 -0
- data/ext/tabix/kstring.c +165 -0
- data/ext/tabix/kstring.h +68 -0
- data/ext/tabix/main.c +290 -0
- data/ext/tabix/perl/MANIFEST +8 -0
- data/ext/tabix/perl/Makefile.PL +8 -0
- data/ext/tabix/perl/Tabix.pm +76 -0
- data/ext/tabix/perl/Tabix.xs +71 -0
- data/ext/tabix/perl/TabixIterator.pm +41 -0
- data/ext/tabix/perl/t/01local.t +28 -0
- data/ext/tabix/perl/t/02remote.t +28 -0
- data/ext/tabix/perl/typemap +3 -0
- data/ext/tabix/python/setup.py +55 -0
- data/ext/tabix/python/tabixmodule.c +408 -0
- data/ext/tabix/python/test.py +91 -0
- data/ext/tabix/tabix.1 +132 -0
- data/ext/tabix/tabix.h +145 -0
- data/ext/tabix/tabix.py +87 -0
- data/ext/tabix/tabix.tex +121 -0
- data/ext/vcftools/perl/Vcf.pm +5 -3
- data/ext/vcftools/perl/vcf-query +2 -0
- data/lib/ngs_server/version.rb +1 -1
- data/lib/ngs_server.rb +12 -11
- data/ngs_server.gemspec +1 -2
- metadata +39 -2
data/ext/tabix/bgzf.h
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
/* The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
22
|
+
*/
|
23
|
+
|
24
|
+
#ifndef __BGZF_H
|
25
|
+
#define __BGZF_H
|
26
|
+
|
27
|
+
#include <stdint.h>
|
28
|
+
#include <stdio.h>
|
29
|
+
#include <zlib.h>
|
30
|
+
#ifdef _USE_KNETFILE
|
31
|
+
#include "knetfile.h"
|
32
|
+
#endif
|
33
|
+
|
34
|
+
//typedef int8_t bool;
|
35
|
+
|
36
|
+
/*
 * Per-stream state for a BGZF file handle.
 *
 * Field notes below state only what the accessors in this header show;
 * anything marked "presumably" is inferred from the name and should be
 * confirmed against bgzf.c.
 */
typedef struct {
	int file_descriptor;
	char open_mode;			/* 'r' or 'w' */
	int16_t owned_file;		/* presumably non-zero when the handle owns the file -- confirm */
	int16_t compress_level;
#ifdef _USE_KNETFILE
	union {
		knetFile *fpr;		/* handle bgzf_getc() passes to knet_tell() */
		FILE *fpw;		/* presumably the stdio stream used when writing -- confirm */
	} x;
#else
	FILE *file;			/* stream bgzf_getc() passes to ftello() */
#endif
	int uncompressed_block_size;
	int compressed_block_size;
	void *uncompressed_block;	/* bgzf_getc() reads bytes from here */
	void *compressed_block;
	int64_t block_address;		/* bits above the low 16 of a bgzf_tell() value */
	int block_length;		/* bgzf_getc() refills once block_offset reaches this */
	int block_offset;		/* low 16 bits of a bgzf_tell() value; next index into uncompressed_block */
	int cache_size;			/* see bgzf_set_cache_size() */
	const char *error;		/* presumably a message describing the last failure -- confirm */
	void *cache;			/* a pointer to a hash table */
} BGZF;
|
59
|
+
|
60
|
+
#ifdef __cplusplus
|
61
|
+
extern "C" {
|
62
|
+
#endif
|
63
|
+
|
64
|
+
/*
|
65
|
+
* Open an existing file descriptor for reading or writing.
|
66
|
+
* Mode must be either "r" or "w".
|
67
|
+
* A subsequent bgzf_close will not close the file descriptor.
|
68
|
+
* Returns null on error.
|
69
|
+
*/
|
70
|
+
BGZF* bgzf_fdopen(int fd, const char* __restrict mode);
|
71
|
+
|
72
|
+
/*
|
73
|
+
* Open the specified file for reading or writing.
|
74
|
+
* Mode must be either "r" or "w".
|
75
|
+
* Returns null on error.
|
76
|
+
*/
|
77
|
+
BGZF* bgzf_open(const char* path, const char* __restrict mode);
|
78
|
+
|
79
|
+
/*
|
80
|
+
* Close the BGZ file and free all associated resources.
|
81
|
+
* Does not close the underlying file descriptor if created with bgzf_fdopen.
|
82
|
+
* Returns zero on success, -1 on error.
|
83
|
+
*/
|
84
|
+
int bgzf_close(BGZF* fp);
|
85
|
+
|
86
|
+
/*
|
87
|
+
* Read up to length bytes from the file storing into data.
|
88
|
+
* Returns the number of bytes actually read.
|
89
|
+
* Returns zero on end of file.
|
90
|
+
* Returns -1 on error.
|
91
|
+
*/
|
92
|
+
int bgzf_read(BGZF* fp, void* data, int length);
|
93
|
+
|
94
|
+
/*
|
95
|
+
* Write length bytes from data to the file.
|
96
|
+
* Returns the number of bytes written.
|
97
|
+
* Returns -1 on error.
|
98
|
+
*/
|
99
|
+
int bgzf_write(BGZF* fp, const void* data, int length);
|
100
|
+
|
101
|
+
/*
|
102
|
+
* Return a virtual file pointer to the current location in the file.
|
103
|
+
* No interpretation of the value should be made, other than a subsequent
|
104
|
+
* call to bgzf_seek can be used to position the file at the same point.
|
105
|
+
* Return value is non-negative on success.
|
106
|
+
* Returns -1 on error.
|
107
|
+
*/
|
108
|
+
/* Packs block_address into the bits above the low 16 and block_offset into
 * the low 16 bits.  fp is parenthesized so any pointer expression (&h, p + 1,
 * ...) expands correctly; note that fp is evaluated twice. */
#define bgzf_tell(fp) (((fp)->block_address << 16) | ((fp)->block_offset & 0xFFFF))
|
109
|
+
|
110
|
+
/*
|
111
|
+
* Set the file to read from the location specified by pos, which must
|
112
|
+
* be a value previously returned by bgzf_tell for this file (but not
|
113
|
+
* necessarily one returned by this file handle).
|
114
|
+
* The where argument must be SEEK_SET.
|
115
|
+
* Seeking on a file opened for write is not supported.
|
116
|
+
* Returns zero on success, -1 on error.
|
117
|
+
*/
|
118
|
+
int64_t bgzf_seek(BGZF* fp, int64_t pos, int where);
|
119
|
+
|
120
|
+
/*
|
121
|
+
* Set the cache size. Zero to disable. By default, caching is
|
122
|
+
* disabled. The recommended cache size for frequent random access is
|
123
|
+
* about 8M bytes.
|
124
|
+
*/
|
125
|
+
void bgzf_set_cache_size(BGZF *fp, int cache_size);
|
126
|
+
|
127
|
+
int bgzf_check_EOF(BGZF *fp);
|
128
|
+
int bgzf_read_block(BGZF* fp);
|
129
|
+
int bgzf_flush(BGZF* fp);
|
130
|
+
int bgzf_flush_try(BGZF *fp, int size);
|
131
|
+
int bgzf_check_bgzf(const char *fn);
|
132
|
+
|
133
|
+
#ifdef __cplusplus
|
134
|
+
}
|
135
|
+
#endif
|
136
|
+
|
137
|
+
/*
 * Fetch the next byte of the uncompressed stream.
 *
 * Returns the byte as a non-negative int, -1 when the block loaded by
 * bgzf_read_block() is empty (end-of-file), or -2 when bgzf_read_block()
 * reports an error.
 */
static inline int bgzf_getc(BGZF *fp)
{
	const unsigned char *bytes;
	int ch;

	/* Nothing left in the current block: load the next one first. */
	if (fp->block_offset >= fp->block_length) {
		if (bgzf_read_block(fp) != 0)
			return -2;	/* error */
		if (fp->block_length == 0)
			return -1;	/* end-of-file */
	}

	bytes = (const unsigned char *)fp->uncompressed_block;
	ch = bytes[fp->block_offset];
	fp->block_offset += 1;

	if (fp->block_offset != fp->block_length)
		return ch;

	/*
	 * That was the last byte of the block: record the underlying stream's
	 * current position as the new block address and mark the block empty.
	 */
#ifdef _USE_KNETFILE
	fp->block_address = knet_tell(fp->x.fpr);
#else
	fp->block_address = ftello(fp->file);
#endif
	fp->block_offset = 0;
	fp->block_length = 0;
	return ch;
}
|
156
|
+
|
157
|
+
#endif
|
data/ext/tabix/bgzip.c
ADDED
@@ -0,0 +1,206 @@
|
|
1
|
+
/* The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
22
|
+
*/
|
23
|
+
|
24
|
+
#include <stdlib.h>
|
25
|
+
#include <string.h>
|
26
|
+
#include <stdio.h>
|
27
|
+
#include <fcntl.h>
|
28
|
+
#include <unistd.h>
|
29
|
+
#include <errno.h>
|
30
|
+
#include <sys/select.h>
|
31
|
+
#include <sys/stat.h>
|
32
|
+
#include "bgzf.h"
|
33
|
+
|
34
|
+
/* Size in bytes (64 KiB) of the transfer buffer and of the largest single read request. */
static const int WINDOW_SIZE = 64 * 1024;
|
35
|
+
|
36
|
+
/*
 * Print the bgzip command-line help text to stderr.
 *
 * Takes no arguments (declared with a proper (void) prototype so stray
 * arguments are diagnosed).  Always returns 1, which lets callers write
 * `return bgzip_main_usage();` to exit with a failure status.
 */
static int bgzip_main_usage(void)
{
	static const char *const help_text[] = {
		"\n",
		"Usage: bgzip [options] [file] ...\n\n",
		"Options: -c write on standard output, keep original files unchanged\n",
		" -d decompress\n",
		" -f overwrite files without asking\n",
		" -b INT decompress at virtual file pointer INT\n",
		" -s INT decompress INT bytes in the uncompressed file\n",
		" -h give this help\n",
		"\n",
	};
	size_t i;

	/* The strings contain no conversions, so fputs emits them verbatim. */
	for (i = 0; i < sizeof help_text / sizeof help_text[0]; i++)
		fputs(help_text[i], stderr);
	return 1;
}
|
49
|
+
|
50
|
+
/*
 * Open fn for writing and return its file descriptor.
 *
 * fn        path of the file to create.
 * is_forced non-zero to overwrite an existing file without asking.
 *
 * When is_forced is zero the file is first created exclusively (O_EXCL).
 * If that fails because the file already exists, the user is prompted on
 * stderr and a single character is read from stdin; anything other than
 * 'y' or 'Y' -- including end-of-file or a read error on stdin -- prints
 * "not overwritten" and terminates the process with status 1.
 *
 * If no descriptor has been obtained at that point (forced mode, the user
 * agreed, or the exclusive create failed for another reason) the file is
 * created/truncated unconditionally.
 *
 * Never returns a negative value: a failed open prints a message and
 * calls exit(1).
 */
static int write_open(const char *fn, int is_forced)
{
	int fd = -1;

	if (!is_forced) {
		fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666);
		if (fd < 0 && errno == EEXIST) {
			char answer = '\0';

			fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn);
			/* scanf leaves `answer` untouched on EOF/error: treat that as "no". */
			if (scanf("%c", &answer) != 1 || (answer != 'Y' && answer != 'y')) {
				fprintf(stderr, "[bgzip] not overwritten\n");
				exit(1);
			}
		}
	}
	if (fd < 0) {
		fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666);
		if (fd < 0) {
			fprintf(stderr, "[bgzip] %s: Fail to write\n", fn);
			exit(1);
		}
	}
	return fd;
}
|
72
|
+
|
73
|
+
/*
 * Print the message held in fp->error to stderr as "Error: <message>"
 * and terminate the process with exit status 1.  Does not return.
 */
static void fail(BGZF* fp)
{
	const char *reason = fp->error;

	fprintf(stderr, "Error: %s\n", reason);
	exit(1);
}
|
78
|
+
|
79
|
+
int main(int argc, char **argv)
|
80
|
+
{
|
81
|
+
int c, compress, pstdout, is_forced;
|
82
|
+
BGZF *fp;
|
83
|
+
void *buffer;
|
84
|
+
long start, end, size;
|
85
|
+
|
86
|
+
compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
|
87
|
+
while((c = getopt(argc, argv, "cdhfb:s:")) >= 0){
|
88
|
+
switch(c){
|
89
|
+
case 'h': return bgzip_main_usage();
|
90
|
+
case 'd': compress = 0; break;
|
91
|
+
case 'c': pstdout = 1; break;
|
92
|
+
case 'b': start = atol(optarg); break;
|
93
|
+
case 's': size = atol(optarg); break;
|
94
|
+
case 'f': is_forced = 1; break;
|
95
|
+
}
|
96
|
+
}
|
97
|
+
if (size >= 0) end = start + size;
|
98
|
+
if (end >= 0 && end < start) {
|
99
|
+
fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
|
100
|
+
return 1;
|
101
|
+
}
|
102
|
+
if (compress == 1) {
|
103
|
+
struct stat sbuf;
|
104
|
+
int f_src = fileno(stdin);
|
105
|
+
int f_dst = fileno(stdout);
|
106
|
+
|
107
|
+
if ( argc>optind )
|
108
|
+
{
|
109
|
+
if ( stat(argv[optind],&sbuf)<0 )
|
110
|
+
{
|
111
|
+
fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
|
112
|
+
return 1;
|
113
|
+
}
|
114
|
+
|
115
|
+
if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
|
116
|
+
fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
|
117
|
+
return 1;
|
118
|
+
}
|
119
|
+
|
120
|
+
if (pstdout)
|
121
|
+
f_dst = fileno(stdout);
|
122
|
+
else
|
123
|
+
{
|
124
|
+
char *name = malloc(strlen(argv[optind]) + 5);
|
125
|
+
strcpy(name, argv[optind]);
|
126
|
+
strcat(name, ".gz");
|
127
|
+
f_dst = write_open(name, is_forced);
|
128
|
+
if (f_dst < 0) return 1;
|
129
|
+
free(name);
|
130
|
+
}
|
131
|
+
}
|
132
|
+
else if (!pstdout && isatty(fileno((FILE *)stdout)) )
|
133
|
+
return bgzip_main_usage();
|
134
|
+
|
135
|
+
fp = bgzf_fdopen(f_dst, "w");
|
136
|
+
buffer = malloc(WINDOW_SIZE);
|
137
|
+
while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
|
138
|
+
if (bgzf_write(fp, buffer, c) < 0) fail(fp);
|
139
|
+
// f_dst will be closed here
|
140
|
+
if (bgzf_close(fp) < 0) fail(fp);
|
141
|
+
if (argc > optind && !pstdout) unlink(argv[optind]);
|
142
|
+
free(buffer);
|
143
|
+
close(f_src);
|
144
|
+
return 0;
|
145
|
+
} else {
|
146
|
+
struct stat sbuf;
|
147
|
+
int f_dst;
|
148
|
+
|
149
|
+
if ( argc>optind )
|
150
|
+
{
|
151
|
+
if ( stat(argv[optind],&sbuf)<0 )
|
152
|
+
{
|
153
|
+
fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
|
154
|
+
return 1;
|
155
|
+
}
|
156
|
+
char *name;
|
157
|
+
int len = strlen(argv[optind]);
|
158
|
+
if ( strcmp(argv[optind]+len-3,".gz") )
|
159
|
+
{
|
160
|
+
fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
|
161
|
+
return 1;
|
162
|
+
}
|
163
|
+
fp = bgzf_open(argv[optind], "r");
|
164
|
+
if (fp == NULL) {
|
165
|
+
fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
|
166
|
+
return 1;
|
167
|
+
}
|
168
|
+
|
169
|
+
if (pstdout) {
|
170
|
+
f_dst = fileno(stdout);
|
171
|
+
}
|
172
|
+
else {
|
173
|
+
name = strdup(argv[optind]);
|
174
|
+
name[strlen(name) - 3] = '\0';
|
175
|
+
f_dst = write_open(name, is_forced);
|
176
|
+
free(name);
|
177
|
+
}
|
178
|
+
}
|
179
|
+
else if (!pstdout && isatty(fileno((FILE *)stdin)) )
|
180
|
+
return bgzip_main_usage();
|
181
|
+
else
|
182
|
+
{
|
183
|
+
f_dst = fileno(stdout);
|
184
|
+
fp = bgzf_fdopen(fileno(stdin), "r");
|
185
|
+
if (fp == NULL) {
|
186
|
+
fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
|
187
|
+
return 1;
|
188
|
+
}
|
189
|
+
}
|
190
|
+
buffer = malloc(WINDOW_SIZE);
|
191
|
+
if (bgzf_seek(fp, start, SEEK_SET) < 0) fail(fp);
|
192
|
+
while (1) {
|
193
|
+
if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
|
194
|
+
else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
|
195
|
+
if (c == 0) break;
|
196
|
+
if (c < 0) fail(fp);
|
197
|
+
start += c;
|
198
|
+
write(f_dst, buffer, c);
|
199
|
+
if (end >= 0 && start >= end) break;
|
200
|
+
}
|
201
|
+
free(buffer);
|
202
|
+
if (bgzf_close(fp) < 0) fail(fp);
|
203
|
+
if (!pstdout) unlink(argv[optind]);
|
204
|
+
return 0;
|
205
|
+
}
|
206
|
+
}
|
Binary file
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
# Placeholder script: prints a fixed message and performs no other work.
puts "nothing to do"
|