ngs_server 0.4 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
data/ext/tabix/bgzf.h ADDED
@@ -0,0 +1,157 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
22
+ */
23
+
24
+ #ifndef __BGZF_H
25
+ #define __BGZF_H
26
+
27
+ #include <stdint.h>
28
+ #include <stdio.h>
29
+ #include <zlib.h>
30
+ #ifdef _USE_KNETFILE
31
+ #include "knetfile.h"
32
+ #endif
33
+
34
+ //typedef int8_t bool;
35
+
36
/*
 * Handle for one open BGZF stream.
 *
 * The fields are shared between the library and the inline helpers in this
 * header: bgzf_tell() combines block_address and block_offset, and
 * bgzf_getc() serves bytes out of uncompressed_block.
 */
typedef struct {
    int file_descriptor;
    char open_mode;                 /* 'r' or 'w' */
    int16_t owned_file;
    int16_t compress_level;
#ifdef _USE_KNETFILE
    union {
        knetFile *fpr;              /* handle queried by bgzf_getc() via knet_tell() */
        FILE *fpw;                  /* presumably the handle used when writing -- confirm in bgzf.c */
    } x;
#else
    FILE *file;                     /* underlying stdio stream */
#endif
    int uncompressed_block_size;
    int compressed_block_size;
    void *uncompressed_block;       /* buffer bgzf_getc() reads bytes from */
    void *compressed_block;
    int64_t block_address;          /* offset in the underlying file; upper part of a virtual file pointer */
    int block_length;               /* number of valid bytes in uncompressed_block */
    int block_offset;               /* next byte to hand out from uncompressed_block */
    int cache_size;                 /* see bgzf_set_cache_size() */
    const char *error;              /* message describing the last error, printed by callers */
    void *cache;                    /* a pointer to a hash table */
} BGZF;
59
+
60
+ #ifdef __cplusplus
61
+ extern "C" {
62
+ #endif
63
+
64
+ /*
65
+ * Open an existing file descriptor for reading or writing.
66
+ * Mode must be either "r" or "w".
67
+ * A subsequent bgzf_close will not close the file descriptor.
68
+ * Returns null on error.
69
+ */
70
+ BGZF* bgzf_fdopen(int fd, const char* __restrict mode);
71
+
72
+ /*
73
+ * Open the specified file for reading or writing.
74
+ * Mode must be either "r" or "w".
75
+ * Returns null on error.
76
+ */
77
+ BGZF* bgzf_open(const char* path, const char* __restrict mode);
78
+
79
+ /*
80
+ * Close the BGZF file and free all associated resources.
81
+ * Does not close the underlying file descriptor if created with bgzf_fdopen.
82
+ * Returns zero on success, -1 on error.
83
+ */
84
+ int bgzf_close(BGZF* fp);
85
+
86
+ /*
87
+ * Read up to length bytes from the file storing into data.
88
+ * Returns the number of bytes actually read.
89
+ * Returns zero on end of file.
90
+ * Returns -1 on error.
91
+ */
92
+ int bgzf_read(BGZF* fp, void* data, int length);
93
+
94
+ /*
95
+ * Write length bytes from data to the file.
96
+ * Returns the number of bytes written.
97
+ * Returns -1 on error.
98
+ */
99
+ int bgzf_write(BGZF* fp, const void* data, int length);
100
+
101
/*
 * Return a virtual file pointer to the current location in the file.
 * No interpretation of the value should be made, other than a subsequent
 * call to bgzf_seek can be used to position the file at the same point.
 *
 * The value packs fp->block_address into the upper bits and the low
 * 16 bits of fp->block_offset into the lower bits.
 *
 * This is a macro: it performs no error checking of its own and evaluates
 * fp twice, so the argument must not have side effects.  The argument is
 * fully parenthesized so that expressions such as `p + 1` or `&obj` work.
 */
#define bgzf_tell(fp) (((fp)->block_address << 16) | ((fp)->block_offset & 0xFFFF))
109
+
110
+ /*
111
+ * Set the file to read from the location specified by pos, which must
112
+ * be a value previously returned by bgzf_tell for this file (but not
113
+ * necessarily one returned by this file handle).
114
+ * The where argument must be SEEK_SET.
115
+ * Seeking on a file opened for write is not supported.
116
+ * Returns zero on success, -1 on error.
117
+ */
118
+ int64_t bgzf_seek(BGZF* fp, int64_t pos, int where);
119
+
120
+ /*
121
+ * Set the cache size. Zero to disable. By default, caching is
122
+ * disabled. The recommended cache size for frequent random access is
123
+ * about 8M bytes.
124
+ */
125
+ void bgzf_set_cache_size(BGZF *fp, int cache_size);
126
+
127
+ int bgzf_check_EOF(BGZF *fp);
128
+ int bgzf_read_block(BGZF* fp);
129
+ int bgzf_flush(BGZF* fp);
130
+ int bgzf_flush_try(BGZF *fp, int size);
131
+ int bgzf_check_bgzf(const char *fn);
132
+
133
+ #ifdef __cplusplus
134
+ }
135
+ #endif
136
+
137
+ static inline int bgzf_getc(BGZF *fp)
138
+ {
139
+ int c;
140
+ if (fp->block_offset >= fp->block_length) {
141
+ if (bgzf_read_block(fp) != 0) return -2; /* error */
142
+ if (fp->block_length == 0) return -1; /* end-of-file */
143
+ }
144
+ c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++];
145
+ if (fp->block_offset == fp->block_length) {
146
+ #ifdef _USE_KNETFILE
147
+ fp->block_address = knet_tell(fp->x.fpr);
148
+ #else
149
+ fp->block_address = ftello(fp->file);
150
+ #endif
151
+ fp->block_offset = 0;
152
+ fp->block_length = 0;
153
+ }
154
+ return c;
155
+ }
156
+
157
+ #endif
data/ext/tabix/bgzip.c ADDED
@@ -0,0 +1,206 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
22
+ */
23
+
24
+ #include <stdlib.h>
25
+ #include <string.h>
26
+ #include <stdio.h>
27
+ #include <fcntl.h>
28
+ #include <unistd.h>
29
+ #include <errno.h>
30
+ #include <sys/select.h>
31
+ #include <sys/stat.h>
32
+ #include "bgzf.h"
33
+
34
/* Size in bytes (64 KiB) of the buffer used for each read/write chunk. */
static const int WINDOW_SIZE = 1 << 16;
35
+
36
/*
 * Print the command-line help text to stderr.
 * Always returns 1 so callers can `return bgzip_main_usage();` as an
 * exit status.
 */
static int bgzip_main_usage()
{
    static const char *const help_lines[] = {
        "\n",
        "Usage: bgzip [options] [file] ...\n\n",
        "Options: -c write on standard output, keep original files unchanged\n",
        " -d decompress\n",
        " -f overwrite files without asking\n",
        " -b INT decompress at virtual file pointer INT\n",
        " -s INT decompress INT bytes in the uncompressed file\n",
        " -h give this help\n",
        "\n",
    };
    size_t i;

    for (i = 0; i < sizeof help_lines / sizeof help_lines[0]; i++)
        fputs(help_lines[i], stderr);
    return 1;
}
49
+
50
/*
 * Open fn for writing (created with mode 0666, truncated) and return the
 * file descriptor.
 *
 * When is_forced is zero and the file already exists, the user is asked
 * on stderr whether to overwrite it; the answer is read from stdin.  Any
 * answer other than 'y'/'Y' -- including end-of-file or a read error on
 * stdin -- is treated as "no".
 *
 * Never returns a negative descriptor: the process exits with status 1
 * if the user declines or the file cannot be opened.
 */
static int write_open(const char *fn, int is_forced)
{
    int fd = -1;
    char c = '\0';

    if (!is_forced) {
        /* O_EXCL makes the open fail with EEXIST instead of clobbering. */
        if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666)) < 0 && errno == EEXIST) {
            fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn);
            /* scanf leaves c untouched on EOF/error, so check its result. */
            if (scanf("%c", &c) != 1 || (c != 'Y' && c != 'y')) {
                fprintf(stderr, "[bgzip] not overwritten\n");
                exit(1);
            }
        }
    }
    if (fd < 0) {
        /* Forced, confirmed, or the exclusive open failed for another reason. */
        if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0) {
            fprintf(stderr, "[bgzip] %s: Fail to write\n", fn);
            exit(1);
        }
    }
    return fd;
}
72
+
73
+ static void fail(BGZF* fp)
74
+ {
75
+ fprintf(stderr, "Error: %s\n", fp->error);
76
+ exit(1);
77
+ }
78
+
79
+ int main(int argc, char **argv)
80
+ {
81
+ int c, compress, pstdout, is_forced;
82
+ BGZF *fp;
83
+ void *buffer;
84
+ long start, end, size;
85
+
86
+ compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
87
+ while((c = getopt(argc, argv, "cdhfb:s:")) >= 0){
88
+ switch(c){
89
+ case 'h': return bgzip_main_usage();
90
+ case 'd': compress = 0; break;
91
+ case 'c': pstdout = 1; break;
92
+ case 'b': start = atol(optarg); break;
93
+ case 's': size = atol(optarg); break;
94
+ case 'f': is_forced = 1; break;
95
+ }
96
+ }
97
+ if (size >= 0) end = start + size;
98
+ if (end >= 0 && end < start) {
99
+ fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
100
+ return 1;
101
+ }
102
+ if (compress == 1) {
103
+ struct stat sbuf;
104
+ int f_src = fileno(stdin);
105
+ int f_dst = fileno(stdout);
106
+
107
+ if ( argc>optind )
108
+ {
109
+ if ( stat(argv[optind],&sbuf)<0 )
110
+ {
111
+ fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
112
+ return 1;
113
+ }
114
+
115
+ if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
116
+ fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
117
+ return 1;
118
+ }
119
+
120
+ if (pstdout)
121
+ f_dst = fileno(stdout);
122
+ else
123
+ {
124
+ char *name = malloc(strlen(argv[optind]) + 5);
125
+ strcpy(name, argv[optind]);
126
+ strcat(name, ".gz");
127
+ f_dst = write_open(name, is_forced);
128
+ if (f_dst < 0) return 1;
129
+ free(name);
130
+ }
131
+ }
132
+ else if (!pstdout && isatty(fileno((FILE *)stdout)) )
133
+ return bgzip_main_usage();
134
+
135
+ fp = bgzf_fdopen(f_dst, "w");
136
+ buffer = malloc(WINDOW_SIZE);
137
+ while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
138
+ if (bgzf_write(fp, buffer, c) < 0) fail(fp);
139
+ // f_dst will be closed here
140
+ if (bgzf_close(fp) < 0) fail(fp);
141
+ if (argc > optind && !pstdout) unlink(argv[optind]);
142
+ free(buffer);
143
+ close(f_src);
144
+ return 0;
145
+ } else {
146
+ struct stat sbuf;
147
+ int f_dst;
148
+
149
+ if ( argc>optind )
150
+ {
151
+ if ( stat(argv[optind],&sbuf)<0 )
152
+ {
153
+ fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
154
+ return 1;
155
+ }
156
+ char *name;
157
+ int len = strlen(argv[optind]);
158
+ if ( strcmp(argv[optind]+len-3,".gz") )
159
+ {
160
+ fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
161
+ return 1;
162
+ }
163
+ fp = bgzf_open(argv[optind], "r");
164
+ if (fp == NULL) {
165
+ fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
166
+ return 1;
167
+ }
168
+
169
+ if (pstdout) {
170
+ f_dst = fileno(stdout);
171
+ }
172
+ else {
173
+ name = strdup(argv[optind]);
174
+ name[strlen(name) - 3] = '\0';
175
+ f_dst = write_open(name, is_forced);
176
+ free(name);
177
+ }
178
+ }
179
+ else if (!pstdout && isatty(fileno((FILE *)stdin)) )
180
+ return bgzip_main_usage();
181
+ else
182
+ {
183
+ f_dst = fileno(stdout);
184
+ fp = bgzf_fdopen(fileno(stdin), "r");
185
+ if (fp == NULL) {
186
+ fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
187
+ return 1;
188
+ }
189
+ }
190
+ buffer = malloc(WINDOW_SIZE);
191
+ if (bgzf_seek(fp, start, SEEK_SET) < 0) fail(fp);
192
+ while (1) {
193
+ if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
194
+ else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
195
+ if (c == 0) break;
196
+ if (c < 0) fail(fp);
197
+ start += c;
198
+ write(f_dst, buffer, c);
199
+ if (end >= 0 && start >= end) break;
200
+ }
201
+ free(buffer);
202
+ if (bgzf_close(fp) < 0) fail(fp);
203
+ if (!pstdout) unlink(argv[optind]);
204
+ return 0;
205
+ }
206
+ }
Binary file
Binary file
@@ -0,0 +1 @@
1
+ puts "nothing to do"