ngs_server 0.4 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/tabix/ChangeLog +593 -0
- data/ext/tabix/Makefile +65 -0
- data/ext/tabix/NEWS +126 -0
- data/ext/tabix/TabixReader.java +395 -0
- data/ext/tabix/bam_endian.h +42 -0
- data/ext/tabix/bedidx.c +156 -0
- data/ext/tabix/bgzf.c +714 -0
- data/ext/tabix/bgzf.h +157 -0
- data/ext/tabix/bgzip.c +206 -0
- data/ext/tabix/example.gtf.gz +0 -0
- data/ext/tabix/example.gtf.gz.tbi +0 -0
- data/ext/tabix/extconf.rb +1 -0
- data/ext/tabix/index.c +998 -0
- data/ext/tabix/khash.h +486 -0
- data/ext/tabix/knetfile.c +632 -0
- data/ext/tabix/knetfile.h +75 -0
- data/ext/tabix/kseq.h +227 -0
- data/ext/tabix/ksort.h +271 -0
- data/ext/tabix/kstring.c +165 -0
- data/ext/tabix/kstring.h +68 -0
- data/ext/tabix/main.c +290 -0
- data/ext/tabix/perl/MANIFEST +8 -0
- data/ext/tabix/perl/Makefile.PL +8 -0
- data/ext/tabix/perl/Tabix.pm +76 -0
- data/ext/tabix/perl/Tabix.xs +71 -0
- data/ext/tabix/perl/TabixIterator.pm +41 -0
- data/ext/tabix/perl/t/01local.t +28 -0
- data/ext/tabix/perl/t/02remote.t +28 -0
- data/ext/tabix/perl/typemap +3 -0
- data/ext/tabix/python/setup.py +55 -0
- data/ext/tabix/python/tabixmodule.c +408 -0
- data/ext/tabix/python/test.py +91 -0
- data/ext/tabix/tabix.1 +132 -0
- data/ext/tabix/tabix.h +145 -0
- data/ext/tabix/tabix.py +87 -0
- data/ext/tabix/tabix.tex +121 -0
- data/ext/vcftools/perl/Vcf.pm +5 -3
- data/ext/vcftools/perl/vcf-query +2 -0
- data/lib/ngs_server/version.rb +1 -1
- data/lib/ngs_server.rb +12 -11
- data/ngs_server.gemspec +1 -2
- metadata +39 -2
@@ -0,0 +1,632 @@
|
|
1
|
+
/* The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2008 Genome Research Ltd (GRL).
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
20
|
+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
21
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
22
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
23
|
+
SOFTWARE.
|
24
|
+
*/
|
25
|
+
|
26
|
+
/* Contact: Heng Li <lh3@sanger.ac.uk> */
|
27
|
+
|
28
|
+
/* Probably I will not do socket programming in the next few years and
|
29
|
+
therefore I decide to heavily annotate this file, for Linux and
|
30
|
+
Windows as well. -lh3 */
|
31
|
+
|
32
|
+
#include <time.h>
|
33
|
+
#include <stdio.h>
|
34
|
+
#include <ctype.h>
|
35
|
+
#include <stdlib.h>
|
36
|
+
#include <string.h>
|
37
|
+
#include <errno.h>
|
38
|
+
#include <unistd.h>
|
39
|
+
#include <sys/types.h>
|
40
|
+
|
41
|
+
#ifdef _WIN32
|
42
|
+
#include <winsock.h>
|
43
|
+
#else
|
44
|
+
#include <netdb.h>
|
45
|
+
#include <arpa/inet.h>
|
46
|
+
#include <sys/socket.h>
|
47
|
+
#endif
|
48
|
+
|
49
|
+
#include "knetfile.h"
|
50
|
+
|
51
|
+
/* In winsock.h, the type of a socket is SOCKET, which is: "typedef
|
52
|
+
* u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed
|
53
|
+
* integer -1. In knetfile.c, I use "int" for socket type
|
54
|
+
* throughout. This should be improved to avoid confusion.
|
55
|
+
*
|
56
|
+
* In Linux/Mac, recv() and read() do almost the same thing. You can see
|
57
|
+
* in the header file that netread() is simply an alias of read(). In
|
58
|
+
* Windows, however, they are different and using recv() is mandatory.
|
59
|
+
*/
|
60
|
+
|
61
|
+
/* Wait until the descriptor is ready for reading (or for writing if
 * is_read==0), giving up after five seconds.
 *
 * Returns the value of select(): positive when the descriptor is ready,
 * 0 on time-out and -1 (SOCKET_ERROR on Windows) on failure. On
 * non-Windows systems a descriptor that cannot be represented in an
 * fd_set is rejected with -1 up front, because passing it to FD_SET()
 * is undefined behaviour. */
static int socket_wait(int fd, int is_read)
{
    fd_set fds, *fdr = 0, *fdw = 0;
    struct timeval tv;
    int ret;
#ifndef _WIN32
    if (fd < 0 || fd >= FD_SETSIZE) {
        fprintf(stderr, "[socket_wait] invalid file descriptor: %d\n", fd);
        return -1;
    }
#endif
    tv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out
    FD_ZERO(&fds);
    FD_SET(fd, &fds);
    if (is_read) fdr = &fds;
    else fdw = &fds;
    ret = select(fd+1, fdr, fdw, 0, &tv);
#ifndef _WIN32
    if (ret == -1) perror("select");
#else
    if (ret == 0)
        fprintf(stderr, "select time-out\n");
    else if (ret == SOCKET_ERROR)
        fprintf(stderr, "select: %d\n", WSAGetLastError());
#endif
    return ret;
}
|
84
|
+
|
85
|
+
#ifndef _WIN32
|
86
|
+
/* This function does not work with Windows due to the lack of
 * getaddrinfo() in winsock. It is adapted from an example in "Beej's
 * Guide to Network Programming" (http://beej.us/guide/bgnet/).
 *
 * Resolves host/port and opens a TCP connection. Every address returned
 * by getaddrinfo() is tried in order until one connects. Returns the
 * connected descriptor, or -1 on failure; no descriptor is left open on
 * the failure paths. */
static int socket_connect(const char *host, const char *port)
{
    int on = 1, fd = -1, rc;
    struct linger lng = { 0, 0 };
    struct addrinfo hints, *res = 0, *ai;

    memset(&hints, 0, sizeof(struct addrinfo));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    /* In Unix/Mac, getaddrinfo() is the most convenient way to get
     * server information. It reports errors through its return value,
     * not errno, hence gai_strerror() rather than perror(). */
    rc = getaddrinfo(host, port, &hints, &res);
    if (rc != 0) {
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rc));
        return -1;
    }
    for (ai = res; ai != 0; ai = ai->ai_next) {
        fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (fd == -1) {
            perror("socket");
            continue;
        }
        /* The following two setsockopt() are used by ftplib
         * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they
         * are necessary. */
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1)
            perror("setsockopt");
        else if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1)
            perror("setsockopt");
        else if (connect(fd, ai->ai_addr, ai->ai_addrlen) != 0)
            perror("connect");
        else
            break; /* connected */
        close(fd); /* do not leak the socket of a failed attempt */
        fd = -1;
    }
    freeaddrinfo(res);
    return fd;
}
|
112
|
+
#else
|
113
|
+
/* MinGW's printf has problem with "%lld", so 64-bit integers are
 * formatted by hand.
 *
 * Writes the decimal representation of x (with a leading '-' for
 * negative values) into buf, NUL-terminates it and returns buf. buf
 * must have room for at least 21 bytes: sign, 19 digits and the
 * terminator. */
char *int64tostr(char *buf, int64_t x)
{
    /* Work on the magnitude as unsigned so that the most negative value
     * can be negated without overflow. */
    uint64_t mag = x < 0 ? (uint64_t)0 - (uint64_t)x : (uint64_t)x;
    int lo = 0, hi, n = 0;
    if (x < 0) {
        buf[n++] = '-';
        lo = 1; /* the sign stays in place while the digits are reversed */
    }
    do { /* digits are produced least-significant first */
        buf[n++] = (char)('0' + (int)(mag % 10));
        mag /= 10;
    } while (mag);
    buf[n] = 0;
    for (hi = n - 1; lo < hi; ++lo, --hi) {
        char c = buf[lo]; buf[lo] = buf[hi]; buf[hi] = c;
    }
    return buf;
}
|
128
|
+
|
129
|
+
/* Parse the run of decimal digits at the start of buf and return its
 * value. Parsing stops at the first character that is not a digit, so
 * trailing text (spaces, "\r\n", ...) is ignored; a string that does not
 * start with a digit yields 0. No sign is accepted and overflow is not
 * detected. */
int64_t strtoint64(const char *buf)
{
    int64_t x = 0;
    while (*buf >= '0' && *buf <= '9') {
        x = x * 10 + (*buf - '0');
        ++buf;
    }
    return x;
}
|
136
|
+
/* In windows, the first thing is to establish the TCP connection. */
|
137
|
+
int knet_win32_init()
|
138
|
+
{
|
139
|
+
WSADATA wsaData;
|
140
|
+
return WSAStartup(MAKEWORD(2, 2), &wsaData);
|
141
|
+
}
|
142
|
+
/* Windows only: counterpart of knet_win32_init(); calls WSACleanup()
 * and discards its result. */
void knet_win32_destroy()
{
    (void)WSACleanup();
}
|
146
|
+
/* A slightly modfied version of the following function also works on
|
147
|
+
* Mac (and presummably Linux). However, this function is not stable on
|
148
|
+
* my Mac. It sometimes works fine but sometimes does not. Therefore for
|
149
|
+
* non-Windows OS, I do not use this one. */
|
150
|
+
static SOCKET socket_connect(const char *host, const char *port)
|
151
|
+
{
|
152
|
+
#define __err_connect(func) \
|
153
|
+
do { \
|
154
|
+
fprintf(stderr, "%s: %d\n", func, WSAGetLastError()); \
|
155
|
+
return -1; \
|
156
|
+
} while (0)
|
157
|
+
|
158
|
+
int on = 1;
|
159
|
+
SOCKET fd;
|
160
|
+
struct linger lng = { 0, 0 };
|
161
|
+
struct sockaddr_in server;
|
162
|
+
struct hostent *hp = 0;
|
163
|
+
// open socket
|
164
|
+
if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) __err_connect("socket");
|
165
|
+
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) == -1) __err_connect("setsockopt");
|
166
|
+
if (setsockopt(fd, SOL_SOCKET, SO_LINGER, (char*)&lng, sizeof(lng)) == -1) __err_connect("setsockopt");
|
167
|
+
// get host info
|
168
|
+
if (isalpha(host[0])) hp = gethostbyname(host);
|
169
|
+
else {
|
170
|
+
struct in_addr addr;
|
171
|
+
addr.s_addr = inet_addr(host);
|
172
|
+
hp = gethostbyaddr((char*)&addr, 4, AF_INET);
|
173
|
+
}
|
174
|
+
if (hp == 0) __err_connect("gethost");
|
175
|
+
// connect
|
176
|
+
server.sin_addr.s_addr = *((unsigned long*)hp->h_addr);
|
177
|
+
server.sin_family= AF_INET;
|
178
|
+
server.sin_port = htons(atoi(port));
|
179
|
+
if (connect(fd, (struct sockaddr*)&server, sizeof(server)) != 0) __err_connect("connect");
|
180
|
+
// freehostent(hp); // strangely in MSDN, hp is NOT freed (memory leak?!)
|
181
|
+
return fd;
|
182
|
+
}
|
183
|
+
#endif
|
184
|
+
|
185
|
+
/* Read up to len bytes from the socket fd into buf, calling netread()
 * repeatedly because a single call may deliver less than requested.
 *
 * Stops early when socket_wait() reports that the socket is not
 * readable, on end-of-file, or on a read error. Returns the number of
 * bytes actually stored in buf (possibly 0). */
static off_t my_netread(int fd, void *buf, off_t len)
{
    char *dst = (char*)buf; /* byte pointer: arithmetic on void* is not standard C */
    off_t rest = len, curr, l = 0;
    /* recv() and read() may not read the required length of data with
     * one call. They have to be called repeatedly. */
    while (rest > 0) {
        if (socket_wait(fd, 1) <= 0) break; // socket is not ready for reading
        curr = netread(fd, dst + l, rest);
        /* According to the glibc manual, section 13.2, a zero returned
         * value indicates end-of-file (EOF), which should mean that
         * read() will not return zero if EOF has not been met but data
         * are not immediately available. A negative value is an error
         * and must not be added to the byte count. */
        if (curr <= 0) break;
        l += curr; rest -= curr;
    }
    return l;
}
|
202
|
+
|
203
|
+
/*************************
|
204
|
+
* FTP specific routines *
|
205
|
+
*************************/
|
206
|
+
|
207
|
+
/* Read one complete reply from the FTP control connection into
 * ftp->response (grown on demand) and return its numeric reply code.
 *
 * Lines are read until one starts with three digits that are not
 * followed by '-', i.e. continuation lines of a multi-line reply are
 * skipped. The trailing "\r\n" of the final line is cut off.
 *
 * Returns 0 when the control socket does not become readable in time,
 * -1 when no usable line was read or memory ran out, otherwise the
 * reply code parsed as a decimal number. */
static int kftp_get_response(knetFile *ftp)
{
#ifndef _WIN32
    unsigned char c;
#else
    char c;
#endif
    int n = 0;
    if (socket_wait(ftp->ctrl_fd, 1) <= 0) return 0;
    /* Stop on EOF (0) and on error (-1) alike. */
    while (netread(ftp->ctrl_fd, &c, 1) == 1) { // FIXME: this is *VERY BAD* for unbuffered I/O
        if (n >= ftp->max_response) {
            int cap = ftp->max_response? ftp->max_response<<1 : 256;
            char *grown = realloc(ftp->response, cap);
            if (grown == 0) return -1; /* the old buffer is still owned by ftp */
            ftp->response = grown;
            ftp->max_response = cap;
        }
        ftp->response[n++] = c;
        if (c == '\n') {
            if (n >= 4 && isdigit((unsigned char)ftp->response[0])
                && isdigit((unsigned char)ftp->response[1])
                && isdigit((unsigned char)ftp->response[2])
                && ftp->response[3] != '-') break;
            n = 0; /* continuation line: start over with the next one */
        }
    }
    if (n < 2) return -1;
    ftp->response[n-2] = 0;
    return (int)strtol(ftp->response, 0, 10);
}
|
235
|
+
|
236
|
+
/* Send the command string cmd on the FTP control connection.
 *
 * Returns -1 when the socket is not writable in time or the command
 * cannot be written completely. Otherwise returns the result of
 * kftp_get_response() when is_get is non-zero, or 0 when the caller
 * does not want the reply to be read yet. */
static int kftp_send_cmd(knetFile *ftp, const char *cmd, int is_get)
{
    size_t left = strlen(cmd);
    if (socket_wait(ftp->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing
    while (left > 0) { /* a single write may be short */
        long sent = netwrite(ftp->ctrl_fd, cmd, left);
        if (sent <= 0) return -1;
        cmd += sent;
        left -= (size_t)sent;
    }
    return is_get? kftp_get_response(ftp) : 0;
}
|
242
|
+
|
243
|
+
/* Send PASV and parse the "(h1,h2,h3,h4,p1,p2)" part of the reply into
 * ftp->pasv_ip and ftp->pasv_port.
 *
 * Returns 0 on success and -1 when the command fails, no reply text is
 * available, or the reply does not contain six comma-separated numbers
 * after an opening parenthesis. */
static int kftp_pasv_prep(knetFile *ftp)
{
    char *p;
    int v[6];
    if (kftp_send_cmd(ftp, "PASV\r\n", 1) <= 0) return -1;
    if (ftp->response == 0) return -1;
    for (p = ftp->response; *p && *p != '('; ++p);
    if (*p != '(') return -1;
    ++p;
    /* Without this check v[] would be used uninitialised on a malformed reply. */
    if (sscanf(p, "%d,%d,%d,%d,%d,%d", &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]) != 6) return -1;
    memcpy(ftp->pasv_ip, v, 4 * sizeof(int));
    /* the port is transmitted as two bytes, high byte first */
    ftp->pasv_port = ((v[4] & 0xff) << 8) + (v[5] & 0xff);
    return 0;
}
|
256
|
+
|
257
|
+
|
258
|
+
/* Open the FTP data connection to the address stored in ftp->pasv_ip /
 * ftp->pasv_port and store the socket in ftp->fd.
 *
 * Returns 0 on success; -1 when pasv_port is still 0 or the connection
 * cannot be established. */
static int kftp_pasv_connect(knetFile *ftp)
{
    char host[80], port[16];
    if (ftp->pasv_port == 0) {
        fprintf(stderr, "[kftp_pasv_connect] kftp_pasv_prep() is not called before hand.\n");
        return -1;
    }
    /* The numbers originate from the server's reply, so the formatting
     * is bounded by the buffer sizes. */
    snprintf(host, sizeof(host), "%d.%d.%d.%d", ftp->pasv_ip[0], ftp->pasv_ip[1], ftp->pasv_ip[2], ftp->pasv_ip[3]);
    snprintf(port, sizeof(port), "%d", ftp->pasv_port);
    ftp->fd = socket_connect(host, port);
    if (ftp->fd == -1) return -1;
    return 0;
}
|
271
|
+
|
272
|
+
/* Open the FTP control connection to ftp->host:ftp->port, read the
 * server's first reply and send the anonymous login sequence followed
 * by "TYPE I". The replies to those commands are read but not
 * inspected. Returns -1 if the control connection cannot be opened,
 * otherwise 0. */
int kftp_connect(knetFile *ftp)
{
    static const char *const login_sequence[] = {
        "USER anonymous\r\n",
        "PASS kftp@\r\n",
        "TYPE I\r\n"
    };
    size_t step;

    ftp->ctrl_fd = socket_connect(ftp->host, ftp->port);
    if (ftp->ctrl_fd == -1)
        return -1;
    kftp_get_response(ftp);
    for (step = 0; step < sizeof(login_sequence) / sizeof(login_sequence[0]); ++step)
        kftp_send_cmd(ftp, login_sequence[step], 1);
    return 0;
}
|
282
|
+
|
283
|
+
/* Drop the control connection (when one is open) and the data
 * connection, mark both descriptors as -1, then log in again through
 * kftp_connect() and return its result. */
int kftp_reconnect(knetFile *ftp)
{
    int control = ftp->ctrl_fd;

    if (control != -1)
        netclose(control);
    ftp->ctrl_fd = -1;

    netclose(ftp->fd);
    ftp->fd = -1;

    return kftp_connect(ftp);
}
|
293
|
+
|
294
|
+
// initialize ->type, ->host, ->retr and ->size
|
295
|
+
knetFile *kftp_parse_url(const char *fn, const char *mode)
|
296
|
+
{
|
297
|
+
knetFile *fp;
|
298
|
+
char *p;
|
299
|
+
int l;
|
300
|
+
if (strstr(fn, "ftp://") != fn) return 0;
|
301
|
+
for (p = (char*)fn + 6; *p && *p != '/'; ++p);
|
302
|
+
if (*p != '/') return 0;
|
303
|
+
l = p - fn - 6;
|
304
|
+
fp = calloc(1, sizeof(knetFile));
|
305
|
+
fp->type = KNF_TYPE_FTP;
|
306
|
+
fp->fd = -1;
|
307
|
+
/* the Linux/Mac version of socket_connect() also recognizes a port
|
308
|
+
* like "ftp", but the Windows version does not. */
|
309
|
+
fp->port = strdup("21");
|
310
|
+
fp->host = calloc(l + 1, 1);
|
311
|
+
if (strchr(mode, 'c')) fp->no_reconnect = 1;
|
312
|
+
strncpy(fp->host, fn + 6, l);
|
313
|
+
fp->retr = calloc(strlen(p) + 8, 1);
|
314
|
+
sprintf(fp->retr, "RETR %s\r\n", p);
|
315
|
+
fp->size_cmd = calloc(strlen(p) + 8, 1);
|
316
|
+
sprintf(fp->size_cmd, "SIZE %s\r\n", p);
|
317
|
+
fp->seek_offset = 0;
|
318
|
+
return fp;
|
319
|
+
}
|
320
|
+
// place ->fd at offset off
|
321
|
+
int kftp_connect_file(knetFile *fp)
|
322
|
+
{
|
323
|
+
int ret;
|
324
|
+
long long file_size;
|
325
|
+
if (fp->fd != -1) {
|
326
|
+
netclose(fp->fd);
|
327
|
+
if (fp->no_reconnect) kftp_get_response(fp);
|
328
|
+
}
|
329
|
+
kftp_pasv_prep(fp);
|
330
|
+
kftp_send_cmd(fp, fp->size_cmd, 1);
|
331
|
+
#ifndef _WIN32
|
332
|
+
if ( sscanf(fp->response,"%*d %lld", &file_size) != 1 )
|
333
|
+
{
|
334
|
+
fprintf(stderr,"[kftp_connect_file] %s\n", fp->response);
|
335
|
+
return -1;
|
336
|
+
}
|
337
|
+
#else
|
338
|
+
const char *p = fp->response;
|
339
|
+
while (*p != ' ') ++p;
|
340
|
+
while (*p < '0' || *p > '9') ++p;
|
341
|
+
file_size = strtoint64(p);
|
342
|
+
#endif
|
343
|
+
fp->file_size = file_size;
|
344
|
+
if (fp->offset>=0) {
|
345
|
+
char tmp[32];
|
346
|
+
#ifndef _WIN32
|
347
|
+
sprintf(tmp, "REST %lld\r\n", (long long)fp->offset);
|
348
|
+
#else
|
349
|
+
strcpy(tmp, "REST ");
|
350
|
+
int64tostr(tmp + 5, fp->offset);
|
351
|
+
strcat(tmp, "\r\n");
|
352
|
+
#endif
|
353
|
+
kftp_send_cmd(fp, tmp, 1);
|
354
|
+
}
|
355
|
+
kftp_send_cmd(fp, fp->retr, 0);
|
356
|
+
kftp_pasv_connect(fp);
|
357
|
+
ret = kftp_get_response(fp);
|
358
|
+
if (ret != 150) {
|
359
|
+
fprintf(stderr, "[kftp_connect_file] %s\n", fp->response);
|
360
|
+
netclose(fp->fd);
|
361
|
+
fp->fd = -1;
|
362
|
+
return -1;
|
363
|
+
}
|
364
|
+
fp->is_ready = 1;
|
365
|
+
return 0;
|
366
|
+
}
|
367
|
+
|
368
|
+
|
369
|
+
/**************************
|
370
|
+
* HTTP specific routines *
|
371
|
+
**************************/
|
372
|
+
|
373
|
+
knetFile *khttp_parse_url(const char *fn, const char *mode)
|
374
|
+
{
|
375
|
+
knetFile *fp;
|
376
|
+
char *p, *proxy, *q;
|
377
|
+
int l;
|
378
|
+
if (strstr(fn, "http://") != fn) return 0;
|
379
|
+
// set ->http_host
|
380
|
+
for (p = (char*)fn + 7; *p && *p != '/'; ++p);
|
381
|
+
l = p - fn - 7;
|
382
|
+
fp = calloc(1, sizeof(knetFile));
|
383
|
+
fp->http_host = calloc(l + 1, 1);
|
384
|
+
strncpy(fp->http_host, fn + 7, l);
|
385
|
+
fp->http_host[l] = 0;
|
386
|
+
for (q = fp->http_host; *q && *q != ':'; ++q);
|
387
|
+
if (*q == ':') *q++ = 0;
|
388
|
+
// get http_proxy
|
389
|
+
proxy = getenv("http_proxy");
|
390
|
+
// set ->host, ->port and ->path
|
391
|
+
if (proxy == 0) {
|
392
|
+
fp->host = strdup(fp->http_host); // when there is no proxy, server name is identical to http_host name.
|
393
|
+
fp->port = strdup(*q? q : "80");
|
394
|
+
fp->path = strdup(*p? p : "/");
|
395
|
+
} else {
|
396
|
+
fp->host = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy);
|
397
|
+
for (q = fp->host; *q && *q != ':'; ++q);
|
398
|
+
if (*q == ':') *q++ = 0;
|
399
|
+
fp->port = strdup(*q? q : "80");
|
400
|
+
fp->path = strdup(fn);
|
401
|
+
}
|
402
|
+
fp->type = KNF_TYPE_HTTP;
|
403
|
+
fp->ctrl_fd = fp->fd = -1;
|
404
|
+
fp->seek_offset = 0;
|
405
|
+
return fp;
|
406
|
+
}
|
407
|
+
|
408
|
+
/* (Re)open the HTTP connection and position it at ->offset.
 *
 * Sends "GET <path> HTTP/1.0" with a Range header starting at ->offset
 * and reads the response header byte by byte. On status 206 the body
 * already starts at the offset; on status 200 the server sent the whole
 * file, so ->offset bytes are read and discarded.
 *
 * Returns 0 and sets ->is_ready on success. Returns -1 with ->fd == -1
 * when the connection fails, the request does not fit the buffer, the
 * header is too short or too long, the status is neither 200 nor 206,
 * or the stream ends before the offset is reached. */
int khttp_connect_file(knetFile *fp)
{
    enum { KHTTP_BUF_SIZE = 0x10000 }; // FIXME: I am lazy... But in principle, 64KB should be large enough.
    int ret, l, sent;
    char *buf;
    if (fp->fd != -1) netclose(fp->fd);
    fp->fd = socket_connect(fp->host, fp->port);
    if (fp->fd == -1) return -1;
    buf = calloc(KHTTP_BUF_SIZE, 1);
    if (buf == 0) goto fail;
    l = snprintf(buf, KHTTP_BUF_SIZE, "GET %s HTTP/1.0\r\nHost: %s\r\nRange: bytes=%lld-\r\n\r\n",
                 fp->path, fp->http_host, (long long)fp->offset);
    if (l < 0 || l >= KHTTP_BUF_SIZE) goto fail; /* request does not fit */
    for (sent = 0; sent < l; ) { /* a single write may be short */
        long w = netwrite(fp->fd, buf + sent, l - sent);
        if (w <= 0) goto fail;
        sent += (int)w;
    }
    l = 0;
    /* read HTTP header; FIXME: bad efficiency. Stops on EOF, on error and
     * before the buffer is full. */
    while (l < KHTTP_BUF_SIZE - 1 && netread(fp->fd, buf + l, 1) == 1) {
        if (buf[l] == '\n' && l >= 3)
            if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
        ++l;
    }
    buf[l] = 0;
    if (l < 14 || l >= KHTTP_BUF_SIZE - 1) goto fail; // premature or oversized header
    ret = (int)strtol(buf + 8, 0, 10); // HTTP return code
    if (ret == 200 && fp->offset>0) { // 200 (complete result); then skip beginning of the file
        off_t rest = fp->offset;
        while (rest > 0) {
            off_t chunk = rest < KHTTP_BUF_SIZE? rest : KHTTP_BUF_SIZE;
            off_t got = my_netread(fp->fd, buf, chunk);
            if (got <= 0) goto fail; /* stream ended before the offset */
            rest -= got;
        }
    } else if (ret != 206 && ret != 200) {
        fprintf(stderr, "[khttp_connect_file] fail to open file (HTTP code: %d).\n", ret);
        goto fail;
    }
    free(buf);
    fp->is_ready = 1;
    return 0;

fail:
    free(buf); /* free(NULL) is a no-op */
    netclose(fp->fd);
    fp->fd = -1;
    return -1;
}
|
449
|
+
|
450
|
+
/********************
|
451
|
+
* Generic routines *
|
452
|
+
********************/
|
453
|
+
|
454
|
+
knetFile *knet_open(const char *fn, const char *mode)
|
455
|
+
{
|
456
|
+
knetFile *fp = 0;
|
457
|
+
if (mode[0] != 'r') {
|
458
|
+
fprintf(stderr, "[kftp_open] only mode \"r\" is supported.\n");
|
459
|
+
return 0;
|
460
|
+
}
|
461
|
+
if (strstr(fn, "ftp://") == fn) {
|
462
|
+
fp = kftp_parse_url(fn, mode);
|
463
|
+
if (fp == 0) return 0;
|
464
|
+
if (kftp_connect(fp) == -1) {
|
465
|
+
knet_close(fp);
|
466
|
+
return 0;
|
467
|
+
}
|
468
|
+
kftp_connect_file(fp);
|
469
|
+
} else if (strstr(fn, "http://") == fn) {
|
470
|
+
fp = khttp_parse_url(fn, mode);
|
471
|
+
if (fp == 0) return 0;
|
472
|
+
khttp_connect_file(fp);
|
473
|
+
} else { // local file
|
474
|
+
#ifdef _WIN32
|
475
|
+
/* In windows, O_BINARY is necessary. In Linux/Mac, O_BINARY may
|
476
|
+
* be undefined on some systems, although it is defined on my
|
477
|
+
* Mac and the Linux I have tested on. */
|
478
|
+
int fd = open(fn, O_RDONLY | O_BINARY);
|
479
|
+
#else
|
480
|
+
int fd = open(fn, O_RDONLY);
|
481
|
+
#endif
|
482
|
+
if (fd == -1) {
|
483
|
+
perror("open");
|
484
|
+
return 0;
|
485
|
+
}
|
486
|
+
fp = (knetFile*)calloc(1, sizeof(knetFile));
|
487
|
+
fp->type = KNF_TYPE_LOCAL;
|
488
|
+
fp->fd = fd;
|
489
|
+
fp->ctrl_fd = -1;
|
490
|
+
}
|
491
|
+
if (fp && fp->fd == -1) {
|
492
|
+
knet_close(fp);
|
493
|
+
return 0;
|
494
|
+
}
|
495
|
+
return fp;
|
496
|
+
}
|
497
|
+
|
498
|
+
knetFile *knet_dopen(int fd, const char *mode)
|
499
|
+
{
|
500
|
+
knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));
|
501
|
+
fp->type = KNF_TYPE_LOCAL;
|
502
|
+
fp->fd = fd;
|
503
|
+
return fp;
|
504
|
+
}
|
505
|
+
|
506
|
+
/* Read up to len bytes at the current offset into buf and advance
 * ->offset by the number of bytes read.
 *
 * For FTP and HTTP handles the connection is (re)established first when
 * ->is_ready is 0, i.e. after a seek. Returns the number of bytes read,
 * which is smaller than len at end-of-file or on a time-out; 0 when
 * there is no usable descriptor; -1 when a local read fails before any
 * byte was read. */
off_t knet_read(knetFile *fp, void *buf, off_t len)
{
    off_t l = 0;
    if (fp->fd == -1) return 0;
    if (fp->type == KNF_TYPE_FTP) {
        if (fp->is_ready == 0) {
            if (!fp->no_reconnect) kftp_reconnect(fp);
            kftp_connect_file(fp);
        }
    } else if (fp->type == KNF_TYPE_HTTP) {
        if (fp->is_ready == 0)
            khttp_connect_file(fp);
    }
    if (fp->fd == -1) return 0; /* the reconnect above failed */
    if (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX
        char *dst = (char*)buf; /* byte pointer: arithmetic on void* is not standard C */
        off_t rest = len, curr;
        while (rest > 0) {
            do {
                curr = read(fp->fd, dst + l, rest);
            } while (curr < 0 && errno == EINTR); /* interrupted: simply retry */
            if (curr < 0) {
                if (l == 0) return -1;
                break; /* hand out what was read; the next call reports the error */
            }
            if (curr == 0) break;
            l += curr; rest -= curr;
        }
    } else l = my_netread(fp->fd, buf, len);
    fp->offset += l;
    return l;
}
|
530
|
+
|
531
|
+
/* Move the read position of fp.
 *
 * Local files are repositioned with lseek(). For FTP and HTTP only
 * ->offset is updated and ->is_ready is cleared; the connection is
 * re-opened at the new offset by the next knet_read(). SEEK_END is
 * supported for local files and FTP (relative to ->file_size) but not
 * for HTTP.
 *
 * Returns 0 on success for every handle type and -1 on failure (lseek()
 * failed, SEEK_END on HTTP with errno set to ESPIPE, or unknown handle
 * type with errno set to EINVAL). */
off_t knet_seek(knetFile *fp, int64_t off, int whence)
{
    if (whence == SEEK_SET && off == fp->offset) return 0; /* already there */
    if (fp->type == KNF_TYPE_LOCAL) {
        /* Be aware that lseek() returns the offset after seeking,
         * while fseek() returns zero on success. */
        off_t offset = lseek(fp->fd, off, whence);
        if (offset == -1) {
            // Be silent, it is OK for knet_seek to fail when the file is streamed
            return -1;
        }
        fp->offset = offset;
        return 0;
    }
    else if (fp->type == KNF_TYPE_FTP)
    {
        if (whence==SEEK_CUR)
            fp->offset += off;
        else if (whence==SEEK_SET)
            fp->offset = off;
        else if ( whence==SEEK_END)
            fp->offset = fp->file_size+off;
        fp->is_ready = 0;
        return 0;
    }
    else if (fp->type == KNF_TYPE_HTTP)
    {
        if (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?
            fprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\n");
            errno = ESPIPE;
            return -1;
        }
        if (whence==SEEK_CUR)
            fp->offset += off;
        else if (whence==SEEK_SET)
            fp->offset = off;
        fp->is_ready = 0;
        /* 0 like the other branches; returning the offset made every
         * successful seek to a non-zero position look like a failure to
         * callers that test for "!= 0". */
        return 0;
    }
    errno = EINVAL;
    fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
    return -1;
}
|
575
|
+
|
576
|
+
/* Release a handle: close the control descriptor and the data
 * descriptor when they are not -1, free every string owned by the
 * handle and finally the handle itself. A null handle is accepted.
 * Always returns 0. */
int knet_close(knetFile *fp)
{
    if (fp == 0)
        return 0;

    if (fp->ctrl_fd != -1)
        netclose(fp->ctrl_fd); // FTP specific

    if (fp->fd != -1) {
        /* On Linux/Mac, netclose() is an alias of close(), but on
         * Windows, it is an alias of closesocket(). */
        if (fp->type != KNF_TYPE_LOCAL)
            netclose(fp->fd);
        else
            close(fp->fd);
    }

    {
        /* generic, FTP specific, then HTTP specific strings */
        char *owned[] = {
            fp->host, fp->port,
            fp->response, fp->retr, fp->size_cmd,
            fp->path, fp->http_host
        };
        size_t k;
        for (k = 0; k < sizeof(owned) / sizeof(owned[0]); ++k)
            free(owned[k]);
    }
    free(fp);
    return 0;
}
|
592
|
+
|
593
|
+
#ifdef KNETFILE_MAIN
|
594
|
+
int main(void)
|
595
|
+
{
|
596
|
+
char *buf;
|
597
|
+
knetFile *fp;
|
598
|
+
int type = 4, l;
|
599
|
+
#ifdef _WIN32
|
600
|
+
knet_win32_init();
|
601
|
+
#endif
|
602
|
+
buf = calloc(0x100000, 1);
|
603
|
+
if (type == 0) {
|
604
|
+
fp = knet_open("knetfile.c", "r");
|
605
|
+
knet_seek(fp, 1000, SEEK_SET);
|
606
|
+
} else if (type == 1) { // NCBI FTP, large file
|
607
|
+
fp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");
|
608
|
+
knet_seek(fp, 2500000000ll, SEEK_SET);
|
609
|
+
l = knet_read(fp, buf, 255);
|
610
|
+
} else if (type == 2) {
|
611
|
+
fp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");
|
612
|
+
knet_seek(fp, 1000, SEEK_SET);
|
613
|
+
} else if (type == 3) {
|
614
|
+
fp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");
|
615
|
+
knet_seek(fp, 1000, SEEK_SET);
|
616
|
+
} else if (type == 4) {
|
617
|
+
fp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");
|
618
|
+
knet_read(fp, buf, 10000);
|
619
|
+
knet_seek(fp, 20000, SEEK_SET);
|
620
|
+
knet_seek(fp, 10000, SEEK_SET);
|
621
|
+
l = knet_read(fp, buf+10000, 10000000) + 10000;
|
622
|
+
}
|
623
|
+
if (type != 4 && type != 1) {
|
624
|
+
knet_read(fp, buf, 255);
|
625
|
+
buf[255] = 0;
|
626
|
+
printf("%s\n", buf);
|
627
|
+
} else write(fileno(stdout), buf, l);
|
628
|
+
knet_close(fp);
|
629
|
+
free(buf);
|
630
|
+
return 0;
|
631
|
+
}
|
632
|
+
#endif
|
@@ -0,0 +1,75 @@
|
|
1
|
+
#ifndef KNETFILE_H
|
2
|
+
#define KNETFILE_H
|
3
|
+
|
4
|
+
#include <stdint.h>
|
5
|
+
#include <fcntl.h>
|
6
|
+
|
7
|
+
/* Thin aliases for socket I/O: read()/write()/close() on POSIX systems,
 * recv()/send()/closesocket() on Windows. The arguments are
 * parenthesised so that expressions can be passed safely. */
#ifndef _WIN32
#define netread(fd, ptr, len) read((fd), (ptr), (len))
#define netwrite(fd, ptr, len) write((fd), (ptr), (len))
#define netclose(fd) close((fd))
#else
#include <winsock2.h>
#define netread(fd, ptr, len) recv((fd), (ptr), (len), 0)
#define netwrite(fd, ptr, len) send((fd), (ptr), (len), 0)
#define netclose(fd) closesocket((fd))
#endif
|
17
|
+
|
18
|
+
// FIXME: currently I/O is unbuffered
|
19
|
+
|
20
|
+
#define KNF_TYPE_LOCAL 1
|
21
|
+
#define KNF_TYPE_FTP 2
|
22
|
+
#define KNF_TYPE_HTTP 3
|
23
|
+
|
24
|
+
/* State of one open local file, FTP transfer or HTTP transfer. */
typedef struct knetFile_s {
    int type;               // one of the KNF_TYPE_* constants
    int fd;                 // descriptor the data is read from; -1 if none
    int64_t offset;         // current read position
    char *host;             // server (or proxy) to connect to
    char *port;             // port of that server, as a string

    // the following are for FTP only
    int ctrl_fd;            // control connection; -1 if none
    int pasv_ip[4];         // address taken from the PASV reply
    int pasv_port;          // port taken from the PASV reply
    int max_response;       // allocated size of response
    int no_reconnect;       // set when the open mode contains 'c'
    int is_ready;           // non-zero once fd is positioned at offset
    char *response;         // text of the last server reply
    char *retr;             // prepared "RETR <path>" command
    char *size_cmd;         // prepared "SIZE <path>" command
    int64_t seek_offset;    // for lazy seek
    int64_t file_size;      // size reported by the SIZE command

    // the following are for HTTP only
    char *path;             // request target of the GET line
    char *http_host;        // value of the Host header
} knetFile;
|
38
|
+
|
39
|
+
#define knet_tell(fp) ((fp)->offset)
|
40
|
+
#define knet_fileno(fp) ((fp)->fd)
|
41
|
+
|
42
|
+
#ifdef __cplusplus
|
43
|
+
extern "C" {
|
44
|
+
#endif
|
45
|
+
|
46
|
+
#ifdef _WIN32
|
47
|
+
int knet_win32_init();
|
48
|
+
void knet_win32_destroy();
|
49
|
+
#endif
|
50
|
+
|
51
|
+
knetFile *knet_open(const char *fn, const char *mode);
|
52
|
+
|
53
|
+
/*
|
54
|
+
This only works with local files.
|
55
|
+
*/
|
56
|
+
knetFile *knet_dopen(int fd, const char *mode);
|
57
|
+
|
58
|
+
/*
|
59
|
+
If ->is_ready==0, this routine updates ->fd; otherwise, it simply
|
60
|
+
reads from ->fd.
|
61
|
+
*/
|
62
|
+
off_t knet_read(knetFile *fp, void *buf, off_t len);
|
63
|
+
|
64
|
+
/*
|
65
|
+
This routine only sets ->offset and ->is_ready=0. It does not
|
66
|
+
communicate with the FTP server.
|
67
|
+
*/
|
68
|
+
off_t knet_seek(knetFile *fp, int64_t off, int whence);
|
69
|
+
int knet_close(knetFile *fp);
|
70
|
+
|
71
|
+
#ifdef __cplusplus
|
72
|
+
}
|
73
|
+
#endif
|
74
|
+
|
75
|
+
#endif
|