ngs_server 0.4 → 0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,632 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008 Genome Research Ltd (GRL).
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
+
28
+ /* Probably I will not do socket programming in the next few years and
29
+ therefore I decide to heavily annotate this file, for Linux and
30
+ Windows as well. -lh3 */
31
+
32
+ #include <time.h>
33
+ #include <stdio.h>
34
+ #include <ctype.h>
35
+ #include <stdlib.h>
36
+ #include <string.h>
37
+ #include <errno.h>
38
+ #include <unistd.h>
39
+ #include <sys/types.h>
40
+
41
+ #ifdef _WIN32
42
+ #include <winsock.h>
43
+ #else
44
+ #include <netdb.h>
45
+ #include <arpa/inet.h>
46
+ #include <sys/socket.h>
47
+ #endif
48
+
49
+ #include "knetfile.h"
50
+
51
+ /* In winsock.h, the type of a socket is SOCKET, which is: "typedef
52
+ * u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed
53
+ * integer -1. In knetfile.c, I use "int" for socket type
54
+ * throughout. This should be improved to avoid confusion.
55
+ *
56
+ * In Linux/Mac, recv() and read() do almost the same thing. You can see
57
+ * in the header file that netread() is simply an alias of read(). In
58
+ * Windows, however, they are different and using recv() is mandatory.
59
+ */
60
+
61
/* Wait, for at most five seconds, until descriptor `fd` is ready for
 * reading (is_read != 0) or for writing (is_read == 0).
 *
 * The value returned is whatever select() returned: positive when the
 * descriptor is ready and 0 when the wait timed out.  On non-Windows
 * builds a select() failure (-1) is reported through perror(); on
 * Windows both a time-out and SOCKET_ERROR print a message to stderr. */
static int socket_wait(int fd, int is_read)
{
    fd_set watched;
    struct timeval timeout;
    int status;

    timeout.tv_sec = 5; // 5 seconds time out
    timeout.tv_usec = 0;

    FD_ZERO(&watched);
    FD_SET(fd, &watched);

    /* The same set is handed to select() either as the read set or as
     * the write set, never both. */
    if (is_read)
        status = select(fd + 1, &watched, 0, 0, &timeout);
    else
        status = select(fd + 1, 0, &watched, 0, &timeout);

#ifndef _WIN32
    if (status == -1) perror("select");
#else
    if (status == 0)
        fprintf(stderr, "select time-out\n");
    else if (status == SOCKET_ERROR)
        fprintf(stderr, "select: %d\n", WSAGetLastError());
#endif
    return status;
}
84
+
85
+ #ifndef _WIN32
86
/* Open a TCP connection to `host`:`port` and return the connected
 * descriptor, or -1 on failure (a diagnostic is printed to stderr).
 *
 * Every address returned by getaddrinfo() is tried in turn until one
 * connects.  A socket that fails at any step is closed before moving on,
 * so no descriptor leaks on the error paths.
 *
 * This function does not work with Windows due to the lack of
 * getaddrinfo() in winsock. It is adapted from an example in "Beej's
 * Guide to Network Programming" (http://beej.us/guide/bgnet/). */
static int socket_connect(const char *host, const char *port)
{
    int on = 1, fd = -1, rc, saved_errno = 0;
    struct linger lng = { 0, 0 };
    struct addrinfo hints, *res = 0, *ai;
    const char *failed = "connect";

    memset(&hints, 0, sizeof(struct addrinfo));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    /* In Unix/Mac, getaddrinfo() is the most convenient way to get
     * server information.  It reports errors through its return value,
     * not errno, and leaves `res` untouched on failure, so nothing is
     * freed here. */
    rc = getaddrinfo(host, port, &hints, &res);
    if (rc != 0) {
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rc));
        return -1;
    }

    for (ai = res; ai != 0; ai = ai->ai_next) {
        fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
        if (fd == -1) {
            failed = "socket";
            saved_errno = errno;
            continue;
        }
        /* The following two setsockopt() are used by ftplib
         * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they
         * are necessary. */
        if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1
            || setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1)
            failed = "setsockopt";
        else if (connect(fd, ai->ai_addr, ai->ai_addrlen) != 0)
            failed = "connect";
        else
            break; /* connected */

        saved_errno = errno; /* close() below may overwrite errno */
        close(fd);
        fd = -1;
    }
    freeaddrinfo(res);

    if (fd == -1) {
        errno = saved_errno;
        perror(failed);
    }
    return fd;
}
112
+ #else
113
/* MinGW's printf has problem with "%lld", hence this hand-written
 * conversion.
 *
 * Write the decimal representation of `x` into `buf` and return `buf`.
 * Negative values get a leading '-'.  `buf` must have room for at least
 * 21 bytes (sign, 19 digits and the terminating NUL). */
char *int64tostr(char *buf, int64_t x)
{
    /* Work on the magnitude as an unsigned value so that INT64_MIN,
     * whose negation does not fit in int64_t, is handled as well. */
    uint64_t mag = x < 0 ? (uint64_t)0 - (uint64_t)x : (uint64_t)x;
    int len = 0, lo, hi;

    /* Digits are produced least-significant first ... */
    do {
        buf[len++] = (char)('0' + (int)(mag % 10));
        mag /= 10;
    } while (mag);
    if (x < 0) buf[len++] = '-';
    buf[len] = 0;

    /* ... and then reversed in place. */
    for (lo = 0, hi = len - 1; lo < hi; ++lo, --hi) {
        char c = buf[lo];
        buf[lo] = buf[hi];
        buf[hi] = c;
    }
    return buf;
}
128
+
129
/* Parse the run of decimal digits at the start of `buf` and return its
 * value.  Parsing stops at the first character that is not a digit, so
 * trailing text such as "\r\n" is ignored; a string that does not start
 * with a digit yields 0.  No sign is accepted and overflow is not
 * detected. */
int64_t strtoint64(const char *buf)
{
    int64_t x = 0;
    while (*buf >= '0' && *buf <= '9') {
        x = x * 10 + (*buf - '0');
        ++buf;
    }
    return x;
}
136
+ /* In windows, the first thing is to establish the TCP connection. */
137
+ int knet_win32_init()
138
+ {
139
+ WSADATA wsaData;
140
+ return WSAStartup(MAKEWORD(2, 2), &wsaData);
141
+ }
142
/* Windows only: counterpart of knet_win32_init(); calls WSACleanup().
 * The result of WSACleanup() is discarded. */
void knet_win32_destroy()
{
    (void)WSACleanup();
}
146
+ /* A slightly modfied version of the following function also works on
147
+ * Mac (and presummably Linux). However, this function is not stable on
148
+ * my Mac. It sometimes works fine but sometimes does not. Therefore for
149
+ * non-Windows OS, I do not use this one. */
150
+ static SOCKET socket_connect(const char *host, const char *port)
151
+ {
152
+ #define __err_connect(func) \
153
+ do { \
154
+ fprintf(stderr, "%s: %d\n", func, WSAGetLastError()); \
155
+ return -1; \
156
+ } while (0)
157
+
158
+ int on = 1;
159
+ SOCKET fd;
160
+ struct linger lng = { 0, 0 };
161
+ struct sockaddr_in server;
162
+ struct hostent *hp = 0;
163
+ // open socket
164
+ if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) __err_connect("socket");
165
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) == -1) __err_connect("setsockopt");
166
+ if (setsockopt(fd, SOL_SOCKET, SO_LINGER, (char*)&lng, sizeof(lng)) == -1) __err_connect("setsockopt");
167
+ // get host info
168
+ if (isalpha(host[0])) hp = gethostbyname(host);
169
+ else {
170
+ struct in_addr addr;
171
+ addr.s_addr = inet_addr(host);
172
+ hp = gethostbyaddr((char*)&addr, 4, AF_INET);
173
+ }
174
+ if (hp == 0) __err_connect("gethost");
175
+ // connect
176
+ server.sin_addr.s_addr = *((unsigned long*)hp->h_addr);
177
+ server.sin_family= AF_INET;
178
+ server.sin_port = htons(atoi(port));
179
+ if (connect(fd, (struct sockaddr*)&server, sizeof(server)) != 0) __err_connect("connect");
180
+ // freehostent(hp); // strangely in MSDN, hp is NOT freed (memory leak?!)
181
+ return fd;
182
+ }
183
+ #endif
184
+
185
/* Read up to `len` bytes from socket `fd` into `buf`, calling netread()
 * repeatedly until the request is satisfied, the peer closes the
 * connection, an error occurs, or the socket stays silent for the
 * socket_wait() time-out.  Returns the number of bytes actually stored,
 * which may be less than `len` (including 0). */
static off_t my_netread(int fd, void *buf, off_t len)
{
    char *dst = buf; /* byte pointer: arithmetic on void* is not standard C */
    off_t rest = len, curr, l = 0;
    /* recv() and read() may not read the required length of data with
     * one call. They have to be called repeatedly. */
    while (rest > 0) {
        if (socket_wait(fd, 1) <= 0) break; // socket is not ready for reading
        curr = netread(fd, dst + l, rest);
        /* According to the glibc manual, section 13.2, a zero returned
         * value indicates end-of-file (EOF), which should mean that
         * read() will not return zero if EOF has not been met but data
         * are not immediately available.  A negative value is an error
         * and must not be added to the byte count. */
        if (curr <= 0) break;
        l += curr; rest -= curr;
    }
    return l;
}
202
+
203
+ /*************************
204
+ * FTP specific routines *
205
+ *************************/
206
+
207
+ static int kftp_get_response(knetFile *ftp)
208
+ {
209
+ #ifndef _WIN32
210
+ unsigned char c;
211
+ #else
212
+ char c;
213
+ #endif
214
+ int n = 0;
215
+ char *p;
216
+ if (socket_wait(ftp->ctrl_fd, 1) <= 0) return 0;
217
+ while (netread(ftp->ctrl_fd, &c, 1)) { // FIXME: this is *VERY BAD* for unbuffered I/O
218
+ //fputc(c, stderr);
219
+ if (n >= ftp->max_response) {
220
+ ftp->max_response = ftp->max_response? ftp->max_response<<1 : 256;
221
+ ftp->response = realloc(ftp->response, ftp->max_response);
222
+ }
223
+ ftp->response[n++] = c;
224
+ if (c == '\n') {
225
+ if (n >= 4 && isdigit(ftp->response[0]) && isdigit(ftp->response[1]) && isdigit(ftp->response[2])
226
+ && ftp->response[3] != '-') break;
227
+ n = 0;
228
+ continue;
229
+ }
230
+ }
231
+ if (n < 2) return -1;
232
+ ftp->response[n-2] = 0;
233
+ return strtol(ftp->response, &p, 0);
234
+ }
235
+
236
+ static int kftp_send_cmd(knetFile *ftp, const char *cmd, int is_get)
237
+ {
238
+ if (socket_wait(ftp->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing
239
+ netwrite(ftp->ctrl_fd, cmd, strlen(cmd));
240
+ return is_get? kftp_get_response(ftp) : 0;
241
+ }
242
+
243
+ static int kftp_pasv_prep(knetFile *ftp)
244
+ {
245
+ char *p;
246
+ int v[6];
247
+ kftp_send_cmd(ftp, "PASV\r\n", 1);
248
+ for (p = ftp->response; *p && *p != '('; ++p);
249
+ if (*p != '(') return -1;
250
+ ++p;
251
+ sscanf(p, "%d,%d,%d,%d,%d,%d", &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]);
252
+ memcpy(ftp->pasv_ip, v, 4 * sizeof(int));
253
+ ftp->pasv_port = (v[4]<<8&0xff00) + v[5];
254
+ return 0;
255
+ }
256
+
257
+
258
+ static int kftp_pasv_connect(knetFile *ftp)
259
+ {
260
+ char host[80], port[10];
261
+ if (ftp->pasv_port == 0) {
262
+ fprintf(stderr, "[kftp_pasv_connect] kftp_pasv_prep() is not called before hand.\n");
263
+ return -1;
264
+ }
265
+ sprintf(host, "%d.%d.%d.%d", ftp->pasv_ip[0], ftp->pasv_ip[1], ftp->pasv_ip[2], ftp->pasv_ip[3]);
266
+ sprintf(port, "%d", ftp->pasv_port);
267
+ ftp->fd = socket_connect(host, port);
268
+ if (ftp->fd == -1) return -1;
269
+ return 0;
270
+ }
271
+
272
+ int kftp_connect(knetFile *ftp)
273
+ {
274
+ ftp->ctrl_fd = socket_connect(ftp->host, ftp->port);
275
+ if (ftp->ctrl_fd == -1) return -1;
276
+ kftp_get_response(ftp);
277
+ kftp_send_cmd(ftp, "USER anonymous\r\n", 1);
278
+ kftp_send_cmd(ftp, "PASS kftp@\r\n", 1);
279
+ kftp_send_cmd(ftp, "TYPE I\r\n", 1);
280
+ return 0;
281
+ }
282
+
283
+ int kftp_reconnect(knetFile *ftp)
284
+ {
285
+ if (ftp->ctrl_fd != -1) {
286
+ netclose(ftp->ctrl_fd);
287
+ ftp->ctrl_fd = -1;
288
+ }
289
+ netclose(ftp->fd);
290
+ ftp->fd = -1;
291
+ return kftp_connect(ftp);
292
+ }
293
+
294
+ // initialize ->type, ->host, ->retr and ->size
295
+ knetFile *kftp_parse_url(const char *fn, const char *mode)
296
+ {
297
+ knetFile *fp;
298
+ char *p;
299
+ int l;
300
+ if (strstr(fn, "ftp://") != fn) return 0;
301
+ for (p = (char*)fn + 6; *p && *p != '/'; ++p);
302
+ if (*p != '/') return 0;
303
+ l = p - fn - 6;
304
+ fp = calloc(1, sizeof(knetFile));
305
+ fp->type = KNF_TYPE_FTP;
306
+ fp->fd = -1;
307
+ /* the Linux/Mac version of socket_connect() also recognizes a port
308
+ * like "ftp", but the Windows version does not. */
309
+ fp->port = strdup("21");
310
+ fp->host = calloc(l + 1, 1);
311
+ if (strchr(mode, 'c')) fp->no_reconnect = 1;
312
+ strncpy(fp->host, fn + 6, l);
313
+ fp->retr = calloc(strlen(p) + 8, 1);
314
+ sprintf(fp->retr, "RETR %s\r\n", p);
315
+ fp->size_cmd = calloc(strlen(p) + 8, 1);
316
+ sprintf(fp->size_cmd, "SIZE %s\r\n", p);
317
+ fp->seek_offset = 0;
318
+ return fp;
319
+ }
320
+ // place ->fd at offset off
321
+ int kftp_connect_file(knetFile *fp)
322
+ {
323
+ int ret;
324
+ long long file_size;
325
+ if (fp->fd != -1) {
326
+ netclose(fp->fd);
327
+ if (fp->no_reconnect) kftp_get_response(fp);
328
+ }
329
+ kftp_pasv_prep(fp);
330
+ kftp_send_cmd(fp, fp->size_cmd, 1);
331
+ #ifndef _WIN32
332
+ if ( sscanf(fp->response,"%*d %lld", &file_size) != 1 )
333
+ {
334
+ fprintf(stderr,"[kftp_connect_file] %s\n", fp->response);
335
+ return -1;
336
+ }
337
+ #else
338
+ const char *p = fp->response;
339
+ while (*p != ' ') ++p;
340
+ while (*p < '0' || *p > '9') ++p;
341
+ file_size = strtoint64(p);
342
+ #endif
343
+ fp->file_size = file_size;
344
+ if (fp->offset>=0) {
345
+ char tmp[32];
346
+ #ifndef _WIN32
347
+ sprintf(tmp, "REST %lld\r\n", (long long)fp->offset);
348
+ #else
349
+ strcpy(tmp, "REST ");
350
+ int64tostr(tmp + 5, fp->offset);
351
+ strcat(tmp, "\r\n");
352
+ #endif
353
+ kftp_send_cmd(fp, tmp, 1);
354
+ }
355
+ kftp_send_cmd(fp, fp->retr, 0);
356
+ kftp_pasv_connect(fp);
357
+ ret = kftp_get_response(fp);
358
+ if (ret != 150) {
359
+ fprintf(stderr, "[kftp_connect_file] %s\n", fp->response);
360
+ netclose(fp->fd);
361
+ fp->fd = -1;
362
+ return -1;
363
+ }
364
+ fp->is_ready = 1;
365
+ return 0;
366
+ }
367
+
368
+
369
+ /**************************
370
+ * HTTP specific routines *
371
+ **************************/
372
+
373
+ knetFile *khttp_parse_url(const char *fn, const char *mode)
374
+ {
375
+ knetFile *fp;
376
+ char *p, *proxy, *q;
377
+ int l;
378
+ if (strstr(fn, "http://") != fn) return 0;
379
+ // set ->http_host
380
+ for (p = (char*)fn + 7; *p && *p != '/'; ++p);
381
+ l = p - fn - 7;
382
+ fp = calloc(1, sizeof(knetFile));
383
+ fp->http_host = calloc(l + 1, 1);
384
+ strncpy(fp->http_host, fn + 7, l);
385
+ fp->http_host[l] = 0;
386
+ for (q = fp->http_host; *q && *q != ':'; ++q);
387
+ if (*q == ':') *q++ = 0;
388
+ // get http_proxy
389
+ proxy = getenv("http_proxy");
390
+ // set ->host, ->port and ->path
391
+ if (proxy == 0) {
392
+ fp->host = strdup(fp->http_host); // when there is no proxy, server name is identical to http_host name.
393
+ fp->port = strdup(*q? q : "80");
394
+ fp->path = strdup(*p? p : "/");
395
+ } else {
396
+ fp->host = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy);
397
+ for (q = fp->host; *q && *q != ':'; ++q);
398
+ if (*q == ':') *q++ = 0;
399
+ fp->port = strdup(*q? q : "80");
400
+ fp->path = strdup(fn);
401
+ }
402
+ fp->type = KNF_TYPE_HTTP;
403
+ fp->ctrl_fd = fp->fd = -1;
404
+ fp->seek_offset = 0;
405
+ return fp;
406
+ }
407
+
408
+ int khttp_connect_file(knetFile *fp)
409
+ {
410
+ int ret, l = 0;
411
+ char *buf, *p;
412
+ if (fp->fd != -1) netclose(fp->fd);
413
+ fp->fd = socket_connect(fp->host, fp->port);
414
+ buf = calloc(0x10000, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough.
415
+ l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", fp->path, fp->http_host);
416
+ l += sprintf(buf + l, "Range: bytes=%lld-\r\n", (long long)fp->offset);
417
+ l += sprintf(buf + l, "\r\n");
418
+ netwrite(fp->fd, buf, l);
419
+ l = 0;
420
+ while (netread(fp->fd, buf + l, 1)) { // read HTTP header; FIXME: bad efficiency
421
+ if (buf[l] == '\n' && l >= 3)
422
+ if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
423
+ ++l;
424
+ }
425
+ buf[l] = 0;
426
+ if (l < 14) { // prematured header
427
+ netclose(fp->fd);
428
+ fp->fd = -1;
429
+ return -1;
430
+ }
431
+ ret = strtol(buf + 8, &p, 0); // HTTP return code
432
+ if (ret == 200 && fp->offset>0) { // 200 (complete result); then skip beginning of the file
433
+ off_t rest = fp->offset;
434
+ while (rest) {
435
+ off_t l = rest < 0x10000? rest : 0x10000;
436
+ rest -= my_netread(fp->fd, buf, l);
437
+ }
438
+ } else if (ret != 206 && ret != 200) {
439
+ free(buf);
440
+ fprintf(stderr, "[khttp_connect_file] fail to open file (HTTP code: %d).\n", ret);
441
+ netclose(fp->fd);
442
+ fp->fd = -1;
443
+ return -1;
444
+ }
445
+ free(buf);
446
+ fp->is_ready = 1;
447
+ return 0;
448
+ }
449
+
450
+ /********************
451
+ * Generic routines *
452
+ ********************/
453
+
454
+ knetFile *knet_open(const char *fn, const char *mode)
455
+ {
456
+ knetFile *fp = 0;
457
+ if (mode[0] != 'r') {
458
+ fprintf(stderr, "[kftp_open] only mode \"r\" is supported.\n");
459
+ return 0;
460
+ }
461
+ if (strstr(fn, "ftp://") == fn) {
462
+ fp = kftp_parse_url(fn, mode);
463
+ if (fp == 0) return 0;
464
+ if (kftp_connect(fp) == -1) {
465
+ knet_close(fp);
466
+ return 0;
467
+ }
468
+ kftp_connect_file(fp);
469
+ } else if (strstr(fn, "http://") == fn) {
470
+ fp = khttp_parse_url(fn, mode);
471
+ if (fp == 0) return 0;
472
+ khttp_connect_file(fp);
473
+ } else { // local file
474
+ #ifdef _WIN32
475
+ /* In windows, O_BINARY is necessary. In Linux/Mac, O_BINARY may
476
+ * be undefined on some systems, although it is defined on my
477
+ * Mac and the Linux I have tested on. */
478
+ int fd = open(fn, O_RDONLY | O_BINARY);
479
+ #else
480
+ int fd = open(fn, O_RDONLY);
481
+ #endif
482
+ if (fd == -1) {
483
+ perror("open");
484
+ return 0;
485
+ }
486
+ fp = (knetFile*)calloc(1, sizeof(knetFile));
487
+ fp->type = KNF_TYPE_LOCAL;
488
+ fp->fd = fd;
489
+ fp->ctrl_fd = -1;
490
+ }
491
+ if (fp && fp->fd == -1) {
492
+ knet_close(fp);
493
+ return 0;
494
+ }
495
+ return fp;
496
+ }
497
+
498
+ knetFile *knet_dopen(int fd, const char *mode)
499
+ {
500
+ knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));
501
+ fp->type = KNF_TYPE_LOCAL;
502
+ fp->fd = fd;
503
+ return fp;
504
+ }
505
+
506
+ off_t knet_read(knetFile *fp, void *buf, off_t len)
507
+ {
508
+ off_t l = 0;
509
+ if (fp->fd == -1) return 0;
510
+ if (fp->type == KNF_TYPE_FTP) {
511
+ if (fp->is_ready == 0) {
512
+ if (!fp->no_reconnect) kftp_reconnect(fp);
513
+ kftp_connect_file(fp);
514
+ }
515
+ } else if (fp->type == KNF_TYPE_HTTP) {
516
+ if (fp->is_ready == 0)
517
+ khttp_connect_file(fp);
518
+ }
519
+ if (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX
520
+ off_t rest = len, curr;
521
+ while (rest) {
522
+ curr = read(fp->fd, buf + l, rest);
523
+ if (curr == 0) break;
524
+ l += curr; rest -= curr;
525
+ }
526
+ } else l = my_netread(fp->fd, buf, len);
527
+ fp->offset += l;
528
+ return l;
529
+ }
530
+
531
+ off_t knet_seek(knetFile *fp, int64_t off, int whence)
532
+ {
533
+ if (whence == SEEK_SET && off == fp->offset) return 0;
534
+ if (fp->type == KNF_TYPE_LOCAL) {
535
+ /* Be aware that lseek() returns the offset after seeking,
536
+ * while fseek() returns zero on success. */
537
+ off_t offset = lseek(fp->fd, off, whence);
538
+ if (offset == -1) {
539
+ // Be silent, it is OK for knet_seek to fail when the file is streamed
540
+ // fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
541
+ return -1;
542
+ }
543
+ fp->offset = offset;
544
+ return 0;
545
+ }
546
+ else if (fp->type == KNF_TYPE_FTP)
547
+ {
548
+ if (whence==SEEK_CUR)
549
+ fp->offset += off;
550
+ else if (whence==SEEK_SET)
551
+ fp->offset = off;
552
+ else if ( whence==SEEK_END)
553
+ fp->offset = fp->file_size+off;
554
+ fp->is_ready = 0;
555
+ return 0;
556
+ }
557
+ else if (fp->type == KNF_TYPE_HTTP)
558
+ {
559
+ if (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?
560
+ fprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\n");
561
+ errno = ESPIPE;
562
+ return -1;
563
+ }
564
+ if (whence==SEEK_CUR)
565
+ fp->offset += off;
566
+ else if (whence==SEEK_SET)
567
+ fp->offset = off;
568
+ fp->is_ready = 0;
569
+ return fp->offset;
570
+ }
571
+ errno = EINVAL;
572
+ fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
573
+ return -1;
574
+ }
575
+
576
+ int knet_close(knetFile *fp)
577
+ {
578
+ if (fp == 0) return 0;
579
+ if (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific
580
+ if (fp->fd != -1) {
581
+ /* On Linux/Mac, netclose() is an alias of close(), but on
582
+ * Windows, it is an alias of closesocket(). */
583
+ if (fp->type == KNF_TYPE_LOCAL) close(fp->fd);
584
+ else netclose(fp->fd);
585
+ }
586
+ free(fp->host); free(fp->port);
587
+ free(fp->response); free(fp->retr); free(fp->size_cmd); // FTP specific
588
+ free(fp->path); free(fp->http_host); // HTTP specific
589
+ free(fp);
590
+ return 0;
591
+ }
592
+
593
+ #ifdef KNETFILE_MAIN
594
+ int main(void)
595
+ {
596
+ char *buf;
597
+ knetFile *fp;
598
+ int type = 4, l;
599
+ #ifdef _WIN32
600
+ knet_win32_init();
601
+ #endif
602
+ buf = calloc(0x100000, 1);
603
+ if (type == 0) {
604
+ fp = knet_open("knetfile.c", "r");
605
+ knet_seek(fp, 1000, SEEK_SET);
606
+ } else if (type == 1) { // NCBI FTP, large file
607
+ fp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");
608
+ knet_seek(fp, 2500000000ll, SEEK_SET);
609
+ l = knet_read(fp, buf, 255);
610
+ } else if (type == 2) {
611
+ fp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");
612
+ knet_seek(fp, 1000, SEEK_SET);
613
+ } else if (type == 3) {
614
+ fp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");
615
+ knet_seek(fp, 1000, SEEK_SET);
616
+ } else if (type == 4) {
617
+ fp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");
618
+ knet_read(fp, buf, 10000);
619
+ knet_seek(fp, 20000, SEEK_SET);
620
+ knet_seek(fp, 10000, SEEK_SET);
621
+ l = knet_read(fp, buf+10000, 10000000) + 10000;
622
+ }
623
+ if (type != 4 && type != 1) {
624
+ knet_read(fp, buf, 255);
625
+ buf[255] = 0;
626
+ printf("%s\n", buf);
627
+ } else write(fileno(stdout), buf, l);
628
+ knet_close(fp);
629
+ free(buf);
630
+ return 0;
631
+ }
632
+ #endif
@@ -0,0 +1,75 @@
1
+ #ifndef KNETFILE_H
2
+ #define KNETFILE_H
3
+
4
+ #include <stdint.h>
5
+ #include <fcntl.h>
6
+
7
+ #ifndef _WIN32
8
+ #define netread(fd, ptr, len) read(fd, ptr, len)
9
+ #define netwrite(fd, ptr, len) write(fd, ptr, len)
10
+ #define netclose(fd) close(fd)
11
+ #else
12
+ #include <winsock2.h>
13
+ #define netread(fd, ptr, len) recv(fd, ptr, len, 0)
14
+ #define netwrite(fd, ptr, len) send(fd, ptr, len, 0)
15
+ #define netclose(fd) closesocket(fd)
16
+ #endif
17
+
18
+ // FIXME: currently I/O is unbuffered
19
+
20
+ #define KNF_TYPE_LOCAL 1
21
+ #define KNF_TYPE_FTP 2
22
+ #define KNF_TYPE_HTTP 3
23
+
24
/* State of one file opened through knet_open() or knet_dopen(). */
typedef struct knetFile_s {
    int type;             /* one of the KNF_TYPE_* constants */
    int fd;               /* descriptor the data are read from; -1 when closed */
    int64_t offset;       /* current read position within the file */
    char *host;           /* server (or proxy) to connect to */
    char *port;           /* port on that server, as a string */

    /* The following are mostly for FTP; ctrl_fd and is_ready are also
     * set for other file types. */
    int ctrl_fd;          /* FTP control connection; -1 when there is none */
    int pasv_ip[4];       /* address bytes taken from the PASV reply */
    int pasv_port;        /* port taken from the PASV reply; 0 if not set */
    int max_response;     /* number of bytes allocated for `response` */
    int no_reconnect;     /* set when the open mode contains 'c' */
    int is_ready;         /* non-zero once a transfer has been started at
                           * `offset`; cleared by knet_seek() */
    char *response;       /* buffer holding the last FTP reply line */
    char *retr;           /* prepared "RETR <path>" command */
    char *size_cmd;       /* prepared "SIZE <path>" command */
    int64_t seek_offset;  /* for lazy seek */
    int64_t file_size;    /* size taken from the reply to SIZE */

    /* the following are for HTTP only */
    char *path;           /* request target sent in the GET line */
    char *http_host;      /* value sent in the Host header */
} knetFile;
38
+
39
+ #define knet_tell(fp) ((fp)->offset)
40
+ #define knet_fileno(fp) ((fp)->fd)
41
+
42
+ #ifdef __cplusplus
43
+ extern "C" {
44
+ #endif
45
+
46
+ #ifdef _WIN32
47
+ int knet_win32_init();
48
+ void knet_win32_destroy();
49
+ #endif
50
+
51
+ knetFile *knet_open(const char *fn, const char *mode);
52
+
53
+ /*
54
+ This only works with local files.
55
+ */
56
+ knetFile *knet_dopen(int fd, const char *mode);
57
+
58
+ /*
59
+ If ->is_ready==0, this routine updates ->fd; otherwise, it simply
60
+ reads from ->fd.
61
+ */
62
+ off_t knet_read(knetFile *fp, void *buf, off_t len);
63
+
64
+ /*
65
+ This routine only sets ->offset and ->is_ready=0. It does not
66
+ communicate with the FTP server.
67
+ */
68
+ off_t knet_seek(knetFile *fp, int64_t off, int whence);
69
+ int knet_close(knetFile *fp);
70
+
71
+ #ifdef __cplusplus
72
+ }
73
+ #endif
74
+
75
+ #endif