ngs_server 0.4 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,632 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2008 Genome Research Ltd (GRL).
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /* Contact: Heng Li <lh3@sanger.ac.uk> */
27
+
28
+ /* Probably I will not do socket programming in the next few years and
29
+ therefore I decide to heavily annotate this file, for Linux and
30
+ Windows as well. -lh3 */
31
+
32
+ #include <time.h>
33
+ #include <stdio.h>
34
+ #include <ctype.h>
35
+ #include <stdlib.h>
36
+ #include <string.h>
37
+ #include <errno.h>
38
+ #include <unistd.h>
39
+ #include <sys/types.h>
40
+
41
+ #ifdef _WIN32
42
+ #include <winsock.h>
43
+ #else
44
+ #include <netdb.h>
45
+ #include <arpa/inet.h>
46
+ #include <sys/socket.h>
47
+ #endif
48
+
49
+ #include "knetfile.h"
50
+
51
+ /* In winsock.h, the type of a socket is SOCKET, which is: "typedef
52
+ * u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed
53
+ * integer -1. In knetfile.c, I use "int" for socket type
54
+ * throughout. This should be improved to avoid confusion.
55
+ *
56
+ * In Linux/Mac, recv() and read() do almost the same thing. You can see
57
+ * in the header file that netread() is simply an alias of read(). In
58
+ * Windows, however, they are different and using recv() is mandatory.
59
+ */
60
+
61
/* Wait until a descriptor is ready for reading (is_read != 0) or for
 * writing (is_read == 0), giving up after five seconds.
 *
 * Returns the value of select(): a positive number when the descriptor is
 * ready, 0 on time-out and a negative value on error. On non-Windows builds
 * a descriptor outside [0, FD_SETSIZE) is rejected up front with -1 and
 * errno set to EBADF. */
static int socket_wait(int fd, int is_read)
{
    fd_set fds, *fdr = 0, *fdw = 0;
    struct timeval tv;
    int ret;
#ifndef _WIN32
    /* FD_SET() on such a descriptor would write outside the fd_set. */
    if (fd < 0 || fd >= FD_SETSIZE) {
        errno = EBADF;
        return -1;
    }
#endif
    tv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out
    FD_ZERO(&fds);
    FD_SET(fd, &fds);
    if (is_read) fdr = &fds;
    else fdw = &fds;
    ret = select(fd+1, fdr, fdw, 0, &tv);
#ifndef _WIN32
    if (ret == -1) perror("select");
#else
    if (ret == 0)
        fprintf(stderr, "select time-out\n");
    else if (ret == SOCKET_ERROR)
        fprintf(stderr, "select: %d\n", WSAGetLastError());
#endif
    return ret;
}
84
+
85
+ #ifndef _WIN32
86
/* Open a TCP connection to host:port (non-Windows builds). It relies on
 * getaddrinfo(), which the winsock variant of this function does not use.
 * Adapted from an example in "Beej's Guide to Network Programming"
 * (http://beej.us/guide/bgnet/).
 *
 * Only the first address returned by getaddrinfo() is tried.
 *
 * Returns the connected descriptor, or -1 after printing a diagnostic to
 * stderr. On failure no descriptor and no address list is left behind. */
static int socket_connect(const char *host, const char *port)
{
    int on = 1, fd = -1, rc;
    struct linger lng = { 0, 0 };
    struct addrinfo hints, *res = NULL;
    const char *failed = NULL;

    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_UNSPEC;
    hints.ai_socktype = SOCK_STREAM;
    /* In Unix/Mac, getaddrinfo() is the most convenient way to get
     * server information. It reports errors through its return value,
     * not errno, and leaves `res` unset on failure. */
    rc = getaddrinfo(host, port, &hints, &res);
    if (rc != 0) {
        fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(rc));
        return -1;
    }
    /* The two setsockopt() calls are used by ftplib
     * (http://nbpfaus.net/~pfau/ftplib/). It is not clear whether they
     * are necessary. */
    if ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1)
        failed = "socket";
    else if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1)
        failed = "setsockopt";
    else if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1)
        failed = "setsockopt";
    else if (connect(fd, res->ai_addr, res->ai_addrlen) != 0)
        failed = "connect";

    if (failed != NULL) {
        perror(failed); /* report before close() can disturb errno */
        if (fd != -1) close(fd);
        fd = -1;
    }
    freeaddrinfo(res);
    return fd;
}
112
+ #else
113
/* MinGW's printf has problems with "%lld", hence this manual conversion.
 *
 * Writes the decimal representation of x into buf, NUL-terminated, and
 * returns buf. Negative values get a leading '-'. The caller must provide
 * room for at least 21 bytes (sign, 19 digits and the terminator). */
char *int64tostr(char *buf, int64_t x)
{
    /* Work on the magnitude as unsigned so that INT64_MIN is representable. */
    uint64_t mag = x < 0 ? (uint64_t)0 - (uint64_t)x : (uint64_t)x;
    int len = 0, i, j;

    /* Emit digits least-significant first ... */
    do {
        buf[len++] = (char)('0' + (int)(mag % 10));
        mag /= 10;
    } while (mag);
    if (x < 0) buf[len++] = '-';
    buf[len] = 0;
    /* ... then reverse in place. */
    for (i = 0, j = len - 1; i < j; ++i, --j) {
        char c = buf[i]; buf[i] = buf[j]; buf[j] = c;
    }
    return buf;
}
128
+
129
/* Parse the run of decimal digits at the start of buf.
 *
 * Parsing stops at the first character that is not '0'..'9', so trailing
 * text such as "\r\n" is ignored. Returns 0 when buf does not start with a
 * digit. No sign is accepted and overflow is not detected. */
int64_t strtoint64(const char *buf)
{
    int64_t x = 0;
    while (*buf >= '0' && *buf <= '9') {
        x = x * 10 + (*buf - '0');
        ++buf;
    }
    return x;
}
136
+ /* In windows, the first thing is to establish the TCP connection. */
137
+ int knet_win32_init()
138
+ {
139
+ WSADATA wsaData;
140
+ return WSAStartup(MAKEWORD(2, 2), &wsaData);
141
+ }
142
/* Windows only: counterpart of knet_win32_init(); calls WSACleanup(). */
void knet_win32_destroy()
{
    WSACleanup();
}
146
+ /* A slightly modfied version of the following function also works on
147
+ * Mac (and presummably Linux). However, this function is not stable on
148
+ * my Mac. It sometimes works fine but sometimes does not. Therefore for
149
+ * non-Windows OS, I do not use this one. */
150
+ static SOCKET socket_connect(const char *host, const char *port)
151
+ {
152
+ #define __err_connect(func) \
153
+ do { \
154
+ fprintf(stderr, "%s: %d\n", func, WSAGetLastError()); \
155
+ return -1; \
156
+ } while (0)
157
+
158
+ int on = 1;
159
+ SOCKET fd;
160
+ struct linger lng = { 0, 0 };
161
+ struct sockaddr_in server;
162
+ struct hostent *hp = 0;
163
+ // open socket
164
+ if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) __err_connect("socket");
165
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) == -1) __err_connect("setsockopt");
166
+ if (setsockopt(fd, SOL_SOCKET, SO_LINGER, (char*)&lng, sizeof(lng)) == -1) __err_connect("setsockopt");
167
+ // get host info
168
+ if (isalpha(host[0])) hp = gethostbyname(host);
169
+ else {
170
+ struct in_addr addr;
171
+ addr.s_addr = inet_addr(host);
172
+ hp = gethostbyaddr((char*)&addr, 4, AF_INET);
173
+ }
174
+ if (hp == 0) __err_connect("gethost");
175
+ // connect
176
+ server.sin_addr.s_addr = *((unsigned long*)hp->h_addr);
177
+ server.sin_family= AF_INET;
178
+ server.sin_port = htons(atoi(port));
179
+ if (connect(fd, (struct sockaddr*)&server, sizeof(server)) != 0) __err_connect("connect");
180
+ // freehostent(hp); // strangely in MSDN, hp is NOT freed (memory leak?!)
181
+ return fd;
182
+ }
183
+ #endif
184
+
185
/* Read up to len bytes from a socket into buf.
 *
 * recv() and read() may deliver fewer bytes than requested, so netread() is
 * called repeatedly, each time after socket_wait() reports the socket as
 * readable. The loop stops when len bytes have arrived, when socket_wait()
 * reports a time-out or an error, or when netread() returns 0 (end of
 * stream) or a negative value (error).
 *
 * Returns the number of bytes actually stored in buf, possibly 0. */
static off_t my_netread(int fd, void *buf, off_t len)
{
    char *dst = buf; /* arithmetic on void* is not standard C */
    off_t rest = len, curr, l = 0;
    while (rest > 0) {
        if (socket_wait(fd, 1) <= 0) break; // socket is not ready for reading
        curr = netread(fd, dst + l, rest);
        /* Zero means end-of-file; a negative value is an error and must
         * not be added to the counters. */
        if (curr <= 0) break;
        l += curr; rest -= curr;
    }
    return l;
}
202
+
203
+ /*************************
204
+ * FTP specific routines *
205
+ *************************/
206
+
207
+ static int kftp_get_response(knetFile *ftp)
208
+ {
209
+ #ifndef _WIN32
210
+ unsigned char c;
211
+ #else
212
+ char c;
213
+ #endif
214
+ int n = 0;
215
+ char *p;
216
+ if (socket_wait(ftp->ctrl_fd, 1) <= 0) return 0;
217
+ while (netread(ftp->ctrl_fd, &c, 1)) { // FIXME: this is *VERY BAD* for unbuffered I/O
218
+ //fputc(c, stderr);
219
+ if (n >= ftp->max_response) {
220
+ ftp->max_response = ftp->max_response? ftp->max_response<<1 : 256;
221
+ ftp->response = realloc(ftp->response, ftp->max_response);
222
+ }
223
+ ftp->response[n++] = c;
224
+ if (c == '\n') {
225
+ if (n >= 4 && isdigit(ftp->response[0]) && isdigit(ftp->response[1]) && isdigit(ftp->response[2])
226
+ && ftp->response[3] != '-') break;
227
+ n = 0;
228
+ continue;
229
+ }
230
+ }
231
+ if (n < 2) return -1;
232
+ ftp->response[n-2] = 0;
233
+ return strtol(ftp->response, &p, 0);
234
+ }
235
+
236
+ static int kftp_send_cmd(knetFile *ftp, const char *cmd, int is_get)
237
+ {
238
+ if (socket_wait(ftp->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing
239
+ netwrite(ftp->ctrl_fd, cmd, strlen(cmd));
240
+ return is_get? kftp_get_response(ftp) : 0;
241
+ }
242
+
243
+ static int kftp_pasv_prep(knetFile *ftp)
244
+ {
245
+ char *p;
246
+ int v[6];
247
+ kftp_send_cmd(ftp, "PASV\r\n", 1);
248
+ for (p = ftp->response; *p && *p != '('; ++p);
249
+ if (*p != '(') return -1;
250
+ ++p;
251
+ sscanf(p, "%d,%d,%d,%d,%d,%d", &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]);
252
+ memcpy(ftp->pasv_ip, v, 4 * sizeof(int));
253
+ ftp->pasv_port = (v[4]<<8&0xff00) + v[5];
254
+ return 0;
255
+ }
256
+
257
+
258
+ static int kftp_pasv_connect(knetFile *ftp)
259
+ {
260
+ char host[80], port[10];
261
+ if (ftp->pasv_port == 0) {
262
+ fprintf(stderr, "[kftp_pasv_connect] kftp_pasv_prep() is not called before hand.\n");
263
+ return -1;
264
+ }
265
+ sprintf(host, "%d.%d.%d.%d", ftp->pasv_ip[0], ftp->pasv_ip[1], ftp->pasv_ip[2], ftp->pasv_ip[3]);
266
+ sprintf(port, "%d", ftp->pasv_port);
267
+ ftp->fd = socket_connect(host, port);
268
+ if (ftp->fd == -1) return -1;
269
+ return 0;
270
+ }
271
+
272
+ int kftp_connect(knetFile *ftp)
273
+ {
274
+ ftp->ctrl_fd = socket_connect(ftp->host, ftp->port);
275
+ if (ftp->ctrl_fd == -1) return -1;
276
+ kftp_get_response(ftp);
277
+ kftp_send_cmd(ftp, "USER anonymous\r\n", 1);
278
+ kftp_send_cmd(ftp, "PASS kftp@\r\n", 1);
279
+ kftp_send_cmd(ftp, "TYPE I\r\n", 1);
280
+ return 0;
281
+ }
282
+
283
+ int kftp_reconnect(knetFile *ftp)
284
+ {
285
+ if (ftp->ctrl_fd != -1) {
286
+ netclose(ftp->ctrl_fd);
287
+ ftp->ctrl_fd = -1;
288
+ }
289
+ netclose(ftp->fd);
290
+ ftp->fd = -1;
291
+ return kftp_connect(ftp);
292
+ }
293
+
294
+ // initialize ->type, ->host, ->retr and ->size
295
+ knetFile *kftp_parse_url(const char *fn, const char *mode)
296
+ {
297
+ knetFile *fp;
298
+ char *p;
299
+ int l;
300
+ if (strstr(fn, "ftp://") != fn) return 0;
301
+ for (p = (char*)fn + 6; *p && *p != '/'; ++p);
302
+ if (*p != '/') return 0;
303
+ l = p - fn - 6;
304
+ fp = calloc(1, sizeof(knetFile));
305
+ fp->type = KNF_TYPE_FTP;
306
+ fp->fd = -1;
307
+ /* the Linux/Mac version of socket_connect() also recognizes a port
308
+ * like "ftp", but the Windows version does not. */
309
+ fp->port = strdup("21");
310
+ fp->host = calloc(l + 1, 1);
311
+ if (strchr(mode, 'c')) fp->no_reconnect = 1;
312
+ strncpy(fp->host, fn + 6, l);
313
+ fp->retr = calloc(strlen(p) + 8, 1);
314
+ sprintf(fp->retr, "RETR %s\r\n", p);
315
+ fp->size_cmd = calloc(strlen(p) + 8, 1);
316
+ sprintf(fp->size_cmd, "SIZE %s\r\n", p);
317
+ fp->seek_offset = 0;
318
+ return fp;
319
+ }
320
+ // place ->fd at offset off
321
+ int kftp_connect_file(knetFile *fp)
322
+ {
323
+ int ret;
324
+ long long file_size;
325
+ if (fp->fd != -1) {
326
+ netclose(fp->fd);
327
+ if (fp->no_reconnect) kftp_get_response(fp);
328
+ }
329
+ kftp_pasv_prep(fp);
330
+ kftp_send_cmd(fp, fp->size_cmd, 1);
331
+ #ifndef _WIN32
332
+ if ( sscanf(fp->response,"%*d %lld", &file_size) != 1 )
333
+ {
334
+ fprintf(stderr,"[kftp_connect_file] %s\n", fp->response);
335
+ return -1;
336
+ }
337
+ #else
338
+ const char *p = fp->response;
339
+ while (*p != ' ') ++p;
340
+ while (*p < '0' || *p > '9') ++p;
341
+ file_size = strtoint64(p);
342
+ #endif
343
+ fp->file_size = file_size;
344
+ if (fp->offset>=0) {
345
+ char tmp[32];
346
+ #ifndef _WIN32
347
+ sprintf(tmp, "REST %lld\r\n", (long long)fp->offset);
348
+ #else
349
+ strcpy(tmp, "REST ");
350
+ int64tostr(tmp + 5, fp->offset);
351
+ strcat(tmp, "\r\n");
352
+ #endif
353
+ kftp_send_cmd(fp, tmp, 1);
354
+ }
355
+ kftp_send_cmd(fp, fp->retr, 0);
356
+ kftp_pasv_connect(fp);
357
+ ret = kftp_get_response(fp);
358
+ if (ret != 150) {
359
+ fprintf(stderr, "[kftp_connect_file] %s\n", fp->response);
360
+ netclose(fp->fd);
361
+ fp->fd = -1;
362
+ return -1;
363
+ }
364
+ fp->is_ready = 1;
365
+ return 0;
366
+ }
367
+
368
+
369
+ /**************************
370
+ * HTTP specific routines *
371
+ **************************/
372
+
373
+ knetFile *khttp_parse_url(const char *fn, const char *mode)
374
+ {
375
+ knetFile *fp;
376
+ char *p, *proxy, *q;
377
+ int l;
378
+ if (strstr(fn, "http://") != fn) return 0;
379
+ // set ->http_host
380
+ for (p = (char*)fn + 7; *p && *p != '/'; ++p);
381
+ l = p - fn - 7;
382
+ fp = calloc(1, sizeof(knetFile));
383
+ fp->http_host = calloc(l + 1, 1);
384
+ strncpy(fp->http_host, fn + 7, l);
385
+ fp->http_host[l] = 0;
386
+ for (q = fp->http_host; *q && *q != ':'; ++q);
387
+ if (*q == ':') *q++ = 0;
388
+ // get http_proxy
389
+ proxy = getenv("http_proxy");
390
+ // set ->host, ->port and ->path
391
+ if (proxy == 0) {
392
+ fp->host = strdup(fp->http_host); // when there is no proxy, server name is identical to http_host name.
393
+ fp->port = strdup(*q? q : "80");
394
+ fp->path = strdup(*p? p : "/");
395
+ } else {
396
+ fp->host = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy);
397
+ for (q = fp->host; *q && *q != ':'; ++q);
398
+ if (*q == ':') *q++ = 0;
399
+ fp->port = strdup(*q? q : "80");
400
+ fp->path = strdup(fn);
401
+ }
402
+ fp->type = KNF_TYPE_HTTP;
403
+ fp->ctrl_fd = fp->fd = -1;
404
+ fp->seek_offset = 0;
405
+ return fp;
406
+ }
407
+
408
+ int khttp_connect_file(knetFile *fp)
409
+ {
410
+ int ret, l = 0;
411
+ char *buf, *p;
412
+ if (fp->fd != -1) netclose(fp->fd);
413
+ fp->fd = socket_connect(fp->host, fp->port);
414
+ buf = calloc(0x10000, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough.
415
+ l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", fp->path, fp->http_host);
416
+ l += sprintf(buf + l, "Range: bytes=%lld-\r\n", (long long)fp->offset);
417
+ l += sprintf(buf + l, "\r\n");
418
+ netwrite(fp->fd, buf, l);
419
+ l = 0;
420
+ while (netread(fp->fd, buf + l, 1)) { // read HTTP header; FIXME: bad efficiency
421
+ if (buf[l] == '\n' && l >= 3)
422
+ if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
423
+ ++l;
424
+ }
425
+ buf[l] = 0;
426
+ if (l < 14) { // prematured header
427
+ netclose(fp->fd);
428
+ fp->fd = -1;
429
+ return -1;
430
+ }
431
+ ret = strtol(buf + 8, &p, 0); // HTTP return code
432
+ if (ret == 200 && fp->offset>0) { // 200 (complete result); then skip beginning of the file
433
+ off_t rest = fp->offset;
434
+ while (rest) {
435
+ off_t l = rest < 0x10000? rest : 0x10000;
436
+ rest -= my_netread(fp->fd, buf, l);
437
+ }
438
+ } else if (ret != 206 && ret != 200) {
439
+ free(buf);
440
+ fprintf(stderr, "[khttp_connect_file] fail to open file (HTTP code: %d).\n", ret);
441
+ netclose(fp->fd);
442
+ fp->fd = -1;
443
+ return -1;
444
+ }
445
+ free(buf);
446
+ fp->is_ready = 1;
447
+ return 0;
448
+ }
449
+
450
+ /********************
451
+ * Generic routines *
452
+ ********************/
453
+
454
+ knetFile *knet_open(const char *fn, const char *mode)
455
+ {
456
+ knetFile *fp = 0;
457
+ if (mode[0] != 'r') {
458
+ fprintf(stderr, "[kftp_open] only mode \"r\" is supported.\n");
459
+ return 0;
460
+ }
461
+ if (strstr(fn, "ftp://") == fn) {
462
+ fp = kftp_parse_url(fn, mode);
463
+ if (fp == 0) return 0;
464
+ if (kftp_connect(fp) == -1) {
465
+ knet_close(fp);
466
+ return 0;
467
+ }
468
+ kftp_connect_file(fp);
469
+ } else if (strstr(fn, "http://") == fn) {
470
+ fp = khttp_parse_url(fn, mode);
471
+ if (fp == 0) return 0;
472
+ khttp_connect_file(fp);
473
+ } else { // local file
474
+ #ifdef _WIN32
475
+ /* In windows, O_BINARY is necessary. In Linux/Mac, O_BINARY may
476
+ * be undefined on some systems, although it is defined on my
477
+ * Mac and the Linux I have tested on. */
478
+ int fd = open(fn, O_RDONLY | O_BINARY);
479
+ #else
480
+ int fd = open(fn, O_RDONLY);
481
+ #endif
482
+ if (fd == -1) {
483
+ perror("open");
484
+ return 0;
485
+ }
486
+ fp = (knetFile*)calloc(1, sizeof(knetFile));
487
+ fp->type = KNF_TYPE_LOCAL;
488
+ fp->fd = fd;
489
+ fp->ctrl_fd = -1;
490
+ }
491
+ if (fp && fp->fd == -1) {
492
+ knet_close(fp);
493
+ return 0;
494
+ }
495
+ return fp;
496
+ }
497
+
498
+ knetFile *knet_dopen(int fd, const char *mode)
499
+ {
500
+ knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));
501
+ fp->type = KNF_TYPE_LOCAL;
502
+ fp->fd = fd;
503
+ return fp;
504
+ }
505
+
506
+ off_t knet_read(knetFile *fp, void *buf, off_t len)
507
+ {
508
+ off_t l = 0;
509
+ if (fp->fd == -1) return 0;
510
+ if (fp->type == KNF_TYPE_FTP) {
511
+ if (fp->is_ready == 0) {
512
+ if (!fp->no_reconnect) kftp_reconnect(fp);
513
+ kftp_connect_file(fp);
514
+ }
515
+ } else if (fp->type == KNF_TYPE_HTTP) {
516
+ if (fp->is_ready == 0)
517
+ khttp_connect_file(fp);
518
+ }
519
+ if (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX
520
+ off_t rest = len, curr;
521
+ while (rest) {
522
+ curr = read(fp->fd, buf + l, rest);
523
+ if (curr == 0) break;
524
+ l += curr; rest -= curr;
525
+ }
526
+ } else l = my_netread(fp->fd, buf, len);
527
+ fp->offset += l;
528
+ return l;
529
+ }
530
+
531
+ off_t knet_seek(knetFile *fp, int64_t off, int whence)
532
+ {
533
+ if (whence == SEEK_SET && off == fp->offset) return 0;
534
+ if (fp->type == KNF_TYPE_LOCAL) {
535
+ /* Be aware that lseek() returns the offset after seeking,
536
+ * while fseek() returns zero on success. */
537
+ off_t offset = lseek(fp->fd, off, whence);
538
+ if (offset == -1) {
539
+ // Be silent, it is OK for knet_seek to fail when the file is streamed
540
+ // fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
541
+ return -1;
542
+ }
543
+ fp->offset = offset;
544
+ return 0;
545
+ }
546
+ else if (fp->type == KNF_TYPE_FTP)
547
+ {
548
+ if (whence==SEEK_CUR)
549
+ fp->offset += off;
550
+ else if (whence==SEEK_SET)
551
+ fp->offset = off;
552
+ else if ( whence==SEEK_END)
553
+ fp->offset = fp->file_size+off;
554
+ fp->is_ready = 0;
555
+ return 0;
556
+ }
557
+ else if (fp->type == KNF_TYPE_HTTP)
558
+ {
559
+ if (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?
560
+ fprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\n");
561
+ errno = ESPIPE;
562
+ return -1;
563
+ }
564
+ if (whence==SEEK_CUR)
565
+ fp->offset += off;
566
+ else if (whence==SEEK_SET)
567
+ fp->offset = off;
568
+ fp->is_ready = 0;
569
+ return fp->offset;
570
+ }
571
+ errno = EINVAL;
572
+ fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
573
+ return -1;
574
+ }
575
+
576
+ int knet_close(knetFile *fp)
577
+ {
578
+ if (fp == 0) return 0;
579
+ if (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific
580
+ if (fp->fd != -1) {
581
+ /* On Linux/Mac, netclose() is an alias of close(), but on
582
+ * Windows, it is an alias of closesocket(). */
583
+ if (fp->type == KNF_TYPE_LOCAL) close(fp->fd);
584
+ else netclose(fp->fd);
585
+ }
586
+ free(fp->host); free(fp->port);
587
+ free(fp->response); free(fp->retr); free(fp->size_cmd); // FTP specific
588
+ free(fp->path); free(fp->http_host); // HTTP specific
589
+ free(fp);
590
+ return 0;
591
+ }
592
+
593
+ #ifdef KNETFILE_MAIN
594
+ int main(void)
595
+ {
596
+ char *buf;
597
+ knetFile *fp;
598
+ int type = 4, l;
599
+ #ifdef _WIN32
600
+ knet_win32_init();
601
+ #endif
602
+ buf = calloc(0x100000, 1);
603
+ if (type == 0) {
604
+ fp = knet_open("knetfile.c", "r");
605
+ knet_seek(fp, 1000, SEEK_SET);
606
+ } else if (type == 1) { // NCBI FTP, large file
607
+ fp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");
608
+ knet_seek(fp, 2500000000ll, SEEK_SET);
609
+ l = knet_read(fp, buf, 255);
610
+ } else if (type == 2) {
611
+ fp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");
612
+ knet_seek(fp, 1000, SEEK_SET);
613
+ } else if (type == 3) {
614
+ fp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");
615
+ knet_seek(fp, 1000, SEEK_SET);
616
+ } else if (type == 4) {
617
+ fp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");
618
+ knet_read(fp, buf, 10000);
619
+ knet_seek(fp, 20000, SEEK_SET);
620
+ knet_seek(fp, 10000, SEEK_SET);
621
+ l = knet_read(fp, buf+10000, 10000000) + 10000;
622
+ }
623
+ if (type != 4 && type != 1) {
624
+ knet_read(fp, buf, 255);
625
+ buf[255] = 0;
626
+ printf("%s\n", buf);
627
+ } else write(fileno(stdout), buf, l);
628
+ knet_close(fp);
629
+ free(buf);
630
+ return 0;
631
+ }
632
+ #endif
@@ -0,0 +1,75 @@
1
+ #ifndef KNETFILE_H
2
+ #define KNETFILE_H
3
+
4
+ #include <stdint.h>
5
+ #include <fcntl.h>
6
+
7
/* Thin portability layer over descriptor/socket I/O: plain read(), write()
 * and close() everywhere except Windows, where the winsock calls recv(),
 * send() and closesocket() are used instead. */
#ifndef _WIN32
#define netread(fd, ptr, len) read(fd, ptr, len)
#define netwrite(fd, ptr, len) write(fd, ptr, len)
#define netclose(fd) close(fd)
#else
#include <winsock2.h>
#define netread(fd, ptr, len) recv(fd, ptr, len, 0)
#define netwrite(fd, ptr, len) send(fd, ptr, len, 0)
#define netclose(fd) closesocket(fd)
#endif
17
+
18
+ // FIXME: currently I/O is unbuffered
19
+
20
/* Values of knetFile.type */
#define KNF_TYPE_LOCAL 1
#define KNF_TYPE_FTP 2
#define KNF_TYPE_HTTP 3

/* Handle for a local file or a remote file read over FTP or HTTP. */
typedef struct knetFile_s {
    int type;           /* one of KNF_TYPE_* */
    int fd;             /* data descriptor; -1 when not open */
    int64_t offset;     /* current read position */
    char *host;         /* server (or proxy) to connect to */
    char *port;         /* port or service name, as a string */

    // the following are for FTP only
    int ctrl_fd;        /* FTP control connection; -1 when not open */
    int pasv_ip[4];     /* address taken from the PASV reply */
    int pasv_port;      /* port taken from the PASV reply */
    int max_response;   /* allocated size of response */
    int no_reconnect;   /* set when the open mode contains 'c' */
    int is_ready;       /* non-zero once fd is positioned at offset */
    char *response;     /* last line of the latest FTP reply */
    char *retr;         /* "RETR <path>\r\n" */
    char *size_cmd;     /* "SIZE <path>\r\n" */
    int64_t seek_offset; // for lazy seek
    int64_t file_size;  /* size reported by the SIZE command */

    // the following are for HTTP only
    char *path;         /* request target sent in the GET line */
    char *http_host;    /* value of the Host: header */
} knetFile;

/* Current read position of the handle. */
#define knet_tell(fp) ((fp)->offset)
/* Data descriptor of the handle. */
#define knet_fileno(fp) ((fp)->fd)
41
+
42
+ #ifdef __cplusplus
43
+ extern "C" {
44
+ #endif
45
+
46
+ #ifdef _WIN32
47
+ int knet_win32_init();
48
+ void knet_win32_destroy();
49
+ #endif
50
+
51
+ knetFile *knet_open(const char *fn, const char *mode);
52
+
53
+ /*
54
+ This only works with local files.
55
+ */
56
+ knetFile *knet_dopen(int fd, const char *mode);
57
+
58
+ /*
59
+ If ->is_ready==0, this routine updates ->fd; otherwise, it simply
60
+ reads from ->fd.
61
+ */
62
+ off_t knet_read(knetFile *fp, void *buf, off_t len);
63
+
64
+ /*
65
+ This routine only sets ->offset and ->is_ready=0. It does not
66
+ communicate with the FTP server.
67
+ */
68
+ off_t knet_seek(knetFile *fp, int64_t off, int whence);
69
+ int knet_close(knetFile *fp);
70
+
71
+ #ifdef __cplusplus
72
+ }
73
+ #endif
74
+
75
+ #endif